pax_global_header00006660000000000000000000000064122660512400014510gustar00rootroot0000000000000052 comment=66ad12231e2d2416a0356283f7662d406895a3ea openvswitch-2.0.1+git20140120/000077500000000000000000000000001226605124000154525ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/.gitignore000066400000000000000000000007701226605124000174460ustar00rootroot00000000000000#*# *.a *.d *.ko *.la *.lo *.loT *.mod.c *.o *.o *.pyc *.so *~ *,cover .#* .*.cmd .*.swp .coverage .deps .dirstamp .libs .tmp_versions /Makefile /Makefile.in /aclocal.m4 /all-distfiles /all-gitfiles /autom4te.cache /build-arch-stamp /build-indep-stamp /compile /config.guess /config.h /config.h.in /config.log /config.status /config.sub /configure /configure-stamp /depcomp /distfiles /install-sh /manpage-check /missing /missing-distfiles /package.m4 /stamp-h1 Module.symvers TAGS cscope.* tags _debian openvswitch-2.0.1+git20140120/AUTHORS000066400000000000000000000240371226605124000165300ustar00rootroot00000000000000The following people, in alphabetical order, have either authored or signed off on commits in the Open vSwitch version control repository. Aaron Rosen arosen@clemson.edu Alexei Starovoitov ast@plumgrid.com Alexey I. 
Froloff raorn@altlinux.org Alex Wang alexw@nicira.com Alfredo Finelli alf@computationes.de Andrew Evans aevans@nicira.com Andrew Lambeth wal@nicira.com Andy Hill hillad@gmail.com Andy Southgate andy.southgate@citrix.com Andy Zhou azhou@nicira.com Ansis Atteka aatteka@nicira.com Anupam Chanda achanda@nicira.com Arun Sharma arun.sharma@calsoftinc.com Ben Pfaff blp@nicira.com Brian Kruger bkruger+ovsdev@gmail.com Bruce Davie bsd@nicira.com Bryan Phillippe bp@toroki.com Casey Barker crbarker@google.com Chris Wright chrisw@sous-sol.org Chuck Short zulcss@ubuntu.com Damien Millescamps damien.millescamps@6wind.com Dan Carpenter dan.carpenter@oracle.com Dan Wendlandt dan@nicira.com Daniel Roman droman@nicira.com Danny Kukawka danny.kukawka@bisect.de David Erickson derickso@stanford.edu David S. Miller davem@davemloft.net Devendra Naga devendra.aaru@gmail.com Dominic Curran dominic.curran@citrix.com Duffie Cooley dcooley@nicira.com Ed Maste emaste at freebsd.org Edward Tomasz Napierała trasz@freebsd.org Ethan Jackson ethan@nicira.com FUJITA Tomonori fujita.tomonori@lab.ntt.co.jp Gaetano Catalli gaetano.catalli@gmail.com Giuseppe Lettieri g.lettieri@iet.unipi.it Glen Gibb grg@stanford.edu Guolin Yang gyang@nicira.com Gurucharan Shetty gshetty@nicira.com Henry Mai hmai@nicira.com Hao Zheng hzheng@nicira.com Ian Campbell Ian.Campbell@citrix.com Isaku Yamahata yamahata@valinux.co.jp James P. 
roampune@gmail.com James Page james.page@ubuntu.com Jarno Rajahalme jarno.rajahalme@nsn.com Jean Tourrilhes jt@hpl.hp.com Jeremy Stribling strib@nicira.com Jesse Gross jesse@nicira.com Jing Ai jinga@google.com Joe Perches joe@perches.com Joe Stringer joe@wand.net.nz Jun Nakajima jun.nakajima@intel.com Justin Pettit jpettit@nicira.com Keith Amidon keith@nicira.com Krishna Kondaka kkondaka@vmware.com Kyle Mestery kmestery@cisco.com Leo Alterman lalterman@nicira.com Linda Sun lsun@vmware.com Lorand Jakab lojakab@cisco.com Luca Giraudo lgiraudo@nicira.com Mark Hamilton mhamilton@nicira.com Martin Casado casado@nicira.com Mehak Mahajan mmahajan@nicira.com Murphy McCauley murphy.mccauley@gmail.com Natasha Gude natasha@nicira.com Neil McKee neil.mckee@inmon.com Paraneetharan Chandrasekaran paraneetharanc@gmail.com Paul Fazzone pfazzone@nicira.com Paul Ingram paul@nicira.com Pavithra Ramesh paramesh@vmware.com Philippe Jung phil.jung@free.fr pritesh pritesh.kothari@cisco.com Pravin B Shelar pshelar@nicira.com Raju Subramanian rsubramanian@nicira.com Ravi Kerur Ravi.Kerur@telekom.com Reid Price reid@nicira.com Rich Lane rlane@bigswitch.com Rob Hoes rob.hoes@citrix.com Romain Lenglet romain.lenglet@berabera.info Sajjad Lateef slateef@nicira.com Sanjay Sane ssane@nicira.com Shan Wei davidshan@tencent.com Shih-Hao Li shli@nicira.com Simon Horman horms@verge.net.au Stephane A. 
Sezer sas@cd80.net SUGYO Kazushi sugyo.org@gmail.com Tadaaki Nagao nagao@stratosphere.co.jp Tetsuo NAKAGAWA nakagawa@mxc.nes.nec.co.jp Thomas Goirand zigo@debian.org Thomas Graf tgraf@redhat.com Thomas Lacroix thomas.lacroix@citrix.com Todd Deshane deshantm@gmail.com Tom Everman teverman@google.com Tsvi Slonim tsvi@toroki.com Tyler Coumbes coumbes@gmail.com Valient Gough vgough@pobox.com Vivien Bernet-Rollande vbr@soprive.net Wei Yongjun yjwei@cn.fujitsu.com Yasuhito Takamiya yasuhito@gmail.com Yu Zhiguo yuzg@cn.fujitsu.com ZhengLingyun konghuarukhr@163.com Zoltan Kiss zoltan.kiss@citrix.com Zhi Yong Wu zwu.kernel@gmail.com Zang MingJie zealot0630@gmail.com The following additional people are mentioned in commit logs as having provided helpful bug reports or suggestions. Aaron M. Ucko ucko@debian.org Adam Heath doogie@brainfood.com Ahmed Bilal numan252@gmail.com Alan Shieh ashieh@nicira.com Alban Browaeys prahal@yahoo.com Alex Yip alex@nicira.com Alexey I. Froloff raorn@altlinux.org Amar Padmanabhan amar@nicira.com Amey Bhide abhide@nicira.com Amre Shakimov ashakimov@vmware.com André Ruß andre.russ@hybris.com Andreas Beckmann debian@abeckmann.de Atzm Watanabe atzm@stratosphere.co.jp Bastian Blank waldi@debian.org Ben Basler bbasler@nicira.com Bob Ball bob.ball@citrix.com Brad Hall brad@nicira.com Brandon Heller brandonh@stanford.edu Brendan Kelley bkelley@nicira.com Brent Salisbury brent.salisbury@gmail.com Bryan Fulton bryan@nicira.com Bryan Osoro bosoro@nicira.com Cedric Hobbs cedric@nicira.com Christopher Paggen cpaggen@cisco.com Dave Walker DaveWalker@ubuntu.com David Palma palma@onesource.pt Derek Cormier derek.cormier@lab.ntt.co.jp Dhaval Badiani dbadiani@vmware.com DK Moon dkmoon@nicira.com Edwin Chiu echiu@nicira.com Eivind Bulie Haanaes Eric Lopez elopez@nicira.com Frido Roose fr.roose@gmail.com Gaetano Catalli gaetano.catalli@gmail.com George Shuklin amarao@desunote.ru Ghanem Bahri bahri.ghanem@gmail.com Giuseppe de Candia giuseppe.decandia@gmail.com 
Gordon Good ggood@nicira.com Greg Dahlman gdahlman@hotmail.com Gregor Schaffrath grsch@net.t-labs.tu-berlin.de Hassan Khan hassan.khan@seecs.edu.pk Hector Oron hector.oron@gmail.com Henrik Amren henrik@nicira.com Hiroshi Tanaka htanaka@nicira.com Hiroshi Miyata miyahiro.dazu@gmail.com Igor Ganichev iganichev@nicira.com Jacob Cherkas jcherkas@nicira.com Jad Naous jnaous@gmail.com Jamal Hadi Salim hadi@cyberus.ca James Schmidt jschmidt@nicira.com Jan Medved jmedved@juniper.net Janis Hamme janis.hamme@student.kit.edu Jari Sundell sundell.software@gmail.com Jed Daniels openvswitch@jeddaniels.com Jeff Merrick jmerrick@vmware.com Jeongkeun Lee jklee@hp.com Jian Qiu swordqiu@gmail.com Joan Cirer joan@ev0.net John Galgay john@galgay.net John Hurley john.hurley@netronome.com Kevin Mancuso kevin.mancuso@rackspace.com Kiran Shanbhog kiran@vmware.com Kirill Kabardin Koichi Yagishita yagishita.koichi@jrc.co.jp Konstantin Khorenko khorenko@openvz.org Kris zhang zhang.kris@gmail.com Krishna Miriyala krishna@nicira.com Logan Rosen logatronico@gmail.com Luca Falavigna dktrkranz@debian.org Luiz Henrique Ozaki luiz.ozaki@gmail.com Maxime Brun m.brun@alphalink.fr Michael A. 
Collins mike.a.collins@ark-net.org Michael Hu mhu@nicira.com Michael Mao mmao@nicira.com Michael Shigorin mike@osdn.org.ua Mike Bursell mike.bursell@citrix.com Mike Kruze mkruze@nicira.com Min Chen ustcer.tonychan@gmail.com Mikael Doverhag mdoverhag@nicira.com Nagi Reddy Jonnala njonnala@Brocade.com Niklas Andersson nandersson@nicira.com Padmanabhan Krishnan kprad1@yahoo.com Pankaj Thakkar thakkar@nicira.com Paulo Cravero pcravero@as2594.net Peter Balland peter@nicira.com Peter Phaal peter.phaal@inmon.com Prabina Pattnaik Prabina.Pattnaik@nechclst.in Pratap Reddy preddy@nicira.com Ralf Heiringhoff ralf@frosty-geek.net Ram Jothikumar rjothikumar@nicira.com Ramana Reddy gtvrreddy@gmail.com Rob Sherwood rob.sherwood@bigswitch.com Roger Leigh rleigh@codelibre.net Rogério Vinhal Nunes Roman Sokolkov rsokolkov@gmail.com Saul St. John sstjohn@cs.wisc.edu Scott Hendricks shendricks@nicira.com Sean Brady sbrady@gtfservices.com Sebastian Andrzej Siewior sebastian@breakpoint.cc Sébastien RICCIO sr@swisscenter.com Spiro Kourtessis spiro@vmware.com Srini Seetharaman seethara@stanford.edu Stephen Hemminger shemminger@vyatta.com Takayuki HAMA t-hama@cb.jp.nec.com Teemu Koponen koponen@nicira.com Timothy Chen tchen@nicira.com Valentin Bud valentin@hackaserver.com Vishal Swarankar vishal.swarnkar@gmail.com Vjekoslav Brajkovic balkan@cs.washington.edu Voravit T. voravit@kth.se YAMAMOTO Takashi yamamoto@valinux.co.jp Yeming Zhao zhaoyeming@gmail.com Ying Chen yingchen@vmware.com Yongqiang Liu liuyq7809@gmail.com kk yap yapkke@stanford.edu likunyun kunyunli@hotmail.com rahim entezari rahim.entezari@gmail.com 冯全树(Crab) fqs888@126.com 胡靖飞 hujingfei914@msn.com Thanks to all Open vSwitch contributors. If you are not listed above but believe that you should be, please write to dev@openvswitch.org. openvswitch-2.0.1+git20140120/COPYING000066400000000000000000000075401226605124000165130ustar00rootroot00000000000000This file is a summary of the licensing of files in this distribution. 
Some files may be marked specifically with a different license, in which case that license applies to the file in question. Most files are licensed under the Apache License, Version 2.0: Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. Files under the datapath directory are licensed under the GNU General Public License, version 2. Files under the xenserver directory are licensed on a file-by-file basis. Refer to each file for details. Files lib/sflow*.[ch] are licensed under the terms of either the Sun Industry Standards Source License 1.1, that is available at: http://host-sflow.sourceforge.net/sissl.html or the InMon sFlow License, that is available at: http://www.inmon.com/technology/sflowlicense.txt Files under python/compat are licensed under the Python Software Foundation License, version 2: PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -------------------------------------------- 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2. 
Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. 
By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. openvswitch-2.0.1+git20140120/CodingStyle000066400000000000000000000435441226605124000176330ustar00rootroot00000000000000 Open vSwitch Coding Style ========================= This file describes the coding style used in most C files in the Open vSwitch distribution. However, Linux kernel code datapath directory follows the Linux kernel's established coding conventions. BASICS Limit lines to 79 characters. Use form feeds (control+L) to divide long source files into logical pieces. A form feed should appear as the only character on a line. Do not use tabs for indentation. Avoid trailing spaces on lines. NAMING Use names that explain the purpose of a function or object. Use underscores to separate words in an identifier: multi_word_name. Use lowercase for most names. Use uppercase for macros, macro parameters, and members of enumerations. Give arrays names that are plural. Pick a unique name prefix (ending with an underscore) for each module, and apply that prefix to all of that module's externally visible names. Names of macro parameters, struct and union members, and parameters in function prototypes are not considered externally visible for this purpose. Do not use names that begin with _. If you need a name for "internal use only", use __ as a suffix instead of a prefix. Avoid negative names: "found" is a better name than "not_found". In names, a "size" is a count of bytes, a "length" is a count of characters. A buffer has size, but a string has length. The length of a string does not include the null terminator, but the size of the buffer that contains the string does. COMMENTS Comments should be written as full sentences that start with a capital letter and end with a period. Put two spaces between sentences. Write block comments as shown below. You may put the /* and */ on the same line as comment text if you prefer. 
/* * We redirect stderr to /dev/null because we often want to remove all * traffic control configuration on a port so it's in a known state. If * this is done when there is no such configuration, tc complains, so we just * always ignore it. */ Each function and each variable declared outside a function, and each struct, union, and typedef declaration should be preceded by a comment. See FUNCTIONS below for function comment guidelines. Each struct and union member should have an inline comment that explains its meaning. structs and unions with many members should be additionally divided into logical groups of members by block comments, e.g.: /* An event that will wake the following call to poll_block(). */ struct poll_waiter { /* Set when the waiter is created. */ struct list node; /* Element in global waiters list. */ int fd; /* File descriptor. */ short int events; /* Events to wait for (POLLIN, POLLOUT). */ poll_fd_func *function; /* Callback function, if any, or null. */ void *aux; /* Argument to callback function. */ struct backtrace *backtrace; /* Event that created waiter, or null. */ /* Set only when poll_block() is called. */ struct pollfd *pollfd; /* Pointer to element of the pollfds array (null if added from a callback). */ }; Use XXX or FIXME comments to mark code that needs work. Don't use // comments. Don't comment out or #if 0 out code. Just remove it. The code that was there will still be in version control history. FUNCTIONS Put the return type, function name, and the braces that surround the function's code on separate lines, all starting in column 0. Before each function definition, write a comment that describes the function's purpose, including each parameter, the return value, and side effects. References to argument names should be given in single-quotes, e.g. 'arg'. The comment should not include the function name, nor need it follow any formal structure. 
The comment does not need to describe how a function does its work, unless this information is needed to use the function correctly (this is often better done with comments *inside* the function). Simple static functions do not need a comment. Within a file, non-static functions should come first, in the order that they are declared in the header file, followed by static functions. Static functions should be in one or more separate pages (separated by form feed characters) in logical groups. A commonly useful way to divide groups is by "level", with high-level functions first, followed by groups of progressively lower-level functions. This makes it easy for the program's reader to see the top-down structure by reading from top to bottom. All function declarations and definitions should include a prototype. Empty parentheses, e.g. "int foo();", do not include a prototype (they state that the function's parameters are unknown); write "void" in parentheses instead, e.g. "int foo(void);". Prototypes for static functions should either all go at the top of the file, separated into groups by blank lines, or they should appear at the top of each page of functions. Don't comment individual prototypes, but a comment on each group of prototypes is often appropriate. In the absence of good reasons for another order, the following parameter order is preferred. One notable exception is that data parameters and their corresponding size parameters should be paired. 1. The primary object being manipulated, if any (equivalent to the "this" pointer in C++). 2. Input-only parameters. 3. Input/output parameters. 4. Output-only parameters. 5. Status parameter. Example: /* Stores the features supported by 'netdev' into each of '*current', * '*advertised', '*supported', and '*peer' that are non-null. Each value * is a bitmap of "enum ofp_port_features" bits, in host byte order. * Returns 0 if successful, otherwise a positive errno value. 
On failure, * all of the passed-in values are set to 0. */ int netdev_get_features(struct netdev *netdev, uint32_t *current, uint32_t *advertised, uint32_t *supported, uint32_t *peer) { ... } Functions that destroy an instance of a dynamically-allocated type should accept and ignore a null pointer argument. Code that calls such a function (including the C standard library function free()) should omit a null-pointer check. We find that this usually makes code easier to read. FUNCTION PROTOTYPES Put the return type and function name on the same line in a function prototype: static const struct option_class *get_option_class(int code); Omit parameter names from function prototypes when the names do not give useful information, e.g.: int netdev_get_mtu(const struct netdev *, int *mtup); STATEMENTS Indent each level of code with 4 spaces. Use BSD-style brace placement: if (a()) { b(); d(); } Put a space between "if", "while", "for", etc. and the expressions that follow them. Enclose single statements in braces: if (a > b) { return a; } else { return b; } Use comments and blank lines to divide long functions into logical groups of statements. Avoid assignments inside "if" and "while" conditions. Do not put gratuitous parentheses around the expression in a return statement, that is, write "return 0;" and not "return(0);" Write only one statement per line. Indent "switch" statements like this: switch (conn->state) { case S_RECV: error = run_connection_input(conn); break; case S_PROCESS: error = 0; break; case S_SEND: error = run_connection_output(conn); break; default: NOT_REACHED(); } "switch" statements with very short, uniform cases may use an abbreviated style: switch (code) { case 200: return "OK"; case 201: return "Created"; case 202: return "Accepted"; case 204: return "No Content"; default: return "Unknown"; } Use "for (;;)" to write an infinite loop. 
In an if/else construct where one branch is the "normal" or "common" case and the other branch is the "uncommon" or "error" case, put the common case after the "if", not the "else". This is a form of documentation. It also places the most important code in sequential order without forcing the reader to visually skip past less important details. (Some compilers also assume that the "if" branch is the more common case, so this can be a real form of optimization as well.) RETURN VALUES For functions that return a success or failure indication, prefer one of the following return value conventions: * An "int" where 0 indicates success and a positive errno value indicates a reason for failure. * A "bool" where true indicates success and false indicates failure. MACROS Don't define an object-like macro if an enum can be used instead. Don't define a function-like macro if a "static inline" function can be used instead. If a macro's definition contains multiple statements, enclose them with "do { ... } while (0)" to allow them to work properly in all syntactic circumstances. Do use macros to eliminate the need to update different parts of a single file in parallel, e.g. a list of enums and an array that gives the name of each enum. For example: /* Logging importance levels. */ #define VLOG_LEVELS \ VLOG_LEVEL(EMER, LOG_ALERT) \ VLOG_LEVEL(ERR, LOG_ERR) \ VLOG_LEVEL(WARN, LOG_WARNING) \ VLOG_LEVEL(INFO, LOG_NOTICE) \ VLOG_LEVEL(DBG, LOG_DEBUG) enum vlog_level { #define VLOG_LEVEL(NAME, SYSLOG_LEVEL) VLL_##NAME, VLOG_LEVELS #undef VLOG_LEVEL VLL_N_LEVELS }; /* Name for each logging level. */ static const char *level_names[VLL_N_LEVELS] = { #define VLOG_LEVEL(NAME, SYSLOG_LEVEL) #NAME, VLOG_LEVELS #undef VLOG_LEVEL }; THREAD SAFETY ANNOTATIONS Use the macros in lib/compiler.h to annotate locking requirements. 
For example: static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER; void function_require_plain_mutex(void) OVS_REQUIRES(mutex); void function_require_rwlock(void) OVS_REQ_RDLOCK(rwlock); Pass lock objects, not their addresses, to the annotation macros. (Thus we have OVS_REQUIRES(mutex) above, not OVS_REQUIRES(&mutex).) SOURCE FILES Each source file should state its license in a comment at the very top, followed by a comment explaining the purpose of the code that is in that file. The comment should explain how the code in the file relates to code in other files. The goal is to allow a programmer to quickly figure out where a given module fits into the larger system. The first non-comment line in a .c source file should be: #include <config.h> #include directives should appear in the following order: 1. #include <config.h> 2. The module's own headers, if any. Including this before any other header (besides <config.h>) ensures that the module's header file is self-contained (see HEADER FILES below). 3. Standard C library headers and other system headers, preferably in alphabetical order. (Occasionally one encounters a set of system headers that must be included in a particular order, in which case that order must take precedence.) 4. Open vSwitch headers, in alphabetical order. Use "", not <>, to specify Open vSwitch header names. HEADER FILES Each header file should start with its license, as described under SOURCE FILES above, followed by a "header guard" to make the header file idempotent, like so: #ifndef NETDEV_H #define NETDEV_H 1 ... #endif /* netdev.h */ Header files should be self-contained; that is, they should #include whatever additional headers are required, without requiring the client to #include them for it. Don't define the members of a struct or union in a header file, unless client code is actually intended to access them directly or if the definition is otherwise actually needed (e.g. 
inline functions defined in the header need them). Similarly, don't #include a header file just for the declaration of a struct or union tag (e.g. just for "struct <name>;"). Just declare the tag yourself. This reduces the number of header file dependencies. TYPES Use typedefs sparingly. Code is clearer if the actual type is visible at the point of declaration. Do not, in general, declare a typedef for a struct, union, or enum. Do not declare a typedef for a pointer type, because this can be very confusing to the reader. A function type is a good use for a typedef because it can clarify code. The type should be a function type, not a pointer-to-function type. That way, the typedef name can be used to declare function prototypes. (It cannot be used for function definitions, because that is explicitly prohibited by C89 and C99.) You may assume that "char" is exactly 8 bits and that "int" and "long" are at least 32 bits. Don't assume that "long" is big enough to hold a pointer. If you need to cast a pointer to an integer, use "intptr_t" or "uintptr_t" from <stdint.h>. Use the int<N>_t and uint<N>_t types from <stdint.h> for exact-width integer types. Use the PRId<N>, PRIu<N>, and PRIx<N> macros from <inttypes.h> for formatting them with printf() and related functions. Use %zu to format size_t with printf(). Use bit-fields sparingly. Do not use bit-fields for layout of network protocol fields or in other circumstances where the exact format is important. Declare bit-fields to be type "unsigned int" or "signed int". Do *not* declare bit-fields of type "int": C89 allows these to be either signed or unsigned according to the compiler's whim. (A 1-bit bit-field of type "int" may have a range of -1...0!) Do not declare bit-fields of type _Bool or enum or any other type, because these are not portable. Try to order structure members such that they pack well on a system with 2-byte "short", 4-byte "int", and 4- or 8-byte "long" and pointer types. 
Prefer clear organization over size optimization unless you are convinced there is a size or speed benefit. Pointer declarators bind to the variable name, not the type name. Write "int *x", not "int* x" and definitely not "int * x". EXPRESSIONS Put one space on each side of infix binary and ternary operators: * / % + - << >> < <= > >= == != & ^ | && || ?: = += -= *= /= %= &= ^= |= <<= >>= Avoid comma operators. Do not put any white space around postfix, prefix, or grouping operators: () [] -> . ! ~ ++ -- + - * & Exception 1: Put a space after (but not before) the "sizeof" keyword. Exception 2: Put a space between the () used in a cast and the expression whose type is cast: (void *) 0. Break long lines before the ternary operators ? and :, rather than after them, e.g. return (out_port != VIGP_CONTROL_PATH ? alpheus_output_port(dp, skb, out_port) : alpheus_output_control(dp, skb, fwd_save_skb(skb), VIGR_ACTION)); Do not parenthesize the operands of && and || unless operator precedence makes it necessary, or unless the operands are themselves expressions that use && and ||. Thus: if (!isdigit((unsigned char)s[0]) || !isdigit((unsigned char)s[1]) || !isdigit((unsigned char)s[2])) { printf("string %s does not start with 3-digit code\n", s); } but if (rule && (!best || rule->priority > best->priority)) { best = rule; } Do parenthesize a subexpression that must be split across more than one line, e.g.: *idxp = ((l1_idx << PORT_ARRAY_L1_SHIFT) | (l2_idx << PORT_ARRAY_L2_SHIFT) | (l3_idx << PORT_ARRAY_L3_SHIFT)); Try to avoid casts. Don't cast the return value of malloc(). The "sizeof" operator is unique among C operators in that it accepts two very different kinds of operands: an expression or a type. In general, prefer to specify an expression, e.g. "int *x = xmalloc(sizeof *x);". When the operand of sizeof is an expression, there is no need to parenthesize that operand, and please don't. 
Use the ARRAY_SIZE macro from lib/util.h to calculate the number of elements in an array. When using a relational operator like "<" or "==", put an expression or variable argument on the left and a constant argument on the right, e.g. "x == 0", *not* "0 == x". BLANK LINES Put one blank line between top-level definitions of functions and global variables. C DIALECT Some C99 features are OK because they are widely implemented even in older compilers: * Flexible array members (e.g. struct { int foo[]; }). * "static inline" functions (but no other forms of "inline", for which GCC and C99 have differing interpretations). * "long long" * <stdint.h> and <inttypes.h>. * bool and <stdbool.h>, but don't assume that bool or _Bool can only take on the values 0 or 1, because this behavior can't be simulated on C89 compilers. Don't use other C99 features that are not widely implemented in older compilers: * Don't use designated initializers (e.g. don't write "struct foo foo = {.a = 1};" or "int a[] = {[2] = 5};"). * Don't mix declarations and code within a block. * Don't use declarations in iteration statements (e.g. don't write "for (int i = 0; i < 10; i++)"). * Don't put a trailing comma in an enum declaration (e.g. don't write "enum { x = 1, };"). As a matter of style, avoid // comments. Avoid using GCC or Clang extensions unless you also add a fallback for other compilers. You can, however, use C99 features or GCC extensions also supported by Clang in code that compiles only on GNU/Linux (such as lib/netdev-linux.c), because GCC is the system compiler there. openvswitch-2.0.1+git20140120/DESIGN000066400000000000000000001133731226605124000163560ustar00rootroot00000000000000 Design Decisions In Open vSwitch ================================ This document describes design decisions that went into implementing Open vSwitch. While we believe these to be reasonable decisions, it is impossible to predict how Open vSwitch will be used in all environments. 
Understanding assumptions made by Open vSwitch is critical to a successful deployment. The end of this document contains contact information that can be used to let us know how we can make Open vSwitch more generally useful. Asynchronous Messages ===================== Over time, Open vSwitch has added many knobs that control whether a given controller receives OpenFlow asynchronous messages. This section describes how all of these features interact. First, a service controller never receives any asynchronous messages unless it changes its miss_send_len from the service controller default of zero in one of the following ways: - Sending an OFPT_SET_CONFIG message with nonzero miss_send_len. - Sending any NXT_SET_ASYNC_CONFIG message: as a side effect, this message changes the miss_send_len to OFP_DEFAULT_MISS_SEND_LEN (128) for service controllers. Second, OFPT_FLOW_REMOVED and NXT_FLOW_REMOVED messages are generated only if the flow that was removed had the OFPFF_SEND_FLOW_REM flag set. Third, OFPT_PACKET_IN and NXT_PACKET_IN messages are sent only to OpenFlow controller connections that have the correct connection ID (see "struct nx_controller_id" and "struct nx_action_controller"): - For packet-in messages generated by a NXAST_CONTROLLER action, the controller ID specified in the action. - For other packet-in messages, controller ID zero. (This is the default ID when an OpenFlow controller does not configure one.) Finally, Open vSwitch consults a per-connection table indexed by the message type, reason code, and current role. The following table shows how this table is initialized by default when an OpenFlow connection is made. An entry labeled "yes" means that the message is sent, an entry labeled "---" means that the message is suppressed. 
master/ message and reason code other slave ---------------------------------------- ------- ----- OFPT_PACKET_IN / NXT_PACKET_IN OFPR_NO_MATCH yes --- OFPR_ACTION yes --- OFPR_INVALID_TTL --- --- OFPT_FLOW_REMOVED / NXT_FLOW_REMOVED OFPRR_IDLE_TIMEOUT yes --- OFPRR_HARD_TIMEOUT yes --- OFPRR_DELETE yes --- OFPT_PORT_STATUS OFPPR_ADD yes yes OFPPR_DELETE yes yes OFPPR_MODIFY yes yes The NXT_SET_ASYNC_CONFIG message directly sets all of the values in this table for the current connection. The OFPC_INVALID_TTL_TO_CONTROLLER bit in the OFPT_SET_CONFIG message controls the setting for OFPR_INVALID_TTL for the "master" role. OFPAT_ENQUEUE ============= The OpenFlow 1.0 specification requires the output port of the OFPAT_ENQUEUE action to "refer to a valid physical port (i.e. < OFPP_MAX) or OFPP_IN_PORT". Although OFPP_LOCAL is not less than OFPP_MAX, it is an 'internal' port which can have QoS applied to it in Linux. Since we allow the OFPAT_ENQUEUE to apply to 'internal' ports whose port numbers are less than OFPP_MAX, we interpret OFPP_LOCAL as a physical port and support OFPAT_ENQUEUE on it as well. OFPT_FLOW_MOD ============= The OpenFlow specification for the behavior of OFPT_FLOW_MOD is confusing. The following tables summarize the Open vSwitch implementation of its behavior in the following categories: - "match on priority": Whether the flow_mod acts only on flows whose priority matches that included in the flow_mod message. - "match on out_port": Whether the flow_mod acts only on flows that output to the out_port included in the flow_mod message (if out_port is not OFPP_NONE). OpenFlow 1.1 and later have a similar feature (not listed separately here) for out_group. - "match on flow_cookie": Whether the flow_mod acts only on flows whose flow_cookie matches an optional controller-specified value and mask. - "updates flow_cookie": Whether the flow_mod changes the flow_cookie of the flow or flows that it matches to the flow_cookie included in the flow_mod message. 
- "updates OFPFF_ flags": Whether the flow_mod changes the OFPFF_SEND_FLOW_REM flag of the flow or flows that it matches to the setting included in the flags of the flow_mod message. - "honors OFPFF_CHECK_OVERLAP": Whether the OFPFF_CHECK_OVERLAP flag in the flow_mod is significant. - "updates idle_timeout" and "updates hard_timeout": Whether the idle_timeout and hard_timeout in the flow_mod, respectively, have an effect on the flow or flows matched by the flow_mod. - "updates idle timer": Whether the flow_mod resets the per-flow timer that measures how long a flow has been idle. - "updates hard timer": Whether the flow_mod resets the per-flow timer that measures how long it has been since a flow was modified. - "zeros counters": Whether the flow_mod resets per-flow packet and byte counters to zero. - "may add a new flow": Whether the flow_mod may add a new flow to the flow table. (Obviously this is always true for "add" commands but in some OpenFlow versions "modify" and "modify-strict" can also add new flows.) - "sends flow_removed message": Whether the flow_mod generates a flow_removed message for the flow or flows that it affects. An entry labeled "yes" means that the flow mod type does have the indicated behavior, "---" means that it does not, an empty cell means that the property is not applicable, and other values are explained below the table. 
OpenFlow 1.0 ------------ MODIFY DELETE ADD MODIFY STRICT DELETE STRICT === ====== ====== ====== ====== match on priority yes --- yes --- yes match on out_port --- --- --- yes yes match on flow_cookie --- --- --- --- --- match on table_id --- --- --- --- --- controller chooses table_id --- --- --- updates flow_cookie yes yes yes updates OFPFF_SEND_FLOW_REM yes + + honors OFPFF_CHECK_OVERLAP yes + + updates idle_timeout yes + + updates hard_timeout yes + + resets idle timer yes + + resets hard timer yes yes yes zeros counters yes + + may add a new flow yes yes yes sends flow_removed message --- --- --- % % (+) "modify" and "modify-strict" only take these actions when they create a new flow, not when they update an existing flow. (%) "delete" and "delete_strict" generates a flow_removed message if the deleted flow or flows have the OFPFF_SEND_FLOW_REM flag set. (Each controller can separately control whether it wants to receive the generated messages.) OpenFlow 1.1 ------------ OpenFlow 1.1 makes these changes: - The controller now must specify the table_id of the flow match searched and into which a flow may be inserted. Behavior for a table_id of 255 is undefined. - A flow_mod, except an "add", can now match on the flow_cookie. - When a flow_mod matches on the flow_cookie, "modify" and "modify-strict" never insert a new flow. 
MODIFY DELETE ADD MODIFY STRICT DELETE STRICT === ====== ====== ====== ====== match on priority yes --- yes --- yes match on out_port --- --- --- yes yes match on flow_cookie --- yes yes yes yes match on table_id yes yes yes yes yes controller chooses table_id yes yes yes updates flow_cookie yes --- --- updates OFPFF_SEND_FLOW_REM yes + + honors OFPFF_CHECK_OVERLAP yes + + updates idle_timeout yes + + updates hard_timeout yes + + resets idle timer yes + + resets hard timer yes yes yes zeros counters yes + + may add a new flow yes # # sends flow_removed message --- --- --- % % (+) "modify" and "modify-strict" only take these actions when they create a new flow, not when they update an existing flow. (%) "delete" and "delete_strict" generates a flow_removed message if the deleted flow or flows have the OFPFF_SEND_FLOW_REM flag set. (Each controller can separately control whether it wants to receive the generated messages.) (#) "modify" and "modify-strict" only add a new flow if the flow_mod does not match on any bits of the flow cookie OpenFlow 1.2 ------------ OpenFlow 1.2 makes these changes: - Only "add" commands ever add flows, "modify" and "modify-strict" never do. - A new flag OFPFF_RESET_COUNTS now controls whether "modify" and "modify-strict" reset counters, whereas previously they never reset counters (except when they inserted a new flow). 
MODIFY DELETE ADD MODIFY STRICT DELETE STRICT === ====== ====== ====== ====== match on priority yes --- yes --- yes match on out_port --- --- --- yes yes match on flow_cookie --- yes yes yes yes match on table_id yes yes yes yes yes controller chooses table_id yes yes yes updates flow_cookie yes --- --- updates OFPFF_SEND_FLOW_REM yes --- --- honors OFPFF_CHECK_OVERLAP yes --- --- updates idle_timeout yes --- --- updates hard_timeout yes --- --- resets idle timer yes --- --- resets hard timer yes yes yes zeros counters yes & & may add a new flow yes --- --- sends flow_removed message --- --- --- % % (%) "delete" and "delete_strict" generates a flow_removed message if the deleted flow or flows have the OFPFF_SEND_FLOW_REM flag set. (Each controller can separately control whether it wants to receive the generated messages.) (&) "modify" and "modify-strict" reset counters if the OFPFF_RESET_COUNTS flag is specified. OpenFlow 1.3 ------------ OpenFlow 1.3 makes these changes: - Behavior for a table_id of 255 is now defined, for "delete" and "delete-strict" commands, as meaning to delete from all tables. A table_id of 255 is now explicitly invalid for other commands. - New flags OFPFF_NO_PKT_COUNTS and OFPFF_NO_BYT_COUNTS for "add" operations. The table for 1.3 is the same as the one shown above for 1.2. VLAN Matching ============= The 802.1Q VLAN header causes more trouble than any other 4 bytes in networking. More specifically, three versions of OpenFlow and Open vSwitch have among them four different ways to match the contents and presence of the VLAN header. The following table describes how each version works. Match NXM OF1.0 OF1.1 OF1.2 ----- --------- ----------- ----------- ------------ [1] 0000/0000 ????/1,??/? ????/1,??/? 0000/0000,-- [2] 0000/ffff ffff/0,??/? ffff/0,??/? 
0000/ffff,-- [3] 1xxx/1fff 0xxx/0,??/1 0xxx/0,??/1 1xxx/ffff,-- [4] z000/f000 ????/1,0y/0 fffe/0,0y/0 1000/1000,0y [5] zxxx/ffff 0xxx/0,0y/0 0xxx/0,0y/0 1xxx/ffff,0y [6] 0000/0fff (none) (none) (none) [7] 0000/f000 (none) (none) (none) [8] 0000/efff (none) (none) (none) [9] 1001/1001 (none) (none) 1001/1001,-- [10] 3000/3000 (none) (none) (none) Each column is interpreted as follows. - Match: See the list below. - NXM: xxxx/yyyy means NXM_OF_VLAN_TCI_W with value xxxx and mask yyyy. A mask of 0000 is equivalent to omitting NXM_OF_VLAN_TCI(_W), a mask of ffff is equivalent to NXM_OF_VLAN_TCI. - OF1.0 and OF1.1: wwww/x,yy/z means dl_vlan wwww, OFPFW_DL_VLAN x, dl_vlan_pcp yy, and OFPFW_DL_VLAN_PCP z. ? means that the given nibble is ignored (and conventionally 0 for wwww or yy, conventionally 1 for x or z). (none) means that the given match is not supported. - OF1.2: xxxx/yyyy,zz means OXM_OF_VLAN_VID_W with value xxxx and mask yyyy, and OXM_OF_VLAN_PCP (which is not maskable) with value zz. A mask of 0000 is equivalent to omitting OXM_OF_VLAN_VID(_W), a mask of ffff is equivalent to OXM_OF_VLAN_VID. -- means that OXM_OF_VLAN_PCP is omitted. (none) means that the given match is not supported. The matches are: [1] Matches any packet, that is, one without an 802.1Q header or with an 802.1Q header with any TCI value. [2] Matches only packets without an 802.1Q header. NXM: Any match with (vlan_tci == 0) and (vlan_tci_mask & 0x1000) != 0 is equivalent to the one listed in the table. OF1.0: The spec doesn't define behavior if dl_vlan is set to 0xffff and OFPFW_DL_VLAN_PCP is not set. OF1.1: The spec says explicitly to ignore dl_vlan_pcp when dl_vlan is set to 0xffff. OF1.2: The spec doesn't say what should happen if (vlan_vid == 0) and (vlan_vid_mask & 0x1000) != 0 but (vlan_vid_mask != 0x1000), but it would be straightforward to also interpret as [2]. [3] Matches only packets that have an 802.1Q header with VID xxx (and any PCP). [4] Matches only packets that have an 802.1Q header with PCP y (and any VID). NXM: z is ((y << 1) | 1). 
OF1.0: The spec isn't very clear, but OVS implements it this way. OF1.2: Presumably other masks such that (vlan_vid_mask & 0x1fff) == 0x1000 would also work, but the spec doesn't define their behavior. [5] Matches only packets that have an 802.1Q header with VID xxx and PCP y. NXM: z is ((y << 1) | 1). OF1.2: Presumably other masks such that (vlan_vid_mask & 0x1fff) == 0x1fff would also work. [6] Matches packets with no 802.1Q header or with an 802.1Q header with a VID of 0. Only possible with NXM. [7] Matches packets with no 802.1Q header or with an 802.1Q header with a PCP of 0. Only possible with NXM. [8] Matches packets with no 802.1Q header or with an 802.1Q header with both VID and PCP of 0. Only possible with NXM. [9] Matches only packets that have an 802.1Q header with an odd-numbered VID (and any PCP). Only possible with NXM and OF1.2. (This is just an example; one can match on any desired VID bit pattern.) [10] Matches only packets that have an 802.1Q header with an odd-numbered PCP (and any VID). Only possible with NXM. (This is just an example; one can match on any desired PCP bit pattern.) Additional notes: - OF1.2: The top three bits of OXM_OF_VLAN_VID are fixed to zero, so bits 13, 14, and 15 in the masks listed in the table may be set to arbitrary values, as long as the corresponding value bits are also zero. The suggested ffff mask for [2], [3], and [5] allows a shorter OXM representation (the mask is omitted) than the minimal 1fff mask. Flow Cookies ============ OpenFlow 1.0 and later versions have the concept of a "flow cookie", which is a 64-bit integer value attached to each flow. The treatment of the flow cookie has varied greatly across OpenFlow versions, however. In OpenFlow 1.0: - OFPFC_ADD set the cookie in the flow that it added. - OFPFC_MODIFY and OFPFC_MODIFY_STRICT updated the cookie for the flow or flows that it modified. - OFPST_FLOW messages included the flow cookie. 
- OFPT_FLOW_REMOVED messages reported the cookie of the flow that was removed. OpenFlow 1.1 made the following changes: - Flow mod operations OFPFC_MODIFY, OFPFC_MODIFY_STRICT, OFPFC_DELETE, and OFPFC_DELETE_STRICT, plus flow stats requests and aggregate stats requests, gained the ability to match on flow cookies with an arbitrary mask. - OFPFC_MODIFY and OFPFC_MODIFY_STRICT were changed to add a new flow, in the case of no match, only if the flow table modification operation did not match on the cookie field. (In OpenFlow 1.0, modify operations always added a new flow when there was no match.) - OFPFC_MODIFY and OFPFC_MODIFY_STRICT no longer updated flow cookies. OpenFlow 1.2 made the following changes: - OFPFC_MODIFY and OFPFC_MODIFY_STRICT were changed to never add a new flow, regardless of whether the flow cookie was used for matching. Open vSwitch support for OpenFlow 1.0 implements the OpenFlow 1.0 behavior with the following extensions: - An NXM extension field NXM_NX_COOKIE(_W) allows the NXM versions of OFPFC_MODIFY, OFPFC_MODIFY_STRICT, OFPFC_DELETE, and OFPFC_DELETE_STRICT flow_mods, plus flow stats requests and aggregate stats requests, to match on flow cookies with arbitrary masks. This is much like the equivalent OpenFlow 1.1 feature. - Like OpenFlow 1.1, OFPFC_MODIFY and OFPFC_MODIFY_STRICT add a new flow if there is no match and the mask is zero (or not given). - The "cookie" field in OFPT_FLOW_MOD and NXT_FLOW_MOD messages is used as the cookie value for OFPFC_ADD commands, as described in OpenFlow 1.0. For OFPFC_MODIFY and OFPFC_MODIFY_STRICT commands, the "cookie" field is used as a new cookie for flows that match unless it is UINT64_MAX, in which case the flow's cookie is not updated. - NXT_PACKET_IN (the Nicira extended version of OFPT_PACKET_IN) reports the cookie of the rule that generated the packet, or all-1-bits if no rule generated the packet. (Older versions of OVS used all-0-bits instead of all-1-bits.) 
The following table shows the handling of different protocols when receiving OFPFC_MODIFY and OFPFC_MODIFY_STRICT messages. A mask of 0 indicates either an explicit mask of zero or an implicit one by not specifying the NXM_NX_COOKIE(_W) field. Match Update Add on miss Add on miss cookie cookie mask!=0 mask==0 ====== ====== =========== =========== OpenFlow 1.0 no yes (always add on miss) OpenFlow 1.1 yes no no yes OpenFlow 1.2 yes no no no NXM yes yes* no yes * Updates the flow's cookie unless the "cookie" field is UINT64_MAX. Multiple Table Support ====================== OpenFlow 1.0 has only rudimentary support for multiple flow tables. Notably, OpenFlow 1.0 does not allow the controller to specify the flow table to which a flow is to be added. Open vSwitch adds an extension for this purpose, which is enabled on a per-OpenFlow connection basis using the NXT_FLOW_MOD_TABLE_ID message. When the extension is enabled, the upper 8 bits of the 'command' member in an OFPT_FLOW_MOD or NXT_FLOW_MOD message designate the table to which a flow is to be added. The Open vSwitch software switch implementation offers 255 flow tables. On packet ingress, only the first flow table (table 0) is searched, and the contents of the remaining tables are not considered in any way. Tables other than table 0 only come into play when an NXAST_RESUBMIT_TABLE action specifies another table to search. Tables 128 and above are reserved for use by the switch itself. Controllers should use only tables 0 through 127. IPv6 ==== Open vSwitch supports stateless handling of IPv6 packets. Flows can be written to support matching TCP, UDP, and ICMPv6 headers within an IPv6 packet. Deeper matching of some Neighbor Discovery messages is also supported. IPv6 was not designed to interact well with middle-boxes. This, combined with Open vSwitch's stateless nature, has affected the processing of IPv6 traffic, which is detailed below. 
Extension Headers ----------------- The base IPv6 header is incredibly simple with the intention of only containing information relevant for routing packets between two endpoints. IPv6 relies heavily on the use of extension headers to provide any other functionality. Unfortunately, the extension headers were designed in such a way that it is impossible to move to the next header (including the layer-4 payload) unless the current header is understood. Open vSwitch will process the following extension headers and continue to the next header: * Fragment (see the next section) * AH (Authentication Header) * Hop-by-Hop Options * Routing * Destination Options When a header is encountered that is not in that list, it is considered "terminal". A terminal header's IPv6 protocol value is stored in "nw_proto" for matching purposes. If a terminal header is TCP, UDP, or ICMPv6, the packet will be further processed in an attempt to extract layer-4 information. Fragments --------- IPv6 requires that every link in the internet have an MTU of 1280 octets or greater (RFC 2460). As such, a terminal header (as described above in "Extension Headers") in the first fragment should generally be reachable. In this case, the terminal header's IPv6 protocol type is stored in the "nw_proto" field for matching purposes. If a terminal header cannot be found in the first fragment (one with a fragment offset of zero), the "nw_proto" field is set to 0. Subsequent fragments (those with a non-zero fragment offset) have the "nw_proto" field set to the IPv6 protocol type for fragments (44). Jumbograms ---------- An IPv6 jumbogram (RFC 2675) is a packet containing a payload longer than 65,535 octets. Jumbograms are only relevant in subnets with a link MTU greater than 65,575 octets, and are not required to be supported on nodes that do not connect to links with such large MTUs. Currently, Open vSwitch doesn't process jumbograms. 
In-Band Control =============== Motivation ---------- An OpenFlow switch must establish and maintain a TCP network connection to its controller. There are two basic ways to categorize the network that this connection traverses: either it is completely separate from the one that the switch is otherwise controlling, or its path may overlap the network that the switch controls. We call the former case "out-of-band control", the latter case "in-band control". Out-of-band control has the following benefits: - Simplicity: Out-of-band control slightly simplifies the switch implementation. - Reliability: Excessive switch traffic volume cannot interfere with control traffic. - Integrity: Machines not on the control network cannot impersonate a switch or a controller. - Confidentiality: Machines not on the control network cannot snoop on control traffic. In-band control, on the other hand, has the following advantages: - No dedicated port: There is no need to dedicate a physical switch port to control, which is important on switches that have few ports (e.g. wireless routers, low-end embedded platforms). - No dedicated network: There is no need to build and maintain a separate control network. This is important in many environments because it reduces proliferation of switches and wiring. Open vSwitch supports both out-of-band and in-band control. This section describes the principles behind in-band control. See the description of the Controller table in ovs-vswitchd.conf.db(5) to configure OVS for in-band control. Principles ---------- The fundamental principle of in-band control is that an OpenFlow switch must recognize and switch control traffic without involving the OpenFlow controller. All the details of implementing in-band control are special cases of this principle. The rationale for this principle is simple. 
If the switch does not handle in-band control traffic itself, then it will be caught in a contradiction: it must contact the controller, but it cannot, because only the controller can set up the flows that are needed to contact the controller. The following points describe important special cases of this principle. - In-band control must be implemented regardless of whether the switch is connected. It is tempting to implement the in-band control rules only when the switch is not connected to the controller, using the reasoning that the controller should have complete control once it has established a connection with the switch. This does not work in practice. Consider the case where the switch is connected to the controller. Occasionally it can happen that the controller forgets or otherwise needs to obtain the MAC address of the switch. To do so, the controller sends a broadcast ARP request. A switch that implements the in-band control rules only when it is disconnected will then send an OFPT_PACKET_IN message up to the controller. The controller will be unable to respond, because it does not know the MAC address of the switch. This is a deadlock situation that can only be resolved by the switch noticing that its connection to the controller has hung and reconnecting. - In-band control must override flows set up by the controller. It is reasonable to assume that flows set up by the OpenFlow controller should take precedence over in-band control, on the basis that the controller should be in charge of the switch. Again, this does not work in practice. Reasonable controller implementations may set up a "last resort" fallback rule that wildcards every field and, e.g., sends it up to the controller or discards it. If a controller does that, then it will isolate itself from the switch. - The switch must recognize all control traffic. 
The fundamental principle of in-band control states, in part, that a switch must recognize control traffic without involving the OpenFlow controller. More specifically, the switch must recognize *all* control traffic. "False negatives", that is, packets that constitute control traffic but that the switch does not recognize as control traffic, lead to control traffic storms. Consider an OpenFlow switch that only recognizes control packets sent to or from that switch. Now suppose that two switches of this type, named A and B, are connected to ports on an Ethernet hub (not a switch) and that an OpenFlow controller is connected to a third hub port. In this setup, control traffic sent by switch A will be seen by switch B, which will send it to the controller as part of an OFPT_PACKET_IN message. Switch A will then see the OFPT_PACKET_IN message's packet, re-encapsulate it in another OFPT_PACKET_IN, and send it to the controller. Switch B will then see that OFPT_PACKET_IN, and so on in an infinite loop. Incidentally, the consequences of "false positives", where packets that are not control traffic are nevertheless recognized as control traffic, are much less severe. The controller will not be able to control their behavior, but the network will remain in working order. False positives do constitute a security problem. - The switch should use echo-requests to detect disconnection. TCP will notice that a connection has hung, but this can take a considerable amount of time. For example, with default settings the Linux kernel TCP implementation will retransmit for between 13 and 30 minutes, depending on the connection's retransmission timeout, according to kernel documentation. This is far too long for a switch to be disconnected, so an OpenFlow switch should implement its own connection timeout. OpenFlow OFPT_ECHO_REQUEST messages are the best way to do this, since they test the OpenFlow connection itself. 
Implementation -------------- This section describes how Open vSwitch implements in-band control. Correctly implementing in-band control has proven difficult due to its many subtleties, and has thus gone through many iterations. Please read through and understand the reasoning behind the chosen rules before making modifications. Open vSwitch implements in-band control as "hidden" flows, that is, flows that are not visible through OpenFlow, and at a higher priority than wildcarded flows that can be set up through OpenFlow. This is done so that the OpenFlow controller cannot interfere with them and possibly break connectivity with its switches. It is possible to see all flows, including in-band ones, with the ovs-appctl "bridge/dump-flows" command. The Open vSwitch implementation of in-band control can hide traffic to arbitrary "remotes", where each remote is one TCP port on one IP address. Currently the remotes are automatically configured as the in-band OpenFlow controllers plus the OVSDB managers, if any. (The latter is a requirement because OVSDB managers are responsible for configuring OpenFlow controllers, so if the manager cannot be reached then OpenFlow cannot be reconfigured.) The following rules (with the OFPP_NORMAL action) are set up on any bridge that has any remotes: (a) DHCP requests sent from the local port. (b) ARP replies to the local port's MAC address. (c) ARP requests from the local port's MAC address. In-band also sets up the following rules for each unique next-hop MAC address for the remotes' IPs (the "next hop" is either the remote itself, if it is on a local subnet, or the gateway to reach the remote): (d) ARP replies to the next hop's MAC address. (e) ARP requests from the next hop's MAC address. In-band also sets up the following rules for each unique remote IP address: (f) ARP replies containing the remote's IP address as a target. (g) ARP requests containing the remote's IP address as a source. 
In-band also sets up the following rules for each unique remote (IP,port) pair: (h) TCP traffic to the remote's IP and port. (i) TCP traffic from the remote's IP and port. The goal of these rules is to be as narrow as possible to allow a switch to join a network and be able to communicate with the remotes. As mentioned earlier, these rules have higher priority than the controller's rules, so if they are too broad, they may prevent the controller from implementing its policy. As such, in-band actively monitors some aspects of flow and packet processing so that the rules can be made more precise. In-band control monitors attempts to add flows into the datapath that could interfere with its duties. The datapath only allows exact match entries, so in-band control is able to be very precise about the flows it prevents. Flows that miss in the datapath are sent to userspace to be processed, so preventing these flows from being cached in the "fast path" does not affect correctness. The only type of flow that is currently prevented is one that would prevent DHCP replies from being seen by the local port. For example, a rule that forwarded all DHCP traffic to the controller would not be allowed, but one that forwarded to all ports (including the local port) would. As mentioned earlier, packets that miss in the datapath are sent to the userspace for processing. The userspace has its own flow table, the "classifier", so in-band checks whether any special processing is needed before the classifier is consulted. If a packet is a DHCP response to a request from the local port, the packet is forwarded to the local port, regardless of the flow table. Note that this requires L7 processing of DHCP replies to determine whether the 'chaddr' field matches the MAC address of the local port. It is interesting to note that for an L3-based in-band control mechanism, the majority of rules are devoted to ARP traffic. At first glance, some of these rules appear redundant. 
However, each serves an important role. First, in order to determine the MAC address of the remote side (controller or gateway) for other ARP rules, we must allow ARP traffic for our local port with rules (b) and (c). If we are between a switch and its connection to the remote, we have to allow the other switch's ARP traffic through. This is done with rules (d) and (e), since we do not know the addresses of the other switches a priori, but do know the remote's or gateway's. Finally, if the remote is running in a local guest VM that is not reached through the local port, the switch that is connected to the VM must allow ARP traffic based on the remote's IP address, since it will not know the MAC address of the local port that is sending the traffic or the MAC address of the remote in the guest VM. With a few notable exceptions below, in-band should work in most network setups. The following are considered "supported" in the current implementation: - Locally Connected. The switch and remote are on the same subnet. This uses rules (a), (b), (c), (h), and (i). - Reached through Gateway. The switch and remote are on different subnets and must go through a gateway. This uses rules (a), (b), (c), (h), and (i). - Between Switch and Remote. This switch is between another switch and the remote, and we want to allow the other switch's traffic through. This uses rules (d), (e), (h), and (i). It uses (b) and (c) indirectly in order to know the MAC address for rules (d) and (e). Note that DHCP for the other switch will not work unless an OpenFlow controller explicitly lets this switch pass the traffic. - Between Switch and Gateway. This switch is between another switch and the gateway, and we want to allow the other switch's traffic through. This uses the same rules and logic as the "Between Switch and Remote" configuration described earlier. - Remote on Local VM. The remote is a guest VM on the system running in-band control. This uses rules (a), (b), (c), (h), and (i). 
- Remote on Local VM with Different Networks. The remote is a guest VM on the system running in-band control, but the local port is not used to connect to the remote. For example, an IP address is configured on eth0 of the switch. The remote's VM is connected through eth1 of the switch, but an IP address has not been configured for that port on the switch. As such, the switch will use eth0 to connect to the remote, and eth1's rules about the local port will not work. In the example, the switch attached to eth0 would use rules (a), (b), (c), (h), and (i) on eth0. The switch attached to eth1 would use rules (f), (g), (h), and (i). The following are explicitly *not* supported by in-band control: - Specify Remote by Name. Currently, the remote must be identified by IP address. A naive approach would be to permit all DNS traffic. Unfortunately, this would prevent the controller from defining any policy over DNS. Since switches that are located behind us need to connect to the remote, in-band cannot simply add a rule that allows DNS traffic from the local port. The "correct" way to support this is to parse DNS requests to allow all traffic related to a request for the remote's name through. Due to the potential security problems and amount of processing, we decided to hold off for the time being. - Differing Remotes for Switches. All switches must know the L3 addresses for all the remotes that other switches may use, since rules need to be set up to allow traffic related to those remotes through. See rules (f), (g), (h), and (i). - Differing Routes for Switches. In order for the switch to allow other switches to connect to a remote through a gateway, it allows the gateway's traffic through with rules (d) and (e). If the routes to the remote differ for the two switches, we will not know the MAC address of the alternate gateway. 
Action Reproduction =================== It seems likely that many controllers, at least at startup, use the OpenFlow "flow statistics" request to obtain existing flows, then compare the flows' actions against the actions that they expect to find. Before version 1.8.0, Open vSwitch always returned exact, byte-for-byte copies of the actions that had been added to the flow table. The current version of Open vSwitch does not always do this in some exceptional cases. This section lists the exceptions that controller authors must keep in mind if they compare actual actions against desired actions in a bytewise fashion: - Open vSwitch zeros padding bytes in action structures, regardless of their values when the flows were added. - Open vSwitch "normalizes" the instructions in OpenFlow 1.1 (and later) in the following way: * OVS sorts the instructions into the following order: Apply-Actions, Clear-Actions, Write-Actions, Write-Metadata, Goto-Table. * OVS drops Apply-Actions instructions that have empty action lists. * OVS drops Write-Actions instructions that have empty action sets. Please report other discrepancies, if you notice any, so that we can fix or document them. Suggestions =========== Suggestions to improve Open vSwitch are welcome at discuss@openvswitch.org. openvswitch-2.0.1+git20140120/FAQ000066400000000000000000001462431226605124000160160ustar00rootroot00000000000000 Open vSwitch Frequently Asked Questions ========================== General ------- Q: What is Open vSwitch? A: Open vSwitch is a production quality open source software switch designed to be used as a vswitch in virtualized server environments. A vswitch forwards traffic between different VMs on the same physical host and also forwards traffic between VMs and the physical network. Open vSwitch supports standard management interfaces (e.g. sFlow, NetFlow, IPFIX, RSPAN, CLI), and is open to programmatic extension and control using OpenFlow and the OVSDB management protocol. 
Open vSwitch is designed to be compatible with modern switching chipsets. This means that it can be ported to existing high-fanout switches allowing the same flexible control of the physical infrastructure as the virtual infrastructure. It also means that Open vSwitch will be able to take advantage of on-NIC switching chipsets as their functionality matures. Q: What virtualization platforms can use Open vSwitch? A: Open vSwitch can currently run on any Linux-based virtualization platform (kernel 2.6.32 and newer), including: KVM, VirtualBox, Xen, Xen Cloud Platform, XenServer. As of Linux 3.3 it is part of the mainline kernel. The bulk of the code is written in platform- independent C and is easily ported to other environments. We welcome inquiries about integrating Open vSwitch with other virtualization platforms. Q: How can I try Open vSwitch? A: The Open vSwitch source code can be built on a Linux system. You can build and experiment with Open vSwitch on any Linux machine. Packages for various Linux distributions are available on many platforms, including: Debian, Ubuntu, Fedora. You may also download and run a virtualization platform that already has Open vSwitch integrated. For example, download a recent ISO for XenServer or Xen Cloud Platform. Be aware that the version integrated with a particular platform may not be the most recent Open vSwitch release. Q: Does Open vSwitch only work on Linux? A: No, Open vSwitch has been ported to a number of different operating systems and hardware platforms. Most of the development work occurs on Linux, but the code should be portable to any POSIX system. We've seen Open vSwitch ported to a number of different platforms, including FreeBSD, Windows, and even non-POSIX embedded systems. By definition, the Open vSwitch Linux kernel module only works on Linux and will provide the highest performance. However, a userspace datapath is available that should be very portable. 
Q: What's involved with porting Open vSwitch to a new platform or switching ASIC? A: The PORTING document describes how one would go about porting Open vSwitch to a new operating system or hardware platform. Q: Why would I use Open vSwitch instead of the Linux bridge? A: Open vSwitch is specially designed to make it easier to manage VM network configuration and monitor state spread across many physical hosts in dynamic virtualized environments. Please see WHY-OVS for a more detailed description of how Open vSwitch relates to the Linux Bridge. Q: How is Open vSwitch related to distributed virtual switches like the VMware vNetwork distributed switch or the Cisco Nexus 1000V? A: Distributed vswitch applications (e.g., VMware vNetwork distributed switch, Cisco Nexus 1000V) provide a centralized way to configure and monitor the network state of VMs that are spread across many physical hosts. Open vSwitch is not a distributed vswitch itself, rather it runs on each physical host and supports remote management in a way that makes it easier for developers of virtualization/cloud management platforms to offer distributed vswitch capabilities. To aid in distribution, Open vSwitch provides two open protocols that are specially designed for remote management in virtualized network environments: OpenFlow, which exposes flow-based forwarding state, and the OVSDB management protocol, which exposes switch port state. In addition to the switch implementation itself, Open vSwitch includes tools (ovs-controller, ovs-ofctl, ovs-vsctl) that developers can script and extend to provide distributed vswitch capabilities that are closely integrated with their virtualization management platform. Q: Why doesn't Open vSwitch support distribution? A: Open vSwitch is intended to be a useful component for building flexible network infrastructure. 
There are many different approaches to distribution which balance trade-offs between simplicity, scalability, hardware compatibility, convergence times, logical forwarding model, etc. The goal of Open vSwitch is to be able to support all as a primitive building block rather than choose a particular point in the distributed design space. Q: How can I contribute to the Open vSwitch Community? A: You can start by joining the mailing lists and helping to answer questions. You can also suggest improvements to documentation. If you have a feature or bug you would like to work on, send a mail to one of the mailing lists: http://openvswitch.org/mlists/ Releases -------- Q: What does it mean for an Open vSwitch release to be LTS (long-term support)? A: All official releases have been through a comprehensive testing process and are suitable for production use. Planned releases will occur several times a year. If a significant bug is identified in an LTS release, we will provide an updated release that includes the fix. Releases that are not LTS may not be fixed and may just be supplanted by the next major release. The current LTS release is 1.9.x. Q: What Linux kernel versions does each Open vSwitch release work with? A: The following table lists the Linux kernel versions against which the given versions of the Open vSwitch kernel module will successfully build. The Linux kernel versions are upstream kernel versions, so Linux kernels modified from the upstream sources may not build in some cases even if they are based on a supported version. This is most notably true of Red Hat Enterprise Linux (RHEL) kernels, which are extensively modified from upstream. 
Open vSwitch Linux kernel ------------ ------------- 1.4.x 2.6.18 to 3.2 1.5.x 2.6.18 to 3.2 1.6.x 2.6.18 to 3.2 1.7.x 2.6.18 to 3.3 1.8.x 2.6.18 to 3.4 1.9.x 2.6.18 to 3.8 1.10.x 2.6.18 to 3.8 1.11.x 2.6.18 to 3.8 2.0.x 2.6.32 to 3.10 Open vSwitch userspace should also work with the Linux kernel module built into Linux 3.3 and later. Open vSwitch userspace is not sensitive to the Linux kernel version. It should build against almost any kernel, certainly against 2.6.32 and later. Q: What Linux kernel versions does IPFIX flow monitoring work with? A: IPFIX flow monitoring requires the Linux kernel module from Open vSwitch version 1.10.90 or later. Q: Should userspace or kernel be upgraded first to minimize downtime? A: In general, the Open vSwitch userspace should be used with the kernel version included in the same release or with the version from upstream Linux. However, when upgrading between two releases of Open vSwitch it is best to migrate userspace first to reduce the possibility of incompatibilities. Q: What features are not available in the Open vSwitch kernel datapath that ships as part of the upstream Linux kernel? A: The kernel module in upstream Linux 3.3 and later does not include tunnel virtual ports, that is, interfaces with type "gre", "ipsec_gre", "gre64", "ipsec_gre64", "vxlan", or "lisp". It is possible to create tunnels in Linux and attach them to Open vSwitch as system devices. However, they cannot be dynamically created through the OVSDB protocol or set the tunnel ids as a flow action. Work is in progress in adding tunnel virtual ports to the upstream Linux version of the Open vSwitch kernel module. For now, if you need these features, use the kernel module from the Open vSwitch distribution instead of the upstream Linux kernel module. The upstream kernel module does not include patch ports, but this only matters for Open vSwitch 1.9 and earlier, because Open vSwitch 1.10 and later implement patch ports without using this kernel feature. 
Q: What features are not available when using the userspace datapath? A: Tunnel virtual ports are not supported, as described in the previous answer. It is also not possible to use queue-related actions. On Linux kernels before 2.6.39, maximum-sized VLAN packets may not be transmitted. Terminology ----------- Q: I thought Open vSwitch was a virtual Ethernet switch, but the documentation keeps talking about bridges. What's a bridge? A: In networking, the terms "bridge" and "switch" are synonyms. Open vSwitch implements an Ethernet switch, which means that it is also an Ethernet bridge. Q: What's a VLAN? A: See the "VLAN" section below. Basic Configuration ------------------- Q: How do I configure a port as an access port? A: Add "tag=VLAN" to your "ovs-vsctl add-port" command. For example, the following commands configure br0 with eth0 as a trunk port (the default) and tap0 as an access port for VLAN 9: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 tap0 tag=9 If you want to configure an already added port as an access port, use "ovs-vsctl set", e.g.: ovs-vsctl set port tap0 tag=9 Q: How do I configure a port as a SPAN port, that is, enable mirroring of all traffic to that port? A: The following commands configure br0 with eth0 and tap0 as trunk ports. All traffic coming in or going out on eth0 or tap0 is also mirrored to tap1; any traffic arriving on tap1 is dropped: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 tap0 ovs-vsctl add-port br0 tap1 \ -- --id=@p get port tap1 \ -- --id=@m create mirror name=m0 select-all=true output-port=@p \ -- set bridge br0 mirrors=@m To later disable mirroring, run: ovs-vsctl clear bridge br0 mirrors Q: How do I configure a VLAN as an RSPAN VLAN, that is, enable mirroring of all traffic to that VLAN? A: The following commands configure br0 with eth0 as a trunk port and tap0 as an access port for VLAN 10. 
All traffic coming in or going out on tap0, as well as traffic coming in or going out on eth0 in VLAN 10, is also mirrored to VLAN 15 on eth0. The original tag for VLAN 10, in cases where one is present, is dropped as part of mirroring: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 tap0 tag=10 ovs-vsctl \ -- --id=@m create mirror name=m0 select-all=true select-vlan=10 \ output-vlan=15 \ -- set bridge br0 mirrors=@m To later disable mirroring, run: ovs-vsctl clear bridge br0 mirrors Mirroring to a VLAN can disrupt a network that contains unmanaged switches. See ovs-vswitchd.conf.db(5) for details. Mirroring to a GRE tunnel has fewer caveats than mirroring to a VLAN and should generally be preferred. Q: Can I mirror more than one input VLAN to an RSPAN VLAN? A: Yes, but mirroring to a VLAN strips the original VLAN tag in favor of the specified output-vlan. This loss of information may make the mirrored traffic too hard to interpret. To mirror multiple VLANs, use the commands above, but specify a comma-separated list of VLANs as the value for select-vlan. To mirror every VLAN, use the commands above, but omit select-vlan and its value entirely. When a packet arrives on a VLAN that is used as a mirror output VLAN, the mirror is disregarded. Instead, in standalone mode, OVS floods the packet across all the ports for which the mirror output VLAN is configured. (If an OpenFlow controller is in use, then it can override this behavior through the flow table.) If OVS is used as an intermediate switch, rather than an edge switch, this ensures that the RSPAN traffic is distributed through the network. Mirroring to a VLAN can disrupt a network that contains unmanaged switches. See ovs-vswitchd.conf.db(5) for details. Mirroring to a GRE tunnel has fewer caveats than mirroring to a VLAN and should generally be preferred. Q: How do I configure mirroring of all traffic to a GRE tunnel? 
A: The following commands configure br0 with eth0 and tap0 as trunk ports. All traffic coming in or going out on eth0 or tap0 is also mirrored to gre0, a GRE tunnel to the remote host 192.168.1.10; any traffic arriving on gre0 is dropped: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 tap0 ovs-vsctl add-port br0 gre0 \ -- set interface gre0 type=gre options:remote_ip=192.168.1.10 \ -- --id=@p get port gre0 \ -- --id=@m create mirror name=m0 select-all=true output-port=@p \ -- set bridge br0 mirrors=@m To later disable mirroring and destroy the GRE tunnel: ovs-vsctl clear bridge br0 mirrors ovs-vsctl del-port br0 gre0 Q: Does Open vSwitch support ERSPAN? A: No. ERSPAN is an undocumented proprietary protocol. As an alternative, Open vSwitch supports mirroring to a GRE tunnel (see above). Q: How do I connect two bridges? A: First, why do you want to do this? Two connected bridges are not much different from a single bridge, so you might as well just have a single bridge with all your ports on it. If you still want to connect two bridges, you can use a pair of patch ports. The following example creates bridges br0 and br1, adds eth0 and tap0 to br0, adds tap1 to br1, and then connects br0 and br1 with a pair of patch ports. ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 tap0 ovs-vsctl add-br br1 ovs-vsctl add-port br1 tap1 ovs-vsctl \ -- add-port br0 patch0 \ -- set interface patch0 type=patch options:peer=patch1 \ -- add-port br1 patch1 \ -- set interface patch1 type=patch options:peer=patch0 Bridges connected with patch ports are much like a single bridge. For instance, if the example above also added eth1 to br1, and both eth0 and eth1 happened to be connected to the same next-hop switch, then you could loop your network just as you would if you added eth0 and eth1 to the same bridge (see the "Configuration Problems" section below for more information). 
If you are using Open vSwitch 1.9 or an earlier version, then you need to be using the kernel module bundled with Open vSwitch rather than the one that is integrated into Linux 3.3 and later, because Open vSwitch 1.9 and earlier versions need kernel support for patch ports. This also means that in Open vSwitch 1.9 and earlier, patch ports will not work with the userspace datapath, only with the kernel module. Q: Why are there so many different ways to dump flows? A: Open vSwitch uses different kinds of flows for different purposes: - OpenFlow flows are the most important kind of flow. OpenFlow controllers use these flows to define a switch's policy. OpenFlow flows support wildcards, priorities, and multiple tables. When in-band control is in use, Open vSwitch sets up a few "hidden" flows, with priority higher than a controller or the user can configure, that are not visible via OpenFlow. (See the "Controller" section of the FAQ for more information about hidden flows.) - The Open vSwitch software switch implementation uses a second kind of flow internally. These flows, called "exact-match" or "datapath" or "kernel" flows, do not support wildcards or priorities and comprise only a single table, which makes them suitable for caching. OpenFlow flows and exact-match flows also support different actions and number ports differently. Exact-match flows are an implementation detail that is subject to change in future versions of Open vSwitch. Even with the current version of Open vSwitch, hardware switch implementations do not necessarily use exact-match flows. Each of the commands for dumping flows has a different purpose: - "ovs-ofctl dump-flows <br>" dumps OpenFlow flows, excluding hidden flows. This is the most commonly useful form of flow dump. (Unlike the other commands, this should work with any OpenFlow switch, not just Open vSwitch.) - "ovs-appctl bridge/dump-flows <br>" dumps OpenFlow flows, including hidden flows. This is occasionally useful for troubleshooting suspected issues with in-band control. - "ovs-dpctl dump-flows [dp]" dumps the exact-match flow table entries for a Linux kernel-based datapath. In Open vSwitch 1.10 and later, ovs-vswitchd merges multiple switches into a single datapath, so it will show all the flows on all your kernel-based switches. This command can occasionally be useful for debugging. - "ovs-appctl dpif/dump-flows <br>", new in Open vSwitch 1.10, dumps exact-match flows for only the specified bridge, regardless of the type. Configuration Problems ---------------------- Q: I created a bridge and added my Ethernet port to it, using commands like these: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 and as soon as I ran the "add-port" command I lost all connectivity through eth0. Help! A: A physical Ethernet device that is part of an Open vSwitch bridge should not have an IP address. If one does, then that IP address will not be fully functional. You can restore functionality by moving the IP address to an Open vSwitch "internal" device, such as the network device named after the bridge itself. For example, assuming that eth0's IP address is 192.168.128.5, you could run the commands below to fix up the situation: ifconfig eth0 0.0.0.0 ifconfig br0 192.168.128.5 (If your only connection to the machine running OVS is through the IP address in question, then you would want to run all of these commands on a single command line, or put them into a script.) If there were any additional routes assigned to eth0, then you would also want to use commands to adjust these routes to go through br0. If you use DHCP to obtain an IP address, then you should kill the DHCP client that was listening on the physical Ethernet interface (e.g. eth0) and start one listening on the internal interface (e.g. br0). You might still need to manually clear the IP address from the physical interface (e.g. with "ifconfig eth0 0.0.0.0"). There is no compelling reason why Open vSwitch must work this way. However, this is the way that the Linux kernel bridge module has always worked, so it's a model that those accustomed to Linux bridging are already used to. Also, the model that most people expect is not implementable without kernel changes on all the versions of Linux that Open vSwitch supports. By the way, this issue is not specific to physical Ethernet devices. 
It applies to all network devices except Open vSwitch "internal" devices. Q: I created a bridge and added a couple of Ethernet ports to it, using commands like these: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 eth1 and now my network seems to have melted: connectivity is unreliable (even connectivity that doesn't go through Open vSwitch), all the LEDs on my physical switches are blinking, wireshark shows duplicated packets, and CPU usage is very high. A: More than likely, you've looped your network. Probably, eth0 and eth1 are connected to the same physical Ethernet switch. This yields a scenario where OVS receives a broadcast packet on eth0 and sends it out on eth1, then the physical switch connected to eth1 sends the packet back on eth0, and so on forever. More complicated scenarios, involving a loop through multiple switches, are possible too. The solution depends on what you are trying to do: - If you added eth0 and eth1 to get higher bandwidth or higher reliability between OVS and your physical Ethernet switch, use a bond. The following commands create br0 and then add eth0 and eth1 as a bond: ovs-vsctl add-br br0 ovs-vsctl add-bond br0 bond0 eth0 eth1 Bonds have tons of configuration options. Please read the documentation on the Port table in ovs-vswitchd.conf.db(5) for all the details. - Perhaps you don't actually need eth0 and eth1 to be on the same bridge. For example, if you simply want to be able to connect each of them to virtual machines, then you can put each of them on a bridge of its own: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-br br1 ovs-vsctl add-port br1 eth1 and then connect VMs to br0 and br1. (A potential disadvantage is that traffic cannot directly pass between br0 and br1. Instead, it will go out eth0 and come back in eth1, or vice versa.) - If you have a redundant or complex network topology and you want to prevent loops, turn on spanning tree protocol (STP). 
The following commands create br0, enable STP, and add eth0 and eth1 to the bridge. The order is important because you don't want to have a loop in your network even transiently: ovs-vsctl add-br br0 ovs-vsctl set bridge br0 stp_enable=true ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 eth1 The Open vSwitch implementation of STP is not well tested. Please report any bugs you observe, but if you'd rather avoid acting as a beta tester then another option might be your best shot. Q: I can't seem to use Open vSwitch in a wireless network. A: Wireless base stations generally only allow packets with the source MAC address of the NIC that completed the initial handshake. Therefore, without MAC rewriting, only a single device can communicate over a single wireless link. This isn't specific to Open vSwitch, it's enforced by the access point, so the same problems will show up with the Linux bridge or any other way to do bridging. Q: I can't seem to add my PPP interface to an Open vSwitch bridge. A: PPP most commonly carries IP packets, but Open vSwitch works only with Ethernet frames. The correct way to interface PPP to an Ethernet network is usually to use routing instead of switching. Q: Is there any documentation on the database tables and fields? A: Yes. ovs-vswitchd.conf.db(5) is a comprehensive reference. Q: When I run ovs-dpctl I no longer see the bridges I created. Instead, I only see a datapath called "ovs-system". How can I see datapath information about a particular bridge? A: In version 1.9.0, OVS switched to using a single datapath that is shared by all bridges of that type. The "ovs-appctl dpif/*" commands provide similar functionality that is scoped by the bridge. Quality of Service (QoS) ------------------------ Q: How do I configure Quality of Service (QoS)? 
A: Suppose that you want to set up bridge br0 connected to physical Ethernet port eth0 (a 1 Gbps device) and virtual machine interfaces vif1.0 and vif2.0, and that you want to limit traffic from vif1.0 to eth0 to 10 Mbps and from vif2.0 to eth0 to 20 Mbps. Then, you could configure the bridge this way: ovs-vsctl -- \ add-br br0 -- \ add-port br0 eth0 -- \ add-port br0 vif1.0 -- set interface vif1.0 ofport_request=5 -- \ add-port br0 vif2.0 -- set interface vif2.0 ofport_request=6 -- \ set port eth0 qos=@newqos -- \ --id=@newqos create qos type=linux-htb \ other-config:max-rate=1000000000 \ queues:123=@vif10queue \ queues:234=@vif20queue -- \ --id=@vif10queue create queue other-config:max-rate=10000000 -- \ --id=@vif20queue create queue other-config:max-rate=20000000 At this point, bridge br0 is configured with the ports and eth0 is configured with the queues that you need for QoS, but nothing is actually directing packets from vif1.0 or vif2.0 to the queues that we have set up for them. That means that all of the packets to eth0 are going to the "default queue", which is not what we want. We use OpenFlow to direct packets from vif1.0 and vif2.0 to the queues reserved for them: ovs-ofctl add-flow br0 in_port=5,actions=set_queue:123,normal ovs-ofctl add-flow br0 in_port=6,actions=set_queue:234,normal Each of the above flows matches on the input port, sets up the appropriate queue (123 for vif1.0, 234 for vif2.0), and then executes the "normal" action, which performs the same switching that Open vSwitch would have done without any OpenFlow flows being present. (We know that vif1.0 and vif2.0 have OpenFlow port numbers 5 and 6, respectively, because we set their ofport_request columns above. If we had not done that, then we would have needed to find out their port numbers before setting up these flows.) Now traffic going from vif1.0 or vif2.0 to eth0 should be rate-limited. 
By the way, if you delete the bridge created by the above commands, with: ovs-vsctl del-br br0 then that will leave one unreferenced QoS record and two unreferenced Queue records in the Open vSwitch database. One way to clear them out, assuming you don't have other QoS or Queue records that you want to keep, is: ovs-vsctl -- --all destroy QoS -- --all destroy Queue If you do want to keep some QoS or Queue records, or the Open vSwitch you are using is older than version 1.8 (which added the --all option), then you will have to destroy QoS and Queue records individually. Q: I configured Quality of Service (QoS) in my OpenFlow network by adding records to the QoS and Queue table, but the results aren't what I expect. A: Did you install OpenFlow flows that use your queues? This is the primary way to tell Open vSwitch which queues you want to use. If you don't do this, then the default queue will be used, which will probably not have the effect you want. Refer to the previous question for an example. Q: I configured QoS, correctly, but my measurements show that it isn't working as well as I expect. A: With the Linux kernel, the Open vSwitch implementation of QoS has two aspects: - Open vSwitch configures a subset of Linux kernel QoS features, according to what is in OVSDB. It is possible that this code has bugs. If you believe that this is so, then you can configure the Linux traffic control (QoS) stack directly with the "tc" program. If you get better results that way, you can send a detailed bug report to bugs@openvswitch.org. It is certain that Open vSwitch cannot configure every Linux kernel QoS feature. If you need some feature that OVS cannot configure, then you can also use "tc" directly (or add that feature to OVS). - The Open vSwitch implementation of OpenFlow allows flows to be directed to particular queues. This is pretty simple and unlikely to have serious bugs at this point. However, most problems with QoS on Linux are not bugs in Open vSwitch at all. 
They tend to be either configuration errors (please see the earlier questions in this section) or issues with the traffic control (QoS) stack in Linux. The Open vSwitch developers are not experts on Linux traffic control. We suggest that, if you believe you are encountering a problem with Linux traffic control, you consult the tc manpages (e.g. tc(8), tc-htb(8), tc-hfsc(8)), web resources (e.g. http://lartc.org/), or mailing lists (e.g. http://vger.kernel.org/vger-lists.html#netdev). VLANs ----- Q: What's a VLAN? A: At the simplest level, a VLAN (short for "virtual LAN") is a way to partition a single switch into multiple switches. Suppose, for example, that you have two groups of machines, group A and group B. You want the machines in group A to be able to talk to each other, and you want the machines in group B to be able to talk to each other, but you don't want the machines in group A to be able to talk to the machines in group B. You can do this with two switches, by plugging the machines in group A into one switch and the machines in group B into the other switch. If you only have one switch, then you can use VLANs to do the same thing, by configuring the ports for machines in group A as VLAN "access ports" for one VLAN and the ports for group B as "access ports" for a different VLAN. The switch will only forward packets between ports that are assigned to the same VLAN, so this effectively subdivides your single switch into two independent switches, one for each group of machines. So far we haven't said anything about VLAN headers. With access ports, like we've described so far, no VLAN header is present in the Ethernet frame. This means that the machines (or switches) connected to access ports need not be aware that VLANs are involved, just like in the case where we use two different physical switches. 
Now suppose that you have a whole bunch of switches in your network, instead of just one, and that some machines in group A are connected directly to both switches 1 and 2. To allow these machines to talk to each other, you could add an access port for group A's VLAN to switch 1 and another to switch 2, and then connect an Ethernet cable between those ports. That works fine, but it doesn't scale well as the number of switches and the number of VLANs increases, because you use up a lot of valuable switch ports just connecting together your VLANs. This is where VLAN headers come in. Instead of using one cable and two ports per VLAN to connect a pair of switches, we configure a port on each switch as a VLAN "trunk port". Packets sent and received on a trunk port carry a VLAN header that says what VLAN the packet belongs to, so that only two ports total are required to connect the switches, regardless of the number of VLANs in use. Normally, only switches (either physical or virtual) are connected to a trunk port, not individual hosts, because individual hosts don't expect to see a VLAN header in the traffic that they receive. None of the above discussion says anything about particular VLAN numbers. This is because VLAN numbers are completely arbitrary. One must only ensure that a given VLAN is numbered consistently throughout a network and that different VLANs are given different numbers. (That said, VLAN 0 is usually synonymous with a packet that has no VLAN header, and VLAN 4095 is reserved.) Q: VLANs don't work. A: Many drivers in Linux kernels before version 3.3 had VLAN-related bugs. If you are having problems with VLANs that you suspect to be driver related, then you have several options: - Upgrade to Linux 3.3 or later. - Build and install a fixed version of the particular driver that is causing trouble, if one is available. - Use a NIC whose driver does not have VLAN problems. 
- Use "VLAN splinters", a feature in Open vSwitch 1.4 and later that works around bugs in kernel drivers. To enable VLAN splinters on interface eth0, use the command: ovs-vsctl set interface eth0 other-config:enable-vlan-splinters=true For VLAN splinters to be effective, Open vSwitch must know which VLANs are in use. See the "VLAN splinters" section in the Interface table in ovs-vswitchd.conf.db(5) for details on how Open vSwitch infers in-use VLANs. VLAN splinters increase memory use and reduce performance, so use them only if needed. - Apply the "vlan workaround" patch from the XenServer kernel patch queue, build Open vSwitch against this patched kernel, and then use ovs-vlan-bug-workaround(8) to enable the VLAN workaround for each interface whose driver is buggy. (This is a nontrivial exercise, so this option is included only for completeness.) It is not always easy to tell whether a Linux kernel driver has buggy VLAN support. The ovs-vlan-test(8) and ovs-test(8) utilities can help you test. See their manpages for details. Of the two utilities, ovs-test(8) is newer and more thorough, but ovs-vlan-test(8) may be easier to use. Q: VLANs still don't work. I've tested the driver so I know that it's OK. A: Do you have VLANs enabled on the physical switch that OVS is attached to? Make sure that the port is configured to trunk the VLAN or VLANs that you are using with OVS. Q: Outgoing VLAN-tagged traffic goes through OVS to my physical switch and to its destination host, but OVS seems to drop incoming return traffic. A: It's possible that you have the VLAN configured on your physical switch as the "native" VLAN. In this mode, the switch treats incoming packets either tagged with the native VLAN or untagged as part of the native VLAN. It may also send outgoing packets in the native VLAN without a VLAN tag. 
If this is the case, you have two choices: - Change the physical switch port configuration to tag packets it forwards to OVS with the native VLAN instead of forwarding them untagged. - Change the OVS configuration for the physical port to a native VLAN mode. For example, the following sets up a bridge with port eth0 in "native-tagged" mode in VLAN 9: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 tag=9 vlan_mode=native-tagged In this situation, "native-untagged" mode will probably work equally well. Refer to the documentation for the Port table in ovs-vswitchd.conf.db(5) for more information. Q: I added a pair of VMs on different VLANs, like this: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 tap0 tag=9 ovs-vsctl add-port br0 tap1 tag=10 but the VMs can't access each other, the external network, or the Internet. A: It is to be expected that the VMs can't access each other. VLANs are a means to partition a network. When you configured tap0 and tap1 as access ports for different VLANs, you indicated that they should be isolated from each other. As for the external network and the Internet, it seems likely that the machines you are trying to access are not on VLAN 9 (or 10) and that the Internet is not available on VLAN 9 (or 10). Q: I added a pair of VMs on the same VLAN, like this: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 tap0 tag=9 ovs-vsctl add-port br0 tap1 tag=9 The VMs can access each other, but not the external network or the Internet. A: It seems likely that the machines you are trying to access in the external network are not on VLAN 9 and that the Internet is not available on VLAN 9. Also, ensure VLAN 9 is set up as an allowed trunk VLAN on the upstream switch port to which eth0 is connected. Q: Can I configure an IP address on a VLAN? A: Yes. Use an "internal port" configured as an access port. For example, the following configures IP address 192.168.0.7 on VLAN 9. 
That is, OVS will forward packets from eth0 to 192.168.0.7 only if they have an 802.1Q header with VLAN 9. Conversely, traffic forwarded from 192.168.0.7 to eth0 will be tagged with an 802.1Q header with VLAN 9: ovs-vsctl add-br br0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 vlan9 tag=9 -- set interface vlan9 type=internal ifconfig vlan9 192.168.0.7 Q: My OpenFlow controller doesn't see the VLANs that I expect. A: The configuration for VLANs in the Open vSwitch database (e.g. via ovs-vsctl) only affects traffic that goes through Open vSwitch's implementation of the OpenFlow "normal switching" action. By default, when Open vSwitch isn't connected to a controller and nothing has been manually configured in the flow table, all traffic goes through the "normal switching" action. But, if you set up OpenFlow flows on your own, through a controller or using ovs-ofctl or through other means, then you have to implement VLAN handling yourself. You can use "normal switching" as a component of your OpenFlow actions, e.g. by putting "normal" into the lists of actions on ovs-ofctl or by outputting to OFPP_NORMAL from an OpenFlow controller. In situations where this is not suitable, you can implement VLAN handling yourself, e.g.: - If a packet comes in on an access port, and the flow table needs to send it out on a trunk port, then the flow can add the appropriate VLAN tag with the "mod_vlan_vid" action. - If a packet comes in on a trunk port, and the flow table needs to send it out on an access port, then the flow can strip the VLAN tag with the "strip_vlan" action. Q: I configured ports on a bridge as access ports with different VLAN tags, like this: ovs-vsctl add-br br0 ovs-vsctl set-controller br0 tcp:192.168.0.10:6633 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 tap0 tag=9 ovs-vsctl add-port br0 tap1 tag=10 but the VMs running behind tap0 and tap1 can still communicate, that is, they are not isolated from each other even though they are on different VLANs. 
A: Do you have a controller configured on br0 (as the commands above do)? If so, then this is a variant on the previous question, "My OpenFlow controller doesn't see the VLANs that I expect," and you can refer to the answer there for more information. VXLANs ----- Q: What's a VXLAN? A: VXLAN stands for Virtual eXtensible Local Area Network, and is a means to solve the scaling challenges of VLAN networks in a multi-tenant environment. VXLAN is an overlay network which transports an L2 network over an existing L3 network. For more information on VXLAN, please see the IETF draft available here: http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-03 Q: How much of the VXLAN protocol does Open vSwitch currently support? A: Open vSwitch currently supports the framing format for packets on the wire. There is currently no support for the multicast aspects of VXLAN. To get around the lack of multicast support, it is possible to pre-provision MAC to IP address mappings either manually or from a controller. Q: What destination UDP port does the VXLAN implementation in Open vSwitch use? A: By default, Open vSwitch will use the assigned IANA port for VXLAN, which is 4789. However, it is possible to configure the destination UDP port manually on a per-VXLAN tunnel basis. An example of this configuration is provided below. ovs-vsctl add-br br0 ovs-vsctl add-port br0 vxlan1 -- set interface vxlan1 type=vxlan options:remote_ip=192.168.1.2 options:key=flow options:dst_port=8472 Using OpenFlow (Manually or Via Controller) ------------------------------------------- Q: What versions of OpenFlow does Open vSwitch support? A: Open vSwitch 1.9 and earlier support only OpenFlow 1.0 (plus extensions that bring in many of the features from later versions of OpenFlow). Open vSwitch 1.10 and later have experimental support for OpenFlow 1.2 and 1.3. 
On these versions of Open vSwitch, the following command enables OpenFlow 1.0, 1.2, and 1.3 on bridge br0: ovs-vsctl set bridge br0 protocols=OpenFlow10,OpenFlow12,OpenFlow13 Open vSwitch version 1.12 and later will have experimental support for OpenFlow 1.1, 1.2, and 1.3. On these versions of Open vSwitch, the following command enables OpenFlow 1.0, 1.1, 1.2, and 1.3 on bridge br0: ovs-vsctl set bridge br0 protocols=OpenFlow10,OpenFlow11,OpenFlow12,OpenFlow13 Use the -O option to enable support for later versions of OpenFlow in ovs-ofctl. For example: ovs-ofctl -O OpenFlow13 dump-flows br0 Support for OpenFlow 1.1, 1.2, and 1.3 is still incomplete. Work to be done is tracked in OPENFLOW-1.1+ in the Open vSwitch sources (also via http://openvswitch.org/development/openflow-1-x-plan/). When support for a given OpenFlow version is solidly implemented, Open vSwitch will enable that version by default. Q: I'm getting "error type 45250 code 0". What's that? A: This is an Open vSwitch extension to OpenFlow error codes. Open vSwitch uses this extension when it must report an error to an OpenFlow controller but no standard OpenFlow error code is suitable. Open vSwitch logs the errors that it sends to controllers, so the easiest thing to do is probably to look at the ovs-vswitchd log to find out what the error was. If you want to dissect the extended error message yourself, the format is documented in include/openflow/nicira-ext.h in the Open vSwitch source distribution. The extended error codes are documented in lib/ofp-errors.h. Q1: Some of the traffic that I'd expect my OpenFlow controller to see doesn't actually appear through the OpenFlow connection, even though I know that it's going through. Q2: Some of the OpenFlow flows that my controller sets up don't seem to apply to certain traffic, especially traffic between OVS and the controller itself.
A: By default, Open vSwitch assumes that OpenFlow controllers are connected "in-band", that is, that the controllers are actually part of the network that is being controlled. In in-band mode, Open vSwitch sets up special "hidden" flows to make sure that traffic can make it back and forth between OVS and the controllers. These hidden flows are higher priority than any flows that can be set up through OpenFlow, and they are not visible through normal OpenFlow flow table dumps. Usually, the hidden flows are desirable and helpful, but occasionally they can cause unexpected behavior. You can view the full OpenFlow flow table, including hidden flows, on bridge br0 with the command: ovs-appctl bridge/dump-flows br0 to help you debug. The hidden flows are those with priorities greater than 65535 (the maximum priority that can be set with OpenFlow). The DESIGN file at the top level of the Open vSwitch source distribution describes the in-band model in detail. If your controllers are not actually in-band (e.g. they are on localhost via 127.0.0.1, or on a separate network), then you should configure your controllers in "out-of-band" mode. If you have one controller on bridge br0, then you can configure out-of-band mode on it with: ovs-vsctl set controller br0 connection-mode=out-of-band Q: I configured all my controllers for out-of-band control mode but "ovs-appctl bridge/dump-flows" still shows some hidden flows. A: You probably have a remote manager configured (e.g. with "ovs-vsctl set-manager"). By default, Open vSwitch assumes that managers need in-band rules set up on every bridge. You can disable these rules on bridge br0 with: ovs-vsctl set bridge br0 other-config:disable-in-band=true This actually disables in-band control entirely for the bridge, as if all the bridge's controllers were configured for out-of-band control. Q: My OpenFlow controller doesn't see the VLANs that I expect. A: See answer under "VLANs", above. 
Q: I ran "ovs-ofctl add-flow br0 nw_dst=192.168.0.1,actions=drop" but I got a funny message like this: ofp_util|INFO|normalization changed ofp_match, details: ofp_util|INFO| pre: nw_dst=192.168.0.1 ofp_util|INFO|post: and when I ran "ovs-ofctl dump-flows br0" I saw that my nw_dst match had disappeared, so that the flow ends up matching every packet. A: The term "normalization" in the log message means that a flow cannot match on an L3 field without saying what L3 protocol is in use. The "ovs-ofctl" command above didn't specify an L3 protocol, so the L3 field match was dropped. In this case, the L3 protocol could be IP or ARP. A correct command for each possibility is, respectively: ovs-ofctl add-flow br0 ip,nw_dst=192.168.0.1,actions=drop and ovs-ofctl add-flow br0 arp,nw_dst=192.168.0.1,actions=drop Similarly, a flow cannot match on an L4 field without saying what L4 protocol is in use. For example, the flow match "tp_src=1234" is, by itself, meaningless and will be ignored. Instead, to match TCP source port 1234, write "tcp,tp_src=1234", or to match UDP source port 1234, write "udp,tp_src=1234". Q: How can I figure out the OpenFlow port number for a given port? A: The OFPT_FEATURES_REQUEST message requests an OpenFlow switch to respond with an OFPT_FEATURES_REPLY that, among other information, includes a mapping between OpenFlow port names and numbers. From a command prompt, "ovs-ofctl show br0" makes such a request and prints the response for switch br0. The Interface table in the Open vSwitch database also maps OpenFlow port names to numbers. To print the OpenFlow port number associated with interface eth0, run: ovs-vsctl get Interface eth0 ofport You can print the entire mapping with: ovs-vsctl -- --columns=name,ofport list Interface but the output mixes together interfaces from all bridges in the database, so it may be confusing if more than one bridge exists. 
In the Open vSwitch database, ofport value -1 means that the interface could not be created due to an error. (The Open vSwitch log should indicate the reason.) ofport value [] (the empty set) means that the interface hasn't been created yet. The latter is normally an intermittent condition (unless ovs-vswitchd is not running). Q: I added some flows with my controller or with ovs-ofctl, but when I run "ovs-dpctl dump-flows" I don't see them. A: ovs-dpctl queries a kernel datapath, not an OpenFlow switch. It won't display the information that you want. You want to use "ovs-ofctl dump-flows" instead. Q: It looks like each of the interfaces in my bonded port shows up as an individual OpenFlow port. Is that right? A: Yes, Open vSwitch makes individual bond interfaces visible as OpenFlow ports, rather than the bond as a whole. The interfaces are treated together as a bond for only a few purposes: - Sending a packet to the OFPP_NORMAL port. (When an OpenFlow controller is not configured, this happens implicitly to every packet.) - Mirrors configured for output to a bonded port. It would make a lot of sense for Open vSwitch to present a bond as a single OpenFlow port. If you want to contribute an implementation of such a feature, please bring it up on the Open vSwitch development mailing list at dev@openvswitch.org. Q: I have a sophisticated network setup involving Open vSwitch, VMs or multiple hosts, and other components. The behavior isn't what I expect. Help! A: To debug network behavior problems, trace the path of a packet, hop-by-hop, from its origin in one host to a remote host. If that's correct, then trace the path of the response packet back to the origin. Usually a simple ICMP echo request and reply ("ping") packet is good enough. Start by initiating an ongoing "ping" from the origin host to a remote host. If you are tracking down a connectivity problem, the "ping" will not display any successful output, but packets are still being sent. 
(In this case the packets being sent are likely ARP rather than ICMP.) Tools available for tracing include the following: - "tcpdump" and "wireshark" for observing hops across network devices, such as Open vSwitch internal devices and physical wires. - "ovs-appctl dpif/dump-flows <br>" in Open vSwitch 1.10 and later or "ovs-dpctl dump-flows <br>" in earlier versions. These tools allow one to observe the actions being taken on packets in ongoing flows. See ovs-vswitchd(8) for "ovs-appctl dpif/dump-flows" documentation, ovs-dpctl(8) for "ovs-dpctl dump-flows" documentation, and "Why are there so many different ways to dump flows?" above for some background. - "ovs-appctl ofproto/trace" to observe the logic behind how ovs-vswitchd treats packets. See ovs-vswitchd(8) for documentation. You can find out more details about a given flow that "ovs-dpctl dump-flows" displays, by cutting and pasting a flow from the output into an "ovs-appctl ofproto/trace" command. - SPAN, RSPAN, and ERSPAN features of physical switches, to observe what goes on at these physical hops. Starting at the origin of a given packet, observe the packet at each hop in turn. For example, in one plausible scenario, you might: 1. "tcpdump" the "eth" interface through which an ARP egresses a VM, from inside the VM. 2. "tcpdump" the "vif" or "tap" interface through which the ARP ingresses the host machine. 3. Use "ovs-dpctl dump-flows" to spot the ARP flow and observe the host interface through which the ARP egresses the physical machine. You may need to use "ovs-dpctl show" to interpret the port numbers. If the output seems surprising, you can use "ovs-appctl ofproto/trace" to observe details of how ovs-vswitchd determined the actions in the "ovs-dpctl dump-flows" output. 4. "tcpdump" the "eth" interface through which the ARP egresses the physical machine. 5. "tcpdump" the "eth" interface through which the ARP ingresses the physical machine, at the remote host that receives the ARP. 6. Use "ovs-dpctl dump-flows" to spot the ARP flow on the remote host that receives the ARP and observe the VM "vif" or "tap" interface to which the flow is directed. Again, "ovs-dpctl show" and "ovs-appctl ofproto/trace" might help. 7. "tcpdump" the "vif" or "tap" interface to which the ARP is directed. 8.
"tcpdump" the "eth" interface through which the ARP ingresses a VM, from inside the VM. It is likely that during one of these steps you will figure out the problem. If not, then follow the ARP reply back to the origin, in reverse. Q: How do I make a flow drop packets? A: An empty set of actions causes a packet to be dropped. You can specify an empty set of actions with "actions=" on the ovs-ofctl command line. For example: ovs-ofctl add-flow br0 priority=65535,actions= would cause every packet entering switch br0 to be dropped. You can write "drop" explicitly if you like. The effect is the same. Thus, the following command also causes every packet entering switch br0 to be dropped: ovs-ofctl add-flow br0 priority=65535,actions=drop Contact ------- bugs@openvswitch.org http://openvswitch.org/ openvswitch-2.0.1+git20140120/INSTALL000066400000000000000000000427771226605124000165240ustar00rootroot00000000000000 How to Install Open vSwitch on Linux, FreeBSD and NetBSD ======================================================== This document describes how to build and install Open vSwitch on a generic Linux, FreeBSD, or NetBSD host. For specifics around installation on a specific platform, please see one of these files: - INSTALL.Debian - INSTALL.Fedora - INSTALL.RHEL - INSTALL.XenServer Build Requirements ------------------ To compile the userspace programs in the Open vSwitch distribution, you will need the following software: - GNU make. - A C compiler, such as: * GCC 4.x. * Clang. Clang 3.4 and later provide useful static semantic analysis and thread-safety checks. For Ubuntu, there are nightly built packages available on clang's website. - libssl, from OpenSSL, is optional but recommended if you plan to connect the Open vSwitch to an OpenFlow controller. libssl is required to establish confidentiality and authenticity in the connections from an Open vSwitch to an OpenFlow controller. 
If libssl is installed, then Open vSwitch will automatically build with support for it. To compile the kernel module on Linux, you must also install the following. If you cannot build or install the kernel module, you may use the userspace-only implementation, at a cost in performance. The userspace implementation may also lack some features. Refer to INSTALL.userspace for more information. - A supported Linux kernel version. Please refer to README for a list of supported versions. The Open vSwitch datapath requires bridging support (CONFIG_BRIDGE) to be built as a kernel module. (This is common in kernels provided by Linux distributions.) The bridge module must not be loaded or in use. If the bridge module is running (check with "lsmod | grep bridge"), you must remove it ("rmmod bridge") before starting the datapath. For optional support of ingress policing, you must enable kernel configuration options NET_CLS_BASIC, NET_SCH_INGRESS, and NET_ACT_POLICE, either built-in or as modules. (NET_CLS_POLICE is obsolete and not needed.) To use GRE tunneling on Linux 2.6.37 or newer, kernel support for GRE must be compiled in or available as a module (CONFIG_NET_IPGRE_DEMUX). To configure HTB or HFSC quality of service with Open vSwitch, you must enable the respective configuration options. To use Open vSwitch support for TAP devices, you must enable CONFIG_TUN. - To build a kernel module, you need the same version of GCC that was used to build that kernel. - A kernel build directory corresponding to the Linux kernel image the module is to run on. Under Debian and Ubuntu, for example, each linux-image package containing a kernel binary has a corresponding linux-headers package with the required build infrastructure. If you are working from a Git tree or snapshot (instead of from a distribution tarball), or if you modify the Open vSwitch build system or the database schema, you will also need the following software: - Autoconf version 2.64 or later. 
- Automake version 1.10 or later. - Python 2.x, for x >= 4. If you modify the ovsdbmonitor tool, then you will also need the following: - pyuic4 from PyQt4 (http://www.riverbankcomputing.co.uk). To run the unit tests, you also need: - Perl. Version 5.10.1 is known to work. Earlier versions should also work. If you modify the vswitchd database schema, then the E-R diagram in the ovs-vswitchd.conf.db(5) manpage will be updated properly only if you have the following: - "dot" from graphviz (http://www.graphviz.org/). - Perl. Version 5.10.1 is known to work. Earlier versions should also work. - Python 2.x, for x >= 4. If you are going to extensively modify Open vSwitch, please consider installing the following to obtain better warnings: - "sparse" version 0.4.4 or later (http://www.kernel.org/pub/software/devel/sparse/dist/). - GNU make. - clang, version 3.4 or later Also, you may find the ovs-dev script found in utilities/ovs-dev.py useful. Installation Requirements ------------------------- The machine on which Open vSwitch is to be installed must have the following software: - libc compatible with the libc used for build. - libssl compatible with the libssl used for build, if OpenSSL was used for the build. - On Linux, the same kernel version configured as part of the build. - For optional support of ingress policing on Linux, the "tc" program from iproute2 (part of all major distributions and available at http://www.linux-foundation.org/en/Net:Iproute2). On Linux you should ensure that /dev/urandom exists. To support TAP devices, you must also ensure that /dev/net/tun exists. To run the ovsdbmonitor tool, the machine must also have the following software: - Python 2.x, for x >= 4. - Python Twisted Conch. - Python JSON. - PySide or PyQt4. - Python Zope interface module. (On Debian "lenny" the above can be installed with "apt-get install python-json python-qt4 python-zopeinterface python-twisted-conch".) 
Building and Installing Open vSwitch for Linux, FreeBSD or NetBSD ================================================================= Once you have installed all the prerequisites listed above in the "Build Requirements" section, follow the procedure below to build. 1. If you pulled the sources directly from an Open vSwitch Git tree, run boot.sh in the top source directory: % ./boot.sh 2. In the top source directory, configure the package by running the configure script. You can usually invoke configure without any arguments: % ./configure By default all files are installed under /usr/local. If you want to install into, e.g., /usr and /var instead of /usr/local and /usr/local/var, add options as shown here: % ./configure --prefix=/usr --localstatedir=/var To use a specific C compiler for compiling Open vSwitch user programs, also specify it on the configure command line, like so: % ./configure CC=gcc-4.2 To use 'clang' compiler: % ./configure CC=clang To build the Linux kernel module, so that you can run the kernel-based switch, pass the location of the kernel build directory on --with-linux. For example, to build for a running instance of Linux: % ./configure --with-linux=/lib/modules/`uname -r`/build If you wish to build the kernel module for an architecture other than the architecture of the machine used for the build, you may specify the kernel architecture string using the KARCH variable when invoking the configure script. For example, to build for MIPS with Linux: % ./configure --with-linux=/path/to/linux KARCH=mips The configure script accepts a number of other options and honors additional environment variables. For a full list, invoke configure with the --help option. 3. Run GNU make in the top source directory, e.g.: % make or if GNU make is installed as "gmake": % gmake For improved warnings if you installed "sparse" (see "Build Requirements"), add C=1 to the command line. 4. Consider running the testsuite.
Refer to "Running the Testsuite" below, for instructions. 5. Become root by running "su" or another program. 6. Run "make install" to install the executables and manpages into the running system, by default under /usr/local. 7. If you built kernel modules, you may install and load them, e.g.: % make modules_install % /sbin/modprobe openvswitch To verify that the modules have been loaded, run "/sbin/lsmod" and check that openvswitch is listed. If the "modprobe" operation fails, look at the last few kernel log messages (e.g. with "dmesg | tail"): - The message "openvswitch: exports duplicate symbol br_should_route_hook (owned by bridge)" means that the bridge module is loaded. Run "/sbin/rmmod bridge" to remove it. If "/sbin/rmmod bridge" fails with "ERROR: Module bridge does not exist in /proc/modules", then the bridge is compiled into the kernel, rather than as a module. Open vSwitch does not support this configuration (see "Build Requirements", above). - The message "openvswitch: exports duplicate symbol dp_ioctl_hook (owned by ofdatapath)" means that the ofdatapath module from the OpenFlow reference implementation is loaded. Run "/sbin/rmmod ofdatapath" to remove it. (You might have to delete any existing datapaths beforehand, using the "dpctl" program included with the OpenFlow reference implementation. "ovs-dpctl" will not work.) - Otherwise, the most likely problem is that Open vSwitch was built for a kernel different from the one into which you are trying to load it. Run "modinfo" on openvswitch.ko and on a module built for the running kernel, e.g.: % /sbin/modinfo openvswitch.ko % /sbin/modinfo /lib/modules/`uname -r`/kernel/net/bridge/bridge.ko Compare the "vermagic" lines output by the two commands. If they differ, then Open vSwitch was built for the wrong kernel. - If you decide to report a bug or ask a question related to module loading, please include the output from the "dmesg" and "modinfo" commands mentioned above. 
There is an optional module parameter to openvswitch.ko called vlan_tso that enables TCP segmentation offload over VLANs on NICs that support it. Many drivers do not expose support for TSO on VLANs in a way that Open vSwitch can use but there is no way to detect whether this is the case. If you know that your particular driver can handle it (for example by testing sending large TCP packets over VLANs) then passing in a value of 1 may improve performance. Modules built for Linux kernels 2.6.37 and later, as well as specially patched versions of earlier kernels, do not need this and do not have this parameter. If you do not understand what this means or do not know if your driver will work, do not set this. 8. Initialize the configuration database using ovsdb-tool, e.g.: % mkdir -p /usr/local/etc/openvswitch % ovsdb-tool create /usr/local/etc/openvswitch/conf.db vswitchd/vswitch.ovsschema Startup ======= Before starting ovs-vswitchd itself, you need to start its configuration database, ovsdb-server. Each machine on which Open vSwitch is installed should run its own copy of ovsdb-server. Configure it to use the database you created during step 8 of installation, above, to listen on a Unix domain socket, to connect to any managers specified in the database itself, and to use the SSL configuration in the database: % ovsdb-server --remote=punix:/usr/local/var/run/openvswitch/db.sock \ --remote=db:Open_vSwitch,Open_vSwitch,manager_options \ --private-key=db:Open_vSwitch,SSL,private_key \ --certificate=db:Open_vSwitch,SSL,certificate \ --bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert \ --pidfile --detach (If you built Open vSwitch without SSL support, then omit --private-key, --certificate, and --bootstrap-ca-cert.) Then initialize the database using ovs-vsctl.
This is only necessary the first time after you create the database with ovsdb-tool (but running it at any time is harmless): % ovs-vsctl --no-wait init Then start the main Open vSwitch daemon, telling it to connect to the same Unix domain socket: % ovs-vswitchd --pidfile --detach Now you may use ovs-vsctl to set up bridges and other Open vSwitch features. For example, to create a bridge named br0 and add ports eth0 and vif1.0 to it: % ovs-vsctl add-br br0 % ovs-vsctl add-port br0 eth0 % ovs-vsctl add-port br0 vif1.0 Please refer to ovs-vsctl(8) for more details. Upgrading ========= When you upgrade Open vSwitch from one version to another, you should also upgrade the database schema: 1. Stop the Open vSwitch daemons, e.g.: % kill `cd /usr/local/var/run/openvswitch && cat ovsdb-server.pid ovs-vswitchd.pid` 2. Install the new Open vSwitch release. 3. Upgrade the database, in one of the following two ways: - If there is no important data in your database, then you may delete the database file and recreate it with ovsdb-tool, following the instructions under "Building and Installing Open vSwitch for Linux, FreeBSD or NetBSD". - If you want to preserve the contents of your database, back it up first, then use "ovsdb-tool convert" to upgrade it, e.g.: % ovsdb-tool convert /usr/local/etc/openvswitch/conf.db vswitchd/vswitch.ovsschema 4. Start the Open vSwitch daemons as described under "Startup" above. Hot Upgrading ============= Upgrading Open vSwitch from one version to the next version with minimum disruption of traffic going through the system that is using that Open vSwitch needs some considerations: 1. If the upgrade only involves upgrading the userspace utilities and daemons of Open vSwitch, make sure that the new userspace version is compatible with the previously loaded kernel module. 2. An upgrade of userspace daemons means that they have to be restarted.
Restarting the daemons means that the OpenFlow flows in the ovs-vswitchd daemon will be lost. One way to restore the flows is to let the controller re-populate it. Another way is to save the previous flows using a utility like ovs-ofctl and then re-add them after the restart. Restoring the old flows is accurate only if the new Open vSwitch interfaces retain the old 'ofport' values. 3. When the new userspace daemons get restarted, they automatically flush the old flows set up in the kernel. This can be expensive if there are hundreds of new flows that are entering the kernel but userspace daemons are busy setting up new userspace flows from either the controller or a utility like ovs-ofctl. The Open vSwitch database provides an option to solve this problem through the other_config:flow-restore-wait column of the Open_vSwitch table. Refer to the ovs-vswitchd.conf.db(5) manpage for details. 4. If the upgrade also involves upgrading the kernel module, the old kernel module needs to be unloaded and the new kernel module should be loaded. This means that the kernel network devices belonging to Open vSwitch are recreated and the kernel flows are lost. The downtime of the traffic can be reduced if the userspace daemons are restarted immediately and the userspace flows are restored as soon as possible. The ovs-ctl utility's "restart" function only restarts the userspace daemons, makes sure that the 'ofport' values remain consistent across restarts, restores userspace flows using the ovs-ofctl utility and also uses the other_config:flow-restore-wait column to keep the traffic downtime to the minimum. The ovs-ctl utility's "force-reload-kmod" function does all of the above, but also replaces the old kernel module with the new one. Open vSwitch startup scripts for Debian, XenServer and RHEL use ovs-ctl's functions and it is recommended that these functions be used for other software platforms too. Running the Testsuite ===================== Open vSwitch includes a testsuite.
Before you submit patches upstream, we advise that you run the tests and ensure that they pass. If you add new features to Open vSwitch, then adding tests for those features will ensure your features don't break as developers modify other areas of Open vSwitch. You must configure and build Open vSwitch (steps 1 through 3 in "Building and Installing Open vSwitch for Linux, FreeBSD or NetBSD" above) before you run the testsuite. You do not need to install Open vSwitch or to build or load the kernel module to run the testsuite. You do not need supervisor privilege to run the testsuite. To run all the unit tests in Open vSwitch, one at a time: make check This takes under 5 minutes on a modern desktop system. To run all the unit tests in Open vSwitch, up to 8 in parallel: make check TESTSUITEFLAGS=-j8 This takes under a minute on a modern 4-core desktop system. To see a list of all the available tests, run: make check TESTSUITEFLAGS=--list To run only a subset of tests, e.g. test 123 and tests 477 through 484: make check TESTSUITEFLAGS='123 477-484' (Tests do not have inter-dependencies, so you may run any subset.) To run tests matching a keyword, e.g. "ovsdb": make check TESTSUITEFLAGS='-k ovsdb' To see a complete list of test options: make check TESTSUITEFLAGS=--help The results of a testing run are reported in tests/testsuite.log. Please report test failures as bugs and include the testsuite.log in your report. If you have "valgrind" installed, then you can also run the testsuite under valgrind by using "make check-valgrind" in place of "make check". All the same options are available via TESTSUITEFLAGS. When you do this, the "valgrind" results for test <N> are reported in files named tests/testsuite.dir/<N>/valgrind.*. You may find that the valgrind results are easier to interpret if you put "-q" in ~/.valgrindrc, since that reduces the amount of output. Sometimes a few tests may fail on some runs but not others.
This is usually a bug in the testsuite, not a bug in Open vSwitch itself. If you find that a test fails intermittently, please report it, since the developers may not have noticed. Bug Reporting ------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/INSTALL.Debian000066400000000000000000000107201226605124000176640ustar00rootroot00000000000000 How to Build Debian Packages for Open vSwitch ============================================= This document describes how to build Debian packages for Open vSwitch. To install Open vSwitch on Debian without building Debian packages, see INSTALL instead. These instructions should also work on Ubuntu and other Debian derivative distributions. Before You Begin ---------------- Before you begin, consider whether you really need to build packages yourself. Debian "wheezy" and "sid", as well as recent versions of Ubuntu, contain pre-built Debian packages for Open vSwitch. It is easier to install these than to build your own. To use packages from your distribution, skip ahead to "Installing .deb Packages", below. Building Open vSwitch Debian packages ------------------------------------- You may build from an Open vSwitch distribution tarball or from an Open vSwitch Git tree with these instructions. You do not need to be the superuser to build the Debian packages. 1. Install the "build-essential" and "fakeroot" packages, e.g. with "apt-get install build-essential fakeroot". 2. Obtain and unpack an Open vSwitch source distribution and "cd" into its top level directory. 3. Install the build dependencies listed under "Build-Depends:" near the top of debian/rules. You can install these any way you like, e.g. with "apt-get install". Check your work by running "dpkg-checkbuilddeps". If you've installed all the dependencies properly, dpkg-checkbuilddeps will exit without printing anything. If you forgot to install some dependencies, it will tell you which ones. 4. 
Run: fakeroot debian/rules binary This will do a serial build that runs the unit tests. If you prefer, you can run a faster parallel build, e.g.: DEB_BUILD_OPTIONS='parallel=8' fakeroot debian/rules binary If you are in a big hurry, you can even skip the unit tests: DEB_BUILD_OPTIONS='parallel=8 nocheck' fakeroot debian/rules binary 5. The generated .deb files will be in the parent directory of the Open vSwitch source distribution. Installing .deb Packages ------------------------ These instructions apply to installing from Debian packages that you built yourself, as described in the previous section, or from packages provided by Debian or a Debian derivative distribution such as Ubuntu. In the former case, use a command such as "dpkg -i" to install the .deb files that you build, and in the latter case use a program such as "apt-get" or "aptitude" to download and install the provided packages. You must be superuser to install Debian packages. 1. Start by installing an Open vSwitch kernel module. There are multiple ways to do this. In order of increasing manual effort, these are: * Use a Linux kernel 3.3 or later, which has an integrated Open vSwitch kernel module. The upstream Linux kernel module lacks a few features that are in the third-party module. For details, please see the FAQ, "What features are not available in the Open vSwitch kernel datapath that ships as part of the upstream Linux kernel?". * Install the "openvswitch-datapath-dkms" Debian package that you built earlier. This should automatically build and install the Open vSwitch kernel module for your running kernel. This option requires that you have a compiler and toolchain installed on the machine where you run Open vSwitch, which may be unacceptable in some production server environments. * Install the "openvswitch-datapath-source" Debian package, use "module-assistant" to build a Debian package of the Open vSwitch kernel module for your kernel, and then install that Debian package. 
You can install the kernel module Debian packages that you build this way on the same machine where you built it or on another machine or machines, which means that you don't necessarily have to have any build infrastructure on the machines where you use the kernel module. /usr/share/doc/openvswitch-datapath-source/README.Debian has details on the build process. * Build and install the kernel module by hand. 2. Install the "openvswitch-switch" and "openvswitch-common" packages. These packages include the core userspace components of the switch. Open vSwitch .deb packages not mentioned above are rarely useful. Please refer to their individual package descriptions to find out whether any of them are useful to you. Bug Reporting ------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/INSTALL.Fedora000066400000000000000000000032331226605124000177030ustar00rootroot00000000000000 How to Install Open vSwitch on Fedora Linux =========================================== This document describes how to build and install Open vSwitch on a Fedora Linux host. If you want to install Open vSwitch on a generic Linux host, see INSTALL.Linux instead. We have tested these instructions with Fedora 16 and Fedora 17. Building Open vSwitch for Fedora -------------------------------- You may build from an Open vSwitch distribution tarball or from an Open vSwitch Git tree. Before you begin, note the RPM source directory on your version of Fedora. On Fedora 17, it is $HOME/rpmbuild/SOURCES. 1. If you are building from an Open vSwitch Git tree, then you will need to first create a distribution tarball by running "./boot.sh; ./configure; make dist" in the Git tree. 2. Copy the distribution tarball into the RPM source directory. 3. Unpack the distribution tarball into a temporary directory and "cd" into the root of the distribution tarball. 4. To build Open vSwitch userspace, run: rpmbuild -bb rhel/openvswitch-fedora.spec This produces one RPM: "openvswitch". 
5. On Fedora 17, to build the Open vSwitch kernel module, run: rpmbuild -bb rhel/openvswitch-kmod-fedora.spec You might have to specify a kernel version and/or variants, e.g.: rpmbuild -bb \ -D "kversion 2.6.32-131.6.1.el6.x86_64" \ -D "kflavors default debug kdump" \ rhel/openvswitch-kmod-rhel6.spec This produces a "kmod-openvswitch" RPM for each kernel variant, in this example: "kmod-openvswitch", "kmod-openvswitch-debug", and "kmod-openvswitch-kdump". Reporting Bugs -------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/INSTALL.KVM000066400000000000000000000050301226605124000171350ustar00rootroot00000000000000 How to Use Open vSwitch with KVM ================================= This document describes how to use Open vSwitch with the Kernel-based Virtual Machine (KVM). This document assumes that you have read and followed INSTALL to get Open vSwitch set up on your Linux system. Setup ----- First, follow the setup instructions in INSTALL to get a working Open vSwitch installation. KVM uses tunctl to handle various bridging modes, which you can install with the Debian/Ubuntu package uml-utilities. % apt-get install uml-utilities Next, you will need to modify or create custom versions of the qemu-ifup and qemu-ifdown scripts. In this guide, we'll create custom versions that make use of example Open vSwitch bridges that we'll describe in this guide. Create the following two files and store them in known locations. 
For example /etc/ovs-ifup and /etc/ovs-ifdown /etc/ovs-ifup -------------------------------------------------------------------- #!/bin/sh switch='br0' /sbin/ifconfig $1 0.0.0.0 up ovs-vsctl add-port ${switch} $1 -------------------------------------------------------------------- /etc/ovs-ifdown -------------------------------------------------------------------- #!/bin/sh switch='br0' /sbin/ifconfig $1 0.0.0.0 down ovs-vsctl del-port ${switch} $1 -------------------------------------------------------------------- At the end of INSTALL, it describes basic usage of creating bridges and ports. If you haven't already, create a bridge named br0 with the following command: % ovs-vsctl add-br br0 Then, add a port to the bridge for the NIC that you want your guests to communicate over (e.g. eth0): % ovs-vsctl add-port br0 eth0 Please refer to ovs-vsctl(8) for more details. Next, we'll start a guest that will use our ifup and ifdown scripts. % kvm -m 512 -net nic,macaddr=00:11:22:EE:EE:EE -net \ tap,script=/etc/ovs-ifup,downscript=/etc/ovs-ifdown -drive \ file=/path/to/disk-image,boot=on This will start the guest and associate a tap device with it. The ovs-ifup script will add a port on the br0 bridge so that the guest will be able to communicate over that bridge. To get some more information and for debugging you can use Open vSwitch utilities such as ovs-dpctl and ovs-ofctl. For example: % ovs-dpctl show % ovs-ofctl show br0 You should see tap devices for each KVM guest added as ports to the bridge (e.g. tap0). Please refer to ovs-dpctl(8) and ovs-ofctl(8) for more details. Bug Reporting ------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/INSTALL.Libvirt000066400000000000000000000043411226605124000201170ustar00rootroot00000000000000 How to Use Open vSwitch with Libvirt ==================================== This document describes how to use Open vSwitch with Libvirt 0.9.11 or later. 
This document assumes that you followed INSTALL or installed Open vSwitch from distribution packaging such as a .deb or .rpm. The Open vSwitch support is included by default in Libvirt 0.9.11. Consult www.libvirt.org for instructions on how to build the latest Libvirt, if your Linux distribution by default comes with an older Libvirt release. Limitations ----------- Currently there is no Open vSwitch support for networks that are managed by libvirt (e.g. NAT). As of now, only bridged networks are supported (those where the user has to manually create the bridge). Setup ----- First, create the Open vSwitch bridge by using the ovs-vsctl utility (this must be done with administrative privileges): % ovs-vsctl add-br ovsbr Once that is done, create a VM, if necessary, and edit its Domain XML file: % virsh edit <vm> Look up the <interface> section in the Domain XML file. There should be one such XML section for each interface the VM has. ...
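<interface type='network'> <mac address='52:54:00:71:b1:b6'/> <source network='default'/> </interface> ... And change it to something like this: ... <interface type='bridge'> <mac address='52:54:00:71:b1:b6'/> <source bridge='ovsbr'/> <virtualport type='openvswitch'/> </interface>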
... The interface type must be set to "bridge". The <source> XML element specifies the bridge to which this interface will be attached. The <virtualport> element indicates that the bridge in the <source> element is an Open vSwitch bridge. Then (re)start the VM and verify if the guest's vnet interface is attached to the ovsbr bridge. % ovs-vsctl show Troubleshooting --------------- If the VM does not want to start, then try to run the libvirtd process either from the terminal, so that all errors are printed in console, or inspect Libvirt/Open vSwitch log files for possible root cause. Bug Reporting ------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/INSTALL.RHEL000066400000000000000000000075361226605124000172470ustar00rootroot00000000000000 How to Install Open vSwitch on Red Hat Enterprise Linux ======================================================= This document describes how to build and install Open vSwitch on a Red Hat Enterprise Linux (RHEL) host. If you want to install Open vSwitch on a generic Linux host, see INSTALL instead. We have tested these instructions with RHEL 5.6 and RHEL 6.0. Building Open vSwitch for RHEL ------------------------------ You may build from an Open vSwitch distribution tarball or from an Open vSwitch Git tree. Before you begin, note the RPM source directory on your version of RHEL. On RHEL 5, the default RPM source directory is /usr/src/redhat/SOURCES. On RHEL 6, it is $HOME/rpmbuild/SOURCES. 1. Install build prerequisites: yum install gcc make python-devel openssl-devel kernel-devel \ kernel-debug-devel 2. Some versions of the RHEL 6 kernel-devel package contain a broken "build" symlink. If you are using such a version, you must fix the problem before continuing. To find out whether you are affected, run: cd /lib/modules/<version> ls -l build/ where <version> is the version number of the RHEL 6 kernel. (The trailing slash in the final command is important. Be sure to include it.) 
If the "ls" command produces a directory listing, your kernel-devel package is OK. If it produces a "No such file or directory" error, your kernel-devel package is buggy. If your kernel-devel package is buggy, then you can fix it with: cd /lib/modules/<version> rm build ln -s /usr/src/kernels/<target> build where <target> is the name of an existing directory under /usr/src/kernels, whose name should be similar to <version> but may contain some extra parts. Once you have done this, verify the fix with the same procedure you used above to check for the problem. 3. If you are building from an Open vSwitch Git tree, then you will need to first create a distribution tarball by running "./boot.sh; ./configure; make dist" in the Git tree. 4. Copy the distribution tarball into the RPM source directory. 5. Unpack the distribution tarball into a temporary directory and "cd" into the root of the distribution tarball. 6. To build Open vSwitch userspace, run: rpmbuild -bb rhel/openvswitch.spec This produces two RPMs: "openvswitch" and "openvswitch-debuginfo". If the build fails with "configure: error: source dir /lib/modules/2.6.32-279.el6.x86_64/build doesn't exist" or similar, then the kernel-devel package is missing or buggy. Go back to step 1 or 2 and fix the problem. 7. On RHEL 6, to build the Open vSwitch kernel module, copy rhel/openvswitch-kmod.files into the RPM source directory and run: rpmbuild -bb rhel/openvswitch-kmod-rhel6.spec You might have to specify a kernel version and/or variants, e.g.: rpmbuild -bb \ -D "kversion 2.6.32-131.6.1.el6.x86_64" \ -D "kflavors default debug kdump" \ rhel/openvswitch-kmod-rhel6.spec This produces a "kmod-openvswitch" RPM for each kernel variant, in this example: "kmod-openvswitch", "kmod-openvswitch-debug", and "kmod-openvswitch-kdump". A RHEL host has default firewall rules that prevent any Open vSwitch tunnel traffic from passing through. 
If a user configures Open vSwitch tunnels like GRE, VXLAN, LISP etc., they will either have to manually add iptables firewall rules to allow the tunnel traffic or add it through a startup script (Please refer to the "enable-protocol" command in the ovs-ctl(8) manpage). Red Hat Network Scripts Integration ----------------------------------- Simple integration with Red Hat network scripts has been implemented. Please read rhel/README.RHEL in the source tree or /usr/share/doc/openvswitch/README.RHEL in the installed openvswitch package for details. Reporting Bugs -------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/INSTALL.SSL000066400000000000000000000310051226605124000171420ustar00rootroot00000000000000 Configuring Open vSwitch for SSL ================================ If you plan to configure Open vSwitch to connect across the network to an OpenFlow controller, then we recommend that you build Open vSwitch with OpenSSL. SSL support ensures integrity and confidentiality of the OpenFlow connections, increasing network security. This file explains how to configure an Open vSwitch to connect to an OpenFlow controller over SSL. Refer to INSTALL for instructions on building Open vSwitch with SSL support. Open vSwitch uses TLS version 1.0 or later (TLSv1), as specified by RFC 2246, which is very similar to SSL version 3.0. TLSv1 was released in January 1999, so all current software and hardware should implement it. This document assumes basic familiarity with public-key cryptography and public-key infrastructure. SSL Concepts for OpenFlow ------------------------- This section is an introduction to the public-key infrastructure architectures that Open vSwitch supports for SSL authentication. To connect over SSL, every Open vSwitch must have a unique private/public key pair and a certificate that signs that public key. Typically, the Open vSwitch generates its own public/private key pair. 
There are two common ways to obtain a certificate for a switch: * Self-signed certificates: The Open vSwitch signs its certificate with its own private key. In this case, each switch must be individually approved by the OpenFlow controller(s), since there is no central authority. This is the only switch PKI model currently supported by NOX (http://noxrepo.org). * Switch certificate authority: A certificate authority (the "switch CA") signs each Open vSwitch's public key. The OpenFlow controllers then check that any connecting switches' certificates are signed by that certificate authority. This is the only switch PKI model supported by the simple OpenFlow controller included with Open vSwitch. Each Open vSwitch must also have a copy of the CA certificate for the certificate authority that signs OpenFlow controllers' keys (the "controller CA" certificate). Typically, the same controller CA certificate is installed on all of the switches within a given administrative unit. There are two common ways for a switch to obtain the controller CA certificate: * Manually copy the certificate to the switch through some secure means, e.g. using a USB flash drive, or over the network with "scp", or even FTP or HTTP followed by manual verification. * Open vSwitch "bootstrap" mode, in which Open vSwitch accepts and saves the controller CA certificate that it obtains from the OpenFlow controller on its first connection. Thereafter the switch will only connect to controllers signed by the same CA certificate. Establishing a Public Key Infrastructure ---------------------------------------- Open vSwitch can make use of your existing public key infrastructure. If you already have a PKI, you may skip forward to the next section. Otherwise, if you do not have a PKI, the ovs-pki script included with Open vSwitch can help. To create an initial PKI structure, invoke it as: % ovs-pki init to create and populate a new PKI directory. 
The default location for the PKI directory depends on how the Open vSwitch tree was configured (to see the configured default, look for the --dir option description in the output of "ovs-pki --help"). The pki directory contains two important subdirectories. The controllerca subdirectory contains controller CA files, including the following: - cacert.pem: Root certificate for the controller certificate authority. Each Open vSwitch must have a copy of this file to allow it to authenticate valid controllers. - private/cakey.pem: Private signing key for the controller certificate authority. This file must be kept secret. There is no need for switches or controllers to have a copy of it. The switchca subdirectory contains switch CA files, analogous to those in the controllerca subdirectory: - cacert.pem: Root certificate for the switch certificate authority. The OpenFlow controller must have this file to enable it to authenticate valid switches. - private/cakey.pem: Private signing key for the switch certificate authority. This file must be kept secret. There is no need for switches or controllers to have a copy of it. After you create the initial structure, you can create keys and certificates for switches and controllers with ovs-pki. Refer to the ovs-pki(8) manpage for complete details. A few examples of its use follow: CONTROLLER KEY GENERATION To create a controller private key and certificate in files named ctl-privkey.pem and ctl-cert.pem, run the following on the machine that contains the PKI structure: % ovs-pki req+sign ctl controller ctl-privkey.pem and ctl-cert.pem would need to be copied to the controller for its use at runtime. If you were to use ovs-controller, the simple OpenFlow controller included with Open vSwitch, then the --private-key and --certificate options, respectively, would point to these files. It is very important to make sure that no stray copies of ctl-privkey.pem are created, because they could be used to impersonate the controller. 
SWITCH KEY GENERATION WITH SELF-SIGNED CERTIFICATES If you are using self-signed certificates (see "SSL Concepts for OpenFlow"), this is one way to create an acceptable certificate for your controller to approve. 1. Run the following command on the Open vSwitch itself: % ovs-pki self-sign sc (This command does not require a copy of any of the PKI files generated by "ovs-pki init", and you should not copy them to the switch because some of them have contents that must remain secret for security.) The "ovs-pki self-sign" command has the following output: * sc-privkey.pem, the switch private key file. For security, the contents of this file must remain secret. There is ordinarily no need to copy this file off the Open vSwitch. * sc-cert.pem, the switch certificate, signed by the switch's own private key. Its contents are not a secret. 2. Optionally, copy controllerca/cacert.pem from the machine that has the OpenFlow PKI structure and verify that it is correct. (Otherwise, you will have to use CA certificate bootstrapping when you configure Open vSwitch in the next step.) 3. Configure Open vSwitch to use the keys and certificates (see "Configuring SSL Support", below). SWITCH KEY GENERATION WITH A SWITCH PKI (EASY METHOD) If you are using a switch PKI (see "SSL Concepts for OpenFlow", above), this method of switch key generation is a little easier than the alternate method described below, but it is also a little less secure because it requires copying a sensitive private key file from the machine hosting the PKI to the switch. 1. Run the following on the machine that contains the PKI structure: % ovs-pki req+sign sc switch This command has the following output: * sc-privkey.pem, the switch private key file. For security, the contents of this file must remain secret. * sc-cert.pem, the switch certificate. Its contents are not a secret. 2. Copy sc-privkey.pem and sc-cert.pem, plus controllerca/cacert.pem, to the Open vSwitch. 3. 
Delete the copies of sc-privkey.pem and sc-cert.pem on the PKI machine and any other copies that may have been made in transit. It is very important to make sure that there are no stray copies of sc-privkey.pem, because they could be used to impersonate the switch. (Don't delete controllerca/cacert.pem! It is not security-sensitive and you will need it to configure additional switches.) 4. Configure Open vSwitch to use the keys and certificates (see "Configuring SSL Support", below). SWITCH KEY GENERATION WITH A SWITCH PKI (MORE SECURE) If you are using a switch PKI (see "SSL Concepts for OpenFlow", above), then, compared to the previous method, the method described here takes a little more work, but it does not involve copying the private key from one machine to another, so it may also be a little more secure. 1. Run the following command on the Open vSwitch itself: % ovs-pki req sc switch (This command does not require a copy of any of the PKI files generated by "ovs-pki init", and you should not copy them to the switch because some of them have contents that must remain secret for security.) The "ovs-pki req" command has the following output: * sc-privkey.pem, the switch private key file. For security, the contents of this file must remain secret. There is ordinarily no need to copy this file off the Open vSwitch. * sc-req.pem, the switch "certificate request", which is essentially the switch's public key. Its contents are not a secret. * A fingerprint, on stdout. 2. Write the fingerprint down on a slip of paper and copy sc-req.pem to the machine that contains the PKI structure. 3. On the machine that contains the PKI structure, run: % ovs-pki sign sc switch This command will output a fingerprint to stdout and request that you verify it. Check that it is the same as the fingerprint that you wrote down on the slip of paper before you answer "yes". "ovs-pki sign" creates a file named sc-cert.pem, which is the switch certificate. Its contents are not a secret. 4. 
Copy the generated sc-cert.pem, plus controllerca/cacert.pem from the PKI structure, to the Open vSwitch, and verify that they were copied correctly. You may delete sc-cert.pem from the machine that hosts the PKI structure now, although it is not important that you do so. (Don't delete controllerca/cacert.pem! It is not security-sensitive and you will need it to configure additional switches.) 5. Configure Open vSwitch to use the keys and certificates (see "Configuring SSL Support", below). Configuring SSL Support ----------------------- SSL configuration requires three additional configuration files. The first two of these are unique to each Open vSwitch. If you used the instructions above to build your PKI, then these files will be named sc-privkey.pem and sc-cert.pem, respectively: - A private key file, which contains the private half of an RSA or DSA key. This file can be generated on the Open vSwitch itself, for the greatest security, or it can be generated elsewhere and copied to the Open vSwitch. The contents of the private key file are secret and must not be exposed. - A certificate file, which certifies that the private key is that of a trustworthy Open vSwitch. This file has to be generated on a machine that has the private key for the switch certification authority, which should not be an Open vSwitch; ideally, it should be a machine that is not networked at all. The certificate file itself is not a secret. The third configuration file is typically the same across all the switches in a given administrative unit. If you used the instructions above to build your PKI, then this file will be named cacert.pem: - The root certificate for the controller certificate authority. The Open vSwitch verifies that it is authorized to connect to an OpenFlow controller by verifying a signature against this CA certificate. 
Once you have these files, configure ovs-vswitchd to use them using the ovs-vsctl "set-ssl" command, e.g.: ovs-vsctl set-ssl /etc/openvswitch/sc-privkey.pem /etc/openvswitch/sc-cert.pem /etc/openvswitch/cacert.pem Substitute the correct file names, of course, if they differ from the ones used above. You should use absolute file names (ones that begin with "/"), because ovs-vswitchd's current directory is unrelated to the one from which you run ovs-vsctl. If you are using self-signed certificates (see "SSL Concepts for OpenFlow") and you did not copy controllerca/cacert.pem from the PKI machine to the Open vSwitch, then add the --bootstrap option, e.g.: ovs-vsctl -- --bootstrap set-ssl /etc/openvswitch/sc-privkey.pem /etc/openvswitch/sc-cert.pem /etc/openvswitch/cacert.pem After you have added all of these configuration keys, you may specify "ssl:" connection methods elsewhere in the configuration database. "tcp:" connection methods are still allowed even after SSL has been configured, so for security you should use only "ssl:" connections. Reporting Bugs -------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/INSTALL.XenServer000066400000000000000000000171101226605124000204230ustar00rootroot00000000000000 How to Install Open vSwitch on Citrix XenServer =============================================== This document describes how to build and install Open vSwitch on a Citrix XenServer host. If you want to install Open vSwitch on a generic Linux or BSD host, see INSTALL instead. These instructions have been tested with XenServer 5.6 FP1. Building Open vSwitch for XenServer ----------------------------------- You may build from an Open vSwitch distribution tarball or from an Open vSwitch Git tree. The recommended build environment to build RPMs for Citrix XenServer is the DDK VM available from Citrix. 1. 
If you are building from an Open vSwitch Git tree, then you will need to first create a distribution tarball by running "./boot.sh; ./configure; make dist" in the Git tree. You cannot run this in the DDK VM, because it lacks tools that are necessary to bootstrap the Open vSwitch distribution. Instead, you must run this on a machine that has the tools listed in INSTALL as prerequisites for building from a Git tree. 2. Copy the distribution tarball into /usr/src/redhat/SOURCES inside the DDK VM. 3. In the DDK VM, unpack the distribution tarball into a temporary directory and "cd" into the root of the distribution tarball. 4. To build Open vSwitch userspace, run: rpmbuild -bb xenserver/openvswitch-xen.spec This produces three RPMs in /usr/src/redhat/RPMS/i386: "openvswitch", "openvswitch-modules-xen", and "openvswitch-debuginfo". Build Parameters ---------------- openvswitch-xen.spec needs to know a number of pieces of information about the XenServer kernel. Usually, it can figure these out for itself, but if it does not do it correctly then you can specify them yourself as parameters to the build. Thus, the final "rpmbuild" step above can be elaborated as: VERSION=<Open vSwitch version> KERNEL_NAME=<Xen Kernel name> KERNEL_VERSION=<Xen Kernel version> KERNEL_FLAVOR=<Xen Kernel flavor> rpmbuild \ -D "openvswitch_version $VERSION" \ -D "kernel_name $KERNEL_NAME" \ -D "kernel_version $KERNEL_VERSION" \ -D "kernel_flavor $KERNEL_FLAVOR" \ -bb xenserver/openvswitch-xen.spec where: <Open vSwitch version> is the version number that appears in the name of the Open vSwitch tarball, e.g. 0.90.0. <Xen Kernel name> is the name of the XenServer kernel package, e.g. kernel-xen or kernel-NAME-xen, without the "kernel-" prefix. <Xen Kernel version> is the output of: rpm -q --queryformat "%{Version}-%{Release}" <kernel-devel-package>, e.g. 2.6.32.12-0.7.1.xs5.6.100.323.170596, where <kernel-devel-package> is the name of the -devel package corresponding to <Xen Kernel name>. <Xen Kernel flavor> is either "xen" or "kdump". The "xen" flavor is the main running kernel flavor and the "kdump" flavor is the crashdump kernel flavor. Commonly, one would specify "xen" here. 
Installing Open vSwitch for XenServer ------------------------------------- To install Open vSwitch on a XenServer host, or to upgrade to a newer version, copy the "openvswitch" and "openvswitch-modules-xen" RPMs to that host with "scp", then install them with "rpm -U", e.g.: scp openvswitch-$VERSION-1.i386.rpm \ openvswitch-modules-xen-$XEN_KERNEL_VERSION-$VERSION-1.i386.rpm \ root@<host>: (At this point you will have to enter <host>'s root password.) ssh root@<host> (At this point you will have to enter <host>'s root password again.) rpm -U openvswitch-$VERSION-1.i386.rpm \ openvswitch-modules-xen-$XEN_KERNEL_VERSION-$VERSION-1.i386.rpm To uninstall Open vSwitch from a XenServer host, remove the packages: ssh root@<host> (At this point you will have to enter <host>'s root password again.) rpm -e openvswitch openvswitch-modules-xen-$XEN_KERNEL_VERSION After installing or uninstalling Open vSwitch, the XenServer should be rebooted as soon as possible. Open vSwitch Boot Sequence on XenServer --------------------------------------- When Open vSwitch is installed on XenServer, its startup script /etc/init.d/openvswitch runs early in boot. It does roughly the following: * Loads the OVS kernel module, openvswitch. * Starts ovsdb-server, the OVS configuration database. * XenServer expects there to be no bridges configured at startup, but the OVS configuration database likely still has bridges configured from before reboot. To match XenServer expectations, the startup script deletes all configured bridges from the database. * Starts ovs-vswitchd, the OVS switching daemon. At this point in the boot process, then, there are no Open vSwitch bridges, even though all of the Open vSwitch daemons are running. Later on in boot, /etc/init.d/management-interface (part of XenServer, not Open vSwitch) creates the bridge for the XAPI management interface by invoking /opt/xensource/libexec/interface-reconfigure. 
Normally this program consults XAPI's database to obtain information about how to configure the bridge, but XAPI is not running yet[*] so it instead consults /var/xapi/network.dbcache, which is a cached copy of the most recent network configuration. [*] Even if XAPI were running, if this XenServer node is a pool slave then the query would have to consult the master, which requires network access, which begs the question of how to configure the management interface. XAPI starts later on in the boot process. XAPI can then create other bridges on demand using /opt/xensource/libexec/interface-reconfigure. Now that XAPI is running, that program consults XAPI directly instead of reading the cache. As part of its own startup, XAPI invokes the Open vSwitch XAPI plugin script /etc/xapi.d/openvswitch-cfg-update passing the "update" command. The plugin script does roughly the following: * Calls /opt/xensource/libexec/interface-reconfigure with the "rewrite" command, to ensure that the network cache is up-to-date. * Queries the Open vSwitch manager setting (named "vswitch_controller") from the XAPI database for the XenServer pool. * If XAPI and OVS are configured for different managers, or if OVS is configured for a manager but XAPI is not, runs "ovs-vsctl emer-reset" to bring the Open vSwitch configuration to a known state. One effect of emer-reset is to deconfigure any manager from the OVS database. * If XAPI is configured for a manager, configures the OVS manager to match with "ovs-vsctl set-manager". Notes ----- * The Open vSwitch boot sequence only configures an OVS configuration database manager. There is no way to directly configure an OpenFlow controller on XenServer and, as a consequence of the step above that deletes all of the bridges at boot time, controller configuration only persists until XenServer reboot. The configuration database manager can, however, configure controllers for bridges. 
See the BUGS section of ovs-controller(8) for more information on this topic. * The Open vSwitch startup script automatically adds a firewall rule to allow GRE traffic. This rule is needed for the XenServer feature called "Cross-Host Internal Networks" (CHIN) that uses GRE. If a user configures tunnels other than GRE (e.g. VXLAN, LISP), they will have to either manually add an iptables firewall rule to allow the tunnel traffic or add it through a startup script (Please refer to the "enable-protocol" command in the ovs-ctl(8) manpage). Reporting Bugs -------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/INSTALL.userspace000066400000000000000000000060351226605124000205000ustar00rootroot00000000000000 Using Open vSwitch without kernel support ========================================= Open vSwitch can operate, at a cost in performance, entirely in userspace, without assistance from a kernel module. This file explains how to install Open vSwitch in such a mode. The userspace-only mode of Open vSwitch is considered experimental. It has not been thoroughly tested. This version of Open vSwitch should be built manually with "configure" and "make". Debian packaging for Open vSwitch is also included, but it has not been recently tested, and so Debian packages are not a recommended way to use this version of Open vSwitch. Building and Installing ----------------------- The requirements and procedure for building, installing, and configuring Open vSwitch are the same as those given in INSTALL. You may omit configuring, building, and installing the kernel module, and the related requirements. On Linux, the userspace switch additionally requires the kernel TUN/TAP driver to be available, either built into the kernel or loaded as a module. If you are not sure, check for a directory named /sys/class/misc/tun. If it does not exist, then attempt to load the module with "modprobe tun". The tun device must also exist as /dev/net/tun. 
If it does not exist, then create /dev/net (if necessary) with "mkdir /dev/net", then create /dev/net/tun with "mknod /dev/net/tun c 10 200". On FreeBSD and NetBSD, the userspace switch additionally requires the kernel tap(4) driver to be available, either built into the kernel or loaded as a module. Using the Userspace Datapath with ovs-vswitchd ---------------------------------------------- To use ovs-vswitchd in userspace mode, create a bridge with datapath_type "netdev" in the configuration database. For example: ovs-vsctl add-br br0 ovs-vsctl set bridge br0 datapath_type=netdev ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 eth1 ovs-vsctl add-port br0 eth2 ovs-vswitchd will create a TAP device as the bridge's local interface, named the same as the bridge, as well as for each configured internal interface. Currently, on FreeBSD, the functionality required for in-band control support is not implemented. To avoid related errors, you can disable the in-band support with the following command. ovs-vsctl set bridge br0 other_config:disable-in-band=true Firewall Rules -------------- On Linux, when a physical interface is in use by the userspace datapath, packets received on the interface still also pass into the kernel TCP/IP stack. This can cause surprising and incorrect behavior. You can use "iptables" to avoid this behavior, by using it to drop received packets. For example, to drop packets received on eth0: iptables -A INPUT -i eth0 -j DROP iptables -A FORWARD -i eth0 -j DROP Other settings -------------- On NetBSD, depending on your network topology and applications, the following configuration might help. See sysctl(7). sysctl net.inet.ip.checkinterface=1 Bug Reporting ------------- Please report problems to bugs@openvswitch.org. 
openvswitch-2.0.1+git20140120/IntegrationGuide000066400000000000000000000173251226605124000206460ustar00rootroot00000000000000 Integration Guide for Centralized Control ========================================= This document describes how to integrate Open vSwitch onto a new platform to expose the state of the switch and attached devices for centralized control. (If you are looking to port the switching components of Open vSwitch to a new platform, please see the PORTING document.) The focus of this guide is on hypervisors, but many of the interfaces are useful for hardware switches, as well. The XenServer integration is the most mature implementation, so most of the examples are drawn from it. The externally visible interface to this integration is platform-agnostic. We encourage anyone who integrates Open vSwitch to use the same interface, because keeping a uniform interface means that controllers require less customization for individual platforms (and perhaps no customization at all). Integration centers around the Open vSwitch database and mostly involves the 'external_ids' columns in several of the tables. These columns are not interpreted by Open vSwitch itself. Instead, they provide information to a controller that permits it to associate a database record with a more meaningful entity. In contrast, the 'other_config' column is used to configure behavior of the switch. The main job of the integrator, then, is to ensure that these values are correctly populated and maintained. An integrator sets the columns in the database by talking to the ovsdb-server daemon. A few of the columns can be set during startup by calling the ovs-ctl tool from inside the startup scripts. The 'xenserver/etc_init.d_openvswitch' script provides examples of its use, and the ovs-ctl(8) manpage contains complete documentation. At runtime, ovs-vsctl can be used to set columns in the database.
The script 'xenserver/etc_xensource_scripts_vif' contains examples of its use, and the ovs-vsctl(8) manpage contains complete documentation. Python and C bindings to the database are provided if deeper integration with a program is needed. The XenServer ovs-xapi-sync daemon ('xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync') provides an example of using the Python bindings. More information on the Python bindings is available at 'python/ovs/db/idl.py'. Information on the C bindings is available at 'lib/ovsdb-idl.h'. The following diagram shows how integration scripts fit into the Open vSwitch architecture: +----------------------------------------+ | Controller Cluster + +----------------------------------------+ | | +----------------------------------------------------------+ | | | | +--------------+---------------+ | | | | | | +-------------------+ +------------------+ | | | ovsdb-server |-----------| ovs-vswitchd | | | +-------------------+ +------------------+ | | | | | | +---------------------+ | | | | Integration scripts | | | | | (ex: ovs-xapi-sync) | | | | +---------------------+ | | | | Userspace | |----------------------------------------------------------| | | Kernel | | | | | +---------------------+ | | | OVS Kernel Module | | | +---------------------+ | +----------------------------------------------------------+ A description of the most relevant fields for integration follows. By setting these values, controllers are able to understand the network and manage it more dynamically and precisely. For more details about the database and each individual column, please refer to the ovs-vswitchd.conf.db(5) manpage. Open_vSwitch table ------------------ The Open_vSwitch table describes the switch as a whole. The 'system_type' and 'system_version' columns identify the platform to the controller. The 'external_ids:system-id' key uniquely identifies the physical host. In XenServer, the system-id will likely be the same as the UUID returned by 'xe host-list'.
This key allows controllers to distinguish between multiple hypervisors. Most of this configuration can be done with the ovs-ctl command at startup. For example: ovs-ctl --system-type="XenServer" --system-version="6.0.0-50762p" \ --system-id="${UUID}" "${other_options}" start Alternatively, the ovs-vsctl command may be used to set a particular value at runtime. For example: ovs-vsctl set open_vswitch . external-ids:system-id='"${UUID}"' The 'other_config:enable-statistics' key may be set to "true" to have OVS populate the database with statistics (e.g., number of CPUs, memory, system load) for the controller's use. Bridge table ------------ The Bridge table describes individual bridges within an Open vSwitch instance. The 'external-ids:bridge-id' key uniquely identifies a particular bridge. In XenServer, this will likely be the same as the UUID returned by 'xe network-list' for that particular bridge. For example, to set the identifier for bridge "br0", the following command can be used: ovs-vsctl set Bridge br0 external-ids:bridge-id='"${UUID}"' The MAC address of the bridge may be manually configured by setting it with the "other_config:hwaddr" key. For example: ovs-vsctl set Bridge br0 other_config:hwaddr="12:34:56:78:90:ab" Interface table --------------- The Interface table describes an interface under the control of Open vSwitch. The 'external_ids' column contains keys that are used to provide additional information about the interface: attached-mac This field contains the MAC address of the device attached to the interface. On a hypervisor, this is the MAC address of the interface as seen inside a VM. It does not necessarily correlate to the host-side MAC address. For example, on XenServer, the MAC address on a VIF in the hypervisor is always FE:FF:FF:FF:FF:FF, but inside the VM a normal MAC address is seen. iface-id This field uniquely identifies the interface. In hypervisors, this allows the controller to follow VM network interfaces as VMs migrate. 
A well-chosen identifier should also allow an administrator or a controller to associate the interface with the corresponding object in the VM management system. For example, the Open vSwitch integration with XenServer by default uses the XenServer assigned UUID for a VIF record as the iface-id. iface-status In a hypervisor, there are situations where there are multiple interface choices for a single virtual ethernet interface inside a VM. Valid values are "active" and "inactive". A complete description is available in the ovs-vswitchd.conf.db(5) manpage. vm-id This field uniquely identifies the VM to which this interface belongs. A single VM may have multiple interfaces attached to it. As in the previous tables, the ovs-vsctl command may be used to configure the values. For example, to set the 'iface-id' on eth0, the following command can be used: ovs-vsctl set Interface eth0 external-ids:iface-id='"${UUID}"' openvswitch-2.0.1+git20140120/Makefile.am000066400000000000000000000210261226605124000175070ustar00rootroot00000000000000# Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. AUTOMAKE_OPTIONS = foreign subdir-objects ACLOCAL_AMFLAGS = -I m4 SUBDIRS = datapath AM_CPPFLAGS = $(SSL_CFLAGS) AM_CPPFLAGS += -I $(top_srcdir)/include AM_CPPFLAGS += -I $(top_srcdir)/lib AM_CPPFLAGS += -I $(top_builddir)/lib AM_CFLAGS = -Wstrict-prototypes AM_CFLAGS += $(WARNING_FLAGS) if NDEBUG AM_CPPFLAGS += -DNDEBUG AM_CFLAGS += -fomit-frame-pointer endif # PYTHONDONTWRITEBYTECODE=yes keeps Python from creating .pyc and .pyo # files. 
Creating .py[co] works OK for any given version of Open # vSwitch, but it causes trouble if you switch from a version with # foo/__init__.py into an (older) version with plain foo.py, since # foo/__init__.pyc will cause Python to ignore foo.py. run_python = \ PYTHONDONTWRITEBYTECODE=yes \ PYTHONPATH=$(top_srcdir)/python:$$PYTHONPATH \ $(PYTHON) ALL_LOCAL = BUILT_SOURCES = CLEANFILES = CLEAN_LOCAL = DISTCLEANFILES = PYCOV_CLEAN_FILES = build-aux/check-structs,cover EXTRA_DIST = \ CodingStyle \ DESIGN \ FAQ \ INSTALL \ INSTALL.Debian \ INSTALL.Fedora \ INSTALL.KVM \ INSTALL.Libvirt \ INSTALL.RHEL \ INSTALL.SSL \ INSTALL.XenServer \ INSTALL.userspace \ IntegrationGuide \ NOTICE \ OPENFLOW-1.1+ \ PORTING \ README-OFTest \ README-gcov \ README-lisp \ REPORTING-BUGS \ SubmittingPatches \ WHY-OVS \ boot.sh \ build-aux/sodepends.pl \ build-aux/soexpand.pl \ $(MAN_FRAGMENTS) \ $(MAN_ROOTS) bin_PROGRAMS = sbin_PROGRAMS = bin_SCRIPTS = DIST_HOOKS = dist_man_MANS = dist_pkgdata_DATA = dist_pkgdata_SCRIPTS = dist_sbin_SCRIPTS = dist_scripts_SCRIPTS = dist_scripts_DATA = INSTALL_DATA_LOCAL = UNINSTALL_LOCAL = man_MANS = MAN_FRAGMENTS = MAN_ROOTS = noinst_DATA = noinst_HEADERS = noinst_LIBRARIES = noinst_man_MANS = noinst_PROGRAMS = noinst_SCRIPTS = OVSIDL_BUILT = pkgdata_DATA = sbin_SCRIPTS = scripts_SCRIPTS = scripts_DATA = SUFFIXES = check_DATA = scriptsdir = $(pkgdatadir)/scripts # This ensures that files added to EXTRA_DIST are always distributed, # even if they are inside an Automake if...endif conditional block that is # disabled by some particular "configure" run. For more information, see: # http://article.gmane.org/gmane.comp.sysutils.automake.general/10891 noinst_HEADERS += $(EXTRA_DIST) ro_c = echo '/* -*- mode: c; buffer-read-only: t -*- */' ro_shell = printf '\043 Generated automatically -- do not modify! 
-*- buffer-read-only: t -*-\n' SUFFIXES += .in .in: $(PERL) $(srcdir)/build-aux/soexpand.pl -I$(srcdir) < $< | \ sed \ -e 's,[@]PKIDIR[@],$(PKIDIR),g' \ -e 's,[@]LOGDIR[@],$(LOGDIR),g' \ -e 's,[@]DBDIR[@],$(DBDIR),g' \ -e 's,[@]PERL[@],$(PERL),g' \ -e 's,[@]PYTHON[@],$(PYTHON),g' \ -e 's,[@]RUNDIR[@],$(RUNDIR),g' \ -e 's,[@]VERSION[@],$(VERSION),g' \ -e 's,[@]localstatedir[@],$(localstatedir),g' \ -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \ -e 's,[@]sysconfdir[@],$(sysconfdir),g' \ -e 's,[@]bindir[@],$(bindir),g' \ -e 's,[@]sbindir[@],$(sbindir),g' \ -e 's,[@]abs_top_srcdir[@],$(abs_top_srcdir),g' \ -e 's,[@]ovsdbmonitordir[@],$(ovsdbmonitordir),g' \ > $@.tmp @if head -n 1 $@.tmp | grep '#!' > /dev/null; then \ echo chmod +x $@.tmp; \ chmod +x $@.tmp; \ fi mv $@.tmp $@ .PHONY: clean-pycov clean-pycov: cd $(srcdir) && rm -f $(PYCOV_CLEAN_FILES) CLEAN_LOCAL += clean-pycov # If we're checked out from a Git repository, make sure that every # file that is in Git is distributed. # # We only enable this check when GNU make is in use because the # Makefile in datapath/linux, needed to get the list of files to # distribute, requires GNU make extensions. if GNU_MAKE ALL_LOCAL += dist-hook-git dist-hook-git: distfiles @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1; then \ (cd datapath && $(MAKE) distfiles); \ (cat distfiles; sed 's|^|datapath/|' datapath/distfiles) | \ sort -u > all-distfiles; \ (cd $(srcdir) && git ls-files) | grep -v '\.gitignore$$' | \ sort -u > all-gitfiles; \ comm -1 -3 all-distfiles all-gitfiles > missing-distfiles; \ if test -s missing-distfiles; then \ echo "The distribution is missing the following files:"; \ cat missing-distfiles; \ exit 1; \ fi; \ fi CLEANFILES += all-distfiles all-gitfiles missing-distfiles # The following is based on commands for the Automake "distdir" target. 
distfiles: Makefile @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t" | sort -u > $@ CLEANFILES += distfiles endif .PHONY: dist-hook-git # Check that every .c file includes <config.h>. ALL_LOCAL += config-h-check config-h-check: @cd $(srcdir); \ if test -e .git && (git --version) >/dev/null 2>&1 && \ git --no-pager grep -L '#include ' `git ls-files | grep '\.c$$' | \ grep -vE '^datapath|^lib/sflow|^third-party'`; \ then \ echo "See above for list of violations of the rule that"; \ echo "every C source file must #include ."; \ exit 1; \ fi .PHONY: config-h-check # Check that certain data structures are always declared "static". ALL_LOCAL += static-check static-check: @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1 && \ git --no-pager grep -n -E '^[ ]+(struct vlog_rate_limit|pthread_once_t|struct ovsthread_once).*=' $(srcdir); \ then \ echo "See above for list of violations of the rule that "; \ echo "certain data structures must always be 'static'"; \ exit 1; \ fi .PHONY: static-check # Check that assert.h is not used outside a whitelist of files.
ALL_LOCAL += check-assert-h-usage check-assert-h-usage: @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1 && \ (cd $(srcdir) && git --no-pager grep -l -E '[<]assert.h[>]') | \ $(EGREP) -v '^lib/(sflow_receiver|vlog).c$$|^tests/'; \ then \ echo "Files listed above unexpectedly #include <""assert.h"">."; \ echo "Please use ovs_assert (from util.h) instead of assert."; \ exit 1; \ fi .PHONY: check-assert-h-usage ALL_LOCAL += thread-safety-check thread-safety-check: @if test -e '$(srcdir)'/.git && (git --version) >/dev/null 2>&1 && \ grep -n -f '$(srcdir)'/build-aux/thread-safety-blacklist \ `git ls-files '$(srcdir)' | grep '\.[ch]$$' \ | $(EGREP) -v '^datapath|^lib/sflow|^third-party'` \ | $(EGREP) -v ':[ ]*/?\*'; \ then \ echo "See above for list of calls to functions that are"; \ echo "blacklisted due to thread safety issues"; \ exit 1; \ fi EXTRA_DIST += build-aux/thread-safety-blacklist if HAVE_GROFF ALL_LOCAL += manpage-check manpage-check: $(man_MANS) $(dist_man_MANS) $(noinst_man_MANS) @error=false; \ for manpage in $?; do \ LANG=en_US.UTF-8 groff -w mac -w delim -w escape -w input -w missing -w tab -T utf8 -man -p -z $$manpage >$@.tmp 2>&1; \ if grep warning: $@.tmp; then error=:; fi; \ rm -f $@.tmp; \ done; \ if $$error; then exit 1; else echo touch $@; touch $@; fi CLEANFILES += manpage-check endif include $(srcdir)/manpages.mk $(srcdir)/manpages.mk: $(MAN_ROOTS) build-aux/sodepends.pl @$(PERL) $(srcdir)/build-aux/sodepends.pl -I. 
-I$(srcdir) $(MAN_ROOTS) >$(@F).tmp @if cmp -s $(@F).tmp $@; then \ touch $@; \ rm -f $(@F).tmp; \ else \ mv $(@F).tmp $@; \ fi CLEANFILES += manpage-dep-check dist-hook: $(DIST_HOOKS) all-local: $(ALL_LOCAL) clean-local: $(CLEAN_LOCAL) install-data-local: $(INSTALL_DATA_LOCAL) uninstall-local: $(UNINSTALL_LOCAL) .PHONY: $(DIST_HOOKS) $(CLEAN_LOCAL) $(INSTALL_DATA_LOCAL) $(UNINSTALL_LOCAL) modules_install: if LINUX_ENABLED cd datapath/linux && $(MAKE) modules_install endif include lib/automake.mk include ofproto/automake.mk include utilities/automake.mk include tests/automake.mk include include/automake.mk include third-party/automake.mk include debian/automake.mk include vswitchd/automake.mk include ovsdb/automake.mk include rhel/automake.mk include xenserver/automake.mk include python/automake.mk include python/compat/automake.mk include tutorial/automake.mk openvswitch-2.0.1+git20140120/NEWS000066400000000000000000000665231226605124000161650ustar00rootroot00000000000000v2.0.1 - 13 Dec 2013 --------------------- - Bug fixes v2.0.0 - 15 Oct 2013 --------------------- - The ovs-vswitchd process is no longer single-threaded. Multiple threads are now used to handle flow set up and asynchronous logging. - OpenFlow: * Experimental support for OpenFlow 1.1 (in addition to 1.2 and 1.3, which had experimental support in 1.10). * New support for matching outer source and destination IP address of tunneled packets, for tunnel ports configured with the newly added "remote_ip=flow" and "local_ip=flow" options. * Support for matching on metadata 'pkt_mark' for interacting with other system components. On Linux this corresponds to the skb mark. * Support matching, rewriting SCTP ports - The Interface table in the database has a new "ifindex" column to report the interface's OS-assigned ifindex. - New "check-oftest" Makefile target for running OFTest against Open vSwitch. See README-OFTest for details. - The flow eviction threshold has been moved to the Open_vSwitch table. 
- Database names are now mandatory when specifying ovsdb-server options through database paths (e.g. Private key option with the database name should look like "--private-key=db:Open_vSwitch,SSL,private_key"). - Added ovs-dev.py, a utility script helpful for Open vSwitch developers. - Support for Linux kernels up to 3.10 - ovs-ofctl: * New "ofp-parse" for printing OpenFlow messages read from a file. - Added configurable flow caching support to IPFIX exporter. - Dropped support for Linux pre-2.6.32. - Log file timestamps and ovsdb commit timestamps are now reported with millisecond resolution. (Previous versions only reported whole seconds.) v1.11.0 - 28 Aug 2013 --------------------- - Support for megaflows, which allows wildcarding in the kernel (and any dpif implementation that supports wildcards). Depending on the flow table and switch configuration, flow set up rates are close to the Linux bridge. - The "tutorial" directory contains a new tutorial for some advanced Open vSwitch features. - Stable bond mode has been removed. - The autopath action has been removed. - New support for the data encapsulation format of the LISP tunnel protocol (RFC 6830). An external control plane or manual flow setup is required for EID-to-RLOC mapping. - OpenFlow: * The "dec_mpls_ttl" and "set_mpls_ttl" actions from OpenFlow 1.1 and later are now implemented. * New "stack" extension for use in actions, to push and pop from NXM fields. * The "load" and "set_field" actions can now modify the "in_port". (This allows one to enable output to a flow's input port by setting the in_port to some unused value, such as OFPP_NONE.) - ovs-dpctl: * New debugging commands "add-flow", "mod-flow", "del-flow". * "dump-flows" now has a -m option to increase output verbosity. - In dpif-based bridges, cache action translations, which can improve flow set up performance by 80% with a complicated flow table. - New syslog format, prefixed with "ovs|", to be easier to filter. 
- RHEL: Removes the default firewall rule that allowed GRE traffic to pass through. Any users that relied on this automatic firewall hole will have to manually configure it. The ovs-ctl(8) manpage documents the "enable-protocol" command that can be used as an alternative. - New CFM demand mode which uses data traffic to indicate interface liveness. v1.10.0 - 01 May 2013 --------------------- - Bridge compatibility support has been removed. Any uses that rely on ovs-brcompatd will have to stick with Open vSwitch 1.9.x or adapt to native Open vSwitch support (e.g. use ovs-vsctl instead of brctl). - The maximum size of the MAC learning table is now configurable. - With the Linux datapath, packets for new flows are now queued separately on a per-port basis, so it should no longer be possible for a large number of new flows arriving on one port to prevent new flows from being processed on other ports. - ovs-vsctl: * Previously ovs-vsctl would retry connecting to the database forever, causing it to hang if ovsdb-server was not running. Now, ovs-vsctl only tries once by default (use --retry to try forever). This change means that you may want to remove uses of --timeout to avoid hangs in ovs-vsctl calls. * Many "ovs-vsctl" database commands now accept an --if-exists option. Please refer to the ovs-vsctl manpage for details. - OpenFlow: - Experimental support for newer versions of OpenFlow. See the "What versions of OpenFlow does Open vSwitch support?" question in the FAQ for more details. - The OpenFlow "dp_desc" may now be configured by setting the value of other-config:dp-desc in the Bridge table. - It is possible to request the OpenFlow port number with the "ofport_request" column in the Interface table. - The NXM flow_removed message now reports the OpenFlow table ID from which the flow was removed. - Tunneling: - New support for the VXLAN tunnel protocol (see the IETF draft here: http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-03). 
- Tunneling requires the version of the kernel module paired with Open vSwitch 1.9.0 or later. - Inheritance of the Don't Fragment bit in IP tunnels (df_inherit) is no longer supported. - Path MTU discovery is no longer supported. - CAPWAP tunneling support removed. - Tunnels with multicast destination ports are no longer supported. - ovs-dpctl: - The "dump-flows" and "del-flows" commands no longer require an argument if only one datapath exists. - ovs-appctl: - New "vlog/disable-rate-limit" and "vlog/enable-rate-limit" commands allow control over logging rate limits. - New "dpif/dump-dps", "dpif/show", and "dpif/dump-flows" commands that mimic the equivalent ovs-dpctl commands. - The ofproto library is now responsible for assigning OpenFlow port numbers. An ofproto implementation should assign them when port_construct() is called. - All dpif-based bridges of a particular type share a common datapath called "ovs-<type>", e.g. "ovs-system". The ovs-dpctl commands will now return information on that shared datapath. To get the equivalent bridge-specific information, use the new "ovs-appctl dpif/*" commands. - Backward-incompatible changes: - Earlier Open vSwitch versions treated ANY as a wildcard in flow syntax. OpenFlow 1.1 adds a port named ANY, which introduces a conflict. ANY was rarely used in flow syntax, so we chose to retire that meaning of ANY in favor of the OpenFlow 1.1 meaning. - Patch ports no longer require kernel support, so they now work with FreeBSD and the kernel module built into Linux 3.3 and later. - New "sample" action. v1.9.0 - 26 Feb 2013 ------------------------ - Datapath: - Support for IPv6 set action. - SKB mark matching and setting. - Support for Linux kernels up to 3.8 - FreeBSD is now a supported platform, thanks to code contributions from Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. - ovs-bugtool: New --ovs option to report only OVS related information.
- New %t and %T log escapes to identify the subprogram within a cooperating group of processes or threads that emitted a log message. The default log patterns now include this information. - OpenFlow: - Allow bitwise masking for SHA and THA fields in ARP, SLL and TLL fields in IPv6 neighbor discovery messages, and IPv6 flow label. - Adds support for writing to the metadata field for a flow. - Tunneling: - The tunneling code no longer assumes input and output keys are symmetric. If they are not, PMTUD needs to be disabled for tunneling to work. Note this only applies to flow-based keys. - New support for a nonstandard form of GRE that supports a 64-bit key. - Tunnel Path MTU Discovery default value was set to 'disabled'. This feature is deprecated and will be removed soon. - Tunnel header caching removed. - ovs-ofctl: - Commands and actions that accept port numbers now also accept keywords that represent those ports (such as LOCAL, NONE, and ALL). This is also the recommended way to specify these ports, for compatibility with OpenFlow 1.1 and later (which use the OpenFlow 1.0 numbers for these ports for different purposes). - ovs-dpctl: - Support requesting the port number with the "port_no" option in the "add-if" command. - ovs-pki: The "online PKI" features have been removed, along with the ovs-pki-cgi program that facilitated it, because of some alarmist insecurity claims. We do not believe that these claims are true, but because we do not know of any users for this feature it seems better on balance to remove it. (The ovs-pki-cgi program was not included in distribution packaging.) - ovsdb-server now enforces the immutability of immutable columns. This was not enforced in earlier versions due to an oversight. - The following features are now deprecated. They will be removed no earlier than February 2013. Please email dev@openvswitch.org with concerns. - Bridge compatibility. - Stable bond mode. - The autopath action. - Interface type "null". 
- Numeric values for reserved ports (see "ovs-ofctl" note above). - Tunnel Path MTU Discovery. - CAPWAP tunnel support. - The data in the RARP packets can now be matched in the same way as the data in ARP packets. v1.8.0 - 26 Feb 2013 ------------------------ *** Internal only release *** - New FAQ. Please send updates and additions! - Authors of controllers, please read the new section titled "Action Reproduction" in DESIGN, which describes an Open vSwitch change in behavior in corner cases that may affect some controllers. - ovs-l3ping: - A new test utility that can create an L3 tunnel between two Open vSwitches and detect connectivity issues. - ovs-ofctl: - New --sort and --rsort options for "dump-flows" command. - "mod-port" command can now control all OpenFlow config flags. - OpenFlow: - Allow general bitwise masking for IPv4 and IPv6 addresses in IPv4, IPv6, and ARP packets. (Previously, only CIDR masks were allowed.) - Allow support for arbitrary Ethernet masks. (Previously, only the multicast bit in the destination address could be individually masked.) - New field OXM_OF_METADATA, to align with OpenFlow 1.1. - The OFPST_QUEUE request now reports an error if a specified port or queue does not exist, or for requests for a specific queue on all ports, if the specified queue does not exist on any port. (Previous versions generally reported an empty set of results.) - New "flow monitor" feature to allow controllers to be notified of flow table changes as they happen. - Additional protocols are not mirrored and dropped when forward-bpdu is false. For a full list, see the ovs-vswitchd.conf.db man page. - Open vSwitch now sends RARP packets in situations where it previously sent a custom protocol, making it consistent with the behavior of QEMU and VMware. - All Open vSwitch programs and log files now show timestamps in UTC, instead of the local timezone, by default. v1.7.0 - 30 Jul 2012 ------------------------ - kernel modules are renamed.
openvswitch_mod.ko is now openvswitch.ko and brcompat_mod.ko is now brcompat.ko. - Increased the number of NXM registers to 8. - Added ability to configure DSCP setting for manager and controller connections. By default, these connections have a DSCP value of Internetwork Control (0xc0). - Added the granular link health statistics, 'cfm_health', to an interface. - OpenFlow: - Added support to mask nd_target for ICMPv6 neighbor discovery flows. - Added support for OpenFlow 1.3 port description (OFPMP_PORT_DESC) multipart messages. - ovs-ofctl: - Added the "dump-ports-desc" command to retrieve port information using the new port description multipart messages. - ovs-test: - Added support for spawning ovs-test server from the client. - Now ovs-test is able to automatically create test bridges and ports. - "ovs-dpctl dump-flows" now prints observed TCP flags in TCP flows. - Tripled flow setup performance. - The "coverage/log" command previously available through ovs-appctl has been replaced by "coverage/show". The new command replies with coverage counter values, instead of logging them. v1.6.1 - 25 Jun 2012 ------------------------ - Allow OFPP_CONTROLLER as the in_port for packet-out messages. v1.6.0 - 24 Feb 2012 ------------------------ *** Internal only release *** - bonding - LACP bonds no longer fall back to balance-slb when negotiations fail. Instead they drop traffic. - The default bond_mode changed from SLB to active-backup, to protect unsuspecting users from the significant risks of SLB bonds (which are documented in vswitchd/INTERNALS). - Load balancing can be disabled by setting the bond-rebalance-interval to zero. - OpenFlow: - Added support for bitwise matching on TCP and UDP ports. See ovs-ofctl(8) for more information. - NXM flow dumps now include times elapsed toward idle and hard timeouts. - Added an OpenFlow extension NXT_SET_ASYNC_CONFIG that allows controllers more precise control over which OpenFlow messages they receive asynchronously. 
- New "fin_timeout" action. - Added "fin_timeout" support to "learn" action. - New Nicira action NXAST_CONTROLLER that offers additional features over output to OFPP_CONTROLLER. - When QoS settings for an interface do not configure a default queue (queue 0), Open vSwitch now uses a default configuration for that queue, instead of dropping all packets as in previous versions. - Logging: - Logging to console and file will have a UTC timestamp by default for all the daemons. An example of the default format is 2012-01-27T16:35:17Z. ovs-appctl can be used to change the default format as before. - The syntax of commands and options to set log levels was simplified, to make it easier to remember. - New support for limiting the number of flows in an OpenFlow flow table, with configurable policy for evicting flows upon overflow. See the Flow_Table table in ovs-vswitchd.conf.db(5) for more information. - New "enable-async-messages" column in the Controller table. If set to false, OpenFlow connections to the controller will initially have all asynchronous messages disabled, overriding normal OpenFlow behavior. - ofproto-provider interface: - "struct rule" has a new member "used" that ofproto implementations should maintain by updating with ofproto_rule_update_used(). - ovsdb-client: - The new option --timestamp causes the "monitor" command to print a timestamp with every update. - CFM module CCM broadcasts can now be tagged with an 802.1p priority. v1.5.0 - 01 Jun 2012 ------------------------ - OpenFlow: - Added support for querying, modifying, and deleting flows based on flow cookie when using NXM. - Added new NXM_PACKET_IN format. - Added new NXAST_DEC_TTL action. - ovs-ofctl: - Added daemonization support to the monitor and snoop commands. - ovs-vsctl: - The "find" command supports new set relational operators {=}, {!=}, {<}, {>}, {<=}, and {>=}. - ovsdb-tool now uses the typical database and schema installation directories as defaults.
- The default MAC learning timeout has been increased from 60 seconds to 300 seconds. The MAC learning timeout is now configurable. v1.4.0 - 30 Jan 2012 ------------------------ - Compatible with the Open vSwitch kernel module included in Linux 3.3. - New "VLAN splinters" feature to work around buggy device drivers in old Linux versions. (This feature is deprecated. When broken device drivers are no longer in widespread use, we will delete this feature.) See ovs-vswitchd.conf.db(5) for more information. - OpenFlow: - Added ability to match on IPv6 flow label through NXM. - Added ability to match on ECN bits in IPv4 and IPv6 through NXM. - Added ability to match on TTL in IPv4 and IPv6 through NXM. - Added ability to modify ECN bits in IPv4. - Added ability to modify TTL in IPv4. - ovs-vswitchd: - Don't require the "normal" action to use mirrors. Traffic will now be properly mirrored for any flows, regardless of their actions. - Track packet and byte statistics sent on mirrors. - The sFlow implementation can now usually infer the correct agent device instead of having to be told explicitly. - ovs-appctl: - New "fdb/flush" command to flush a bridge's MAC learning table. - ovs-test: - A new distributed testing tool that allows one to diagnose performance and connectivity issues. This tool currently is not included in RH or Xen packages. - RHEL packaging now supports integration with Red Hat network scripts. - bonding: - Post 1.4.*, OVS will be changing the default bond mode from balance-slb to active-backup. SLB bonds carry significant risks with them (documented in vswitchd/INTERNALS) which we want to prevent unsuspecting users from running into. Users are advised to update any scripts or configuration which may be negatively impacted by explicitly setting the bond mode which they want to use. v1.3.0 - 09 Dec 2011 ------------------------ - OpenFlow: - Added an OpenFlow extension which allows the "output" action to accept NXM fields.
- Added an OpenFlow extension for flexible learning. - Bumped number of NXM registers from four to five. - ovs-appctl: - New "version" command to determine version of running daemon. - If no argument is provided for "cfm/show", displays detailed information about all interfaces with CFM enabled. - If no argument is provided for "lacp/show", displays detailed information about all ports with LACP enabled. - ovs-dpctl: - New "set-if" command to modify a datapath port's configuration. - ovs-vswitchd: - The software switch now supports 255 OpenFlow tables, instead of just one. By default, only table 0 is consulted, but the new NXAST_RESUBMIT_TABLE action can look up in additional tables. Tables 128 and above are reserved for use by the switch itself; please use only tables 0 through 127. - Add support for 802.1D spanning tree (STP). - Fragment handling extensions: - New OFPC_FRAG_NX_MATCH fragment handling mode, in which L4 fields are made available for matching in fragments with offset 0. - New NXM_NX_IP_FRAG match field for matching IP fragments (usable via "ip_frag" in ovs-ofctl). - New ovs-ofctl "get-frags" and "set-frags" commands to get and set fragment handling policy. - CAPWAP tunneling now supports an extension to transport a 64-bit key. By default it remains compatible with the old version and other standards-based implementations. - Flow setups are now processed in a round-robin manner across ports to prevent any single client from monopolizing the CPU and conducting a denial of service attack. - Added support for native VLAN tagging. A new "vlan_mode" parameter can be set for "port". Possible values: "access", "trunk", "native-tagged" and "native-untagged". - test-openflowd has been removed. Please use ovs-vswitchd instead. v1.2.0 - 03 Aug 2011 ------------------------ - New "ofproto" abstraction layer to ease porting to hardware switching ASICs. - Packaging for Red Hat Enterprise Linux 5.6 and 6.0. - Datapath support for Linux kernels up to 3.0. 
- OpenFlow: - New "bundle" and "bundle_load" action extensions. - Database: - Implement table unique constraints. - Support cooperative locking between callers. - ovs-dpctl: - New "-s" option for "show" command prints packet and byte counters for each port. - ovs-ofctl: - New "--readd" option for "replace-flows". - ovs-vsctl: - New "show" command to print an overview of configuration. - New "comment" command to add remark that explains intentions. - ovs-brcompatd has been rewritten to fix long-standing bugs. - ovs-openflowd has been renamed test-openflowd and moved into the tests directory. Its presence confused too many users. Please use ovs-vswitchd instead. - New ovs-benchmark utility to test flow setup performance. - A new log level "off" has been added. Configuring a log facility "off" prevents any messages from being logged to it. Previously, "emer" was effectively "off" because no messages were ever logged at level "emer". Now, errors that cause a process to exit are logged at "emer" level. - "configure" option --with-l26 has been renamed --with-linux, and --with-l26-source has been renamed --with-linux-source. The old names will be removed after the next release, so please update your scripts. - The "-2.6" suffix has been dropped from the datapath/linux-2.6 and datapath/linux-2.6/compat-2.6 directories. - Feature removals: - Dropped support for "tun_id_from_cookie" OpenFlow extension. Please use the extensible match extensions instead. - Removed the Maintenance_Point and Monitor tables in an effort to simplify 802.1ag configuration. - Performance and scalability improvements - Bug fixes v1.1.0 - 05 Apr 2011 ------------------------ - Ability to define policies over IPv6 - LACP - 802.1ag CCM - Support for extensible match extensions to OpenFlow - QoS: - Support for HFSC qdisc. - Queue used by in-band control can now be configured. - Kernel: - Kernel<->userspace interface has been reworked and should be close to a stable ABI now. 
- "Port group" concept has been dropped. - GRE over IPSEC tunnels - Bonding: - New active backup bonding mode. - New L4 hashing support when LACP is enabled. - Source MAC hash now includes VLAN field also. - miimon support. - Greatly improved handling of large flow tables - ovs-dpctl: - "show" command now prints full vport configuration. - "dump-groups" command removed since kernel support for port groups was dropped. - ovs-vsctl: - New commands for working with the new Managers table. - "list" command enhanced with new formatting options and --columns option. - "get" command now accepts new --id option. - New "find" command. - ovs-ofctl: - New "queue-stats" command for printing queue stats. - New commands "replace-flows" and "diff-flows". - Commands to add and remove flows can now read from files. - New --flow-format option to enable or disable NXM. - New --more option to increase OpenFlow message verbosity. - Removed "tun-cookie" command, which is no longer useful. - ovs-controller enhancements for testing various features. - New ovs-vlan-test command for testing for Linux kernel driver VLAN bugs. New ovs-vlan-bug-workaround command for enabling and disabling a workaround for these driver bugs. - OpenFlow support: - "Resubmit" actions now update flow statistics. - New "register" extension for use in matching and actions, via NXM. - New "multipath" experimental action extension. - New support for matching multicast Ethernet frames, via NXM. - New extension for OpenFlow vendor error codes. - New extension to set the QoS output queue without actually sending to an output port. - Open vSwitch now reports a single flow table, instead of separate hash and wildcard tables. This better models the current implementation. - New experimental "note" action. - New "ofproto/trace" ovs-appctl command and associated utilities to ease debugging complex flow tables. - Database: - Schema documentation now includes an entity-relationship diagram. 
- The database is now garbage collected. In most tables, unreferenced rows will be deleted automatically. - Many tables now include statistics updated periodically by ovs-vswitchd or ovsdb-server. - Every table now has an "external-ids" column for use by OVS integrators. - There is no default controller anymore. Each bridge must have its controller individually specified. - The "fail-mode" is now a property of a Bridge instead of a Controller. - New versioning and checksum features. - New Managers table and manager_options column in Open_vSwitch table for specifying managers. The old "managers" column in the Open_vSwitch table has been removed. - Many "name" columns are now immutable. - Feature removals: - Dropped support for XenServer pre-5.6.100. - Dropped support for Linux pre-2.6.18. - Dropped controller discovery support. - Dropped "ovs-ofctl status" and the OpenFlow extension that it used. Statistics reporting in the database is a rough equivalent. - Dropped the "corekeeper" package (now separate, at http://openvswitch.org/cgi-bin/gitweb.cgi?p=corekeeper). 
- Performance and scalability improvements - Bug fixes v1.1.0pre2 - 13 Sep 2010 ------------------------ - Bug fixes v1.1.0pre1 - 31 Aug 2010 ------------------------ - OpenFlow 1.0 slicing (QoS) functionality - Python bindings for configuration database (no write support) - Performance and scalability improvements - Bug fixes v1.0.1 - 31 May 2010 -------------------- - New "patch" interface type - Bug fixes v1.0.0 - 15 May 2010 -------------------- - Configuration database with remote management - OpenFlow 1.0 - GRE tunneling - Support for XenServer 5.5 and 5.6 - Performance and scalability improvements - Bug fixes v0.99.2 - 18 Feb 2010 --------------------- - Bug fixes v0.99.1 - 25 Jan 2010 --------------------- - Add support for sFlow(R) - Make headers compatible with C++ - Bug fixes v0.99.0 - 14 Jan 2010 --------------------- - User-space forwarding engine - Bug fixes v0.90.7 - 29 Nov 2009 --------------------- - Add support for NetFlow active timeouts - Bug fixes v0.90.6 - 6 Oct 2009 -------------------- - Bug fixes v0.90.5 - 21 Sep 2009 --------------------- - Generalize in-band control to more diverse network setups - Bug fixes openvswitch-2.0.1+git20140120/NOTICE000066400000000000000000000012321226605124000163540ustar00rootroot00000000000000This file is included in compliance with the Apache 2.0 license, available at http://www.apache.org/licenses/LICENSE-2.0.html Open vSwitch Copyright (c) 2007, 2008, 2009, 2010, 2011 Nicira, Inc. Open vSwitch BSD port Copyright (c) 2011 Gaetano Catalli Apache Portable Runtime Copyright 2008 The Apache Software Foundation. This product includes software developed by The Apache Software Foundation (http://www.apache.org/). Portions of this software were developed at the National Center for Supercomputing Applications (NCSA) at the University of Illinois at Urbana-Champaign. lib/ovs.tmac includes troff macros written by Eric S. Raymond and Werner Lemberg. 
openvswitch-2.0.1+git20140120/OPENFLOW-1.1+000066400000000000000000000175521226605124000172100ustar00rootroot00000000000000 OpenFlow 1.1+ support in Open vSwitch ===================================== Open vSwitch support for OpenFlow 1.1, 1.2, and 1.3 is a work in progress. This file describes the work still to be done. The Plan -------- OpenFlow version support is not a build-time option. A single build of Open vSwitch must be able to handle all supported versions of OpenFlow. Ideally, even at runtime it should be able to support all protocol versions at the same time on different OpenFlow bridges (and perhaps even on the same bridge). At the same time, it would be a shame to litter the core of the OVS code with lots of ugly code concerned with the details of various OpenFlow protocol versions. The primary approach to compatibility is to abstract most of the details of the differences from the core code, by adding a protocol layer that translates between OF1.x and a slightly higher-level abstract representation. The core of this approach is the many struct ofputil_* structures in lib/ofp-util.h. As a consequence of this approach, OVS cannot use OpenFlow protocol definitions that closely resemble those in the OpenFlow specification, because openflow.h in different versions of the OpenFlow specification defines the same identifier with different values. Instead, openflow-common.h contains definitions that are common to all the specifications and separate protocol version-specific headers contain protocol-specific definitions renamed so as not to conflict, e.g. OFPAT10_ENQUEUE and OFPAT11_ENQUEUE for the OpenFlow 1.0 and 1.1 values for OFPAT_ENQUEUE. Generally, in cases of conflict, the protocol layer will define a more abstract OFPUTIL_* or struct ofputil_*. Here are the current approaches in a few tricky areas: * Port numbering. OpenFlow 1.0 has 16-bit port numbers and later OpenFlow versions have 32-bit port numbers. 
For now, OVS support for later protocol versions requires all port numbers to fall into the 16-bit range, translating the reserved OFPP_* port numbers. * Actions. OpenFlow 1.0 and later versions have very different ideas of actions. OVS reconciles by translating all the versions' actions (and instructions) to and from a common internal representation. OpenFlow 1.1 ------------ The list of remaining work items for OpenFlow 1.1 is below. It is probably incomplete. * Implement Write-Actions instruction. [required for 1.1+] * The new in_phy_port field in OFPT_PACKET_IN needs some kind of implementation. It has a sensible interpretation for tunnels but in general the physical port is not in the datapath for OVS so the value is not necessarily meaningful. We might have to just fix it as the same as in_port. [required for OF1.1; optional for OF1.2+] * OFPT_TABLE_MOD message. This is new in OF1.1, so we need to implement it. It should be implemented so that the default OVS behavior does not change. [required for OF1.1 and OF1.2] * Flow table stats (OFPST_TABLE). * Reference count (active entries) [implemented] [required for OF1.1 and OF1.2] * Packet Lookups [required for OF1.1; optional for OF1.2] * Packet Matches [required for OF1.1; optional for OF1.2] * MPLS. Simon Horman maintains a patch series that adds this feature. This is partially merged. [optional for OF1.1+] * Match and set double-tagged VLANs (QinQ). This requires kernel work for reasonable performance. [optional for OF1.1+] * VLANs tagged with 88a8 Ethertype. This requires kernel work for reasonable performance. [required for OF1.1+] * Groups. * Type all [required for OF1.1+] * Type select [optional for OF1.1+] * Type indirect [required for OF1.1+] * Type fast failover [optional for OF1.1+] * Statistics [optional for OF1.1+] OpenFlow 1.2 ------------ OpenFlow 1.2 support requires OpenFlow 1.1 as a prerequisite, plus the following additional work. (This is based on the change log at the end of the OF1.2 spec. 
I didn't compare the specs carefully yet.) * OFPT_FLOW_MOD: * Add ability to delete flow in all tables. [required for OF1.2+] * Update DESIGN to describe OF1.2 behavior also. [required for OF1.2+] OpenFlow 1.3 ------------ OpenFlow 1.3 support requires OpenFlow 1.2 as a prerequisite, plus the following additional work. (This is based on the change log at the end of the OF1.3 spec, reusing most of the section titles directly. I didn't compare the specs carefully yet.) * Send errors for unsupported multipart requests. [required for OF1.3+] * Add support for multipart requests. [optional for OF1.3+] * Add OFPMP_TABLE_FEATURES statistics. [optional for OF1.3+] * More flexible table miss support. [required for OF1.3+] * IPv6 extension header handling support. Fully implementing this requires kernel support. This likely will take some careful and probably time-consuming design work. The actual coding, once that is all done, is probably 2 or 3 days work. [optional for OF1.3+] * Per-flow meters. Similar to IPv6 extension headers in kernel and design requirements. Might be politically difficult to add directly to the kernel module, since its functionality overlaps with tc. Ideally, therefore, we could implement these somehow with tc, but I haven't investigated whether that makes sense. [optional for OF1.3+] * Per-connection event filtering. OF1.3 adopted Open vSwitch's existing design for this feature so implementation should be easy. [required for OF1.3+] * Auxiliary connections. An implementation in generic code might be a week's worth of work. The value of an implementation in generic code is questionable, though, since much of the benefit of auxiliary connections is supposed to be to take advantage of hardware support. (We could make the kernel module somehow send packets across the auxiliary connections directly, for some kind of "hardware" support, if we judged it useful enough.) [optional for OF1.3+] * MPLS BoS matching. (Included in Simon's MPLS series?) 
[optional for OF1.3+] * Provider Backbone Bridge tagging. I don't plan to implement this (but we'd accept an implementation). [optional for OF1.3+] * Rework tag order. I'm not sure whether we need to do anything for this. Part of MPLS patchset by Simon Horman. [required for v1.3+] * On-demand flow counters. I think this might be a real optimization in some cases for the software switch. [optional for OF1.3+] * Duration Statistics - New for Per Port, Per Queue, Per Group [required for v1.3+] How to contribute ----------------- If you plan to contribute code for a feature, please let everyone know on ovs-dev before you start work. This will help avoid duplicating work. Please consider the following: * Testing. Please test your code. * Unit tests. Please consider writing some. The tests directory has many examples that you can use as a starting point. * ovs-ofctl. If you add a feature that is useful for some ovs-ofctl command then you should add support for it there. * Documentation. If you add a user-visible feature, then you should document it in the appropriate manpage and mention it in NEWS as well. * Coding style (see the CodingStyle file at the top of the source tree). * The patch submission guidelines (see SubmittingPatches). I recommend using "git send-email", which automatically follows a lot of those guidelines. Bug Reporting ------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/PORTING000066400000000000000000000343671226605124000165340ustar00rootroot00000000000000 How to Port Open vSwitch to New Software or Hardware ==================================================== Open vSwitch (OVS) is intended to be easily ported to new software and hardware platforms. This document describes the types of changes that are most likely to be necessary in porting OVS to Unix-like platforms. (Porting OVS to other kinds of platforms is likely to be more difficult.) 
Vocabulary ---------- For historical reasons, different words are used for essentially the same concept in different areas of the Open vSwitch source tree. Here is a concordance, indexed by the area of the source tree: datapath/ vport --- vswitchd/ iface port ofproto/ port bundle lib/bond.c slave bond lib/lacp.c slave lacp lib/netdev.c netdev --- database Interface Port Open vSwitch Architectural Overview ----------------------------------- The following diagram shows the very high-level architecture of Open vSwitch from a porter's perspective. +-------------------+ | ovs-vswitchd |<-->ovsdb-server +-------------------+ | ofproto |<-->OpenFlow controllers +--------+-+--------+ | netdev | | ofproto| +--------+ |provider| | netdev | +--------+ |provider| +--------+ Some of the components are generic. Modulo bugs or inadequacies, these components should not need to be modified as part of a port: - "ovs-vswitchd" is the main Open vSwitch userspace program, in vswitchd/. It reads the desired Open vSwitch configuration from the ovsdb-server program over an IPC channel and passes this configuration down to the "ofproto" library. It also passes certain status and statistical information from ofproto back into the database. - "ofproto" is the Open vSwitch library, in ofproto/, that implements an OpenFlow switch. It talks to OpenFlow controllers over the network and to switch hardware or software through an "ofproto provider", explained further below. - "netdev" is the Open vSwitch library, in lib/netdev.c, that abstracts interacting with network devices, that is, Ethernet interfaces. The netdev library is a thin layer over "netdev provider" code, explained further below. The other components may need attention during a port. You will almost certainly have to implement a "netdev provider". Depending on the type of port you are doing and the desired performance, you may also have to implement an "ofproto provider" or a lower-level component called a "dpif" provider. 
The following sections talk about these components in more detail. Writing a netdev Provider ------------------------- A "netdev provider" implements an operating system and hardware specific interface to "network devices", e.g. eth0 on Linux. Open vSwitch must be able to open each port on a switch as a netdev, so you will need to implement a "netdev provider" that works with your switch hardware and software. struct netdev_class, in lib/netdev-provider.h, defines the interfaces required to implement a netdev. That structure contains many function pointers, each of which has a comment that is meant to describe its behavior in detail. If the requirements are unclear, please report this as a bug. The netdev interface can be divided into a few rough categories: * Functions required to properly implement OpenFlow features. For example, OpenFlow requires the ability to report the Ethernet hardware address of a port. These functions must be implemented for minimally correct operation. * Functions required to implement optional Open vSwitch features. For example, the Open vSwitch support for in-band control requires netdev support for inspecting the TCP/IP stack's ARP table. These functions must be implemented if the corresponding OVS features are to work, but may be omitted initially. * Functions needed in some implementations but not in others. For example, most kinds of ports (see below) do not need functionality to receive packets from a network device. The existing netdev implementations may serve as useful examples during a port: * lib/netdev-linux.c implements netdev functionality for Linux network devices, using Linux kernel calls. It may be a good place to start for full-featured netdev implementations. * lib/netdev-vport.c provides support for "virtual ports" implemented by the Open vSwitch datapath module for the Linux kernel. This may serve as a model for minimal netdev implementations. 
* lib/netdev-dummy.c is a fake netdev implementation useful only for testing. Porting Strategies ------------------ After a netdev provider has been implemented for a system's network devices, you may choose among three basic porting strategies. The lowest-effort strategy is to use the "userspace switch" implementation built into Open vSwitch. This ought to work, without writing any more code, as long as the netdev provider that you implemented supports receiving packets. It yields poor performance, however, because every packet passes through the ovs-vswitchd process. See INSTALL.userspace for instructions on how to configure a userspace switch. If the userspace switch is not the right choice for your port, then you will have to write more code. You may implement either an "ofproto provider" or a "dpif provider". Which you should choose depends on a few different factors: * Only an ofproto provider can take full advantage of hardware with built-in support for wildcards (e.g. an ACL table or a TCAM). * A dpif provider can take advantage of the Open vSwitch built-in implementations of bonding, LACP, 802.1ag, 802.1Q VLANs, and other features. An ofproto provider has to provide its own implementations, if the hardware can support them at all. * A dpif provider is usually easier to implement, but most appropriate for software switching. It "explodes" wildcard rules into exact-match entries (with an optional wildcard mask). This allows fast hash lookups in software, but makes inefficient use of TCAMs in hardware that support wildcarding. The following sections describe how to implement each kind of port. ofproto Providers ----------------- An "ofproto provider" is what ofproto uses to directly monitor and control an OpenFlow-capable switch. struct ofproto_class, in ofproto/ofproto-provider.h, defines the interfaces to implement an ofproto provider for new hardware or software. 
That structure contains many function pointers, each of which has a comment that is meant to describe its behavior in detail. If the requirements are unclear, please report this as a bug. The ofproto provider interface is preliminary. Please let us know if it seems unsuitable for your purpose. We will try to improve it. Writing a dpif Provider ----------------------- Open vSwitch has a built-in ofproto provider named "ofproto-dpif", which is built on top of a library for manipulating datapaths, called "dpif". A "datapath" is a simple flow table, one that is only required to support exact-match flows, that is, flows without wildcards. When a packet arrives on a network device, the datapath looks for it in this table. If there is a match, then it performs the associated actions. If there is no match, the datapath passes the packet up to ofproto-dpif, which maintains the full OpenFlow flow table. If the packet matches in this flow table, then ofproto-dpif executes its actions and inserts a new entry into the dpif flow table. (Otherwise, ofproto-dpif passes the packet up to ofproto to send the packet to the OpenFlow controller, if one is configured.) When calculating the dpif flow, ofproto-dpif generates an exact-match flow that describes the missed packet. It makes an effort to figure out what fields can be wildcarded based on the switch's configuration and OpenFlow flow table. The dpif is free to ignore the suggested wildcards and only support the exact-match entry. However, if the dpif supports wildcarding, then it can use the masks to match multiple flows with fewer entries and potentially significantly reduce the number of flow misses handled by ofproto-dpif. The "dpif" library in turn delegates much of its functionality to a "dpif provider". 
The following diagram shows how dpif providers fit into the Open vSwitch architecture: _ | +-------------------+ | | ovs-vswitchd |<-->ovsdb-server | +-------------------+ | | ofproto |<-->OpenFlow controllers | +--------+-+--------+ _ | | netdev | |ofproto-| | userspace | +--------+ | dpif | | | | netdev | +--------+ | | |provider| | dpif | | | +---||---+ +--------+ | | || | dpif | | implementation of | || |provider| | ofproto provider |_ || +---||---+ | || || | _ +---||-----+---||---+ | | | |datapath| | kernel | | +--------+ _| | | | |_ +--------||---------+ || physical NIC struct dpif_class, in lib/dpif-provider.h, defines the interfaces required to implement a dpif provider for new hardware or software. That structure contains many function pointers, each of which has a comment that is meant to describe its behavior in detail. If the requirements are unclear, please report this as a bug. There are two existing dpif implementations that may serve as useful examples during a port: * lib/dpif-linux.c is a Linux-specific dpif implementation that talks to an Open vSwitch-specific kernel module (whose sources are in the "datapath" directory). The kernel module performs all of the switching work, passing packets that do not match any flow table entry up to userspace. This dpif implementation is essentially a wrapper around calls into the kernel module. * lib/dpif-netdev.c is a generic dpif implementation that performs all switching internally. This is how the Open vSwitch userspace switch is implemented. Miscellaneous Notes ------------------- Open vSwitch source code uses uint16_t, uint32_t, and uint64_t as fixed-width types in host byte order, and ovs_be16, ovs_be32, and ovs_be64 as fixed-width types in network byte order. Each of the latter is equivalent to one of the former, but the difference in name makes the intended use obvious. 
The default "fail-mode" for Open vSwitch bridges is "standalone", meaning that, when the OpenFlow controllers cannot be contacted, Open vSwitch acts as a regular MAC-learning switch. This works well in virtualization environments where there is normally just one uplink (either a single physical interface or a bond). In a more general environment, it can create loops. So, if you are porting to a general-purpose switch platform, you should consider changing the default "fail-mode" to "secure", which does not behave this way. See documentation for the "fail-mode" column in the Bridge table in ovs-vswitchd.conf.db(5) for more information. lib/entropy.c assumes that it can obtain high-quality random number seeds at startup by reading from /dev/urandom. You will need to modify it if this is not true on your platform. vswitchd/system-stats.c only knows how to obtain some statistics on Linux. Optionally you may implement them for your platform as well. Why OVS Does Not Support Hybrid Providers ----------------------------------------- The "Porting Strategies" section above describes the "ofproto provider" and "dpif provider" porting strategies. Only an ofproto provider can take advantage of hardware TCAM support, and only a dpif provider can take advantage of the OVS built-in implementations of various features. It is therefore tempting to suggest a hybrid approach that shares the advantages of both strategies. However, Open vSwitch does not support a hybrid approach. Doing so may be possible, with a significant amount of extra development work, but it does not yet seem worthwhile, for the reasons explained below. First, user surprise is likely when a switch supports a feature only with a high performance penalty. For example, one user questioned why adding a particular OpenFlow action to a flow caused a 1,058x slowdown on a hardware OpenFlow implementation [1]. The action required the flow to be implemented in software. 
Given that implementing a flow in software on the slow management CPU of a hardware switch causes a major slowdown, software-implemented flows would only make sense for very low-volume traffic. But many of the features built into the OVS software switch implementation would need to apply to every flow to be useful. There is no value, for example, in applying bonding or 802.1Q VLAN support only to low-volume traffic. Besides supporting features of OpenFlow actions, a hybrid approach could also support forms of matching not supported by particular switching hardware, by sending all packets that might match a rule to software. But again this can cause an unacceptable slowdown by forcing bulk traffic through software in the hardware switch's slow management CPU. Consider, for example, a hardware switch that can match on the IPv6 Ethernet type but not on fields in IPv6 headers. An OpenFlow table that matched on the IPv6 Ethernet type would perform well, but adding a rule that matched only UDPv6 would force every IPv6 packet to software, slowing down not just UDPv6 but all IPv6 processing. [1] Aaron Rosen, "Modify packet fields extremely slow", openflow-discuss mailing list, June 26, 2011, archived at https://mailman.stanford.edu/pipermail/openflow-discuss/2011-June/002386.html. Questions --------- Please direct porting questions to dev@openvswitch.org. We will try to use questions to improve this porting guide. openvswitch-2.0.1+git20140120/README000066400000000000000000000102421226605124000163310ustar00rootroot00000000000000 Open vSwitch What is Open vSwitch? --------------------- Open vSwitch is a multilayer software switch licensed under the open source Apache 2 license. Our goal is to implement a production quality switch platform that supports standard management interfaces and opens the forwarding functions to programmatic extension and control. Open vSwitch is well suited to function as a virtual switch in VM environments. 
In addition to exposing standard control and visibility interfaces to the virtual networking layer, it was designed to support distribution across multiple physical servers. Open vSwitch supports multiple Linux-based virtualization technologies including Xen/XenServer, KVM, and VirtualBox. The bulk of the code is written in platform-independent C and is easily ported to other environments. The current release of Open vSwitch supports the following features: * Standard 802.1Q VLAN model with trunk and access ports * NIC bonding with or without LACP on upstream switch * NetFlow, sFlow(R), and mirroring for increased visibility * QoS (Quality of Service) configuration, plus policing * GRE, GRE over IPSEC, VXLAN, and LISP tunneling * 802.1ag connectivity fault management * OpenFlow 1.0 plus numerous extensions * Transactional configuration database with C and Python bindings * High-performance forwarding using a Linux kernel module The included Linux kernel module supports Linux 2.6.32 and up, with testing focused on 2.6.32 with Centos and Xen patches. Open vSwitch also has special support for Citrix XenServer and Red Hat Enterprise Linux hosts. Open vSwitch can also operate, at a cost in performance, entirely in userspace, without assistance from a kernel module. This userspace implementation should be easier to port than the kernel-based switch. It is considered experimental. What's here? ------------ The main components of this distribution are: * ovs-vswitchd, a daemon that implements the switch, along with a companion Linux kernel module for flow-based switching. * ovsdb-server, a lightweight database server that ovs-vswitchd queries to obtain its configuration. * ovs-dpctl, a tool for configuring the switch kernel module. * Scripts and specs for building RPMs for Citrix XenServer and Red Hat Enterprise Linux. The XenServer RPMs allow Open vSwitch to be installed on a Citrix XenServer host as a drop-in replacement for its switch, with additional functionality. 
* ovs-vsctl, a utility for querying and updating the configuration of ovs-vswitchd. * ovs-appctl, a utility that sends commands to running Open vSwitch daemons. * ovsdbmonitor, a GUI tool for remotely viewing OVS databases and OpenFlow flow tables. Open vSwitch also provides some tools: * ovs-controller, a simple OpenFlow controller. * ovs-ofctl, a utility for querying and controlling OpenFlow switches and controllers. * ovs-pki, a utility for creating and managing the public-key infrastructure for OpenFlow switches. * A patch to tcpdump that enables it to parse OpenFlow messages. What other documentation is available? -------------------------------------- To install Open vSwitch on a regular Linux or FreeBSD host, please read INSTALL. For specifics around installation on a specific platform, please see one of these files: - INSTALL.Debian - INSTALL.Fedora - INSTALL.RHEL - INSTALL.XenServer To use Open vSwitch... - ...with KVM on Linux, read INSTALL, read INSTALL.KVM. - ...with Libvirt, read INSTALL.Libvirt. - ...without using a kernel module, read INSTALL.userspace. For answers to common questions, read FAQ. To learn how to set up SSL support for Open vSwitch, read INSTALL.SSL. To learn about some advanced features of the Open vSwitch software switch, read the tutorial in tutorial/Tutorial. Each Open vSwitch userspace program is accompanied by a manpage. Many of the manpages are customized to your configuration as part of the build process, so we recommend building Open vSwitch before reading the manpages. Contact ------- bugs@openvswitch.org http://openvswitch.org/ openvswitch-2.0.1+git20140120/README-OFTest000066400000000000000000000046401226605124000175000ustar00rootroot00000000000000 How to Use OFTest With Open vSwitch =================================== This document describes how to use the OFTest OpenFlow protocol testing suite with Open vSwitch in "dummy mode". In this mode of testing, no packets travel across physical or virtual networks. 
Instead, Unix domain sockets stand in as simulated networks. This simulation is imperfect, but it is much easier to set up, does not require extra physical or virtual hardware, and does not require supervisor privileges. Prerequisites ------------- First, build Open vSwitch according to the instructions in INSTALL. You need not install it. Second, obtain a copy of OFTest and install its prerequisites. You need a copy of OFTest that includes commit 406614846c5 (make ovs-dummy platform work again). This commit was merged into the OFTest repository on Feb 1, 2013, so any copy of OFTest more recent than that should work. Testing OVS in dummy mode does not require root privilege, so you may ignore that requirement. Optionally, add the top-level OFTest directory (containing the "oft" program) to your $PATH. This slightly simplifies running OFTest later. Running OFTest -------------- To run OFTest in dummy mode, run the following command from your Open vSwitch build directory: make check-oftest OFT=<oft-binary> where <oft-binary> is the absolute path to the "oft" program in OFTest. If you added "oft" to your $PATH, you may omit the OFT variable assignment: make check-oftest By default, "check-oftest" passes "oft" just enough options to enable dummy mode. You can use OFTFLAGS to pass additional options. For example, to run just the basic.Echo test instead of all tests (the default) and enable verbose logging: make check-oftest OFT=<oft-binary> OFTFLAGS='--verbose -T basic.Echo' Interpreting OFTest Results --------------------------- Please interpret OFTest results cautiously. Open vSwitch can fail a given test in OFTest for many reasons, including bugs in Open vSwitch, bugs in OFTest, bugs in the "dummy mode" integration, and differing interpretations of the OpenFlow standard and other standards. Open vSwitch has not been validated against OFTest. Please do report test failures that you believe to represent bugs in Open vSwitch.
Include the precise versions of Open vSwitch and OFTest in your bug report, plus any other information needed to reproduce the problem. Contact ------- bugs@openvswitch.org http://openvswitch.org/ openvswitch-2.0.1+git20140120/README-gcov000066400000000000000000000006271226605124000172730ustar00rootroot00000000000000Building with gcov support ========================== The Open vSwitch "configure" script supports the following code-coverage related options: --disable-coverage --enable-coverage=no Do not build with gcov code coverage support. This is the default if no coverage option is passed to "configure". --enable-coverage --enable-coverage=yes Build with gcov code coverage support. openvswitch-2.0.1+git20140120/README-lisp000066400000000000000000000066331226605124000173070ustar00rootroot00000000000000Using LISP tunneling ==================== LISP is a layer 3 tunneling mechanism, meaning that encapsulated packets do not carry Ethernet headers, and ARP requests shouldn't be sent over the tunnel. Because of this, there are some additional steps required for setting up LISP tunnels in Open vSwitch, until support for L3 tunnels will improve. This guide assumes tunneling between two VMs connected to OVS bridges on different hypervisors reachable over IPv4. Of course, more than one VM may be connected to any of the hypervisors, and a hypervisor may communicate with several different hypervisors over the same lisp tunneling interface. A LISP "map-cache" can be implemented using flows, see example at the bottom of this file. There are several scenarios: 1) the VMs have IP addresses in the same subnet and the hypervisors are also in a single subnet (although one different from the VM's); 2) the VMs have IP addresses in the same subnet but the hypervisors are separated by a router; 3) the VMs are in different subnets. In cases 1) and 3) ARP resolution can work as normal: ARP traffic is configured not to go through the LISP tunnel. 
For case 1) ARP is able to reach the other VM, if both OVS instances default to MAC address learning. Case 3) requires the hypervisor be configured as the default router for the VMs. In case 2) the VMs expect ARP replies from each other, but this is not possible over a layer 3 tunnel. One solution is to have static MAC address entries preconfigured on the VMs (e.g., `arp -f /etc/ethers` on startup on Unix based VMs), or have the hypervisor do proxy ARP. On the receiving side, the packet arrives without the original MAC header. The LISP tunneling code attaches a header with hardcoded source and destination MAC address 02:00:00:00:00:00. This address has all bits set to 0, except the locally administered bit, in order to avoid potential collisions with existing allocations. In order for packets to reach their intended destination, the destination MAC address needs to be rewritten. This can be done using the flow table. See below for an example setup, and the associated flow rules to enable LISP tunneling. +---+ +---+ |VM1| |VM2| +---+ +---+ | | +--[tap0]--+ +--[tap0]---+ | | | | [lisp0] OVS1 [eth0]-----------------[eth0] OVS2 [lisp0] | | | | +----------+ +-----------+ On each hypervisor, interfaces tap0, eth0, and lisp0 are added to a single bridge instance, and become numbered 1, 2, and 3 respectively: ovs-vsctl add-br br0 ovs-vsctl add-port br0 tap0 ovs-vsctl add-port br0 eth0 ovs-vsctl add-port br0 lisp0 -- set Interface lisp0 type=lisp options:remote_ip=flow options:key=flow Flows on br0 are configured as follows: priority=3,dl_dst=02:00:00:00:00:00,action=mod_dl_dst:<VMx_MAC>,output:1 priority=2,in_port=1,dl_type=0x0806,action=NORMAL priority=1,in_port=1,dl_type=0x0800,vlan_tci=0,nw_src=<EID_prefix>,action=set_field:<OVSx_IP>->tun_dst,output:3 priority=0,action=NORMAL Optionally, if you want to use Instance ID in a flow, you can set it with "action=set_tunnel:<IID>".
openvswitch-2.0.1+git20140120/REPORTING-BUGS000066400000000000000000000033211226605124000175030ustar00rootroot00000000000000Reporting Bugs in Open vSwitch ============================== We are eager to hear from users about problems that they have encountered with Open vSwitch. This file documents how best to report bugs so as to ensure that they can be fixed as quickly as possible. Please report bugs by sending email to bugs@openvswitch.org. The most important parts of your bug report are the following: * What you did that made the problem appear. * What you expected to happen. * What actually happened. Please also include the following information: * The Open vSwitch version number (as output by "ovs-vswitchd --version"). * The Git commit number (as output by "git rev-parse HEAD"), if you built from a Git snapshot. * Any local patches or changes you have applied (if any). The following are also handy sometimes: * The kernel version on which Open vSwitch is running (from /proc/version) and the distribution and version number of your OS (e.g. "Centos 5.0"). * The contents of the vswitchd configuration database (usually /etc/openvswitch/conf.db). * The output of "ovs-dpctl show". * If you have Open vSwitch configured to connect to an OpenFlow controller, the output of "ovs-ofctl show <bridge>" for each <bridge> configured in the vswitchd configuration database. * A fix or workaround, if you have one. * Any other information that you think might be relevant. bugs@openvswitch.org is a public mailing list, to which anyone can subscribe, so please do not include confidential information in your bug report. Contact ------- bugs@openvswitch.org http://openvswitch.org/ openvswitch-2.0.1+git20140120/SubmittingPatches000066400000000000000000000177141226605124000210440ustar00rootroot00000000000000How to Submit Patches for Open vSwitch ====================================== Send changes to Open vSwitch as patches to dev@openvswitch.org. One patch per email, please.
More details are included below. If you are using Git, then "git format-patch" takes care of most of the mechanics described below for you. Before You Start ---------------- Before you send patches at all, make sure that each patch makes sense. In particular: - A given patch should not break anything, even if later patches fix the problems that it causes. The source tree should still build and work after each patch is applied. (This enables "git bisect" to work best.) - A patch should make one logical change. Don't make multiple, logically unconnected changes to disparate subsystems in a single patch. - A patch that adds or removes user-visible features should also update the appropriate user documentation or manpages. Testing is also important: - A patch that adds or deletes files should be tested with "make distcheck" before submission. - A patch that modifies Linux kernel code should be at least build-tested on various Linux kernel versions before submission. I suggest versions 2.6.32 and whatever the current latest release version is at the time. - A patch that modifies the ofproto or vswitchd code should be tested in at least simple cases before submission. - A patch that modifies xenserver code should be tested on XenServer before submission. Email Subject ------------- The subject line of your email should be in the following format: [PATCH <n>/<m>] <area>: <summary> - [PATCH <n>/<m>] indicates that this is the nth of a series of m patches. It helps reviewers to read patches in the correct order. You may omit this prefix if you are sending only one patch. - <area>: indicates the area of the Open vSwitch to which the change applies (often the name of a source file or a directory). You may omit it if the change crosses multiple distinct pieces of code. - <summary> briefly describes the change. The subject, minus the [PATCH <n>/<m>] prefix, becomes the first line of the commit's change log message. Description ----------- The body of the email should start with a more thorough description of the change.
This becomes the body of the commit message, following the subject. There is no need to duplicate the summary given in the subject. Please limit lines in the description to 79 characters in width. The description should include: - The rationale for the change. - Design description and rationale (but this might be better added as code comments). - Testing that you performed (or testing that should be done but you could not for whatever reason). There is no need to describe what the patch actually changed, if the reader can see it for himself. If the patch refers to a commit already in the Open vSwitch repository, please include both the commit number and the subject of the patch, e.g. 'commit 632d136c (vswitch: Remove restriction on datapath names.)'. If you, the person sending the patch, did not write the patch yourself, then the very first line of the body should take the form "From: <author name and email address>", followed by a blank line. This will automatically cause the named author to be credited with authorship in the repository. If others contributed to the patch, but are not the main authors, then please credit them as part of the description (e.g. "Thanks to Bob J. User for reporting this bug."). Please sign off on the patch as a submitter, and be sure to have the author(s) sign off for patches that you did not author. Simply include your name and email address as the last line of the commit message before any comments (and author too, if that is not you): Signed-off-by: Author Name <author.name@email.address...> Signed-off-by: Submitter Name <submitter.name@email.address...> By doing this, you are agreeing to the Developer's Certificate of Origin (see below for more details). Developer's Certificate of Origin --------------------------------- To help track the author of a patch as well as the submission chain, and be clear that the developer has authority to submit a patch for inclusion in openvswitch, please sign off your work.
The sign off certifies the following: Developer's Certificate of Origin 1.1 By making a contribution to this project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. Comments -------- If you want to include any comments in your email that should not be part of the commit's change log message, put them after the description, separated by a line that contains just "---". It may be helpful to include a diffstat here for changes that touch multiple files. Patch ----- The patch should be in the body of the email following the description, separated by a blank line. Patches should be in "diff -up" format. We recommend that you use Git to produce your patches, in which case you should use the -M -C options to "git diff" (or other Git tools) if your patch renames or copies files. Quilt (http://savannah.nongnu.org/projects/quilt) might be useful if you do not want to use Git. Patches should be inline in the email message. Some email clients corrupt white space or wrap lines in patches.
There are hints on how to configure many email clients to avoid this problem at: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blob_plain;f=Documentation/email-clients.txt If you cannot convince your email client not to mangle patches, then sending the patch as an attachment is a second choice. Please follow the style used in the code that you are modifying. The CodingStyle file describes the coding style used in most of Open vSwitch. Use Linux kernel coding style for Linux kernel code. Example ------- From fa29a1c2c17682879e79a21bb0cdd5bbe67fa7c0 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 8 Dec 2011 13:17:24 -0800 Subject: [PATCH] datapath: Alphabetize include/net/ipv6.h compat header. Signed-off-by: Jesse Gross --- datapath/linux/Modules.mk | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk index fdd952e..f6cb88e 100644 --- a/datapath/linux/Modules.mk +++ b/datapath/linux/Modules.mk @@ -56,11 +56,11 @@ openvswitch_headers += \ linux/compat/include/net/dst.h \ linux/compat/include/net/genetlink.h \ linux/compat/include/net/ip.h \ + linux/compat/include/net/ipv6.h \ linux/compat/include/net/net_namespace.h \ linux/compat/include/net/netlink.h \ linux/compat/include/net/protocol.h \ linux/compat/include/net/route.h \ - linux/compat/include/net/ipv6.h \ linux/compat/genetlink.inc both_modules += brcompat -- 1.7.7.3 openvswitch-2.0.1+git20140120/WHY-OVS000066400000000000000000000127061226605124000165170ustar00rootroot00000000000000 Why Open vSwitch? ================= Hypervisors need the ability to bridge traffic between VMs and with the outside world. On Linux-based hypervisors, this used to mean using the built-in L2 switch (the Linux bridge), which is fast and reliable. So, it is reasonable to ask why Open vSwitch is used. 
The answer is that Open vSwitch is targeted at multi-server virtualization deployments, a landscape for which the previous stack is not well suited. These environments are often characterized by highly dynamic end-points, the maintenance of logical abstractions, and (sometimes) integration with or offloading to special purpose switching hardware. The following characteristics and design considerations help Open vSwitch cope with the above requirements. * The mobility of state: All network state associated with a network entity (say a virtual machine) should be easily identifiable and migratable between different hosts. This may include traditional "soft state" (such as an entry in an L2 learning table), L3 forwarding state, policy routing state, ACLs, QoS policy, monitoring configuration (e.g. NetFlow, IPFIX, sFlow), etc. Open vSwitch has support for both configuring and migrating both slow (configuration) and fast network state between instances. For example, if a VM migrates between end-hosts, it is possible to not only migrate associated configuration (SPAN rules, ACLs, QoS) but any live network state (including, for example, existing state which may be difficult to reconstruct). Further, Open vSwitch state is typed and backed by a real data-model allowing for the development of structured automation systems. * Responding to network dynamics: Virtual environments are often characterized by high-rates of change. VMs coming and going, VMs moving backwards and forwards in time, changes to the logical network environments, and so forth. Open vSwitch supports a number of features that allow a network control system to respond and adapt as the environment changes. This includes simple accounting and visibility support such as NetFlow, IPFIX, and sFlow. But perhaps more useful, Open vSwitch supports a network state database (OVSDB) that supports remote triggers. 
Therefore, a piece of orchestration software can "watch" various aspects of the network and respond if/when they change. This is used heavily today, for example, to respond to and track VM migrations. Open vSwitch also supports OpenFlow as a method of exporting remote access to control traffic. There are a number of uses for this including global network discovery through inspection of discovery or link-state traffic (e.g. LLDP, CDP, OSPF, etc.). * Maintenance of logical tags: Distributed virtual switches (such as VMware vDS and Cisco's Nexus 1000V) often maintain logical context within the network through appending or manipulating tags in network packets. This can be used to uniquely identify a VM (in a manner resistant to hardware spoofing), or to hold some other context that is only relevant in the logical domain. Much of the problem of building a distributed virtual switch is to efficiently and correctly manage these tags. Open vSwitch includes multiple methods for specifying and maintaining tagging rules, all of which are accessible to a remote process for orchestration. Further, in many cases these tagging rules are stored in an optimized form so they don't have to be coupled with a heavyweight network device. This allows, for example, thousands of tagging or address remapping rules to be configured, changed, and migrated. In a similar vein, Open vSwitch supports a GRE implementation that can handle thousands of simultaneous GRE tunnels and supports remote configuration for tunnel creation, configuration, and tear-down. This, for example, can be used to connect private VM networks in different data centers. * Hardware integration: Open vSwitch's forwarding path (the in-kernel datapath) is designed to be amenable to "offloading" packet processing to hardware chipsets, whether housed in a classic hardware switch chassis or in an end-host NIC. 
This allows for the Open vSwitch control path to be able to both control a pure software implementation or a hardware switch. There are many ongoing efforts to port Open vSwitch to hardware chipsets. These include multiple merchant silicon chipsets (Broadcom and Marvell), as well as a number of vendor-specific platforms. (The PORTING file discusses how one would go about making such a port.) The advantage of hardware integration is not only performance within virtualized environments. If physical switches also expose the Open vSwitch control abstractions, both bare-metal and virtualized hosting environments can be managed using the same mechanism for automated network control. In many ways, Open vSwitch targets a different point in the design space than previous hypervisor networking stacks, focusing on the need for automated and dynamic network control in large-scale Linux-based virtualization environments. The goal with Open vSwitch is to keep the in-kernel code as small as possible (as is necessary for performance) and to re-use existing subsystems when applicable (for example Open vSwitch uses the existing QoS stack). As of Linux 3.3, Open vSwitch is included as a part of the kernel and packaging for the userspace utilities are available on most popular distributions. openvswitch-2.0.1+git20140120/acinclude.m4000066400000000000000000000527221226605124000176530ustar00rootroot00000000000000# -*- autoconf -*- # Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

dnl OVS_ENABLE_WERROR
dnl
dnl Adds the --enable-Werror option to "configure" (default: no).  When it
dnl is enabled, -Werror is appended to CFLAGS from an
dnl AC_CONFIG_COMMANDS_PRE hook.
AC_DEFUN([OVS_ENABLE_WERROR],
  [AC_ARG_ENABLE(
     [Werror],
     [AS_HELP_STRING([--enable-Werror], [Add -Werror to CFLAGS])],
     [], [enable_Werror=no])
   AC_CONFIG_COMMANDS_PRE(
     [if test "X$enable_Werror" = Xyes; then
        CFLAGS="$CFLAGS -Werror"
      fi])])

dnl OVS_CHECK_LINUX
dnl
dnl Configure linux kernel source tree.
dnl
dnl Handles --with-linux (the kernel build directory) and
dnl --with-linux-source, plus their deprecated spellings --with-l26 and
dnl --with-l26-source.  When a build directory is given, this macro:
dnl   - verifies that the build directory exists and substitutes KBUILD,
dnl   - locates the matching source directory (KSRC), using heuristics
dnl     for Debian-style split header packages when it was not given,
dnl   - rejects kernels older than 2.6.32 or newer than 3.10.x,
dnl   - requires that the build tree has been configured, and
dnl   - runs OVS_CHECK_LINUX_COMPAT.
dnl The LINUX_ENABLED Automake conditional is true when a build directory
dnl was given.
AC_DEFUN([OVS_CHECK_LINUX], [
  AC_ARG_WITH([linux],
              [AS_HELP_STRING([--with-linux=/path/to/linux],
                              [Specify the Linux kernel build directory])])
  AC_ARG_WITH([linux-source],
              [AS_HELP_STRING([--with-linux-source=/path/to/linux-source],
                              [Specify the Linux kernel source directory
                               (usually figured out automatically from build
                               directory)])])

  # Deprecated equivalents to --with-linux, --with-linux-source.
  AC_ARG_WITH([l26])
  AC_ARG_WITH([l26-source])

  if test X"$with_linux" != X; then
    KBUILD=$with_linux
  elif test X"$with_l26" != X; then
    KBUILD=$with_l26
    AC_MSG_WARN([--with-l26 is deprecated, please use --with-linux instead])
  else
    KBUILD=
  fi

  if test X"$KBUILD" != X; then
    if test X"$with_linux_source" != X; then
      KSRC=$with_linux_source
    elif test X"$with_l26_source" != X; then
      KSRC=$with_l26_source
      AC_MSG_WARN([--with-l26-source is deprecated, please use --with-linux-source instead])
    else
      KSRC=
    fi
  elif test X"$with_linux_source" != X || test X"$with_l26_source" != X; then
    AC_MSG_ERROR([Linux source directory may not be specified without Linux build directory])
  fi

  if test -n "$KBUILD"; then
    KBUILD=`eval echo "$KBUILD"`
    case $KBUILD in
        /*) ;;
        *) KBUILD=`pwd`/$KBUILD ;;
    esac

    # The build directory is what the user provided.
    # Make sure that it exists.
    AC_MSG_CHECKING([for Linux build directory])
    if test -d "$KBUILD"; then
        AC_MSG_RESULT([$KBUILD])
        AC_SUBST(KBUILD)
    else
        AC_MSG_RESULT([no])
        AC_MSG_ERROR([build directory $KBUILD doesn't exist])
    fi

    # Debian breaks kernel headers into "source" header and "build" headers.
    # We want the source headers, but $KBUILD gives us the "build" headers.
    # Use heuristics to find the source headers.
    AC_MSG_CHECKING([for Linux source directory])
    if test -n "$KSRC"; then
      KSRC=`eval echo "$KSRC"`
      case $KSRC in
          /*) ;;
          *) KSRC=`pwd`/$KSRC ;;
      esac
      if test ! -e "$KSRC"/include/linux/kernel.h; then
        AC_MSG_ERROR([$KSRC is not a kernel source directory])
      fi
    else
      KSRC=$KBUILD
      if test ! -e "$KSRC"/include/linux/kernel.h; then
        # Debian kernel build Makefiles tend to include a line of the form:
        # MAKEARGS := -C /usr/src/linux-headers-3.2.0-1-common O=/usr/src/linux-headers-3.2.0-1-486
        # First try to extract the source directory from this line.
        KSRC=`sed -n 's/.*-C \([[^ ]]*\).*/\1/p' "$KBUILD"/Makefile`
        if test ! -e "$KSRC"/include/linux/kernel.h; then
          # Didn't work.  Fall back to name-based heuristics that used to work.
          case `echo "$KBUILD" | sed 's,/*$,,'` in # (
            */build)
              KSRC=`echo "$KBUILD" | sed 's,/build/*$,/source,'`
              ;; # (
            *)
              KSRC=`(cd "$KBUILD" && pwd -P) | sed 's,-[[^-]]*$,-common,'`
              ;;
          esac
        fi
      fi
      if test ! -e "$KSRC"/include/linux/kernel.h; then
        AC_MSG_ERROR([cannot find source directory (please use --with-linux-source)])
      fi
    fi
    AC_MSG_RESULT([$KSRC])

    AC_MSG_CHECKING([for kernel version])
    version=`sed -n 's/^VERSION = //p' "$KSRC/Makefile"`
    patchlevel=`sed -n 's/^PATCHLEVEL = //p' "$KSRC/Makefile"`
    sublevel=`sed -n 's/^SUBLEVEL = //p' "$KSRC/Makefile"`
    if test X"$version" = X || test X"$patchlevel" = X; then
       AC_MSG_ERROR([cannot determine kernel version])
    elif test X"$sublevel" = X; then
       kversion=$version.$patchlevel
    else
       kversion=$version.$patchlevel.$sublevel
    fi
    AC_MSG_RESULT([$kversion])

    if test "$version" -ge 3; then
       if test "$version" = 3 && test "$patchlevel" -le 10; then
          : # Linux 3.x
       else
         AC_MSG_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 3.10.x is not supported])
       fi
    else
       if test "$version" -le 1 || test "$patchlevel" -le 5 || test "$sublevel" -le 31; then
         AC_MSG_ERROR([Linux kernel in $KBUILD is version $kversion, but version 2.6.32 or later is required])
       else
         : # Linux 2.6.x
       fi
    fi
    if (test ! -e "$KBUILD"/include/linux/version.h && \
        test ! -e "$KBUILD"/include/generated/uapi/linux/version.h)|| \
       (test ! -e "$KBUILD"/include/linux/autoconf.h && \
        test ! -e "$KBUILD"/include/generated/autoconf.h); then
        AC_MSG_ERROR([Linux kernel source in $KBUILD is not configured])
    fi

    OVS_CHECK_LINUX_COMPAT
  fi
  AM_CONDITIONAL(LINUX_ENABLED, test -n "$KBUILD")
])

dnl OVS_GREP_IFELSE(FILE, REGEX, [IF-MATCH], [IF-NO-MATCH])
dnl
dnl Greps FILE for REGEX.  If it matches, runs IF-MATCH, otherwise IF-NO-MATCH.
dnl If IF-MATCH is empty then a match instead invokes
dnl OVS_DEFINE(HAVE_<REGEX>), with <REGEX> translated to uppercase.
dnl A missing FILE is treated as "no match"; any grep exit status other
dnl than 0 or 1 aborts configure.
AC_DEFUN([OVS_GREP_IFELSE], [
  AC_MSG_CHECKING([whether $2 matches in $1])
  if test -f "$1"; then
    grep '$2' "$1" >/dev/null 2>&1
    status=$?
    case $status in
      0)
        AC_MSG_RESULT([yes])
        m4_if([$3], [], [OVS_DEFINE([HAVE_]m4_toupper([$2]))], [$3])
        ;;
      1)
        AC_MSG_RESULT([no])
        $4
        ;;
      *)
        AC_MSG_ERROR([grep exited with status $status])
        ;;
    esac
  else
    AC_MSG_RESULT([file not found])
    $4
  fi
])

dnl OVS_DEFINE(NAME)
dnl
dnl Defines NAME to 1 in kcompat.h.
dnl
dnl The definition is appended to datapath/linux/kcompat.h.new, which
dnl OVS_CHECK_LINUX_COMPAT creates and later moves into place.
AC_DEFUN([OVS_DEFINE], [
  echo '#define $1 1' >> datapath/linux/kcompat.h.new
])

dnl OVS_CHECK_LOG2_H
dnl
dnl Defines HAVE_LOG2_H in kcompat.h if the kernel source tree has
dnl include/linux/log2.h.
AC_DEFUN([OVS_CHECK_LOG2_H], [
  AC_MSG_CHECKING([for $KSRC/include/linux/log2.h])
  if test -e "$KSRC"/include/linux/log2.h; then
    AC_MSG_RESULT([yes])
    OVS_DEFINE([HAVE_LOG2_H])
  else
    AC_MSG_RESULT([no])
  fi
])

dnl OVS_CHECK_LINUX_COMPAT
dnl
dnl Runs various Autoconf checks on the Linux 2.6 kernel source in
dnl the directory in $KBUILD.
dnl
dnl The results are collected in datapath/linux/kcompat.h.new, which
dnl replaces datapath/linux/kcompat.h only if the contents differ.
AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
  rm -f datapath/linux/kcompat.h.new
  mkdir -p datapath/linux
  : > datapath/linux/kcompat.h.new

  OVS_GREP_IFELSE([$KSRC/arch/x86/include/asm/checksum_32.h], [src_err,],
                  [OVS_DEFINE([HAVE_CSUM_COPY_DBG])])

  OVS_GREP_IFELSE([$KSRC/include/linux/err.h], [ERR_CAST])

  OVS_GREP_IFELSE([$KSRC/include/linux/etherdevice.h], [eth_hw_addr_random])

  OVS_GREP_IFELSE([$KSRC/include/linux/if_vlan.h], [vlan_set_encap_proto])

  OVS_GREP_IFELSE([$KSRC/include/linux/in.h], [ipv4_is_multicast])

  OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [dev_disable_lro])
  OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [dev_get_stats])
  OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [dev_get_by_index_rcu])
  OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [__skb_gso_segment])
  OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [can_checksum_protocol])
  OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h], [netdev_features_t])

  OVS_GREP_IFELSE([$KSRC/include/linux/rcupdate.h], [rcu_read_lock_held], [],
                  [OVS_GREP_IFELSE([$KSRC/include/linux/rtnetlink.h],
                                   [rcu_read_lock_held])])

  # Check for the proto_data_valid member in struct sk_buff.  The [^@]
  # is necessary because some versions of this header remove the
  # member but retain the kerneldoc comment that describes it (which
  # starts with @).  The brackets must be doubled because of m4
  # quoting rules.
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [[[^@]]proto_data_valid],
                  [OVS_DEFINE([HAVE_PROTO_DATA_VALID])])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [raw],
                  [OVS_DEFINE([HAVE_MAC_RAW])])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_dst(],
                  [OVS_DEFINE([HAVE_SKB_DST_ACCESSOR_FUNCS])])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
                  [skb_copy_from_linear_data_offset])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
                  [skb_reset_tail_pointer])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_cow_head])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_transport_header],
                  [OVS_DEFINE([HAVE_SKBUFF_HEADER_HELPERS])])
  OVS_GREP_IFELSE([$KSRC/include/linux/icmpv6.h], [icmp6_hdr],
                  [OVS_DEFINE([HAVE_ICMP6_HDR])])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_warn_if_lro],
                  [OVS_DEFINE([HAVE_SKB_WARN_LRO])])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [consume_skb])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_frag_page])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_reset_mac_len])
  OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_unclone])

  OVS_GREP_IFELSE([$KSRC/include/linux/types.h], [bool],
                  [OVS_DEFINE([HAVE_BOOL_TYPE])])
  OVS_GREP_IFELSE([$KSRC/include/linux/types.h], [__wsum],
                  [OVS_DEFINE([HAVE_CSUM_TYPES])])
  OVS_GREP_IFELSE([$KSRC/include/uapi/linux/types.h], [__wsum],
                  [OVS_DEFINE([HAVE_CSUM_TYPES])])

  OVS_GREP_IFELSE([$KSRC/include/net/checksum.h], [csum_replace4])
  OVS_GREP_IFELSE([$KSRC/include/net/checksum.h], [csum_unfold])

  OVS_GREP_IFELSE([$KSRC/include/net/genetlink.h], [parallel_ops])
  OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_get_be16])
  OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_put_be16])
  OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_put_be32])
  OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_put_be64])
  OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_find_nested])

  OVS_GREP_IFELSE([$KSRC/include/linux/if_vlan.h], [ADD_ALL_VLANS_CMD],
                  [OVS_DEFINE([HAVE_VLAN_BUG_WORKAROUND])])

  OVS_GREP_IFELSE([$KSRC/include/linux/percpu.h], [this_cpu_ptr])

  OVS_GREP_IFELSE([$KSRC/include/linux/openvswitch.h],
                  [openvswitch_handle_frame_hook],
                  [OVS_DEFINE([HAVE_RHEL_OVS_HOOK])])

  OVS_CHECK_LOG2_H

  if cmp -s datapath/linux/kcompat.h.new \
            datapath/linux/kcompat.h >/dev/null 2>&1; then
    rm datapath/linux/kcompat.h.new
  else
    mv datapath/linux/kcompat.h.new datapath/linux/kcompat.h
  fi
])

dnl Checks for net/if_packet.h.
dnl
dnl Sets the HAVE_IF_PACKET Automake conditional and, when the header is
dnl available, defines HAVE_IF_PACKET to 1.
AC_DEFUN([OVS_CHECK_IF_PACKET],
  [AC_CHECK_HEADER([net/if_packet.h],
                   [HAVE_IF_PACKET=yes],
                   [HAVE_IF_PACKET=no])
   AM_CONDITIONAL([HAVE_IF_PACKET], [test "$HAVE_IF_PACKET" = yes])
   if test "$HAVE_IF_PACKET" = yes; then
      AC_DEFINE([HAVE_IF_PACKET], [1],
                [Define to 1 if net/if_packet.h is available.])
   fi])

dnl Checks for net/if_dl.h.
dnl
dnl (We use this as a proxy for checking whether we're building on FreeBSD
dnl or NetBSD.)
dnl
dnl Sets the HAVE_IF_DL Automake conditional.  When the header is
dnl available, also defines HAVE_IF_DL to 1 and searches for libpcap.
AC_DEFUN([OVS_CHECK_IF_DL],
  [AC_CHECK_HEADER([net/if_dl.h],
                   [HAVE_IF_DL=yes],
                   [HAVE_IF_DL=no])
   AM_CONDITIONAL([HAVE_IF_DL], [test "$HAVE_IF_DL" = yes])
   if test "$HAVE_IF_DL" = yes; then
      AC_DEFINE([HAVE_IF_DL], [1],
                [Define to 1 if net/if_dl.h is available.])

      # On these platforms we use libpcap to access network devices.
      AC_SEARCH_LIBS([pcap_open_live], [pcap])
   fi])

dnl Checks for buggy strtok_r.
dnl
dnl Some versions of glibc 2.7 have a bug in strtok_r when compiling
dnl with optimization that can cause segfaults:
dnl
dnl http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614.
dnl The result is cached in ovs_cv_strtok_r_bug.  The bug is assumed to be
dnl present if the test program fails or when cross-compiling, in which
dnl case HAVE_STRTOK_R_BUG is defined.
AC_DEFUN([OVS_CHECK_STRTOK_R],
  [AC_CACHE_CHECK(
     [whether strtok_r macro segfaults on some inputs],
     [ovs_cv_strtok_r_bug],
     [AC_RUN_IFELSE(
        [AC_LANG_PROGRAM([#include <stdio.h>
                          #include <string.h>
                         ],
                         [[char string[] = ":::";
                           char *save_ptr = (char *) 0xc0ffee;
                           char *token1, *token2;
                           token1 = strtok_r(string, ":", &save_ptr);
                           token2 = strtok_r(NULL, ":", &save_ptr);
                           freopen ("/dev/null", "w", stdout);
                           printf ("%s %s\n", token1, token2);
                           return 0;
                          ]])],
        [ovs_cv_strtok_r_bug=no],
        [ovs_cv_strtok_r_bug=yes],
        [ovs_cv_strtok_r_bug=yes])])
   if test $ovs_cv_strtok_r_bug = yes; then
     AC_DEFINE([HAVE_STRTOK_R_BUG], [1],
               [Define if strtok_r macro segfaults on some inputs])
   fi
])

dnl ----------------------------------------------------------------------
dnl These macros are from GNU PSPP, with the following original license:
dnl Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.

dnl _OVS_CHECK_CC_OPTION([OPTION], [ACTION-IF-ACCEPTED], [ACTION-IF-REJECTED])
dnl
dnl Helper for the compiler-option checks.  Compiles an empty program
dnl with OPTION (and $WERROR) appended to CFLAGS, caching the result in a
dnl variable named ovs_cv_ followed by OPTION with each "-" replaced by
dnl "_".  CFLAGS is restored afterward.  An empty action is a no-op.
AC_DEFUN([_OVS_CHECK_CC_OPTION], [dnl
  m4_define([ovs_cv_name], [ovs_cv_[]m4_translit([$1], [-], [_])])dnl
  AC_CACHE_CHECK([whether $CC accepts $1], [ovs_cv_name],
    [ovs_save_CFLAGS="$CFLAGS"
     dnl Include -Werror in the compiler options, because without -Werror
     dnl clang's GCC-compatible compiler driver does not return a failure
     dnl exit status even though it complains about options it does not
     dnl understand.
     CFLAGS="$CFLAGS $WERROR $1"
     AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,)], [ovs_cv_name[]=yes], [ovs_cv_name[]=no])
     CFLAGS="$ovs_save_CFLAGS"])
  if test $ovs_cv_name = yes; then
    m4_if([$2], [], [:], [$2])
  else
    m4_if([$3], [], [:], [$3])
  fi
])

dnl OVS_CHECK_WERROR
dnl
dnl Check whether the C compiler accepts -Werror.
dnl Sets $WERROR to "-Werror", if so, and otherwise to the empty string.
AC_DEFUN([OVS_CHECK_WERROR], [WERROR= _OVS_CHECK_CC_OPTION([-Werror], [WERROR=-Werror])]) dnl OVS_CHECK_CC_OPTION([OPTION], [ACTION-IF-ACCEPTED], [ACTION-IF-REJECTED]) dnl Check whether the given C compiler OPTION is accepted. dnl If so, execute ACTION-IF-ACCEPTED, otherwise ACTION-IF-REJECTED. AC_DEFUN([OVS_CHECK_CC_OPTION], [AC_REQUIRE([OVS_CHECK_WERROR]) _OVS_CHECK_CC_OPTION([$1], [$2], [$3])]) dnl OVS_ENABLE_OPTION([OPTION]) dnl Check whether the given C compiler OPTION is accepted. dnl If so, add it to WARNING_FLAGS. dnl Example: OVS_ENABLE_OPTION([-Wdeclaration-after-statement]) AC_DEFUN([OVS_ENABLE_OPTION], [OVS_CHECK_CC_OPTION([$1], [WARNING_FLAGS="$WARNING_FLAGS $1"]) AC_SUBST([WARNING_FLAGS])]) dnl OVS_CONDITIONAL_CC_OPTION([OPTION], [CONDITIONAL]) dnl Check whether the given C compiler OPTION is accepted. dnl If so, enable the given Automake CONDITIONAL. dnl Example: OVS_CONDITIONAL_CC_OPTION([-Wno-unused], [HAVE_WNO_UNUSED]) AC_DEFUN([OVS_CONDITIONAL_CC_OPTION], [OVS_CHECK_CC_OPTION( [$1], [ovs_have_cc_option=yes], [ovs_have_cc_option=no]) AM_CONDITIONAL([$2], [test $ovs_have_cc_option = yes])]) dnl ---------------------------------------------------------------------- dnl Check for too-old XenServer. AC_DEFUN([OVS_CHECK_XENSERVER_VERSION], [AC_CACHE_CHECK([XenServer release], [ovs_cv_xsversion], [if test -e /etc/redhat-release; then ovs_cv_xsversion=`sed -n 's/^XenServer DDK release \([[^-]]*\)-.*/\1/p' /etc/redhat-release` fi if test -z "$ovs_cv_xsversion"; then ovs_cv_xsversion=none fi]) case $ovs_cv_xsversion in none) ;; [[1-9]][[0-9]]* | dnl XenServer 10 or later [[6-9]]* | dnl XenServer 6 or later 5.[[7-9]]* | dnl XenServer 5.7 or later 5.6.[[1-9]][[0-9]][[0-9]][[0-9]]* | dnl XenServer 5.6.1000 or later 5.6.[[2-9]][[0-9]][[0-9]]* | dnl XenServer 5.6.200 or later 5.6.1[[0-9]][[0-9]]) dnl Xenserver 5.6.100 or later ;; *) AC_MSG_ERROR([This appears to be XenServer $ovs_cv_xsversion, but only XenServer 5.6.100 or later is supported. 
(If you are really using a supported version of XenServer, you may override this error message by specifying 'ovs_cv_xsversion=5.6.100' on the "configure" command line.)]) ;; esac]) dnl OVS_MAKE_HAS_IF([if-true], [if-false]) dnl dnl Checks whether make has the GNU make $(if condition,then,else) extension. dnl Runs 'if-true' if so, 'if-false' otherwise. AC_DEFUN([OVS_CHECK_MAKE_IF], [AC_CACHE_CHECK( [whether ${MAKE-make} has GNU make \$(if) extension], [ovs_cv_gnu_make_if], [cat <<'EOF' > conftest.mk conftest.out: echo $(if x,y,z) > conftest.out .PHONY: all EOF rm -f conftest.out AS_ECHO(["$as_me:$LINENO: invoking ${MAKE-make} -f conftest.mk all:"]) >&AS_MESSAGE_LOG_FD 2>&1 ${MAKE-make} -f conftest.mk conftest.out >&AS_MESSAGE_LOG_FD 2>&1 AS_ECHO(["$as_me:$LINENO: conftest.out contains:"]) >&AS_MESSAGE_LOG_FD 2>&1 cat conftest.out >&AS_MESSAGE_LOG_FD 2>&1 result=`cat conftest.out` rm -f conftest.mk conftest.out if test "X$result" = "Xy"; then ovs_cv_gnu_make_if=yes else ovs_cv_gnu_make_if=no fi])]) dnl OVS_CHECK_GNU_MAKE dnl dnl Checks whether make is GNU make (because Linux kernel Makefiles dnl only work with GNU make). AC_DEFUN([OVS_CHECK_GNU_MAKE], [AC_CACHE_CHECK( [whether ${MAKE-make} is GNU make], [ovs_cv_gnu_make], [rm -f conftest.out AS_ECHO(["$as_me:$LINENO: invoking ${MAKE-make} --version:"]) >&AS_MESSAGE_LOG_FD 2>&1 ${MAKE-make} --version >conftest.out 2>&1 cat conftest.out >&AS_MESSAGE_LOG_FD 2>&1 result=`cat conftest.out` rm -f conftest.mk conftest.out case $result in # ( GNU*) ovs_cv_gnu_make=yes ;; # ( *) ovs_cv_gnu_make=no ;; esac]) AM_CONDITIONAL([GNU_MAKE], [test $ovs_cv_gnu_make = yes])]) dnl OVS_CHECK_SPARSE_TARGET dnl dnl The "cgcc" script from "sparse" isn't very good at detecting the dnl target for which the code is being built. This helps it out. 
AC_DEFUN([OVS_CHECK_SPARSE_TARGET], [AC_CACHE_CHECK( [target hint for cgcc], [ac_cv_sparse_target], [AS_CASE([`$CC -dumpmachine 2>/dev/null`], [i?86-* | athlon-*], [ac_cv_sparse_target=x86], [x86_64-*], [ac_cv_sparse_target=x86_64], [ac_cv_sparse_target=other])]) AS_CASE([$ac_cv_sparse_target], [x86], [SPARSEFLAGS= CGCCFLAGS=-target=i86], [x86_64], [SPARSEFLAGS=-m64 CGCCFLAGS=-target=x86_64], [SPARSEFLAGS= CGCCFLAGS=]) AC_SUBST([SPARSEFLAGS]) AC_SUBST([CGCCFLAGS])]) dnl OVS_SPARSE_EXTRA_INCLUDES dnl dnl The cgcc script from "sparse" does not search gcc's default dnl search path. Get the default search path from GCC and pass dnl them to sparse. AC_DEFUN([OVS_SPARSE_EXTRA_INCLUDES], AC_SUBST([SPARSE_EXTRA_INCLUDES], [`$CC -v -E - &1 >/dev/null | sed -n -e '/^#include.*search.*starts.*here:/,/^End.*of.*search.*list\./s/^ \(.*\)/-I \1/p' |grep -v /usr/lib | grep -x -v '\-I /usr/include' | tr \\\n ' ' `] )) dnl OVS_ENABLE_SPARSE AC_DEFUN([OVS_ENABLE_SPARSE], [AC_REQUIRE([OVS_CHECK_SPARSE_TARGET]) AC_REQUIRE([OVS_CHECK_MAKE_IF]) AC_REQUIRE([OVS_SPARSE_EXTRA_INCLUDES]) : ${SPARSE=sparse} AC_SUBST([SPARSE]) AC_CONFIG_COMMANDS_PRE( [if test $ovs_cv_gnu_make_if = yes; then CC='$(if $(C),REAL_CC="'"$CC"'" CHECK="$(SPARSE) -I $(top_srcdir)/include/sparse $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')' fi])]) dnl OVS_PTHREAD_SET_NAME dnl dnl This checks for three known variants of pthreads functions for setting dnl the name of the current thread: dnl dnl glibc: int pthread_setname_np(pthread_t, const char *name); dnl NetBSD: int pthread_setname_np(pthread_t, const char *format, void *arg); dnl FreeBSD: int pthread_set_name_np(pthread_t, const char *name); dnl dnl For glibc and FreeBSD, the arguments are just a thread and its name. For dnl NetBSD, 'format' is a printf() format string and 'arg' is an argument to dnl provide to it. 
dnl dnl This macro defines: dnl dnl glibc: HAVE_GLIBC_PTHREAD_SETNAME_NP dnl NetBSD: HAVE_NETBSD_PTHREAD_SETNAME_NP dnl FreeBSD: HAVE_PTHREAD_SET_NAME_NP AC_DEFUN([OVS_CHECK_PTHREAD_SET_NAME], [AC_CHECK_FUNCS([pthread_set_name_np]) if test $ac_cv_func_pthread_set_name_np != yes; then AC_CACHE_CHECK( [for pthread_setname_np() variant], [ovs_cv_pthread_setname_np], [AC_LINK_IFELSE( [AC_LANG_PROGRAM([#include ], [pthread_setname_np(pthread_self(), "name");])], [ovs_cv_pthread_setname_np=glibc], [AC_LINK_IFELSE( [AC_LANG_PROGRAM([#include ], [pthread_setname_np(pthread_self(), "%s", "name");])], [ovs_cv_pthread_setname_np=netbsd], [ovs_cv_pthread_setname_np=none])])]) case $ovs_cv_pthread_setname_np in # ( glibc) AC_DEFINE( [HAVE_GLIBC_PTHREAD_SETNAME_NP], [1], [Define to 1 if pthread_setname_np() is available and takes 2 parameters (like glibc).]) ;; # ( netbsd) AC_DEFINE( [HAVE_NETBSD_PTHREAD_SETNAME_NP], [1], [Define to 1 if pthread_setname_np() is available and takes 3 parameters (like NetBSD).]) ;; esac fi]) openvswitch-2.0.1+git20140120/boot.sh000077500000000000000000000000501226605124000167470ustar00rootroot00000000000000#! /bin/sh autoreconf --install --force openvswitch-2.0.1+git20140120/build-aux/000077500000000000000000000000001226605124000173445ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/build-aux/.gitignore000066400000000000000000000000471226605124000213350ustar00rootroot00000000000000/compile /depcomp /install-sh /missing openvswitch-2.0.1+git20140120/build-aux/check-structs000077500000000000000000000211671226605124000220630ustar00rootroot00000000000000#! 
/usr/bin/python import os.path import sys import re macros = {} anyWarnings = False types = {} types['char'] = {"size": 1, "alignment": 1} types['uint8_t'] = {"size": 1, "alignment": 1} types['ovs_be16'] = {"size": 2, "alignment": 2} types['ovs_be32'] = {"size": 4, "alignment": 4} types['ovs_be64'] = {"size": 8, "alignment": 8} types['ovs_32aligned_be64'] = {"size": 8, "alignment": 4} token = None line = "" idRe = "[a-zA-Z_][a-zA-Z_0-9]*" tokenRe = "#?" + idRe + "|[0-9]+|." includeRe = re.compile(r'\s*#include\s+"(openflow/[^#]+)"') includePath = '' inComment = False inDirective = False inputStack = [] def getToken(): global token global line global inComment global inDirective global inputFile global fileName while True: line = line.lstrip() if line != "": if line.startswith("/*"): inComment = True line = line[2:] elif inComment: commentEnd = line.find("*/") if commentEnd < 0: line = "" else: inComment = False line = line[commentEnd + 2:] else: match = re.match(tokenRe, line) token = match.group(0) line = line[len(token):] if token.startswith('#'): inDirective = True elif token in macros and not inDirective: line = macros[token] + line continue return True elif inDirective: token = "$" inDirective = False return True else: global lineNumber while True: line = inputFile.readline() lineNumber += 1 while line.endswith("\\\n"): line = line[:-2] + inputFile.readline() lineNumber += 1 match = includeRe.match(line) if match: inputStack.append((fileName, inputFile, lineNumber)) inputFile = open(includePath + match.group(1)) lineNumber = 0 continue if line == "": if inputStack: fileName, inputFile, lineNumber = inputStack.pop() continue if token == None: fatal("unexpected end of input") token = None return False break def fatal(msg): sys.stderr.write("%s:%d: error at \"%s\": %s\n" % (fileName, lineNumber, token, msg)) sys.exit(1) def warn(msg): global anyWarnings anyWarnings = True sys.stderr.write("%s:%d: warning: %s\n" % (fileName, lineNumber, msg)) def skipDirective(): 
getToken() while token != '$': getToken() def isId(s): return re.match(idRe + "$", s) != None def forceId(): if not isId(token): fatal("identifier expected") def forceInteger(): if not re.match('[0-9]+$', token): fatal("integer expected") def match(t): if token == t: getToken() return True else: return False def forceMatch(t): if not match(t): fatal("%s expected" % t) def parseTaggedName(): assert token in ('struct', 'union') name = token getToken() forceId() name = "%s %s" % (name, token) getToken() return name def parseTypeName(): if token in ('struct', 'union'): name = parseTaggedName() elif isId(token): name = token getToken() else: fatal("type name expected") if name in types: return name else: fatal("unknown type \"%s\"" % name) def parseStruct(): isStruct = token == 'struct' structName = parseTaggedName() if token == ";": return ofs = size = 0 alignment = 4 # ARM has minimum 32-bit alignment forceMatch('{') while not match('}'): typeName = parseTypeName() typeSize = types[typeName]['size'] typeAlignment = types[typeName]['alignment'] forceId() memberName = token getToken() if match('['): if token == ']': count = 0 else: forceInteger() count = int(token) getToken() forceMatch(']') else: count = 1 nBytes = typeSize * count if isStruct: if ofs % typeAlignment: shortage = typeAlignment - (ofs % typeAlignment) warn("%s member %s is %d bytes short of %d-byte alignment" % (structName, memberName, shortage, typeAlignment)) size += shortage ofs += shortage size += nBytes ofs += nBytes else: if nBytes > size: size = nBytes if typeAlignment > alignment: alignment = typeAlignment forceMatch(';') if size % alignment: shortage = alignment - (size % alignment) if (structName == "struct ofp10_packet_in" and shortage == 2 and memberName == 'data' and count == 0): # This is intentional pass else: warn("%s needs %d bytes of tail padding" % (structName, shortage)) size += shortage types[structName] = {"size": size, "alignment": alignment} return structName def checkStructs(): 
if len(sys.argv) < 2: sys.stderr.write("at least one non-option argument required; " "use --help for help") sys.exit(1) if '--help' in sys.argv: argv0 = os.path.basename(sys.argv[0]) print '''\ %(argv0)s, for checking struct and struct member alignment usage: %(argv0)s -Ipath HEADER [HEADER]... This program reads the header files specified on the command line and verifies that all struct members are aligned on natural boundaries without any need for the compiler to add additional padding. It also verifies that each struct's size is a multiple of 32 bits (because some ABIs for ARM require all structs to be a multiple of 32 bits), or 64 bits if the struct has any 64-bit members, again without the compiler adding additional padding. Finally, it checks struct size assertions using OFP_ASSERT. This program is specialized for reading Open vSwitch's OpenFlow header files. It will not work on arbitrary header files without extensions.\ ''' % {"argv0": argv0} sys.exit(0) global fileName for fileName in sys.argv[1:]: if fileName.startswith('-I'): global includePath includePath = fileName[2:] if not includePath.endswith('/'): includePath += '/' continue global inputFile global lineNumber inputFile = open(fileName) lineNumber = 0 lastStruct = None while getToken(): if token in ("#ifdef", "#ifndef", "#include", "#endif", "#elif", "#else"): skipDirective() elif token == "#define": getToken() name = token if line.startswith('('): skipDirective() else: definition = "" getToken() while token != '$': definition += token getToken() macros[name] = definition elif token == "enum": while token != ';': getToken() elif token in ('struct', 'union'): lastStruct = parseStruct() elif match('OFP_ASSERT') or match('BOOST_STATIC_ASSERT'): forceMatch('(') forceMatch('sizeof') forceMatch('(') typeName = parseTypeName() if typeName != lastStruct: warn("checking size of %s but %s was most recently defined" % (typeName, lastStruct)) forceMatch(')') forceMatch('=') forceMatch('=') forceInteger() size 
= int(token) getToken() forceMatch(')') if types[typeName]['size'] != size: warn("%s is %d bytes long but declared as %d" % ( typeName, types[typeName]['size'], size)) else: fatal("parse error") inputFile.close() if anyWarnings: sys.exit(1) if __name__ == '__main__': checkStructs() openvswitch-2.0.1+git20140120/build-aux/extract-ofp-errors000077500000000000000000000305521226605124000230450ustar00rootroot00000000000000#! /usr/bin/python import sys import os.path import re macros = {} # Map from OpenFlow version number to version ID used in ofp_header. version_map = {"1.0": 0x01, "1.1": 0x02, "1.2": 0x03, "1.3": 0x04} version_reverse_map = dict((v, k) for (k, v) in version_map.iteritems()) token = None line = "" idRe = "[a-zA-Z_][a-zA-Z_0-9]*" tokenRe = "#?" + idRe + "|[0-9]+|." inComment = False inDirective = False def open_file(fn): global fileName global inputFile global lineNumber fileName = fn inputFile = open(fileName) lineNumber = 0 def tryGetLine(): global inputFile global line global lineNumber line = inputFile.readline() lineNumber += 1 return line != "" def getLine(): if not tryGetLine(): fatal("unexpected end of input") def getToken(): global token global line global inComment global inDirective while True: line = line.lstrip() if line != "": if line.startswith("/*"): inComment = True line = line[2:] elif inComment: commentEnd = line.find("*/") if commentEnd < 0: line = "" else: inComment = False line = line[commentEnd + 2:] else: match = re.match(tokenRe, line) token = match.group(0) line = line[len(token):] if token.startswith('#'): inDirective = True elif token in macros and not inDirective: line = macros[token] + line continue return True elif inDirective: token = "$" inDirective = False return True else: global lineNumber line = inputFile.readline() lineNumber += 1 while line.endswith("\\\n"): line = line[:-2] + inputFile.readline() lineNumber += 1 if line == "": if token == None: fatal("unexpected end of input") token = None return False n_errors = 
0 def error(msg): global n_errors sys.stderr.write("%s:%d: %s\n" % (fileName, lineNumber, msg)) n_errors += 1 def fatal(msg): error(msg) sys.exit(1) def skipDirective(): getToken() while token != '$': getToken() def isId(s): return re.match(idRe + "$", s) != None def forceId(): if not isId(token): fatal("identifier expected") def forceInteger(): if not re.match('[0-9]+$', token): fatal("integer expected") def match(t): if token == t: getToken() return True else: return False def forceMatch(t): if not match(t): fatal("%s expected" % t) def parseTaggedName(): assert token in ('struct', 'union') name = token getToken() forceId() name = "%s %s" % (name, token) getToken() return name def print_enum(tag, constants, storage_class): print (""" %(storage_class)sconst char * %(tag)s_to_string(uint16_t value) { switch (value) {\ """ % {"tag": tag, "bufferlen": len(tag) + 32, "storage_class": storage_class}) for constant in constants: print (" case %s: return \"%s\";" % (constant, constant)) print ("""\ } return NULL; }\ """ % {"tag": tag}) def usage(): argv0 = os.path.basename(sys.argv[0]) print ('''\ %(argv0)s, for extracting OpenFlow error codes from header files usage: %(argv0)s ERROR_HEADER VENDOR_HEADER This program reads VENDOR_HEADER to obtain OpenFlow vendor (aka experimenter IDs), then ERROR_HEADER to obtain OpenFlow error number. It outputs a C source file for translating OpenFlow error codes into strings. ERROR_HEADER should point to lib/ofp-errors.h. VENDOR_HEADER should point to include/openflow/openflow-common.h. 
The output is suitable for use as lib/ofp-errors.inc.\ ''' % {"argv0": argv0}) sys.exit(0) def extract_vendor_ids(fn): global vendor_map vendor_map = {} vendor_loc = {} open_file(fn) while tryGetLine(): m = re.match(r'#define\s+([A-Z0-9_]+)_VENDOR_ID\s+(0x[0-9a-fA-F]+|[0-9]+)', line) if not m: continue name = m.group(1) id_ = int(m.group(2), 0) if name in vendor_map: error("%s: duplicate definition of vendor" % name) sys.stderr.write("%s: Here is the location of the previous " "definition.\n" % vendor_loc[name]) sys.exit(1) vendor_map[name] = id_ vendor_loc[name] = "%s:%d" % (fileName, lineNumber) if not vendor_map: fatal("%s: no vendor definitions found" % fn) inputFile.close() vendor_reverse_map = {} for name, id_ in vendor_map.items(): if id_ in vendor_reverse_map: fatal("%s: duplicate vendor id for vendors %s and %s" % (id_, vendor_reverse_map[id_], name)) vendor_reverse_map[id_] = name def extract_ofp_errors(fn): error_types = {} comments = [] names = [] domain = {} reverse = {} for domain_name in version_map.values(): domain[domain_name] = {} reverse[domain_name] = {} n_errors = 0 expected_errors = {} open_file(fn) while True: getLine() if re.match('enum ofperr', line): break while True: getLine() if line.startswith('/*') or not line or line.isspace(): continue elif re.match('}', line): break if not line.lstrip().startswith('/*'): fatal("unexpected syntax between errors") comment = line.lstrip()[2:].strip() while not comment.endswith('*/'): getLine() if line.startswith('/*') or not line or line.isspace(): fatal("unexpected syntax within error") comment += ' %s' % line.lstrip('* \t').rstrip(' \t\r\n') comment = comment[:-2].rstrip() m = re.match('Expected: (.*)\.$', comment) if m: expected_errors[m.group(1)] = (fileName, lineNumber) continue m = re.match('((?:.(?!\. ))+.)\. 
(.*)$', comment) if not m: fatal("unexpected syntax between errors") dsts, comment = m.groups() getLine() m = re.match('\s+(?:OFPERR_([A-Z0-9_]+))(\s*=\s*OFPERR_OFS)?,', line) if not m: fatal("syntax error expecting enum value") enum = m.group(1) if enum in names: fatal("%s specified twice" % enum) comments.append(re.sub('\[[^]]*\]', '', comment)) names.append(enum) for dst in dsts.split(', '): m = re.match(r'([A-Z]+)([0-9.]+)(\+|-[0-9.]+)?\((\d+)(?:,(\d+))?\)$', dst) if not m: fatal("%r: syntax error in destination" % dst) vendor_name = m.group(1) version1_name = m.group(2) version2_name = m.group(3) type_ = int(m.group(4)) if m.group(5): code = int(m.group(5)) else: code = None if vendor_name not in vendor_map: fatal("%s: unknown vendor" % vendor_name) vendor = vendor_map[vendor_name] if version1_name not in version_map: fatal("%s: unknown OpenFlow version" % version1_name) v1 = version_map[version1_name] if version2_name is None: v2 = v1 elif version2_name == "+": v2 = max(version_map.values()) elif version2_name[1:] not in version_map: fatal("%s: unknown OpenFlow version" % version2_name[1:]) else: v2 = version_map[version2_name[1:]] if v2 < v1: fatal("%s%s: %s precedes %s" % (version1_name, version2_name, version2_name, version1_name)) if vendor == vendor_map['NX']: if v1 >= version_map['1.2'] or v2 >= version_map['1.2']: if code is not None: fatal("%s: NX1.2+ domains do not have codes" % dst) code = 0 elif vendor != vendor_map['OF']: if code is not None: fatal("%s: %s domains do not have codes" % vendor_name) for version in range(v1, v2 + 1): domain[version].setdefault(vendor, {}) domain[version][vendor].setdefault(type_, {}) if code in domain[version][vendor][type_]: msg = "%#x,%d,%d in OF%s means both %s and %s" % ( vendor, type_, code, version_reverse_map[version], domain[version][vendor][type_][code][0], enum) if msg in expected_errors: del expected_errors[msg] else: error("%s: %s." 
% (dst, msg)) sys.stderr.write("%s:%d: %s: Here is the location " "of the previous definition.\n" % (domain[version][vendor][type_][code][1], domain[version][vendor][type_][code][2], dst)) else: domain[version][vendor][type_][code] = (enum, fileName, lineNumber) assert enum not in reverse[version] reverse[version][enum] = (vendor, type_, code) inputFile.close() for fn, ln in expected_errors.values(): sys.stderr.write("%s:%d: expected duplicate not used.\n" % (fn, ln)) n_errors += 1 if n_errors: sys.exit(1) print ("""\ /* Generated automatically; do not modify! -*- buffer-read-only: t -*- */ #define OFPERR_N_ERRORS %d struct ofperr_domain { const char *name; uint8_t version; enum ofperr (*decode)(uint32_t vendor, uint16_t type, uint16_t code); struct triplet errors[OFPERR_N_ERRORS]; }; static const char *error_names[OFPERR_N_ERRORS] = { %s }; static const char *error_comments[OFPERR_N_ERRORS] = { %s };\ """ % (len(names), '\n'.join(' "%s",' % name for name in names), '\n'.join(' "%s",' % re.sub(r'(["\\])', r'\\\1', comment) for comment in comments))) def output_domain(map, name, description, version): print (""" static enum ofperr %s_decode(uint32_t vendor, uint16_t type, uint16_t code) { switch (((uint64_t) vendor << 32) | (type << 16) | code) {""" % name) found = set() for enum in names: if enum not in map: continue vendor, type_, code = map[enum] if code is None: continue value = (vendor << 32) | (type_ << 16) | code if value in found: continue found.add(value) if vendor: vendor_s = "(%#xULL << 32) | " % vendor else: vendor_s = "" print (" case %s(%d << 16) | %d:" % (vendor_s, type_, code)) print (" return OFPERR_%s;" % enum) print ("""\ } return 0; }""") print (""" static const struct ofperr_domain %s = { "%s", %d, %s_decode, {""" % (name, description, version, name)) for enum in names: if enum in map: vendor, type_, code = map[enum] if code == None: code = -1 print " { %#8x, %2d, %3d }, /* %s */" % (vendor, type_, code, enum) else: print (" { -1, -1, -1 }, /* 
%s */" % enum) print ("""\ }, };""") for version_name, id_ in version_map.items(): var = 'ofperr_of' + re.sub('[^A-Za-z0-9_]', '', version_name) description = "OpenFlow %s" % version_name output_domain(reverse[id_], var, description, id_) if __name__ == '__main__': if '--help' in sys.argv: usage() elif len(sys.argv) != 3: sys.stderr.write("exactly two non-options arguments required; " "use --help for help\n") sys.exit(1) else: extract_vendor_ids(sys.argv[2]) extract_ofp_errors(sys.argv[1]) openvswitch-2.0.1+git20140120/build-aux/extract-ofp-msgs000077500000000000000000000304641226605124000225040ustar00rootroot00000000000000#! /usr/bin/python import sys import os.path import re line = "" OFP10_VERSION = 0x01 OFP11_VERSION = 0x02 OFP12_VERSION = 0x03 OFP13_VERSION = 0x04 NX_VENDOR_ID = 0x00002320 OFPT_VENDOR = 4 OFPT10_STATS_REQUEST = 16 OFPT10_STATS_REPLY = 17 OFPT11_STATS_REQUEST = 18 OFPT11_STATS_REPLY = 19 OFPST_VENDOR = 0xffff version_map = {"1.0": (OFP10_VERSION, OFP10_VERSION), "1.1": (OFP11_VERSION, OFP11_VERSION), "1.2": (OFP12_VERSION, OFP12_VERSION), "1.3": (OFP13_VERSION, OFP13_VERSION), "1.0+": (OFP10_VERSION, OFP13_VERSION), "1.1+": (OFP11_VERSION, OFP13_VERSION), "1.2+": (OFP12_VERSION, OFP13_VERSION), "1.3+": (OFP13_VERSION, OFP13_VERSION), "1.0-1.1": (OFP10_VERSION, OFP11_VERSION), "1.0-1.2": (OFP10_VERSION, OFP12_VERSION), "1.1-1.2": (OFP11_VERSION, OFP12_VERSION), "": (0x01, 0xff)} def get_line(): global line global line_number line = input_file.readline() line_number += 1 if line == "": fatal("unexpected end of input") n_errors = 0 def error(msg): global n_errors sys.stderr.write("%s:%d: %s\n" % (file_name, line_number, msg)) n_errors += 1 def fatal(msg): error(msg) sys.exit(1) def usage(): argv0 = os.path.basename(sys.argv[0]) print '''\ %(argv0)s, for extracting OpenFlow message types from header files usage: %(argv0)s INPUT OUTPUT where INPUT is the name of the input header file and OUTPUT is the output file name. 
Despite OUTPUT, the output is written to stdout, and the OUTPUT argument only controls #line directives in the output.\ ''' % {"argv0": argv0} sys.exit(0) def make_sizeof(s): m = re.match(r'(.*) up to (.*)', s) if m: struct, member = m.groups() return "offsetof(%s, %s)" % (struct, member) else: return "sizeof(%s)" % s def extract_ofp_msgs(output_file_name): raw_types = [] all_hdrs = {} all_raws = {} all_raws_order = [] while True: get_line() if re.match('enum ofpraw', line): break while True: get_line() first_line_number = line_number here = '%s:%d' % (file_name, line_number) if (line.startswith('/*') or line.startswith(' *') or not line or line.isspace()): continue elif re.match('}', line): break if not line.lstrip().startswith('/*'): fatal("unexpected syntax between ofpraw types") comment = line.lstrip()[2:].strip() while not comment.endswith('*/'): get_line() if line.startswith('/*') or not line or line.isspace(): fatal("unexpected syntax within error") comment += ' %s' % line.lstrip('* \t').rstrip(' \t\r\n') comment = comment[:-2].rstrip() m = re.match(r'([A-Z]+) ([-.+\d]+|) \((\d+)\): ([^.]+)\.$', comment) if not m: fatal("unexpected syntax between messages") type_, versions, number, contents = m.groups() number = int(number) get_line() m = re.match('\s+(?:OFPRAW_%s)(\d*)_([A-Z0-9_]+),?$' % type_, line) if not m: fatal("syntax error expecting OFPRAW_ enum") vinfix, name = m.groups() rawname = 'OFPRAW_%s%s_%s' % (type_, vinfix, name) min_version, max_version = version_map[versions] human_name = '%s_%s' % (type_, name) if type_.endswith('ST'): if rawname.endswith('_REQUEST'): human_name = human_name[:-8] + " request" elif rawname.endswith('_REPLY'): human_name = human_name[:-6] + " reply" else: fatal("%s messages are statistics but %s doesn't end " "in _REQUEST or _REPLY" % (type_, rawname)) these_hdrs = [] for version in range(min_version, max_version + 1): if type_ == 'OFPT': if number == OFPT_VENDOR: fatal("OFPT (%d) is used for vendor extensions" % number) 
elif (version == OFP10_VERSION and (number == OFPT10_STATS_REQUEST or number == OFPT10_STATS_REPLY)): fatal("OFPT 1.0 (%d) is used for stats messages" % number) elif (version != OFP10_VERSION and (number == OFPT11_STATS_REQUEST or number == OFPT11_STATS_REPLY)): fatal("OFPT 1.1+ (%d) is used for stats messages" % number) hdrs = (version, number, 0, 0, 0) elif type_ == 'OFPST' and name.endswith('_REQUEST'): if version == OFP10_VERSION: hdrs = (version, OFPT10_STATS_REQUEST, number, 0, 0) else: hdrs = (version, OFPT11_STATS_REQUEST, number, 0, 0) elif type_ == 'OFPST' and name.endswith('_REPLY'): if version == OFP10_VERSION: hdrs = (version, OFPT10_STATS_REPLY, number, 0, 0) else: hdrs = (version, OFPT11_STATS_REPLY, number, 0, 0) elif type_ == 'NXT': hdrs = (version, OFPT_VENDOR, 0, NX_VENDOR_ID, number) elif type_ == 'NXST' and name.endswith('_REQUEST'): if version == OFP10_VERSION: hdrs = (version, OFPT10_STATS_REQUEST, OFPST_VENDOR, NX_VENDOR_ID, number) else: hdrs = (version, OFPT11_STATS_REQUEST, OFPST_VENDOR, NX_VENDOR_ID, number) elif type_ == 'NXST' and name.endswith('_REPLY'): if version == OFP10_VERSION: hdrs = (version, OFPT10_STATS_REPLY, OFPST_VENDOR, NX_VENDOR_ID, number) else: hdrs = (version, OFPT11_STATS_REPLY, OFPST_VENDOR, NX_VENDOR_ID, number) else: fatal("type '%s' unknown" % type_) if hdrs in all_hdrs: error("Duplicate message definition for %s." 
% str(hdrs)) sys.stderr.write("%s: Here is the location " "of the previous definition.\n" % (all_hdrs[hdrs])) all_hdrs[hdrs] = here these_hdrs.append(hdrs) extra_multiple = '0' if contents == 'void': min_body = '0' else: min_body_elem = [] for c in [s.strip() for s in contents.split(",")]: if c.endswith('[]'): if extra_multiple == '0': extra_multiple = make_sizeof(c[:-2]) else: error("Cannot have multiple [] elements") else: min_body_elem.append(c) if min_body_elem: min_body = " + ".join([make_sizeof(s) for s in min_body_elem]) else: if extra_multiple == '0': error("Must specify contents (use 'void' if empty)") min_body = 0 if rawname in all_raws: fatal("%s: Duplicate name" % rawname) all_raws[rawname] = {"hdrs": these_hdrs, "min_version": min_version, "max_version": max_version, "min_body": min_body, "extra_multiple": extra_multiple, "type": type_, "human_name": human_name, "line": first_line_number} all_raws_order.append(rawname) continue while True: get_line() if re.match('enum ofptype', line): break while True: get_line() if re.match(r'\s*/?\*', line) or line.isspace(): continue elif re.match('}', line): break if not re.match(r'\s*OFPTYPE_.*/\*', line): fatal("unexpected syntax between OFPTYPE_ definitions") syntax = line.strip() while not syntax.endswith('*/'): get_line() if not line.strip().startswith('*'): fatal("unexpected syntax within OFPTYPE_ definition") syntax += ' %s' % line.strip().lstrip('* \t') syntax = syntax.strip() m = re.match(r'(OFPTYPE_[A-Z0-9_]+),\s*/\* (.*) \*/', syntax) if not m: fatal("syntax error in OFPTYPE_ definition") ofptype, raws_ = m.groups() raws = [s.rstrip('.') for s in raws_.split()] for raw in raws: if not re.match('OFPRAW_[A-Z0-9_]+$', raw): fatal("%s: invalid OFPRAW_* name syntax" % raw) if raw not in all_raws: fatal("%s: not a declared OFPRAW_* name" % raw) if "ofptype" in all_raws[raw]: fatal("%s: already part of %s" % (raw, all_raws[raw]["ofptype"])) all_raws[raw]["ofptype"] = ofptype input_file.close() if n_errors: 
sys.exit(1) output = [] output.append("/* Generated automatically; do not modify! " "-*- buffer-read-only: t -*- */") output.append("") for raw in all_raws_order: r = all_raws[raw] output.append("static struct raw_instance %s_instances[] = {" % raw.lower()) for hdrs in r['hdrs']: output.append(" { {0, NULL}, {%d, %d, %d, 0x%x, %d}, %s, 0 }," % (hdrs + (raw,))) output.append("};") output.append("") output.append("static struct raw_info raw_infos[] = {") for raw in all_raws_order: r = all_raws[raw] if "ofptype" not in r: error("%s: no defined OFPTYPE_" % raw) continue output.append(" {") output.append(" %s_instances," % raw.lower()) output.append(" %d, %d," % (r["min_version"], r["max_version"])) output.append("#line %s \"%s\"" % (r["line"], file_name)) output.append(" %s," % r["min_body"]) output.append("#line %s \"%s\"" % (r["line"], file_name)) output.append(" %s," % r["extra_multiple"]) output.append("#line %s \"%s\"" % (len(output) + 2, output_file_name)) output.append(" %s," % r["ofptype"]) output.append(" \"%s\"," % r["human_name"]) output.append(" },") if r['type'].endswith("ST"): for hdrs in r['hdrs']: op_hdrs = list(hdrs) if hdrs[0] == OFP10_VERSION: if hdrs[1] == OFPT10_STATS_REQUEST: op_hdrs[1] = OFPT10_STATS_REPLY elif hdrs[1] == OFPT10_STATS_REPLY: op_hdrs[1] = OFPT10_STATS_REQUEST else: assert False else: if hdrs[1] == OFPT11_STATS_REQUEST: op_hdrs[1] = OFPT11_STATS_REPLY elif hdrs[1] == OFPT11_STATS_REPLY: op_hdrs[1] = OFPT11_STATS_REQUEST else: assert False if tuple(op_hdrs) not in all_hdrs: if r["human_name"].endswith("request"): fatal("%s has no corresponding reply" % r["human_name"]) else: fatal("%s has no corresponding request" % r["human_name"]) output.append("};") if n_errors: sys.exit(1) return output if __name__ == '__main__': if '--help' in sys.argv: usage() elif len(sys.argv) != 3: sys.stderr.write("exactly one non-option arguments required; " "use --help for help\n") sys.exit(1) else: global file_name global input_file global line_number 
file_name = sys.argv[1] input_file = open(file_name) line_number = 0 for line in extract_ofp_msgs(sys.argv[2]): print line openvswitch-2.0.1+git20140120/build-aux/sodepends.pl000066400000000000000000000035761226605124000217000ustar00rootroot00000000000000# Copyright (c) 2008, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. use strict; use warnings; use Getopt::Long; our ($exit_code) = 0; our (@include_dirs); Getopt::Long::Configure ("bundling"); GetOptions("I|include=s" => \@include_dirs) or exit(1); @include_dirs = ('.') if !@include_dirs; sub find_file { my ($name) = @_; foreach my $dir (@include_dirs, '.') { my $file = "$dir/$name"; if (stat($file)) { return $file; } } print STDERR "$name not found in: ", join(' ', @include_dirs), "\n"; $exit_code = 1; return; } print "# Generated automatically -- do not modify! -*- buffer-read-only: t -*-\n"; for my $toplevel (sort(@ARGV)) { # Skip names that don't end in .in. next if $toplevel !~ /\.in$/; # Open file. 
my ($fn) = find_file($toplevel); next if !defined($fn); if (!open(OUTER, '<', $fn)) { print "$fn: open: $!\n"; $exit_code = 1; next; } my (@dependencies); OUTER: while () { if (my ($name) = /^\.so (\S+)$/) { push(@dependencies, $name) if find_file($name); } } close(OUTER); my ($output) = $toplevel; $output =~ s/\.in//; print "\n$output:"; print " \\\n\t$_" foreach $toplevel, sort(@dependencies); print "\n"; print "$_:\n" foreach $toplevel, sort(@dependencies); } exit $exit_code; openvswitch-2.0.1+git20140120/build-aux/soexpand.pl000066400000000000000000000021771226605124000215310ustar00rootroot00000000000000# Copyright (c) 2008 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# Copies standard input to standard output, replacing each ".so NAME" line
# with the contents of NAME.  NAME is looked up in each include directory
# and then in '.'.  A ".so" line whose file cannot be opened is reported on
# STDERR, passed through unchanged, and makes the script exit with status 1.
#
# Options:
#   -I DIR, --include=DIR   directory to search for included files
#                           (repeatable; defaults to '.').

use strict;
use warnings;
use Getopt::Long;

my ($exit_code) = 0;
my (@include_dirs);

Getopt::Long::Configure ("bundling");
GetOptions("I|include=s" => \@include_dirs) or exit(1);
@include_dirs = ('.') if !@include_dirs;

OUTER: while (<STDIN>) {
    if (my ($name) = /^\.so (\S+)$/) {
        foreach my $dir (@include_dirs, '.') {
            # Three-argument open: NAME comes from the input, so it must
            # never be interpreted as an open mode or a pipe command.
            if (open(my $inner, '<', "$dir/$name")) {
                while (my $line = <$inner>) {
                    print $line;
                }
                close($inner);
                next OUTER;
            }
        }
        print STDERR "$name not found in: ", join(' ', @include_dirs), "\n";
        $exit_code = 1;
    }
    print $_;
}
exit $exit_code;
openvswitch-2.0.1+git20140120/build-aux/thread-safety-blacklist000066400000000000000000000021431226605124000237750ustar00rootroot00000000000000\basctime( \bbasename( \bcatgets( \bcrypt( \bctermid( \bctime( \bdbm_clearerr( \bdbm_close( \bdbm_delete( \bdbm_error( \bdbm_fetch( \bdbm_firstkey( \bdbm_nextkey( \bdbm_open( \bdbm_store( \bdirname( \bdlerror( \bdrand48( \becvt( \bencrypt( \bendgrent( \bendpwent( \bendutxent( \bfcvt( \bftw( \bgcvt( \bgetc_unlocked( \bgetchar_unlocked( \bgetdate( \bgetgrent( \bgetgrgid( \bgetgrnam( \bgethostbyaddr( \bgethostbyname( \bgethostent( \bgetlogin( \bgetmntent( \bgetnetbyaddr( \bgetnetbyname( \bgetnetent( \bgetprotobyname( \bgetprotobynumber( \bgetprotoent( \bgetpwent( \bgetpwnam( \bgetpwuid( \bgetservbyname( \bgetservbyport( \bgetservent( \bgetutxent( \bgetutxid( \bgetutxline( \bgmtime( \bhcreate( \bhdestroy( \bhsearch( \binet_ntoa( \bl64a( \blgamma( \blgammaf( \blgammal( \blocaleconv( \blocaltime( \blrand48( \bmrand48( \bnftw( \bnl_langinfo( \bptsname( \bputc_unlocked( \bputchar_unlocked( \bputenv( \bpututxline( \brand( \bsetenv( \bsetgrent( \bsetkey( \bsetpwent( \bsetutxent( \bsigprocmask( \bstrerror( \bstrsignal( \bstrtok( \bsystem( \btmpnam( \bttyname( \bunsetenv( \bwcrtomb( \bwcsrtombs( \bwcstombs( \bwctomb( openvswitch-2.0.1+git20140120/configure.ac000066400000000000000000000100151226605124000177350ustar00rootroot00000000000000# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. AC_PREREQ(2.64) AC_INIT(openvswitch, 2.0.1, ovs-bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_TESTDIR([tests]) AM_INIT_AUTOMAKE AC_PROG_CC AM_PROG_CC_C_O AC_PROG_CPP AC_PROG_RANLIB AC_PROG_MKDIR_P AC_PROG_FGREP AC_PROG_EGREP AC_ARG_VAR([PERL], [path to Perl interpreter]) AC_PATH_PROG([PERL], perl, no) if test "$PERL" = no; then AC_MSG_ERROR([Perl interpreter not found in $PATH or $PERL.]) fi AM_MISSING_PROG([AUTOM4TE], [autom4te]) AC_USE_SYSTEM_EXTENSIONS AC_C_BIGENDIAN AC_SYS_LARGEFILE AC_SEARCH_LIBS([pow], [m]) AC_SEARCH_LIBS([clock_gettime], [rt]) AC_SEARCH_LIBS([timer_create], [rt]) AC_SEARCH_LIBS([pthread_sigmask], [pthread]) AC_FUNC_STRERROR_R OVS_CHECK_ESX OVS_CHECK_COVERAGE OVS_CHECK_NDEBUG OVS_CHECK_NETLINK OVS_CHECK_OPENSSL OVS_CHECK_LOGDIR OVS_CHECK_PYTHON OVS_CHECK_PYUIC4 OVS_CHECK_OVSDBMONITOR OVS_CHECK_DOT OVS_CHECK_IF_PACKET OVS_CHECK_IF_DL OVS_CHECK_STRTOK_R AC_CHECK_DECLS([sys_siglist], [], [], [[#include ]]) AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec], [], [], [[#include ]]) AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include ]]) AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r]) AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h stdatomic.h]) AC_CHECK_HEADERS([net/if_mib.h], [], [], [[#include #include ]]) OVS_CHECK_PKIDIR 
OVS_CHECK_RUNDIR OVS_CHECK_DBDIR OVS_CHECK_BACKTRACE OVS_CHECK_MALLOC_HOOKS OVS_CHECK_VALGRIND OVS_CHECK_SOCKET_LIBS OVS_CHECK_LINKER_SECTIONS OVS_CHECK_XENSERVER_VERSION OVS_CHECK_GROFF OVS_CHECK_GNU_MAKE OVS_CHECK_CACHE_TIME OVS_CHECK_TLS OVS_CHECK_ATOMIC_LIBS OVS_CHECK_GCC4_ATOMICS OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(1) OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(2) OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(4) OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(8) OVS_CHECK_POSIX_AIO OVS_CHECK_PTHREAD_SET_NAME OVS_ENABLE_OPTION([-Wall]) OVS_ENABLE_OPTION([-Wextra]) OVS_ENABLE_OPTION([-Wno-sign-compare]) OVS_ENABLE_OPTION([-Wpointer-arith]) OVS_ENABLE_OPTION([-Wdeclaration-after-statement]) OVS_ENABLE_OPTION([-Wformat-security]) OVS_ENABLE_OPTION([-Wswitch-enum]) OVS_ENABLE_OPTION([-Wunused-parameter]) OVS_ENABLE_OPTION([-Wstrict-aliasing]) OVS_ENABLE_OPTION([-Wbad-function-cast]) OVS_ENABLE_OPTION([-Wcast-align]) OVS_ENABLE_OPTION([-Wstrict-prototypes]) OVS_ENABLE_OPTION([-Wold-style-definition]) OVS_ENABLE_OPTION([-Wmissing-prototypes]) OVS_ENABLE_OPTION([-Wmissing-field-initializers]) OVS_ENABLE_OPTION([-Wthread-safety]) OVS_CONDITIONAL_CC_OPTION([-Wno-unused], [HAVE_WNO_UNUSED]) OVS_CONDITIONAL_CC_OPTION([-Wno-unused-parameter], [HAVE_WNO_UNUSED_PARAMETER]) OVS_ENABLE_WERROR OVS_ENABLE_SPARSE AC_ARG_VAR(KARCH, [Kernel Architecture String]) AC_SUBST(KARCH) OVS_CHECK_LINUX AC_CONFIG_FILES([Makefile datapath/Makefile datapath/linux/Kbuild datapath/linux/Makefile datapath/linux/Makefile.main tests/atlocal]) dnl This makes sure that include/openflow gets created in the build directory. 
AC_CONFIG_COMMANDS([include/openflow/openflow.h.stamp]) AC_CONFIG_COMMANDS([ovsdb/ovsdbmonitor/dummy], [:]) AC_CONFIG_COMMANDS([utilities/bugtool/dummy], [:]) AM_CONDITIONAL([LINUX_DATAPATH], [test "$HAVE_NETLINK" = yes && test "$ESX" = no]) if test "$HAVE_NETLINK" = yes && test "$ESX" = no; then AC_DEFINE([LINUX_DATAPATH], [1], [System uses the linux datapath module.]) fi AC_OUTPUT openvswitch-2.0.1+git20140120/datapath/000077500000000000000000000000001226605124000172405ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/datapath/.gitignore000066400000000000000000000001041226605124000212230ustar00rootroot00000000000000/Makefile /Makefile.in *.cmd *.ko *.mod.c Module.symvers /distfiles openvswitch-2.0.1+git20140120/datapath/Makefile.am000066400000000000000000000012531226605124000212750ustar00rootroot00000000000000SUBDIRS = if LINUX_ENABLED SUBDIRS += linux endif EXTRA_DIST = $(dist_headers) $(dist_sources) $(dist_extras) # Suppress warnings about GNU extensions in Modules.mk files. AUTOMAKE_OPTIONS = -Wno-portability include Modules.mk include linux/Modules.mk # The following is based on commands for the Automake "distdir" target. distfiles: Makefile @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t" | sort -u > $@ CLEANFILES = distfiles openvswitch-2.0.1+git20140120/datapath/Modules.mk000066400000000000000000000020641226605124000212030ustar00rootroot00000000000000# Some modules should be built and distributed, e.g. openvswitch. # # Some modules should be built but not distributed, e.g. third-party # hwtable modules. 
both_modules = openvswitch build_modules = $(both_modules) # Modules to build dist_modules = $(both_modules) # Modules to distribute openvswitch_sources = \ actions.c \ datapath.c \ dp_notify.c \ flow.c \ vport.c \ vport-gre.c \ vport-internal_dev.c \ vport-lisp.c \ vport-netdev.c \ vport-vxlan.c openvswitch_headers = \ compat.h \ datapath.h \ flow.h \ vlan.h \ vport.h \ vport-internal_dev.h \ vport-netdev.h openvswitch_extras = \ README dist_sources = $(foreach module,$(dist_modules),$($(module)_sources)) dist_headers = $(foreach module,$(dist_modules),$($(module)_headers)) dist_extras = $(foreach module,$(dist_modules),$($(module)_extras)) build_sources = $(foreach module,$(build_modules),$($(module)_sources)) build_headers = $(foreach module,$(build_modules),$($(module)_headers)) build_links = $(notdir $(build_sources)) build_objects = $(notdir $(patsubst %.c,%.o,$(build_sources))) openvswitch-2.0.1+git20140120/datapath/README000066400000000000000000000256201226605124000201250ustar00rootroot00000000000000Open vSwitch datapath developer documentation ============================================= The Open vSwitch kernel module allows flexible userspace control over flow-level packet processing on selected network devices. It can be used to implement a plain Ethernet switch, network device bonding, VLAN processing, network access control, flow-based network control, and so on. The kernel module implements multiple "datapaths" (analogous to bridges), each of which can have multiple "vports" (analogous to ports within a bridge). Each datapath also has associated with it a "flow table" that userspace populates with "flows" that map from keys based on packet headers and metadata to sets of actions. The most common action forwards the packet to another vport; other actions are also implemented. When a packet arrives on a vport, the kernel module processes it by extracting its flow key and looking it up in the flow table. 
If there is a matching flow, it executes the associated actions. If there is no match, it queues the packet to userspace for processing (as part of its processing, userspace will likely set up a flow to handle further packets of the same type entirely in-kernel). Flow key compatibility ---------------------- Network protocols evolve over time. New protocols become important and existing protocols lose their prominence. For the Open vSwitch kernel module to remain relevant, it must be possible for newer versions to parse additional protocols as part of the flow key. It might even be desirable, someday, to drop support for parsing protocols that have become obsolete. Therefore, the Netlink interface to Open vSwitch is designed to allow carefully written userspace applications to work with any version of the flow key, past or future. To support this forward and backward compatibility, whenever the kernel module passes a packet to userspace, it also passes along the flow key that it parsed from the packet. Userspace then extracts its own notion of a flow key from the packet and compares it against the kernel-provided version: - If userspace's notion of the flow key for the packet matches the kernel's, then nothing special is necessary. - If the kernel's flow key includes more fields than the userspace version of the flow key, for example if the kernel decoded IPv6 headers but userspace stopped at the Ethernet type (because it does not understand IPv6), then again nothing special is necessary. Userspace can still set up a flow in the usual way, as long as it uses the kernel-provided flow key to do it. - If the userspace flow key includes more fields than the kernel's, for example if userspace decoded an IPv6 header but the kernel stopped at the Ethernet type, then userspace can forward the packet manually, without setting up a flow in the kernel. 
This case is bad for performance because every packet that the kernel considers part of the flow must go to userspace, but the forwarding behavior is correct. (If userspace can determine that the values of the extra fields would not affect forwarding behavior, then it could set up a flow anyway.) How flow keys evolve over time is important to making this work, so the following sections go into detail. Flow key format --------------- A flow key is passed over a Netlink socket as a sequence of Netlink attributes. Some attributes represent packet metadata, defined as any information about a packet that cannot be extracted from the packet itself, e.g. the vport on which the packet was received. Most attributes, however, are extracted from headers within the packet, e.g. source and destination addresses from Ethernet, IP, or TCP headers. The <linux/openvswitch.h> header file defines the exact format of the flow key attributes. For informal explanatory purposes here, we write them as comma-separated strings, with parentheses indicating arguments and nesting. For example, the following could represent a flow key corresponding to a TCP packet that arrived on vport 1: in_port(1), eth(src=e0:91:f5:21:d0:b2, dst=00:02:e3:0f:80:a4), eth_type(0x0800), ipv4(src=172.16.0.20, dst=172.18.0.52, proto=6, tos=0, frag=no), tcp(src=49163, dst=80) Often we ellipsize arguments not important to the discussion, e.g.: in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...) Wildcarded flow key format -------------------------- A wildcarded flow is described with two sequences of Netlink attributes passed over the Netlink socket: a flow key, exactly as described above, and an optional corresponding flow mask. A wildcarded flow can represent a group of exact match flows. Each '1' bit in the mask specifies an exact match with the corresponding bit in the flow key. A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit of an incoming packet.
Using wildcarded flows can improve the flow setup rate by reducing the number of new flows that need to be processed by the user space program. Support for the mask Netlink attribute is optional for both the kernel and user space program. The kernel can ignore the mask attribute, installing an exact match flow, or reduce the number of don't care bits in the kernel to less than what was specified by the user space program. In this case, variations in bits that the kernel does not implement will simply result in additional flow setups. The kernel module will also work with user space programs that neither support nor supply flow mask attributes. Since the kernel may ignore or modify wildcard bits, it can be difficult for the userspace program to know exactly what matches are installed. There are two possible approaches: reactively install flows as they miss the kernel flow table (and therefore not attempt to determine wildcard changes at all) or use the kernel's response messages to determine the installed wildcards. When interacting with userspace, the kernel should maintain the match portion of the key exactly as originally installed. This provides a handle to identify the flow for all future operations. However, when reporting the mask of an installed flow, the mask should include any restrictions imposed by the kernel. The behavior when using overlapping wildcarded flows is undefined. It is the responsibility of the user space program to ensure that any incoming packet can match at most one flow, wildcarded or not. The current implementation performs best-effort detection of overlapping wildcarded flows and may reject some but not all of them. However, this behavior may change in future versions. Basic rule for evolving flow keys --------------------------------- Some care is needed to really maintain forward and backward compatibility for applications that follow the rules listed under "Flow key compatibility" above.
The basic rule is obvious: ------------------------------------------------------------------ New network protocol support must only supplement existing flow key attributes. It must not change the meaning of already defined flow key attributes. ------------------------------------------------------------------ This rule does have less-obvious consequences so it is worth working through a few examples. Suppose, for example, that the kernel module did not already implement VLAN parsing. Instead, it just interpreted the 802.1Q TPID (0x8100) as the Ethertype then stopped parsing the packet. The flow key for any packet with an 802.1Q header would look essentially like this, ignoring metadata: eth(...), eth_type(0x8100) Naively, to add VLAN support, it makes sense to add a new "vlan" flow key attribute to contain the VLAN tag, then continue to decode the encapsulated headers beyond the VLAN tag using the existing field definitions. With this change, a TCP packet in VLAN 10 would have a flow key much like this: eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...) But this change would negatively affect a userspace application that has not been updated to understand the new "vlan" flow key attribute. The application could, following the flow compatibility rules above, ignore the "vlan" attribute that it does not understand and therefore assume that the flow contained IP packets. This is a bad assumption (the flow only contains IP packets if one parses and skips over the 802.1Q header) and it could cause the application's behavior to change across kernel versions even though it follows the compatibility rules. The solution is to use a set of nested attributes. This is, for example, why 802.1Q support uses nested attributes. 
A TCP packet in VLAN 10 is actually expressed as: eth(...), eth_type(0x8100), vlan(vid=10, pcp=0), encap(eth_type(0x0800), ip(proto=6, ...), tcp(...)) Notice how the "eth_type", "ip", and "tcp" flow key attributes are nested inside the "encap" attribute. Thus, an application that does not understand the "vlan" key will not see any of those attributes and therefore will not misinterpret them. (Also, the outer eth_type is still 0x8100, not changed to 0x0800.) Handling malformed packets -------------------------- Don't drop packets in the kernel for malformed protocol headers, bad checksums, etc. This would prevent userspace from implementing a simple Ethernet switch that forwards every packet. Instead, in such a case, include an attribute with "empty" content. It doesn't matter if the empty content could be valid protocol values, as long as those values are rarely seen in practice, because userspace can always arrange for all packets with those values to be sent to userspace and handle them individually. For example, consider a packet that contains an IP header that indicates protocol 6 for TCP, but which is truncated just after the IP header, so that the TCP header is missing. The flow key for this packet would include a tcp attribute with all-zero src and dst, like this: eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0) As another example, consider a packet with an Ethernet type of 0x8100, indicating that a VLAN TCI should follow, but which is truncated just after the Ethernet type. The flow key for this packet would include an all-zero-bits vlan and an empty encap attribute, like this: eth(...), eth_type(0x8100), vlan(0), encap() Unlike a TCP packet with source and destination ports 0, an all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan attribute expressly to allow this situation to be distinguished.
Thus, the flow key in this second example unambiguously indicates a missing or malformed VLAN TCI. Other rules ----------- The other rules for flow keys are much less subtle: - Duplicate attributes are not allowed at a given nesting level. - Ordering of attributes is not significant. - When the kernel sends a given flow key to userspace, it always composes it the same way. This allows userspace to hash and compare entire flow keys that it may not be able to fully interpret. openvswitch-2.0.1+git20140120/datapath/actions.c000066400000000000000000000355151226605124000210550ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "datapath.h" #include "vlan.h" #include "vport.h" static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct nlattr *attr, int len, bool keep_skb); static int make_writable(struct sk_buff *skb, int write_len) { if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } /* remove VLAN header from packet and update csum accordingly. 
*/ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { struct vlan_hdr *vhdr; int err; err = make_writable(skb, VLAN_ETH_HLEN); if (unlikely(err)) return err; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(skb->data + (2 * ETH_ALEN), VLAN_HLEN, 0)); vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN); *current_tci = vhdr->h_vlan_TCI; memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN); __skb_pull(skb, VLAN_HLEN); vlan_set_encap_proto(skb, vhdr); skb->mac_header += VLAN_HLEN; skb_reset_mac_len(skb); return 0; } static int pop_vlan(struct sk_buff *skb) { __be16 tci; int err; if (likely(vlan_tx_tag_present(skb))) { vlan_set_tci(skb, 0); } else { if (unlikely(skb->protocol != htons(ETH_P_8021Q) || skb->len < VLAN_ETH_HLEN)) return 0; err = __pop_vlan_tci(skb, &tci); if (err) return err; } /* move next vlan tag to hw accel tag */ if (likely(skb->protocol != htons(ETH_P_8021Q) || skb->len < VLAN_ETH_HLEN)) return 0; err = __pop_vlan_tci(skb, &tci); if (unlikely(err)) return err; __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci)); return 0; } static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan) { if (unlikely(vlan_tx_tag_present(skb))) { u16 current_tag; /* push down current VLAN tag */ current_tag = vlan_tx_tag_get(skb); if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) return -ENOMEM; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data + (2 * ETH_ALEN), VLAN_HLEN, 0)); } __vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); return 0; } static int set_eth_addr(struct sk_buff *skb, const struct ovs_key_ethernet *eth_key) { int err; err = make_writable(skb, ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); ovs_skb_postpush_rcsum(skb, 
eth_hdr(skb), ETH_ALEN * 2); return 0; } static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, __be32 *addr, __be32 new_addr) { int transport_len = skb->len - skb_transport_offset(skb); if (nh->protocol == IPPROTO_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb, *addr, new_addr, 1); } else if (nh->protocol == IPPROTO_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&uh->check, skb, *addr, new_addr, 1); if (!uh->check) uh->check = CSUM_MANGLED_0; } } } csum_replace4(&nh->check, *addr, new_addr); skb_clear_rxhash(skb); *addr = new_addr; } static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, __be32 addr[4], const __be32 new_addr[4]) { int transport_len = skb->len - skb_transport_offset(skb); if (l4_proto == IPPROTO_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb, addr, new_addr, 1); } else if (l4_proto == IPPROTO_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace16(&uh->check, skb, addr, new_addr, 1); if (!uh->check) uh->check = CSUM_MANGLED_0; } } } } static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, __be32 addr[4], const __be32 new_addr[4], bool recalculate_csum) { if (recalculate_csum) update_ipv6_checksum(skb, l4_proto, addr, new_addr); skb_clear_rxhash(skb); memcpy(addr, new_addr, sizeof(__be32[4])); } static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) { nh->priority = tc >> 4; nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); } static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) { nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; nh->flow_lbl[2] = fl & 0x000000FF; } static void 
set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) { csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); nh->ttl = new_ttl; } static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key) { struct iphdr *nh; int err; err = make_writable(skb, skb_network_offset(skb) + sizeof(struct iphdr)); if (unlikely(err)) return err; nh = ip_hdr(skb); if (ipv4_key->ipv4_src != nh->saddr) set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); if (ipv4_key->ipv4_dst != nh->daddr) set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); if (ipv4_key->ipv4_tos != nh->tos) ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); if (ipv4_key->ipv4_ttl != nh->ttl) set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); return 0; } static int set_ipv6(struct sk_buff *skb, const struct ovs_key_ipv6 *ipv6_key) { struct ipv6hdr *nh; int err; __be32 *saddr; __be32 *daddr; err = make_writable(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr)); if (unlikely(err)) return err; nh = ipv6_hdr(skb); saddr = (__be32 *)&nh->saddr; daddr = (__be32 *)&nh->daddr; if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, ipv6_key->ipv6_src, true); if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { unsigned int offset = 0; int flags = OVS_IP6T_FH_F_SKIP_RH; bool recalc_csum = true; if (ipv6_ext_hdr(nh->nexthdr)) recalc_csum = ipv6_find_hdr(skb, &offset, NEXTHDR_ROUTING, NULL, &flags) != NEXTHDR_ROUTING; set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, ipv6_key->ipv6_dst, recalc_csum); } set_ipv6_tc(nh, ipv6_key->ipv6_tclass); set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); nh->hop_limit = ipv6_key->ipv6_hlimit; return 0; } /* Must follow make_writable() since that can move the skb data. 
*/ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; skb_clear_rxhash(skb); } static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) { struct udphdr *uh = udp_hdr(skb); if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { set_tp_port(skb, port, new_port, &uh->check); if (!uh->check) uh->check = CSUM_MANGLED_0; } else { *port = new_port; skb_clear_rxhash(skb); } } static int set_udp(struct sk_buff *skb, const struct ovs_key_udp *udp_port_key) { struct udphdr *uh; int err; err = make_writable(skb, skb_transport_offset(skb) + sizeof(struct udphdr)); if (unlikely(err)) return err; uh = udp_hdr(skb); if (udp_port_key->udp_src != uh->source) set_udp_port(skb, &uh->source, udp_port_key->udp_src); if (udp_port_key->udp_dst != uh->dest) set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); return 0; } static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key) { struct tcphdr *th; int err; err = make_writable(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)); if (unlikely(err)) return err; th = tcp_hdr(skb); if (tcp_port_key->tcp_src != th->source) set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); if (tcp_port_key->tcp_dst != th->dest) set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); return 0; } static int set_sctp(struct sk_buff *skb, const struct ovs_key_sctp *sctp_port_key) { struct sctphdr *sh; int err; unsigned int sctphoff = skb_transport_offset(skb); err = make_writable(skb, sctphoff + sizeof(struct sctphdr)); if (unlikely(err)) return err; sh = sctp_hdr(skb); if (sctp_port_key->sctp_src != sh->source || sctp_port_key->sctp_dst != sh->dest) { __le32 old_correct_csum, new_csum, old_csum; old_csum = sh->checksum; old_correct_csum = sctp_compute_cksum(skb, sctphoff); sh->source = sctp_port_key->sctp_src; sh->dest = sctp_port_key->sctp_dst; new_csum = sctp_compute_cksum(skb, 
sctphoff); /* Carry any checksum errors through. */ sh->checksum = old_csum ^ old_correct_csum ^ new_csum; skb_clear_rxhash(skb); } return 0; } static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) { struct vport *vport; if (unlikely(!skb)) return -ENOMEM; vport = ovs_vport_rcu(dp, out_port); if (unlikely(!vport)) { kfree_skb(skb); return -ENODEV; } ovs_vport_send(vport, skb); return 0; } static int output_userspace(struct datapath *dp, struct sk_buff *skb, const struct nlattr *attr) { struct dp_upcall_info upcall; const struct nlattr *a; int rem; BUG_ON(!OVS_CB(skb)->pkt_key); upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.key = OVS_CB(skb)->pkt_key; upcall.userdata = NULL; upcall.portid = 0; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; break; case OVS_USERSPACE_ATTR_PID: upcall.portid = nla_get_u32(a); break; } } return ovs_dp_upcall(dp, skb, &upcall); } static int sample(struct datapath *dp, struct sk_buff *skb, const struct nlattr *attr) { const struct nlattr *acts_list = NULL; const struct nlattr *a; int rem; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { switch (nla_type(a)) { case OVS_SAMPLE_ATTR_PROBABILITY: if (net_random() >= nla_get_u32(a)) return 0; break; case OVS_SAMPLE_ATTR_ACTIONS: acts_list = a; break; } } return do_execute_actions(dp, skb, nla_data(acts_list), nla_len(acts_list), true); } static int execute_set_action(struct sk_buff *skb, const struct nlattr *nested_attr) { int err = 0; switch (nla_type(nested_attr)) { case OVS_KEY_ATTR_PRIORITY: skb->priority = nla_get_u32(nested_attr); break; case OVS_KEY_ATTR_SKB_MARK: skb->mark = nla_get_u32(nested_attr); break; case OVS_KEY_ATTR_IPV4_TUNNEL: OVS_CB(skb)->tun_key = nla_data(nested_attr); break; case OVS_KEY_ATTR_ETHERNET: err = set_eth_addr(skb, nla_data(nested_attr)); break; case OVS_KEY_ATTR_IPV4: err = set_ipv4(skb, 
nla_data(nested_attr)); break; case OVS_KEY_ATTR_IPV6: err = set_ipv6(skb, nla_data(nested_attr)); break; case OVS_KEY_ATTR_TCP: err = set_tcp(skb, nla_data(nested_attr)); break; case OVS_KEY_ATTR_UDP: err = set_udp(skb, nla_data(nested_attr)); break; case OVS_KEY_ATTR_SCTP: err = set_sctp(skb, nla_data(nested_attr)); break; } return err; } /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct nlattr *attr, int len, bool keep_skb) { /* Every output action needs a separate clone of 'skb', but the common * case is just a single output action, so that doing a clone and * then freeing the original skbuff is wasteful. So the following code * is slightly obscure just to avoid that. */ int prev_port = -1; const struct nlattr *a; int rem; for (a = attr, rem = len; rem > 0; a = nla_next(a, &rem)) { int err = 0; if (prev_port != -1) { do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port); prev_port = -1; } switch (nla_type(a)) { case OVS_ACTION_ATTR_OUTPUT: prev_port = nla_get_u32(a); break; case OVS_ACTION_ATTR_USERSPACE: output_userspace(dp, skb, a); break; case OVS_ACTION_ATTR_PUSH_VLAN: err = push_vlan(skb, nla_data(a)); if (unlikely(err)) /* skb already freed. */ return err; break; case OVS_ACTION_ATTR_POP_VLAN: err = pop_vlan(skb); break; case OVS_ACTION_ATTR_SET: err = execute_set_action(skb, nla_data(a)); break; case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, a); break; } if (unlikely(err)) { kfree_skb(skb); return err; } } if (prev_port != -1) { if (keep_skb) skb = skb_clone(skb, GFP_ATOMIC); do_output(dp, skb, prev_port); } else if (!keep_skb) consume_skb(skb); return 0; } /* We limit the number of times that we pass into execute_actions() * to avoid blowing out the stack in the event that we have a loop. */ #define MAX_LOOPS 4 struct loop_counter { u8 count; /* Count. */ bool looping; /* Loop detected? 
*/ }; static DEFINE_PER_CPU(struct loop_counter, loop_counters); static int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions) { if (net_ratelimit()) pr_warn("%s: flow looped %d times, dropping\n", ovs_dp_name(dp), MAX_LOOPS); actions->actions_len = 0; return -ELOOP; } /* Execute a list of actions against 'skb'. */ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb) { struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); struct loop_counter *loop; int error; /* Check whether we've looped too much. */ loop = &__get_cpu_var(loop_counters); if (unlikely(++loop->count > MAX_LOOPS)) loop->looping = true; if (unlikely(loop->looping)) { error = loop_suppress(dp, acts); kfree_skb(skb); goto out_loop; } OVS_CB(skb)->tun_key = NULL; error = do_execute_actions(dp, skb, acts->actions, acts->actions_len, false); /* Check whether sub-actions looped too much. */ if (unlikely(loop->looping)) error = loop_suppress(dp, acts); out_loop: /* Decrement loop counter. */ if (!--loop->count) loop->looping = false; return error; } openvswitch-2.0.1+git20140120/datapath/compat.h000066400000000000000000000043271226605124000207020ustar00rootroot00000000000000/* * Copyright (c) 2007-2012 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #ifndef COMPAT_H #define COMPAT_H 1 #include #include #include #include #include static inline void skb_clear_rxhash(struct sk_buff *skb) { #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) skb->rxhash = 0; #endif } #ifdef HAVE_PARALLEL_OPS #define SET_PARALLEL_OPS .parallel_ops = true, #else #define SET_PARALLEL_OPS #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) #define rt_dst(rt) (rt->dst) #else #define rt_dst(rt) (rt->u.dst) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) #define inet_sport(sk) (inet_sk(sk)->sport) #else #define inet_sport(sk) (inet_sk(sk)->inet_sport) #endif static inline struct rtable *find_route(struct net *net, __be32 *saddr, __be32 daddr, u8 ipproto, u8 tos, u32 skb_mark) { struct rtable *rt; /* Tunnel configuration keeps DSCP part of TOS bits, But Linux * router expect RT_TOS bits only. */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr, .saddr = *saddr, .tos = RT_TOS(tos) } }, .mark = skb_mark, .proto = ipproto }; if (unlikely(ip_route_output_key(net, &rt, &fl))) return ERR_PTR(-EADDRNOTAVAIL); *saddr = fl.nl_u.ip4_u.saddr; return rt; #else struct flowi4 fl = { .daddr = daddr, .saddr = *saddr, .flowi4_tos = RT_TOS(tos), .flowi4_mark = skb_mark, .flowi4_proto = ipproto }; rt = ip_route_output_key(net, &fl); *saddr = fl.saddr; return rt; #endif } #endif /* compat.h */ openvswitch-2.0.1+git20140120/datapath/datapath.c000066400000000000000000001636731226605124000212120ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. 
* * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "datapath.h" #include "flow.h" #include "vlan.h" #include "vport-internal_dev.h" #include "vport-netdev.h" #define REHASH_FLOW_INTERVAL (10 * 60 * HZ) static void rehash_flow_table(struct work_struct *work); static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table); int ovs_net_id __read_mostly; static void ovs_notify(struct sk_buff *skb, struct genl_info *info, struct genl_multicast_group *grp) { genl_notify(skb, genl_info_net(info), info->snd_portid, grp->id, info->nlhdr, GFP_KERNEL); } /** * DOC: Locking: * * All writes e.g. Writes to device state (add/remove datapath, port, set * operations on vports, etc.), Writes to other state (flow table * modifications, set miscellaneous datapath parameters, etc.) are protected * by ovs_lock. * * Reads are protected by RCU. * * There are a few special cases (mostly stats) that have their own * synchronization but they nest under all of above and don't interact with * each other. * * The RTNL lock nests inside ovs_mutex. 
*/ static DEFINE_MUTEX(ovs_mutex); void ovs_lock(void) { mutex_lock(&ovs_mutex); } void ovs_unlock(void) { mutex_unlock(&ovs_mutex); } #ifdef CONFIG_LOCKDEP int lockdep_ovsl_is_held(void) { if (debug_locks) return lockdep_is_held(&ovs_mutex); else return 1; } #endif static struct vport *new_vport(const struct vport_parms *); static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *, const struct dp_upcall_info *); static int queue_userspace_packet(struct net *, int dp_ifindex, struct sk_buff *, const struct dp_upcall_info *); /* Must be called with rcu_read_lock or ovs_mutex. */ static struct datapath *get_dp(struct net *net, int dp_ifindex) { struct datapath *dp = NULL; struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(net, dp_ifindex); if (dev) { struct vport *vport = ovs_internal_dev_get_vport(dev); if (vport) dp = vport->dp; } rcu_read_unlock(); return dp; } /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL); return vport->ops->get_name(vport); } static int get_dpifindex(struct datapath *dp) { struct vport *local; int ifindex; rcu_read_lock(); local = ovs_vport_rcu(dp, OVSP_LOCAL); if (local) ifindex = netdev_vport_priv(local)->dev->ifindex; else ifindex = 0; rcu_read_unlock(); return ifindex; } static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); ovs_flow_tbl_destroy((__force struct flow_table *)dp->table, false); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); kfree(dp); } static struct hlist_head *vport_hash_bucket(const struct datapath *dp, u16 port_no) { return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)]; } struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no) { struct vport *vport; struct hlist_head *head; head = vport_hash_bucket(dp, port_no); hlist_for_each_entry_rcu(vport, head, 
dp_hash_node) { if (vport->port_no == port_no) return vport; } return NULL; } /* Called with ovs_mutex. */ static struct vport *new_vport(const struct vport_parms *parms) { struct vport *vport; vport = ovs_vport_add(parms); if (!IS_ERR(vport)) { struct datapath *dp = parms->dp; struct hlist_head *head = vport_hash_bucket(dp, vport->port_no); hlist_add_head_rcu(&vport->dp_hash_node, head); } return vport; } void ovs_dp_detach_port(struct vport *p) { ASSERT_OVSL(); /* First drop references to device. */ hlist_del_rcu(&p->dp_hash_node); /* Then destroy it. */ ovs_vport_del(p); } /* Must be called with rcu_read_lock. */ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) { struct datapath *dp = p->dp; struct sw_flow *flow; struct dp_stats_percpu *stats; struct sw_flow_key key; u64 *stats_counter; int error; stats = this_cpu_ptr(dp->stats_percpu); /* Extract flow from 'skb' into 'key'. */ error = ovs_flow_extract(skb, p->port_no, &key); if (unlikely(error)) { kfree_skb(skb); return; } /* Look up flow. */ flow = ovs_flow_lookup(rcu_dereference(dp->table), &key); if (unlikely(!flow)) { struct dp_upcall_info upcall; upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; upcall.portid = p->upcall_portid; ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; goto out; } OVS_CB(skb)->flow = flow; OVS_CB(skb)->pkt_key = &key; stats_counter = &stats->n_hit; ovs_flow_used(OVS_CB(skb)->flow, skb); ovs_execute_actions(dp, skb); out: /* Update datapath statistics. 
*/ u64_stats_update_begin(&stats->sync); (*stats_counter)++; u64_stats_update_end(&stats->sync); } static struct genl_family dp_packet_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, .netnsok = true, SET_PARALLEL_OPS }; int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct dp_stats_percpu *stats; int dp_ifindex; int err; if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; } dp_ifindex = get_dpifindex(dp); if (!dp_ifindex) { err = -ENODEV; goto err; } if (!skb_is_gso(skb)) err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); else err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info); if (err) goto err; return 0; err: stats = this_cpu_ptr(dp->stats_percpu); u64_stats_update_begin(&stats->sync); stats->n_lost++; u64_stats_update_end(&stats->sync); return err; } static int queue_gso_packets(struct net *net, int dp_ifindex, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { unsigned short gso_type = skb_shinfo(skb)->gso_type; struct dp_upcall_info later_info; struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; segs = __skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM, false); if (IS_ERR(segs)) return PTR_ERR(segs); /* Queue all of the segments. */ skb = segs; do { err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info); if (err) break; if (skb == segs && gso_type & SKB_GSO_UDP) { /* The initial flow key extracted by ovs_flow_extract() * in this case is for a first fragment, so we need to * properly mark later fragments. */ later_key = *upcall_info->key; later_key.ip.frag = OVS_FRAG_TYPE_LATER; later_info = *upcall_info; later_info.key = &later_key; upcall_info = &later_info; } } while ((skb = skb->next)); /* Free all of the segments. 
*/ skb = segs; do { nskb = skb->next; if (err) kfree_skb(skb); else consume_skb(skb); } while ((skb = nskb)); return err; } static size_t key_attr_size(void) { return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */ + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ + nla_total_size(28); /* OVS_KEY_ATTR_ND */ } static size_t upcall_msg_size(const struct sk_buff *skb, const struct nlattr *userdata) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ /* OVS_PACKET_ATTR_USERDATA */ if (userdata) size += NLA_ALIGN(userdata->nla_len); return size; } static int queue_userspace_packet(struct net *net, int dp_ifindex, struct sk_buff *skb, const struct dp_upcall_info *upcall_info) { struct ovs_header *upcall; struct sk_buff *nskb = NULL; struct sk_buff *user_skb; /* to be queued to userspace */ struct nlattr *nla; int err; if (vlan_tx_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb)); if (!nskb) return -ENOMEM; vlan_set_tci(nskb, 0); skb = nskb; } if (nla_attr_size(skb->len) > USHRT_MAX) 
{ err = -EFBIG; goto out; } user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; goto out; } upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd); upcall->dp_ifindex = dp_ifindex; nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY); ovs_flow_to_nlattrs(upcall_info->key, upcall_info->key, user_skb); nla_nest_end(user_skb, nla); if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, nla_len(upcall_info->userdata), nla_data(upcall_info->userdata)); nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); skb_copy_and_csum_dev(skb, nla_data(nla)); genlmsg_end(user_skb, upcall); err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: kfree_skb(nskb); return err; } /* Called with ovs_mutex. */ static int flush_flows(struct datapath *dp) { struct flow_table *old_table; struct flow_table *new_table; old_table = ovsl_dereference(dp->table); new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS); if (!new_table) return -ENOMEM; rcu_assign_pointer(dp->table, new_table); ovs_flow_tbl_destroy(old_table, true); return 0; } static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) { struct sw_flow_actions *acts; int new_acts_size; int req_size = NLA_ALIGN(attr_len); int next_offset = offsetof(struct sw_flow_actions, actions) + (*sfa)->actions_len; if (req_size <= (ksize(*sfa) - next_offset)) goto out; new_acts_size = ksize(*sfa) * 2; if (new_acts_size > MAX_ACTIONS_BUFSIZE) { if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) return ERR_PTR(-EMSGSIZE); new_acts_size = MAX_ACTIONS_BUFSIZE; } acts = ovs_flow_actions_alloc(new_acts_size); if (IS_ERR(acts)) return (void *)acts; memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); acts->actions_len = (*sfa)->actions_len; kfree(*sfa); *sfa = acts; out: (*sfa)->actions_len += req_size; return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); } static int add_action(struct 
sw_flow_actions **sfa, int attrtype, void *data, int len) { struct nlattr *a; a = reserve_sfa_size(sfa, nla_attr_size(len)); if (IS_ERR(a)) return PTR_ERR(a); a->nla_type = attrtype; a->nla_len = nla_attr_size(len); if (data) memcpy(nla_data(a), data, len); memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); return 0; } static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype) { int used = (*sfa)->actions_len; int err; err = add_action(sfa, attrtype, NULL, 0); if (err) return err; return used; } static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) { struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); a->nla_len = sfa->actions_len - st_offset; } static int validate_and_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa); static int validate_and_copy_sample(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; const struct nlattr *a; int rem, start, err, st_acts; memset(attrs, 0, sizeof(attrs)); nla_for_each_nested(a, attr, rem) { int type = nla_type(a); if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type]) return -EINVAL; attrs[type] = a; } if (rem) return -EINVAL; probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY]; if (!probability || nla_len(probability) != sizeof(u32)) return -EINVAL; actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) return -EINVAL; /* validation done, copy sample action. 
*/ start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); if (start < 0) return start; err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); if (err) return err; st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); if (st_acts < 0) return st_acts; err = validate_and_copy_actions(actions, key, depth + 1, sfa); if (err) return err; add_nested_action_end(*sfa, st_acts); add_nested_action_end(*sfa, start); return 0; } static int validate_tp_port(const struct sw_flow_key *flow_key) { if (flow_key->eth.type == htons(ETH_P_IP)) { if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst) return 0; } else if (flow_key->eth.type == htons(ETH_P_IPV6)) { if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst) return 0; } return -EINVAL; } static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_actions **sfa) { struct sw_flow_match match; struct sw_flow_key key; int err, start; ovs_match_init(&match, &key, NULL); err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &match, false); if (err) return err; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); if (start < 0) return start; err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &match.key->tun_key, sizeof(match.key->tun_key)); add_nested_action_end(*sfa, start); return err; } static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, bool *set_tun) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); /* There can be only one key in a action */ if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || (ovs_key_lens[key_type] != nla_len(ovs_key) && ovs_key_lens[key_type] != -1)) return -EINVAL; switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; int err; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_ETHERNET: break; case OVS_KEY_ATTR_TUNNEL: *set_tun = true; err = 
validate_and_copy_set_tun(a, sfa); if (err) return err; break; case OVS_KEY_ATTR_IPV4: if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; if (!flow_key->ip.proto) return -EINVAL; ipv4_key = nla_data(ovs_key); if (ipv4_key->ipv4_proto != flow_key->ip.proto) return -EINVAL; if (ipv4_key->ipv4_frag != flow_key->ip.frag) return -EINVAL; break; case OVS_KEY_ATTR_IPV6: if (flow_key->eth.type != htons(ETH_P_IPV6)) return -EINVAL; if (!flow_key->ip.proto) return -EINVAL; ipv6_key = nla_data(ovs_key); if (ipv6_key->ipv6_proto != flow_key->ip.proto) return -EINVAL; if (ipv6_key->ipv6_frag != flow_key->ip.frag) return -EINVAL; if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) return -EINVAL; break; case OVS_KEY_ATTR_TCP: if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; return validate_tp_port(flow_key); case OVS_KEY_ATTR_UDP: if (flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; return validate_tp_port(flow_key); case OVS_KEY_ATTR_SCTP: if (flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; return validate_tp_port(flow_key); default: return -EINVAL; } return 0; } static int validate_userspace(const struct nlattr *attr) { static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, userspace_policy); if (error) return error; if (!a[OVS_USERSPACE_ATTR_PID] || !nla_get_u32(a[OVS_USERSPACE_ATTR_PID])) return -EINVAL; return 0; } static int copy_action(const struct nlattr *from, struct sw_flow_actions **sfa) { int totlen = NLA_ALIGN(from->nla_len); struct nlattr *to; to = reserve_sfa_size(sfa, from->nla_len); if (IS_ERR(to)) return PTR_ERR(to); memcpy(to, from, totlen); return 0; } static int validate_and_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa) { const struct 
nlattr *a; int rem, err; if (depth >= SAMPLE_ACTION_DEPTH) return -EOVERFLOW; nla_for_each_nested(a, attr, rem) { /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1 }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); bool skip_copy; if (type > OVS_ACTION_ATTR_MAX || (action_lens[type] != nla_len(a) && action_lens[type] != (u32)-1)) return -EINVAL; skip_copy = false; switch (type) { case OVS_ACTION_ATTR_UNSPEC: return -EINVAL; case OVS_ACTION_ATTR_USERSPACE: err = validate_userspace(a); if (err) return err; break; case OVS_ACTION_ATTR_OUTPUT: if (nla_get_u32(a) >= DP_MAX_PORTS) return -EINVAL; break; case OVS_ACTION_ATTR_POP_VLAN: break; case OVS_ACTION_ATTR_PUSH_VLAN: vlan = nla_data(a); if (vlan->vlan_tpid != htons(ETH_P_8021Q)) return -EINVAL; if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) return -EINVAL; break; case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, &skip_copy); if (err) return err; break; case OVS_ACTION_ATTR_SAMPLE: err = validate_and_copy_sample(a, key, depth, sfa); if (err) return err; skip_copy = true; break; default: return -EINVAL; } if (!skip_copy) { err = copy_action(a, sfa); if (err) return err; } } if (rem > 0) return -EINVAL; return 0; } static void clear_stats(struct sw_flow *flow) { flow->used = 0; flow->tcp_flags = 0; flow->packet_count = 0; flow->byte_count = 0; } static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) { struct ovs_header *ovs_header = info->userhdr; struct nlattr **a = info->attrs; struct sw_flow_actions *acts; struct sk_buff *packet; struct sw_flow *flow; struct datapath *dp; struct ethhdr *eth; int len; int err; err = -EINVAL; if (!a[OVS_PACKET_ATTR_PACKET] 
|| !a[OVS_PACKET_ATTR_KEY] || !a[OVS_PACKET_ATTR_ACTIONS]) goto err; len = nla_len(a[OVS_PACKET_ATTR_PACKET]); packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL); err = -ENOMEM; if (!packet) goto err; skb_reserve(packet, NET_IP_ALIGN); nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len); skb_reset_mac_header(packet); eth = eth_hdr(packet); /* Normally, setting the skb 'protocol' field would be handled by a * call to eth_type_trans(), but it assumes there's a sending * device, which we may not have. */ if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) packet->protocol = eth->h_proto; else packet->protocol = htons(ETH_P_802_2); /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); err = PTR_ERR(flow); if (IS_ERR(flow)) goto err_kfree_skb; err = ovs_flow_extract(packet, -1, &flow->key); if (err) goto err_flow_free; err = ovs_flow_metadata_from_nlattrs(flow, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) goto err_flow_free; err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); rcu_assign_pointer(flow->sf_acts, acts); if (err) goto err_flow_free; OVS_CB(packet)->flow = flow; OVS_CB(packet)->pkt_key = &flow->key; packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto err_unlock; local_bh_disable(); err = ovs_execute_actions(dp, packet); local_bh_enable(); rcu_read_unlock(); ovs_flow_free(flow, false); return err; err_unlock: rcu_read_unlock(); err_flow_free: ovs_flow_free(flow, false); err_kfree_skb: kfree_skb(packet); err: return err; } static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, }; static 
struct genl_ops dp_packet_genl_ops[] = { { .cmd = OVS_PACKET_CMD_EXECUTE, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = packet_policy, .doit = ovs_packet_cmd_execute } }; static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) { struct flow_table *table; int i; table = rcu_dereference_check(dp->table, lockdep_ovsl_is_held()); stats->n_flows = ovs_flow_tbl_count(table); stats->n_hit = stats->n_missed = stats->n_lost = 0; for_each_possible_cpu(i) { const struct dp_stats_percpu *percpu_stats; struct dp_stats_percpu local_stats; unsigned int start; percpu_stats = per_cpu_ptr(dp->stats_percpu, i); do { start = u64_stats_fetch_begin_bh(&percpu_stats->sync); local_stats = *percpu_stats; } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); stats->n_hit += local_stats.n_hit; stats->n_missed += local_stats.n_missed; stats->n_lost += local_stats.n_lost; } } static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, }; static struct genl_family dp_flow_genl_family = { .id = GENL_ID_GENERATE, .hdrsize = sizeof(struct ovs_header), .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, .netnsok = true, SET_PARALLEL_OPS }; static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { const struct nlattr *a; struct nlattr *start; int err = 0, rem; start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); if (!start) return -EMSGSIZE; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); struct nlattr *st_sample; switch (type) { case OVS_SAMPLE_ATTR_PROBABILITY: if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) return 
-EMSGSIZE; break; case OVS_SAMPLE_ATTR_ACTIONS: st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); if (!st_sample) return -EMSGSIZE; err = actions_to_attr(nla_data(a), nla_len(a), skb); if (err) return err; nla_nest_end(skb, st_sample); break; } } nla_nest_end(skb, start); return err; } static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); struct nlattr *start; int err; switch (key_type) { case OVS_KEY_ATTR_IPV4_TUNNEL: start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); if (!start) return -EMSGSIZE; err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key), nla_data(ovs_key)); if (err) return err; nla_nest_end(skb, start); break; default: if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) return -EMSGSIZE; break; } return 0; } static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; int rem, err; nla_for_each_attr(a, attr, len, rem) { int type = nla_type(a); switch (type) { case OVS_ACTION_ATTR_SET: err = set_action_to_attr(a, skb); if (err) return err; break; case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) return err; break; default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; break; } } return 0; } static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */ + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ } /* Called with ovs_mutex. 
*/ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { const int skb_orig_len = skb->len; struct nlattr *start; struct ovs_flow_stats stats; struct ovs_header *ovs_header; struct nlattr *nla; unsigned long used; u8 tcp_flags; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; ovs_header->dp_ifindex = get_dpifindex(dp); /* Fill flow key. */ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); if (!nla) goto nla_put_failure; err = ovs_flow_to_nlattrs(&flow->unmasked_key, &flow->unmasked_key, skb); if (err) goto error; nla_nest_end(skb, nla); nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); if (!nla) goto nla_put_failure; err = ovs_flow_to_nlattrs(&flow->key, &flow->mask->key, skb); if (err) goto error; nla_nest_end(skb, nla); spin_lock_bh(&flow->lock); used = flow->used; stats.n_packets = flow->packet_count; stats.n_bytes = flow->byte_count; tcp_flags = flow->tcp_flags; spin_unlock_bh(&flow->lock); if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) goto nla_put_failure; if (stats.n_packets && nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) goto nla_put_failure; if (tcp_flags && nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags)) goto nla_put_failure; /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if * this is the first flow to be dumped into 'skb'. This is unusual for * Netlink but individual action lists can be longer than * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this. * The userspace caller can always fetch the actions separately if it * really wants them. (Most userspace callers in fact don't care.) * * This can only fail for dump operations because the skb is always * properly sized for single flows. 
*/ start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); if (start) { const struct sw_flow_actions *sf_acts; sf_acts = rcu_dereference_check(flow->sf_acts, lockdep_ovsl_is_held()); err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); if (!err) nla_nest_end(skb, start); else { if (skb_orig_len) goto error; nla_nest_cancel(skb, start); } } else if (skb_orig_len) goto nla_put_failure; return genlmsg_end(skb, ovs_header); nla_put_failure: err = -EMSGSIZE; error: genlmsg_cancel(skb, ovs_header); return err; } static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) { const struct sw_flow_actions *sf_acts; sf_acts = ovsl_dereference(flow->sf_acts); return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL); } static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; skb = ovs_flow_cmd_alloc_info(flow); if (!skb) return ERR_PTR(-ENOMEM); retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; } static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key, masked_key; struct sw_flow *flow = NULL; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; struct flow_table *table; struct sw_flow_actions *acts = NULL; struct sw_flow_match match; int error; /* Extract key. */ error = -EINVAL; if (!a[OVS_FLOW_ATTR_KEY]) goto error; ovs_match_init(&match, &key, &mask); error = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]); if (error) goto error; /* Validate actions. 
*/ if (a[OVS_FLOW_ATTR_ACTIONS]) { acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); error = PTR_ERR(acts); if (IS_ERR(acts)) goto error; ovs_flow_key_mask(&masked_key, &key, &mask); error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &masked_key, 0, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree; } } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { error = -EINVAL; goto error; } ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); error = -ENODEV; if (!dp) goto err_unlock_ovs; table = ovsl_dereference(dp->table); /* Check if this is a duplicate flow */ flow = ovs_flow_lookup(table, &key); if (!flow) { struct sw_flow_mask *mask_p; /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) goto err_unlock_ovs; /* Expand table, if necessary, to make room. */ if (ovs_flow_tbl_need_to_expand(table)) { struct flow_table *new_table; new_table = ovs_flow_tbl_expand(table); if (!IS_ERR(new_table)) { rcu_assign_pointer(dp->table, new_table); ovs_flow_tbl_destroy(table, true); table = ovsl_dereference(dp->table); } } /* Allocate flow. */ flow = ovs_flow_alloc(); if (IS_ERR(flow)) { error = PTR_ERR(flow); goto err_unlock_ovs; } clear_stats(flow); flow->key = masked_key; flow->unmasked_key = key; /* Make sure mask is unique in the system */ mask_p = ovs_sw_flow_mask_find(table, &mask); if (!mask_p) { /* Allocate a new mask if none exsits. */ mask_p = ovs_sw_flow_mask_alloc(); if (!mask_p) goto err_flow_free; mask_p->key = mask.key; mask_p->range = mask.range; ovs_sw_flow_mask_insert(table, mask_p); } ovs_sw_flow_mask_add_ref(mask_p); flow->mask = mask_p; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ ovs_flow_insert(table, flow); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); } else { /* We found a matching flow. 
*/ struct sw_flow_actions *old_acts; /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump * request. We also accept NLM_F_EXCL in case that bug ever * gets fixed. */ error = -EEXIST; if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) goto err_unlock_ovs; /* The unmasked key has to be the same for flow updates. */ error = -EINVAL; if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) { OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n"); goto err_unlock_ovs; } /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); ovs_flow_deferred_free_acts(old_acts); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); /* Clear stats. */ if (a[OVS_FLOW_ATTR_CLEAR]) { spin_lock_bh(&flow->lock); clear_stats(flow); spin_unlock_bh(&flow->lock); } } ovs_unlock(); if (!IS_ERR(reply)) ovs_notify(reply, info, &ovs_dp_flow_multicast_group); else netlink_set_err(sock_net(skb->sk)->genl_sock, 0, ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); return 0; err_flow_free: ovs_flow_free(flow, false); err_unlock_ovs: ovs_unlock(); err_kfree: kfree(acts); error: return error; } static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; struct sw_flow *flow; struct datapath *dp; struct flow_table *table; struct sw_flow_match match; int err; if (!a[OVS_FLOW_ATTR_KEY]) { OVS_NLERR("Flow get message rejected, Key attribute missing.\n"); return -EINVAL; } ovs_match_init(&match, &key, NULL); err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL); if (err) return err; ovs_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { err = 
-ENODEV;
		goto unlock;
	}

	table = ovsl_dereference(dp->table);
	flow = ovs_flow_lookup_unmasked_key(table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
					info->snd_seq, OVS_FLOW_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}

/* OVS_FLOW_CMD_DEL handler.
 *
 * Without OVS_FLOW_ATTR_KEY the whole table is flushed via flush_flows().
 * Otherwise the flow whose unmasked key matches is removed from the table
 * and a OVS_FLOW_CMD_DEL notification describing it is sent.
 *
 * Returns 0 on success, -ENODEV if the datapath does not exist, -ENOENT if
 * no flow matches, -ENOMEM if the reply cannot be allocated, or the error
 * from key parsing / flush_flows(). */
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct flow_table *table;
	struct sw_flow_match match;
	int err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	if (!a[OVS_FLOW_ATTR_KEY]) {
		err = flush_flows(dp);
		goto unlock;
	}

	ovs_match_init(&match, &key, NULL);
	err = ovs_match_from_nlattrs(&match, a[OVS_FLOW_ATTR_KEY], NULL);
	if (err)
		goto unlock;

	table = ovsl_dereference(dp->table);
	flow = ovs_flow_lookup_unmasked_key(table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	/* Allocate the reply before touching the table so that an allocation
	 * failure leaves the flow in place. */
	reply = ovs_flow_cmd_alloc_info(flow);
	if (!reply) {
		err = -ENOMEM;
		goto unlock;
	}

	ovs_flow_remove(table, flow);

	/* The flow is filled into the reply before it is freed. */
	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
	BUG_ON(err < 0);

	ovs_flow_free(flow, true);
	ovs_unlock();

	ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
	return 0;
unlock:
	ovs_unlock();
	return err;
}

/* Netlink dump callback for flows.  Walks the flow table under
 * rcu_read_lock(); the resume position is kept in cb->args[0] (bucket) and
 * cb->args[1] (object index within the bucket). */
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	struct flow_table *table;

	rcu_read_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}

	table = rcu_dereference(dp->table);
	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = ovs_flow_dump_next(table, &bucket, &obj);
if (!flow)
			break;

		/* Stop when the message no longer fits; the position of the
		 * flow that did not fit is left in cb->args[] unchanged. */
		if (ovs_flow_cmd_fill_info(flow, dp, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_NEW) < 0)
			break;

		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	rcu_read_unlock();
	return skb->len;
}

/* Generic Netlink operations of the flow family.  NEW and SET share one
 * handler; only GET (including dumps) is open to unprivileged users. */
static struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new_or_set
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new_or_set,
	},
};

/* Attribute validation policy for the datapath family. */
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.netnsok = true,
	 SET_PARALLEL_OPS
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP
};

/* Returns the buffer size reserved for one datapath message: the ovs_header
 * plus room for a name of IFNAMSIZ bytes and a struct ovs_dp_stats. */
static size_t ovs_dp_cmd_msg_size(void)
{
	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

	msgsize += nla_total_size(IFNAMSIZ);
	msgsize += nla_total_size(sizeof(struct ovs_dp_stats));

	return msgsize;
}

/* Appends one datapath message (header, OVS_DP_ATTR_NAME and
 * OVS_DP_ATTR_STATS) to 'skb'.  Every failure is reported as -EMSGSIZE. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				   flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	rcu_read_lock();
	err =
nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	rcu_read_unlock();
	if (err)
		goto nla_put_failure;

	get_dp_stats(dp, &dp_stats);
	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
		goto nla_put_failure;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}

/* Allocates and fills a datapath message.  Unlike the flow and vport
 * variants, a fill failure is not fatal here: the buffer is freed and the
 * error is returned as an ERR_PTR().  Returns ERR_PTR(-ENOMEM) if the
 * allocation fails. */
static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
					     u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
	if (retval < 0) {
		kfree_skb(skb);
		return ERR_PTR(retval);
	}
	return skb;
}

/* Called with ovs_mutex.
 *
 * Finds a datapath either by name (OVS_DP_ATTR_NAME, which must name the
 * datapath's OVSP_LOCAL port) or, when no name is given, by the dp_ifindex in
 * 'ovs_header'.  Returns the datapath or ERR_PTR(-ENODEV). */
static struct datapath *lookup_datapath(struct net *net,
					struct ovs_header *ovs_header,
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(net, ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		rcu_read_lock();
		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
		rcu_read_unlock();
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}

/* OVS_DP_CMD_NEW handler: creates a datapath together with its flow table,
 * per-CPU statistics, port hash table and internal OVSP_LOCAL port, links it
 * into the per-namespace list and multicasts a notification.  Both
 * OVS_DP_ATTR_NAME and OVS_DP_ATTR_UPCALL_PID are required. */
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err, i;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	ovs_lock();

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_unlock_ovs;

	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

	/* Allocate table.
*/
	err = -ENOMEM;
	rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
	if (!dp->table)
		goto err_free_dp;

	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_table;
	}

	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
			    GFP_KERNEL);
	if (!dp->ports) {
		err = -ENOMEM;
		goto err_destroy_percpu;
	}

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&dp->ports[i]);

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		/* -EBUSY from port creation is reported to the requester as
		 * -EEXIST. */
		if (err == -EBUSY)
			err = -EEXIST;

		goto err_destroy_ports_array;
	}

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_NEW);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto err_destroy_local_port;

	/* Publish the datapath only once nothing else can fail. */
	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

	ovs_unlock();

	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
	return 0;

	/* Unwind in reverse order of construction. */
err_destroy_local_port:
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
err_destroy_ports_array:
	kfree(dp->ports);
err_destroy_percpu:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(ovsl_dereference(dp->table), false);
err_free_dp:
	release_net(ovs_dp_get_net(dp));
	kfree(dp);
err_unlock_ovs:
	ovs_unlock();
err:
	return err;
}

/* Called with ovs_mutex.
 *
 * Detaches every port of 'dp', unlinks the datapath from its namespace list
 * and hands the final teardown to destroy_dp_rcu() through call_rcu(). */
static void __dp_destroy(struct datapath *dp)
{
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del_rcu(&dp->list_node);

	/* OVSP_LOCAL is datapath internal port.
We need to make sure that
	 * all ports in the datapath are destroyed first before freeing the
	 * datapath. */
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

	call_rcu(&dp->rcu, destroy_dp_rcu);
}

/* OVS_DP_CMD_DEL handler.  The reply is built before the datapath is
 * destroyed, so a failure to build it leaves the datapath untouched. */
static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto unlock;

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto unlock;

	__dp_destroy(dp);
	ovs_unlock();

	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);

	return 0;
unlock:
	ovs_unlock();
	return err;
}

/* OVS_DP_CMD_SET handler.  As written it changes no datapath state; it looks
 * the datapath up and multicasts its current description.  A failure to
 * build that notification is signalled to listeners with netlink_set_err()
 * and the request itself still returns 0. */
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto unlock;

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
				ovs_dp_datapath_multicast_group.id, err);
		err = 0;
		goto unlock;
	}

	ovs_unlock();
	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);

	return 0;
unlock:
	ovs_unlock();
	return err;
}

/* OVS_DP_CMD_GET handler: unicasts the datapath description back to the
 * requester. */
static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto unlock;
	}

	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
				      info->snd_seq, OVS_DP_CMD_NEW);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);

unlock:
	ovs_unlock();
	return err;
}

/* Netlink dump callback for datapaths.  cb->args[0] holds the number of
 * list entries already emitted by earlier calls. */
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct datapath *dp;
	int skip =
cb->args[0];
	int i = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(dp, &ovs_net->dps, list_node) {
		/* Entries below 'skip' were sent already; stop at the first
		 * entry that does not fit so that it is retried next time. */
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_NEW) < 0)
			break;
		i++;
	}
	rcu_read_unlock();

	cb->args[0] = i;

	return skb->len;
}

/* Generic Netlink operations of the datapath family; only GET (including
 * dumps) is open to unprivileged users. */
static struct genl_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_set,
	},
};

/* Attribute validation policy for the vport family. */
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static struct genl_family dp_vport_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.netnsok = true,
	 SET_PARALLEL_OPS
};

/* Not static: declared extern in datapath.h and used by dp_notify.c. */
struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP
};

/* Called with ovs_mutex or RCU read lock.
*/
/* Appends one vport message (port number, type, name, upcall PID, stats and
 * type-specific options) to 'skb'.  Returns the result of genlmsg_end() on
 * success or -EMSGSIZE when the message does not fit. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
		goto nla_put_failure;

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
		    &vport_stats))
		goto nla_put_failure;

	/* Only -EMSGSIZE aborts the message; any other result from
	 * ovs_vport_get_options() is ignored and the message is completed. */
	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	return genlmsg_end(skb, ovs_header);

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}

/* Called with ovs_mutex or RCU read lock.
 *
 * Allocates (with GFP_ATOMIC) and fills a vport message.  Returns
 * ERR_PTR(-ENOMEM) if the allocation fails; a fill failure triggers
 * BUG_ON(). */
struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
					 u32 seq, u8 cmd)
{
	struct sk_buff *skb;
	int retval;

	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
	BUG_ON(retval < 0);

	return skb;
}

/* Called with ovs_mutex or RCU read lock.
*/
/* Finds a vport by OVS_VPORT_ATTR_NAME or, failing that, by
 * OVS_VPORT_ATTR_PORT_NO within the datapath named by 'ovs_header'.
 *
 * Returns the vport or an ERR_PTR(): -ENODEV if nothing matches (or a named
 * port belongs to a datapath other than a nonzero dp_ifindex), -EFBIG if the
 * port number is out of range, -EINVAL if neither attribute is present. */
static struct vport *lookup_vport(struct net *net,
				  struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		/* A dp_ifindex of 0 means "any datapath". */
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(net, ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = ovs_vport_ovsl_rcu(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else
		return ERR_PTR(-EINVAL);
}

/* OVS_VPORT_CMD_NEW handler.  Name, type and upcall PID are required.  When
 * no port number is requested, the lowest free number starting at 1 is
 * used. */
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	u32 port_no;
	int err;

	err = -EINVAL;
	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		goto exit;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock;

	if (a[OVS_VPORT_ATTR_PORT_NO]) {
		port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		err = -EFBIG;
		if (port_no >= DP_MAX_PORTS)
			goto exit_unlock;

		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock;
	} else {
		/* Linear search for the first unused port number. */
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	err = 0;
if (a[OVS_VPORT_ATTR_STATS])
		ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));

	reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
					 OVS_VPORT_CMD_NEW);
	if (IS_ERR(reply)) {
		/* Undo the port creation if it cannot be announced. */
		err = PTR_ERR(reply);
		ovs_dp_detach_port(vport);
		goto exit_unlock;
	}

	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);

exit_unlock:
	ovs_unlock();
exit:
	return err;
}

/* OVS_VPORT_CMD_SET handler: updates options, stats and/or upcall PID of an
 * existing vport.  The port type cannot be changed; a differing
 * OVS_VPORT_ATTR_TYPE yields -EINVAL.  The reply buffer is allocated before
 * any change is applied so that -ENOMEM leaves the port unmodified. */
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock;
	}

	reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!reply) {
		err = -ENOMEM;
		goto exit_unlock;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_free;
	}

	if (a[OVS_VPORT_ATTR_STATS])
		ovs_vport_set_stats(vport, nla_data(a[OVS_VPORT_ATTR_STATS]));

	if (a[OVS_VPORT_ATTR_UPCALL_PID])
		vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
	return 0;

exit_free:
	kfree_skb(reply);
exit_unlock:
	ovs_unlock();
	return err;
}

/* OVS_VPORT_CMD_DEL handler.  The OVSP_LOCAL port cannot be deleted this
 * way (-EINVAL). */
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock;
	}

	/* Build the notification while the port still exists. */
	reply = ovs_vport_cmd_build_info(vport, info->snd_portid,
					 info->snd_seq, OVS_VPORT_CMD_DEL);
	err = PTR_ERR(reply);
	if (IS_ERR(reply))
		goto exit_unlock;

	err = 0;
ovs_dp_detach_port(vport); ovs_notify(reply, info, &ovs_dp_vport_multicast_group); exit_unlock: ovs_unlock(); return err; } static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; struct sk_buff *reply; struct vport *vport; int err; rcu_read_lock(); vport = lookup_vport(sock_net(skb->sk), ovs_header, a); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) goto exit_unlock; rcu_read_unlock(); return genlmsg_reply(reply, info); exit_unlock: rcu_read_unlock(); return err; } static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct datapath *dp; int bucket = cb->args[0], skip = cb->args[1]; int i, j = 0; dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) return -ENODEV; rcu_read_lock(); for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; j = 0; hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_VPORT_CMD_NEW) < 0) goto out; j++; } skip = 0; } out: rcu_read_unlock(); cb->args[0] = i; cb->args[1] = j; return skb->len; } static struct genl_ops dp_vport_genl_ops[] = { { .cmd = OVS_VPORT_CMD_NEW, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ .policy = vport_policy, .doit = ovs_vport_cmd_del }, { .cmd = OVS_VPORT_CMD_GET, .flags = 0, /* OK for unprivileged users. 
*/
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_set,
	},
};

/* One Generic Netlink family together with its operations and its optional
 * multicast group ('group' may be NULL). */
struct genl_family_and_ops {
	struct genl_family *family;
	struct genl_ops *ops;
	int n_ops;
	struct genl_multicast_group *group;
};

/* All families registered by this module, in registration order. */
static const struct genl_family_and_ops dp_genl_families[] = {
	{ &dp_datapath_genl_family,
	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
	  &ovs_dp_datapath_multicast_group },
	{ &dp_vport_genl_family,
	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
	  &ovs_dp_vport_multicast_group },
	{ &dp_flow_genl_family,
	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
	  &ovs_dp_flow_multicast_group },
	{ &dp_packet_genl_family,
	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
	  NULL },
};

/* Unregisters the first 'n_families' entries of dp_genl_families[]. */
static void dp_unregister_genl(int n_families)
{
	int i;

	for (i = 0; i < n_families; i++)
		genl_unregister_family(dp_genl_families[i].family);
}

/* Registers every family in dp_genl_families[] and its multicast group.  On
 * failure, the families registered so far are unregistered again and the
 * error is returned. */
static int dp_register_genl(void)
{
	int n_registered;
	int err;
	int i;

	n_registered = 0;
	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
		const struct genl_family_and_ops *f = &dp_genl_families[i];

		err = genl_register_family_with_ops(f->family, f->ops,
						    f->n_ops);
		if (err)
			goto error;
		/* Counted before the group is registered, so a group failure
		 * also unregisters this family. */
		n_registered++;

		if (f->group) {
			err = genl_register_mc_group(f->family, f->group);
			if (err)
				goto error;
		}
	}

	return 0;

error:
	dp_unregister_genl(n_registered);
	return err;
}

/* Delayed-work handler: rehashes the flow table of every datapath in every
 * network namespace, then re-arms itself.  A table whose rehash fails is
 * simply left as it is. */
static void rehash_flow_table(struct work_struct *work)
{
	struct datapath *dp;
	struct net *net;

	ovs_lock();
	rtnl_lock();
	for_each_net(net) {
		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

		list_for_each_entry(dp, &ovs_net->dps, list_node) {
			struct flow_table *old_table = ovsl_dereference(dp->table);
			struct flow_table *new_table;

			new_table = ovs_flow_tbl_rehash(old_table);
			if (!IS_ERR(new_table)) {
				rcu_assign_pointer(dp->table, new_table);
				ovs_flow_tbl_destroy(old_table, true);
			}
		}
	}
	rtnl_unlock();
	ovs_unlock();
schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
}

/* Per-namespace init: sets up the empty datapath list and the work item
 * that runs ovs_dp_notify_wq(). */
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	return 0;
}

/* Per-namespace exit: destroys every datapath of the namespace under
 * ovs_mutex, then waits for a pending notify work item to finish. */
static void __net_exit ovs_exit_net(struct net *net)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

	ovs_lock();
	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);
	ovs_unlock();

	cancel_work_sync(&ovs_net->dp_notify_work);
}

static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};

DEFINE_COMPAT_PNET_REG_FUNC(device);

/* Module entry point.  Brings the subsystems up in order (workqueues, flow,
 * vport, per-net state, netdevice notifier, Generic Netlink families) and
 * unwinds in reverse order if any step fails. */
static int __init dp_init(void)
{
	int err;

	/* struct ovs_skb_cb must fit into the skb control buffer. */
	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
		VERSION);

	err = ovs_workqueues_init();
	if (err)
		goto error;

	err = ovs_flow_init();
	if (err)
		goto error_wq;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_notifier;

	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);

	return 0;

error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_wq:
	ovs_workqueues_exit();
error:
	return err;
}

/* Module exit point: tears everything down in the reverse order of
 * dp_init().  rcu_barrier() waits for outstanding call_rcu() callbacks
 * before the vport and flow subsystems are shut down. */
static void dp_cleanup(void)
{
	cancel_delayed_work_sync(&rehash_flow_wq);
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_workqueues_exit();
}

module_init(dp_init);
module_exit(dp_cleanup); MODULE_DESCRIPTION("Open vSwitch switching datapath"); MODULE_LICENSE("GPL"); MODULE_VERSION(VERSION); openvswitch-2.0.1+git20140120/datapath/datapath.h000066400000000000000000000131471226605124000212050ustar00rootroot00000000000000/* * Copyright (c) 2007-2012 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #ifndef DATAPATH_H #define DATAPATH_H 1 #include #include #include #include #include #include #include "compat.h" #include "flow.h" #include "vlan.h" #include "vport.h" #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 #define SAMPLE_ACTION_DEPTH 3 /** * struct dp_stats_percpu - per-cpu packet processing statistics for a given * datapath. * @n_hit: Number of received packets for which a matching flow was found in * the flow table. * @n_missed: Number of received packets that had no matching flow in the flow * table. The sum of @n_hit and @n_missed is the number of packets that have * been received by the datapath. * @n_lost: Number of received packets that had no matching flow in the flow * table that could not be sent to userspace (normally due to an overflow in * one of the datapath's queues). */ struct dp_stats_percpu { u64 n_hit; u64 n_missed; u64 n_lost; struct u64_stats_sync sync; }; /** * struct datapath - datapath for flow-based packet switching * @rcu: RCU callback head for deferred destruction. 
* @list_node: Element in the per-namespace 'dps' list (see struct ovs_net).
 * @table: Current flow table.  Protected by ovs_mutex and RCU.
 * @ports: Hash table for ports.  %OVSP_LOCAL port always exists.  Protected by
 * ovs_mutex and RCU.
 * @stats_percpu: Per-CPU datapath statistics.
 * @net: Reference to net namespace.
 *
 * Context: See the comment on locking at the top of datapath.c for additional
 * locking information.
 */
struct datapath {
	struct rcu_head rcu;
	struct list_head list_node;

	/* Flow table. */
	struct flow_table __rcu *table;

	/* Switch ports. */
	struct hlist_head *ports;

	/* Stats. */
	struct dp_stats_percpu __percpu *stats_percpu;

#ifdef CONFIG_NET_NS
	/* Network namespace ref. */
	struct net *net;
#endif
};

/**
 * struct ovs_skb_cb - OVS data in skb CB
 * @flow: The flow associated with this packet.  May be %NULL if no flow.
 * @pkt_key: The flow information extracted from the packet.  Must be nonnull.
 * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the
 * packet is not being tunneled.
 */
struct ovs_skb_cb {
	struct sw_flow		*flow;
	struct sw_flow_key	*pkt_key;
	struct ovs_key_ipv4_tunnel  *tun_key;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)

/**
 * struct dp_upcall_info - metadata to include with a packet to send to
 * userspace
 * @cmd: One of %OVS_PACKET_CMD_*.
 * @key: Becomes %OVS_PACKET_ATTR_KEY.  Must be nonnull.
 * @userdata: If nonnull, its variable-length value is passed to userspace as
 * %OVS_PACKET_ATTR_USERDATA.
 * @portid: Netlink PID to which packet should be sent.  If @portid is 0 then no
 * packet is sent and the packet is accounted in the datapath's @n_lost
 * counter.
 */
struct dp_upcall_info {
	u8 cmd;
	const struct sw_flow_key *key;
	const struct nlattr *userdata;
	u32 portid;
};

/**
 * struct ovs_net - Per net-namespace data for ovs.
 * @dps: List of datapaths to enable dumping them all out.
 * Modified under ovs_mutex; the dump path walks it under RCU.
 * @vport_net: Per network namespace data for vport.
 * @dp_notify_work: Work item that runs ovs_dp_notify_wq().
*/
struct ovs_net {
	struct list_head dps;
	struct vport_net vport_net;
	struct work_struct dp_notify_work;
};

extern int ovs_net_id;
void ovs_lock(void);
void ovs_unlock(void);

/* Without lockdep there is no way to check the lock, so the predicate is
 * hard-wired to "held". */
#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void);
#else
#define lockdep_ovsl_is_held()	1
#endif

/* Warns if ovs_mutex is (detectably) not held. */
#define ASSERT_OVSL()		WARN_ON(unlikely(!lockdep_ovsl_is_held()))
/* Dereferences an RCU pointer on the update side, i.e. with ovs_mutex held. */
#define ovsl_dereference(p)					\
	rcu_dereference_protected(p, lockdep_ovsl_is_held())

/* Returns the network namespace recorded for 'dp'. */
static inline struct net *ovs_dp_get_net(struct datapath *dp)
{
	return read_pnet(&dp->net);
}

/* Records 'net' as the network namespace of 'dp'. */
static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
{
	write_pnet(&dp->net, net);
}

struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);

/* The three wrappers below all call ovs_lookup_vport(); they differ only in
 * which locking context they check for: RCU read lock, either RCU read lock
 * or ovs_mutex, or ovs_mutex alone. */
static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return ovs_lookup_vport(dp, port_no);
}

static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
{
	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
	return ovs_lookup_vport(dp, port_no);
}

static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
{
	ASSERT_OVSL();
	return ovs_lookup_vport(dp, port_no);
}

extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_multicast_group ovs_dp_vport_multicast_group;

void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
		  const struct dp_upcall_info *);

const char *ovs_dp_name(const struct datapath *dp);
struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 portid, u32 seq,
					 u8 cmd);

int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
void ovs_dp_notify_wq(struct work_struct *work);

#define OVS_NLERR(fmt, ...)
\ pr_info_once("netlink: " fmt, ##__VA_ARGS__) #endif /* datapath.h */ openvswitch-2.0.1+git20140120/datapath/dp_notify.c000066400000000000000000000052701226605124000214030ustar00rootroot00000000000000/* * Copyright (c) 2007-2012 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #include #include #include #include #include "datapath.h" #include "vport-internal_dev.h" #include "vport-netdev.h" /* Detaches 'vport' from its datapath and multicasts an OVS_VPORT_CMD_DEL notification. The message is built before the port is detached; if building it failed, listeners are told through netlink_set_err() instead. */ static void dp_detach_port_notify(struct vport *vport) { struct sk_buff *notify; struct datapath *dp; dp = vport->dp; notify = ovs_vport_cmd_build_info(vport, 0, 0, OVS_VPORT_CMD_DEL); ovs_dp_detach_port(vport); if (IS_ERR(notify)) { netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, ovs_dp_vport_multicast_group.id, PTR_ERR(notify)); return; } genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, ovs_dp_vport_multicast_group.id, GFP_KERNEL); } /* Work handler for ovs_net->dp_notify_work: under ovs_mutex, scans every OVS_VPORT_TYPE_NETDEV port of every datapath in the namespace and detaches those whose device no longer maps back to a vport. */ void ovs_dp_notify_wq(struct work_struct *work) { struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work); struct datapath *dp; ovs_lock(); list_for_each_entry(dp, &ovs_net->dps, list_node) { int i; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; struct hlist_node *n; hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) { struct netdev_vport *netdev_vport; if (vport->ops->type != OVS_VPORT_TYPE_NETDEV) continue; netdev_vport = netdev_vport_priv(vport); if 
(!(ovs_netdev_get_vport(netdev_vport->dev))) dp_detach_port_notify(vport); } } } ovs_unlock(); } static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct ovs_net *ovs_net; struct net_device *dev = ptr; struct vport *vport = NULL; if (!ovs_is_internal_dev(dev)) vport = ovs_netdev_get_vport(dev); if (!vport) return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { /* upper_dev_unlink and decrement promisc immediately */ ovs_netdev_detach_dev(vport); /* schedule vport destroy, dev_put and genl notification */ ovs_net = net_generic(dev_net(dev), ovs_net_id); queue_work(&ovs_net->dp_notify_work); } return NOTIFY_DONE; } struct notifier_block ovs_dp_device_notifier = { .notifier_call = dp_device_event }; openvswitch-2.0.1+git20140120/datapath/flow.c000066400000000000000000001571171226605124000203670ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #include "flow.h" #include "datapath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vlan.h" static struct kmem_cache *flow_cache; static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, struct sw_flow_key_range *range, u8 val); static void update_range__(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) { struct sw_flow_key_range *range = NULL; size_t start = rounddown(offset, sizeof(long)); size_t end = roundup(offset + size, sizeof(long)); if (!is_mask) range = &match->range; else if (match->mask) range = &match->mask->range; if (!range) return; if (range->start == range->end) { range->start = start; range->end = end; return; } if (range->start > start) range->start = start; if (range->end < end) range->end = end; } #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ do { \ update_range__(match, offsetof(struct sw_flow_key, field), \ sizeof((match)->key->field), is_mask); \ if (is_mask) { \ if ((match)->mask) \ (match)->mask->key.field = value; \ } else { \ (match)->key->field = value; \ } \ } while (0) #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ do { \ update_range__(match, offsetof(struct sw_flow_key, field), \ len, is_mask); \ if (is_mask) { \ if ((match)->mask) \ memcpy(&(match)->mask->key.field, value_p, len);\ } else { \ memcpy(&(match)->key->field, value_p, len); \ } \ } while (0) static u16 range_n_bytes(const struct sw_flow_key_range *range) { return range->end - range->start; } void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask) { memset(match, 0, 
sizeof(*match)); match->key = key; match->mask = mask; memset(key, 0, sizeof(*key)); if (mask) { memset(&mask->key, 0, sizeof(mask->key)); mask->range.start = mask->range.end = 0; } } static bool ovs_match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs) { u64 key_expected = 1ULL << OVS_KEY_ATTR_ETHERNET; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ /* The following mask attributes allowed only if they * pass the validation tests. */ mask_allowed &= ~((1ULL << OVS_KEY_ATTR_IPV4) | (1ULL << OVS_KEY_ATTR_IPV6) | (1ULL << OVS_KEY_ATTR_TCP) | (1ULL << OVS_KEY_ATTR_UDP) | (1ULL << OVS_KEY_ATTR_SCTP) | (1ULL << OVS_KEY_ATTR_ICMP) | (1ULL << OVS_KEY_ATTR_ICMPV6) | (1ULL << OVS_KEY_ATTR_ARP) | (1ULL << OVS_KEY_ATTR_ND)); /* Always allowed mask fields. */ mask_allowed |= ((1ULL << OVS_KEY_ATTR_TUNNEL) | (1ULL << OVS_KEY_ATTR_IN_PORT) | (1ULL << OVS_KEY_ATTR_ETHERTYPE)); /* Check key attributes. */ if (match->key->eth.type == htons(ETH_P_ARP) || match->key->eth.type == htons(ETH_P_RARP)) { key_expected |= 1ULL << OVS_KEY_ATTR_ARP; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) mask_allowed |= 1ULL << OVS_KEY_ATTR_ARP; } if (match->key->eth.type == htons(ETH_P_IP)) { key_expected |= 1ULL << OVS_KEY_ATTR_IPV4; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV4; if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.proto == IPPROTO_UDP) { key_expected |= 1ULL << OVS_KEY_ATTR_UDP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP; } if (match->key->ip.proto == IPPROTO_SCTP) { key_expected |= 1ULL << OVS_KEY_ATTR_SCTP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP; } if (match->key->ip.proto == IPPROTO_TCP) { key_expected |= 1ULL << OVS_KEY_ATTR_TCP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP; 
} if (match->key->ip.proto == IPPROTO_ICMP) { key_expected |= 1ULL << OVS_KEY_ATTR_ICMP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMP; } } } if (match->key->eth.type == htons(ETH_P_IPV6)) { key_expected |= 1ULL << OVS_KEY_ATTR_IPV6; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) mask_allowed |= 1ULL << OVS_KEY_ATTR_IPV6; if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.proto == IPPROTO_UDP) { key_expected |= 1ULL << OVS_KEY_ATTR_UDP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1ULL << OVS_KEY_ATTR_UDP; } if (match->key->ip.proto == IPPROTO_SCTP) { key_expected |= 1ULL << OVS_KEY_ATTR_SCTP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1ULL << OVS_KEY_ATTR_SCTP; } if (match->key->ip.proto == IPPROTO_TCP) { key_expected |= 1ULL << OVS_KEY_ATTR_TCP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1ULL << OVS_KEY_ATTR_TCP; } if (match->key->ip.proto == IPPROTO_ICMPV6) { key_expected |= 1ULL << OVS_KEY_ATTR_ICMPV6; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1ULL << OVS_KEY_ATTR_ICMPV6; if (match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || match->key->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { key_expected |= 1ULL << OVS_KEY_ATTR_ND; if (match->mask && (match->mask->key.ipv6.tp.src == htons(0xffff))) mask_allowed |= 1ULL << OVS_KEY_ATTR_ND; } } } } if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n", key_attrs, key_expected); return false; } if ((mask_attrs & mask_allowed) != mask_attrs) { /* Mask attributes check failed. 
*/ OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n", mask_attrs, mask_allowed); return false; } return true; } static int check_header(struct sk_buff *skb, int len) { if (unlikely(skb->len < len)) return -EINVAL; if (unlikely(!pskb_may_pull(skb, len))) return -ENOMEM; return 0; } static bool arphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_network_offset(skb) + sizeof(struct arp_eth_header)); } static int check_iphdr(struct sk_buff *skb) { unsigned int nh_ofs = skb_network_offset(skb); unsigned int ip_len; int err; err = check_header(skb, nh_ofs + sizeof(struct iphdr)); if (unlikely(err)) return err; ip_len = ip_hdrlen(skb); if (unlikely(ip_len < sizeof(struct iphdr) || skb->len < nh_ofs + ip_len)) return -EINVAL; skb_set_transport_header(skb, nh_ofs + ip_len); return 0; } static bool tcphdr_ok(struct sk_buff *skb) { int th_ofs = skb_transport_offset(skb); int tcp_len; if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr)))) return false; tcp_len = tcp_hdrlen(skb); if (unlikely(tcp_len < sizeof(struct tcphdr) || skb->len < th_ofs + tcp_len)) return false; return true; } static bool udphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)); } static bool sctphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct sctphdr)); } static bool icmphdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct icmphdr)); } u64 ovs_flow_used_time(unsigned long flow_jiffies) { struct timespec cur_ts; u64 cur_ms, idle_ms; ktime_get_ts(&cur_ts); idle_ms = jiffies_to_msecs(jiffies - flow_jiffies); cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC + cur_ts.tv_nsec / NSEC_PER_MSEC; return cur_ms - idle_ms; } static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) { unsigned int nh_ofs = skb_network_offset(skb); unsigned int nh_len; int payload_ofs; struct ipv6hdr *nh; uint8_t 
nexthdr; __be16 frag_off; int err; err = check_header(skb, nh_ofs + sizeof(*nh)); if (unlikely(err)) return err; nh = ipv6_hdr(skb); nexthdr = nh->nexthdr; payload_ofs = (u8 *)(nh + 1) - skb->data; key->ip.proto = NEXTHDR_NONE; key->ip.tos = ipv6_get_dsfield(nh); key->ip.ttl = nh->hop_limit; key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); key->ipv6.addr.src = nh->saddr; key->ipv6.addr.dst = nh->daddr; payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off); if (unlikely(payload_ofs < 0)) return -EINVAL; if (frag_off) { if (frag_off & htons(~0x7)) key->ip.frag = OVS_FRAG_TYPE_LATER; else key->ip.frag = OVS_FRAG_TYPE_FIRST; } nh_len = payload_ofs - nh_ofs; skb_set_transport_header(skb, nh_ofs + nh_len); key->ip.proto = nexthdr; return nh_len; } static bool icmp6hdr_ok(struct sk_buff *skb) { return pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct icmp6hdr)); } void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask) { const long *m = (long *)((u8 *)&mask->key + mask->range.start); const long *s = (long *)((u8 *)src + mask->range.start); long *d = (long *)((u8 *)dst + mask->range.start); int i; /* The memory outside of the 'mask->range' are not set since * further operations on 'dst' only uses contents within * 'mask->range'. 
*/ for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long)) *d++ = *s++ & *m++; } #define TCP_FLAGS_OFFSET 13 #define TCP_FLAG_MASK 0x3f void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) { u8 tcp_flags = 0; if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && flow->key.ip.proto == IPPROTO_TCP && likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { u8 *tcp = (u8 *)tcp_hdr(skb); tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK; } spin_lock(&flow->lock); flow->used = jiffies; flow->packet_count++; flow->byte_count += skb->len; flow->tcp_flags |= tcp_flags; spin_unlock(&flow->lock); } struct sw_flow_actions *ovs_flow_actions_alloc(int size) { struct sw_flow_actions *sfa; if (size > MAX_ACTIONS_BUFSIZE) return ERR_PTR(-EINVAL); sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); sfa->actions_len = 0; return sfa; } struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); spin_lock_init(&flow->lock); flow->sf_acts = NULL; flow->mask = NULL; return flow; } static struct hlist_head *find_bucket(struct flow_table *table, u32 hash) { hash = jhash_1word(hash, table->hash_seed); return flex_array_get(table->buckets, (hash & (table->n_buckets - 1))); } static struct flex_array *alloc_buckets(unsigned int n_buckets) { struct flex_array *buckets; int i, err; buckets = flex_array_alloc(sizeof(struct hlist_head), n_buckets, GFP_KERNEL); if (!buckets) return NULL; err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL); if (err) { flex_array_free(buckets); return NULL; } for (i = 0; i < n_buckets; i++) INIT_HLIST_HEAD((struct hlist_head *) flex_array_get(buckets, i)); return buckets; } static void free_buckets(struct flex_array *buckets) { flex_array_free(buckets); } static struct flow_table *__flow_tbl_alloc(int new_size) { struct flow_table *table = kmalloc(sizeof(*table), 
GFP_KERNEL); if (!table) return NULL; table->buckets = alloc_buckets(new_size); if (!table->buckets) { kfree(table); return NULL; } table->n_buckets = new_size; table->count = 0; table->node_ver = 0; table->keep_flows = false; get_random_bytes(&table->hash_seed, sizeof(u32)); table->mask_list = NULL; return table; } static void __flow_tbl_destroy(struct flow_table *table) { int i; if (table->keep_flows) goto skip_flows; for (i = 0; i < table->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head = flex_array_get(table->buckets, i); struct hlist_node *n; int ver = table->node_ver; hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { hlist_del(&flow->hash_node[ver]); ovs_flow_free(flow, false); } } BUG_ON(!list_empty(table->mask_list)); kfree(table->mask_list); skip_flows: free_buckets(table->buckets); kfree(table); } struct flow_table *ovs_flow_tbl_alloc(int new_size) { struct flow_table *table = __flow_tbl_alloc(new_size); if (!table) return NULL; table->mask_list = kmalloc(sizeof(struct list_head), GFP_KERNEL); if (!table->mask_list) { table->keep_flows = true; __flow_tbl_destroy(table); return NULL; } INIT_LIST_HEAD(table->mask_list); return table; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) { struct flow_table *table = container_of(rcu, struct flow_table, rcu); __flow_tbl_destroy(table); } void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) { if (!table) return; if (deferred) call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb); else __flow_tbl_destroy(table); } struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *last) { struct sw_flow *flow; struct hlist_head *head; int ver; int i; ver = table->node_ver; while (*bucket < table->n_buckets) { i = 0; head = flex_array_get(table->buckets, *bucket); hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { if (i < *last) { i++; continue; } *last = i + 1; return flow; } (*bucket)++; *last = 0; } return NULL; } static void __tbl_insert(struct flow_table 
*table, struct sw_flow *flow) { struct hlist_head *head; head = find_bucket(table, flow->hash); hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); table->count++; } static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) { int old_ver; int i; old_ver = old->node_ver; new->node_ver = !old_ver; /* Insert in new table. */ for (i = 0; i < old->n_buckets; i++) { struct sw_flow *flow; struct hlist_head *head; head = flex_array_get(old->buckets, i); hlist_for_each_entry(flow, head, hash_node[old_ver]) __tbl_insert(new, flow); } new->mask_list = old->mask_list; old->keep_flows = true; } static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets) { struct flow_table *new_table; new_table = __flow_tbl_alloc(n_buckets); if (!new_table) return ERR_PTR(-ENOMEM); flow_table_copy_flows(table, new_table); return new_table; } struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table) { return __flow_tbl_rehash(table, table->n_buckets); } struct flow_table *ovs_flow_tbl_expand(struct flow_table *table) { return __flow_tbl_rehash(table, table->n_buckets * 2); } static void __flow_free(struct sw_flow *flow) { kfree((struct sf_flow_acts __force *)flow->sf_acts); kmem_cache_free(flow_cache, flow); } static void rcu_free_flow_callback(struct rcu_head *rcu) { struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu); __flow_free(flow); } void ovs_flow_free(struct sw_flow *flow, bool deferred) { if (!flow) return; ovs_sw_flow_mask_del_ref(flow->mask, deferred); if (deferred) call_rcu(&flow->rcu, rcu_free_flow_callback); else __flow_free(flow); } /* RCU callback used by ovs_flow_deferred_free_acts. */ static void rcu_free_acts_callback(struct rcu_head *rcu) { struct sw_flow_actions *sf_acts = container_of(rcu, struct sw_flow_actions, rcu); kfree(sf_acts); } /* Schedules 'sf_acts' to be freed after the next RCU grace period. * The caller must hold rcu_read_lock for this to be sensible. 
*/ void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts) { call_rcu(&sf_acts->rcu, rcu_free_acts_callback); } static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) { struct qtag_prefix { __be16 eth_type; /* ETH_P_8021Q */ __be16 tci; }; struct qtag_prefix *qp; if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) return 0; if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + sizeof(__be16)))) return -ENOMEM; qp = (struct qtag_prefix *) skb->data; key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); __skb_pull(skb, sizeof(struct qtag_prefix)); return 0; } static __be16 parse_ethertype(struct sk_buff *skb) { struct llc_snap_hdr { u8 dsap; /* Always 0xAA */ u8 ssap; /* Always 0xAA */ u8 ctrl; u8 oui[3]; __be16 ethertype; }; struct llc_snap_hdr *llc; __be16 proto; proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); if (ntohs(proto) >= ETH_P_802_3_MIN) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) return htons(ETH_P_802_2); if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr)))) return htons(0); llc = (struct llc_snap_hdr *) skb->data; if (llc->dsap != LLC_SAP_SNAP || llc->ssap != LLC_SAP_SNAP || (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0) return htons(ETH_P_802_2); __skb_pull(skb, sizeof(struct llc_snap_hdr)); if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) return llc->ethertype; return htons(ETH_P_802_2); } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, int nh_len) { struct icmp6hdr *icmp = icmp6_hdr(skb); /* The ICMPv6 type and code fields use the 16-bit transport port * fields, so we need to store them in 16-bit network byte order. 
*/ key->ipv6.tp.src = htons(icmp->icmp6_type); key->ipv6.tp.dst = htons(icmp->icmp6_code); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION || icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) { int icmp_len = skb->len - skb_transport_offset(skb); struct nd_msg *nd; int offset; /* In order to process neighbor discovery options, we need the * entire packet. */ if (unlikely(icmp_len < sizeof(*nd))) return 0; if (unlikely(skb_linearize(skb))) return -ENOMEM; nd = (struct nd_msg *)skb_transport_header(skb); key->ipv6.nd.target = nd->target; icmp_len -= sizeof(*nd); offset = 0; while (icmp_len >= 8) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd->opt + offset); int opt_len = nd_opt->nd_opt_len * 8; if (unlikely(!opt_len || opt_len > icmp_len)) return 0; /* Store the link layer address if the appropriate * option is provided. It is considered an error if * the same link layer option is specified twice. */ if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll))) goto invalid; memcpy(key->ipv6.nd.sll, &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR && opt_len == 8) { if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll))) goto invalid; memcpy(key->ipv6.nd.tll, &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); } icmp_len -= opt_len; offset += opt_len; } } return 0; invalid: memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target)); memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll)); memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll)); return 0; } /** * ovs_flow_extract - extracts a flow key from an Ethernet frame. * @skb: sk_buff that contains the frame, with skb->data pointing to the * Ethernet header * @in_port: port number on which @skb was received. * @key: output flow key * * The caller must ensure that skb->len >= ETH_HLEN. * * Returns 0 if successful, otherwise a negative errno value. 
* * Initializes @skb header pointers as follows: * * - skb->mac_header: the Ethernet header. * * - skb->network_header: just past the Ethernet header, or just past the * VLAN header, to the first byte of the Ethernet payload. * * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 * on output, then just past the IP header, if one is present and * of a correct length, otherwise the same as skb->network_header. * For other key->eth.type values it is left untouched. */ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key) { int error; struct ethhdr *eth; memset(key, 0, sizeof(*key)); key->phy.priority = skb->priority; if (OVS_CB(skb)->tun_key) memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); key->phy.in_port = in_port; key->phy.skb_mark = skb->mark; skb_reset_mac_header(skb); /* Link layer. We are guaranteed to have at least the 14 byte Ethernet * header in the linear data area. */ eth = eth_hdr(skb); memcpy(key->eth.src, eth->h_source, ETH_ALEN); memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); __skb_pull(skb, 2 * ETH_ALEN); /* We are going to push all headers that we pull, so no need to * update skb->csum here. */ if (vlan_tx_tag_present(skb)) key->eth.tci = htons(vlan_get_tci(skb)); else if (eth->h_proto == htons(ETH_P_8021Q)) if (unlikely(parse_vlan(skb, key))) return -ENOMEM; key->eth.type = parse_ethertype(skb); if (unlikely(key->eth.type == htons(0))) return -ENOMEM; skb_reset_network_header(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); /* Network layer. 
*/ if (key->eth.type == htons(ETH_P_IP)) { struct iphdr *nh; __be16 offset; error = check_iphdr(skb); if (unlikely(error)) { if (error == -EINVAL) { skb->transport_header = skb->network_header; error = 0; } return error; } nh = ip_hdr(skb); key->ipv4.addr.src = nh->saddr; key->ipv4.addr.dst = nh->daddr; key->ip.proto = nh->protocol; key->ip.tos = nh->tos; key->ip.ttl = nh->ttl; offset = nh->frag_off & htons(IP_OFFSET); if (offset) { key->ip.frag = OVS_FRAG_TYPE_LATER; return 0; } if (nh->frag_off & htons(IP_MF) || skb_shinfo(skb)->gso_type & SKB_GSO_UDP) key->ip.frag = OVS_FRAG_TYPE_FIRST; /* Transport layer. */ if (key->ip.proto == IPPROTO_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); key->ipv4.tp.src = tcp->source; key->ipv4.tp.dst = tcp->dest; } } else if (key->ip.proto == IPPROTO_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); key->ipv4.tp.src = udp->source; key->ipv4.tp.dst = udp->dest; } } else if (key->ip.proto == IPPROTO_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); key->ipv4.tp.src = sctp->source; key->ipv4.tp.dst = sctp->dest; } } else if (key->ip.proto == IPPROTO_ICMP) { if (icmphdr_ok(skb)) { struct icmphdr *icmp = icmp_hdr(skb); /* The ICMP type and code fields use the 16-bit * transport port fields, so we need to store * them in 16-bit network byte order. */ key->ipv4.tp.src = htons(icmp->type); key->ipv4.tp.dst = htons(icmp->code); } } } else if ((key->eth.type == htons(ETH_P_ARP) || key->eth.type == htons(ETH_P_RARP)) && arphdr_ok(skb)) { struct arp_eth_header *arp; arp = (struct arp_eth_header *)skb_network_header(skb); if (arp->ar_hrd == htons(ARPHRD_ETHER) && arp->ar_pro == htons(ETH_P_IP) && arp->ar_hln == ETH_ALEN && arp->ar_pln == 4) { /* We only match on the lower 8 bits of the opcode. 
*/ if (ntohs(arp->ar_op) <= 0xff) key->ip.proto = ntohs(arp->ar_op); memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src)); memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst)); memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ nh_len = parse_ipv6hdr(skb, key); if (unlikely(nh_len < 0)) { if (nh_len == -EINVAL) { skb->transport_header = skb->network_header; error = 0; } else { error = nh_len; } return error; } if (key->ip.frag == OVS_FRAG_TYPE_LATER) return 0; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) key->ip.frag = OVS_FRAG_TYPE_FIRST; /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { struct tcphdr *tcp = tcp_hdr(skb); key->ipv6.tp.src = tcp->source; key->ipv6.tp.dst = tcp->dest; } } else if (key->ip.proto == NEXTHDR_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); key->ipv6.tp.src = udp->source; key->ipv6.tp.dst = udp->dest; } } else if (key->ip.proto == NEXTHDR_SCTP) { if (sctphdr_ok(skb)) { struct sctphdr *sctp = sctp_hdr(skb); key->ipv6.tp.src = sctp->source; key->ipv6.tp.dst = sctp->dest; } } else if (key->ip.proto == NEXTHDR_ICMP) { if (icmp6hdr_ok(skb)) { error = parse_icmpv6(skb, key, nh_len); if (error) return error; } } } return 0; } static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_end) { u32 *hash_key = (u32 *)((u8 *)key + key_start); int hash_u32s = (key_end - key_start) >> 2; /* Make sure number of hash bytes are multiple of u32. 
*/ BUILD_BUG_ON(sizeof(long) % sizeof(u32)); return jhash2(hash_key, hash_u32s, 0); } static int flow_key_start(const struct sw_flow_key *key) { if (key->tun_key.ipv4_dst) return 0; else return rounddown(offsetof(struct sw_flow_key, phy), sizeof(long)); } static bool __cmp_key(const struct sw_flow_key *key1, const struct sw_flow_key *key2, int key_start, int key_end) { const long *cp1 = (long *)((u8 *)key1 + key_start); const long *cp2 = (long *)((u8 *)key2 + key_start); long diffs = 0; int i; for (i = key_start; i < key_end; i += sizeof(long)) diffs |= *cp1++ ^ *cp2++; return diffs == 0; } static bool __flow_cmp_masked_key(const struct sw_flow *flow, const struct sw_flow_key *key, int key_start, int key_end) { return __cmp_key(&flow->key, key, key_start, key_end); } static bool __flow_cmp_unmasked_key(const struct sw_flow *flow, const struct sw_flow_key *key, int key_start, int key_end) { return __cmp_key(&flow->unmasked_key, key, key_start, key_end); } bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, const struct sw_flow_key *key, int key_end) { int key_start; key_start = flow_key_start(key); return __flow_cmp_unmasked_key(flow, key, key_start, key_end); } struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, struct sw_flow_match *match) { struct sw_flow_key *unmasked = match->key; int key_end = match->range.end; struct sw_flow *flow; flow = ovs_flow_lookup(table, unmasked); if (flow && (!ovs_flow_cmp_unmasked_key(flow, unmasked, key_end))) flow = NULL; return flow; } static struct sw_flow *ovs_masked_flow_lookup(struct flow_table *table, const struct sw_flow_key *unmasked, struct sw_flow_mask *mask) { struct sw_flow *flow; struct hlist_head *head; int key_start = mask->range.start; int key_end = mask->range.end; u32 hash; struct sw_flow_key masked_key; ovs_flow_key_mask(&masked_key, unmasked, mask); hash = ovs_flow_hash(&masked_key, key_start, key_end); head = find_bucket(table, hash); hlist_for_each_entry_rcu(flow, head, 
hash_node[table->node_ver]) { if (flow->mask == mask && __flow_cmp_masked_key(flow, &masked_key, key_start, key_end)) return flow; } return NULL; } struct sw_flow *ovs_flow_lookup(struct flow_table *tbl, const struct sw_flow_key *key) { struct sw_flow *flow = NULL; struct sw_flow_mask *mask; list_for_each_entry_rcu(mask, tbl->mask_list, list) { flow = ovs_masked_flow_lookup(tbl, key, mask); if (flow) /* Found */ break; } return flow; } void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow) { flow->hash = ovs_flow_hash(&flow->key, flow->mask->range.start, flow->mask->range.end); __tbl_insert(table, flow); } void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow) { BUG_ON(table->count == 0); hlist_del_rcu(&flow->hash_node[table->node_ver]); table->count--; } /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ENCAP] = -1, [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), [OVS_KEY_ATTR_VLAN] = sizeof(__be16), [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), [OVS_KEY_ATTR_TUNNEL] = -1, }; static bool is_all_zero(const u8 *fp, size_t size) { int i; if (!fp) return false; for (i = 0; i < size; i++) if (fp[i]) return false; return true; } static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool nz) { const struct nlattr *nla; u64 attrs; int rem; attrs = 
*attrsp;

	/* Visit every attribute nested inside 'attr', validating each one. */
	nla_for_each_nested(nla, attr, rem) {
		u16 type = nla_type(nla);
		int expected_len;

		if (type > OVS_KEY_ATTR_MAX) {
			OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
				  type, OVS_KEY_ATTR_MAX);
			return -EINVAL;
		}

		/* 'attrs' is a bitmap of attribute types already seen. */
		if (attrs & (1ULL << type)) {
			OVS_NLERR("Duplicate key attribute (type %d).\n", type);
			return -EINVAL;
		}

		/* An expected length of -1 disables the length check. */
		expected_len = ovs_key_lens[type];
		if (nla_len(nla) != expected_len && expected_len != -1) {
			OVS_NLERR("Key attribute has unexpected length (type=%d"
				  ", length=%d, expected=%d).\n", type,
				  nla_len(nla), expected_len);
			return -EINVAL;
		}

		/*
		 * When 'nz' is set, all-zero attributes are not recorded.
		 * NOTE(review): 'expected_len' can still be -1 at this point;
		 * confirm that is_all_zero() copes with that length, or pass
		 * nla_len(nla) instead.
		 */
		if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
			attrs |= 1ULL << type;
			a[type] = nla;
		}
	}
	if (rem) {
		OVS_NLERR("Message has %d unknown bytes.\n", rem);
		return -EINVAL;
	}

	*attrsp = attrs;
	return 0;
}

/*
 * Parses the nested attributes of a flow mask.  Calls
 * __parse_flow_nlattrs() with 'nz' set, so all-zero attributes are not
 * recorded in 'a' or '*attrsp'.
 */
static int parse_flow_mask_nlattrs(const struct nlattr *attr,
				   const struct nlattr *a[], u64 *attrsp)
{
	return __parse_flow_nlattrs(attr, a, attrsp, true);
}

/*
 * Parses the nested attributes of a flow key.  Calls
 * __parse_flow_nlattrs() with 'nz' clear, so every attribute is recorded.
 */
static int parse_flow_nlattrs(const struct nlattr *attr,
			      const struct nlattr *a[], u64 *attrsp)
{
	return __parse_flow_nlattrs(attr, a, attrsp, false);
}

/*
 * Parses the attributes nested in 'attr' into the tunnel key of 'match'
 * (or into its mask when 'is_mask' is true).
 *
 * Returns 0 on success, or -EINVAL for an unknown attribute type, an
 * attribute of unexpected length or trailing bytes.  For a key
 * (!is_mask), a zero IPv4 destination or a missing TTL attribute is also
 * rejected.
 */
int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr,
			     struct sw_flow_match *match, bool is_mask)
{
	struct nlattr *a;
	int rem;
	bool ttl = false;
	__be16 tun_flags = 0;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		/* Required payload length of each tunnel attribute. */
		static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
			[OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
			[OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
			[OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
			[OVS_TUNNEL_KEY_ATTR_TOS] = 1,
			[OVS_TUNNEL_KEY_ATTR_TTL] = 1,
			[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
			[OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
		};

		/* Bounds-check 'type' before it indexes the table above. */
		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR("Unknown IPv4 tunnel attribute (type=%d, max=%d).\n",
				  type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (ovs_tunnel_key_lens[type] != nla_len(a)) {
			OVS_NLERR("IPv4 tunnel attribute type has unexpected "
				  " length (type=%d, length=%d, expected=%d).\n",
				  type, nla_len(a), ovs_tunnel_key_lens[type]);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			tun_flags |= TUNNEL_KEY;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
					nla_get_be32(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
					nla_get_be32(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
					nla_get_u8(a), is_mask);
			/* Remember that a TTL was supplied; checked below. */
			ttl = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			tun_flags |= TUNNEL_DONT_FRAGMENT;
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			tun_flags |= TUNNEL_CSUM;
			break;
		default:
			return -EINVAL;
		}
	}

	/* The flags are accumulated in the loop and stored once here. */
	SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);

	if (rem > 0) {
		OVS_NLERR("IPv4 tunnel attribute has %d unknown bytes.\n", rem);
		return -EINVAL;
	}

	/* These two requirements apply to keys only, not to masks. */
	if (!is_mask) {
		if (!match->key->tun_key.ipv4_dst) {
			OVS_NLERR("IPv4 tunnel destination address is zero.\n");
			return -EINVAL;
		}

		if (!ttl) {
			OVS_NLERR("IPv4 tunnel TTL not specified.\n");
			return -EINVAL;
		}
	}

	return 0;
}

/*
 * Appends a nested OVS_KEY_ATTR_TUNNEL attribute built from 'output' to
 * 'skb'.  'tun_key' is not read by this function.
 *
 * The tunnel ID is emitted only when TUNNEL_KEY is set in the flags.  The
 * source address, destination address and ToS are emitted only when they
 * are non-zero.  The TTL is always emitted.  The two flag attributes are
 * emitted when the matching flag bit is set.
 *
 * Returns 0 on success or -EMSGSIZE when an attribute cannot be added.
 */
int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb,
			   const struct ovs_key_ipv4_tunnel *tun_key,
			   const struct ovs_key_ipv4_tunnel *output)
{
	struct nlattr *nla;

	nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
	if (!nla)
		return -EMSGSIZE;

	if (output->tun_flags & TUNNEL_KEY &&
	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
		return -EMSGSIZE;
	if (output->ipv4_src &&
	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, output->ipv4_src))
		return -EMSGSIZE;
	if (output->ipv4_dst &&
	    nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, output->ipv4_dst))
		return -EMSGSIZE;
	if (output->ipv4_tos &&
	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
		return -EMSGSIZE;
	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
		return -EMSGSIZE;
	if ((output->tun_flags & TUNNEL_CSUM) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}

/*
 * Copies the metadata attributes (priority, in_port, skb_mark, tunnel)
 * selected by the '*attrs' bitmap from 'a' into 'match'.  The bit of each
 * attribute that is consumed is cleared from '*attrs'.
 *
 * Returns 0 on success, or -EINVAL when a key in_port is out of range or
 * the tunnel attribute fails to parse.
 */
static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
				 const struct nlattr **a, bool is_mask)
{
	if (*attrs & (1ULL << OVS_KEY_ATTR_PRIORITY)) {
		SW_FLOW_KEY_PUT(match, phy.priority,
				nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_PRIORITY);
	}

	if (*attrs & (1ULL << OVS_KEY_ATTR_IN_PORT)) {
		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);

		if (is_mask)
			in_port = 0xffffffff; /* Always exact match in_port. */
		else if (in_port >= DP_MAX_PORTS)
			return -EINVAL;

		SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_IN_PORT);
	} else if (!is_mask) {
		/* A key without an in_port attribute gets DP_MAX_PORTS. */
		SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
	}

	if (*attrs & (1ULL << OVS_KEY_ATTR_SKB_MARK)) {
		uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);

		SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
		*attrs &= ~(1ULL << OVS_KEY_ATTR_SKB_MARK);
	}
	if (*attrs & (1ULL << OVS_KEY_ATTR_TUNNEL)) {
		/* Any tunnel parse failure is reported as -EINVAL. */
		if (ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
					     is_mask))
			return -EINVAL;
		*attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL);
	}
	return 0;
}

/*
 * Copies every attribute selected by the 'attrs' bitmap from 'a' into the
 * key of 'match' (or into its mask when 'is_mask' is true).
 *
 * Each handled attribute clears its bit from the local 'attrs'; any bit
 * still set at the end makes the function return -EINVAL.  'orig_attrs'
 * keeps the bitmap as passed in, so the L4 sections can tell whether an
 * IPv4 attribute was supplied even after its bit has been cleared.
 *
 * Returns 0 on success or a negative errno value.
 */
static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
				const struct nlattr **a, bool is_mask)
{
	int err;
	u64 orig_attrs = attrs;

	err = metadata_from_nlattrs(match, &attrs, a, is_mask);
	if (err)
		return err;

	if (attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) {
		const struct ovs_key_ethernet *eth_key;

		eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
		SW_FLOW_KEY_MEMCPY(match, eth.src,
				   eth_key->eth_src, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, eth.dst,
				   eth_key->eth_dst, ETH_ALEN, is_mask);
		attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERNET);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_VLAN)) {
		__be16 tci;

		/* VLAN_TAG_PRESENT must be set in both the key and the mask. */
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		if (!(tci & htons(VLAN_TAG_PRESENT))) {
			if (is_mask)
				OVS_NLERR("VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.\n");
			else
				OVS_NLERR("VLAN TCI does not have VLAN_TAG_PRESENT bit set.\n");

			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
		attrs &= ~(1ULL << OVS_KEY_ATTR_VLAN);
	} else if (!is_mask)
		/*
		 * Key without a VLAN attribute: 0xffff is written with the
		 * macro's last argument forced to true, not 'is_mask'.
		 */
		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);

	if (attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) {
		__be16 eth_type;

		eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
		if (is_mask) {
			/* Always exact match EtherType. */
			eth_type = htons(0xffff);
		} else if (ntohs(eth_type) < ETH_P_802_3_MIN) {
			OVS_NLERR("EtherType is less than minimum (type=%x, min=%x).\n",
				  ntohs(eth_type), ETH_P_802_3_MIN);
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
		attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
	} else if (!is_mask) {
		/* A key without an EtherType attribute gets ETH_P_802_2. */
		SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
		const struct ovs_key_ipv4 *ipv4_key;

		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
		/* The fragment type is range-checked for keys only. */
		if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR("Unknown IPv4 fragment type (value=%d, max=%d).\n",
				  ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv4_key->ipv4_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv4_key->ipv4_tos, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv4_key->ipv4_ttl, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv4_key->ipv4_frag, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				ipv4_key->ipv4_src, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				ipv4_key->ipv4_dst, is_mask);
		attrs &= ~(1ULL << OVS_KEY_ATTR_IPV4);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_IPV6)) {
		const struct ovs_key_ipv6 *ipv6_key;

		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
		/* The fragment type is range-checked for keys only. */
		if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
			OVS_NLERR("Unknown IPv6 fragment type (value=%d, max=%d).\n",
				  ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
			return -EINVAL;
		}
		SW_FLOW_KEY_PUT(match, ipv6.label,
				ipv6_key->ipv6_label, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ipv6_key->ipv6_proto, is_mask);
		SW_FLOW_KEY_PUT(match, ip.tos,
				ipv6_key->ipv6_tclass, is_mask);
		SW_FLOW_KEY_PUT(match, ip.ttl,
				ipv6_key->ipv6_hlimit, is_mask);
		SW_FLOW_KEY_PUT(match, ip.frag,
				ipv6_key->ipv6_frag, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
				   ipv6_key->ipv6_src,
				   sizeof(match->key->ipv6.addr.src),
				   is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
				   ipv6_key->ipv6_dst,
				   sizeof(match->key->ipv6.addr.dst),
				   is_mask);

		attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_ARP)) {
		const struct ovs_key_arp *arp_key;

		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
		/* A key opcode with any of the 0xff00 bits set is rejected. */
		if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
			OVS_NLERR("Unknown ARP opcode (opcode=%d).\n",
				  arp_key->arp_op);
			return -EINVAL;
		}

		/* ARP reuses the ipv4.addr and ip.proto fields of the key. */
		SW_FLOW_KEY_PUT(match, ipv4.addr.src,
				arp_key->arp_sip, is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
				arp_key->arp_tip, is_mask);
		SW_FLOW_KEY_PUT(match, ip.proto,
				ntohs(arp_key->arp_op), is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
				   arp_key->arp_sha, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
				   arp_key->arp_tha, ETH_ALEN, is_mask);

		attrs &= ~(1ULL << OVS_KEY_ATTR_ARP);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_TCP)) {
		const struct ovs_key_tcp *tcp_key;

		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
		/* Ports go to ipv4.tp if an IPv4 attribute came in, else ipv6.tp. */
		if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					tcp_key->tcp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					tcp_key->tcp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					tcp_key->tcp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					tcp_key->tcp_dst, is_mask);
		}
		attrs &= ~(1ULL << OVS_KEY_ATTR_TCP);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_UDP)) {
		const struct ovs_key_udp *udp_key;

		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
		/* Same IPv4/IPv6 field selection as for TCP. */
		if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					udp_key->udp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					udp_key->udp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					udp_key->udp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					udp_key->udp_dst, is_mask);
		}
		attrs &= ~(1ULL << OVS_KEY_ATTR_UDP);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_SCTP)) {
		const struct ovs_key_sctp *sctp_key;

		sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
		/* Same IPv4/IPv6 field selection as for TCP. */
		if (orig_attrs & (1ULL << OVS_KEY_ATTR_IPV4)) {
			SW_FLOW_KEY_PUT(match, ipv4.tp.src,
					sctp_key->sctp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
					sctp_key->sctp_dst, is_mask);
		} else {
			SW_FLOW_KEY_PUT(match, ipv6.tp.src,
					sctp_key->sctp_src, is_mask);
			SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
					sctp_key->sctp_dst, is_mask);
		}
		attrs &= ~(1ULL << OVS_KEY_ATTR_SCTP);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_ICMP)) {
		const struct ovs_key_icmp *icmp_key;

		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
		/* ICMP type and code are stored in the tp.src/tp.dst fields. */
		SW_FLOW_KEY_PUT(match, ipv4.tp.src,
				htons(icmp_key->icmp_type), is_mask);
		SW_FLOW_KEY_PUT(match, ipv4.tp.dst,
				htons(icmp_key->icmp_code), is_mask);
		attrs &= ~(1ULL << OVS_KEY_ATTR_ICMP);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_ICMPV6)) {
		const struct ovs_key_icmpv6 *icmpv6_key;

		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
		/* ICMPv6 type and code are stored in ipv6.tp.src/tp.dst. */
		SW_FLOW_KEY_PUT(match, ipv6.tp.src,
				htons(icmpv6_key->icmpv6_type), is_mask);
		SW_FLOW_KEY_PUT(match, ipv6.tp.dst,
				htons(icmpv6_key->icmpv6_code), is_mask);
		attrs &= ~(1ULL << OVS_KEY_ATTR_ICMPV6);
	}

	if (attrs & (1ULL << OVS_KEY_ATTR_ND)) {
		const struct ovs_key_nd *nd_key;

		nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
				   nd_key->nd_target,
				   sizeof(match->key->ipv6.nd.target),
				   is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
				   nd_key->nd_sll, ETH_ALEN, is_mask);
		SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
				   nd_key->nd_tll, ETH_ALEN, is_mask);
		attrs &= ~(1ULL << OVS_KEY_ATTR_ND);
	}

	/* Any bit still set is an attribute this function does not handle. */
	if (attrs != 0)
		return -EINVAL;

	return 0;
}

/**
 * ovs_match_from_nlattrs - parses Netlink attributes into a flow key and
 * mask. In case the 'mask' is NULL, the flow is treated as exact match
 * flow. Otherwise, it is treated as a wildcarded flow, except the mask
 * does not include any don't care bit.
 * @match: receives the extracted flow match information.
* @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence. These should be the fields of the packet that triggered the
 * creation of this flow.
 * @mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
 * attribute that specifies the mask field of the wildcarded flow.
 *
 * Returns 0 on success or a negative errno value when the key or the mask
 * is malformed.
 */
int ovs_match_from_nlattrs(struct sw_flow_match *match,
			   const struct nlattr *key,
			   const struct nlattr *mask)
{
	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
	const struct nlattr *encap;
	u64 key_attrs = 0;
	u64 mask_attrs = 0;
	bool encap_valid = false;
	int err;

	err = parse_flow_nlattrs(key, a, &key_attrs);
	if (err)
		return err;

	/*
	 * An 802.1Q key carries the inner frame in a nested ENCAP attribute.
	 * Unwrap it so that 'a' and 'key_attrs' describe the inner frame.
	 */
	if ((key_attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
	    (key_attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)) &&
	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
		__be16 tci;

		if (!((key_attrs & (1ULL << OVS_KEY_ATTR_VLAN)) &&
		      (key_attrs & (1ULL << OVS_KEY_ATTR_ENCAP)))) {
			OVS_NLERR("Invalid Vlan frame.\n");
			return -EINVAL;
		}

		/* The outer EtherType bit is dropped; ENCAP may supply one. */
		key_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
		encap = a[OVS_KEY_ATTR_ENCAP];
		key_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
		encap_valid = true;

		if (tci & htons(VLAN_TAG_PRESENT)) {
			err = parse_flow_nlattrs(encap, a, &key_attrs);
			if (err)
				return err;
		} else if (!tci) {
			/* Corner case for truncated 802.1Q header. */
			if (nla_len(encap)) {
				OVS_NLERR("Truncated 802.1Q header has non-zero encap attribute.\n");
				return -EINVAL;
			}
		} else {
			OVS_NLERR("Encap attribute is set for a non-VLAN frame.\n");
			return -EINVAL;
		}
	}

	err = ovs_key_from_nlattrs(match, key_attrs, a, false);
	if (err)
		return err;

	if (mask) {
		err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
		if (err)
			return err;

		if (mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) {
			__be16 eth_type = 0;
			__be16 tci = 0;

			if (!encap_valid) {
				OVS_NLERR("Encap mask attribute is set for non-VLAN frame.\n");
				return -EINVAL;
			}

			mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ENCAP);
			/*
			 * NOTE(review): 'a' is not cleared between the key
			 * parse and the mask parse, so this entry may still
			 * be the key's attribute if the mask omitted it --
			 * confirm whether testing 'mask_attrs' is intended.
			 */
			if (a[OVS_KEY_ATTR_ETHERTYPE])
				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

			if (eth_type == htons(0xffff)) {
				mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE);
				encap = a[OVS_KEY_ATTR_ENCAP];
				err = parse_flow_mask_nlattrs(encap, a, &mask_attrs);
				/*
				 * A malformed nested mask must be rejected
				 * here; 'err' is overwritten further down.
				 */
				if (err)
					return err;
			} else {
				OVS_NLERR("VLAN frames must have an exact match on the TPID (mask=%x).\n",
					  ntohs(eth_type));
				return -EINVAL;
			}

			if (a[OVS_KEY_ATTR_VLAN])
				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

			if (!(tci & htons(VLAN_TAG_PRESENT))) {
				OVS_NLERR("VLAN tag present bit must have an exact match (tci_mask=%x).\n",
					  ntohs(tci));
				return -EINVAL;
			}
		}

		err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
		if (err)
			return err;
	} else {
		/* Populate exact match flow's key mask. */
		if (match->mask)
			ovs_sw_flow_mask_set(match->mask, &match->range, 0xff);
	}

	if (!ovs_match_validate(match, key_attrs, mask_attrs))
		return -EINVAL;

	return 0;
}

/**
 * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
 * @flow: Receives extracted in_port, priority, tun_key and skb_mark.
 * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
 * sequence.
 *
 * This parses a series of Netlink attributes that form a flow key, which must
 * take the same form accepted by flow_from_nlattrs(), but only enough of it to
 * get the metadata, that is, the parts of the flow key that cannot be
 * extracted from the packet itself.
*/ int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, const struct nlattr *attr) { struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; u64 attrs = 0; int err; struct sw_flow_match match; flow->key.phy.in_port = DP_MAX_PORTS; flow->key.phy.priority = 0; flow->key.phy.skb_mark = 0; memset(tun_key, 0, sizeof(flow->key.tun_key)); err = parse_flow_nlattrs(attr, a, &attrs); if (err) return -EINVAL; memset(&match, 0, sizeof(match)); match.key = &flow->key; err = metadata_from_nlattrs(&match, &attrs, a, false); if (err) return err; return 0; } int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, const struct sw_flow_key *output, struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; struct nlattr *nla, *encap; bool is_mask = (swkey != output); if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) goto nla_put_failure; if ((swkey->tun_key.ipv4_dst || is_mask) && ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key, &output->tun_key)) goto nla_put_failure; if (swkey->phy.in_port == DP_MAX_PORTS) { if (is_mask && (output->phy.in_port == 0xffff)) if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) goto nla_put_failure; } else { u16 upper_u16; upper_u16 = !is_mask ? 0 : 0xffff; if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, (upper_u16 << 16) | output->phy.in_port)) goto nla_put_failure; } if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; eth_key = nla_data(nla); memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN); memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN); if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { __be16 eth_type; eth_type = !is_mask ? 
htons(ETH_P_8021Q) : htons(0xffff); if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) goto nla_put_failure; encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); if (!swkey->eth.tci) goto unencap; } else encap = NULL; if (swkey->eth.type == htons(ETH_P_802_2)) { /* * Ethertype 802.2 is represented in the netlink with omitted * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and * 0xffff in the mask attribute. Ethertype can also * be wildcarded. */ if (is_mask && output->eth.type) if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) goto nla_put_failure; goto unencap; } if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) goto nla_put_failure; if (swkey->eth.type == htons(ETH_P_IP)) { struct ovs_key_ipv4 *ipv4_key; nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); if (!nla) goto nla_put_failure; ipv4_key = nla_data(nla); ipv4_key->ipv4_src = output->ipv4.addr.src; ipv4_key->ipv4_dst = output->ipv4.addr.dst; ipv4_key->ipv4_proto = output->ip.proto; ipv4_key->ipv4_tos = output->ip.tos; ipv4_key->ipv4_ttl = output->ip.ttl; ipv4_key->ipv4_frag = output->ip.frag; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { struct ovs_key_ipv6 *ipv6_key; nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); if (!nla) goto nla_put_failure; ipv6_key = nla_data(nla); memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, sizeof(ipv6_key->ipv6_src)); memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, sizeof(ipv6_key->ipv6_dst)); ipv6_key->ipv6_label = output->ipv6.label; ipv6_key->ipv6_proto = output->ip.proto; ipv6_key->ipv6_tclass = output->ip.tos; ipv6_key->ipv6_hlimit = output->ip.ttl; ipv6_key->ipv6_frag = output->ip.frag; } else if (swkey->eth.type == htons(ETH_P_ARP) || swkey->eth.type == htons(ETH_P_RARP)) { struct ovs_key_arp *arp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); if (!nla) goto nla_put_failure; arp_key = nla_data(nla); memset(arp_key, 0, 
sizeof(struct ovs_key_arp)); arp_key->arp_sip = output->ipv4.addr.src; arp_key->arp_tip = output->ipv4.addr.dst; arp_key->arp_op = htons(output->ip.proto); memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN); memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN); } if ((swkey->eth.type == htons(ETH_P_IP) || swkey->eth.type == htons(ETH_P_IPV6)) && swkey->ip.frag != OVS_FRAG_TYPE_LATER) { if (swkey->ip.proto == IPPROTO_TCP) { struct ovs_key_tcp *tcp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); if (!nla) goto nla_put_failure; tcp_key = nla_data(nla); if (swkey->eth.type == htons(ETH_P_IP)) { tcp_key->tcp_src = output->ipv4.tp.src; tcp_key->tcp_dst = output->ipv4.tp.dst; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { tcp_key->tcp_src = output->ipv6.tp.src; tcp_key->tcp_dst = output->ipv6.tp.dst; } } else if (swkey->ip.proto == IPPROTO_UDP) { struct ovs_key_udp *udp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); if (!nla) goto nla_put_failure; udp_key = nla_data(nla); if (swkey->eth.type == htons(ETH_P_IP)) { udp_key->udp_src = output->ipv4.tp.src; udp_key->udp_dst = output->ipv4.tp.dst; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { udp_key->udp_src = output->ipv6.tp.src; udp_key->udp_dst = output->ipv6.tp.dst; } } else if (swkey->ip.proto == IPPROTO_SCTP) { struct ovs_key_sctp *sctp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); if (!nla) goto nla_put_failure; sctp_key = nla_data(nla); if (swkey->eth.type == htons(ETH_P_IP)) { sctp_key->sctp_src = swkey->ipv4.tp.src; sctp_key->sctp_dst = swkey->ipv4.tp.dst; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { sctp_key->sctp_src = swkey->ipv6.tp.src; sctp_key->sctp_dst = swkey->ipv6.tp.dst; } } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto == IPPROTO_ICMP) { struct ovs_key_icmp *icmp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); if (!nla) goto nla_put_failure; icmp_key = nla_data(nla); icmp_key->icmp_type = 
ntohs(output->ipv4.tp.src); icmp_key->icmp_code = ntohs(output->ipv4.tp.dst); } else if (swkey->eth.type == htons(ETH_P_IPV6) && swkey->ip.proto == IPPROTO_ICMPV6) { struct ovs_key_icmpv6 *icmpv6_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, sizeof(*icmpv6_key)); if (!nla) goto nla_put_failure; icmpv6_key = nla_data(nla); icmpv6_key->icmpv6_type = ntohs(output->ipv6.tp.src); icmpv6_key->icmpv6_code = ntohs(output->ipv6.tp.dst); if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION || icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) { struct ovs_key_nd *nd_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); if (!nla) goto nla_put_failure; nd_key = nla_data(nla); memcpy(nd_key->nd_target, &output->ipv6.nd.target, sizeof(nd_key->nd_target)); memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN); memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN); } } } unencap: if (encap) nla_nest_end(skb, encap); return 0; nla_put_failure: return -EMSGSIZE; } /* Initializes the flow module. * Returns zero if successful or a negative error code. */ int ovs_flow_init(void) { BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long)); BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0, 0, NULL); if (flow_cache == NULL) return -ENOMEM; return 0; } /* Uninitializes the flow module. 
*/ void ovs_flow_exit(void) { kmem_cache_destroy(flow_cache); } struct sw_flow_mask *ovs_sw_flow_mask_alloc(void) { struct sw_flow_mask *mask; mask = kmalloc(sizeof(*mask), GFP_KERNEL); if (mask) mask->ref_count = 0; return mask; } void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *mask) { mask->ref_count++; } static void rcu_free_sw_flow_mask_cb(struct rcu_head *rcu) { struct sw_flow_mask *mask = container_of(rcu, struct sw_flow_mask, rcu); kfree(mask); } void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred) { if (!mask) return; BUG_ON(!mask->ref_count); mask->ref_count--; if (!mask->ref_count) { list_del_rcu(&mask->list); if (deferred) call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb); else kfree(mask); } } static bool ovs_sw_flow_mask_equal(const struct sw_flow_mask *a, const struct sw_flow_mask *b) { u8 *a_ = (u8 *)&a->key + a->range.start; u8 *b_ = (u8 *)&b->key + b->range.start; return (a->range.end == b->range.end) && (a->range.start == b->range.start) && (memcmp(a_, b_, range_n_bytes(&a->range)) == 0); } struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *tbl, const struct sw_flow_mask *mask) { struct list_head *ml; list_for_each(ml, tbl->mask_list) { struct sw_flow_mask *m; m = container_of(ml, struct sw_flow_mask, list); if (ovs_sw_flow_mask_equal(mask, m)) return m; } return NULL; } /** * add a new mask into the mask list. * The caller needs to make sure that 'mask' is not the same * as any masks that are already on the list. */ void ovs_sw_flow_mask_insert(struct flow_table *tbl, struct sw_flow_mask *mask) { list_add_rcu(&mask->list, tbl->mask_list); } /** * Set 'range' fields in the mask to the value of 'val'. 
*/ static void ovs_sw_flow_mask_set(struct sw_flow_mask *mask, struct sw_flow_key_range *range, u8 val) { u8 *m = (u8 *)&mask->key + range->start; mask->range = *range; memset(m, val, range_n_bytes(range)); } openvswitch-2.0.1+git20140120/datapath/flow.h000066400000000000000000000177231226605124000203720ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #ifndef FLOW_H #define FLOW_H 1 #include #include #include #include #include #include #include #include #include #include #include #include #include struct sk_buff; struct sw_flow_mask; struct flow_table; struct sw_flow_actions { struct rcu_head rcu; u32 actions_len; struct nlattr actions[]; }; /* Used to memset ovs_key_ipv4_tunnel padding. */ #define OVS_TUNNEL_KEY_SIZE \ (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) struct ovs_key_ipv4_tunnel { __be64 tun_id; __be32 ipv4_src; __be32 ipv4_dst; __be16 tun_flags; u8 ipv4_tos; u8 ipv4_ttl; }; static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, const struct iphdr *iph, __be64 tun_id, __be16 tun_flags) { tun_key->tun_id = tun_id; tun_key->ipv4_src = iph->saddr; tun_key->ipv4_dst = iph->daddr; tun_key->ipv4_tos = iph->tos; tun_key->ipv4_ttl = iph->ttl; tun_key->tun_flags = tun_flags; /* clear struct padding. 
*/ memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0, sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE); } struct sw_flow_key { struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } phy; struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ __be16 type; /* Ethernet frame type. */ } eth; struct { u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ u8 tos; /* IP ToS. */ u8 ttl; /* IP TTL/hop limit. */ u8 frag; /* One of OVS_FRAG_TYPE_*. */ } ip; union { struct { struct { __be32 src; /* IP source address. */ __be32 dst; /* IP destination address. */ } addr; union { struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ } tp; struct { u8 sha[ETH_ALEN]; /* ARP source hardware address. */ u8 tha[ETH_ALEN]; /* ARP target hardware address. */ } arp; }; } ipv4; struct { struct { struct in6_addr src; /* IPv6 source address. */ struct in6_addr dst; /* IPv6 destination address. */ } addr; __be32 label; /* IPv6 flow label. */ struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ } tp; struct { struct in6_addr target; /* ND target address. */ u8 sll[ETH_ALEN]; /* ND source link layer address. */ u8 tll[ETH_ALEN]; /* ND target link layer address. */ } nd; } ipv6; }; } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */ struct sw_flow { struct rcu_head rcu; struct hlist_node hash_node[2]; u32 hash; struct sw_flow_key key; struct sw_flow_key unmasked_key; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; spinlock_t lock; /* Lock for values below. */ unsigned long used; /* Last used time (in jiffies). */ u64 packet_count; /* Number of packets matched. 
*/ u64 byte_count; /* Number of bytes matched. */ u8 tcp_flags; /* Union of seen TCP flags. */ }; struct sw_flow_key_range { size_t start; size_t end; }; struct sw_flow_match { struct sw_flow_key *key; struct sw_flow_key_range range; struct sw_flow_mask *mask; }; void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); struct arp_eth_header { __be16 ar_hrd; /* format of hardware address */ __be16 ar_pro; /* format of protocol address */ unsigned char ar_hln; /* length of hardware address */ unsigned char ar_pln; /* length of protocol address */ __be16 ar_op; /* ARP opcode (command) */ /* Ethernet+IPv4 specific members. */ unsigned char ar_sha[ETH_ALEN]; /* sender hardware address */ unsigned char ar_sip[4]; /* sender IP address */ unsigned char ar_tha[ETH_ALEN]; /* target hardware address */ unsigned char ar_tip[4]; /* target IP address */ } __packed; int ovs_flow_init(void); void ovs_flow_exit(void); struct sw_flow *ovs_flow_alloc(void); void ovs_flow_deferred_free(struct sw_flow *); void ovs_flow_free(struct sw_flow *, bool deferred); struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); void ovs_flow_deferred_free_acts(struct sw_flow_actions *); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *); void ovs_flow_used(struct sw_flow *, struct sk_buff *); u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_to_nlattrs(const struct sw_flow_key *, const struct sw_flow_key *, struct sk_buff *); int ovs_match_from_nlattrs(struct sw_flow_match *match, const struct nlattr *, const struct nlattr *); int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, const struct nlattr *attr); #define MAX_ACTIONS_BUFSIZE (32 * 1024) #define TBL_MIN_BUCKETS 1024 struct flow_table { struct flex_array *buckets; unsigned int count, n_buckets; struct rcu_head rcu; struct list_head *mask_list; int node_ver; u32 hash_seed; bool keep_flows; }; static inline int ovs_flow_tbl_count(struct flow_table 
*table) { return table->count; } static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table) { return (table->count > table->n_buckets); } struct sw_flow *ovs_flow_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_lookup_unmasked_key(struct flow_table *table, struct sw_flow_match *match); void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); struct flow_table *ovs_flow_tbl_alloc(int new_size); struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); void ovs_flow_insert(struct flow_table *table, struct sw_flow *flow); void ovs_flow_remove(struct flow_table *table, struct sw_flow *flow); struct sw_flow *ovs_flow_dump_next(struct flow_table *table, u32 *bucket, u32 *idx); extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask); int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, const struct ovs_key_ipv4_tunnel *tun_key, const struct ovs_key_ipv4_tunnel *output); bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, const struct sw_flow_key *key, int key_end); struct sw_flow_mask { int ref_count; struct rcu_head rcu; struct list_head list; struct sw_flow_key_range range; struct sw_flow_key key; }; struct sw_flow_mask *ovs_sw_flow_mask_alloc(void); void ovs_sw_flow_mask_add_ref(struct sw_flow_mask *); void ovs_sw_flow_mask_del_ref(struct sw_flow_mask *, bool deferred); void ovs_sw_flow_mask_insert(struct flow_table *, struct sw_flow_mask *); struct sw_flow_mask *ovs_sw_flow_mask_find(const struct flow_table *, const struct sw_flow_mask *); void ovs_flow_key_mask(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); #endif /* flow.h */ 
openvswitch-2.0.1+git20140120/datapath/linux/000077500000000000000000000000001226605124000203775ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/datapath/linux/.gitignore000066400000000000000000000012171226605124000223700ustar00rootroot00000000000000/Kbuild /Makefile /Makefile.main /Module.markers /actions.c /addrconf_core-openvswitch.c /checksum.c /dev-openvswitch.c /dp_sysfs_dp.c /dp_sysfs_if.c /datapath.c /dp_dev.c /dp_notify.c /exthdrs_core.c /flex_array.c /flow.c /flow_dissector.c /genetlink-openvswitch.c /genl_exec.c /gre.c /gso.c /ip_output-openvswitch.c /ip_tunnels_core.c /kcompat.h /kmemdup.c /loop_counter.c /modules.order /netdevice.c /net_namespace.c /random32.c /reciprocal_div.c /skbuff-openvswitch.c /table.c /time.c /tmp /tunnel.c /vlan.c /vport-generic.c /vport-gre.c /vport-internal_dev.c /vport-lisp.c /vport-netdev.c /vport-patch.c /vport-vxlan.c /vport.c /vxlan.c /workqueue.c openvswitch-2.0.1+git20140120/datapath/linux/Kbuild.in000066400000000000000000000015371226605124000221470ustar00rootroot00000000000000# -*- makefile -*- export builddir = @abs_builddir@ export srcdir = @abs_srcdir@ export top_srcdir = @abs_top_srcdir@ export VERSION = @VERSION@ include $(srcdir)/../Modules.mk include $(srcdir)/Modules.mk EXTRA_CFLAGS := -DVERSION=\"$(VERSION)\" EXTRA_CFLAGS += -I$(srcdir)/.. EXTRA_CFLAGS += -I$(builddir)/.. EXTRA_CFLAGS += -g EXTRA_CFLAGS += -include $(builddir)/kcompat.h # These include directories have to go before -I$(KSRC)/include. # NOSTDINC_FLAGS just happens to be a variable that goes in the # right place, even though it's conceptually incorrect. 
NOSTDINC_FLAGS += -I$(top_srcdir)/include -I$(srcdir)/compat -I$(srcdir)/compat/include obj-m := $(patsubst %,%.o,$(build_modules)) define module_template $(1)-y = $$(notdir $$(patsubst %.c,%.o,$($(1)_sources))) endef $(foreach module,$(build_modules),$(eval $(call module_template,$(module)))) openvswitch-2.0.1+git20140120/datapath/linux/Makefile.in000066400000000000000000000003041226605124000224410ustar00rootroot00000000000000ifeq ($(KERNELRELEASE),) # We're being called directly by running make in this directory. include Makefile.main else # We're being included by the Linux kernel build system include Kbuild endif openvswitch-2.0.1+git20140120/datapath/linux/Makefile.main.in000066400000000000000000000057331226605124000233770ustar00rootroot00000000000000# -*- makefile -*- export builddir = @abs_builddir@ export srcdir = @abs_srcdir@ export top_srcdir = @abs_top_srcdir@ export KSRC = @KBUILD@ export VERSION = @VERSION@ include $(srcdir)/../Modules.mk include $(srcdir)/Modules.mk default: $(build_links) $(foreach s,$(sort $(foreach m,$(build_modules),$($(m)_sources))), \ $(eval $(notdir $(s)): ; ln -s $(srcdir)/../$(s) $@)) all: default distdir: clean install: install-data: install-exec: uninstall: install-dvi: install-html: install-info: install-ps: install-pdf: installdirs: check: all installcheck: mostlyclean: clean: rm -f *.o *.ko *.mod.* Module.symvers *.cmd kcompat.h.new for d in $(build_links); do if test -h $$d; then rm $$d; fi; done distclean: clean rm -f kcompat.h maintainer-clean: distclean dvi: pdf: ps: info: html: tags: TAGS: ifneq ($(KSRC),) ifeq (/lib/modules/$(shell uname -r)/source, $(KSRC)) KOBJ := /lib/modules/$(shell uname -r)/build else KOBJ := $(KSRC) endif VERSION_FILE := $(KOBJ)/include/linux/version.h ifeq (,$(wildcard $(VERSION_FILE))) VERSION_FILE := $(KOBJ)/include/generated/uapi/linux/version.h ifeq (,$(wildcard $(VERSION_FILE))) $(error Linux kernel source not configured - missing version.h) endif endif CONFIG_FILE := 
$(KSRC)/include/generated/autoconf.h ifeq (,$(wildcard $(CONFIG_FILE))) CONFIG_FILE := $(KSRC)/include/linux/autoconf.h ifeq (,$(wildcard $(CONFIG_FILE))) $(error Linux kernel source not configured - missing autoconf.h) endif endif default: $(MAKE) -C $(KSRC) M=$(builddir) modules modules_install: $(MAKE) -C $(KSRC) M=$(builddir) modules_install depmod `sed -n 's/#define UTS_RELEASE "\([^"]*\)"/\1/p' $(KSRC)/include/generated/utsrelease.h` endif # Much of the kernel build system in this file is derived from Intel's # e1000 distribution, with the following license: ################################################################################ # # Intel PRO/1000 Linux driver # Copyright(c) 1999 - 2007, 2009 Intel Corporation. # # This program is free software; you can redistribute it and/or modify it # under the terms and conditions of the GNU General Public License, # version 2, as published by the Free Software Foundation. # # This program is distributed in the hope it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. # # The full GNU General Public License is included in this distribution in # the file called "COPYING". # # Contact Information: # Linux NICS # e1000-devel Mailing List # Intel Corporation, 5200 N.E. 
Elam Young Parkway, Hillsboro, OR 97124-6497 # ################################################################################ openvswitch-2.0.1+git20140120/datapath/linux/Modules.mk000066400000000000000000000047711226605124000223510ustar00rootroot00000000000000openvswitch_sources += \ linux/compat/dev-openvswitch.c \ linux/compat/exthdrs_core.c \ linux/compat/flex_array.c \ linux/compat/flow_dissector.c \ linux/compat/gre.c \ linux/compat/gso.c \ linux/compat/genetlink-openvswitch.c \ linux/compat/ip_tunnels_core.c \ linux/compat/netdevice.c \ linux/compat/net_namespace.c \ linux/compat/reciprocal_div.c \ linux/compat/skbuff-openvswitch.c \ linux/compat/vxlan.c \ linux/compat/workqueue.c \ linux/compat/utils.c openvswitch_headers += \ linux/compat/gso.h \ linux/compat/include/asm/percpu.h \ linux/compat/include/linux/compiler.h \ linux/compat/include/linux/compiler-gcc.h \ linux/compat/include/linux/cpumask.h \ linux/compat/include/linux/err.h \ linux/compat/include/linux/etherdevice.h \ linux/compat/include/linux/flex_array.h \ linux/compat/include/linux/icmp.h \ linux/compat/include/linux/icmpv6.h \ linux/compat/include/linux/if.h \ linux/compat/include/linux/if_arp.h \ linux/compat/include/linux/if_ether.h \ linux/compat/include/linux/if_tunnel.h \ linux/compat/include/linux/if_vlan.h \ linux/compat/include/linux/in.h \ linux/compat/include/linux/ip.h \ linux/compat/include/linux/ipv6.h \ linux/compat/include/linux/jiffies.h \ linux/compat/include/linux/kconfig.h \ linux/compat/include/linux/kernel.h \ linux/compat/include/linux/list.h \ linux/compat/include/linux/log2.h \ linux/compat/include/linux/net.h \ linux/compat/include/linux/netdevice.h \ linux/compat/include/linux/netdev_features.h \ linux/compat/include/linux/netlink.h \ linux/compat/include/linux/poison.h \ linux/compat/include/linux/rculist.h \ linux/compat/include/linux/rcupdate.h \ linux/compat/include/linux/rtnetlink.h \ linux/compat/include/linux/sctp.h \ linux/compat/include/linux/skbuff.h \ 
linux/compat/include/linux/stddef.h \ linux/compat/include/linux/tcp.h \ linux/compat/include/linux/types.h \ linux/compat/include/linux/u64_stats_sync.h \ linux/compat/include/linux/udp.h \ linux/compat/include/linux/workqueue.h \ linux/compat/include/net/checksum.h \ linux/compat/include/net/dst.h \ linux/compat/include/net/flow_keys.h \ linux/compat/include/net/genetlink.h \ linux/compat/include/net/gre.h \ linux/compat/include/net/inet_frag.h \ linux/compat/include/net/ip.h \ linux/compat/include/net/ip_tunnels.h \ linux/compat/include/net/ipv6.h \ linux/compat/include/net/net_namespace.h \ linux/compat/include/net/netlink.h \ linux/compat/include/net/sock.h \ linux/compat/include/net/vxlan.h \ linux/compat/include/net/sctp/checksum.h openvswitch-2.0.1+git20140120/datapath/linux/compat/000077500000000000000000000000001226605124000216625ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/datapath/linux/compat/dev-openvswitch.c000066400000000000000000000014551226605124000251600ustar00rootroot00000000000000#ifndef HAVE_DEV_DISABLE_LRO #include #ifdef NETIF_F_LRO #include /** * dev_disable_lro - disable Large Receive Offload on a device * @dev: device * * Disable Large Receive Offload (LRO) on a net device. Must be * called under RTNL. This is needed if received packets may be * forwarded to another interface. 
*/ void dev_disable_lro(struct net_device *dev) { if (dev->ethtool_ops && dev->ethtool_ops->get_flags && dev->ethtool_ops->set_flags) { u32 flags = dev->ethtool_ops->get_flags(dev); if (flags & ETH_FLAG_LRO) { flags &= ~ETH_FLAG_LRO; dev->ethtool_ops->set_flags(dev, flags); } } WARN_ON(dev->features & NETIF_F_LRO); } #else void dev_disable_lro(struct net_device *dev) { } #endif /* NETIF_F_LRO */ #endif /* HAVE_DEV_DISABLE_LRO */ openvswitch-2.0.1+git20140120/datapath/linux/compat/exthdrs_core.c000066400000000000000000000105041226605124000245170ustar00rootroot00000000000000#include #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) int rpl_ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, __be16 *frag_offp) { u8 nexthdr = *nexthdrp; *frag_offp = 0; while (ipv6_ext_hdr(nexthdr)) { struct ipv6_opt_hdr _hdr, *hp; int hdrlen; if (nexthdr == NEXTHDR_NONE) return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (hp == NULL) return -1; if (nexthdr == NEXTHDR_FRAGMENT) { __be16 _frag_off, *fp; fp = skb_header_pointer(skb, start+offsetof(struct frag_hdr, frag_off), sizeof(_frag_off), &_frag_off); if (fp == NULL) return -1; *frag_offp = *fp; if (ntohs(*frag_offp) & ~0x7) break; hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) hdrlen = (hp->hdrlen+2)<<2; else hdrlen = ipv6_optlen(hp); nexthdr = hp->nexthdr; start += hdrlen; } *nexthdrp = nexthdr; return start; } #endif /* Kernel version < 3.3 */ /* * find the offset to specified header or the protocol number of last header * if target < 0. "last header" is transport protocol header, ESP, or * "No next header". * * Note that *offset is used as input/output parameter. an if it is not zero, * then it must be a valid offset to an inner IPv6 header. This can be used * to explore inner IPv6 header, eg. ICMPv6 error messages. * * If target header is found, its offset is set in *offset and return protocol * number. Otherwise, return -1. 
* * If the first fragment doesn't contain the final protocol header or * NEXTHDR_NONE it is considered invalid. * * Note that non-1st fragment is special case that "the protocol number * of last header" is "next header" field in Fragment header. In this case, * *offset is meaningless and fragment offset is stored in *fragoff if fragoff * isn't NULL. * * if flags is not NULL and it's a fragment, then the frag flag * OVS_IP6T_FH_F_FRAG will be set. If it's an AH header, the * OVS_IP6T_FH_F_AUTH flag is set and target < 0, then this function will * stop at the AH header. If OVS_IP6T_FH_F_SKIP_RH flag was passed, then this * function will skip all those routing headers, where segements_left was 0. */ int rpl_ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff, int *flags) { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; unsigned int len; bool found; if (fragoff) *fragoff = 0; if (*offset) { struct ipv6hdr _ip6, *ip6; ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); if (!ip6 || (ip6->version != 6)) { printk(KERN_ERR "IPv6 header not found\n"); return -EBADMSG; } start = *offset + sizeof(struct ipv6hdr); nexthdr = ip6->nexthdr; } len = skb->len - start; do { struct ipv6_opt_hdr _hdr, *hp; unsigned int hdrlen; found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { if (target < 0) break; return -ENOENT; } hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); if (hp == NULL) return -EBADMSG; if (nexthdr == NEXTHDR_ROUTING) { struct ipv6_rt_hdr _rh, *rh; rh = skb_header_pointer(skb, start, sizeof(_rh), &_rh); if (rh == NULL) return -EBADMSG; if (flags && (*flags & OVS_IP6T_FH_F_SKIP_RH) && rh->segments_left == 0) found = false; } if (nexthdr == NEXTHDR_FRAGMENT) { unsigned short _frag_off; __be16 *fp; if (flags) /* Indicate that this is a fragment */ *flags |= OVS_IP6T_FH_F_FRAG; fp = skb_header_pointer(skb, 
start+offsetof(struct frag_hdr, frag_off), sizeof(_frag_off), &_frag_off); if (fp == NULL) return -EBADMSG; _frag_off = ntohs(*fp) & ~0x7; if (_frag_off) { if (target < 0 && ((!ipv6_ext_hdr(hp->nexthdr)) || hp->nexthdr == NEXTHDR_NONE)) { if (fragoff) *fragoff = _frag_off; return hp->nexthdr; } return -ENOENT; } hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) { if (flags && (*flags & OVS_IP6T_FH_F_AUTH) && (target < 0)) break; hdrlen = (hp->hdrlen + 2) << 2; } else hdrlen = ipv6_optlen(hp); if (!found) { nexthdr = hp->nexthdr; len -= hdrlen; start += hdrlen; } } while (!found); *offset = start; return nexthdr; } openvswitch-2.0.1+git20140120/datapath/linux/compat/flex_array.c000066400000000000000000000253031226605124000241650ustar00rootroot00000000000000#include #if LINUX_VERSION_CODE < KERNEL_VERSION(3,0,0) /* * Flexible array managed in PAGE_SIZE parts * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Copyright IBM Corporation, 2009 * * Author: Dave Hansen */ #include #include #include #include #include struct flex_array_part { char elements[FLEX_ARRAY_PART_SIZE]; }; /* * If a user requests an allocation which is small * enough, we may simply use the space in the * flex_array->parts[] array to store the user * data. 
*/ static inline int elements_fit_in_base(struct flex_array *fa) { int data_size = fa->element_size * fa->total_nr_elements; if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) return 1; return 0; } /** * flex_array_alloc - allocate a new flexible array * @element_size: the size of individual elements in the array * @total: total number of elements that this should hold * @flags: page allocation flags to use for base array * * Note: all locking must be provided by the caller. * * @total is used to size internal structures. If the user ever * accesses any array indexes >=@total, it will produce errors. * * The maximum number of elements is defined as: the number of * elements that can be stored in a page times the number of * page pointers that we can fit in the base structure or (using * integer math): * * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *) * * Here's a table showing example capacities. Note that the maximum * index that the get/put() functions is just nr_objects-1. This * basically means that you get 4MB of storage on 32-bit and 2MB on * 64-bit. * * * Element size | Objects | Objects | * PAGE_SIZE=4k | 32-bit | 64-bit | * ---------------------------------| * 1 bytes | 4177920 | 2088960 | * 2 bytes | 2088960 | 1044480 | * 3 bytes | 1392300 | 696150 | * 4 bytes | 1044480 | 522240 | * 32 bytes | 130560 | 65408 | * 33 bytes | 126480 | 63240 | * 2048 bytes | 2040 | 1020 | * 2049 bytes | 1020 | 510 | * void * | 1044480 | 261120 | * * Since 64-bit pointers are twice the size, we lose half the * capacity in the base structure. Also note that no effort is made * to efficiently pack objects across page boundaries. 
*/ struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags) { struct flex_array *ret; int elems_per_part = 0; int reciprocal_elems = 0; int max_size = 0; if (element_size) { elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); reciprocal_elems = reciprocal_value(elems_per_part); max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part; } /* max_size will end up 0 if element_size > PAGE_SIZE */ if (total > max_size) return NULL; ret = kzalloc(sizeof(struct flex_array), flags); if (!ret) return NULL; ret->element_size = element_size; ret->total_nr_elements = total; ret->elems_per_part = elems_per_part; ret->reciprocal_elems = reciprocal_elems; if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) memset(&ret->parts[0], FLEX_ARRAY_FREE, FLEX_ARRAY_BASE_BYTES_LEFT); return ret; } static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { return reciprocal_divide(element_nr, fa->reciprocal_elems); } /** * flex_array_free_parts - just free the second-level pages * @fa: the flex array from which to free parts * * This is to be used in cases where the base 'struct flex_array' * has been statically allocated and should not be free. 
*/ void flex_array_free_parts(struct flex_array *fa) { int part_nr; if (elements_fit_in_base(fa)) return; for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) kfree(fa->parts[part_nr]); } void flex_array_free(struct flex_array *fa) { flex_array_free_parts(fa); kfree(fa); } static unsigned int index_inside_part(struct flex_array *fa, unsigned int element_nr, unsigned int part_nr) { unsigned int part_offset; part_offset = element_nr - part_nr * fa->elems_per_part; return part_offset * fa->element_size; } static struct flex_array_part * __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) { struct flex_array_part *part = fa->parts[part_nr]; if (!part) { part = kmalloc(sizeof(struct flex_array_part), flags); if (!part) return NULL; if (!(flags & __GFP_ZERO)) memset(part, FLEX_ARRAY_FREE, sizeof(struct flex_array_part)); fa->parts[part_nr] = part; } return part; } /** * flex_array_put - copy data into the array at @element_nr * @fa: the flex array to copy data into * @element_nr: index of the position in which to insert * the new element. * @src: address of data to copy into the array * @flags: page allocation flags to use for array expansion * * * Note that this *copies* the contents of @src into * the array. If you are trying to store an array of * pointers, make sure to pass in &ptr instead of ptr. * You may instead wish to use the flex_array_put_ptr() * helper function. * * Locking must be provided by the caller. 
*/ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags) { int part_nr = 0; struct flex_array_part *part; void *dst; if (element_nr >= fa->total_nr_elements) return -ENOSPC; if (!fa->element_size) return 0; if (elements_fit_in_base(fa)) part = (struct flex_array_part *)&fa->parts[0]; else { part_nr = fa_element_to_part_nr(fa, element_nr); part = __fa_get_part(fa, part_nr, flags); if (!part) return -ENOMEM; } dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; memcpy(dst, src, fa->element_size); return 0; } /** * flex_array_clear - clear element in array at @element_nr * @fa: the flex array of the element. * @element_nr: index of the position to clear. * * Locking must be provided by the caller. */ int flex_array_clear(struct flex_array *fa, unsigned int element_nr) { int part_nr = 0; struct flex_array_part *part; void *dst; if (element_nr >= fa->total_nr_elements) return -ENOSPC; if (!fa->element_size) return 0; if (elements_fit_in_base(fa)) part = (struct flex_array_part *)&fa->parts[0]; else { part_nr = fa_element_to_part_nr(fa, element_nr); part = fa->parts[part_nr]; if (!part) return -EINVAL; } dst = &part->elements[index_inside_part(fa, element_nr, part_nr)]; memset(dst, FLEX_ARRAY_FREE, fa->element_size); return 0; } /** * flex_array_prealloc - guarantee that array space exists * @fa: the flex array for which to preallocate parts * @start: index of first array element for which space is * allocated * @nr_elements: number of elements for which space is allocated * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to * be holding a lock or in some atomic context while writing * data into the array. * * Locking must be provided by the caller. 
*/ int flex_array_prealloc(struct flex_array *fa, unsigned int start, unsigned int nr_elements, gfp_t flags) { int start_part; int end_part; int part_nr; unsigned int end; struct flex_array_part *part; if (!start && !nr_elements) return 0; if (start >= fa->total_nr_elements) return -ENOSPC; if (!nr_elements) return 0; end = start + nr_elements - 1; if (end >= fa->total_nr_elements) return -ENOSPC; if (!fa->element_size) return 0; if (elements_fit_in_base(fa)) return 0; start_part = fa_element_to_part_nr(fa, start); end_part = fa_element_to_part_nr(fa, end); for (part_nr = start_part; part_nr <= end_part; part_nr++) { part = __fa_get_part(fa, part_nr, flags); if (!part) return -ENOMEM; } return 0; } /** * flex_array_get - pull data back out of the array * @fa: the flex array from which to extract data * @element_nr: index of the element to fetch from the array * * Returns a pointer to the data at index @element_nr. Note * that this is a copy of the data that was passed in. If you * are using this to store pointers, you'll get back &ptr. You * may instead wish to use the flex_array_get_ptr helper. * * Locking must be provided by the caller. */ void *flex_array_get(struct flex_array *fa, unsigned int element_nr) { int part_nr = 0; struct flex_array_part *part; if (!fa->element_size) return NULL; if (element_nr >= fa->total_nr_elements) return NULL; if (elements_fit_in_base(fa)) part = (struct flex_array_part *)&fa->parts[0]; else { part_nr = fa_element_to_part_nr(fa, element_nr); part = fa->parts[part_nr]; if (!part) return NULL; } return &part->elements[index_inside_part(fa, element_nr, part_nr)]; } /** * flex_array_get_ptr - pull a ptr back out of the array * @fa: the flex array from which to extract data * @element_nr: index of the element to fetch from the array * * Returns the pointer placed in the flex array at element_nr using * flex_array_put_ptr(). This function should not be called if the * element in question was not set using the _put_ptr() helper. 
*/ void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) { void **tmp; tmp = flex_array_get(fa, element_nr); if (!tmp) return NULL; return *tmp; } static int part_is_free(struct flex_array_part *part) { int i; for (i = 0; i < sizeof(struct flex_array_part); i++) if (part->elements[i] != FLEX_ARRAY_FREE) return 0; return 1; } /** * flex_array_shrink - free unused second-level pages * @fa: the flex array to shrink * * Frees all second-level pages that consist solely of unused * elements. Returns the number of pages freed. * * Locking must be provided by the caller. */ int flex_array_shrink(struct flex_array *fa) { struct flex_array_part *part; int part_nr; int ret = 0; if (!fa->total_nr_elements || !fa->element_size) return 0; if (elements_fit_in_base(fa)) return ret; for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { part = fa->parts[part_nr]; if (!part) continue; if (part_is_free(part)) { fa->parts[part_nr] = NULL; kfree(part); ret++; } } return ret; } #endif /* Linux version < 3.0.0 */ openvswitch-2.0.1+git20140120/datapath/linux/compat/flow_dissector.c000066400000000000000000000121051226605124000250530ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA * * This code is derived from kernel flow_dissector.c */ #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) #include #include #include #include #include #include #include #include #include #include #include #include #include /* copy saddr & daddr, possibly using 64bit load/store * Equivalent to : flow->src = iph->saddr; * flow->dst = iph->daddr; */ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *iph) { BUILD_BUG_ON(offsetof(typeof(*flow), dst) != offsetof(typeof(*flow), src) + sizeof(flow->src)); memcpy(&flow->src, &iph->saddr, sizeof(flow->src) + sizeof(flow->dst)); } static bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) { int poff, nhoff = skb_network_offset(skb); u8 ip_proto; __be16 proto = skb->protocol; memset(flow, 0, sizeof(*flow)); again: switch (proto) { case __constant_htons(ETH_P_IP): { const struct iphdr *iph; struct iphdr _iph; ip: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); if (!iph) return false; if (ip_is_fragment(iph)) ip_proto = 0; else ip_proto = iph->protocol; iph_to_flow_copy_addrs(flow, iph); nhoff += iph->ihl * 4; break; } case __constant_htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; ipv6: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); if (!iph) return false; ip_proto = iph->nexthdr; flow->src = (__force __be32)ipv6_addr_hash(&iph->saddr); flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); break; } case __constant_htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; struct vlan_hdr _vlan; vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); if (!vlan) return false; proto = vlan->h_vlan_encapsulated_proto; nhoff += sizeof(*vlan); goto again; } case 
__constant_htons(ETH_P_PPP_SES): { struct { struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); if (!hdr) return false; proto = hdr->proto; nhoff += PPPOE_SES_HLEN; switch (proto) { case __constant_htons(PPP_IP): goto ip; case __constant_htons(PPP_IPV6): goto ipv6; default: return false; } } default: return false; } switch (ip_proto) { case IPPROTO_GRE: { struct gre_hdr { __be16 flags; __be16 proto; } *hdr, _hdr; hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); if (!hdr) return false; /* * Only look inside GRE if version zero and no * routing */ if (!(hdr->flags & (GRE_VERSION|GRE_ROUTING))) { proto = hdr->proto; nhoff += 4; if (hdr->flags & GRE_CSUM) nhoff += 4; if (hdr->flags & GRE_KEY) nhoff += 4; if (hdr->flags & GRE_SEQ) nhoff += 4; if (proto == htons(ETH_P_TEB)) { const struct ethhdr *eth; struct ethhdr _eth; eth = skb_header_pointer(skb, nhoff, sizeof(_eth), &_eth); if (!eth) return false; proto = eth->h_proto; nhoff += sizeof(*eth); } goto again; } break; } case IPPROTO_IPIP: goto again; default: break; } flow->ip_proto = ip_proto; poff = proto_ports_offset(ip_proto); if (poff >= 0) { __be32 *ports, _ports; nhoff += poff; ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); if (ports) flow->ports = *ports; } flow->thoff = (u16) nhoff; return true; } static u32 hashrnd __read_mostly; static void init_hashrnd(void) { if (likely(hashrnd)) return; get_random_bytes(&hashrnd, sizeof(hashrnd)); } u32 __skb_get_rxhash(struct sk_buff *skb) { struct flow_keys keys; u32 hash; if (!skb_flow_dissect(skb, &keys)) return 0; /* get a consistent hash (same value on both flow directions) */ if (((__force u32)keys.dst < (__force u32)keys.src) || (((__force u32)keys.dst == (__force u32)keys.src) && ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) { swap(keys.dst, keys.src); swap(keys.port16[0], keys.port16[1]); } init_hashrnd(); hash = jhash_3words((__force u32)keys.dst, (__force 
u32)keys.src, (__force u32)keys.ports, hashrnd); if (!hash) hash = 1; #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34) skb->rxhash = hash; #endif return hash; } #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/genetlink-openvswitch.c000066400000000000000000000007001226605124000263520ustar00rootroot00000000000000#include #include /* This is analogous to rtnl_notify() but uses genl_sock instead of rtnl. * * This is not (yet) in any upstream kernel. */ void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *sk = net->genl_sock; int report = 0; if (nlh) report = nlmsg_report(nlh); nlmsg_notify(sk, skb, portid, group, report, flags); } openvswitch-2.0.1+git20140120/datapath/linux/compat/gre.c000066400000000000000000000170351226605124000226110ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #include #if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gso.h" static struct gre_cisco_protocol __rcu *gre_cisco_proto; static void gre_csum_fix(struct sk_buff *skb) { struct gre_base_hdr *greh; __be32 *options; int gre_offset = skb_transport_offset(skb); greh = (struct gre_base_hdr *)skb_transport_header(skb); options = ((__be32 *)greh + 1); *options = 0; *(__sum16 *)options = csum_fold(skb_checksum(skb, gre_offset, skb->len - gre_offset, 0)); } struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) { int err; skb_reset_inner_headers(skb); if (skb_is_gso(skb)) { if (gre_csum) OVS_GSO_CB(skb)->fix_segment = gre_csum_fix; } else { if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { err = skb_checksum_help(skb); if (err) goto error; } else if (skb->ip_summed != CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_NONE; } return skb; error: kfree_skb(skb); return ERR_PTR(err); } static bool is_gre_gso(struct sk_buff *skb) { return skb_is_gso(skb); } void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, int hdr_len) { struct gre_base_hdr *greh; __skb_push(skb, hdr_len); greh = (struct gre_base_hdr *)skb->data; greh->flags = tnl_flags_to_gre_flags(tpi->flags); greh->protocol = tpi->proto; if (tpi->flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); if (tpi->flags & TUNNEL_SEQ) { *ptr = tpi->seq; ptr--; } if (tpi->flags & TUNNEL_KEY) { *ptr = tpi->key; ptr--; } if (tpi->flags & TUNNEL_CSUM && !is_gre_gso(skb)) { *ptr = 0; *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, skb->len, 0)); } } } static __sum16 check_checksum(struct 
sk_buff *skb) { __sum16 csum = 0; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: csum = csum_fold(skb->csum); if (!csum) break; /* Fall through. */ case CHECKSUM_NONE: skb->csum = 0; csum = __skb_checksum_complete(skb); skb->ip_summed = CHECKSUM_COMPLETE; break; } return csum; } static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { unsigned int ip_hlen = ip_hdrlen(skb); struct gre_base_hdr *greh; __be32 *options; int hdr_len; if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) return -EINVAL; greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; tpi->flags = gre_flags_to_tnl_flags(greh->flags); hdr_len = ip_gre_calc_hlen(tpi->flags); if (!pskb_may_pull(skb, hdr_len)) return -EINVAL; greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); tpi->proto = greh->protocol; options = (__be32 *)(greh + 1); if (greh->flags & GRE_CSUM) { if (check_checksum(skb)) { *csum_err = true; return -EINVAL; } options++; } if (greh->flags & GRE_KEY) { tpi->key = *options; options++; } else tpi->key = 0; if (unlikely(greh->flags & GRE_SEQ)) { tpi->seq = *options; options++; } else tpi->seq = 0; /* WCCP version 1 and 2 protocol decoding. 
* - Change protocol to IP * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { tpi->proto = htons(ETH_P_IP); if ((*(u8 *)options & 0xF0) != 0x40) { hdr_len += 4; if (!pskb_may_pull(skb, hdr_len)) return -EINVAL; } } return iptunnel_pull_header(skb, hdr_len, tpi->proto); } static int gre_cisco_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; struct gre_cisco_protocol *proto; rcu_read_lock(); proto = rcu_dereference(gre_cisco_proto); if (!proto) goto drop; if (parse_gre_header(skb, &tpi, &csum_err) < 0) goto drop; proto->handler(skb, &tpi); rcu_read_unlock(); return 0; drop: rcu_read_unlock(); kfree_skb(skb); return 0; } static const struct gre_protocol ipgre_protocol = { .handler = gre_cisco_rcv, }; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; int gre_add_protocol(const struct gre_protocol *proto, u8 version) { if (version >= GREPROTO_MAX) return -EINVAL; return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? 0 : -EBUSY; } int gre_del_protocol(const struct gre_protocol *proto, u8 version) { int ret; if (version >= GREPROTO_MAX) return -EINVAL; ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? 
0 : -EBUSY; if (ret) return ret; synchronize_net(); return 0; } static int gre_rcv(struct sk_buff *skb) { const struct gre_protocol *proto; u8 ver; int ret; if (!pskb_may_pull(skb, 12)) goto drop; ver = skb->data[1] & 0x7f; if (ver >= GREPROTO_MAX) goto drop; rcu_read_lock(); proto = rcu_dereference(gre_proto[ver]); if (!proto || !proto->handler) goto drop_unlock; ret = proto->handler(skb); rcu_read_unlock(); return ret; drop_unlock: rcu_read_unlock(); drop: kfree_skb(skb); return NET_RX_DROP; } static const struct net_protocol net_gre_protocol = { .handler = gre_rcv, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32) .netns_ok = 1, #endif }; #endif static int gre_compat_init(void) { int err; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { pr_err("%s: cannot register gre protocol handler\n", __func__); return -EAGAIN; } #endif err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err) { pr_warn("%s: cannot register gre_cisco protocol handler\n", __func__); #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); #endif } return err; } static int gre_compat_exit(void) { int ret; ret = gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); if (ret) return ret; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) ret = inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); if (ret) return ret; #endif return 0; } int gre_cisco_register(struct gre_cisco_protocol *newp) { int err; err = gre_compat_init(); if (err) return err; return (cmpxchg((struct gre_cisco_protocol **)&gre_cisco_proto, NULL, newp) == NULL) ? 0 : -EBUSY; } int gre_cisco_unregister(struct gre_cisco_protocol *proto) { int ret; ret = (cmpxchg((struct gre_cisco_protocol **)&gre_cisco_proto, proto, NULL) == proto) ? 
0 : -EINVAL; if (ret) return ret; synchronize_net(); ret = gre_compat_exit(); return ret; } #endif /* CONFIG_NET_IPGRE_DEMUX */ openvswitch-2.0.1+git20140120/datapath/linux/compat/gso.c000066400000000000000000000071341226605124000226230ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gso.h" static __be16 __skb_network_protocol(struct sk_buff *skb) { __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) return 0; vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } return type; } static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) { struct iphdr *iph = ip_hdr(skb); int pkt_hlen = skb_inner_network_offset(skb); /* inner l2 + tunnel hdr. */ int mac_offset = skb_inner_mac_offset(skb); struct sk_buff *skb1 = skb; struct sk_buff *segs; __be16 proto = skb->protocol; char cb[sizeof(skb->cb)]; /* setup whole inner packet to get protocol. 
*/ __skb_pull(skb, mac_offset); skb->protocol = __skb_network_protocol(skb); /* setup l3 packet to gso, to get around segmentation bug on older kernel.*/ __skb_pull(skb, (pkt_hlen - mac_offset)); skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); /* From 3.9 kernel skb->cb is used by skb gso. Therefore * make copy of it to restore it back. */ memcpy(cb, skb->cb, sizeof(cb)); segs = __skb_gso_segment(skb, 0, tx_path); if (!segs || IS_ERR(segs)) goto free; skb = segs; while (skb) { __skb_push(skb, pkt_hlen); skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_set_transport_header(skb, sizeof(struct iphdr)); skb->mac_len = 0; memcpy(ip_hdr(skb), iph, pkt_hlen); memcpy(skb->cb, cb, sizeof(cb)); if (OVS_GSO_CB(skb)->fix_segment) OVS_GSO_CB(skb)->fix_segment(skb); skb->protocol = proto; skb = skb->next; } free: consume_skb(skb1); return segs; } int rpl_ip_local_out(struct sk_buff *skb) { int ret = NETDEV_TX_OK; int id = -1; if (skb_is_gso(skb)) { struct iphdr *iph; iph = ip_hdr(skb); id = ntohs(iph->id); skb = tnl_skb_gso_segment(skb, 0, false); if (!skb || IS_ERR(skb)) return 0; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { int err; err = skb_checksum_help(skb); if (unlikely(err)) return 0; } while (skb) { struct sk_buff *next_skb = skb->next; struct iphdr *iph; int err; skb->next = NULL; iph = ip_hdr(skb); if (id >= 0) iph->id = htons(id++); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); #undef ip_local_out err = ip_local_out(skb); if (unlikely(net_xmit_eval(err))) ret = err; skb = next_skb; } return ret; } openvswitch-2.0.1+git20140120/datapath/linux/compat/gso.h000066400000000000000000000037761226605124000226400ustar00rootroot00000000000000#ifndef __LINUX_GSO_WRAPPER_H #define __LINUX_GSO_WRAPPER_H #include #include #include "datapath.h" struct ovs_gso_cb { struct ovs_skb_cb dp_cb; sk_buff_data_t inner_network_header; sk_buff_data_t inner_mac_header; void (*fix_segment)(struct sk_buff *); }; #define OVS_GSO_CB(skb) 
((struct ovs_gso_cb *)(skb)->cb) #define skb_inner_network_header rpl_skb_inner_network_header #ifdef NET_SKBUFF_DATA_USES_OFFSET #define skb_inner_network_header rpl_skb_inner_network_header static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) { return skb->head + OVS_GSO_CB(skb)->inner_network_header; } #define skb_inner_mac_header rpl_skb_inner_mac_header static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return skb->head + OVS_GSO_CB(skb)->inner_mac_header; } #else #define skb_inner_network_header rpl_skb_inner_network_header static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb) { return OVS_GSO_CB(skb)->inner_network_header; } #define skb_inner_mac_header rpl_skb_inner_mac_header static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb) { return OVS_GSO_CB(skb)->inner_mac_header; } #endif #define skb_inner_network_offset rpl_skb_inner_network_offset static inline int skb_inner_network_offset(const struct sk_buff *skb) { return skb_inner_network_header(skb) - skb->data; } #define skb_inner_mac_offset rpl_skb_inner_mac_offset static inline int skb_inner_mac_offset(const struct sk_buff *skb) { return skb_inner_mac_header(skb) - skb->data; } #define skb_reset_inner_headers rpl_skb_reset_inner_headers static inline void skb_reset_inner_headers(struct sk_buff *skb) { BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > FIELD_SIZEOF(struct sk_buff, cb)); OVS_GSO_CB(skb)->inner_network_header = skb->network_header; OVS_GSO_CB(skb)->inner_mac_header = skb->mac_header; OVS_GSO_CB(skb)->fix_segment = NULL; } #define ip_local_out rpl_ip_local_out int ip_local_out(struct sk_buff *skb); #endif 
openvswitch-2.0.1+git20140120/datapath/linux/compat/include/000077500000000000000000000000001226605124000233055ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/datapath/linux/compat/include/asm/000077500000000000000000000000001226605124000240655ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/datapath/linux/compat/include/asm/percpu.h000066400000000000000000000003451226605124000255360ustar00rootroot00000000000000#ifndef __ASM_PERCPU_WRAPPER_H #define __ASM_PERCPU_WRAPPER_H 1 #include_next #if !defined this_cpu_ptr && !defined HAVE_THIS_CPU_PTR #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, smp_processor_id()) #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/000077500000000000000000000000001226605124000244445ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/compiler-gcc.h000066400000000000000000000003551226605124000271640ustar00rootroot00000000000000#ifndef __LINUX_COMPILER_H #error "Please don't include directly, include instead." #endif #include_next #ifndef __packed #define __packed __attribute__((packed)) #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/compiler.h000066400000000000000000000003001226605124000264200ustar00rootroot00000000000000#ifndef __LINUX_COMPILER_WRAPPER_H #define __LINUX_COMPILER_WRAPPER_H 1 #include_next #ifndef __percpu #define __percpu #endif #ifndef __rcu #define __rcu #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/cpumask.h000066400000000000000000000004341226605124000262610ustar00rootroot00000000000000#ifndef __LINUX_CPUMASK_WRAPPER_H #define __LINUX_CPUMASK_WRAPPER_H #include_next /* for_each_cpu was renamed for_each_possible_cpu in 2.6.18. 
*/ #ifndef for_each_possible_cpu #define for_each_possible_cpu for_each_cpu #endif #endif /* linux/cpumask.h wrapper */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/err.h000066400000000000000000000007571226605124000254160ustar00rootroot00000000000000#ifndef __LINUX_ERR_WRAPPER_H #define __LINUX_ERR_WRAPPER_H 1 #include_next #ifndef HAVE_ERR_CAST /** * ERR_CAST - Explicitly cast an error-valued pointer to another pointer type * @ptr: The pointer to cast. * * Explicitly cast an error-valued pointer to another pointer type in such a * way as to make it clear that's what's going on. */ static inline void *ERR_CAST(const void *ptr) { /* cast away the const */ return (void *) ptr; } #endif /* HAVE_ERR_CAST */ #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/etherdevice.h000066400000000000000000000016021226605124000271030ustar00rootroot00000000000000#ifndef __LINUX_ETHERDEVICE_WRAPPER_H #define __LINUX_ETHERDEVICE_WRAPPER_H 1 #include #include_next #ifndef HAVE_ETH_HW_ADDR_RANDOM #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) static inline void eth_hw_addr_random(struct net_device *dev) { random_ether_addr(dev->dev_addr); } #elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) static inline void eth_hw_addr_random(struct net_device *dev) { dev_hw_addr_random(dev, dev->dev_addr); } #endif #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) #define eth_mac_addr rpl_eth_mac_addr static inline int eth_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; #ifdef NET_ADDR_RANDOM dev->addr_assign_type &= ~NET_ADDR_RANDOM; #endif memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/flex_array.h000066400000000000000000000051041226605124000267510ustar00rootroot00000000000000#ifndef __LINUX_FLEX_ARRAY_WRAPPER_H #define __LINUX_FLEX_ARRAY_WRAPPER_H #include #if 
LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0) #include_next #else #include #include #define FLEX_ARRAY_PART_SIZE PAGE_SIZE #define FLEX_ARRAY_BASE_SIZE PAGE_SIZE struct flex_array_part; /* * This is meant to replace cases where an array-like * structure has gotten too big to fit into kmalloc() * and the developer is getting tempted to use * vmalloc(). */ struct flex_array { union { struct { int element_size; int total_nr_elements; int elems_per_part; u32 reciprocal_elems; struct flex_array_part *parts[]; }; /* * This little trick makes sure that * sizeof(flex_array) == PAGE_SIZE */ char padding[FLEX_ARRAY_BASE_SIZE]; }; }; /* Number of bytes left in base struct flex_array, excluding metadata */ #define FLEX_ARRAY_BASE_BYTES_LEFT \ (FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts)) /* Number of pointers in base to struct flex_array_part pages */ #define FLEX_ARRAY_NR_BASE_PTRS \ (FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *)) /* Number of elements of size that fit in struct flex_array_part */ #define FLEX_ARRAY_ELEMENTS_PER_PART(size) \ (FLEX_ARRAY_PART_SIZE / size) /* * Defines a statically allocated flex array and ensures its parameters are * valid. 
*/ #define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total) \ struct flex_array __arrayname = { { { \ .element_size = (__element_size), \ .total_nr_elements = (__total), \ } } }; \ static inline void __arrayname##_invalid_parameter(void) \ { \ BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS * \ FLEX_ARRAY_ELEMENTS_PER_PART(__element_size)); \ } struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); int flex_array_prealloc(struct flex_array *fa, unsigned int start, unsigned int nr_elements, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); int flex_array_clear(struct flex_array *fa, unsigned int element_nr); void *flex_array_get(struct flex_array *fa, unsigned int element_nr); int flex_array_shrink(struct flex_array *fa); #define flex_array_put_ptr(fa, nr, src, gfp) \ flex_array_put(fa, nr, (void *)&(src), gfp) void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr); #endif /* Linux version < 3.0.0 */ #endif /* __LINUX_FLEX_ARRAY_WRAPPER_H */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/icmp.h000066400000000000000000000004141226605124000255440ustar00rootroot00000000000000#ifndef __LINUX_ICMP_WRAPPER_H #define __LINUX_ICMP_WRAPPER_H 1 #include_next #ifndef HAVE_SKBUFF_HEADER_HELPERS static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb) { return (struct icmphdr *)skb_transport_header(skb); } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/icmpv6.h000066400000000000000000000004111226605124000260150ustar00rootroot00000000000000#ifndef __LINUX_ICMPV6_WRAPPER_H #define __LINUX_ICMPV6_WRAPPER_H 1 #include_next #ifndef HAVE_ICMP6_HDR static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) { return (struct icmp6hdr *)skb_transport_header(skb); } #endif #endif 
openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/if.h000066400000000000000000000004401226605124000252110ustar00rootroot00000000000000#ifndef __LINUX_IF_WRAPPER_H #define __LINUX_IF_WRAPPER_H 1 #include_next #ifndef IFF_TX_SKB_SHARING #define IFF_TX_SKB_SHARING 0 #endif #ifndef IFF_OVS_DATAPATH #define IFF_OVS_DATAPATH 0 #endif #ifndef IFF_LIVE_ADDR_CHANGE #define IFF_LIVE_ADDR_CHANGE 0 #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/if_arp.h000066400000000000000000000005121226605124000260530ustar00rootroot00000000000000#ifndef __LINUX_IF_ARP_WRAPPER_H #define __LINUX_IF_ARP_WRAPPER_H 1 #include_next #ifndef HAVE_SKBUFF_HEADER_HELPERS #include static inline struct arphdr *arp_hdr(const struct sk_buff *skb) { return (struct arphdr *)skb_network_header(skb); } #endif /* !HAVE_SKBUFF_HEADER_HELPERS */ #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/if_ether.h000066400000000000000000000004401226605124000264000ustar00rootroot00000000000000#ifndef __LINUX_IF_ETHER_WRAPPER_H #define __LINUX_IF_ETHER_WRAPPER_H 1 #include_next #ifndef ETH_P_802_3_MIN #define ETH_P_802_3_MIN 0x0600 #endif #ifndef ETH_P_8021AD #define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */ #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/if_tunnel.h000066400000000000000000000006011226605124000265750ustar00rootroot00000000000000#ifndef _IF_TUNNEL_WRAPPER_H_ #define _IF_TUNNEL_WRAPPER_H_ #include #include_next #if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) #include struct pcpu_tstats { u64 rx_packets; u64 rx_bytes; u64 tx_packets; u64 tx_bytes; struct u64_stats_sync syncp; }; #endif #endif /* _IF_TUNNEL_WRAPPER_H_ */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/if_vlan.h000066400000000000000000000054761226605124000262470ustar00rootroot00000000000000#ifndef __LINUX_IF_VLAN_WRAPPER_H #define __LINUX_IF_VLAN_WRAPPER_H 1 #include #include #include_next #if LINUX_VERSION_CODE < 
KERNEL_VERSION(3,10,0) /* * The behavior of __vlan_put_tag() has changed over time: * * - In 2.6.26 and earlier, it adjusted both MAC and network header * pointers. (The latter didn't make any sense.) * * - In 2.6.27 and 2.6.28, it did not adjust any header pointers at all. * * - In 2.6.29 and later, it adjusts the MAC header pointer only. * * This is the version from 2.6.33. We unconditionally substitute this version * to avoid the need to guess whether the version in the kernel tree is * acceptable. */ #define __vlan_put_tag(skb, proto, tag) rpl__vlan_put_tag(skb, tag) static inline struct sk_buff *rpl__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) { struct vlan_ethhdr *veth; if (skb_cow_head(skb, VLAN_HLEN) < 0) { kfree_skb(skb); return NULL; } veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN); /* Move the mac addresses to the beginning of the new header. */ memmove(skb->data, skb->data + VLAN_HLEN, 2 * ETH_ALEN); skb->mac_header -= VLAN_HLEN; /* first, the ethernet type */ veth->h_vlan_proto = htons(ETH_P_8021Q); /* now, the TCI */ veth->h_vlan_TCI = htons(vlan_tci); skb->protocol = htons(ETH_P_8021Q); return skb; } static inline struct sk_buff *rpl___vlan_hwaccel_put_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { return __vlan_hwaccel_put_tag(skb, vlan_tci); } #define __vlan_hwaccel_put_tag rpl___vlan_hwaccel_put_tag #endif /* All of these were introduced in a single commit preceding 2.6.33, so * presumably all of them or none of them are present. */ #ifndef VLAN_PRIO_MASK #define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ #define VLAN_PRIO_SHIFT 13 #define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ #define VLAN_TAG_PRESENT VLAN_CFI_MASK #endif #ifndef HAVE_VLAN_SET_ENCAP_PROTO static inline void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr) { __be16 proto; unsigned char *rawp; /* * Was a VLAN packet, grab the encapsulated protocol, which the layer * three protocols care about. 
*/ proto = vhdr->h_vlan_encapsulated_proto; if (ntohs(proto) >= 1536) { skb->protocol = proto; return; } rawp = skb->data; if (*(unsigned short *) rawp == 0xFFFF) /* * This is a magic hack to spot IPX packets. Older Novell * breaks the protocol design and runs IPX over 802.3 without * an 802.2 LLC layer. We look for FFFF which isn't a used * 802.2 SSAP/DSAP. This won't work for fault tolerant netware * but does for the rest. */ skb->protocol = htons(ETH_P_802_3); else /* * Real 802.2 LLC */ skb->protocol = htons(ETH_P_802_2); } #endif #endif /* linux/if_vlan.h wrapper */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/in.h000066400000000000000000000021141226605124000252210ustar00rootroot00000000000000#ifndef __LINUX_IN_WRAPPER_H #define __LINUX_IN_WRAPPER_H 1 #include_next #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) static inline int proto_ports_offset(int proto) { switch (proto) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_DCCP: case IPPROTO_ESP: /* SPI */ case IPPROTO_SCTP: case IPPROTO_UDPLITE: return 0; case IPPROTO_AH: /* SPI */ return 4; default: return -EINVAL; } } #endif #ifndef HAVE_IPV4_IS_MULTICAST static inline bool ipv4_is_loopback(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x7f000000); } static inline bool ipv4_is_multicast(__be32 addr) { return (addr & htonl(0xf0000000)) == htonl(0xe0000000); } static inline bool ipv4_is_local_multicast(__be32 addr) { return (addr & htonl(0xffffff00)) == htonl(0xe0000000); } static inline bool ipv4_is_lbcast(__be32 addr) { /* limited broadcast */ return addr == htonl(INADDR_BROADCAST); } static inline bool ipv4_is_zeronet(__be32 addr) { return (addr & htonl(0xff000000)) == htonl(0x00000000); } #endif /* !HAVE_IPV4_IS_MULTICAST */ #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/ip.h000066400000000000000000000006351226605124000252310ustar00rootroot00000000000000#ifndef __LINUX_IP_WRAPPER_H #define __LINUX_IP_WRAPPER_H 1 #include_next #ifndef 
HAVE_SKBUFF_HEADER_HELPERS #include static inline struct iphdr *ip_hdr(const struct sk_buff *skb) { return (struct iphdr *)skb_network_header(skb); } static inline unsigned int ip_hdrlen(const struct sk_buff *skb) { return ip_hdr(skb)->ihl * 4; } #endif /* !HAVE_SKBUFF_HEADER_HELPERS */ #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/ipv6.h000066400000000000000000000004121226605124000254760ustar00rootroot00000000000000#ifndef __LINUX_IPV6_WRAPPER_H #define __LINUX_IPV6_WRAPPER_H 1 #include_next #ifndef HAVE_SKBUFF_HEADER_HELPERS static inline struct ipv6hdr *ipv6_hdr(const struct sk_buff *skb) { return (struct ipv6hdr *)skb_network_header(skb); } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/jiffies.h000066400000000000000000000014061226605124000262350ustar00rootroot00000000000000#ifndef __LINUX_JIFFIES_WRAPPER_H #define __LINUX_JIFFIES_WRAPPER_H 1 #include_next #include /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. 
return value of * get_jiffies_64() */ #ifndef time_after64 #define time_after64(a, b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ ((__s64)(b) - (__s64)(a) < 0)) #endif #ifndef time_before64 #define time_before64(a, b) time_after64(b, a) #endif #ifndef time_after_eq64 #define time_after_eq64(a, b) \ (typecheck(__u64, a) && \ typecheck(__u64, b) && \ ((__s64)(a) - (__s64)(b) >= 0)) #endif #ifndef time_before_eq64 #define time_before_eq64(a, b) time_after_eq64(b, a) #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/kconfig.h000066400000000000000000000033611226605124000262400ustar00rootroot00000000000000#ifndef __LINUX_KCONFIG_WRAPPER_H #define __LINUX_KCONFIG_WRAPPER_H #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) #define CONFIG_NET_IPGRE_DEMUX 1 #endif #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0) #include_next #endif #ifndef IS_ENABLED /* * Helper macros to use CONFIG_ options in C/CPP expressions. Note that * these only work with boolean and tristate options. */ /* * Getting something that works in C and CPP for an arg that may or may * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1" * we match on the placeholder define, insert the "0," for arg1 and generate * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one). * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when * the last step cherry picks the 2nd arg, we get a zero. */ #define __ARG_PLACEHOLDER_1 0, #define config_enabled(cfg) _config_enabled(cfg) #define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value) #define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0) #define ___config_enabled(__ignored, val, ...) val /* * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm', * 0 otherwise. 
* */ #define IS_ENABLED(option) \ (config_enabled(option) || config_enabled(option##_MODULE)) /* * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0 * otherwise. For boolean options, this is equivalent to * IS_ENABLED(CONFIG_FOO). */ #define IS_BUILTIN(option) config_enabled(option) /* * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0 * otherwise. */ #define IS_MODULE(option) config_enabled(option##_MODULE) #endif /* IS_ENABLED */ #endif /* __LINUX_KCONFIG_WRAPER_H */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/kernel.h000066400000000000000000000032001226605124000260700ustar00rootroot00000000000000#ifndef __KERNEL_H_WRAPPER #define __KERNEL_H_WRAPPER 1 #include_next #ifndef HAVE_LOG2_H #include #endif #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) #define pr_warn pr_warning #endif /* * Print a one-time message (analogous to WARN_ONCE() et al): */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 38) #undef printk_once #define printk_once(fmt, ...) \ ({ \ static bool __print_once; \ \ if (!__print_once) { \ __print_once = true; \ printk(fmt, ##__VA_ARGS__); \ } \ }) #define pr_emerg_once(fmt, ...) \ printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert_once(fmt, ...) \ printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) #define pr_crit_once(fmt, ...) \ printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) #define pr_err_once(fmt, ...) \ printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) #define pr_warn_once(fmt, ...) \ printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) #define pr_notice_once(fmt, ...) \ printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) #define pr_info_once(fmt, ...) \ printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) #define pr_cont_once(fmt, ...) 
\ printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__) #endif #ifndef USHRT_MAX #define USHRT_MAX ((u16)(~0U)) #define SHRT_MAX ((s16)(USHRT_MAX>>1)) #define SHRT_MIN ((s16)(-SHRT_MAX - 1)) #endif #ifndef DIV_ROUND_UP #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) #endif #ifndef rounddown #define rounddown(x, y) ( \ { \ typeof(x) __x = (x); \ __x - (__x % (y)); \ } \ ) #endif #endif /* linux/kernel.h */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/list.h000066400000000000000000000014261226605124000255730ustar00rootroot00000000000000#ifndef __LINUX_LIST_WRAPPER_H #define __LINUX_LIST_WRAPPER_H 1 #include_next #ifndef hlist_entry_safe #define hlist_entry_safe(ptr, type, member) \ ({ typeof(ptr) ____ptr = (ptr); \ ____ptr ? hlist_entry(____ptr, type, member) : NULL; \ }) #undef hlist_for_each_entry #define hlist_for_each_entry(pos, head, member) \ for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\ pos; \ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) #undef hlist_for_each_entry_safe #define hlist_for_each_entry_safe(pos, n, head, member) \ for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\ pos && ({ n = pos->member.next; 1; }); \ pos = hlist_entry_safe(n, typeof(*pos), member)) #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/log2.h000066400000000000000000000005371226605124000254650ustar00rootroot00000000000000#ifndef __LINUX_LOG2_WRAPPER #define __LINUX_LOG2_WRAPPER #ifdef HAVE_LOG2_H #include_next #else /* This is very stripped down because log2.h has far too many dependencies. */ extern __attribute__((const, noreturn)) int ____ilog2_NaN(void); #define ilog2(n) ((n) == 4 ? 2 : \ (n) == 8 ? 
3 : \ ____ilog2_NaN()) #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/net.h000066400000000000000000000020621226605124000254030ustar00rootroot00000000000000#ifndef __LINUX_NET_WRAPPER_H #define __LINUX_NET_WRAPPER_H 1 #include_next #ifndef net_ratelimited_function #define net_ratelimited_function(function, ...) \ do { \ if (net_ratelimit()) \ function(__VA_ARGS__); \ } while (0) #define net_emerg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_emerg, fmt, ##__VA_ARGS__) #define net_alert_ratelimited(fmt, ...) \ net_ratelimited_function(pr_alert, fmt, ##__VA_ARGS__) #define net_crit_ratelimited(fmt, ...) \ net_ratelimited_function(pr_crit, fmt, ##__VA_ARGS__) #define net_err_ratelimited(fmt, ...) \ net_ratelimited_function(pr_err, fmt, ##__VA_ARGS__) #define net_notice_ratelimited(fmt, ...) \ net_ratelimited_function(pr_notice, fmt, ##__VA_ARGS__) #define net_warn_ratelimited(fmt, ...) \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) \ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) #define net_dbg_ratelimited(fmt, ...) 
\ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/netdev_features.h000066400000000000000000000004471226605124000300050ustar00rootroot00000000000000#ifndef __LINUX_NETDEV_FEATURES_WRAPPER_H #define __LINUX_NETDEV_FEATURES_WRAPPER_H #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0) #include_next #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) #define NETIF_F_HW_VLAN_CTAG_TX NETIF_F_HW_VLAN_TX #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/netdevice.h000066400000000000000000000063021226605124000265640ustar00rootroot00000000000000#ifndef __LINUX_NETDEVICE_WRAPPER_H #define __LINUX_NETDEVICE_WRAPPER_H 1 #include_next struct net; #include #ifndef to_net_dev #define to_net_dev(class) container_of(class, struct net_device, NETDEV_DEV_MEMBER) #endif #ifdef HAVE_RHEL_OVS_HOOK extern struct sk_buff *(*openvswitch_handle_frame_hook)(struct sk_buff *skb); extern int nr_bridges; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) extern void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); extern void unregister_netdevice_many(struct list_head *head); #endif #ifndef HAVE_DEV_DISABLE_LRO extern void dev_disable_lro(struct net_device *dev); #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) || \ defined HAVE_RHEL_OVS_HOOK static inline int netdev_rx_handler_register(struct net_device *dev, void *rx_handler, void *rx_handler_data) { #ifdef HAVE_RHEL_OVS_HOOK rcu_assign_pointer(dev->ax25_ptr, rx_handler_data); nr_bridges++; rcu_assign_pointer(openvswitch_handle_frame_hook, rx_handler); #else if (dev->br_port) return -EBUSY; rcu_assign_pointer(dev->br_port, rx_handler_data); #endif return 0; } static inline void netdev_rx_handler_unregister(struct net_device *dev) { #ifdef HAVE_RHEL_OVS_HOOK rcu_assign_pointer(dev->ax25_ptr, NULL); if (--nr_bridges <= 0) rcu_assign_pointer(openvswitch_handle_frame_hook, NULL); #else 
rcu_assign_pointer(dev->br_port, NULL); #endif } #endif #ifndef HAVE_DEV_GET_BY_INDEX_RCU static inline struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex) { struct net_device *dev; read_lock(&dev_base_lock); dev = __dev_get_by_index(net, ifindex); read_unlock(&dev_base_lock); return dev; } #endif #ifndef NETIF_F_FSO #define NETIF_F_FSO 0 #endif #ifndef HAVE_NETDEV_FEATURES_T typedef u32 netdev_features_t; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) #define skb_gso_segment rpl_skb_gso_segment struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features); #define netif_skb_features rpl_netif_skb_features netdev_features_t rpl_netif_skb_features(struct sk_buff *skb); #define netif_needs_gso rpl_netif_needs_gso static inline int rpl_netif_needs_gso(struct sk_buff *skb, int features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } #endif #ifndef HAVE___SKB_GSO_SEGMENT static inline struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) { return skb_gso_segment(skb, features); } #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) /* XEN dom0 networking assumes dev->master is bond device * and it tries to access bond private structure from dev->master * ptr on receive path. This causes panic. Therefore it is better * not to backport this API. 
**/ static inline int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev) { return 0; } static inline void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { } static inline struct net_device *netdev_master_upper_dev_get(struct net_device *dev) { return NULL; } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/netlink.h000066400000000000000000000006441226605124000262650ustar00rootroot00000000000000#ifndef __LINUX_NETLINK_WRAPPER_H #define __LINUX_NETLINK_WRAPPER_H 1 #include #include_next #ifndef NLA_TYPE_MASK #define NLA_F_NESTED (1 << 15) #define NLA_F_NET_BYTEORDER (1 << 14) #define NLA_TYPE_MASK (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)) #endif #include #ifndef NLMSG_DEFAULT_SIZE #define NLMSG_DEFAULT_SIZE (NLMSG_GOODSIZE - NLMSG_HDRLEN) #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/poison.h000066400000000000000000000003701226605124000261240ustar00rootroot00000000000000#ifndef __LINUX_POISON_WRAPPER_H #define __LINUX_POISON_WRAPPER_H 1 #include_next #ifndef FLEX_ARRAY_FREE /********** lib/flex_array.c **********/ #define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/rculist.h000066400000000000000000000013001226605124000262740ustar00rootroot00000000000000#ifndef __LINUX_RCULIST_WRAPPER_H #define __LINUX_RCULIST_WRAPPER_H #include_next #ifndef hlist_first_rcu #define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) #define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) #define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) #endif #undef hlist_for_each_entry_rcu #define hlist_for_each_entry_rcu(pos, head, member) \ for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ 
&(pos)->member)), typeof(*(pos)), member)) #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/rcupdate.h000066400000000000000000000007711226605124000264310ustar00rootroot00000000000000#ifndef __RCUPDATE_WRAPPER_H #define __RCUPDATE_WRAPPER_H 1 #include_next #ifndef rcu_dereference_check #define rcu_dereference_check(p, c) rcu_dereference(p) #endif #ifndef rcu_dereference_protected #define rcu_dereference_protected(p, c) (p) #endif #ifndef rcu_dereference_raw #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) #endif #ifndef HAVE_RCU_READ_LOCK_HELD static inline int rcu_read_lock_held(void) { return 1; } #endif #endif /* linux/rcupdate.h wrapper */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/rtnetlink.h000066400000000000000000000021571226605124000266340ustar00rootroot00000000000000#ifndef __RTNETLINK_WRAPPER_H #define __RTNETLINK_WRAPPER_H 1 #include_next #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) #ifdef CONFIG_PROVE_LOCKING static inline int lockdep_rtnl_is_held(void) { return 1; } #endif #endif #ifndef rcu_dereference_rtnl /** * rcu_dereference_rtnl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference() */ #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, rcu_read_lock_held() || \ lockdep_rtnl_is_held()) #endif #ifndef rtnl_dereference /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because * caller holds RTNL. 
*/ #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) #endif #endif /* linux/rtnetlink.h wrapper */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/sctp.h000066400000000000000000000004551226605124000255720ustar00rootroot00000000000000#ifndef __LINUX_SCTP_WRAPPER_H #define __LINUX_SCTP_WRAPPER_H 1 #include_next #ifndef HAVE_SKBUFF_HEADER_HELPERS static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb) { return (struct sctphdr *)skb_transport_header(skb); } #endif /* HAVE_SKBUFF_HEADER_HELPERS */ #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/skbuff.h000066400000000000000000000127241226605124000261030ustar00rootroot00000000000000#ifndef __LINUX_SKBUFF_WRAPPER_H #define __LINUX_SKBUFF_WRAPPER_H 1 #include_next #include #ifndef HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb, const int offset, void *to, const unsigned int len) { memcpy(to, skb->data + offset, len); } static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, const int offset, const void *from, const unsigned int len) { memcpy(skb->data + offset, from, len); } #endif /* !HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET */ #ifndef HAVE_SKB_RESET_TAIL_POINTER static inline void skb_reset_tail_pointer(struct sk_buff *skb) { skb->tail = skb->data; } #endif /* * The networking layer reserves some headroom in skb data (via * dev_alloc_skb). This is used to avoid having to reallocate skb data when * the header has to grow. In the default case, if the header has to grow * 16 bytes or less we avoid the reallocation. * * Unfortunately this headroom changes the DMA alignment of the resulting * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive * on some architectures. An architecture can override this value, * perhaps setting it to a cacheline in size (since that will maintain * cacheline alignment of the DMA). It must be a power of 2. 
* * Various parts of the networking layer expect at least 16 bytes of * headroom, you should not reduce this. */ #ifndef NET_SKB_PAD #define NET_SKB_PAD 16 #endif #ifndef HAVE_SKB_COW_HEAD static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, int cloned) { int delta = 0; if (headroom < NET_SKB_PAD) headroom = NET_SKB_PAD; if (headroom > skb_headroom(skb)) delta = headroom - skb_headroom(skb); if (delta || cloned) return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, GFP_ATOMIC); return 0; } static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) { return __skb_cow(skb, headroom, skb_header_cloned(skb)); } #endif /* !HAVE_SKB_COW_HEAD */ #ifndef HAVE_SKB_DST_ACCESSOR_FUNCS static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { return (struct dst_entry *)skb->dst; } static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { skb->dst = dst; } static inline struct rtable *skb_rtable(const struct sk_buff *skb) { return (struct rtable *)skb->dst; } #endif #ifndef CHECKSUM_PARTIAL #define CHECKSUM_PARTIAL CHECKSUM_HW #endif #ifndef CHECKSUM_COMPLETE #define CHECKSUM_COMPLETE CHECKSUM_HW #endif #ifdef HAVE_MAC_RAW #define mac_header mac.raw #define network_header nh.raw #define transport_header h.raw #endif #ifndef HAVE_SKBUFF_HEADER_HELPERS static inline unsigned char *skb_transport_header(const struct sk_buff *skb) { return skb->h.raw; } static inline void skb_reset_transport_header(struct sk_buff *skb) { skb->h.raw = skb->data; } static inline void skb_set_transport_header(struct sk_buff *skb, const int offset) { skb->h.raw = skb->data + offset; } static inline unsigned char *skb_network_header(const struct sk_buff *skb) { return skb->nh.raw; } static inline void skb_reset_network_header(struct sk_buff *skb) { skb->nh.raw = skb->data; } static inline void skb_set_network_header(struct sk_buff *skb, const int offset) { skb->nh.raw = skb->data + offset; } static inline unsigned char 
*skb_mac_header(const struct sk_buff *skb) { return skb->mac.raw; } static inline void skb_reset_mac_header(struct sk_buff *skb) { skb->mac_header = skb->data; } static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) { skb->mac.raw = skb->data + offset; } static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; } static inline int skb_network_offset(const struct sk_buff *skb) { return skb_network_header(skb) - skb->data; } static inline void skb_copy_to_linear_data(struct sk_buff *skb, const void *from, const unsigned int len) { memcpy(skb->data, from, len); } #endif /* !HAVE_SKBUFF_HEADER_HELPERS */ #ifndef HAVE_SKB_WARN_LRO #ifndef NETIF_F_LRO static inline bool skb_warn_if_lro(const struct sk_buff *skb) { return false; } #else extern void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) { /* LRO sets gso_size but not gso_type, whereas if GSO is really * wanted then gso_type will be set. 
*/ struct skb_shared_info *shinfo = skb_shinfo(skb); if (shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) { __skb_warn_lro_forwarding(skb); return true; } return false; } #endif /* NETIF_F_LRO */ #endif /* HAVE_SKB_WARN_LRO */ #ifndef HAVE_CONSUME_SKB #define consume_skb kfree_skb #endif #ifndef HAVE_SKB_FRAG_PAGE static inline struct page *skb_frag_page(const skb_frag_t *frag) { return frag->page; } #endif #ifndef HAVE_SKB_RESET_MAC_LEN static inline void skb_reset_mac_len(struct sk_buff *skb) { skb->mac_len = skb->network_header - skb->mac_header; } #endif #ifndef HAVE_SKB_UNCLONE static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) { might_sleep_if(pri & __GFP_WAIT); if (skb_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); return 0; } #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) extern u32 __skb_get_rxhash(struct sk_buff *skb); static inline __u32 skb_get_rxhash(struct sk_buff *skb) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34) if (skb->rxhash) return skb->rxhash; #endif return __skb_get_rxhash(skb); } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/stddef.h000066400000000000000000000003621226605124000260670ustar00rootroot00000000000000#ifndef __LINUX_STDDEF_WRAPPER_H #define __LINUX_STDDEF_WRAPPER_H 1 #include_next #ifdef __KERNEL__ #ifndef HAVE_BOOL_TYPE enum { false = 0, true = 1 }; #endif /* !HAVE_BOOL_TYPE */ #endif /* __KERNEL__ */ #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/tcp.h000066400000000000000000000006161226605124000254060ustar00rootroot00000000000000#ifndef __LINUX_TCP_WRAPPER_H #define __LINUX_TCP_WRAPPER_H 1 #include_next #ifndef HAVE_SKBUFF_HEADER_HELPERS static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb) { return (struct tcphdr *)skb_transport_header(skb); } static inline unsigned int tcp_hdrlen(const struct sk_buff *skb) { return tcp_hdr(skb)->doff * 4; } #endif /* !HAVE_SKBUFF_HEADER_HELPERS */ #endif 
openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/types.h000066400000000000000000000004231226605124000257600ustar00rootroot00000000000000#ifndef __LINUX_TYPES_WRAPPER_H #define __LINUX_TYPES_WRAPPER_H 1 #include_next #ifndef HAVE_CSUM_TYPES typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; #endif #ifndef HAVE_BOOL_TYPE typedef _Bool bool; #endif /* !HAVE_BOOL_TYPE */ #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/u64_stats_sync.h000066400000000000000000000105421226605124000275070ustar00rootroot00000000000000#ifndef _LINUX_U64_STATS_SYNC_WRAPPER_H #define _LINUX_U64_STATS_SYNC_WRAPPER_H #include #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) #include_next #else /* * To properly implement 64bits network statistics on 32bit and 64bit hosts, * we provide a synchronization point, that is a noop on 64bit or UP kernels. * * Key points : * 1) Use a seqcount on SMP 32bits, with low overhead. * 2) Whole thing is a noop on 64bit arches or UP kernels. * 3) Write side must ensure mutual exclusion or one seqcount update could * be lost, thus blocking readers forever. * If this synchronization point is not a mutex, but a spinlock or * spinlock_bh() or disable_bh() : * 3.1) Write side should not sleep. * 3.2) Write side should not allow preemption. * 3.3) If applicable, interrupts should be disabled. * * 4) If reader fetches several counters, there is no guarantee the whole values * are consistent (remember point 1) : this is a noop on 64bit arches anyway) * * 5) readers are allowed to sleep or be preempted/interrupted : They perform * pure reads. But if they have to fetch many values, it's better to not allow * preemptions/interruptions to avoid many retries. * * 6) If counter might be written by an interrupt, readers should block interrupts. 
* (On UP, there is no seqcount_t protection, a reader allowing interrupts could * read partial values) * * 7) For softirq uses, readers can use u64_stats_fetch_begin_bh() and * u64_stats_fetch_retry_bh() helpers * * Usage : * * Stats producer (writer) should use following template granted it already got * an exclusive access to counters (a lock is already taken, or per cpu * data is used [in a non preemptable context]) * * spin_lock_bh(...) or other synchronization to get exclusive access * ... * u64_stats_update_begin(&stats->syncp); * stats->bytes64 += len; // non atomic operation * stats->packets64++; // non atomic operation * u64_stats_update_end(&stats->syncp); * * While a consumer (reader) should use following template to get consistent * snapshot for each variable (but no guarantee on several ones) * * u64 tbytes, tpackets; * unsigned int start; * * do { * start = u64_stats_fetch_begin(&stats->syncp); * tbytes = stats->bytes64; // non atomic operation * tpackets = stats->packets64; // non atomic operation * } while (u64_stats_fetch_retry(&stats->syncp, start)); * * * Example of use in drivers/net/loopback.c, using per_cpu containers, * in BH disabled context. 
*/ #include struct u64_stats_sync { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) seqcount_t seq; #endif }; static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) write_seqcount_begin(&syncp->seq); #endif } static inline void u64_stats_update_end(struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) write_seqcount_end(&syncp->seq); #endif } static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) return read_seqcount_begin(&syncp->seq); #else #if BITS_PER_LONG==32 preempt_disable(); #endif return 0; #endif } static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp, unsigned int start) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) return read_seqcount_retry(&syncp->seq, start); #else #if BITS_PER_LONG==32 preempt_enable(); #endif return false; #endif } /* * In case softirq handlers can update u64 counters, readers can use following helpers * - SMP 32bit arches use seqcount protection, irq safe. * - UP 32bit must disable BH. * - 64bit have no problem atomically reading u64 values, irq safe. 
*/ static inline unsigned int u64_stats_fetch_begin_bh(const struct u64_stats_sync *syncp) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) return read_seqcount_begin(&syncp->seq); #else #if BITS_PER_LONG==32 local_bh_disable(); #endif return 0; #endif } static inline bool u64_stats_fetch_retry_bh(const struct u64_stats_sync *syncp, unsigned int start) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) return read_seqcount_retry(&syncp->seq, start); #else #if BITS_PER_LONG==32 local_bh_enable(); #endif return false; #endif } #endif /* Linux kernel < 2.6.36 */ #endif /* _LINUX_U64_STATS_SYNC_WRAPPER_H */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/udp.h000066400000000000000000000006131226605124000254050ustar00rootroot00000000000000#ifndef __LINUX_UDP_WRAPPER_H #define __LINUX_UDP_WRAPPER_H 1 #include_next #ifndef HAVE_SKBUFF_HEADER_HELPERS static inline struct udphdr *udp_hdr(const struct sk_buff *skb) { return (struct udphdr *)skb_transport_header(skb); } #endif /* HAVE_SKBUFF_HEADER_HELPERS */ #if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) static inline void udp_encap_enable(void) { } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/linux/workqueue.h000066400000000000000000000042251226605124000266470ustar00rootroot00000000000000#ifndef __LINUX_WORKQUEUE_WRAPPER_H #define __LINUX_WORKQUEUE_WRAPPER_H 1 #include int __init ovs_workqueues_init(void); void ovs_workqueues_exit(void); /* Older kernels have an implementation of work queues with some very bad * characteristics when trying to cancel work (potential deadlocks, use after * free, etc. Therefore we implement simple ovs specific work queue using * single worker thread. work-queue API are kept similar for compatibility. * It seems it is useful even on newer kernel. As it can avoid system wide * freeze in event of softlockup due to workq blocked on genl_lock. 
*/ struct work_struct; typedef void (*work_func_t)(struct work_struct *work); #define work_data_bits(work) ((unsigned long *)(&(work)->data)) struct work_struct { #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ atomic_long_t data; struct list_head entry; work_func_t func; #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) #define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) struct delayed_work { struct work_struct work; struct timer_list timer; }; #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ } #define __DELAYED_WORK_INITIALIZER(n, f) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) #define schedule_delayed_work rpl_schedule_delayed_work int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); #define cancel_delayed_work_sync rpl_cancel_delayed_work_sync int cancel_delayed_work_sync(struct delayed_work *dwork); #define INIT_WORK(_work, _func) \ do { \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) extern void flush_scheduled_work(void); extern void queue_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/000077500000000000000000000000001226605124000240735ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/checksum.h000066400000000000000000000022451226605124000260510ustar00rootroot00000000000000#ifndef __NET_CHECKSUM_WRAPPER_H #define __NET_CHECKSUM_WRAPPER_H 1 #include_next #ifndef HAVE_CSUM_UNFOLD static inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } #endif /* !HAVE_CSUM_UNFOLD */ /* 
Workaround for debugging included in certain versions of XenServer. It only * applies to 32-bit x86. */ #if defined(HAVE_CSUM_COPY_DBG) && defined(CONFIG_X86_32) #define csum_and_copy_to_user(src, dst, len, sum, err_ptr) \ csum_and_copy_to_user(src, dst, len, sum, NULL, err_ptr) #endif #ifndef HAVE_CSUM_REPLACE4 static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __be32 diff[] = { ~from, to }; *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum))); } static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to) { csum_replace4(sum, (__force __be32)from, (__force __be32)to); } #endif #ifndef CSUM_MANGLED_0 #define CSUM_MANGLED_0 ((__force __sum16)0xffff) #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, int pseudohdr); #endif #endif /* checksum.h */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/dst.h000066400000000000000000000007001226605124000250330ustar00rootroot00000000000000#ifndef __NET_DST_WRAPPER_H #define __NET_DST_WRAPPER_H 1 #include #include_next #if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) && \ LINUX_VERSION_CODE > KERNEL_VERSION(3,0,20) #define dst_get_neighbour_noref dst_get_neighbour #endif #ifndef HAVE_SKB_DST_ACCESSOR_FUNCS static inline void skb_dst_drop(struct sk_buff *skb) { if (skb->dst) dst_release(skb_dst(skb)); skb->dst = NULL; } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/flow_keys.h000066400000000000000000000006031226605124000262450ustar00rootroot00000000000000#ifndef _NET_FLOW_KEYS_WRAPPER_H #define _NET_FLOW_KEYS_WRAPPER_H #include #if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0) #include_next #else struct flow_keys { /* (src,dst) must be grouped, in the same way than in IP header */ __be32 src; __be32 dst; union { __be32 ports; __be16 port16[2]; }; u16 thoff; u8 ip_proto; }; #endif #endif 
openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/genetlink.h000066400000000000000000000011641226605124000262260ustar00rootroot00000000000000#ifndef __NET_GENERIC_NETLINK_WRAPPER_H #define __NET_GENERIC_NETLINK_WRAPPER_H 1 #include #include #include #include_next /* * 15e473046cb6e5d18a4d0057e61d76315230382b renames pid to portid * the affected structures are * netlink_skb_parms::pid -> portid * genl_info::snd_pid -> snd_portid */ #if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) #define snd_portid snd_pid #define portid pid #endif extern void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags); #endif /* genetlink.h */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/gre.h000066400000000000000000000044711226605124000250270ustar00rootroot00000000000000#ifndef __LINUX_GRE_WRAPPER_H #define __LINUX_GRE_WRAPPER_H #include #include #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37) #include_next #else /* LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) */ #define GREPROTO_CISCO 0 #define GREPROTO_MAX 2 struct gre_protocol { int (*handler)(struct sk_buff *skb); }; int gre_add_protocol(const struct gre_protocol *proto, u8 version); int gre_del_protocol(const struct gre_protocol *proto, u8 version); #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) struct gre_base_hdr { __be16 flags; __be16 protocol; }; #define GRE_HEADER_SECTION 4 static inline __be16 gre_flags_to_tnl_flags(__be16 flags) { __be16 tflags = 0; if (flags & GRE_CSUM) tflags |= TUNNEL_CSUM; if (flags & GRE_ROUTING) tflags |= TUNNEL_ROUTING; if (flags & GRE_KEY) tflags |= TUNNEL_KEY; if (flags & GRE_SEQ) tflags |= TUNNEL_SEQ; if (flags & GRE_STRICT) tflags |= TUNNEL_STRICT; if (flags & GRE_REC) tflags |= TUNNEL_REC; if (flags & GRE_VERSION) tflags |= TUNNEL_VERSION; return tflags; } static inline __be16 tnl_flags_to_gre_flags(__be16 tflags) { __be16 flags = 0; if (tflags & TUNNEL_CSUM) flags |= GRE_CSUM; if (tflags & 
TUNNEL_ROUTING) flags |= GRE_ROUTING; if (tflags & TUNNEL_KEY) flags |= GRE_KEY; if (tflags & TUNNEL_SEQ) flags |= GRE_SEQ; if (tflags & TUNNEL_STRICT) flags |= GRE_STRICT; if (tflags & TUNNEL_REC) flags |= GRE_REC; if (tflags & TUNNEL_VERSION) flags |= GRE_VERSION; return flags; } #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) */ #define MAX_GRE_PROTO_PRIORITY 255 struct gre_cisco_protocol { int (*handler)(struct sk_buff *skb, const struct tnl_ptk_info *tpi); u8 priority; }; int gre_cisco_register(struct gre_cisco_protocol *proto); int gre_cisco_unregister(struct gre_cisco_protocol *proto); #define gre_build_header rpl_gre_build_header void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, int hdr_len); #define gre_handle_offloads rpl_gre_handle_offloads struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum); static inline int ip_gre_calc_hlen(__be16 o_flags) { int addend = 4; if (o_flags & TUNNEL_CSUM) addend += 4; if (o_flags & TUNNEL_KEY) addend += 4; if (o_flags & TUNNEL_SEQ) addend += 4; return addend; } #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/inet_frag.h000066400000000000000000000006061226605124000262040ustar00rootroot00000000000000#ifndef __NET_INET_FRAG_WRAPPER_H #define __NET_INET_FRAG_WRAPPER_H 1 #include #include_next #if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) #define inet_frag_evictor(nf, f, force) \ do { \ if (force || atomic_read(&nf->mem) > nf->high_thresh) { \ inet_frag_evictor(nf, f); \ } \ } while (0) #endif #endif /* inet_frag.h */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/ip.h000066400000000000000000000004451226605124000246570ustar00rootroot00000000000000#ifndef __NET_IP_WRAPPER_H #define __NET_IP_WRAPPER_H 1 #include_next #include #if LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; } #endif #endif 
openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/ip_tunnels.h000066400000000000000000000020671226605124000264310ustar00rootroot00000000000000#ifndef __NET_IP_TUNNELS_WRAPPER_H #define __NET_IP_TUNNELS_WRAPPER_H 1 #include #include #include #include #include #include #include #include #include #define TUNNEL_CSUM __cpu_to_be16(0x01) #define TUNNEL_ROUTING __cpu_to_be16(0x02) #define TUNNEL_KEY __cpu_to_be16(0x04) #define TUNNEL_SEQ __cpu_to_be16(0x08) #define TUNNEL_STRICT __cpu_to_be16(0x10) #define TUNNEL_REC __cpu_to_be16(0x20) #define TUNNEL_VERSION __cpu_to_be16(0x40) #define TUNNEL_NO_KEY __cpu_to_be16(0x80) #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) struct tnl_ptk_info { __be16 flags; __be16 proto; __be32 key; __be32 seq; }; #define PACKET_RCVD 0 #define PACKET_REJECT 1 int iptunnel_xmit(struct net *net, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df); int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); #endif /* __NET_IP_TUNNELS_H */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/ipv6.h000066400000000000000000000024031226605124000251270ustar00rootroot00000000000000#ifndef __NET_IPV6_WRAPPER_H #define __NET_IPV6_WRAPPER_H 1 #include #include_next #ifndef NEXTHDR_SCTP #define NEXTHDR_SCTP 132 /* Stream Control Transport Protocol */ #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) #define ipv6_skip_exthdr rpl_ipv6_skip_exthdr extern int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, __be16 *frag_offp); #endif enum { OVS_IP6T_FH_F_FRAG = (1 << 0), OVS_IP6T_FH_F_AUTH = (1 << 1), OVS_IP6T_FH_F_SKIP_RH = (1 << 2), }; /* This function is upstream, but not the version which skips routing * headers with 0 segments_left. We plan to propose the extended version. 
*/ #define ipv6_find_hdr rpl_ipv6_find_hdr extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff, int *fragflg); #if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) static inline u32 ipv6_addr_hash(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul = (const unsigned long *)a; unsigned long x = ul[0] ^ ul[1]; return (u32)(x ^ (x >> 32)); #else return (__force u32)(a->s6_addr32[0] ^ a->s6_addr32[1] ^ a->s6_addr32[2] ^ a->s6_addr32[3]); #endif } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/net_namespace.h000066400000000000000000000032311226605124000270450ustar00rootroot00000000000000#ifndef __NET_NET_NAMESPACE_WRAPPER_H #define __NET_NET_NAMESPACE_WRAPPER_H 1 #include_next #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) /* for 2.6.32* */ struct rpl_pernet_operations { int (*init)(struct net *net); void (*exit)(struct net *net); int *id; size_t size; struct pernet_operations ops; }; #define pernet_operations rpl_pernet_operations #define register_pernet_device rpl_register_pernet_gen_device #define unregister_pernet_device rpl_unregister_pernet_gen_device int compat_init_net(struct net *net, struct rpl_pernet_operations *pnet); void compat_exit_net(struct net *net, struct rpl_pernet_operations *pnet); #define DEFINE_COMPAT_PNET_REG_FUNC(TYPE) \ \ static struct rpl_pernet_operations *pnet_gen_##TYPE; \ static int compat_init_net_gen_##TYPE(struct net *net) \ { \ return compat_init_net(net, pnet_gen_##TYPE); \ } \ \ static void compat_exit_net_gen_##TYPE(struct net *net) \ { \ compat_exit_net(net, pnet_gen_##TYPE); \ } \ \ static int rpl_register_pernet_gen_##TYPE(struct rpl_pernet_operations *rpl_pnet) \ { \ pnet_gen_##TYPE = rpl_pnet; \ rpl_pnet->ops.init = compat_init_net_gen_##TYPE; \ rpl_pnet->ops.exit = compat_exit_net_gen_##TYPE; \ return register_pernet_gen_##TYPE(pnet_gen_##TYPE->id, &rpl_pnet->ops); \ } 
\ \ static void rpl_unregister_pernet_gen_##TYPE(struct rpl_pernet_operations *rpl_pnet) \ { \ unregister_pernet_gen_##TYPE(*pnet_gen_##TYPE->id, &rpl_pnet->ops); \ } #else #define DEFINE_COMPAT_PNET_REG_FUNC(TYPE) #endif /* 2.6.33 */ #endif /* net/net_namespace.h wrapper */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/netlink.h000066400000000000000000000032221226605124000257070ustar00rootroot00000000000000#ifndef __NET_NETLINK_WRAPPER_H #define __NET_NETLINK_WRAPPER_H 1 #include #include_next #ifndef HAVE_NLA_GET_BE16 /** * nla_get_be16 - return payload of __be16 attribute * @nla: __be16 netlink attribute */ static inline __be16 nla_get_be16(const struct nlattr *nla) { return *(__be16 *) nla_data(nla); } #endif /* !HAVE_NLA_GET_BE16 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) /* This function was introduced in 2.6.31, but initially it performed an * unaligned access, so we replace it up to 2.6.34 where it was fixed. */ #define nla_get_be64 rpl_nla_get_be64 static inline __be64 nla_get_be64(const struct nlattr *nla) { __be64 tmp; /* The additional cast is necessary because */ nla_memcpy(&tmp, (struct nlattr *) nla, sizeof(tmp)); return tmp; } #endif #ifndef HAVE_NLA_PUT_BE16 static inline int nla_put_be16(struct sk_buff *skb, int attrtype, __be16 value) { return nla_put(skb, attrtype, sizeof(__be16), &value); } #endif #ifndef HAVE_NLA_PUT_BE32 static inline int nla_put_be32(struct sk_buff *skb, int attrtype, __be32 value) { return nla_put(skb, attrtype, sizeof(__be32), &value); } #endif #ifndef HAVE_NLA_PUT_BE64 static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value) { return nla_put(skb, attrtype, sizeof(__be64), &value); } #endif #ifndef nla_for_each_nested #define nla_for_each_nested(pos, nla, rem) \ nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) #endif #ifndef HAVE_NLA_FIND_NESTED static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) { return nla_find(nla_data(nla), 
nla_len(nla), attrtype); } #endif #endif /* net/netlink.h */ openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/sctp/000077500000000000000000000000001226605124000250445ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/sctp/checksum.h000066400000000000000000000013621226605124000270210ustar00rootroot00000000000000#ifndef __SCTP_CHECKSUM_WRAPPER_H #define __SCTP_CHECKSUM_WRAPPER_H 1 #include #include_next #if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0) static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { const struct sk_buff *iter; __u32 crc32 = sctp_start_cksum(skb->data + offset, skb_headlen(skb) - offset); skb_walk_frags(skb, iter) crc32 = sctp_update_cksum((__u8 *) iter->data, skb_headlen(iter), crc32); /* Open-code sctp_end_cksum() to avoid a sparse warning due to a bug in * sparse annotations in Linux fixed in 3.10 in commit eee1d5a14 (sctp: * Correct type and usage of sctp_end_cksum()). */ return cpu_to_le32(~crc32); } #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/sock.h000066400000000000000000000005671226605124000252130ustar00rootroot00000000000000#ifndef __NET_SOCK_WRAPPER_H #define __NET_SOCK_WRAPPER_H 1 #include_next #ifndef __sk_user_data #define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data))) #define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk))) #define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr) #endif #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/include/net/vxlan.h000066400000000000000000000016231226605124000253760ustar00rootroot00000000000000#ifndef __NET_VXLAN_WRAPPER_H #define __NET_VXLAN_WRAPPER_H 1 #include #include #include struct vxlan_sock; typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key); /* per UDP socket information */ struct vxlan_sock { struct hlist_node hlist; vxlan_rcv_t *rcv; void *data; struct 
work_struct del_work; struct socket *sock; struct rcu_head rcu; }; struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, bool no_share); void vxlan_sock_release(struct vxlan_sock *vs); int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni); __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb); #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/ip_tunnels_core.c000066400000000000000000000052531226605124000252230ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include "compat.h" #include "gso.h" int iptunnel_xmit(struct net *net, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl, __be16 df) { int pkt_len = skb->len; struct iphdr *iph; int err; nf_reset(skb); secpath_reset(skb); skb_clear_rxhash(skb); skb_dst_drop(skb); skb_dst_set(skb, &rt_dst(rt)); #if 0 /* Do not clear ovs_skb_cb. It will be done in gso code. */ memset(IPCB(skb), 0, sizeof(*IPCB(skb))); #endif /* Push down and install the IP header. 
*/ __skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); iph = ip_hdr(skb); iph->version = 4; iph->ihl = sizeof(struct iphdr) >> 2; iph->frag_off = df; iph->protocol = proto; iph->tos = tos; iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; __ip_select_ident(iph, &rt_dst(rt), (skb_shinfo(skb)->gso_segs ?: 1) - 1); err = ip_local_out(skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; return pkt_len; } int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) { if (unlikely(!pskb_may_pull(skb, hdr_len))) return -ENOMEM; skb_pull_rcsum(skb, hdr_len); if (inner_proto == htons(ETH_P_TEB)) { struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) return -ENOMEM; eh = (struct ethhdr *)skb->data; if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) skb->protocol = eh->h_proto; else skb->protocol = htons(ETH_P_802_2); } else { skb->protocol = inner_proto; } nf_reset(skb); secpath_reset(skb); skb_clear_rxhash(skb); skb_dst_drop(skb); vlan_set_tci(skb, 0); skb_set_queue_mapping(skb, 0); skb->pkt_type = PACKET_HOST; return 0; } openvswitch-2.0.1+git20140120/datapath/linux/compat/net_namespace.c000066400000000000000000000013171226605124000246320ustar00rootroot00000000000000#include #include #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) int compat_init_net(struct net *net, struct rpl_pernet_operations *pnet) { int err; void *ovs_net = kzalloc(pnet->size, GFP_KERNEL); if (!ovs_net) return -ENOMEM; err = net_assign_generic(net, *pnet->id, ovs_net); if (err) goto err; if (pnet->init) { err = pnet->init(net); if (err) goto err; } return 0; err: kfree(ovs_net); return err; } void compat_exit_net(struct net *net, struct rpl_pernet_operations *pnet) { void *ovs_net = net_generic(net, *pnet->id); if (pnet->exit) pnet->exit(net); kfree(ovs_net); } #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/netdevice.c000066400000000000000000000051761226605124000240050ustar00rootroot00000000000000#include #include #ifdef 
HAVE_RHEL_OVS_HOOK int nr_bridges = 0; #endif #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) #ifndef HAVE_CAN_CHECKSUM_PROTOCOL static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { return ((features & NETIF_F_GEN_CSUM) || ((features & NETIF_F_V4_CSUM) && protocol == htons(ETH_P_IP)) || ((features & NETIF_F_V6_CSUM) && protocol == htons(ETH_P_IPV6)) || ((features & NETIF_F_FCOE_CRC) && protocol == htons(ETH_P_FCOE))); } #endif static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_HIGHMEM int i; if (dev->features & NETIF_F_HIGHDMA) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) if (PageHighMem(skb_shinfo(skb)->frags[i].page)) return 1; #endif return 0; } static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { if (!can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } return features; } netdev_features_t rpl_netif_skb_features(struct sk_buff *skb) { unsigned long vlan_features = skb->dev->vlan_features; __be16 protocol = skb->protocol; netdev_features_t features = skb->dev->features; if (protocol == htons(ETH_P_8021Q)) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { return harmonize_features(skb, protocol, features); } features &= (vlan_features | NETIF_F_HW_VLAN_TX); if (protocol != htons(ETH_P_8021Q)) { return harmonize_features(skb, protocol, features); } else { features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; return harmonize_features(skb, protocol, features); } } struct sk_buff *rpl_skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { int vlan_depth = ETH_HLEN; __be16 type = skb->protocol; __be16 skb_proto; struct sk_buff *skb_gso; while (type == 
htons(ETH_P_8021Q)) { struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) return ERR_PTR(-EINVAL); vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } /* this hack needed to get regular skb_gso_segment() */ #undef skb_gso_segment skb_proto = skb->protocol; skb->protocol = type; skb_gso = skb_gso_segment(skb, features); skb->protocol = skb_proto; return skb_gso; } #endif /* kernel version < 2.6.38 */ openvswitch-2.0.1+git20140120/datapath/linux/compat/reciprocal_div.c000066400000000000000000000004661226605124000250210ustar00rootroot00000000000000#include #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) /* definition is required since reciprocal_value() is not exported */ u32 reciprocal_value(u32 k) { u64 val = (1LL << 32) + (k - 1); do_div(val, k); return (u32)val; } #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/skbuff-openvswitch.c000066400000000000000000000005651226605124000256630ustar00rootroot00000000000000#include #include #include #if !defined(HAVE_SKB_WARN_LRO) && defined(NETIF_F_LRO) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt void __skb_warn_lro_forwarding(const struct sk_buff *skb) { if (net_ratelimit()) pr_warn("%s: received packets cannot be forwarded while LRO is enabled\n", skb->dev->name); } #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/utils.c000066400000000000000000000020251226605124000231650ustar00rootroot00000000000000#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, int pseudohdr) { __be32 diff[] = { ~from[0], ~from[1], ~from[2], ~from[3], to[0], to[1], to[2], to[3], }; if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), 
~csum_unfold(*sum))); if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) skb->csum = ~csum_partial(diff, sizeof(diff), ~skb->csum); } else if (pseudohdr) *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); } #endif openvswitch-2.0.1+git20140120/datapath/linux/compat/vxlan.c000066400000000000000000000166551226605124000231730ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA * * This code is derived from kernel vxlan module. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "compat.h" #include "datapath.h" #include "gso.h" #include "vlan.h" #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. 
*/ /* VXLAN protocol header */ struct vxlanhdr { __be32 vx_flags; __be32 vx_vni; }; /* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct vxlan_sock *vs; struct vxlanhdr *vxh; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) goto error; /* Return packets with reserved bits set */ vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); if (vxh->vx_flags != htonl(VXLAN_FLAGS) || (vxh->vx_vni & htonl(0xff))) { pr_warn("invalid vxlan flags=%#x vni=%#x\n", ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); goto error; } if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) goto drop; vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; vs->rcv(vs, skb, vxh->vx_vni); return 0; drop: /* Consume bad packet */ kfree_skb(skb); return 0; error: /* Return non vxlan pkt */ return 1; } static void vxlan_sock_put(struct sk_buff *skb) { sock_put(skb->sk); } /* On transmit, associate with the tunnel socket */ static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb) { skb_orphan(skb); sock_hold(sk); skb->sk = sk; skb->destructor = vxlan_sock_put; } /* Compute source port for outgoing packet * first choice to use L4 flow hash since it will spread * better and maybe available from hardware * secondary choice is to use jhash on the Ethernet header */ __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb) { unsigned int range = (port_max - port_min) + 1; u32 hash; hash = skb_get_rxhash(skb); if (!hash) hash = jhash(skb->data, 2 * ETH_ALEN, (__force u32) skb->protocol); return htons((((u64) hash * range) >> 32) + port_min); } static void vxlan_gso(struct sk_buff *skb) { int udp_offset = skb_transport_offset(skb); struct udphdr *uh; uh = udp_hdr(skb); uh->len = htons(skb->len - udp_offset); /* csum segment if tunnel sets skb with csum. 
*/ if (unlikely(uh->check)) { struct iphdr *iph = ip_hdr(skb); uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - udp_offset, IPPROTO_UDP, 0); uh->check = csum_fold(skb_checksum(skb, udp_offset, skb->len - udp_offset, 0)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; } skb->ip_summed = CHECKSUM_NONE; } static int handle_offloads(struct sk_buff *skb) { if (skb_is_gso(skb)) { OVS_GSO_CB(skb)->fix_segment = vxlan_gso; } else { if (skb->ip_summed != CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_NONE; } return 0; } int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be32 vni) { struct vxlanhdr *vxh; struct udphdr *uh; int min_headroom; int err; min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + VXLAN_HLEN + sizeof(struct iphdr) + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); if (unlikely(err)) return err; if (vlan_tx_tag_present(skb)) { if (unlikely(!__vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)))) return -ENOMEM; vlan_set_tci(skb, 0); } skb_reset_inner_headers(skb); vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); uh = udp_hdr(skb); uh->dest = dst_port; uh->source = src_port; uh->len = htons(skb->len); uh->check = 0; vxlan_set_owner(vs->sock->sk, skb); err = handle_offloads(skb); if (err) return err; return iptunnel_xmit(net, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df); } static void rcu_free_vs(struct rcu_head *rcu) { struct vxlan_sock *vs = container_of(rcu, struct vxlan_sock, rcu); kfree(vs); } static void vxlan_del_work(struct work_struct *work) { struct vxlan_sock *vs = container_of(work, struct vxlan_sock, del_work); sk_release_kernel(vs->sock->sk); 
call_rcu(&vs->rcu, rcu_free_vs); } static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data) { struct vxlan_sock *vs; struct sock *sk; struct sockaddr_in vxlan_addr = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = port, }; int rc; vs = kmalloc(sizeof(*vs), GFP_KERNEL); if (!vs) { pr_debug("memory alocation failure\n"); return ERR_PTR(-ENOMEM); } INIT_WORK(&vs->del_work, vxlan_del_work); /* Create UDP socket for encapsulation receive. */ rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vs->sock); if (rc < 0) { pr_debug("UDP socket create failed\n"); kfree(vs); return ERR_PTR(rc); } /* Put in proper namespace */ sk = vs->sock->sk; sk_change_net(sk, net); rc = kernel_bind(vs->sock, (struct sockaddr *) &vxlan_addr, sizeof(vxlan_addr)); if (rc < 0) { pr_debug("bind for UDP socket %pI4:%u (%d)\n", &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc); sk_release_kernel(sk); kfree(vs); return ERR_PTR(rc); } vs->rcv = rcv; vs->data = data; /* Disable multicast loopback */ inet_sk(sk)->mc_loop = 0; rcu_assign_sk_user_data(vs->sock->sk, vs); /* Mark socket as an encapsulation socket. */ udp_sk(sk)->encap_type = 1; udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv; udp_encap_enable(); return vs; } struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, bool no_share) { return vxlan_socket_create(net, port, rcv, data); } void vxlan_sock_release(struct vxlan_sock *vs) { ASSERT_OVSL(); rcu_assign_sk_user_data(vs->sock->sk, NULL); queue_work(&vs->del_work); } openvswitch-2.0.1+git20140120/datapath/linux/compat/workqueue.c000066400000000000000000000111071226605124000240550ustar00rootroot00000000000000/* * Derived from the kernel/workqueue.c * * This is the generic async execution mechanism. Work items as are * executed in process context. 
* */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static spinlock_t wq_lock; static struct list_head workq; static wait_queue_head_t more_work; static struct task_struct *workq_thread; static struct work_struct *current_work; static void add_work_to_ovs_wq(struct work_struct *work) { list_add_tail(&work->entry, &workq); wake_up(&more_work); } static void __queue_work(struct work_struct *work) { unsigned long flags; spin_lock_irqsave(&wq_lock, flags); add_work_to_ovs_wq(work); spin_unlock_irqrestore(&wq_lock, flags); } void queue_work(struct work_struct *work) { if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) return; __queue_work(work); } static void _delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; __queue_work(&dwork->work); } static void __queue_delayed_work(struct delayed_work *dwork, unsigned long delay) { struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); timer->expires = jiffies + delay; timer->data = (unsigned long)dwork; timer->function = _delayed_work_timer_fn; add_timer(timer); } int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { if (test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(&dwork->work))) return 0; if (delay == 0) __queue_work(&dwork->work); else __queue_delayed_work(dwork, delay); return 1; } struct wq_barrier { struct work_struct work; struct completion done; }; static void wq_barrier_func(struct work_struct *work) { struct wq_barrier *barr = container_of(work, struct wq_barrier, work); complete(&barr->done); } static void workqueue_barrier(struct work_struct *work) { bool need_barrier; struct wq_barrier barr; spin_lock_irq(&wq_lock); if (current_work != work) need_barrier = false; else { INIT_WORK(&barr.work, wq_barrier_func); 
init_completion(&barr.done); add_work_to_ovs_wq(&barr.work); need_barrier = true; } spin_unlock_irq(&wq_lock); if (need_barrier) wait_for_completion(&barr.done); } static int try_to_grab_pending(struct work_struct *work) { int ret; BUG_ON(in_interrupt()); if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) return 0; spin_lock_irq(&wq_lock); if (!list_empty(&work->entry)) { list_del_init(&work->entry); ret = 0; } else /* Already executed, retry. */ ret = -1; spin_unlock_irq(&wq_lock); return ret; } static int __cancel_work_timer(struct work_struct *work, struct timer_list *timer) { int ret; for (;;) { ret = (timer && likely(del_timer(timer))); if (ret) /* Was active timer, return true. */ break; /* Inactive timer case */ ret = try_to_grab_pending(work); if (!ret) break; } workqueue_barrier(work); work_clear_pending(work); return ret; } int cancel_delayed_work_sync(struct delayed_work *dwork) { return __cancel_work_timer(&dwork->work, &dwork->timer); } bool cancel_work_sync(struct work_struct *work) { return __cancel_work_timer(work, NULL); } static void run_workqueue(void) { spin_lock_irq(&wq_lock); while (!list_empty(&workq)) { struct work_struct *work = list_entry(workq.next, struct work_struct, entry); work_func_t f = work->func; list_del_init(workq.next); current_work = work; spin_unlock_irq(&wq_lock); work_clear_pending(work); f(work); BUG_ON(in_interrupt()); spin_lock_irq(&wq_lock); current_work = NULL; } spin_unlock_irq(&wq_lock); } static int worker_thread(void *dummy) { for (;;) { wait_event_interruptible(more_work, (kthread_should_stop() || !list_empty(&workq))); if (kthread_should_stop()) break; run_workqueue(); } return 0; } int __init ovs_workqueues_init(void) { spin_lock_init(&wq_lock); INIT_LIST_HEAD(&workq); init_waitqueue_head(&more_work); workq_thread = kthread_create(worker_thread, NULL, "ovs_workq"); if (IS_ERR(workq_thread)) return PTR_ERR(workq_thread); wake_up_process(workq_thread); return 0; } void ovs_workqueues_exit(void) { 
BUG_ON(!list_empty(&workq)); kthread_stop(workq_thread); } openvswitch-2.0.1+git20140120/datapath/vlan.h000066400000000000000000000042701226605124000203540ustar00rootroot00000000000000/* * Copyright (c) 2007-2011 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #ifndef VLAN_H #define VLAN_H 1 #include #include #include /** * DOC: VLAN tag manipulation. * * &struct sk_buff handling of VLAN tags has evolved over time: * * In 2.6.26 and earlier, VLAN tags did not have any generic representation in * an skb, other than as a raw 802.1Q header inside the packet data. * * In 2.6.27 &struct sk_buff added a @vlan_tci member. Between 2.6.27 and * 2.6.32, its value was the raw contents of the 802.1Q TCI field, or zero if * no 802.1Q header was present. This worked OK except for the corner case of * an 802.1Q header with an all-0-bits TCI, which could not be represented. * * In 2.6.33, @vlan_tci semantics changed. Now, if an 802.1Q header is * present, then the VLAN_TAG_PRESENT bit is always set. This fixes the * all-0-bits TCI corner case. * * For compatibility we emulate the 2.6.33+ behavior on earlier kernel * versions. The client must not access @vlan_tci directly. Instead, use * vlan_get_tci() to read it or vlan_set_tci() to write it, with semantics * equivalent to those on 2.6.33+. 
*/ static inline u16 vlan_get_tci(struct sk_buff *skb) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) if (skb->vlan_tci) return skb->vlan_tci | VLAN_TAG_PRESENT; #endif return skb->vlan_tci; } static inline void vlan_set_tci(struct sk_buff *skb, u16 vlan_tci) { #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) vlan_tci &= ~VLAN_TAG_PRESENT; #endif skb->vlan_tci = vlan_tci; } #endif /* vlan.h */ openvswitch-2.0.1+git20140120/datapath/vport-gre.c000066400000000000000000000204531226605124000213350ustar00rootroot00000000000000/* * Copyright (c) 2007-2012 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #include #if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "datapath.h" #include "vport.h" /* Returns the least-significant 32 bits of a __be64. 
*/ static __be32 be64_get_low32(__be64 x) { #ifdef __BIG_ENDIAN return (__force __be32)x; #else return (__force __be32)((__force u64)x >> 32); #endif } static __be16 filter_tnl_flags(__be16 flags) { return flags & (TUNNEL_CSUM | TUNNEL_KEY); } static struct sk_buff *__build_header(struct sk_buff *skb, int tunnel_hlen, __be32 seq, __be16 gre64_flag) { const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; struct tnl_ptk_info tpi; skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); if (IS_ERR(skb)) return NULL; tpi.flags = filter_tnl_flags(tun_key->tun_flags) | gre64_flag; tpi.proto = htons(ETH_P_TEB); tpi.key = be64_get_low32(tun_key->tun_id); tpi.seq = seq; gre_build_header(skb, &tpi, tunnel_hlen); return skb; } static __be64 key_to_tunnel_id(__be32 key, __be32 seq) { #ifdef __BIG_ENDIAN return (__force __be64)((__force u64)seq << 32 | (__force u32)key); #else return (__force __be64)((__force u64)key << 32 | (__force u32)seq); #endif } /* Called with rcu_read_lock and BH disabled. 
*/ static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { struct ovs_key_ipv4_tunnel tun_key; struct ovs_net *ovs_net; struct vport *vport; __be64 key; ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); if ((tpi->flags & TUNNEL_KEY) && (tpi->flags & TUNNEL_SEQ)) vport = rcu_dereference(ovs_net->vport_net.gre64_vport); else vport = rcu_dereference(ovs_net->vport_net.gre_vport); if (unlikely(!vport)) return PACKET_REJECT; key = key_to_tunnel_id(tpi->key, tpi->seq); ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, filter_tnl_flags(tpi->flags)); ovs_vport_receive(vport, skb, &tun_key); return PACKET_RCVD; } static int __send(struct vport *vport, struct sk_buff *skb, int tunnel_hlen, __be32 seq, __be16 gre64_flag) { struct net *net = ovs_dp_get_net(vport->dp); struct rtable *rt; int min_headroom; __be16 df; __be32 saddr; int err; /* Route lookup */ saddr = OVS_CB(skb)->tun_key->ipv4_src; rt = find_route(ovs_dp_get_net(vport->dp), &saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, OVS_CB(skb)->tun_key->ipv4_tos, skb->mark); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto error; } min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len + tunnel_hlen + sizeof(struct iphdr) + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { int head_delta = SKB_DATA_ALIGN(min_headroom - skb_headroom(skb) + 16); err = pskb_expand_head(skb, max_t(int, head_delta, 0), 0, GFP_ATOMIC); if (unlikely(err)) goto err_free_rt; } if (vlan_tx_tag_present(skb)) { if (unlikely(!__vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)))) { err = -ENOMEM; goto err_free_rt; } vlan_set_tci(skb, 0); } /* Push Tunnel header. */ skb = __build_header(skb, tunnel_hlen, seq, gre64_flag); if (unlikely(!skb)) { err = 0; goto err_free_rt; } df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
htons(IP_DF) : 0; skb->local_df = 1; return iptunnel_xmit(net, rt, skb, saddr, OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, OVS_CB(skb)->tun_key->ipv4_tos, OVS_CB(skb)->tun_key->ipv4_ttl, df); err_free_rt: ip_rt_put(rt); error: return err; } static struct gre_cisco_protocol gre_protocol = { .handler = gre_rcv, .priority = 1, }; static int gre_ports; static int gre_init(void) { int err; gre_ports++; if (gre_ports > 1) return 0; err = gre_cisco_register(&gre_protocol); if (err) pr_warn("cannot register gre protocol handler\n"); return err; } static void gre_exit(void) { gre_ports--; if (gre_ports > 0) return; gre_cisco_unregister(&gre_protocol); } static const char *gre_get_name(const struct vport *vport) { return vport_priv(vport); } static struct vport *gre_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); struct ovs_net *ovs_net; struct vport *vport; int err; err = gre_init(); if (err) return ERR_PTR(err); ovs_net = net_generic(net, ovs_net_id); if (ovsl_dereference(ovs_net->vport_net.gre_vport)) { vport = ERR_PTR(-EEXIST); goto error; } vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); if (IS_ERR(vport)) goto error; strncpy(vport_priv(vport), parms->name, IFNAMSIZ); rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); return vport; error: gre_exit(); return vport; } static void gre_tnl_destroy(struct vport *vport) { struct net *net = ovs_dp_get_net(vport->dp); struct ovs_net *ovs_net; ovs_net = net_generic(net, ovs_net_id); rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); ovs_vport_deferred_free(vport); gre_exit(); } static int gre_send(struct vport *vport, struct sk_buff *skb) { int hlen; if (unlikely(!OVS_CB(skb)->tun_key)) return -EINVAL; hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags); return __send(vport, skb, hlen, 0, 0); } const struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, .destroy = gre_tnl_destroy, .get_name = gre_get_name, .send = gre_send, 
}; /* GRE64 vport. */ static struct vport *gre64_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); struct ovs_net *ovs_net; struct vport *vport; int err; err = gre_init(); if (err) return ERR_PTR(err); ovs_net = net_generic(net, ovs_net_id); if (ovsl_dereference(ovs_net->vport_net.gre64_vport)) { vport = ERR_PTR(-EEXIST); goto error; } vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre64_vport_ops, parms); if (IS_ERR(vport)) goto error; strncpy(vport_priv(vport), parms->name, IFNAMSIZ); rcu_assign_pointer(ovs_net->vport_net.gre64_vport, vport); return vport; error: gre_exit(); return vport; } static void gre64_tnl_destroy(struct vport *vport) { struct net *net = ovs_dp_get_net(vport->dp); struct ovs_net *ovs_net; ovs_net = net_generic(net, ovs_net_id); rcu_assign_pointer(ovs_net->vport_net.gre64_vport, NULL); ovs_vport_deferred_free(vport); gre_exit(); } static __be32 be64_get_high32(__be64 x) { #ifdef __BIG_ENDIAN return (__force __be32)((__force u64)x >> 32); #else return (__force __be32)x; #endif } static int gre64_send(struct vport *vport, struct sk_buff *skb) { int hlen = GRE_HEADER_SECTION + /* GRE Hdr */ GRE_HEADER_SECTION + /* GRE Key */ GRE_HEADER_SECTION; /* GRE SEQ */ __be32 seq; if (unlikely(!OVS_CB(skb)->tun_key)) return -EINVAL; if (OVS_CB(skb)->tun_key->tun_flags & TUNNEL_CSUM) hlen += GRE_HEADER_SECTION; seq = be64_get_high32(OVS_CB(skb)->tun_key->tun_id); return __send(vport, skb, hlen, seq, (TUNNEL_KEY|TUNNEL_SEQ)); } const struct vport_ops ovs_gre64_vport_ops = { .type = OVS_VPORT_TYPE_GRE64, .create = gre64_create, .destroy = gre64_tnl_destroy, .get_name = gre_get_name, .send = gre64_send, }; #endif openvswitch-2.0.1+git20140120/datapath/vport-internal_dev.c000066400000000000000000000164661226605124000232430ustar00rootroot00000000000000/* * Copyright (c) 2007-2012 Nicira, Inc. 
* * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #include #include #include #include #include #include #include #include #include #include #include #include "datapath.h" #include "vlan.h" #include "vport-internal_dev.h" #include "vport-netdev.h" struct internal_dev { struct vport *vport; }; static struct internal_dev *internal_dev_priv(struct net_device *netdev) { return netdev_priv(netdev); } /* This function is only called by the kernel network layer.*/ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netdev, struct rtnl_link_stats64 *stats) { #else static struct net_device_stats *internal_dev_sys_stats(struct net_device *netdev) { struct net_device_stats *stats = &netdev->stats; #endif struct vport *vport = ovs_internal_dev_get_vport(netdev); struct ovs_vport_stats vport_stats; ovs_vport_get_stats(vport, &vport_stats); /* The tx and rx stats need to be swapped because the * switch and host OS have opposite perspectives. 
*/ stats->rx_packets = vport_stats.tx_packets; stats->tx_packets = vport_stats.rx_packets; stats->rx_bytes = vport_stats.tx_bytes; stats->tx_bytes = vport_stats.rx_bytes; stats->rx_errors = vport_stats.tx_errors; stats->tx_errors = vport_stats.rx_errors; stats->rx_dropped = vport_stats.tx_dropped; stats->tx_dropped = vport_stats.rx_dropped; return stats; } /* Called with rcu_read_lock_bh. */ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { rcu_read_lock(); ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL); rcu_read_unlock(); return 0; } static int internal_dev_open(struct net_device *netdev) { netif_start_queue(netdev); return 0; } static int internal_dev_stop(struct net_device *netdev) { netif_stop_queue(netdev); return 0; } static void internal_dev_getinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { strlcpy(info->driver, "openvswitch", sizeof(info->driver)); } static const struct ethtool_ops internal_dev_ethtool_ops = { .get_drvinfo = internal_dev_getinfo, .get_link = ethtool_op_get_link, #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) .get_sg = ethtool_op_get_sg, .set_sg = ethtool_op_set_sg, .get_tx_csum = ethtool_op_get_tx_csum, .set_tx_csum = ethtool_op_set_tx_hw_csum, .get_tso = ethtool_op_get_tso, .set_tso = ethtool_op_set_tso, #endif }; static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu) { if (new_mtu < 68) return -EINVAL; netdev->mtu = new_mtu; return 0; } static void internal_dev_destructor(struct net_device *dev) { struct vport *vport = ovs_internal_dev_get_vport(dev); ovs_vport_free(vport); free_netdev(dev); } static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = internal_dev_change_mtu, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) .ndo_get_stats64 = internal_dev_get_stats, #else .ndo_get_stats = 
internal_dev_sys_stats, #endif }; static void do_setup(struct net_device *netdev) { ether_setup(netdev); netdev->netdev_ops = &internal_dev_netdev_ops; netdev->priv_flags &= ~IFF_TX_SKB_SHARING; netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); netdev->tx_queue_len = 0; netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; netdev->vlan_features = netdev->features; netdev->features |= NETIF_F_HW_VLAN_CTAG_TX; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39) netdev->hw_features = netdev->features & ~NETIF_F_LLTX; #endif eth_hw_addr_random(netdev); } static struct vport *internal_dev_create(const struct vport_parms *parms) { struct vport *vport; struct netdev_vport *netdev_vport; struct internal_dev *internal_dev; int err; vport = ovs_vport_alloc(sizeof(struct netdev_vport), &ovs_internal_vport_ops, parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); goto error; } netdev_vport = netdev_vport_priv(vport); netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev), parms->name, do_setup); if (!netdev_vport->dev) { err = -ENOMEM; goto error_free_vport; } dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp)); internal_dev = internal_dev_priv(netdev_vport->dev); internal_dev->vport = vport; /* Restrict bridge port to current netns. 
*/ if (vport->port_no == OVSP_LOCAL) netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; rtnl_lock(); err = register_netdevice(netdev_vport->dev); if (err) goto error_free_netdev; dev_set_promiscuity(netdev_vport->dev, 1); rtnl_unlock(); netif_start_queue(netdev_vport->dev); return vport; error_free_netdev: rtnl_unlock(); free_netdev(netdev_vport->dev); error_free_vport: ovs_vport_free(vport); error: return ERR_PTR(err); } static void internal_dev_destroy(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); netif_stop_queue(netdev_vport->dev); rtnl_lock(); dev_set_promiscuity(netdev_vport->dev, -1); /* unregister_netdevice() waits for an RCU grace period. */ unregister_netdevice(netdev_vport->dev); rtnl_unlock(); } static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) { struct net_device *netdev = netdev_vport_priv(vport)->dev; int len; #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) if (vlan_tx_tag_present(skb)) { if (unlikely(!__vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)))) return 0; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data + (2 * ETH_ALEN), VLAN_HLEN, 0)); vlan_set_tci(skb, 0); } #endif len = skb->len; skb_dst_drop(skb); nf_reset(skb); secpath_reset(skb); skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); netif_rx(skb); return len; } const struct vport_ops ovs_internal_vport_ops = { .type = OVS_VPORT_TYPE_INTERNAL, .create = internal_dev_create, .destroy = internal_dev_destroy, .get_name = ovs_netdev_get_name, .send = internal_dev_recv, }; int ovs_is_internal_dev(const struct net_device *netdev) { return netdev->netdev_ops == &internal_dev_netdev_ops; } struct vport *ovs_internal_dev_get_vport(struct net_device *netdev) { if (!ovs_is_internal_dev(netdev)) return NULL; return internal_dev_priv(netdev)->vport; } 
openvswitch-2.0.1+git20140120/datapath/vport-internal_dev.h000066400000000000000000000017011226605124000232320ustar00rootroot00000000000000/* * Copyright (c) 2007-2011 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #ifndef VPORT_INTERNAL_DEV_H #define VPORT_INTERNAL_DEV_H 1 #include "datapath.h" #include "vport.h" int ovs_is_internal_dev(const struct net_device *); struct vport *ovs_internal_dev_get_vport(struct net_device *); #endif /* vport-internal_dev.h */ openvswitch-2.0.1+git20140120/datapath/vport-lisp.c000066400000000000000000000372551226605124000215370ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * Copyright (c) 2013 Cisco Systems, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include #include #include #include #include #include #include #include #include #include #include "datapath.h" #include "vport.h" /* * LISP encapsulation header: * * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |N|L|E|V|I|flags| Nonce/Map-Version | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Instance ID/Locator Status Bits | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * */ /** * struct lisphdr - LISP header * @nonce_present: Flag indicating the presence of a 24 bit nonce value. * @locator_status_bits_present: Flag indicating the presence of Locator Status * Bits (LSB). * @solicit_echo_nonce: Flag indicating the use of the echo noncing mechanism. * @map_version_present: Flag indicating the use of mapping versioning. * @instance_id_present: Flag indicating the presence of a 24 bit Instance ID. * @reserved_flags: 3 bits reserved for future flags. * @nonce: 24 bit nonce value. * @map_version: 24 bit mapping version. * @locator_status_bits: Locator Status Bits: 32 bits when instance_id_present * is not set, 8 bits when it is. 
* @instance_id: 24 bit Instance ID */ struct lisphdr { #ifdef __LITTLE_ENDIAN_BITFIELD __u8 reserved_flags:3; __u8 instance_id_present:1; __u8 map_version_present:1; __u8 solicit_echo_nonce:1; __u8 locator_status_bits_present:1; __u8 nonce_present:1; #else __u8 nonce_present:1; __u8 locator_status_bits_present:1; __u8 solicit_echo_nonce:1; __u8 map_version_present:1; __u8 instance_id_present:1; __u8 reserved_flags:3; #endif union { __u8 nonce[3]; __u8 map_version[3]; } u1; union { __be32 locator_status_bits; struct { __u8 instance_id[3]; __u8 locator_status_bits; } word2; } u2; }; #define LISP_HLEN (sizeof(struct udphdr) + sizeof(struct lisphdr)) /** * struct lisp_port - Keeps track of open UDP ports * @dst_port: lisp UDP port no. * @list: list element in @lisp_ports. * @lisp_rcv_socket: The socket created for this port number. * @name: vport name. */ struct lisp_port { __be16 dst_port; struct list_head list; struct socket *lisp_rcv_socket; char name[IFNAMSIZ]; }; static LIST_HEAD(lisp_ports); static inline struct lisp_port *lisp_vport(const struct vport *vport) { return vport_priv(vport); } static struct lisp_port *lisp_find_port(struct net *net, __be16 port) { struct lisp_port *lisp_port; list_for_each_entry_rcu(lisp_port, &lisp_ports, list) { if (lisp_port->dst_port == port && net_eq(sock_net(lisp_port->lisp_rcv_socket->sk), net)) return lisp_port; } return NULL; } static inline struct lisphdr *lisp_hdr(const struct sk_buff *skb) { return (struct lisphdr *)(udp_hdr(skb) + 1); } /* Convert 64 bit tunnel ID to 24 bit Instance ID. */ static void tunnel_id_to_instance_id(__be64 tun_id, __u8 *iid) { #ifdef __BIG_ENDIAN iid[0] = (__force __u8)(tun_id >> 16); iid[1] = (__force __u8)(tun_id >> 8); iid[2] = (__force __u8)tun_id; #else iid[0] = (__force __u8)((__force u64)tun_id >> 40); iid[1] = (__force __u8)((__force u64)tun_id >> 48); iid[2] = (__force __u8)((__force u64)tun_id >> 56); #endif } /* Convert 24 bit Instance ID to 64 bit tunnel ID. 
*/ static __be64 instance_id_to_tunnel_id(__u8 *iid) { #ifdef __BIG_ENDIAN return (iid[0] << 16) | (iid[1] << 8) | iid[2]; #else return (__force __be64)(((__force u64)iid[0] << 40) | ((__force u64)iid[1] << 48) | ((__force u64)iid[2] << 56)); #endif } /* Compute source UDP port for outgoing packet. * Currently we use the flow hash. */ static u16 ovs_tnl_get_src_port(struct sk_buff *skb) { int low; int high; unsigned int range; struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key; u32 hash = jhash2((const u32 *)pkt_key, sizeof(*pkt_key) / sizeof(u32), 0); inet_get_local_port_range(&low, &high); range = (high - low) + 1; return (((u64) hash * range) >> 32) + low; } static void lisp_build_header(const struct vport *vport, struct sk_buff *skb, int tunnel_hlen) { struct lisp_port *lisp_port = lisp_vport(vport); struct udphdr *udph = udp_hdr(skb); struct lisphdr *lisph = (struct lisphdr *)(udph + 1); const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; udph->dest = lisp_port->dst_port; udph->source = htons(ovs_tnl_get_src_port(skb)); udph->check = 0; udph->len = htons(skb->len - skb_transport_offset(skb)); lisph->nonce_present = 0; /* We don't support echo nonce algorithm */ lisph->locator_status_bits_present = 1; /* Set LSB */ lisph->solicit_echo_nonce = 0; /* No echo noncing */ lisph->map_version_present = 0; /* No mapping versioning, nonce instead */ lisph->instance_id_present = 1; /* Store the tun_id as Instance ID */ lisph->reserved_flags = 0; /* Reserved flags, set to 0 */ lisph->u1.nonce[0] = 0; lisph->u1.nonce[1] = 0; lisph->u1.nonce[2] = 0; tunnel_id_to_instance_id(tun_key->tun_id, &lisph->u2.word2.instance_id[0]); lisph->u2.word2.locator_status_bits = 1; } /** * ovs_tnl_rcv - ingress point for generic tunnel code * * @vport: port this packet was received on * @skb: received packet * @tun_key: tunnel that carried packet * * Must be called with rcu_read_lock. 
* * Packets received by this function are in the following state: * - skb->data points to the inner Ethernet header. * - The inner Ethernet header is in the linear data area. * - The layer pointers are undefined. */ static void ovs_tnl_rcv(struct vport *vport, struct sk_buff *skb, struct ovs_key_ipv4_tunnel *tun_key) { struct ethhdr *eh; skb_reset_mac_header(skb); eh = eth_hdr(skb); if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) skb->protocol = eh->h_proto; else skb->protocol = htons(ETH_P_802_2); skb_dst_drop(skb); nf_reset(skb); skb_clear_rxhash(skb); secpath_reset(skb); vlan_set_tci(skb, 0); ovs_vport_receive(vport, skb, tun_key); } /* Called with rcu_read_lock and BH disabled. */ static int lisp_rcv(struct sock *sk, struct sk_buff *skb) { struct lisp_port *lisp_port; struct lisphdr *lisph; struct iphdr *iph, *inner_iph; struct ovs_key_ipv4_tunnel tun_key; __be64 key; struct ethhdr *ethh; __be16 protocol; lisp_port = lisp_find_port(dev_net(skb->dev), udp_hdr(skb)->dest); if (unlikely(!lisp_port)) goto error; if (unlikely(!pskb_may_pull(skb, LISP_HLEN))) goto error; lisph = lisp_hdr(skb); skb_pull_rcsum(skb, LISP_HLEN); if (lisph->instance_id_present != 1) key = 0; else key = instance_id_to_tunnel_id(&lisph->u2.word2.instance_id[0]); /* Save outer tunnel values */ iph = ip_hdr(skb); ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); /* Drop non-IP inner packets */ inner_iph = (struct iphdr *)(lisph + 1); switch (inner_iph->version) { case 4: protocol = htons(ETH_P_IP); break; case 6: protocol = htons(ETH_P_IPV6); break; default: goto error; } /* Add Ethernet header */ ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN); memset(ethh, 0, ETH_HLEN); ethh->h_dest[0] = 0x02; ethh->h_source[0] = 0x02; ethh->h_proto = protocol; ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); ovs_tnl_rcv(vport_from_priv(lisp_port), skb, &tun_key); goto out; error: kfree_skb(skb); out: return 0; } /* Arbitrary value. Irrelevant as long as it's not 0 since we set the handler. 
*/
#define UDP_ENCAP_LISP 1

/*
 * Create the kernel UDP socket that receives LISP traffic for @lisp_port.
 *
 * The socket is created, re-homed to @net with sk_change_net(), bound to
 * INADDR_ANY on lisp_port->dst_port, and lisp_rcv() is installed as its
 * encapsulation receive handler.
 *
 * Returns 0 on success.  On failure a warning is logged and the error from
 * socket creation or bind is returned; a socket that was created but could
 * not be bound is released first.
 */
static int lisp_socket_init(struct lisp_port *lisp_port, struct net *net)
{
	struct sockaddr_in sin;
	int err;

	err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
			       &lisp_port->lisp_rcv_socket);
	if (err)
		goto error;

	/* release net ref. */
	sk_change_net(lisp_port->lisp_rcv_socket->sk, net);

	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = lisp_port->dst_port;

	err = kernel_bind(lisp_port->lisp_rcv_socket, (struct sockaddr *)&sin,
			  sizeof(struct sockaddr_in));
	if (err)
		goto error_sock;

	udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_type = UDP_ENCAP_LISP;
	udp_sk(lisp_port->lisp_rcv_socket->sk)->encap_rcv = lisp_rcv;

	udp_encap_enable();

	return 0;

error_sock:
	sk_release_kernel(lisp_port->lisp_rcv_socket->sk);
error:
	pr_warn("cannot register lisp protocol handler: %d\n", err);
	return err;
}

/*
 * Append the port's destination UDP port, converted to host byte order, to
 * @skb as OVS_TUNNEL_ATTR_DST_PORT.  Returns -EMSGSIZE if nla_put_u16()
 * fails, 0 otherwise.
 */
static int lisp_get_options(const struct vport *vport, struct sk_buff *skb)
{
	struct lisp_port *lisp_port = lisp_vport(vport);

	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(lisp_port->dst_port)))
		return -EMSGSIZE;
	return 0;
}

/*
 * Destroy a LISP vport: unlink it from lisp_ports, release its receive
 * socket and hand the vport to ovs_vport_deferred_free().
 */
static void lisp_tnl_destroy(struct vport *vport)
{
	struct lisp_port *lisp_port = lisp_vport(vport);

	list_del_rcu(&lisp_port->list);
	/* Release socket */
	sk_release_kernel(lisp_port->lisp_rcv_socket->sk);

	ovs_vport_deferred_free(vport);
}

/*
 * Create a LISP vport from @parms.
 *
 * parms->options must contain a 16 bit OVS_TUNNEL_ATTR_DST_PORT attribute,
 * otherwise -EINVAL is returned.  -EEXIST is returned when a LISP port with
 * the same UDP port already exists in the datapath's network namespace.
 *
 * Returns the new vport, or an ERR_PTR() on failure.
 */
static struct vport *lisp_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct nlattr *options = parms->options;
	struct lisp_port *lisp_port;
	struct vport *vport;
	struct nlattr *a;
	int err;
	u16 dst_port;

	if (!options) {
		err = -EINVAL;
		goto error;
	}

	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
	if (a && nla_len(a) == sizeof(u16)) {
		dst_port = nla_get_u16(a);
	} else {
		/* Require destination port from userspace. */
		err = -EINVAL;
		goto error;
	}

	/* Verify if we already have a socket created for this port */
	if (lisp_find_port(net, htons(dst_port))) {
		err = -EEXIST;
		goto error;
	}

	vport = ovs_vport_alloc(sizeof(struct lisp_port),
				&ovs_lisp_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	lisp_port = lisp_vport(vport);
	lisp_port->dst_port = htons(dst_port);
	strncpy(lisp_port->name, parms->name, IFNAMSIZ);

	err = lisp_socket_init(lisp_port, net);
	if (err)
		goto error_free;

	/* Publish the port only once its socket is ready to receive. */
	list_add_tail_rcu(&lisp_port->list, &lisp_ports);
	return vport;

error_free:
	ovs_vport_free(vport);
error:
	return ERR_PTR(err);
}

/*
 * Returns true when @skb has a frag_list or when any of its page fragments
 * has a page reference count above one.
 */
static bool need_linearize(const struct sk_buff *skb)
{
	int i;

	if (unlikely(skb_shinfo(skb)->frag_list))
		return true;

	/*
	 * Generally speaking we should linearize if there are paged frags.
	 * However, if all of the refcounts are 1 we know nobody else can
	 * change them from underneath us and we can skip the linearization.
	 */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (unlikely(page_count(skb_frag_page(&skb_shinfo(skb)->frags[i])) > 1))
			return true;

	return false;
}

/*
 * Resolve offloads in software before the tunnel headers are added.
 *
 * A GSO packet is segmented; the original skb is consumed and its control
 * block is copied into every segment.  A CHECKSUM_PARTIAL packet has its
 * checksum computed here, after linearizing if need_linearize() asks for it.
 *
 * Returns the skb to transmit (the head of a ->next chain after
 * segmentation) with ip_summed set to CHECKSUM_NONE on that head, or an
 * ERR_PTR().  No error path in this function frees @skb.
 */
static struct sk_buff *handle_offloads(struct sk_buff *skb)
{
	int err;

	if (skb_is_gso(skb)) {
		struct sk_buff *nskb;
		char cb[sizeof(skb->cb)];

		/* Keep a copy of the control block for the new segments. */
		memcpy(cb, skb->cb, sizeof(cb));

		nskb = __skb_gso_segment(skb, 0, false);
		if (IS_ERR(nskb)) {
			err = PTR_ERR(nskb);
			goto error;
		}

		consume_skb(skb);
		skb = nskb;
		while (nskb) {
			memcpy(nskb->cb, cb, sizeof(cb));
			nskb = nskb->next;
		}
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Pages aren't locked and could change at any time.
		 * If this happens after we compute the checksum, the
		 * checksum will be wrong.  We linearize now to avoid
		 * this problem.
		 */
		if (unlikely(need_linearize(skb))) {
			err = __skb_linearize(skb);
			if (unlikely(err))
				goto error;
		}

		err = skb_checksum_help(skb);
		if (unlikely(err))
			goto error;
	}

	skb->ip_summed = CHECKSUM_NONE;

	return skb;

error:
	return ERR_PTR(err);
}

/*
 * Encapsulate @skb in an outer IPv4 header plus a tunnel header and transmit
 * it.
 *
 * @ipproto: protocol number written into the outer IP header.
 * @tunnel_hlen: length of the tunnel header without the outer IP header.
 * @build_header: callback that fills in the tunnel header of each packet.
 *
 * The route is looked up from the tunnel key in OVS_CB(skb), headroom is
 * grown if necessary, offloads are resolved by handle_offloads(), and every
 * resulting segment is sent with ip_local_out().
 *
 * Returns the sum of the pre-encapsulation lengths of the segments that
 * were sent, or a negative errno when the failure happens before the
 * per-segment loop.  On those early failures @skb is not freed here.
 */
static int ovs_tnl_send(struct vport *vport, struct sk_buff *skb,
			u8 ipproto, int tunnel_hlen,
			void (*build_header)(const struct vport *,
					     struct sk_buff *,
					     int tunnel_hlen))
{
	int min_headroom;
	struct rtable *rt;
	__be32 saddr;
	int sent_len = 0;
	int err;
	struct sk_buff *nskb;

	/* Route lookup */
	saddr = OVS_CB(skb)->tun_key->ipv4_src;
	rt = find_route(ovs_dp_get_net(vport->dp),
			&saddr,
			OVS_CB(skb)->tun_key->ipv4_dst,
			ipproto,
			OVS_CB(skb)->tun_key->ipv4_tos,
			skb->mark);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto error;
	}

	/* From here on tunnel_hlen also covers the outer IP header. */
	tunnel_hlen += sizeof(struct iphdr);

	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
			+ tunnel_hlen
			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);

	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);

		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}

	/* Offloading */
	nskb = handle_offloads(skb);
	if (IS_ERR(nskb)) {
		err = PTR_ERR(nskb);
		goto err_free_rt;
	}
	skb = nskb;

	/* Reset SKB */
	nf_reset(skb);
	secpath_reset(skb);
	skb_dst_drop(skb);
	skb_clear_rxhash(skb);

	/* Walk the ->next chain; each segment is unlinked before it is sent. */
	while (skb) {
		struct sk_buff *next_skb = skb->next;
		struct iphdr *iph;
		int frag_len;

		skb->next = NULL;

		if (vlan_tx_tag_present(skb)) {
			if (unlikely(!__vlan_put_tag(skb,
						     skb->vlan_proto,
						     vlan_tx_tag_get(skb))))
				goto next;

			vlan_set_tci(skb, 0);
		}

		/* Length counted towards the return value excludes the new headers. */
		frag_len = skb->len;
		skb_push(skb, tunnel_hlen);
		skb_reset_network_header(skb);
		skb_set_transport_header(skb, sizeof(struct iphdr));

		/*
		 * The last segment takes over the route reference obtained
		 * above; earlier segments get a clone of it.
		 */
		if (next_skb)
			skb_dst_set(skb, dst_clone(&rt_dst(rt)));
		else
			skb_dst_set(skb, &rt_dst(rt));

		/* Push Tunnel header. */
		build_header(vport, skb, tunnel_hlen);

		/* Push IP header. */
		iph = ip_hdr(skb);
		iph->version = 4;
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->protocol = ipproto;
		iph->daddr = OVS_CB(skb)->tun_key->ipv4_dst;
		iph->saddr = saddr;
		iph->tos = OVS_CB(skb)->tun_key->ipv4_tos;
		iph->ttl = OVS_CB(skb)->tun_key->ipv4_ttl;
		iph->frag_off = OVS_CB(skb)->tun_key->tun_flags &
				TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
		/*
		 * Allow our local IP stack to fragment the outer packet even
		 * if the DF bit is set as a last resort.  We also need to
		 * force selection of an IP ID here with __ip_select_ident(),
		 * as ip_select_ident() assumes a proper ID is not needed
		 * when the DF bit is set.
		 */
		skb->local_df = 1;
		__ip_select_ident(iph, skb_dst(skb), 0);

		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));

		err = ip_local_out(skb);
		if (unlikely(net_xmit_eval(err)))
			goto next;

		sent_len += frag_len;

next:
		skb = next_skb;
	}

	return sent_len;

err_free_rt:
	ip_rt_put(rt);
error:
	return err;
}

/*
 * .send callback of the LISP vport.
 *
 * Returns -EINVAL when the packet has no tunnel key.  Packets that are
 * neither IPv4 nor IPv6 are freed and 0 is returned.  Otherwise the inner
 * Ethernet header is pulled off and the packet goes through ovs_tnl_send();
 * a positive result has the stripped bytes added back.
 */
static int lisp_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	int tnl_len;
	int network_offset = skb_network_offset(skb);

	if (unlikely(!OVS_CB(skb)->tun_key))
		return -EINVAL;

	/* We only encapsulate IPv4 and IPv6 packets */
	switch (skb->protocol) {
	case htons(ETH_P_IP):
	case htons(ETH_P_IPV6):
		/* Pop off "inner" Ethernet header */
		skb_pull(skb, network_offset);
		tnl_len = ovs_tnl_send(vport, skb, IPPROTO_UDP,
				LISP_HLEN, lisp_build_header);
		return tnl_len > 0 ? tnl_len + network_offset : tnl_len;
	default:
		kfree_skb(skb);
		return 0;
	}
}

/* Returns the name stored in the port when it was created. */
static const char *lisp_get_name(const struct vport *vport)
{
	struct lisp_port *lisp_port = lisp_vport(vport);
	return lisp_port->name;
}

/* vport operations for OVS_VPORT_TYPE_LISP ports. */
const struct vport_ops ovs_lisp_vport_ops = {
	.type = OVS_VPORT_TYPE_LISP,
	.create = lisp_tnl_create,
	.destroy = lisp_tnl_destroy,
	.get_name = lisp_get_name,
	.get_options = lisp_get_options,
	.send = lisp_tnl_send,
};
openvswitch-2.0.1+git20140120/datapath/vport-netdev.c000066400000000000000000000237371226605124000220550ustar00rootroot00000000000000/*
 * Copyright (c) 2007-2012 Nicira, Inc.
*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

/* NOTE(review): the header names of the bare #include directives are missing in this copy. */
#include #include #include #include #include #include #include #include #include

#include "datapath.h"
#include "vlan.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

/*
 * vlan_tso is a writable module parameter only on kernels older than 2.6.37
 * without HAVE_VLAN_BUG_WORKAROUND; everywhere else it is the constant true.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) && \
	!defined(HAVE_VLAN_BUG_WORKAROUND)
#include
static int vlan_tso __read_mostly;
module_param(vlan_tso, int, 0644);
MODULE_PARM_DESC(vlan_tso, "Enable TSO for VLAN packets");
#else
#define vlan_tso true
#endif

static void netdev_port_receive(struct vport *vport, struct sk_buff *skb);

/*
 * netdev_frame_hook() has one definition per supported kernel generation.
 * Each variant hands the packet to netdev_port_receive() and reports it as
 * taken; the first two let PACKET_LOOPBACK packets pass untouched.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,39)
/* Called with rcu_read_lock and bottom-halves disabled. */
static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct vport *vport;

	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
		return RX_HANDLER_PASS;

	vport = ovs_netdev_get_vport(skb->dev);

	netdev_port_receive(vport, skb);

	return RX_HANDLER_CONSUMED;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) || \
      defined HAVE_RHEL_OVS_HOOK
/* Called with rcu_read_lock and bottom-halves disabled. */
static struct sk_buff *netdev_frame_hook(struct sk_buff *skb)
{
	struct vport *vport;

	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
		return skb;

	vport = ovs_netdev_get_vport(skb->dev);

	netdev_port_receive(vport, skb);

	return NULL;
}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,32)
/*
 * Used as br_handle_frame_hook.  (Cannot run bridge at the same time, even on
 * different set of devices!)
 */
/* Called with rcu_read_lock and bottom-halves disabled. */
static struct sk_buff *netdev_frame_hook(struct net_bridge_port *p,
					 struct sk_buff *skb)
{
	netdev_port_receive((struct vport *)p, skb);
	return NULL;
}
#else
#error
#endif

/*
 * netdev_init()/netdev_exit() are called once per attached port.  They are
 * empty in the first branch below; in the other branch they count ports and
 * install br_handle_frame_hook for the first one and clear it after the
 * last one.
 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) || \
    defined HAVE_RHEL_OVS_HOOK
static int netdev_init(void) { return 0; }
static void netdev_exit(void) { }
#else
static int port_count;

static void netdev_init(void)
{
	port_count++;
	if (port_count > 1)
		return;

	/* Hook into callback used by the bridge to intercept packets.
	 * Parasites we are. */
	br_handle_frame_hook = netdev_frame_hook;

	return;
}

static void netdev_exit(void)
{
	port_count--;
	if (port_count > 0)
		return;

	br_handle_frame_hook = NULL;
}
#endif

/*
 * Returns the net_device behind the OVSP_LOCAL port of @dp.  The datapath is
 * expected to have that port; its absence triggers BUG_ON().
 */
static struct net_device *get_dpdev(struct datapath *dp)
{
	struct vport *local;

	local = ovs_vport_ovsl(dp, OVSP_LOCAL);
	BUG_ON(!local);
	return netdev_vport_priv(local)->dev;
}

/*
 * Attach the existing network device named parms->name to the datapath.
 *
 * Fails with -ENODEV when no such device exists in the datapath's namespace
 * and with -EINVAL for loopback, non-Ethernet and OVS internal devices.
 * Under the RTNL lock the device is linked below the datapath's local
 * device, the receive handler is registered, promiscuity is raised and
 * IFF_OVS_DATAPATH is set.
 *
 * Returns the new vport or an ERR_PTR(); the error labels undo the steps in
 * reverse order.
 */
static struct vport *netdev_create(const struct vport_parms *parms)
{
	struct vport *vport;
	struct netdev_vport *netdev_vport;
	int err;

	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
				&ovs_netdev_vport_ops, parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		goto error;
	}

	netdev_vport = netdev_vport_priv(vport);

	/* Holds a device reference until dev_put() in an error or free path. */
	netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
	if (!netdev_vport->dev) {
		err = -ENODEV;
		goto error_free_vport;
	}

	if (netdev_vport->dev->flags & IFF_LOOPBACK ||
	    netdev_vport->dev->type != ARPHRD_ETHER ||
	    ovs_is_internal_dev(netdev_vport->dev)) {
		err = -EINVAL;
		goto error_put;
	}

	rtnl_lock();
	err = netdev_master_upper_dev_link(netdev_vport->dev,
					   get_dpdev(vport->dp));
	if (err)
		goto error_unlock;

	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
					 vport);
	if (err)
		goto error_master_upper_dev_unlink;

	dev_set_promiscuity(netdev_vport->dev, 1);
	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
	rtnl_unlock();

	netdev_init();
	return vport;

error_master_upper_dev_unlink:
	netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
error_unlock:
	rtnl_unlock();
error_put:
	dev_put(netdev_vport->dev);
error_free_vport:
	ovs_vport_free(vport);
error:
	return ERR_PTR(err);
}

/*
 * RCU callback scheduled by netdev_destroy(): drops the device reference and
 * frees the vport that embeds @rcu.
 */
static void free_port_rcu(struct rcu_head *rcu)
{
	struct netdev_vport *netdev_vport = container_of(rcu,
					struct netdev_vport, rcu);

	dev_put(netdev_vport->dev);
	ovs_vport_free(vport_from_priv(netdev_vport));
}

/*
 * Undo the device-side part of netdev_create(): clear IFF_OVS_DATAPATH,
 * unregister the receive handler, unlink from the master device and lower
 * promiscuity.  The caller must hold the RTNL lock.
 */
void ovs_netdev_detach_dev(struct vport *vport)
{
	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);

	ASSERT_RTNL();
	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
	netdev_rx_handler_unregister(netdev_vport->dev);
	netdev_upper_dev_unlink(netdev_vport->dev,
				netdev_master_upper_dev_get(netdev_vport->dev));
	dev_set_promiscuity(netdev_vport->dev, -1);
}

/*
 * .destroy callback: detaches the device if it is still attached to a
 * datapath and defers the final free to free_port_rcu().
 */
static void netdev_destroy(struct vport *vport)
{
	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);

	netdev_exit();

	rtnl_lock();
	/* Skip the detach when the device no longer reports a vport. */
	if (ovs_netdev_get_vport(netdev_vport->dev))
		ovs_netdev_detach_dev(vport);
	rtnl_unlock();

	call_rcu(&netdev_vport->rcu, free_port_rcu);
}

/* Returns the name of the attached network device. */
const char *ovs_netdev_get_name(const struct vport *vport)
{
	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
	return netdev_vport->dev->name;
}

/*
 * Common receive path of the frame hooks.  Frees the packet when @vport is
 * NULL or skb_warn_if_lro() fires; otherwise pushes the Ethernet header back
 * onto the packet and passes it to ovs_vport_receive() without a tunnel key.
 *
 * Must be called with rcu_read_lock.
 */
static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
{
	if (unlikely(!vport))
		goto error;

	if (unlikely(skb_warn_if_lro(skb)))
		goto error;

	/* Make our own copy of the packet.  Otherwise we will mangle the
	 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
	 * (No one comes after us, since we tell handle_bridge() that we took
	 * the packet.) */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		return;

	skb_push(skb, ETH_HLEN);
	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);

	ovs_vport_receive(vport, skb, NULL);
	return;

error:
	kfree_skb(skb);
}

/*
 * Length of @skb without the Ethernet header and, when skb->protocol is
 * 802.1Q, without one VLAN header.  The arithmetic is unsigned.
 */
static unsigned int packet_length(const struct sk_buff *skb)
{
	unsigned int length = skb->len - ETH_HLEN;

	if (skb->protocol == htons(ETH_P_8021Q))
		length -= VLAN_HLEN;

	return length;
}

/*
 * Whether a packet can be handed to @dev with its VLAN tag still out of
 * band.  The answer depends on the kernel version, see the branches below.
 */
static bool dev_supports_vlan_tx(struct net_device *dev)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
	/* Software fallback means every device supports vlan_tci on TX. */
	return true;
#elif defined(HAVE_VLAN_BUG_WORKAROUND)
	return dev->features & NETIF_F_HW_VLAN_TX;
#else
	/* Assume that the driver is buggy. */
	return false;
#endif
}

/*
 * .send callback: queue @skb for transmission on the attached device.
 *
 * Non-GSO packets longer than the device MTU are dropped with a rate-limited
 * warning.  When the packet carries an out-of-band VLAN tag that the device
 * cannot take, the tag is written into the packet data, segmenting in
 * software first if GSO is needed.
 *
 * Returns the number of bytes handed to dev_queue_xmit(), or 0 when the
 * packet was dropped.
 */
static int netdev_send(struct vport *vport, struct sk_buff *skb)
{
	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
	int mtu = netdev_vport->dev->mtu;
	int len;

	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
		net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
				     netdev_vport->dev->name,
				     packet_length(skb), mtu);
		goto drop;
	}

	skb->dev = netdev_vport->dev;

	if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) {
		int features;

		features = netif_skb_features(skb);

		if (!vlan_tso)
			features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
				      NETIF_F_UFO | NETIF_F_FSO);

		if (netif_needs_gso(skb, features)) {
			struct sk_buff *nskb;

			nskb = skb_gso_segment(skb, features);
			if (!nskb) {
				/* No segment list was returned: tag the original skb. */
				if (unlikely(skb_cloned(skb) &&
				    pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
					goto drop;

				skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
				goto tag;
			}

			if (IS_ERR(nskb))
				goto drop;
			consume_skb(skb);
			skb = nskb;

			/* Tag and queue every segment; count only those that were tagged. */
			len = 0;
			do {
				nskb = skb->next;
				skb->next = NULL;

				skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
				if (likely(skb)) {
					len += skb->len;
					vlan_set_tci(skb, 0);
					dev_queue_xmit(skb);
				}

				skb = nskb;
			} while (skb);

			return len;
		}

tag:
		skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
		if (unlikely(!skb))
			return 0;
		vlan_set_tci(skb, 0);
	}

	len = skb->len;
	dev_queue_xmit(skb);

	return len;

drop:
	kfree_skb(skb);
	return 0;
}

/*
 * Returns null if this device is not attached to a datapath.
 *
 * Where the vport pointer is stored in the net_device depends on the kernel
 * version and on HAVE_RHEL_OVS_HOOK.
 */
struct vport *ovs_netdev_get_vport(struct net_device *dev)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) || \
    defined HAVE_RHEL_OVS_HOOK
#if IFF_OVS_DATAPATH != 0
	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
#else
	if (likely(rcu_access_pointer(dev->rx_handler) == netdev_frame_hook))
#endif
#ifdef HAVE_RHEL_OVS_HOOK
		return (struct vport *)rcu_dereference_rtnl(dev->ax25_ptr);
#else
		return (struct vport *)rcu_dereference_rtnl(dev->rx_handler_data);
#endif
	else
		return NULL;
#else
	return (struct vport *)rcu_dereference_rtnl(dev->br_port);
#endif
}

/* vport operations for OVS_VPORT_TYPE_NETDEV ports. */
const struct vport_ops ovs_netdev_vport_ops = {
	.type = OVS_VPORT_TYPE_NETDEV,
	.create = netdev_create,
	.destroy = netdev_destroy,
	.get_name = ovs_netdev_get_name,
	.send = netdev_send,
};

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) && \
    !defined HAVE_RHEL_OVS_HOOK
/*
 * Enforces mutual exclusion with the Linux bridge module by declaring and
 * exporting br_should_route_hook.  Because the bridge module also exports the
 * same symbol, the module loader will refuse to load both modules at the same
 * time (e.g. "bridge: exports duplicate symbol br_should_route_hook (owned by
 * openvswitch)").
 *
 * Before Linux 2.6.36, Open vSwitch cannot safely coexist with the Linux
 * bridge module, so openvswitch uses this macro in those versions.  In
 * Linux 2.6.36 and later, Open vSwitch can coexist with the bridge module.
 *
 * The use of "typeof" here avoids the need to track changes in the type of
 * br_should_route_hook over various kernel versions.
*/ typeof(br_should_route_hook) br_should_route_hook; EXPORT_SYMBOL(br_should_route_hook); #endif openvswitch-2.0.1+git20140120/datapath/vport-netdev.h000066400000000000000000000022731226605124000220520ustar00rootroot00000000000000/* * Copyright (c) 2007-2011 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA */ #ifndef VPORT_NETDEV_H #define VPORT_NETDEV_H 1 #include #include #include "vport.h" struct vport *ovs_netdev_get_vport(struct net_device *dev); struct netdev_vport { struct rcu_head rcu; struct net_device *dev; }; static inline struct netdev_vport * netdev_vport_priv(const struct vport *vport) { return vport_priv(vport); } const char *ovs_netdev_get_name(const struct vport *); void ovs_netdev_detach_dev(struct vport *); #endif /* vport_netdev.h */ openvswitch-2.0.1+git20140120/datapath/vport-vxlan.c000066400000000000000000000116641226605124000217140ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * Copyright (c) 2013 Cisco Systems, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

/* NOTE(review): the header names of the bare #include directives are missing in this copy. */
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

#include "datapath.h"
#include "vport.h"

/**
 * struct vxlan_port - Keeps track of open UDP ports
 * @vs: vxlan_sock created for the port.
 * @name: vport name.
 */
struct vxlan_port {
	struct vxlan_sock *vs;
	char name[IFNAMSIZ];
};

/* The VXLAN state lives in the private area of the vport. */
static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
{
	return vport_priv(vport);
}

/*
 * Receive callback registered with vxlan_sock_add().  The vport is taken
 * from vs->data, the tunnel key is built from the outer IP header and the
 * VNI (the upper 24 bits of @vx_vni in host order), and the packet is passed
 * to ovs_vport_receive().
 *
 * Called with rcu_read_lock and BH disabled.
 */
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
{
	struct ovs_key_ipv4_tunnel tun_key;
	struct vport *vport = vs->data;
	struct iphdr *iph;
	__be64 key;

	/* Save outer tunnel values */
	iph = ip_hdr(skb);
	key = cpu_to_be64(ntohl(vx_vni) >> 8);
	ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY);

	ovs_vport_receive(vport, skb, &tun_key);
}

/*
 * Append the source port of the VXLAN socket, converted to host byte order,
 * to @skb as OVS_TUNNEL_ATTR_DST_PORT.  Returns -EMSGSIZE if nla_put_u16()
 * fails, 0 otherwise.
 */
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	__be16 dst_port = inet_sport(vxlan_port->vs->sock->sk);

	if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
		return -EMSGSIZE;
	return 0;
}

/* Release the VXLAN socket and hand the vport to ovs_vport_deferred_free(). */
static void vxlan_tnl_destroy(struct vport *vport)
{
	struct vxlan_port *vxlan_port = vxlan_vport(vport);

	vxlan_sock_release(vxlan_port->vs);

	ovs_vport_deferred_free(vport);
}

/*
 * Create a VXLAN vport from @parms.
 *
 * parms->options must contain a 16 bit OVS_TUNNEL_ATTR_DST_PORT attribute,
 * otherwise -EINVAL is returned.  Returns the new vport, or an ERR_PTR()
 * (including the one produced by vxlan_sock_add()) on failure.
 */
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
	struct net *net = ovs_dp_get_net(parms->dp);
	struct nlattr *options = parms->options;
	struct vxlan_port *vxlan_port;
	struct vxlan_sock *vs;
	struct vport *vport;
	struct nlattr *a;
	u16 dst_port;
	int err;

	if (!options) {
		err = -EINVAL;
		goto error;
	}
	a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
	if (a && nla_len(a) == sizeof(u16)) {
		dst_port = nla_get_u16(a);
	} else {
		/* Require destination port from userspace. */
		err = -EINVAL;
		goto error;
	}

	vport = ovs_vport_alloc(sizeof(struct vxlan_port),
				&ovs_vxlan_vport_ops, parms);
	if (IS_ERR(vport))
		return vport;

	vxlan_port = vxlan_vport(vport);
	strncpy(vxlan_port->name, parms->name, IFNAMSIZ);

	vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true);
	if (IS_ERR(vs)) {
		ovs_vport_free(vport);
		/* Pass the socket error on as the vport error pointer. */
		return (void *)vs;
	}
	vxlan_port->vs = vs;

	return vport;

error:
	return ERR_PTR(err);
}

/*
 * .send callback of the VXLAN vport.
 *
 * Returns -EINVAL when the packet has no tunnel key and the route lookup
 * error when no route is found.  Otherwise the value of vxlan_xmit_skb() is
 * returned; the route reference is dropped here when that value is negative.
 */
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	__be16 dst_port = inet_sport(vxlan_port->vs->sock->sk);
	struct net *net = ovs_dp_get_net(vport->dp);
	struct rtable *rt;
	__be16 src_port;
	__be32 saddr;
	__be16 df;
	int port_min;
	int port_max;
	int err;

	if (unlikely(!OVS_CB(skb)->tun_key)) {
		err = -EINVAL;
		goto error;
	}

	/* Route lookup */
	saddr = OVS_CB(skb)->tun_key->ipv4_src;
	rt = find_route(ovs_dp_get_net(vport->dp),
			&saddr,
			OVS_CB(skb)->tun_key->ipv4_dst,
			IPPROTO_UDP,
			OVS_CB(skb)->tun_key->ipv4_tos,
			skb->mark);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto error;
	}

	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
		htons(IP_DF) : 0;

	skb->local_df = 1;

	inet_get_local_port_range(&port_min, &port_max);
	src_port = vxlan_src_port(port_min, port_max, skb);

	/* The last argument is the tunnel ID shifted left by 8 bits, in network order. */
	err = vxlan_xmit_skb(net, vxlan_port->vs, rt, skb,
			     saddr, OVS_CB(skb)->tun_key->ipv4_dst,
			     OVS_CB(skb)->tun_key->ipv4_tos,
			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
			     src_port, dst_port,
			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
	if (err < 0)
		ip_rt_put(rt);
error:
	return err;
}

/* Returns the name stored in the port when it was created. */
static const char *vxlan_get_name(const struct vport *vport)
{
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	return vxlan_port->name;
}

/* vport operations for OVS_VPORT_TYPE_VXLAN ports. */
const struct vport_ops ovs_vxlan_vport_ops = {
	.type = OVS_VPORT_TYPE_VXLAN,
	.create = vxlan_tnl_create,
	.destroy = vxlan_tnl_destroy,
	.get_name = vxlan_get_name,
	.get_options = vxlan_get_options,
	.send = vxlan_tnl_send,
};
openvswitch-2.0.1+git20140120/datapath/vport.c000066400000000000000000000267551226605124000205750ustar00rootroot00000000000000/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#include #include #include #include #include #include #include #include #include #include #include #include #include #include

#include "datapath.h"
#include "vport.h"
#include "vport-internal_dev.h"

/* List of statically compiled vport implementations.  Don't forget to also
 * add yours to the list at the bottom of vport.h.
*/ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, #if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX) &ovs_gre_vport_ops, &ovs_gre64_vport_ops, #endif &ovs_vxlan_vport_ops, &ovs_lisp_vport_ops, }; /* Protected by RCU read lock for reading, ovs_mutex for writing. */ static struct hlist_head *dev_table; #define VPORT_HASH_BUCKETS 1024 /** * ovs_vport_init - initialize vport subsystem * * Called at module load time to initialize the vport subsystem. */ int ovs_vport_init(void) { dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head), GFP_KERNEL); if (!dev_table) return -ENOMEM; return 0; } /** * ovs_vport_exit - shutdown vport subsystem * * Called at module exit time to shutdown the vport subsystem. */ void ovs_vport_exit(void) { kfree(dev_table); } static struct hlist_head *hash_bucket(struct net *net, const char *name) { unsigned int hash = jhash(name, strlen(name), (unsigned long) net); return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)]; } /** * ovs_vport_locate - find a port that has already been created * * @name: name of port to find * * Must be called with ovs or RCU read lock. */ struct vport *ovs_vport_locate(struct net *net, const char *name) { struct hlist_head *bucket = hash_bucket(net, name); struct vport *vport; hlist_for_each_entry_rcu(vport, bucket, hash_node) if (!strcmp(name, vport->ops->get_name(vport)) && net_eq(ovs_dp_get_net(vport->dp), net)) return vport; return NULL; } /** * ovs_vport_alloc - allocate and initialize new vport * * @priv_size: Size of private data area to allocate. * @ops: vport device ops * * Allocate and initialize a new vport defined by @ops. The vport will contain * a private data area of size @priv_size that can be accessed using * vport_priv(). vports that are no longer needed should be released with * ovs_vport_free(). 
*/ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, const struct vport_parms *parms) { struct vport *vport; size_t alloc_size; alloc_size = sizeof(struct vport); if (priv_size) { alloc_size = ALIGN(alloc_size, VPORT_ALIGN); alloc_size += priv_size; } vport = kzalloc(alloc_size, GFP_KERNEL); if (!vport) return ERR_PTR(-ENOMEM); vport->dp = parms->dp; vport->port_no = parms->port_no; vport->upcall_portid = parms->upcall_portid; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); vport->percpu_stats = alloc_percpu(struct pcpu_tstats); if (!vport->percpu_stats) { kfree(vport); return ERR_PTR(-ENOMEM); } spin_lock_init(&vport->stats_lock); return vport; } /** * ovs_vport_free - uninitialize and free vport * * @vport: vport to free * * Frees a vport allocated with ovs_vport_alloc() when it is no longer needed. * * The caller must ensure that an RCU grace period has passed since the last * time @vport was in a datapath. */ void ovs_vport_free(struct vport *vport) { free_percpu(vport->percpu_stats); kfree(vport); } /** * ovs_vport_add - add vport device (for kernel callers) * * @parms: Information about new vport. * * Creates a new vport with the specified configuration (which is dependent on * device type). ovs_mutex must be held. */ struct vport *ovs_vport_add(const struct vport_parms *parms) { struct vport *vport; int err = 0; int i; for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { if (vport_ops_list[i]->type == parms->type) { struct hlist_head *bucket; vport = vport_ops_list[i]->create(parms); if (IS_ERR(vport)) { err = PTR_ERR(vport); goto out; } bucket = hash_bucket(ovs_dp_get_net(vport->dp), vport->ops->get_name(vport)); hlist_add_head_rcu(&vport->hash_node, bucket); return vport; } } err = -EAFNOSUPPORT; out: return ERR_PTR(err); } /** * ovs_vport_set_options - modify existing vport device (for kernel callers) * * @vport: vport to modify. * @options: New configuration. 
*
 * Modifies an existing device with the specified configuration (which is
 * dependent on device type).  ovs_mutex must be held.
 *
 * Returns -EOPNOTSUPP if the vport type provides no set_options callback,
 * otherwise whatever that callback returns.
 */
int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
{
	if (!vport->ops->set_options)
		return -EOPNOTSUPP;
	return vport->ops->set_options(vport, options);
}

/**
 * ovs_vport_del - delete existing vport device
 *
 * @vport: vport to delete.
 *
 * Removes @vport from the device hash table and hands it to its type's
 * destroy callback.  No status is returned: both this function and the
 * destroy callback are void.  ovs_mutex must be held.
 */
void ovs_vport_del(struct vport *vport)
{
	/* NOTE(review): presumably asserts that ovs_mutex is held - confirm
	 * against the ASSERT_OVSL() definition. */
	ASSERT_OVSL();

	/* Unhash first so that new lookups cannot find the port. */
	hlist_del_rcu(&vport->hash_node);

	vport->ops->destroy(vport);
}

/**
 * ovs_vport_set_stats - sets offset device stats
 *
 * @vport: vport on which to set stats
 * @stats: stats to set
 *
 * Provides a set of transmit, receive, and error stats to be added as an
 * offset to the collected data when stats are retrieved.  The offsets are
 * stored under @vport's stats_lock and nothing is returned.
 *
 * Must be called with ovs_mutex.
 */
void ovs_vport_set_stats(struct vport *vport, struct ovs_vport_stats *stats)
{
	spin_lock_bh(&vport->stats_lock);
	vport->offset_stats = *stats;
	spin_unlock_bh(&vport->stats_lock);
}

/**
 * ovs_vport_get_stats - retrieve device stats
 *
 * @vport: vport from which to retrieve the stats
 * @stats: location to store stats
 *
 * Retrieves transmit, receive, and error stats for the given device.
 *
 * Must be called with ovs_mutex or rcu_read_lock.
 */
void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
{
	int i;

	/* We potentially have 3 sources of stats that need to be
	 * combined: those we have collected (split into err_stats and
	 * percpu_stats), offset_stats from set_stats(), and device
	 * error stats from netdev->get_stats() (for errors that happen
	 * downstream and therefore aren't reported through our
	 * vport_record_error() function).
	 * Stats from first two sources are merged and reported by ovs over
	 * OVS_VPORT_ATTR_STATS.
	 * netdev-stats can be directly read over netlink-ioctl. */

	/* Start from the configured offsets and add the error counters; both
	 * are protected by stats_lock. */
	spin_lock_bh(&vport->stats_lock);

	*stats = vport->offset_stats;

	stats->rx_errors	+= vport->err_stats.rx_errors;
	stats->tx_errors	+= vport->err_stats.tx_errors;
	stats->tx_dropped	+= vport->err_stats.tx_dropped;
	stats->rx_dropped	+= vport->err_stats.rx_dropped;

	spin_unlock_bh(&vport->stats_lock);

	/* Then add the packet and byte counters of every possible CPU. */
	for_each_possible_cpu(i) {
		const struct pcpu_tstats *percpu_stats;
		struct pcpu_tstats local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);

		/* Copy this CPU's counters into local_stats, repeating the
		 * copy for as long as u64_stats_fetch_retry_bh() asks for a
		 * retry. */
		do {
			start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));

		stats->rx_bytes		+= local_stats.rx_bytes;
		stats->rx_packets	+= local_stats.rx_packets;
		stats->tx_bytes		+= local_stats.tx_bytes;
		stats->tx_packets	+= local_stats.tx_packets;
	}
}

/**
 * ovs_vport_get_options - retrieve device options
 *
 * @vport: vport from which to retrieve the options.
 * @skb: sk_buff where options should be appended.
 *
 * Retrieves the configuration of the given device, appending an
 * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
 * vport-specific attributes to @skb.
 *
 * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
 * negative error code if a real error occurred.  If an error occurs, @skb is
 * left unmodified.  A vport type without a get_options callback also yields 0
 * and appends nothing.
 *
 * Must be called with ovs_mutex or rcu_read_lock.
*/ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) { struct nlattr *nla; int err; if (!vport->ops->get_options) return 0; nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); if (!nla) return -EMSGSIZE; err = vport->ops->get_options(vport, skb); if (err) { nla_nest_cancel(skb, nla); return err; } nla_nest_end(skb, nla); return 0; } /** * ovs_vport_receive - pass up received packet to the datapath for processing * * @vport: vport that received the packet * @skb: skb that was received * @tun_key: tunnel (if any) that carried packet * * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. The caller must have already * called compute_ip_summed() to initialize the checksumming fields. */ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, struct ovs_key_ipv4_tunnel *tun_key) { struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; stats->rx_bytes += skb->len; u64_stats_update_end(&stats->syncp); OVS_CB(skb)->tun_key = tun_key; ovs_dp_process_received_packet(vport, skb); } /** * ovs_vport_send - send a packet on a device * * @vport: vport on which to send the packet * @skb: skb to send * * Sends the given packet and returns the length of data sent. Either ovs * lock or rcu_read_lock must be held. 
*/ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) { int sent = vport->ops->send(vport, skb); if (likely(sent > 0)) { struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->tx_packets++; stats->tx_bytes += sent; u64_stats_update_end(&stats->syncp); } else if (sent < 0) { ovs_vport_record_error(vport, VPORT_E_TX_ERROR); kfree_skb(skb); } else ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); return sent; } /** * ovs_vport_record_error - indicate device error to generic stats layer * * @vport: vport that encountered the error * @err_type: one of enum vport_err_type types to indicate the error type * * If using the vport generic stats layer indicate that an error of the given * type has occurred. */ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) { spin_lock(&vport->stats_lock); switch (err_type) { case VPORT_E_RX_DROPPED: vport->err_stats.rx_dropped++; break; case VPORT_E_RX_ERROR: vport->err_stats.rx_errors++; break; case VPORT_E_TX_DROPPED: vport->err_stats.tx_dropped++; break; case VPORT_E_TX_ERROR: vport->err_stats.tx_errors++; break; } spin_unlock(&vport->stats_lock); } static void free_vport_rcu(struct rcu_head *rcu) { struct vport *vport = container_of(rcu, struct vport, rcu); ovs_vport_free(vport); } void ovs_vport_deferred_free(struct vport *vport) { if (!vport) return; call_rcu(&vport->rcu, free_vport_rcu); } openvswitch-2.0.1+git20140120/datapath/vport.h000066400000000000000000000153041226605124000205660ustar00rootroot00000000000000/* * Copyright (c) 2007-2012 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#ifndef VPORT_H
#define VPORT_H 1

/* NOTE(review): the header names are missing from the #include lines below in
 * this copy of the file - restore them from the upstream source. */
#include
#include
#include
#include
#include
#include
#include

struct vport;
struct vport_parms;

/* The following definitions are for users of the vport subsystem: */

/* RCU-annotated pointers to the GRE and GRE64 vports.
 * NOTE(review): presumably one instance per network namespace - confirm
 * against the code that allocates it. */
struct vport_net {
	struct vport __rcu *gre_vport;
	struct vport __rcu *gre64_vport;
};

int ovs_vport_init(void);
void ovs_vport_exit(void);

struct vport *ovs_vport_add(const struct vport_parms *);
void ovs_vport_del(struct vport *);

struct vport *ovs_vport_locate(struct net *net, const char *name);

void ovs_vport_set_stats(struct vport *, struct ovs_vport_stats *);
void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);

int ovs_vport_set_options(struct vport *, struct nlattr *options);
int ovs_vport_get_options(const struct vport *, struct sk_buff *);

int ovs_vport_send(struct vport *, struct sk_buff *);

/* The following definitions are for implementers of vport devices: */

/* Error and drop counters kept per vport; ovs_vport_record_error() increments
 * one of them for each reported error. */
struct vport_err_stats {
	u64 rx_dropped;
	u64 rx_errors;
	u64 tx_dropped;
	u64 tx_errors;
};

/**
 * struct vport - one port within a datapath
 * @rcu: RCU callback head for deferred destruction.
 * @dp: Datapath to which this port belongs.
 * @upcall_portid: The Netlink port to use for packets received on this port that
 * miss the flow table.
 * @port_no: Index into @dp's @ports array.
 * @hash_node: Element in @dev_table hash table in vport.c.
 * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
 * @ops: Class structure.
 * @percpu_stats: Points to per-CPU statistics used and maintained by vport
 * @stats_lock: Protects @err_stats and @offset_stats.
* @err_stats: Points to error statistics used and maintained by vport * @offset_stats: Added to actual statistics as a sop to compatibility with * XAPI for Citrix XenServer. Deprecated. */ struct vport { struct rcu_head rcu; struct datapath *dp; u32 upcall_portid; u16 port_no; struct hlist_node hash_node; struct hlist_node dp_hash_node; const struct vport_ops *ops; struct pcpu_tstats __percpu *percpu_stats; spinlock_t stats_lock; struct vport_err_stats err_stats; struct ovs_vport_stats offset_stats; }; /** * struct vport_parms - parameters for creating a new vport * * @name: New vport's name. * @type: New vport's type. * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if * none was supplied. * @dp: New vport's datapath. * @port_no: New vport's port number. */ struct vport_parms { const char *name; enum ovs_vport_type type; struct nlattr *options; /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; u32 upcall_portid; }; /** * struct vport_ops - definition of a type of virtual port * * @type: %OVS_VPORT_TYPE_* value for this type of virtual port. * @create: Create a new vport configured as specified. On success returns * a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value. * @destroy: Destroys a vport. Must call vport_free() on the vport but not * before an RCU grace period has elapsed. * @set_options: Modify the configuration of an existing vport. May be %NULL * if modification is not supported. * @get_options: Appends vport-specific attributes for the configuration of an * existing vport to a &struct sk_buff. May be %NULL for a vport that does not * have any configuration. * @get_name: Get the device's name. * @send: Send a packet on the device. Returns the length of the packet sent, * zero for dropped packets or negative for error. */ struct vport_ops { enum ovs_vport_type type; /* Called with ovs_mutex. 
*/ struct vport *(*create)(const struct vport_parms *); void (*destroy)(struct vport *); int (*set_options)(struct vport *, struct nlattr *); int (*get_options)(const struct vport *, struct sk_buff *); /* Called with rcu_read_lock or ovs_mutex. */ const char *(*get_name)(const struct vport *); int (*send)(struct vport *, struct sk_buff *); }; enum vport_err_type { VPORT_E_RX_DROPPED, VPORT_E_RX_ERROR, VPORT_E_TX_DROPPED, VPORT_E_TX_ERROR, }; struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, const struct vport_parms *); void ovs_vport_free(struct vport *); void ovs_vport_deferred_free(struct vport *vport); #define VPORT_ALIGN 8 /** * vport_priv - access private data area of vport * * @vport: vport to access * * If a nonzero size was passed in priv_size of vport_alloc() a private data * area was allocated on creation. This allows that area to be accessed and * used for any purpose needed by the vport implementer. */ static inline void *vport_priv(const struct vport *vport) { return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN); } /** * vport_from_priv - lookup vport from private data pointer * * @priv: Start of private data area. * * It is sometimes useful to translate from a pointer to the private data * area to the vport, such as in the case where the private data pointer is * the result of a hash table lookup. @priv must point to the start of the * private data area. */ static inline struct vport *vport_from_priv(const void *priv) { return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); } void ovs_vport_receive(struct vport *, struct sk_buff *, struct ovs_key_ipv4_tunnel *); void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the top of vport.c. 
*/
extern const struct vport_ops ovs_netdev_vport_ops;
extern const struct vport_ops ovs_internal_vport_ops;
extern const struct vport_ops ovs_gre_vport_ops;
extern const struct vport_ops ovs_gre64_vport_ops;
extern const struct vport_ops ovs_vxlan_vport_ops;
extern const struct vport_ops ovs_lisp_vport_ops;

/* When @skb carries a CHECKSUM_COMPLETE checksum, adds the checksum of the
 * @len bytes at @start to skb->csum.  Other ip_summed values are left alone. */
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
					  const void *start, unsigned int len)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
}

#endif /* vport.h */
openvswitch-2.0.1+git20140120/debian/000077500000000000000000000000001226605124000166745ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/debian/.gitignore000066400000000000000000000006011226605124000206610ustar00rootroot00000000000000*.debhelper *.debhelper.log *.substvars /control /copyright /files /nicira-switch /openvswitch /openvswitch-common /openvswitch-common.copyright /openvswitch-controller /openvswitch-datapath-source /openvswitch-datapath-dkms /openvswitch-dbg /openvswitch-ipsec /openvswitch-pki /openvswitch-switch /openvswitch-switch.copyright /openvswitch-test /ovsdbmonitor /python-openvswitch /tmp openvswitch-2.0.1+git20140120/debian/automake.mk000066400000000000000000000051411226605124000210340ustar00rootroot00000000000000
EXTRA_DIST += \
	debian/changelog \
	debian/compat \
	debian/control \
	debian/control.modules.in \
	debian/copyright \
	debian/copyright.in \
	debian/dkms.conf.in \
	debian/dirs \
	debian/openvswitch-common.dirs \
	debian/openvswitch-common.docs \
	debian/openvswitch-common.install \
	debian/openvswitch-common.manpages \
	debian/openvswitch-controller.README.Debian \
	debian/openvswitch-controller.default \
	debian/openvswitch-controller.dirs \
	debian/openvswitch-controller.init \
	debian/openvswitch-controller.install \
	debian/openvswitch-controller.manpages \
	debian/openvswitch-controller.postinst \
	debian/openvswitch-controller.postrm \
	debian/openvswitch-datapath-module-_KVERS_.postinst.modules.in \
	debian/openvswitch-datapath-dkms.postinst \
	debian/openvswitch-datapath-dkms.prerm \
	debian/openvswitch-datapath-source.README.Debian \
	debian/openvswitch-datapath-source.copyright \
	debian/openvswitch-datapath-source.dirs \
	debian/openvswitch-datapath-source.install \
	debian/openvswitch-ipsec.dirs \
	debian/openvswitch-ipsec.init \
	debian/openvswitch-ipsec.install \
	debian/openvswitch-pki.dirs \
	debian/openvswitch-pki.postinst \
	debian/openvswitch-pki.postrm \
	debian/openvswitch-switch.README.Debian \
	debian/openvswitch-switch.dirs \
	debian/openvswitch-switch.init \
	debian/openvswitch-switch.install \
	debian/openvswitch-switch.logrotate \
	debian/openvswitch-switch.manpages \
	debian/openvswitch-switch.postinst \
	debian/openvswitch-switch.postrm \
	debian/openvswitch-switch.template \
	debian/openvswitch-switch.links \
	debian/openvswitch-test.dirs \
	debian/openvswitch-test.install \
	debian/openvswitch-test.manpages \
	debian/ovsdbmonitor.install \
	debian/ovsdbmonitor.manpages \
	debian/ovs-monitor-ipsec \
	debian/python-openvswitch.dirs \
	debian/python-openvswitch.install \
	debian/rules \
	debian/rules.modules \
	debian/ifupdown.sh \
	debian/source/format

# Fails the build unless debian/changelog mentions the version being built
# ("pre" in $(VERSION) is written "~pre" in the changelog).
#
# DEB_VERSION is a shell variable set inside the recipe, so it has to be
# referenced as $$DEB_VERSION.  Written as $(DEB_VERSION) it is expanded by
# make instead, and the pattern then does not contain the shell variable's
# value.
check-debian-changelog-version:
	@DEB_VERSION=`echo '$(VERSION)' | sed 's/pre/~pre/'`; \
	if $(FGREP) "($$DEB_VERSION" $(srcdir)/debian/changelog >/dev/null; \
	then \
	  :; \
	else \
	  echo "Update debian/changelog to mention version $(VERSION)"; \
	  exit 1; \
	fi
ALL_LOCAL += check-debian-changelog-version
DIST_HOOKS += check-debian-changelog-version

# Builds debian/copyright from debian/copyright.in, replacing the %AUTHORS%
# line with the first paragraph-delimited list of names taken from AUTHORS.
# NOTE(review): whitespace is collapsed in this copy of the file; confirm the
# indent width inside 's/^/ /p' against the upstream source.
$(srcdir)/debian/copyright: AUTHORS debian/copyright.in
	{ sed -n -e '/%AUTHORS%/q' -e p < $(srcdir)/debian/copyright.in; \
	  sed '1,/^$$/d' $(srcdir)/AUTHORS | \
		sed -n -e '/^$$/q' -e 's/^/ /p'; \
	  sed -e '1,/%AUTHORS%/d' $(srcdir)/debian/copyright.in; \
	} > $@

DISTCLEANFILES += debian/copyright
openvswitch-2.0.1+git20140120/debian/changelog000066400000000000000000001044061226605124000205530ustar00rootroot00000000000000openvswitch (2.0.1-1) unstable; urgency=low [ Open vSwitch team ] * New
upstream version - Bug fixes -- Open vSwitch team Tue, 13 Dec 2013 16:16:32 -0700 openvswitch (2.0.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - The ovs-vswitchd process is no longer single-threaded. Multiple threads are now used to handle flow set up and asynchronous logging. - OpenFlow: * Experimental support for OpenFlow 1.1 (in addition to 1.2 and 1.3, which had experimental support in 1.10). * New support for matching outer source and destination IP address of tunneled packets, for tunnel ports configured with the newly added "remote_ip=flow" and "local_ip=flow" options. * Support for matching on metadata 'pkt_mark' for interacting with other system components. On Linux this corresponds to the skb mark. * Support matching, rewriting SCTP ports - The Interface table in the database has a new "ifindex" column to report the interface's OS-assigned ifindex. - New "check-oftest" Makefile target for running OFTest against Open vSwitch. See README-OFTest for details. - The flow eviction threshold has been moved to the Open_vSwitch table. - Database names are now mandatory when specifying ovsdb-server options through database paths (e.g. Private key option with the database name should look like "--private-key=db:Open_vSwitch,SSL,private_key"). - Added ovs-dev.py, a utility script helpful for Open vSwitch developers. - Support for Linux kernels up to 3.10 - ovs-ofctl: * New "ofp-parse" for printing OpenFlow messages read from a file. - Added configurable flow caching support to IPFIX exporter. - Dropped support for Linux pre-2.6.32. - Log file timestamps and ovsdb commit timestamps are now reported with millisecond resolution. (Previous versions only reported whole seconds.) -- Open vSwitch team Tue, 15 Oct 2013 15:03:42 -0700 openvswitch (1.11.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - Support for megaflows, which allows wildcarding in the kernel (and any dpif implementation that supports wildcards). 
Depending on the flow table and switch configuration, flow set up rates are close to the Linux bridge. - The "tutorial" directory contains a new tutorial for some advanced Open vSwitch features. - Stable bond mode has been removed. - The autopath action has been removed. - New support for the data encapsulation format of the LISP tunnel protocol (RFC 6830). An external control plane or manual flow setup is required for EID-to-RLOC mapping. - OpenFlow: * The "dec_mpls_ttl" and "set_mpls_ttl" actions from OpenFlow 1.1 and later are now implemented. * New "stack" extension for use in actions, to push and pop from NXM fields. * The "load" and "set_field" actions can now modify the "in_port". (This allows one to enable output to a flow's input port by setting the in_port to some unused value, such as OFPP_NONE.) - ovs-dpctl: * New debugging commands "add-flow", "mod-flow", "del-flow". - In dpif-based bridges, cache action translations, which can improve flow set up performance by 80% with a complicated flow table. - New syslog format, prefixed with "ovs|", to be easier to filter. - RHEL: Removes the default firewall rule that allowed GRE traffic to pass through. Any users that relied on this automatic firewall hole will have to manually configure it. The ovs-ctl(8) manpage documents the "enable-protocol" command that can be used as an alternative. - New CFM demand mode which uses data traffic to indicate interface liveness. -- Open vSwitch team Wed, 28 Aug 2013 14:31:44 -0700 openvswitch (1.10.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - Bridge compatibility support has been removed. Any uses that rely on ovs-brcompatd will have to stick with Open vSwitch 1.9.x or adapt to native Open vSwitch support (e.g. use ovs-vsctl instead of brctl). - The maximum size of the MAC learning table is now configurable. 
- With the Linux datapath, packets for new flows are now queued separately on a per-port basis, so it should no longer be possible for a large number of new flows arriving on one port to prevent new flows from being processed on other ports. - Many "ovs-vsctl" database commands now accept an --if-exists option. Please refer to the ovs-vsctl manpage for details. - OpenFlow: - Experimental support for newer versions of OpenFlow. See the "What versions of OpenFlow does Open vSwitch support?" question in the FAQ for more details. - The OpenFlow "dp_desc" may now be configured by setting the value of other-config:dp-desc in the Bridge table. - It is possible to request the OpenFlow port number with the "ofport_request" column in the Interface table. - Tunneling: - New support for the VXLAN tunnel protocol (see the IETF draft here: http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-03). - Tunneling requires the version of the kernel module paired with Open vSwitch 1.9.0 or later. - Inheritance of the Don't Fragment bit in IP tunnels (df_inherit) is no longer supported. - Path MTU discovery is no longer supported. - ovs-dpctl: - The "dump-flows" and "del-flows" no longer require an argument if only one datapath exists. - ovs-appctl: - New "vlog/disable-rate-limit" and "vlog/enable-rate-limit" commands available allow control over logging rate limits. - New "dpif/dump-dps", "dpif/show", and "dpif/dump-flows" command that mimic the equivalent ovs-dpctl commands. - The ofproto library is now responsible for assigning OpenFlow port numbers. An ofproto implementation should assign them when port_construct() is called. - All dpif-based bridges of a particular type share a common datapath called "ovs-", e.g. "ovs-system". The ovs-dpctl commands will now return information on that shared datapath. To get the equivalent bridge-specific information, use the new "ovs-appctl dpif/*" commands. 
- Backward-incompatible changes: - Earlier Open vSwitch versions treated ANY as a wildcard in flow syntax. OpenFlow 1.1 adds a port named ANY, which introduces a conflict. ANY was rarely used in flow syntax, so we chose to retire that meaning of ANY in favor of the OpenFlow 1.1 meaning. - Patch ports no longer require kernel support, so they now work with FreeBSD and the kernel module built into Linux 3.3 and later. -- Open vSwitch team Wed, 01 May 2013 14:28:21 -0700 openvswitch (1.9.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - Datapath: - Support for ipv6 set action. - SKB mark matching and setting. - support for Linux kernels up to 3.8 - FreeBSD is now a supported platform, thanks to code contributions from Gaetano Catalli, Ed Maste, and Giuseppe Lettieri. - ovs-bugtool: New --ovs option to report only OVS related information. - New %t and %T log escapes to identify the subprogram within a cooperating group of processes or threads that emitted a log message. The default log patterns now include this information. - OpenFlow: - Allow bitwise masking for SHA and THA fields in ARP, SLL and TLL fields in IPv6 neighbor discovery messages, and IPv6 flow label. - Adds support for writing to the metadata field for a flow. - Tunneling: - The tunneling code no longer assumes input and output keys are symmetric. If they are not, PMTUD needs to be disabled for tunneling to work. Note this only applies to flow-based keys. - New support for a nonstandard form of GRE that supports a 64-bit key. - Tunnel Path MTU Discovery default value was set to 'disabled'. This feature is deprecated and will be removed soon. - Tunnel header caching removed. - ovs-ofctl: - Commands and actions that accept port numbers now also accept keywords that represent those ports (such as LOCAL, NONE, and ALL). 
This is also the recommended way to specify these ports, for compatibility with OpenFlow 1.1 and later (which use the OpenFlow 1.0 numbers for these ports for different purposes). - ovs-dpctl: - Support requesting the port number with the "port_no" option in the "add-if" command. - ovs-pki: The "online PKI" features have been removed, along with the ovs-pki-cgi program that facilitated it, because of some alarmist insecurity claims. We do not believe that these claims are true, but because we do not know of any users for this feature it seems better on balance to remove it. (The ovs-pki-cgi program was not included in distribution packaging.) - ovsdb-server now enforces the immutability of immutable columns. This was not enforced in earlier versions due to an oversight. - The following features are now deprecated. They will be removed no earlier than February 2013. Please email dev@openvswitch.org with concerns. - Bridge compatibility. - Stable bond mode. - The autopath action. - Interface type "null". - Numeric values for reserved ports (see "ovs-ofctl" note above). - Tunnel Path MTU Discovery. - CAPWAP tunnel support. - The data in the RARP packets can now be matched in the same way as the data in ARP packets. -- Open vSwitch team Tue, 26 Feb 2013 11:23:19 -0700 openvswitch (1.8.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version *** Internal only release *** - New FAQ. Please send updates and additions! - Authors of controllers, please read the new section titled "Action Reproduction" in DESIGN, which describes an Open vSwitch change in behavior in corner cases that may affect some controllers. - ovs-l3ping: - A new test utility that can create L3 tunnel between two Open vSwitches and detect connectivity issues. - ovs-ofctl: - New --sort and --rsort options for "dump-flows" command. - "mod-port" command can now control all OpenFlow config flags. 
- OpenFlow: - Allow general bitwise masking for IPv4 and IPv6 addresses in IPv4, IPv6, and ARP packets. (Previously, only CIDR masks were allowed.) - Allow support for arbitrary Ethernet masks. (Previously, only the multicast bit in the destination address could be individually masked.) - New field OXM_OF_METADATA, to align with OpenFlow 1.1. - The OFPST_QUEUE request now reports an error if a specified port or queue does not exist, or for requests for a specific queue on all ports, if the specified queue does not exist on any port. (Previous versions generally reported an empty set of results.) - New "flow monitor" feature to allow controllers to be notified of flow table changes as they happen. - Additional protocols are not mirrored and dropped when forward-bpdu is false. For a full list, see the ovs-vswitchd.conf.db man page. - Open vSwitch now sends RARP packets in situations where it previously sent a custom protocol, making it consistent with behavior of QEMU and VMware. - All Open vSwitch programs and log files now show timestamps in UTC, instead of the local timezone, by default. -- Open vSwitch team Mon, 16 Jul 2012 16:44:52 +0900 openvswitch (1.7.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - kernel modules are renamed. openvswitch_mod.ko is now openvswitch.ko and brcompat_mod.ko is now brcompat.ko. - Increased the number of NXM registers to 8. - Added ability to configure dscp setting for manager and controller connections. By default, these connections have a DSCP value of Internetwork Control (0xc0). - Added the granular link health statistics, 'cfm_health', to an interface. - OpenFlow: - Added support to mask nd_target for ICMPv6 neighbor discovery flows. - Added support for OpenFlow 1.3 port description (OFPMP_PORT_DESC) multipart messages. - ovs-ofctl: - Added the "dump-ports-desc" command to retrieve port information using the new port description multipart messages.
- ovs-test: - Added support for spawning ovs-test server from the client. - Now ovs-test is able to automatically create test bridges and ports. - "ovs-dpctl dump-flows" now prints observed TCP flags in TCP flows. - Tripled flow setup performance. - The "coverage/log" command previously available through ovs-appctl has been replaced by "coverage/show". The new command replies with coverage counter values, instead of logging them. -- Open vSwitch team Mon, 30 Jul 2012 17:23:57 +0900 openvswitch (1.6.1-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - Allow OFPP_CONTROLLER as the in_port for packet-out messages. -- Open vSwitch team Mon, 25 Jun 2012 13:52:17 +0900 openvswitch (1.6.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - bonding - LACP bonds no longer fall back to balance-slb when negotiations fail. Instead they drop traffic. - The default bond_mode changed from SLB to active-backup, to protect unsuspecting users from the significant risks of SLB bonds (which are documented in vswitchd/INTERNALS). - Load balancing can be disabled by setting the bond-rebalance-interval to zero. - OpenFlow: - Added support for bitwise matching on TCP and UDP ports. See ovs-ofctl(8) for more information. - NXM flow dumps now include times elapsed toward idle and hard timeouts. - Added an OpenFlow extension NXT_SET_ASYNC_CONFIG that allows controllers more precise control over which OpenFlow messages they receive asynchronously. - New "fin_timeout" action. - Added "fin_timeout" support to "learn" action. - The default MAC learning timeout has been increased from 60 seconds to 300 seconds. The MAC learning timeout is now configurable. - When QoS settings for an interface do not configure a default queue (queue 0), Open vSwitch now uses a default configuration for that queue, instead of dropping all packets as in previous versions. - Logging to console and file will have UTC timestamp as a default for all the daemons. 
An example of the default format is 2012-01-27T16:35:17Z. ovs-appctl can be used to change the default format as before. - New support for limiting the number of flows in an OpenFlow flow table, with configurable policy for evicting flows upon overflow. See the Flow_Table table in ovs-vswitch.conf.db(5) for more information. - New "enable-async-messages" column in the Controller table. If set to false, OpenFlow connections to the controller will initially have all asynchronous messages disabled, overriding normal OpenFlow behavior. - ofproto-provider interface: - "struct rule" has a new member "used" that ofproto implementations should maintain by updating with ofproto_rule_update_used(). - ovsdb-client: - The new option --timestamp causes the "monitor" command to print a timestamp with every update. - CFM module CCM broadcasts can now be tagged with an 802.1p priority. -- Open vSwitch team Fri, 24 Feb 2012 11:12:48 +0900 openvswitch (1.5.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - OpenFlow: - Added support for querying, modifying, and deleting flows based on flow cookie when using NXM. - Added new NXM_PACKET_IN format. - ovs-ofctl: - Added daemonization support to the monitor and snoop commands. - ovs-vsctl: - The "find" command supports new set relational operators {=}, {!=}, {<}, {>}, {<=}, and {>=}. - ovsdb-tool now uses the typical database and schema installation directories as defaults. -- Open vSwitch team Fri, 01 June 2012 13:06:00 +0900 openvswitch (1.4.0+git20120426-1) unstable; urgency=low * New upstream release fixing the following bugs: o Broken log rotation. o Use-after-free error when ports disappear. o Minor memory leaks. o Testsuite failures on big-endian architectures. -- Ben Pfaff Thu, 26 Apr 2012 13:46:55 -0700 openvswitch (1.4.0+git20120321-1) unstable; urgency=low * New upstream version including: o Features: - ovs-vsctl: Allow "fake bridges" to be created for VLAN 0. 
- vswitchd: Make the MAC entry aging time configurable. - mac-learning: Increase MAC learning timeout to 300 seconds. o Bug fixes: - netdev-linux: Fix use-after-free when netdev_dump_queues() deletes queues. - netlink-socket: Increase Netlink socket receive buffer size. - ofproto: Fix code that keeps track of MTU. - ovs-monitor-ipsec: Detect correctly IPSEC configuration changes - bond: Incorrectly reported an error in appctl. - socket-util: Unlink Unix domain sockets that bind but fail to connect. - bridge: Remove unwanted ports at time of ofproto creation. - dpif-linux: Make dpif_linux_port_query_by_name() query only one datapath. - ofproto-dpif: Cleanup STP on ports when disabled on their bridge. - configure: Try to extract kernel source directory from build Makefile. - vswitchd: Always configure a default queue for QoS. - ofproto-dpif: Don't output to in_port even if in_port is OFPP_LOCAL. - sflow_agent: Use snprintf() in place of sprintf(). o Packaging: - Move PKI directory to FHS-compliant location. Closes: #661090. Thanks to Andreas Beckmann for reporting this bug. - Use a different way to avoid failing install without kernel module. - Avoid unit test failure when doing "unofficial" builds. - Bump standards-version to 3.9.3. - Remove some useless files from the dkms package. - Clean .pyc files in "clean" target. - Remove po-debconf build dependency. - Build-depend on python-all to pull in all Python versions. - Add missing ${python:Depends} to openvswitch-test package. - Improve long descriptions so as to better describe the packages. - Bump debhelper compat level to 8 and make build-depends consistent. - Fix exit status of openvswitch-switch init script "status" command. - Use provided kernel source dir instead of host kernel version. - Do not run "make" if "configure" fails during DKMS build. - Look in /lib/modules instead of /usr/src for DKMS kernel sources. - Fix dependencies for openvswitch-datapath-dkms package. 
- Don't install Python modules for obsolete Python versions. - Add dependency on ${misc:Depends} to openvswitch-test o Documentation improvements: - ovsdb-doc: Use minus sign in negative numbers in nroff output. - ovsdb-doc: Convert '-' preceding a number as a minus sign, not a hyphen. - ovsdb-doc: Put NAME section into generated manpage. - Fix typo in manpage. - vswitchd: Document behavior of 802.1p priorities with VLAN splinters. - netdev: Fix typo in error message. - INSTALL.Linux: minor typo * Many thanks to Thomas Goirand for contributing many of the packaging fixes listed above. * This version fixes most of the lintian errors described in bug #663051, but a few remain, so this upload does not close that bug. I believe that this upload should be suitable for downgrading that bug's severity. -- Ben Pfaff Wed, 21 Mar 2012 10:00:28 -0700 openvswitch (1.4.0-2+nmu1) unstable; urgency=low * Non-maintainer upload. * Removes all patches in debian/patches, because they are patching stuff unconditionally in debian/*, and this should be applied by default. * Uses the correct ${kernel_source_dir} in debian/dkms.conf.in, so that the kernel module builds as expected (Closes: #659685). -- Thomas Goirand Thu, 08 Mar 2012 08:46:24 +0000 openvswitch (1.4.0-2) unstable; urgency=low * Use explicit DKMS variable for kernel source directory - 0001-debian-Fix-dependencies-for-openvswitch-datapath-dkm.patch - 0002-debian-Look-in-lib-modules-instead-of-usr-src-for-DK.patch - 0001-debian-Use-provided-kernel-source-dir.patch - 0001-debian-Do-not-run-make-if-configure-fails-during-DKM.patch - (closes: #659685) * Don't install Python modules for obsolete Python versions - 0001-debian-Don-t-install-Python-modules-for-obsolete-Pyt.patch -- Simon Horman Tue, 14 Feb 2012 11:43:13 +0900 openvswitch (1.4.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - Compatible with Open vSwitch kernel module included in Linux 3.3.
- New "VLAN splinters" feature to work around buggy device drivers in old Linux versions. (This feature is deprecated. When broken device drivers are no longer in widespread use, we will delete this feature.) See ovs-vswitchd.conf.db(5) for more information. - OpenFlow: - Added ability to match on IPv6 flow label through NXM. - Added ability to match on ECN bits in IPv4 and IPv6 through NXM. - Added ability to match on TTL in IPv4 and IPv6 through NXM. - Added ability to modify ECN bits in IPv4. - Added ability to modify TTL in IPv4. - ovs-vswitchd: - Don't require the "normal" action to use mirrors. Traffic will now be properly mirrored for any flows, regardless of their actions. - Track packet and byte statistics sent on mirrors. - ovs-appctl: - New "fdb/flush" command to flush bridge's MAC learning table. - ovs-test: - A new distributed testing tool that allows one to diagnose performance and connectivity issues. This tool currently is not included in RH or Xen packages. - RHEL packaging now supports integration with Red Hat network scripts. - Debian: Depend on python (>= 2.7) | python-argparse instead of python-argparse to avoid pulling in python2.6 (closes: #653645) -- Open vSwitch team Mon, 30 Jan 2012 23:36:00 +0000 openvswitch (1.3.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - OpenFlow: - Added an OpenFlow extension which allows the "output" action to accept NXM fields. - Added an OpenFlow extension for flexible learning. - Bumped number of NXM registers from four to five. - ovs-appctl: - New "version" command to determine version of running daemon. - If no argument is provided for "cfm/show", displays detailed information about all interfaces with CFM enabled. - If no argument is provided for "lacp/show", displays detailed information about all ports with LACP enabled. - ovs-vswitchd: - The software switch now supports 255 OpenFlow tables, instead of just one. 
By default, only table 0 is consulted, but the new NXAST_RESUBMIT_TABLE action can look up in additional tables. Tables 128 and above are reserved for use by the switch itself; please use only tables 0 through 127. - Add support for 802.1D spanning tree (STP). - Fragment handling extensions: - New OFPC_FRAG_NX_MATCH fragment handling mode, in which L4 fields are made available for matching in fragments with offset 0. - New NXM_NX_IP_FRAG match field for matching IP fragments (usable via "ip_frag" in ovs-ofctl). - New ovs-ofctl "get-frags" and "set-frags" commands to get and set fragment handling policy. - CAPWAP tunneling now supports an extension to transport a 64-bit key. By default it remains compatible with the old version and other standards-based implementations. - Flow setups are now processed in a round-robin manner across ports to prevent any single client from monopolizing the CPU and conducting a denial of service attack. - Added support for native VLAN tagging. A new "vlan_mode" parameter can be set for "port". Possible values: "access", "trunk", "native-tagged" and "native-untagged". - test-openflowd has been removed. Please use ovs-vswitchd instead. -- Open vSwitch team Mon, 09 Dec 2011 23:36:00 +0000 openvswitch (1.2.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - New "ofproto" abstraction layer to ease porting to hardware switching ASICs. - Packaging for Red Hat Enterprise Linux 5.6 and 6.0. - Datapath support for Linux kernels up to 3.0. - OpenFlow: - New "bundle" and "bundle_load" action extensions. - Database: - Implement table unique constraints. - Support cooperative locking between callers. - ovs-dpctl: - New "-s" option for "show" command prints packet and byte counters for each port. - ovs-ofctl: - New "--readd" option for "replace-flows". - ovs-vsctl: - New "show" command to print an overview of configuration. - New "comment" command to add remark that explains intentions.
- ovs-brcompatd has been rewritten to fix long-standing bugs. - ovs-openflowd has been renamed test-openflowd and moved into the tests directory. Its presence confused too many users. Please use ovs-vswitchd instead. - New ovs-benchmark utility to test flow setup performance. - A new log level "off" has been added. Configuring a log facility "off" prevents any messages from being logged to it. Previously, "emer" was effectively "off" because no messages were ever logged at level "emer". Now, errors that cause a process to exit are logged at "emer" level. - "configure" option --with-l26 has been renamed --with-linux, and --with-l26-source has been renamed --with-linux-source. The old names will be removed after the next release, so please update your scripts. - The "-2.6" suffix has been dropped from the datapath/linux-2.6 and datapath/linux-2.6/compat-2.6 directories. - Feature removals: - Dropped support for "tun_id_from_cookie" OpenFlow extension. Please use the extensible match extensions instead. - Removed the Maintenance_Point and Monitor tables in an effort to simplify 802.1ag configuration. - Performance and scalability improvements - Bug fixes -- Open vSwitch team Wed, 03 Aug 2011 14:43:00 +0000 openvswitch (1.1.1-1) unstable; urgency=low [ Open vSwitch team ] * Bug fixes. [ Simon Horman ] * docs: Suppress warning macro DD not defined (upstream commit 58f870d0) * debian: Make openvswitch depend on Python (upstream commit aa41cb61) * debian: Don't begin openvswitch-pki description with article.
(upstream commit bc6bb66) * Debian: Add ${misc:Depends} dependency to python-openvswitch * Debian: Update standards version from 3.9.1 to 3.9.2 * Debian: ${source:Version} dependency on python openvswitch * Debian: ${source:Version} dependency for python openvswitch * Switch to dpkg-source 3.0 (quilt) format - For local non-debian/ patches (above) -- Simon Horman Wed, 15 Jun 2011 10:46:15 +0900 openvswitch (1.1.0-1) unstable; urgency=low [ Open vSwitch team ] * New upstream version - Ability to define policies over IPv6 - LACP - 802.1ag CCM - Support for extensible match extensions to OpenFlow - QoS: - Support for HFSC qdisc. - Queue used by in-band control can now be configured. - Kernel: - Kernel<->userspace interface has been reworked and should be close to a stable ABI now. - "Port group" concept has been dropped. - GRE over IPSEC tunnels - Bonding: - New active backup bonding mode. - New L4 hashing support when LACP is enabled. - Source MAC hash now includes VLAN field also. - miimon support. - Greatly improved handling of large flow tables - ovs-dpctl: - "show" command now prints full vport configuration. - "dump-groups" command removed since kernel support for port groups was dropped. - ovs-vsctl: - New commands for working with the new Managers table. - "list" command enhanced with new formatting options and --columns option. - "get" command now accepts new --id option. - New "find" command. - ovs-ofctl: - New "queue-stats" command for printing queue stats. - New commands "replace-flows" and "diff-flows". - Commands to add and remove flows can now read from files. - New --flow-format option to enable or disable NXM. - New --more option to increase OpenFlow message verbosity. - Removed "tun-cookie" command, which is no longer useful. - ovs-controller enhancements for testing various features. - New ovs-vlan-test command for testing for Linux kernel driver VLAN bugs. 
New ovs-vlan-bug-workaround command for enabling and disabling a workaround for these driver bugs. - OpenFlow support: - "Resubmit" actions now update flow statistics. - New "register" extension for use in matching and actions, via NXM. - New "multipath" experimental action extension. - New support for matching multicast Ethernet frames, via NXM. - New extension for OpenFlow vendor error codes. - New extension to set the QoS output queue without actually sending to an output port. - Open vSwitch now reports a single flow table, instead of separate hash and wildcard tables. This better models the current implementation. - New experimental "note" action. - New "ofproto/trace" ovs-appctl command and associated utilities to ease debugging complex flow tables. - Database: - Schema documentation now includes an entity-relationship diagram. - The database is now garbage collected. In most tables, unreferenced rows will be deleted automatically. - Many tables now include statistics updated periodically by ovs-vswitchd or ovsdb-server. - Every table now has an "external-ids" column for use by OVS integrators. - There is no default controller anymore. Each bridge must have its controller individually specified. - The "fail-mode" is now a property of a Bridge instead of a Controller. - New versioning and checksum features. - New Managers table and manager_options column in Open_vSwitch table for specifying managers. The old "managers" column in the Open_vSwitch table has been removed. - Many "name" columns are now immutable. - Feature removals: - Dropped support for XenServer pre-5.6.100. - Dropped support for Linux pre-2.6.18. - Dropped controller discovery support. - Dropped "ovs-ofctl status" and the OpenFlow extension that it used. Statistics reporting in the database is a rough equivalent. - Dropped the "corekeeper" package (now separate, at http://openvswitch.org/cgi-bin/gitweb.cgi?p=corekeeper). 
- Performance and scalability improvements - Bug fixes [ Simon Horman ] * Add the following fixes from upstream branch-1.1 - 7f1aca9 dpif-linux: Avoid logging error on ENOENT in dpif_linux_is_internal_device(). - 8996f83 dpif-linux: Avoid segfault on netdev_get_stats() without kernel module. - 002d4a3 vswitch: Improve schema documentation. - 58bd294 cfm: Fix broken fault logic. - c042664 bridge: Run once before configuring CFM. * Switch to dpkg-source 3.0 (quilt) format -- Simon Horman Wed, 27 Apr 2011 17:11:10 +0900 openvswitch (1.1.0~pre2.g2.ea763e0e-1) unstable; urgency=low * Git snapshot, including - tests: Fix Y2011 bug in testsuite (closes: #609506) -- Simon Horman Wed, 12 Jan 2011 08:34:35 +0900 openvswitch (1.1.0~pre2.g1.bbe8d06e-1) unstable; urgency=low * Git snapshot -- Simon Horman Thu, 06 Jan 2011 11:11:55 +0900 openvswitch (1.1.0~pre2-5) unstable; urgency=low * Open vSwitch only works on Linux so set the Architecture of binary packages to linux-any accordingly -- Simon Horman Tue, 23 Nov 2010 07:55:19 +0900 openvswitch (1.1.0~pre2-4) unstable; urgency=low * Add procps to Build-Depends (closes: #602891) -- Simon Horman Tue, 16 Nov 2010 06:46:17 +0900 openvswitch (1.1.0~pre2-3) unstable; urgency=low * Remove the corekeeper package as it isn't strongly related to Open vSwitch. It has subsequently been broken off into a separate source repository and may be uploaded as its own debian source package. http://openvswitch.org/cgi-bin/gitweb.cgi?p=corekeeper;a=summary (closes: #602946) * Avoid the use of long socket names.
(closes: #602891, closes: #602911) -- Simon Horman Thu, 11 Nov 2010 06:35:05 +0900 openvswitch (1.1.0~pre2-2) unstable; urgency=low * Clarify licensing of files in xenserver/ -- Simon Horman Thu, 23 Sep 2010 10:56:18 +0900 openvswitch (1.1.0~pre2-1) unstable; urgency=low * New upstream version - Bug fixes -- Simon Horman Wed, 15 Sep 2010 18:28:59 +0900 openvswitch (1.1.0~pre1-1) unstable; urgency=low * New upstream version - OpenFlow 1.0 slicing (QoS) functionality - Python bindings for configuration database (no write support) - Performance and scalability improvements - Bug fixes -- Open vSwitch team Tue, 31 Aug 2010 23:20:00 +0000 openvswitch (1.0.1-1) unstable; urgency=low * New upstream version. -- Open vSwitch team Mon, 17 May 2010 10:36:00 +0000 openvswitch-2.0.1+git20140120/debian/compat000066400000000000000000000000021226605124000200720ustar00rootroot000000000000008 openvswitch-2.0.1+git20140120/debian/control000066400000000000000000000233151226605124000203030ustar00rootroot00000000000000Source: openvswitch Section: net Priority: extra Maintainer: Open vSwitch developers Uploaders: Ben Pfaff , Simon Horman Build-Depends: debhelper (>= 8), autoconf (>= 2.64), automake (>= 1.10) | automake1.10, libssl-dev, bzip2, openssl, python-all (>= 2.6.6-3~), procps, python-qt4, python-zopeinterface, python-twisted-conch Standards-Version: 3.9.3 Homepage: http://openvswitch.org/ Package: openvswitch-datapath-source Architecture: all Depends: module-assistant, bzip2, debhelper (>= 5.0.37), ${misc:Depends} Suggests: openvswitch-switch Description: Open vSwitch datapath module source - module-assistant version Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). 
In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . This package provides the Open vSwitch datapath module source code that is needed by openvswitch-switch. The kernel module can be built from it using module-assistant or make-kpkg. README.Debian in this package provides further instructions. Package: openvswitch-datapath-dkms Architecture: all Depends: dkms (>= 1.95), make, libc6-dev, ${misc:Depends}, ${python:Depends} Description: Open vSwitch datapath module source - DKMS version Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . This package provides the Open vSwitch datapath module source code that is needed by openvswitch-switch. DKMS can build the kernel module from it. Package: openvswitch-common Architecture: linux-any Depends: ${shlibs:Depends}, openssl, ${misc:Depends}, python, python (>= 2.7) | python-argparse Suggests: ethtool Description: Open vSwitch common components Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . openvswitch-common provides components required by both openvswitch-switch and openvswitch-controller.
Package: openvswitch-switch Architecture: linux-any Suggests: openvswitch-datapath-module Depends: ${shlibs:Depends}, ${misc:Depends}, ${python:Depends}, openvswitch-common (= ${binary:Version}), module-init-tools, procps, uuid-runtime, netbase, python-argparse Description: Open vSwitch switch implementations Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . openvswitch-switch provides the userspace components and utilities for the Open vSwitch kernel-based switch. Package: openvswitch-ipsec Architecture: linux-any Depends: ${shlibs:Depends}, ${misc:Depends}, python, ipsec-tools (>=0.8~alpha20101208), racoon (>=0.8~alpha20101208), openvswitch-common (= ${binary:Version}), openvswitch-switch (= ${binary:Version}), python-openvswitch (= ${source:Version}) Description: Open vSwitch GRE-over-IPsec support Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . The ovs-monitor-ipsec script provides support for encrypting GRE tunnels with IPsec. 
Package: openvswitch-pki Architecture: all Depends: ${misc:Depends}, openvswitch-common (>= ${source:Version}), openvswitch-common (<< ${source:Version}.1~) Description: Open vSwitch public key infrastructure dependency package Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . openvswitch-pki provides PKI (public key infrastructure) support for Open vSwitch switches and controllers, reducing the risk of man-in-the-middle attacks on the Open vSwitch network infrastructure. Package: openvswitch-controller Architecture: linux-any Depends: ${shlibs:Depends}, openvswitch-common (= ${binary:Version}), openvswitch-pki (= ${source:Version}), ${misc:Depends} Description: Open vSwitch controller implementation Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . The Open vSwitch controller enables OpenFlow switches that connect to it to act as MAC-learning Ethernet switches. 
Package: openvswitch-dbg Section: debug Architecture: linux-any Depends: ${shlibs:Depends}, ${misc:Depends}, openvswitch-common (= ${binary:Version}), openvswitch-controller (= ${binary:Version}), openvswitch-switch (= ${binary:Version}) Description: Debug symbols for Open vSwitch packages Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . This package contains the debug symbols for all the other openvswitch-* packages. Install it to debug one of them or to examine a core dump produced by one of them. Package: python-openvswitch Architecture: all Section: python Depends: ${misc:Depends}, ${python:Depends} Description: Python bindings for Open vSwitch Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . This package contains the full Python bindings for Open vSwitch database. Package: ovsdbmonitor Architecture: all Section: utils Depends: ${python:Depends}, python-openvswitch, ${misc:Depends} Description: Open vSwitch graphical monitoring tool Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. 
It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . This package is a GUI tool for monitoring and troubleshooting local or remote Open vSwitch installations. It presents GUI tables that graphically represent an Open vSwitch kernel flow table (similar to "ovs-dpctl dump-flows") and Open vSwitch database contents (similar to "ovs-vsctl list "). Package: openvswitch-test Architecture: all Depends: ${misc:Depends}, ${python:Depends}, python-twisted-web, python (>= 2.7) | python-argparse Description: Open vSwitch test package Open vSwitch is a production quality, multilayer, software-based, Ethernet virtual switch. It is designed to enable massive network automation through programmatic extension, while still supporting standard management interfaces and protocols (e.g. NetFlow, IPFIX, sFlow, SPAN, RSPAN, CLI, LACP, 802.1ag). In addition, it is designed to support distribution across multiple physical servers similar to VMware's vNetwork distributed vswitch or Cisco's Nexus 1000V. . This package contains utilities that are useful to diagnose performance and connectivity issues in Open vSwitch setup. 
openvswitch-2.0.1+git20140120/debian/control.modules.in000066400000000000000000000013721226605124000223560ustar00rootroot00000000000000Source: openvswitch Section: net Priority: extra Maintainer: Open vSwitch developers Build-Depends: debhelper (>= 5.0.37) Standards-Version: 3.7.3 Package: openvswitch-datapath-module-_KVERS_ Architecture: any Recommends: kernel-image-_KVERS_, openvswitch-switch Provides: openvswitch-datapath-module Description: Open vSwitch Linux datapath kernel module This package contains the Open vSwitch loadable datapath kernel modules for the kernel-image-_KVERS_ package. . If you compiled a custom kernel, you will most likely need to compile a custom version of this module as well. The openvswitch-datapath-source package has been provided for this purpose. Refer to README.Debian provided in that package for further instructions. openvswitch-2.0.1+git20140120/debian/copyright.in000066400000000000000000001023171226605124000212400ustar00rootroot00000000000000The original sources for this package can be found at: http://openvswitch.org/ Upstream Authors (from AUTHORS): %AUTHORS% Upstream Copyright Holders: Copyright (c) 2007, 2008, 2009, 2010, 2011, 2012 Nicira, Inc. Copyright (c) 2010 Jean Tourrilhes - HP-Labs. Copyright (c) 2008,2009,2010 Citrix Systems, Inc. and authors listed above. Copyright (c) 2011 Gaetano Catalli License: * The following components are licensed under the GNU Lesser General Public License version 2.1 only with the exception clause below as a pre-amble. xenserver/etc_xensource_scripts_vif xenserver/opt_xensource_libexec_InterfaceReconfigure.py xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py xenserver/opt_xensource_libexec_interface-reconfigure xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py * These components are only distributed in the source package. They do not appear in any binary packages. 
On Debian systems, the complete text of the GNU Lesser General Public License version 2.1 can be found in `/usr/share/common-licenses/LGPL-2.1' The exception clause pre-amble reads: As a special exception to the GNU Lesser General Public License, you may link, statically or dynamically, a "work that uses the Library" with a publicly distributed version of the Library to produce an executable file containing portions of the Library, and distribute that executable file under terms of your choice, without any of the additional requirements listed in clause 6 of the GNU Lesser General Public License. By "a publicly distributed version of the Library", we mean either the unmodified Library as distributed, or a modified version of the Library that is distributed under the conditions defined in clause 3 of the GNU Library General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU Lesser General Public License. * The following components are licensed under the GNU Lesser General Public License version 2.1. utilities/bugtool/ovs-bugtool utilities/bugtool/ovs-bugtool-* utilities/bugtool/plugins/kernel-info/openvswitch.xml utilities/bugtool/plugins/network-status/openvswitch.xml utilities/bugtool/plugins/system-configuration.xml utilities/bugtool/plugins/system-configuration/openvswitch.xml On Debian systems, the complete text of the GNU Lesser General Public License version 2.1 can be found in `/usr/share/common-licenses/LGPL-2.1' * The following component is licensed under the GNU General Public License version 2. datapath/ On Debian systems, the complete text of the GNU General Public License version 2 can be found in `/usr/share/common-licenses/GPL-2' * The following components are dual-licensed under the GNU General Public License version 2 and the Apache License Version 2.0. 
include/linux/openvswitch.h On Debian systems, the complete text of the GNU General Public License version 2 can be found in `/usr/share/common-licenses/GPL-2'. On Debian systems, the complete text of the Apache License version 2.0 can be found in '/usr/share/common-licenses/Apache-2.0'. * The following components are licensed under the terms of either the Sun Industry Standard Source License 1.1 or the InMon sFlow License: lib/sflow.h lib/sflow_agent.c lib/sflow_api.h lib/sflow_poller.c lib/sflow_receiver.c lib/sflow_sampler.c These licenses are available, respectively, at the following URLs: http://host-sflow.sourceforge.net/sissl.html http://www.inmon.com/technology/sflowlicense.txt The full text of each license is also appended to the end of this file. * The following components are licensed for use as desired without restriction: lib/crc32c.c * The following components are licensed under the Python Software Foundation License Version 2. python/compat/uuid.py python/compat/argparse.py * These components are only distributed in the source package. They do not appear in any binary packages. PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -------------------------------------------- 1. This LICENSE AGREEMENT is between the Python Software Foundation ("PSF"), and the Individual or Organization ("Licensee") accessing and otherwise using this software ("Python") in source or binary form and its associated documentation. 2.
Subject to the terms and conditions of this License Agreement, PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, analyze, test, perform and/or display publicly, prepare derivative works, distribute, and otherwise use Python alone or in any derivative version, provided, however, that PSF's License Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation; All Rights Reserved" are retained in Python alone or in any derivative version prepared by Licensee. 3. In the event Licensee prepares a derivative work that is based on or incorporates Python or any part thereof, and wants to make the derivative work available to others as provided herein, then Licensee hereby agrees to include in any such work a brief summary of the changes made to Python. 4. PSF is making Python available to Licensee on an "AS IS" basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT INFRINGE ANY THIRD PARTY RIGHTS. 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. 6. This License Agreement will automatically terminate upon a material breach of its terms and conditions. 7. Nothing in this License Agreement shall be deemed to create any relationship of agency, partnership, or joint venture between PSF and Licensee. This License Agreement does not grant permission to use PSF trademarks or trade name in a trademark sense to endorse or promote products or services of Licensee, or any third party. 8. 
By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. * lib/ovs.tmac in the source distribution, and manpages in the binaries, includes troff macros from groff 1.21 that contain the following notice: .\" an-ext.tmac .\" .\" Written by Eric S. Raymond .\" Werner Lemberg .\" .\" Version 2007-Feb-02 .\" .\" Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc. .\" You may freely use, modify and/or distribute this file. * All other components of this package are licensed under The Apache License Version 2.0. On Debian systems, the complete text of the Apache License version 2.0 can be found in '/usr/share/common-licenses/Apache-2.0'. ---------------------------------------------------------------------- Retrieved from http://host-sflow.sourceforge.net/sissl.html, 2011-12-12: Sun Industry Standards Source License - Version 1.1 1.0 DEFINITIONS 1.1 "Commercial Use" means distribution or otherwise making the Original Code available to a third party. 1.2 "Contributor Version" means the combination of the Original Code, and the Modifications made by that particular Contributor. 1.3 "Electronic Distribution Mechanism" means a mechanism generally accepted in the software development community for the electronic transfer of data. 1.4 "Executable" means Original Code in any form other than Source Code. 1.5 "Initial Developer" means the individual or entity identified as the Initial Developer in the Source Code notice required by Exhibit A. 1.6 "Larger Work" means a work which combines Original Code or portions thereof with code not governed by the terms of this License. 1.7 "License" means this document. 1.8 "Licensable" means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently acquired, any and all of the rights conveyed herein. 
1.9 "Modifications" means any addition to or deletion from the substance or structure of either the Original Code or any previous Modifications. A Modification is: A. Any addition to or deletion from the contents of a file containing Original Code or previous Modifications. B. Any new file that contains any part of the Original Code or previous Modifications. 1.10 "Original Code" means Source Code of computer software code which is described in the Source Code notice required by Exhibit A as Original Code. 1.11 "Patent Claims" means any patent claim(s), now owned or hereafter acquired, including without limitation, method, process, and apparatus claims, in any patent Licensable by grantor. 1.12 "Source Code" means the preferred form of the Original Code for making modifications to it, including all modules it contains, plus any associated interface definition files, or scripts used to control compilation and installation of an Executable. 1.13 "Standards" means the standards identified in Exhibit B. 1.14 "You" (or "Your") means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License or a future version of this License issued under Section 6.1. For legal entities, "You'' includes any entity which controls, is controlled by, or is under common control with You. For purposes of this definition, "control'' means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. 
2.0 SOURCE CODE LICENSE 2.1 The Initial Developer Grant The Initial Developer hereby grants You a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims: (a) under intellectual property rights (other than patent or trademark) Licensable by Initial Developer to use, reproduce, modify, display, perform, sublicense and distribute the Original Code (or portions thereof) with or without Modifications, and/or as part of a Larger Work; and (b) under Patents Claims infringed by the making, using or selling of Original Code, to make, have made, use, practice, sell, and offer for sale, and/or otherwise dispose of the Original Code (or portions thereof). (c) the licenses granted in this Section 2.1(a) and (b) are effective on the date Initial Developer first distributes Original Code under the terms of this License. (d) Notwithstanding Section 2.1(b) above, no patent license is granted: 1) for code that You delete from the Original Code; 2) separate from the Original Code; or 3) for infringements caused by: i) the modification of the Original Code or ii) the combination of the Original Code with other software or devices, including but not limited to Modifications. 3.0 DISTRIBUTION OBLIGATIONS 3.1 Application of License. The Source Code version of Original Code may be distributed only under the terms of this License or a future version of this License released under Section 6.1, and You must include a copy of this License with every copy of the Source Code You distribute. You may not offer or impose any terms on any Source Code version that alters or restricts the applicable version of this License or the recipients' rights hereunder. Your license for shipment of the Contributor Version is conditioned upon Your full compliance with this Section. The Modifications which You create must comply with all requirements set out by the Standards body in effect one hundred twenty (120) days before You ship the Contributor Version. 
In the event that the Modifications do not meet such requirements, You agree to publish either (i) any deviation from the Standards protocol resulting from implementation of Your Modifications and a reference implementation of Your Modifications or (ii) Your Modifications in Source Code form, and to make any such deviation and reference implementation or Modifications available to all third parties under the same terms as this license on a royalty free basis within thirty (30) days of Your first customer shipment of Your Modifications. 3.2 Required Notices. You must duplicate the notice in Exhibit A in each file of the Source Code. If it is not possible to put such notice in a particular Source Code file due to its structure, then You must include such notice in a location (such as a relevant directory) where a user would be likely to look for such a notice. If You created one or more Modification(s) You may add Your name as a Contributor to the notice described in Exhibit A. You must also duplicate this License in any documentation for the Source Code where You describe recipients' rights or ownership rights relating to Initial Code. You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Your version of the Code. However, You may do so only on Your own behalf, and not on behalf of the Initial Developer. You must make it absolutely clear than any such warranty, support, indemnity or liability obligation is offered by You alone, and You hereby agree to indemnify the Initial Developer for any liability incurred by the Initial Developer as a result of warranty, support, indemnity or liability terms You offer. 3.3 Distribution of Executable Versions. 
You may distribute Original Code in Executable and Source form only if the requirements of Sections 3.1 and 3.2 have been met for that Original Code, and if You include a notice stating that the Source Code version of the Original Code is available under the terms of this License. The notice must be conspicuously included in any notice in an Executable or Source versions, related documentation or collateral in which You describe recipients' rights relating to the Original Code. You may distribute the Executable and Source versions of Your version of the Code or ownership rights under a license of Your choice, which may contain terms different from this License, provided that You are in compliance with the terms of this License. If You distribute the Executable and Source versions under a different license You must make it absolutely clear that any terms which differ from this License are offered by You alone, not by the Initial Developer. You hereby agree to indemnify the Initial Developer for any liability incurred by the Initial Developer as a result of any such terms You offer. 3.4 Larger Works. You may create a Larger Work by combining Original Code with other code not governed by the terms of this License and distribute the Larger Work as a single product. In such a case, You must make sure the requirements of this License are fulfilled for the Original Code. 4.0 INABILITY TO COMPLY DUE TO STATUTE OR REGULATION If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Original Code due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be included in the LEGAL file described in Section 3.2 and must be included with all distributions of the Source Code. 
Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. 5.0 APPLICATION OF THIS LICENSE This License applies to code to which the Initial Developer has attached the notice in Exhibit A and to related Modifications as set out in Section 3.1. 6.0 VERSIONS OF THE LICENSE 6.1 New Versions. Sun may publish revised and/or new versions of the License from time to time. Each version will be given a distinguishing version number. 6.2 Effect of New Versions. Once Original Code has been published under a particular version of the License, You may always continue to use it under the terms of that version. You may also choose to use such Original Code under the terms of any subsequent version of the License published by Sun. No one other than Sun has the right to modify the terms applicable to Original Code. 7.0 DISCLAIMER OF WARRANTY ORIGINAL CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE ORIGINAL CODE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE ORIGINAL CODE IS WITH YOU. SHOULD ANY ORIGINAL CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF ANY ORIGINAL CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. 8.0 TERMINATION 8.1 This License and the rights granted hereunder will terminate automatically if You fail to comply with terms herein and fail to cure such breach within 30 days of becoming aware of the breach. All sublicenses to the Original Code which are properly granted shall survive any termination of this License. 
Provisions which, by their nature, must remain in effect beyond the termination of this License shall survive. 8.2 In the event of termination under Section 8.1 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or any distributor hereunder prior to termination shall survive termination. 9.0 LIMIT OF LIABILITY UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF ORIGINAL CODE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. 10.0 U.S. GOVERNMENT END USERS U.S. Government: If this Software is being acquired by or on behalf of the U.S. Government or by a U.S. Government prime contractor or subcontractor (at any tier), then the Government's rights in the Software and accompanying documentation shall be only as set forth in this license; this is in accordance with 48 C.F.R. 227.7201 through 227.7202-4 (for Department of Defense (DoD) acquisitions) and with 48 C.F.R. 2.101 and 12.212 (for non-DoD acquisitions). 11.0 MISCELLANEOUS This License represents the complete agreement concerning subject matter hereof. 
If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by California law provisions (except to the extent applicable law, if any, provides otherwise), excluding its conflict-of-law provisions. With respect to disputes in which at least one party is a citizen of, or an entity chartered or registered to do business in the United States of America, any litigation relating to this License shall be subject to the jurisdiction of the Federal Courts of the Northern District of California, with venue lying in Santa Clara County, California, with the losing party responsible for costs, including without limitation, court costs and reasonable attorneys' fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not apply to this License. EXHIBIT A - Sun Standards License "The contents of this file are subject to the Sun Standards License Version 1.1 (the "License"); You may not use this file except in compliance with the License. You may obtain a copy of the License at _______________________________. Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. The Original Code is ______________________________________. The Initial Developer of the Original Code is: InMon Corp. Portions created by: _______________________________________ are Copyright (C): _______________________________________ All Rights Reserved. 
Contributor(s): _______________________________________ EXHIBIT B - Standards The Standard is defined as the following: sFlow Specification, located at [1]http://sflow.org/developers/specifications.php References 1. http://sflow.org/developers/specifications.php ---------------------------------------------------------------------- Retrieved from http://www.inmon.com/technology/sflowlicense.txt, 2011-12-12: LICENSE AGREEMENT PLEASE READ THIS LICENSE AGREEMENT ("AGREEMENT") CAREFULLY BEFORE REPRODUCING OR IN ANY WAY UTILIZING THE sFlow(R) SOFTWARE ("SOFTWARE") AND/OR ANY ACCOMPANYING DOCUMENTATION ("DOCUMENTATION") AND/OR THE RELATED SPECIFICATIONS ("SPECIFICATIONS"). YOUR REPRODUCTION OR USE OF THE SOFTWARE AND/OR THE DOCUMENTATION AND/OR THE SPECIFICATIONS CONSTITUTES YOUR ACCEPTANCE OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE TO BE BOUND BY THE TERMS AND CONDITIONS OF THIS AGREEMENT, YOU MAY NOT REPRODUCE OR IN ANY WAY UTILIZE THE SOFTWARE OR THE DOCUMENTATION OR THE SPECIFICATIONS. 1. Definitions. "Documentation" means the user manuals, training materials, and operating materials, if any, InMon provides to Licensee under this Agreement. "InMon" means InMon Corporation, its affiliates and subsidiaries. "Intellectual Property Rights" means any trade secrets, patents, including without limitation any patents covering the Software, copyrights, know-how, moral rights and similar rights of any type under the laws of any governmental authority, domestic or foreign, including all applications and registrations relating to any of the foregoing. "Licensee Hardware" means all computers, routers, or other equipment owned or controlled by or on behalf of Licensee. "Products" means any and all software applications, computers, routers, or other equipment manufactured by or on behalf of Licensee for the purpose of resale or lease to any other third party, or otherwise made available by Licensee free of charge. 
"Software" means the sFlow(R) software programs, in source or binary code format, that Licensee licenses from InMon under this Agreement and any bug fixes or error corrections which InMon may provide to Licensee. "Specifications" means the published specifications provided or otherwise made available by InMon at: http://www.sflow.org. "Trademark" means InMon's "sFlow(R)" trademark. 2. License Grant. 2.1 Software, Documentation and Specifications License Grant. InMon hereby grants to Licensee, under all of InMon's Intellectual Property Rights therein, a perpetual (subject to InMon's termination rights under Section 7 below), nonexclusive, royalty-free, worldwide, transferable, sublicensable license, to: (i) use and reproduce the Software, the Documentation, and the Specifications; (ii) modify the Software; (iii) implement the Specifications in the Products; (iv) install the Software, or software in which the Specifications have been implemented, on Licensee Hardware and Products, and (v) distribute any Products that include the Software, the Documentation, or software in which the Specifications have been implemented. 2.2 Trademark License. InMon hereby grants to Licensee a perpetual (subject to InMon's termination rights under Section 7 below), nonexclusive, royalty-free, worldwide, transferable, sublicensable license to use the Trademark on or in connection with the Software, the Documentation, the Specifications and any software that implements the Specifications. 2.3 Restrictions. Licensee agrees that it will not use the Software in a way inconsistent with the license granted in Section 2.1. 
Further, Licensee agrees that, in exercising its rights under the license granted to it in this Agreement, Licensee will: (i) strictly adhere to and fully comply with the Specifications; (ii) use the Trademark, and no other mark, to identify the Software, the Documentation, the Specifications and any Products that implement the Specifications; (iii) place, in a font or graphic design designated by InMon, the phrase "sFlow(R)" on any technical documentation, sales/marketing materials, catalogs, or other such materials relating to products it manufactures or markets which it has configured to be compatible with the Software or otherwise implement the Specifications; (iv) in connection with any Products shipped to or sold in other countries that include the Software or any software that implements the Specifications, comply with the patent and trademark laws and practice of such other country; and (v) not alter or impair any acknowledgment of copyright or trademark rights of InMon that may appear in or on the Software, the Documentation or the Specifications. In the event InMon determines that Licensee is not complying with its obligations under clauses (i)-(v) above, InMon shall notify Licensee of such non-compliance, and if Licensee fails to correct such non-compliance within three (3) months, InMon may immediately terminate this Agreement as provided under paragraph 7 below and pursue any and all actions and remedies as it deems necessary, including, but not limited to breach of contract. 3. Ownership. Except for the license expressly granted in Section 2, Inmon hereby retains all right, title, and interest in and to the Trademark and all its Intellectual Property Rights in the Software, the Documentation and the Specifications. Licensee obtains no rights hereunder in the Trademark, Software, Documentation or Specifications by implication, estoppel or otherwise. 
Licensee acknowledges that the Trademark, Software, Documentation and Specifications are being licensed and not sold under this Agreement, and that this Agreement does not transfer title in the Trademark, Software, Documentation or Specifications, or any copy thereof, to Licensee. 4. Support. Inmon shall have no obligation under this Agreement to (a) supply maintenance or support, bug fixes or error corrections to the Licensed Software, (b) supply future versions of the Licensed Software or (c) provide Licensed Software development tools to Licensee. 5. Warranty. INMON HEREBY DISCLAIMS ALL WARRANTIES, EITHER EXPRESS, IMPLIED OR STATUTORY, WITH RESPECT TO THE TRADEMARK, THE SOFTWARE, THE DOCUMENTATION, THE SPECIFICATIONS. OR OTHERWISE, INCLUDING BUT NOT LIMITED TO IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT OF ANY INTELLECTUAL PROPERTY RIGHTS. 6. Limitation of Liability. IN NO EVENT SHALL INMON OR ITS SUPPLIERS OR LICENSORS BE LIABLE FOR ANY CONSEQUENTIAL, INCIDENTAL, SPECIAL, INDIRECT OR EXEMPLARY DAMAGES WHATSOEVER, WHETHER RELATED TO OR ARISING OUT OF THIS AGREEMENT, THE TRADEMARK, THE SOFTWARE, THE DOCUMENTATION, THE SPECIFICATIONS, OR OTHERWISE, INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS, BUSINESS INTERRUPTION, LOSS OF DATA, COSTS OF PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES OR FOR ANY CLAIM OR DEMAND AGAINST LICENSEE BY ANY OTHER PARTY, OR OTHER PECUNIARY LOSS, EVEN IF INMON HAS BEEN ADVISED OF OR KNOWS OF THE POSSIBILITY OF SUCH DAMAGES. 7. Term and Termination. The term of this Agreement will begin on the Effective Date, which shall be deemed to be the date of delivery of the Software and/or Documentation and/or Specifications to Licensee, and shall continue indefinitely unless and until terminated by Licensee's giving written notice of termination to InMon, or by InMon pursuant to InMon's termination rights as set forth in Section 2.3 above. 
Upon any termination of this Agreement, Licensee shall cease exercising its license rights under this Agreement, including the right to distribute Products that incorporate the Software or Documentation or that implement the Specifications. The rights and obligations contained in Sections 1, 3, 5, 6, 7, and 8 shall survive any termination of this Agreement. 8. General Provisions. 8.1 Assignment. This Agreement shall be binding upon and inure to the benefit of the parties hereto and their permitted successors and permitted assigns. InMon will have the right to assign this Agreement without notice to Licensee. Licensee may assign or transfer (whether by merger, operation of law or in any other manner) any of its rights or delegate any of its obligations hereunder without the prior written consent of InMon, provided the assignee assumes in writing all of Licensee's obligations hereunder. 8.2 Notices. All notices permitted or required under this Agreement shall be in writing and shall be delivered in person or mailed by first class, registered or certified mail, postage prepaid, to the address of the party specified in this Agreement or such other address as either party may specify in writing. Such notice shall be deemed to have been given upon receipt. 8.3 Non-Waiver. No term or provision hereof shall be deemed waived, and no breach excused, unless such waiver or consent shall be in writing and signed by the party claimed to have waived or consented. Any consent or waiver, whether express or implied, shall not constitute a consent or waiver of, or excuse for any separate, different or subsequent breach. 8.4 Independent Contractor. The parties' relationship shall be solely that of independent contractors, and nothing contained in this Agreement shall be construed to make either party an agent, partner, representative or principal of the other for any purpose. 8.5 Choice of Law and Forum. 
This Agreement shall be governed by and construed under the laws of the State of California, without giving effect to such state's conflict of laws principles. The parties hereby submit to the personal jurisdiction of, and agree that any legal proceeding with respect to or arising under this Agreement shall be brought in, the United States District Court for the Northern District of California or the state courts of the State of California for the County of San Francisco. 8.6 U.S. Government Licenses. The Software and Documentation are considered a "commercial item" as that term is defined at 48 C.F.R 2.101, or "commercial computer software" and "commercial computer software documentation" as such terms are used in 48 C.F.R 12.212 of the Federal Acquisition Regulations and its successors, and 48 C.F.R. 227.7202 of the DoD FAR Supplement and its successors. 8.7 Severability. If any provision of this Agreement is held to be unenforceable under applicable law, then such provision shall be excluded from this Agreement and the balance of this Agreement shall be interpreted as if such provision were so excluded and shall be enforceable in accordance with its terms. The court in its discretion may substitute for the excluded provision an enforceable provision which in economic substance reasonably approximates the excluded provision. 8.8 Compliance With Law. Licensee shall comply with all applicable laws and regulations (including privacy laws and regulations) having application to or governing its use and/or operation of the Software and agrees to indemnify and hold InMon harmless from and against any claims, damages, losses or obligations suffered or incurred by InMon arising from its failure to so comply. 8.9 Entire Agreement; Amendment. 
This Agreement constitutes the final, complete and entire agreement between the parties with respect to the subject matter hereof, and supersedes any previous proposals, negotiations, agreements, or arrangements, whether verbal or written, made between the parties with respect to such subject matter. This Agreement shall control over any additional or conflicting terms in any of Licensee's purchase orders or other business forms. This Agreement may only be amended or modified by mutual agreement of authorized representatives of the parties in writing. InMon Corp. 580 California Street, 5th Floor, San Francisco, CA 94104 Phone: (415) 283-3260 URL: www.inmon.com Email: info@inmon.com openvswitch-2.0.1+git20140120/debian/dirs000066400000000000000000000000211226605124000175510ustar00rootroot00000000000000usr/bin usr/sbin openvswitch-2.0.1+git20140120/debian/dkms.conf.in000066400000000000000000000004351226605124000211100ustar00rootroot00000000000000PACKAGE_NAME="openvswitch" PACKAGE_VERSION="__VERSION__" MAKE="./configure --with-linux='${kernel_source_dir}' && make -C datapath/linux" BUILT_MODULE_NAME[0]=openvswitch BUILT_MODULE_LOCATION[0]=datapath/linux/ DEST_MODULE_LOCATION[0]=/kernel/drivers/net/openvswitch/ AUTOINSTALL=yes openvswitch-2.0.1+git20140120/debian/ifupdown.sh000077500000000000000000000053461226605124000210760ustar00rootroot00000000000000#! /bin/sh # Copyright (c) 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# Have a look at /usr/share/doc/openvswitch-switch/README.Debian # for more information about configuring the /etc/network/interfaces. if [ -z "${IF_OVS_TYPE}" ]; then exit 0 fi ovs_vsctl() { ovs-vsctl --timeout=5 "$@" } if (ovs_vsctl --version) > /dev/null 2>&1; then :; else exit 0 fi if [ "${MODE}" = "start" ]; then eval OVS_EXTRA=\"${IF_OVS_EXTRA}\" case "${IF_OVS_TYPE}" in OVSBridge) ovs_vsctl -- --may-exist add-br "${IFACE}" ${IF_OVS_OPTIONS}\ ${OVS_EXTRA+-- $OVS_EXTRA} if [ ! -z "${IF_OVS_PORTS}" ]; then ifup --allow="${IFACE}" ${IF_OVS_PORTS} fi ;; OVSPort) ovs_vsctl -- --may-exist add-port "${IF_OVS_BRIDGE}"\ "${IFACE}" ${IF_OVS_OPTIONS} \ ${OVS_EXTRA+-- $OVS_EXTRA} ifconfig "${IFACE}" up ;; OVSIntPort) ovs_vsctl -- --may-exist add-port "${IF_OVS_BRIDGE}"\ "${IFACE}" ${IF_OVS_OPTIONS} -- set Interface "${IFACE}"\ type=internal ${OVS_EXTRA+-- $OVS_EXTRA} ifconfig "${IFACE}" up ;; OVSBond) ovs_vsctl -- --fake-iface add-bond "${IF_OVS_BRIDGE}"\ "${IFACE}" ${IF_OVS_BONDS} ${IF_OVS_OPTIONS} \ ${OVS_EXTRA+-- $OVS_EXTRA} ifconfig "${IFACE}" up for slave in ${IF_OVS_BONDS} do ifconfig "${slave}" up done ;; *) exit 0 ;; esac elif [ "${MODE}" = "stop" ]; then case "${IF_OVS_TYPE}" in OVSBridge) if [ ! 
-z "${IF_OVS_PORTS}" ]; then ifdown --allow="${IFACE}" ${IF_OVS_PORTS} fi ovs_vsctl -- --if-exists del-br "${IFACE}" ;; OVSPort|OVSIntPort|OVSBond) ovs_vsctl -- --if-exists del-port "${IF_OVS_BRIDGE}" "${IFACE}" ;; *) exit 0 ;; esac fi exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-common.dirs000066400000000000000000000000241226605124000234120ustar00rootroot00000000000000var/log/openvswitch openvswitch-2.0.1+git20140120/debian/openvswitch-common.docs000066400000000000000000000000041226605124000233770ustar00rootroot00000000000000FAQ openvswitch-2.0.1+git20140120/debian/openvswitch-common.install000066400000000000000000000003431226605124000241230ustar00rootroot00000000000000usr/bin/ovs-appctl usr/bin/ovs-benchmark usr/bin/ovs-ofctl usr/bin/ovs-parse-backtrace usr/bin/ovs-pki usr/bin/ovsdb-client usr/sbin/ovs-bugtool usr/share/openvswitch/bugtool-plugins usr/share/openvswitch/scripts/ovs-bugtool-* openvswitch-2.0.1+git20140120/debian/openvswitch-common.manpages000066400000000000000000000003731226605124000242530ustar00rootroot00000000000000_debian/ovsdb/ovsdb-client.1 _debian/ovsdb/ovsdb-tool.1 _debian/utilities/ovs-appctl.8 _debian/utilities/ovs-benchmark.1 _debian/utilities/ovs-ofctl.8 _debian/utilities/ovs-pki.8 _debian/utilities/bugtool/ovs-bugtool.8 utilities/ovs-parse-backtrace.8 openvswitch-2.0.1+git20140120/debian/openvswitch-controller.README.Debian000066400000000000000000000004251226605124000254670ustar00rootroot00000000000000README.Debian for openvswitch-controller ------------------------------------- * To (re)configure the controller, edit /etc/default/openvswitch-controller and run "/etc/init.d/openvswitch-controller restart". -- Ben Pfaff , Fri, 4 Mar 2011 14:28:53 -0800 openvswitch-2.0.1+git20140120/debian/openvswitch-controller.default000066400000000000000000000017271226605124000250030ustar00rootroot00000000000000# This is a POSIX shell fragment -*- sh -*- # LISTEN: What OpenFlow connection methods should the controller listen on? 
# # This is a space-delimited list of connection methods: # # * "pssl:[PORT]": Listen for SSL connections on the specified PORT # (default: 6633). The private key, certificate, and CA certificate # must be specified below. # # * "ptcp:[PORT]": Listen for TCP connections on the specified PORT # (default: 6633). Not recommended for security reasons. # LISTEN="pssl:" # PRIVKEY: Name of file containing controller's private key. # Required if SSL enabled. PRIVKEY=/etc/openvswitch-controller/privkey.pem # CERT: Name of file containing certificate for private key. # Required if SSL enabled. CERT=/etc/openvswitch-controller/cert.pem # CACERT: Name of file containing switch CA certificate. # Required if SSL enabled. CACERT=/etc/openvswitch-controller/cacert.pem # Additional options to pass to controller, e.g. "--hub" DAEMON_OPTS="" openvswitch-2.0.1+git20140120/debian/openvswitch-controller.dirs000066400000000000000000000000331226605124000243050ustar00rootroot00000000000000etc/openvswitch-controller openvswitch-2.0.1+git20140120/debian/openvswitch-controller.init000077500000000000000000000211621226605124000243200ustar00rootroot00000000000000#!/bin/sh # # Copyright (c) 2011 Nicira, Inc. # Copyright (c) 2007, 2009 Javier Fernandez-Sanguino # # This is free software; you may redistribute it and/or modify # it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2, # or (at your option) any later version. # # This is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License with # the Debian operating system, in /usr/share/common-licenses/GPL; if # not, write to the Free Software Foundation, Inc., 59 Temple Place, # Suite 330, Boston, MA 02111-1307 USA # ### BEGIN INIT INFO # Provides: openvswitch-controller # Required-Start: $network $local_fs $remote_fs # Required-Stop: $remote_fs # Should-Start: $named # Should-Stop: # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: Open vSwitch controller # Description: The Open vSwitch controller enables OpenFlow switches that connect to it # to act as MAC-learning Ethernet switches. ### END INIT INFO PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin DAEMON=/usr/bin/ovs-controller # Introduce the server's location here NAME=ovs-controller # Introduce the short server's name here DESC=ovs-controller # Introduce a short description here LOGDIR=/var/log/openvswitch # Log directory to use PIDFILE=/var/run/openvswitch/$NAME.pid test -x $DAEMON || exit 0 . /lib/lsb/init-functions # Default options, these can be overriden by the information # at /etc/default/openvswitch-controller DAEMON_OPTS="" # Additional options given to the server DODTIME=10 # Time to wait for the server to die, in seconds # If this value is set too low you might not # let some servers to die gracefully and # 'restart' will not work LOGFILE=$LOGDIR/$NAME.log # Server logfile #DAEMONUSER= # User to run the daemons as. If this value # is set start-stop-daemon will chuid the server # Include defaults if available default=/etc/default/openvswitch-controller if [ -f $default ] ; then . $default fi # Check that the user exists (if we set a user) # Does the user exist? 
if [ -n "$DAEMONUSER" ] ; then if getent passwd | grep -q "^$DAEMONUSER:"; then # Obtain the uid and gid DAEMONUID=`getent passwd |grep "^$DAEMONUSER:" | awk -F : '{print $3}'` DAEMONGID=`getent passwd |grep "^$DAEMONUSER:" | awk -F : '{print $4}'` else log_failure_msg "The user $DAEMONUSER, required to run $NAME does not exist." exit 1 fi fi set -e running_pid() { # Check if a given process pid's cmdline matches a given name pid=$1 name=$2 [ -z "$pid" ] && return 1 [ ! -d /proc/$pid ] && return 1 cmd=`cat /proc/$pid/cmdline | tr "\000" "\n"|head -n 1 |cut -d : -f 1` # Is this the expected server [ "$cmd" != "$name" ] && return 1 return 0 } running() { # Check if the process is running looking at /proc # (works for all users) # No pidfile, probably no daemon present [ ! -f "$PIDFILE" ] && return 1 pid=`cat $PIDFILE` running_pid $pid $DAEMON || return 1 return 0 } start_server() { if [ -z "$LISTEN" ]; then echo "$default: No connection methods configured, controller disabled" >&2 exit 0 fi if [ ! -d /var/run/openvswitch ]; then install -d -m 755 -o root -g root /var/run/openvswitch fi SSL_OPTS= case $LISTEN in *ssl*) : ${PRIVKEY:=/etc/openvswitch-controller/privkey.pem} : ${CERT:=/etc/openvswitch-controller/cert.pem} : ${CACERT:=/etc/openvswitch-controller/cacert.pem} if test ! -e "$PRIVKEY" || test ! -e "$CERT" || test ! -e "$CACERT"; then if test ! -e "$PRIVKEY"; then echo "$PRIVKEY: private key missing" >&2 fi if test ! -e "$CERT"; then echo "$CERT: certificate for private key missing" >&2 fi if test ! -e "$CACERT"; then echo "$CACERT: CA certificate missing" >&2 fi exit 1 fi SSL_OPTS="--private-key=$PRIVKEY --certificate=$CERT --ca-cert=$CACERT" ;; esac # Start the process using the wrapper if [ -z "$DAEMONUSER" ] ; then start-stop-daemon --start --pidfile $PIDFILE \ --exec $DAEMON -- --detach --pidfile=$PIDFILE \ $LISTEN $DAEMON_OPTS $SSL_OPTS errcode=$? 
else # if we are using a daemonuser then change the user id start-stop-daemon --start --quiet --pidfile $PIDFILE \ --chuid $DAEMONUSER --exec $DAEMON -- \ --detach --pidfile=$PIDFILE $LISTEN $DAEMON_OPTS \ $SSL_OPTS errcode=$? fi return $errcode } stop_server() { # Stop the process using the wrapper if [ -z "$DAEMONUSER" ] ; then start-stop-daemon --stop --quiet --pidfile $PIDFILE \ --exec $DAEMON errcode=$? else # if we are using a daemonuser then look for process that match start-stop-daemon --stop --quiet --pidfile $PIDFILE \ --user $DAEMONUSER --exec $DAEMON errcode=$? fi return $errcode } reload_server() { [ ! -f "$PIDFILE" ] && return 1 pid=`cat $PIDFILE` # This is the daemon's pid # Send a SIGHUP kill -1 $pid return $? } force_stop() { # Force the process to die killing it manually [ ! -e "$PIDFILE" ] && return if running ; then kill -15 $pid # Is it really dead? sleep "$DODTIME" if running ; then kill -9 $pid sleep "$DODTIME" if running ; then echo "Cannot kill $NAME (pid=$pid)!" exit 1 fi fi fi rm -f $PIDFILE } case "$1" in start) log_daemon_msg "Starting $DESC " "$NAME" # Check if it's running first if running ; then log_progress_msg "apparently already running" log_end_msg 0 exit 0 fi if start_server && running ; then # It's ok, the server started and is running log_end_msg 0 else # Either we could not start it or it is not running # after we did # NOTE: Some servers might die some time after they start, # this code does not try to detect this and might give # a false positive (use 'status' for that) log_end_msg 1 fi ;; stop) log_daemon_msg "Stopping $DESC" "$NAME" if running ; then # Only stop the server if we see it running stop_server log_end_msg $? 
else # If it's not running don't do anything log_progress_msg "apparently not running" log_end_msg 0 exit 0 fi ;; force-stop) # First try to stop gracefully the program $0 stop if running; then # If it's still running try to kill it more forcefully log_daemon_msg "Stopping (force) $DESC" "$NAME" force_stop log_end_msg $? fi ;; restart|force-reload) log_daemon_msg "Restarting $DESC" "$NAME" if running; then stop_server # Wait some sensible amount, some server need this. [ -n "$DODTIME" ] && sleep $DODTIME fi start_server running log_end_msg $? ;; status) log_daemon_msg "Checking status of $DESC" "$NAME" if running ; then log_progress_msg "running" log_end_msg 0 else log_progress_msg "apparently not running" log_end_msg 1 exit 1 fi ;; # Use this if the daemon cannot reload reload) log_warning_msg "Reloading $NAME daemon: not implemented, as the daemon" log_warning_msg "cannot re-read the config file (use restart)." ;; *) N=/etc/init.d/openvswitch-controller echo "Usage: $N {start|stop|force-stop|restart|force-reload|status}" >&2 exit 1 ;; esac exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-controller.install000066400000000000000000000000271226605124000250150ustar00rootroot00000000000000usr/bin/ovs-controller openvswitch-2.0.1+git20140120/debian/openvswitch-controller.manpages000066400000000000000000000000431226605124000251400ustar00rootroot00000000000000_debian/utilities/ovs-controller.8 openvswitch-2.0.1+git20140120/debian/openvswitch-controller.postinst000077500000000000000000000032671226605124000252460ustar00rootroot00000000000000#!/bin/sh # postinst script for openvswitch-controller # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `configure' # * `abort-upgrade' # * `abort-remove' `in-favour' # # * `abort-remove' # * `abort-deconfigure' `in-favour' # `removing' # # for details, see http://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in configure) cd /etc/openvswitch-controller # If cacert.pem 
is a symlink to the old location for cacert.pem, # remove it so that we can symlink it to the new location. if test -h cacert.pem && \ test X"`readlink cacert.pem`" = X/usr/share/openvswitch/pki/switchca/cacert.pem; then rm -f cacert.pem fi if ! test -e cacert.pem; then ln -s /var/lib/openvswitch/pki/switchca/cacert.pem cacert.pem fi if ! test -e privkey.pem || ! test -e cert.pem; then oldumask=$(umask) umask 077 ovs-pki req+sign tmp controller >/dev/null mv tmp-privkey.pem privkey.pem mv tmp-cert.pem cert.pem mv tmp-req.pem req.pem chmod go+r cert.pem req.pem umask $oldumask fi ;; abort-upgrade|abort-remove|abort-deconfigure) ;; *) echo "postinst called with unknown argument \`$1'" >&2 exit 1 ;; esac #DEBHELPER# exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-controller.postrm000077500000000000000000000021721226605124000247010ustar00rootroot00000000000000#!/bin/sh # postrm script for openvswitch-controller # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `remove' # * `purge' # * `upgrade' # * `failed-upgrade' # * `abort-install' # * `abort-install' # * `abort-upgrade' # * `disappear' # # for details, see http://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in purge) if cd /etc/openvswitch-controller; then rm -f cacert.pem cert.pem privkey.pem req.pem rm -f tmp-privkey.pem tmp-cert.pem tmp-req.pem fi ;; remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) ;; *) echo "postrm called with unknown argument \`$1'" >&2 exit 1 ;; esac # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. 
#DEBHELPER# exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-datapath-dkms.postinst000066400000000000000000000007021226605124000255710ustar00rootroot00000000000000#!/bin/sh set -e package=openvswitch-datapath-dkms name=openvswitch version=`dpkg-query -W -f='${Version}' "$package" \ |rev|cut -d- -f2-|rev|cut -d':' -f2|tr -d "\n"` isadded=`dkms status -m "$name" -v "$version"` if [ "x${isadded}" = "x" ] ; then dkms add -m "$name" -v "$version" fi if [ "$1" = 'configure' ] ; then dkms build -m "$name" -v "$version" && dkms install -m "$name" -v "$version" || true fi #DEBHELPER# openvswitch-2.0.1+git20140120/debian/openvswitch-datapath-dkms.prerm000066400000000000000000000003741226605124000250400ustar00rootroot00000000000000#!/bin/sh set -e package=openvswitch-datapath-dkms name=openvswitch version=`dpkg-query -W -f='${Version}' "$package" \ |rev|cut -d- -f2-|rev|cut -d':' -f2|tr -d "\n"` dkms remove -m "$name" -v "$version" --all || true #DEBHELPER# exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-datapath-module-_KVERS_.postinst.modules.in000077500000000000000000000013201226605124000313620ustar00rootroot00000000000000#!/bin/sh # postinst script for #PACKAGE# # # see: dh_installdeb(1) set -e #DEBHELPER# # If the kernel module is already loaded, we have nothing to do here. # A force-reload-kmod should be run manually to use the new kernel module. if [ -e /sys/module/openvswitch ] || [ -e /sys/module/openvswitch_mod ]; then exit 0 fi # If the kernel module is not loaded, then it is likely because none # was installed before and therefore Open vSwitch couldn't be started. # Try to start it now. # # (Ideally we'd only want to do this if this package corresponds to the # running kernel, but I don't know a reliable way to check.) 
INIT=/etc/init.d/openvswitch-switch if test -x $INIT; then $INIT start || true fi exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-datapath-source.README.Debian000066400000000000000000000015441226605124000263730ustar00rootroot00000000000000Open vSwitch for Debian ---------------------- * How do I build this module the Debian way? - Building with module-assistant: $ module-assistant auto-install openvswitch-datapath or $ m-a a-i openvswitch-datapath If kernel source or headers are in a non-standard directory, add the option -k /path/to/kernel/source with the correct path. - Building with make-kpkg $ cd /usr/src/ $ tar jxvf openvswitch.tar.bz2 $ cd /usr/src/kernel-source-2.6.26 $ make-kpkg --added-modules=openvswitch modules - Building without make-kpkg $ cd /usr/src/ $ tar jxvf openvswitch.tar.bz2 $ cd modules/openvswitch $ fakeroot debian/rules kdist_image If you run this as root, fakeroot is not needed. -- Ben Pfaff , Wed, 22 Jun 2011 09:51:28 -0700 openvswitch-2.0.1+git20140120/debian/openvswitch-datapath-source.copyright000066400000000000000000000005271226605124000262650ustar00rootroot00000000000000Upstream Authors: Nicira, Inc. Copyright: Copyright (C) 2008 Nicira, Inc. License: Files in the datapath/ and its sub-directories are covered under the GNU General Public License Version 2. On Debian systems, the complete text of the GNU General Public License can be found in `/usr/share/common-licenses/GPL'. 
openvswitch-2.0.1+git20140120/debian/openvswitch-datapath-source.dirs000066400000000000000000000000541226605124000252110ustar00rootroot00000000000000usr/src/modules/openvswitch-datapath/debian openvswitch-2.0.1+git20140120/debian/openvswitch-datapath-source.install000066400000000000000000000004621226605124000257210ustar00rootroot00000000000000debian/changelog usr/src/modules/openvswitch-datapath/debian debian/control usr/src/modules/openvswitch-datapath/debian debian/compat usr/src/modules/openvswitch-datapath/debian debian/*.modules.in usr/src/modules/openvswitch-datapath/debian _debian/openvswitch.tar.gz usr/src/modules/openvswitch-datapath openvswitch-2.0.1+git20140120/debian/openvswitch-ipsec.dirs000066400000000000000000000000361226605124000232300ustar00rootroot00000000000000usr/share/openvswitch/scripts openvswitch-2.0.1+git20140120/debian/openvswitch-ipsec.init000077500000000000000000000127311226605124000232420ustar00rootroot00000000000000#!/bin/sh # # Copyright (c) 2007, 2009 Javier Fernandez-Sanguino # # This is free software; you may redistribute it and/or modify # it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2, # or (at your option) any later version. # # This is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License with # the Debian operating system, in /usr/share/common-licenses/GPL; if # not, write to the Free Software Foundation, Inc., 59 Temple Place, # Suite 330, Boston, MA 02111-1307 USA # ### BEGIN INIT INFO # Provides: openvswitch-ipsec # Required-Start: $network $local_fs $remote_fs openvswitch-switch # Required-Stop: $remote_fs # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: Open vSwitch GRE-over-IPsec daemon # Description: The ovs-monitor-ipsec script provides support for encrypting GRE # tunnels with IPsec. ### END INIT INFO PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin DAEMON=/usr/share/openvswitch/scripts/ovs-monitor-ipsec # Daemon's location NAME=ovs-monitor-ipsec # Introduce the short server's name here LOGDIR=/var/log/openvswitch # Log directory to use PIDFILE=/var/run/openvswitch/$NAME.pid test -x $DAEMON || exit 0 . /lib/lsb/init-functions DODTIME=10 # Time to wait for the server to die, in seconds # If this value is set too low you might not # let some servers to die gracefully and # 'restart' will not work set -e running_pid() { # Check if a given process pid's cmdline matches a given name pid=$1 name=$2 [ -z "$pid" ] && return 1 [ ! -d /proc/$pid ] && return 1 cmd=`cat /proc/$pid/cmdline | tr "\000" " "|cut -d " " -f 2` # Is this the expected server [ "$cmd" != "$name" ] && return 1 return 0 } running() { # Check if the process is running looking at /proc # (works for all users) # No pidfile, probably no daemon present [ ! -f "$PIDFILE" ] && return 1 pid=`cat $PIDFILE` running_pid $pid $DAEMON || return 1 return 0 } start_server() { if [ ! 
-d /var/run/openvswitch ]; then install -d -m 755 -o root -g root /var/run/openvswitch fi /usr/share/openvswitch/scripts/ovs-monitor-ipsec \ --pidfile=$PIDFILE --log-file --detach --monitor \ unix:/var/run/openvswitch/db.sock return 0 } stop_server() { if [ -e $PIDFILE ]; then kill `cat $PIDFILE` fi return 0 } force_stop() { # Force the process to die killing it manually [ ! -e "$PIDFILE" ] && return if running ; then kill -15 $pid # Is it really dead? sleep "$DODTIME" if running ; then kill -9 $pid sleep "$DODTIME" if running ; then echo "Cannot kill $NAME (pid=$pid)!" exit 1 fi fi fi rm -f $PIDFILE } case "$1" in start) log_daemon_msg "Starting $NAME" # Check if it's running first if running ; then log_progress_msg "apparently already running" log_end_msg 0 exit 0 fi if start_server && running ; then # It's ok, the server started and is running log_end_msg 0 else # Either we could not start it or it is not running # after we did # NOTE: Some servers might die some time after they start, # this code does not try to detect this and might give # a false positive (use 'status' for that) log_end_msg 1 fi ;; stop) log_daemon_msg "Stopping $NAME" if running ; then # Only stop the server if we see it running stop_server log_end_msg $? else # If it's not running don't do anything log_progress_msg "apparently not running" log_end_msg 0 exit 0 fi ;; force-stop) # First try to stop gracefully the program $0 stop if running; then # If it's still running try to kill it more forcefully log_daemon_msg "Stopping (force) $NAME" force_stop log_end_msg $? fi ;; restart|force-reload) log_daemon_msg "Restarting $NAME" stop_server # Wait some sensible amount, some server need this [ -n "$DODTIME" ] && sleep $DODTIME start_server running log_end_msg $? 
;; status) log_daemon_msg "Checking status of $NAME" if running ; then log_progress_msg "running" log_end_msg 0 else log_progress_msg "apparently not running" log_end_msg 1 exit 1 fi ;; # Use this if the daemon cannot reload reload) log_warning_msg "Reloading $NAME daemon: not implemented, as the daemon" log_warning_msg "cannot re-read the config file (use restart)." ;; *) N=/etc/init.d/openvswitch-ipsec echo "Usage: $N {start|stop|force-stop|restart|force-reload|status}" >&2 exit 1 ;; esac exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-ipsec.install000066400000000000000000000000671226605124000237410ustar00rootroot00000000000000debian/ovs-monitor-ipsec usr/share/openvswitch/scripts openvswitch-2.0.1+git20140120/debian/openvswitch-pki.dirs000066400000000000000000000000251226605124000227060ustar00rootroot00000000000000/var/lib/openvswitch openvswitch-2.0.1+git20140120/debian/openvswitch-pki.postinst000077500000000000000000000025761226605124000236500ustar00rootroot00000000000000#!/bin/sh # postinst script for openvswitch-pki # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `configure' # * `abort-upgrade' # * `abort-remove' `in-favour' # # * `abort-remove' # * `abort-deconfigure' `in-favour' # `removing' # # for details, see http://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in configure) # Move the pki directory from its previous, non FHS-compliant location, # to its new location, leaving behind a symlink for compatibility. if test -d /usr/share/openvswitch/pki && \ test ! -e /var/lib/openvswitch/pki; then mv /usr/share/openvswitch/pki /var/lib/openvswitch ln -s /var/lib/openvswitch/pki /usr/share/openvswitch/pki fi # Create certificate authorities. if test ! 
-e /var/lib/openvswitch/pki; then ovs-pki init fi ;; abort-upgrade|abort-remove|abort-deconfigure) ;; *) echo "postinst called with unknown argument \`$1'" >&2 exit 1 ;; esac #DEBHELPER# exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-pki.postrm000077500000000000000000000022471226605124000233040ustar00rootroot00000000000000#!/bin/sh # postrm script for openvswitch-pki # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `remove' # * `purge' # * `upgrade' # * `failed-upgrade' # * `abort-install' # * `abort-install' # * `abort-upgrade' # * `disappear' # # for details, see http://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in purge) rm -f /var/log/openvswitch/ovs-pki.log* || true # Remove backward compatibility symlink, if present. if test -h /usr/share/openvswitch/pki; then rm -f /usr/share/openvswitch/pki fi ;; remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) ;; *) echo "postrm called with unknown argument \`$1'" >&2 exit 1 ;; esac # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. #DEBHELPER# exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-switch.README.Debian000066400000000000000000000070051226605124000246060ustar00rootroot00000000000000README.Debian for openvswitch-switch --------------------------------- * To use the Linux kernel-based switch implementation, you will need to build and install the Open vSwitch kernel module. To do so, install the openvswitch-datapath-source package, then follow the instructions given in /usr/share/doc/openvswitch-datapath-source/README.Debian * This package does not yet support the userspace datapath-based switch implementation. -- Ben Pfaff , Fri, 6 Jul 2012 15:12:38 -0700 Debian network scripts integration ---------------------------------- This package lets a user to optionally configure Open vSwitch bridges and ports from /etc/network/interfaces. 
Please refer to the interfaces(5) manpage for more details regarding /etc/network/interfaces. The stanzas that configure the OVS bridges should begin with "allow-ovs" followed by name of the bridge. Here is an example. allow-ovs br0 The stanzas that configure the OVS ports should begin with "allow-${bridge-name}" followed by name of the port. Here is an example. allow-br0 eth0 The following OVS specific "command" options are supported: - ovs_type: This can either be OVSBridge, OVSPort, OVSIntPort or OVSBond depending on whether you configure a bridge, port, an internal port or a bond. This is a required option. - ovs_ports: This option specifies all the ports that belong to a bridge. - ovs_bridge: This options specifies a bridge to which a port belongs. This is a required option for a port. - ovs_bonds: This option specifies the list of physical interfaces to be bonded together. - ovs_options: This option lets you add extra arguments to a ovs-vsctl command. See examples. - ovs_extra: This option lets you run additional ovs-vsctl commands, separated by "--" (double dash). Variables can be part of the "ovs_extra" option. You can provide all the standard environmental variables described in the interfaces(5) man page. You can also pass shell commands. More implementation specific details can be seen in the examples. Examples: -------- ex 1: A standalone bridge. allow-ovs br0 iface br0 inet static address 192.168.1.1 netmask 255.255.255.0 ovs_type OVSBridge ex 2: A bridge with one port. allow-ovs br0 iface br0 inet dhcp ovs_type OVSBridge ovs_ports eth0 allow-br0 eth0 iface eth0 inet manual ovs_bridge br0 ovs_type OVSPort ex 3: A bridge with multiple physical ports. allow-ovs br0 iface br0 inet dhcp ovs_type OVSBridge ovs_ports eth0 eth1 allow-br0 eth0 iface eth0 inet manual ovs_bridge br0 ovs_type OVSPort allow-br0 eth1 iface eth1 inet manual ovs_bridge br0 ovs_type OVSPort ex 4: A bridge with an OVS internal port. 
allow-ovs br1 iface br1 inet static address 192.168.1.1 netmask 255.255.255.0 ovs_type OVSBridge ovs_ports vlan100 allow-br1 vlan100 iface vlan100 inet manual ovs_bridge br1 ovs_type OVSIntPort ovs_options tag=100 ovs_extra set interface ${IFACE} external-ids:iface-id=$(hostname -s) ex 5: Bonding. allow-ovs br2 iface br2 inet static address 192.170.1.1 netmask 255.255.255.0 ovs_type OVSBridge ovs_ports bond0 allow-br2 bond0 iface bond0 inet manual ovs_bridge br2 ovs_type OVSBond ovs_bonds eth2 eth3 ovs_options bond_mode=balance-tcp lacp=active ex 6: Create and destroy bridges. ifup --allow=ovs $list_of_bridges ifdown --allow=ovs $list_of_bridges -- Gurucharan Shetty , Fri, 04 May 2012 12:58:19 -0700 openvswitch-2.0.1+git20140120/debian/openvswitch-switch.dirs000066400000000000000000000000571226605124000234310ustar00rootroot00000000000000/etc/openvswitch /usr/share/openvswitch/switch openvswitch-2.0.1+git20140120/debian/openvswitch-switch.init000077500000000000000000000103611226605124000234350ustar00rootroot00000000000000#! /bin/sh # # Copyright (C) 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ### BEGIN INIT INFO # Provides: openvswitch-switch # Required-Start: $network $named $remote_fs $syslog # Required-Stop: $remote_fs # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: Open vSwitch switch # Description: openvswitch-switch provides the userspace components and utilities for # the Open vSwitch kernel-based switch. 
### END INIT INFO (test -x /usr/sbin/ovs-vswitchd && test -x /usr/sbin/ovsdb-server) || exit 0 . /usr/share/openvswitch/scripts/ovs-lib test -e /etc/default/openvswitch-switch && . /etc/default/openvswitch-switch network_interfaces () { INTERFACES="/etc/network/interfaces" [ -e "${INTERFACES}" ] || return bridges=`awk '{ if ($1 == "allow-ovs") { print $2; } }' "${INTERFACES}"` [ -n "${bridges}" ] && $1 --allow=ovs ${bridges} } load_kmod () { ovs_ctl load-kmod || exit $? } start () { if ovs_ctl load-kmod; then : else echo "Module has probably not been built for this kernel." if ! test -d /usr/share/doc/openvswitch-datapath-source; then echo "Install the openvswitch-datapath-source package, then read" else echo "For instructions, read" fi echo "/usr/share/doc/openvswitch-datapath-source/README.Debian" if test X"$OVS_MISSING_KMOD_OK" = Xyes; then # We're being invoked by the package postinst. Do not # fail package installation just because the kernel module # is not available. exit 0 fi fi set ovs_ctl ${1-start} --system-id=random if test X"$FORCE_COREFILES" != X; then set "$@" --force-corefiles="$FORCE_COREFILES" fi set "$@" $OVS_CTL_OPTS "$@" || exit $? [ "$2" = "start" ] && network_interfaces ifup } stop () { network_interfaces ifdown ovs_ctl stop } restart () { # OVS_FORCE_RELOAD_KMOD can be set by package postinst script. 
if [ "$1" = "--save-flows=yes" ] || \ [ "${OVS_FORCE_RELOAD_KMOD}" = "no" ]; then start restart elif [ "${OVS_FORCE_RELOAD_KMOD}" = "yes" ]; then depmod -a if [ -e /sys/module/openvswitch ]; then LOADED_SRCVERSION=`cat /sys/module/openvswitch/srcversion` LOADED_VERSION=`cat /sys/module/openvswitch/version` elif [ -e /sys/module/openvswitch_mod ]; then LOADED_SRCVERSION=`cat /sys/module/openvswitch_mod/srcversion` LOADED_VERSION=`cat /sys/module/openvswitch_mod/version` fi SRCVERSION=`modinfo -F srcversion openvswitch 2>/dev/null` VERSION=`modinfo -F version openvswitch 2>/dev/null` ovs_ctl_log "Package upgrading:\n"\ "Loaded version: ${LOADED_VERSION} ${LOADED_SRCVERSION}.\n"\ "Version on disk: ${VERSION} ${SRCVERSION}." # If the kernel module was previously loaded and it is different than # the kernel module on disk, then do a 'force-reload-kmod'. if [ -n "${LOADED_SRCVERSION}" ] && [ -n "${SRCVERSION}" ] && \ [ "${SRCVERSION}" != "${LOADED_SRCVERSION}" ]; then start force-reload-kmod else start restart fi else stop start fi } case $1 in start) start ;; stop | force-stop) stop ;; reload | force-reload) # The OVS daemons keep up-to-date. ;; restart) shift restart "$@" ;; status) ovs_ctl status exit $? 
;; force-reload-kmod) start force-reload-kmod ;; load-kmod) load_kmod ;; *) echo "Usage: $0 {start|stop|restart|force-reload|status|force-stop|force-reload-kmod|load-kmod}" >&2 exit 1 ;; esac exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-switch.install000066400000000000000000000006631226605124000241410ustar00rootroot00000000000000usr/bin/ovs-dpctl usr/bin/ovs-dpctl-top usr/bin/ovs-pcap usr/bin/ovs-tcpundump usr/bin/ovs-vlan-test usr/bin/ovs-vsctl usr/bin/ovsdb-tool usr/sbin/ovs-vswitchd usr/sbin/ovsdb-server usr/share/openvswitch/scripts/ovs-check-dead-ifs usr/share/openvswitch/scripts/ovs-ctl usr/share/openvswitch/scripts/ovs-lib usr/share/openvswitch/scripts/ovs-save usr/share/openvswitch/vswitch.ovsschema debian/ifupdown.sh usr/share/openvswitch/scripts openvswitch-2.0.1+git20140120/debian/openvswitch-switch.links000066400000000000000000000002371226605124000236100ustar00rootroot00000000000000usr/share/openvswitch/scripts/ifupdown.sh etc/network/if-pre-up.d/openvswitch usr/share/openvswitch/scripts/ifupdown.sh etc/network/if-post-down.d/openvswitch openvswitch-2.0.1+git20140120/debian/openvswitch-switch.logrotate000066400000000000000000000006141226605124000244670ustar00rootroot00000000000000/var/log/openvswitch/*.log { daily compress create 640 root adm delaycompress missingok rotate 30 postrotate # Tell Open vSwitch daemons to reopen their log files if [ -d /var/run/openvswitch ]; then for pidfile in `cd /var/run/openvswitch && echo *.pid`; do ovs-appctl -t "${pidfile%%.pid}" vlog/reopen done fi endscript } openvswitch-2.0.1+git20140120/debian/openvswitch-switch.manpages000066400000000000000000000004341226605124000242620ustar00rootroot00000000000000_debian/ovsdb/ovsdb-server.1 _debian/utilities/ovs-dpctl.8 _debian/utilities/ovs-dpctl-top.8 _debian/utilities/ovs-pcap.1 _debian/utilities/ovs-tcpundump.1 _debian/utilities/ovs-vlan-test.8 _debian/utilities/ovs-vsctl.8 _debian/vswitchd/ovs-vswitchd.8 vswitchd/ovs-vswitchd.conf.db.5 
openvswitch-2.0.1+git20140120/debian/openvswitch-switch.postinst000077500000000000000000000032451226605124000243600ustar00rootroot00000000000000#!/bin/sh # postinst script for openvswitch-switch # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `configure' # * `abort-upgrade' # * `abort-remove' `in-favour' # # * `abort-remove' # * `abort-deconfigure' `in-favour' # `removing' # # for details, see http://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in configure) DEFAULT=/etc/default/openvswitch-switch TEMPLATE=/usr/share/openvswitch/switch/default.template if ! test -e $DEFAULT; then cp $TEMPLATE $DEFAULT else for var in $(awk -F'[ :]' '/^# [_A-Z0-9]+:/{print $2}' $TEMPLATE) do if ! grep $var $DEFAULT >/dev/null 2>&1; then echo >> $DEFAULT sed -n "/$var:/,/$var=/p" $TEMPLATE >> $DEFAULT fi done fi ;; abort-upgrade|abort-remove|abort-deconfigure) ;; *) echo "postinst called with unknown argument \`$1'" >&2 exit 1 ;; esac # Do not fail package installation just because the kernel module # is not available. OVS_MISSING_KMOD_OK=yes export OVS_MISSING_KMOD_OK # force-reload-kmod during upgrade. If a user wants to override this, # they can set the variable OVS_FORCE_RELOAD_KMOD=no while installing. 
[ -z "${OVS_FORCE_RELOAD_KMOD}" ] && OVS_FORCE_RELOAD_KMOD=yes || true export OVS_FORCE_RELOAD_KMOD #DEBHELPER# exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-switch.postrm000077500000000000000000000023641226605124000240220ustar00rootroot00000000000000#!/bin/sh # postrm script for openvswitch-switch # # see: dh_installdeb(1) set -e # summary of how this script can be called: # * `remove' # * `purge' # * `upgrade' # * `failed-upgrade' # * `abort-install' # * `abort-install' # * `abort-upgrade' # * `disappear' # # for details, see http://www.debian.org/doc/debian-policy/ or # the debian-policy package case "$1" in purge) rm -f /etc/openvswitch/conf.db rm -f /etc/openvswitch/.conf.db.~lock~ rm -f /etc/default/openvswitch-switch rm -f /var/log/openvswitch/ovs-vswitchd.log* || true rm -f /var/log/openvswitch/ovsdb-server.log* || true rm -f /etc/openvswitch/system-id.conf ;; remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) ;; *) echo "postrm called with unknown argument \`$1'" >&2 exit 1 ;; esac # dh_installdeb will replace this with shell code automatically # generated by other debhelper scripts. #DEBHELPER# exit 0 openvswitch-2.0.1+git20140120/debian/openvswitch-switch.template000066400000000000000000000004501226605124000243000ustar00rootroot00000000000000# This is a POSIX shell fragment -*- sh -*- # FORCE_COREFILES: If 'yes' then core files will be enabled. # FORCE_COREFILES=yes # OVS_CTL_OPTS: Extra options to pass to ovs-ctl. This is, for example, # a suitable place to specify --ovs-vswitchd-wrapper=valgrind. 
# OVS_CTL_OPTS= openvswitch-2.0.1+git20140120/debian/openvswitch-test.dirs000066400000000000000000000000341226605124000231020ustar00rootroot00000000000000usr/share/pyshared/ovstest/ openvswitch-2.0.1+git20140120/debian/openvswitch-test.install000066400000000000000000000001521226605124000236100ustar00rootroot00000000000000usr/share/openvswitch/python/ovstest usr/lib/python2.6/dist-packages/ usr/bin/ovs-test usr/bin/ovs-l3ping openvswitch-2.0.1+git20140120/debian/openvswitch-test.manpages000066400000000000000000000000741226605124000237400ustar00rootroot00000000000000_debian/utilities/ovs-test.8 _debian/utilities/ovs-l3ping.8 openvswitch-2.0.1+git20140120/debian/ovs-monitor-ipsec000077500000000000000000000415571226605124000222330ustar00rootroot00000000000000#!/usr/bin/python # Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # A daemon to monitor attempts to create GRE-over-IPsec tunnels. # Uses racoon and setkey to support the configuration. Assumes that # OVS has complete control over IPsec configuration for the box. # xxx To-do: # - Doesn't actually check that Interface is connected to bridge # - If a certificate is badly formed, Racoon will refuse to start. We # should do a better job of verifying certificates are valid before # adding an interface to racoon.conf. 
import argparse import glob import os import subprocess import sys import ovs.dirs from ovs.db import error from ovs.db import types import ovs.util import ovs.daemon import ovs.db.idl import ovs.unixctl import ovs.unixctl.server import ovs.vlog vlog = ovs.vlog.Vlog("ovs-monitor-ipsec") root_prefix = '' # Prefix for absolute file names, for testing. SETKEY = "/usr/sbin/setkey" exiting = False def unixctl_exit(conn, unused_argv, unused_aux): global exiting exiting = True conn.reply(None) # Class to configure the racoon daemon, which handles IKE negotiation class Racoon: # Default locations for files conf_file = "/etc/racoon/racoon.conf" cert_dir = "/etc/racoon/certs" psk_file = "/etc/racoon/psk.txt" # Racoon configuration header we use for IKE conf_header = """# Configuration file generated by Open vSwitch # # Do not modify by hand! path pre_shared_key "%s"; path certificate "%s"; """ # Racoon configuration footer we use for IKE conf_footer = """sainfo anonymous { pfs_group 2; lifetime time 1 hour; encryption_algorithm aes; authentication_algorithm hmac_sha1, hmac_md5; compression_algorithm deflate; } """ # Certificate entry template. cert_entry = """remote %s { exchange_mode main; nat_traversal on; ike_frag on; certificate_type x509 "%s" "%s"; my_identifier asn1dn; peers_identifier asn1dn; peers_certfile x509 "%s"; verify_identifier on; proposal { encryption_algorithm aes; hash_algorithm sha1; authentication_method rsasig; dh_group 2; } } """ # Pre-shared key template. 
psk_entry = """remote %s { exchange_mode main; nat_traversal on; proposal { encryption_algorithm aes; hash_algorithm sha1; authentication_method pre_shared_key; dh_group 2; } } """ def __init__(self): self.psk_hosts = {} self.cert_hosts = {} if not os.path.isdir(root_prefix + self.cert_dir): os.mkdir(self.cert_dir) # Clean out stale peer certs from previous runs for ovs_cert in glob.glob("%s%s/ovs-*.pem" % (root_prefix, self.cert_dir)): try: os.remove(ovs_cert) except OSError: vlog.warn("couldn't remove %s" % ovs_cert) # Replace racoon's conf file with our template self.commit() def reload(self): exitcode = subprocess.call([root_prefix + "/etc/init.d/racoon", "reload"]) if exitcode != 0: # Racoon is finicky about its configuration file and will # refuse to start if it sees something it doesn't like # (e.g., a certificate file doesn't exist). Try restarting # the process before giving up. vlog.warn("attempting to restart racoon") exitcode = subprocess.call([root_prefix + "/etc/init.d/racoon", "restart"]) if exitcode != 0: vlog.warn("couldn't reload racoon") def commit(self): # Rewrite the Racoon configuration file conf_file = open(root_prefix + self.conf_file, 'w') conf_file.write(Racoon.conf_header % (self.psk_file, self.cert_dir)) for host, vals in self.cert_hosts.iteritems(): conf_file.write(Racoon.cert_entry % (host, vals["certificate"], vals["private_key"], vals["peer_cert_file"])) for host in self.psk_hosts: conf_file.write(Racoon.psk_entry % host) conf_file.write(Racoon.conf_footer) conf_file.close() # Rewrite the pre-shared keys file; it must only be readable by root. 
orig_umask = os.umask(0077) psk_file = open(root_prefix + Racoon.psk_file, 'w') os.umask(orig_umask) psk_file.write("# Generated by Open vSwitch...do not modify by hand!") psk_file.write("\n\n") for host, vals in self.psk_hosts.iteritems(): psk_file.write("%s %s\n" % (host, vals["psk"])) psk_file.close() self.reload() def _add_psk(self, host, psk): if host in self.cert_hosts: raise error.Error("host %s already defined for cert" % host) self.psk_hosts[host] = psk self.commit() def _verify_certs(self, vals): # Racoon will refuse to start if the certificate files don't # exist, so verify that they're there. if not os.path.isfile(root_prefix + vals["certificate"]): raise error.Error("'certificate' file does not exist: %s" % vals["certificate"]) elif not os.path.isfile(root_prefix + vals["private_key"]): raise error.Error("'private_key' file does not exist: %s" % vals["private_key"]) # Racoon won't start if a given certificate or private key isn't # valid. This is a weak test, but will detect the most flagrant # errors. if vals["peer_cert"].find("-----BEGIN CERTIFICATE-----") == -1: raise error.Error("'peer_cert' is not in valid PEM format") cert = open(root_prefix + vals["certificate"]).read() if cert.find("-----BEGIN CERTIFICATE-----") == -1: raise error.Error("'certificate' is not in valid PEM format") cert = open(root_prefix + vals["private_key"]).read() if cert.find("-----BEGIN RSA PRIVATE KEY-----") == -1: raise error.Error("'private_key' is not in valid PEM format") def _add_cert(self, host, vals): if host in self.psk_hosts: raise error.Error("host %s already defined for psk" % host) if vals["certificate"] == None: raise error.Error("'certificate' not defined for %s" % host) elif vals["private_key"] == None: # Assume the private key is stored in the same PEM file as # the certificate. We make a copy of "vals" so that we don't # modify the original "vals", which would cause the script # to constantly think that the configuration has changed # in the database. 
vals = vals.copy() vals["private_key"] = vals["certificate"] self._verify_certs(vals) # The peer's certificate comes to us in PEM format as a string. # Write that string to a file for Racoon to use. f = open(root_prefix + vals["peer_cert_file"], "w") f.write(vals["peer_cert"]) f.close() self.cert_hosts[host] = vals self.commit() def _del_cert(self, host): peer_cert_file = self.cert_hosts[host]["peer_cert_file"] del self.cert_hosts[host] self.commit() try: os.remove(root_prefix + peer_cert_file) except OSError: pass def add_entry(self, host, vals): if vals["peer_cert"]: self._add_cert(host, vals) elif vals["psk"]: self._add_psk(host, vals) def del_entry(self, host): if host in self.cert_hosts: self._del_cert(host) elif host in self.psk_hosts: del self.psk_hosts[host] self.commit() # Class to configure IPsec on a system using racoon for IKE and setkey # for maintaining the Security Association Database (SAD) and Security # Policy Database (SPD). Only policies for GRE are supported. class IPsec: def __init__(self): self.sad_flush() self.spd_flush() self.racoon = Racoon() self.entries = [] def call_setkey(self, cmds): try: p = subprocess.Popen([root_prefix + SETKEY, "-c"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) except: vlog.err("could not call %s%s" % (root_prefix, SETKEY)) sys.exit(1) # xxx It is safer to pass the string into the communicate() # xxx method, but it didn't work for slightly longer commands. # xxx An alternative may need to be found. p.stdin.write(cmds) return p.communicate()[0] def get_spi(self, local_ip, remote_ip, proto="esp"): # Run the setkey dump command to retrieve the SAD. Then, parse # the output looking for SPI buried in the output. Note that # multiple SAD entries can exist for the same "flow", since an # older entry could be in a "dying" state. 
spi_list = [] host_line = "%s %s" % (local_ip, remote_ip) results = self.call_setkey("dump ;\n").split("\n") for i in range(len(results)): if results[i].strip() == host_line: # The SPI is in the line following the host pair spi_line = results[i + 1] if (spi_line[1:4] == proto): spi = spi_line.split()[2] spi_list.append(spi.split('(')[1].rstrip(')')) return spi_list def sad_flush(self): self.call_setkey("flush;\n") def sad_del(self, local_ip, remote_ip): # To delete all SAD entries, we should be able to use setkey's # "deleteall" command. Unfortunately, it's fundamentally broken # on Linux and not documented as such. cmds = "" # Delete local_ip->remote_ip SAD entries spi_list = self.get_spi(local_ip, remote_ip) for spi in spi_list: cmds += "delete %s %s esp %s;\n" % (local_ip, remote_ip, spi) # Delete remote_ip->local_ip SAD entries spi_list = self.get_spi(remote_ip, local_ip) for spi in spi_list: cmds += "delete %s %s esp %s;\n" % (remote_ip, local_ip, spi) if cmds: self.call_setkey(cmds) def spd_flush(self): self.call_setkey("spdflush;\n") def spd_add(self, local_ip, remote_ip): cmds = ("spdadd %s %s gre -P out ipsec esp/transport//require;\n" % (local_ip, remote_ip)) cmds += ("spdadd %s %s gre -P in ipsec esp/transport//require;\n" % (remote_ip, local_ip)) self.call_setkey(cmds) def spd_del(self, local_ip, remote_ip): cmds = "spddelete %s %s gre -P out;\n" % (local_ip, remote_ip) cmds += "spddelete %s %s gre -P in;\n" % (remote_ip, local_ip) self.call_setkey(cmds) def add_entry(self, local_ip, remote_ip, vals): if remote_ip in self.entries: raise error.Error("host %s already configured for ipsec" % remote_ip) self.racoon.add_entry(remote_ip, vals) self.spd_add(local_ip, remote_ip) self.entries.append(remote_ip) def del_entry(self, local_ip, remote_ip): if remote_ip in self.entries: self.racoon.del_entry(remote_ip) self.spd_del(local_ip, remote_ip) self.sad_del(local_ip, remote_ip) self.entries.remove(remote_ip) def update_ipsec(ipsec, interfaces, new_interfaces): 
for name, vals in interfaces.iteritems(): if name not in new_interfaces: ipsec.del_entry(vals["local_ip"], vals["remote_ip"]) for name, vals in new_interfaces.iteritems(): orig_vals = interfaces.get(name) if orig_vals: # Configuration for this host already exists. Check if it's # changed. We use set difference, since we want to ignore # any local additions to "orig_vals" that we've made # (e.g. the "peer_cert_file" key). if set(vals.items()) - set(orig_vals.items()): ipsec.del_entry(vals["local_ip"], vals["remote_ip"]) else: continue try: ipsec.add_entry(vals["local_ip"], vals["remote_ip"], vals) except error.Error, msg: vlog.warn("skipping ipsec config for %s: %s" % (name, msg)) def get_ssl_cert(data): for ovs_rec in data["Open_vSwitch"].rows.itervalues(): if ovs_rec.ssl: ssl = ovs_rec.ssl[0] if ssl.certificate and ssl.private_key: return (ssl.certificate, ssl.private_key) return None def main(): parser = argparse.ArgumentParser() parser.add_argument("database", metavar="DATABASE", help="A socket on which ovsdb-server is listening.") parser.add_argument("--root-prefix", metavar="DIR", help="Use DIR as alternate root directory" " (for testing).") ovs.vlog.add_args(parser) ovs.daemon.add_args(parser) args = parser.parse_args() ovs.vlog.handle_args(args) ovs.daemon.handle_args(args) global root_prefix if args.root_prefix: root_prefix = args.root_prefix remote = args.database schema_helper = ovs.db.idl.SchemaHelper() schema_helper.register_columns("Interface", ["name", "type", "options"]) schema_helper.register_columns("Open_vSwitch", ["ssl"]) schema_helper.register_columns("SSL", ["certificate", "private_key"]) idl = ovs.db.idl.Idl(remote, schema_helper) ovs.daemon.daemonize() ovs.unixctl.command_register("exit", "", 0, 0, unixctl_exit, None) error, unixctl_server = ovs.unixctl.server.UnixctlServer.create(None) if error: ovs.util.ovs_fatal(error, "could not create unixctl server", vlog) ipsec = IPsec() interfaces = {} seqno = idl.change_seqno # Sequence number when 
we last processed the db while True: unixctl_server.run() if exiting: break idl.run() if seqno == idl.change_seqno: poller = ovs.poller.Poller() unixctl_server.wait(poller) idl.wait(poller) poller.block() continue seqno = idl.change_seqno ssl_cert = get_ssl_cert(idl.tables) new_interfaces = {} for rec in idl.tables["Interface"].rows.itervalues(): if rec.type == "ipsec_gre" or rec.type == "ipsec_gre64": name = rec.name options = rec.options peer_cert_name = "ovs-%s.pem" % (options.get("remote_ip")) entry = { "remote_ip": options.get("remote_ip"), "local_ip": options.get("local_ip", "0.0.0.0/0"), "certificate": options.get("certificate"), "private_key": options.get("private_key"), "use_ssl_cert": options.get("use_ssl_cert"), "peer_cert": options.get("peer_cert"), "peer_cert_file": Racoon.cert_dir + "/" + peer_cert_name, "psk": options.get("psk")} if entry["peer_cert"] and entry["psk"]: vlog.warn("both 'peer_cert' and 'psk' defined for %s" % name) continue elif not entry["peer_cert"] and not entry["psk"]: vlog.warn("no 'peer_cert' or 'psk' defined for %s" % name) continue # The "use_ssl_cert" option is deprecated and will # likely go away in the near future. 
if entry["use_ssl_cert"] == "true": if not ssl_cert: vlog.warn("no valid SSL entry for %s" % name) continue entry["certificate"] = ssl_cert[0] entry["private_key"] = ssl_cert[1] new_interfaces[name] = entry if interfaces != new_interfaces: update_ipsec(ipsec, interfaces, new_interfaces) interfaces = new_interfaces unixctl_server.close() idl.close() if __name__ == '__main__': try: main() except SystemExit: # Let system.exit() calls complete normally raise except: vlog.exception("traceback") sys.exit(ovs.daemon.RESTART_EXIT_CODE) openvswitch-2.0.1+git20140120/debian/ovsdbmonitor.install000066400000000000000000000000541226605124000230100ustar00rootroot00000000000000usr/bin/ovsdbmonitor usr/share/ovsdbmonitor openvswitch-2.0.1+git20140120/debian/ovsdbmonitor.manpages000066400000000000000000000000421226605124000231320ustar00rootroot00000000000000ovsdb/ovsdbmonitor/ovsdbmonitor.1 openvswitch-2.0.1+git20140120/debian/python-openvswitch.dirs000066400000000000000000000000631226605124000234460ustar00rootroot00000000000000usr/share/pyshared/ovs/ usr/share/pyshared/ovs/db/ openvswitch-2.0.1+git20140120/debian/python-openvswitch.install000066400000000000000000000001021226605124000241450ustar00rootroot00000000000000usr/share/openvswitch/python/ovs usr/lib/python2.6/dist-packages/ openvswitch-2.0.1+git20140120/debian/rules000077500000000000000000000123521226605124000177570ustar00rootroot00000000000000#!/usr/bin/make -f # -*- makefile -*- # Sample debian/rules that uses debhelper. # # This file was originally written by Joey Hess and Craig Small. # As a special exception, when this file is copied by dh-make into a # dh-make output file, you may use that output file without restriction. # This special exception was added by Craig Small in version 0.37 of dh-make. 
# # Modified to make a template file for a multi-binary package with separated # build-arch and build-indep targets by Bill Allombert 2001 PACKAGE=openvswitch pdkms=openvswitch-datapath-dkms DEB_UPSTREAM_VERSION=$(shell dpkg-parsechangelog | sed -rne 's,^Version: ([0-9]:)*([^-]+).*,\2,p') srcfiles := $(filter-out debian, $(wildcard * .[^.]*)) ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS))) PARALLEL = -j$(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS))) else PARALLEL = endif MAKEFLAGS += $(PARALLEL) CFLAGS += -g ifneq (,$(filter noopt,$(DEB_BUILD_OPTIONS))) CFLAGS += -O0 else CFLAGS += -O2 endif # Old versions of dpkg-buildflags do not understand --export=configure. # When dpkg-buildflags does not understand an option, it prints its full # --help output on stdout, so we have to avoid that here. buildflags := $(shell if dpkg-buildflags --export=configure >/dev/null 2>&1; \ then dpkg-buildflags --export=configure; fi) configure: configure-stamp configure-stamp: dh_testdir test -e configure || ./boot.sh test -d _debian || mkdir _debian echo $(DEB_BUILD_OPTIONS) echo $$CC cd _debian && ( \ test -e Makefile || \ ../configure --prefix=/usr --localstatedir=/var --enable-ssl \ --sysconfdir=/etc CFLAGS="$(CFLAGS)" \ $(buildflags) $(DATAPATH_CONFIGURE_OPTS)) touch configure-stamp #Architecture build: build-arch build-indep build-arch: build-arch-stamp build-arch-stamp: configure-stamp $(MAKE) -C _debian ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) if $(MAKE) -C _debian check TESTSUITEFLAGS='$(PARALLEL)'; then :; \ else \ cat _debian/tests/testsuite.log; \ exit 1; \ fi endif touch $@ build-indep: build-indep-stamp build-indep-stamp: configure-stamp $(MAKE) -C _debian dist distdir=openvswitch touch $@ clean: dh_testdir dh_testroot rm -f build-arch-stamp build-indep-stamp configure-stamp rm -rf _debian [ ! 
-f Makefile ] || $(MAKE) distclean dh_clean rm -f python/ovs/*.pyc python/ovs/db/*.pyc install: install-indep install-arch install-indep: build-indep dh_testdir dh_testroot dh_prep -i dh_installdirs -i $(MAKE) -C _debian DESTDIR=$(CURDIR)/debian/tmp install dh_install -i cp debian/rules.modules debian/openvswitch-datapath-source/usr/src/modules/openvswitch-datapath/debian/rules chmod 755 debian/openvswitch-datapath-source/usr/src/modules/openvswitch-datapath/debian/rules cd debian/openvswitch-datapath-source/usr/src && tar -c modules | bzip2 -9 > openvswitch-datapath.tar.bz2 && rm -rf modules #dkms stuff # setup the dirs dh_installdirs -p$(pdkms) usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION) # copy the source cd debian/$(pdkms)/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION) && tar xvzf $(CURDIR)/_debian/openvswitch.tar.gz && mv openvswitch/* . && rmdir openvswitch # Prepare dkms.conf from the dkms.conf.in template sed "s/__VERSION__/$(DEB_UPSTREAM_VERSION)/g" debian/dkms.conf.in > debian/$(pdkms)/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/dkms.conf # We don't need the debian folder in there, just upstream sources... rm -rf debian/$(pdkms)/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/debian # We don't need the rhel stuff in there either rm -rf debian/$(pdkms)/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/rhel # And we should also clean useless license files, which are already # descriped in our debian/copyright anyway. rm -f debian/$(pdkms)/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/COPYING \ debian/$(pdkms)/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/ovsdb/ovsdbmonitor/COPYING \ debian/$(pdkms)/usr/src/$(PACKAGE)-$(DEB_UPSTREAM_VERSION)/xenserver/LICENSE install-arch: build-arch dh_testdir dh_testroot dh_prep -s dh_installdirs -s $(MAKE) -C _debian DESTDIR=$(CURDIR)/debian/tmp install cp debian/openvswitch-switch.template debian/openvswitch-switch/usr/share/openvswitch/switch/default.template dh_install -s dh_link -s # Must not depend on anything. 
This is to be called by # binary-arch/binary-indep # in another 'make' thread. binary-common: dh_testdir dh_testroot dh_installchangelogs dh_installdocs dh_installexamples dh_installdebconf dh_installlogrotate dh_installinit -R dh_installcron dh_installman --language=C dh_link dh_strip --dbg-package=openvswitch-dbg dh_compress dh_fixperms dh_python2 dh_perl dh_makeshlibs dh_installdeb dh_shlibdeps dh_gencontrol dh_md5sums dh_builddeb binary-indep: install-indep $(MAKE) -f debian/rules DH_OPTIONS=-i binary-common binary-arch: install-arch $(MAKE) -f debian/rules DH_OPTIONS=-s binary-common binary: binary-arch binary-indep .PHONY: build clean binary-indep binary-arch binary install install-indep install-arch configure # This GNU make extensions disables parallel builds for the current Makefile # but not for sub-Makefiles. This is appropriate here because build-arch and # build-indep both invoke "make" on OVS, which can update some of the same # targets in ways that conflict (e.g. both update tests/testsuite). .NOTPARALLEL: openvswitch-2.0.1+git20140120/debian/rules.modules000077500000000000000000000015761226605124000214340ustar00rootroot00000000000000#! 
/usr/bin/make -f PACKAGE=openvswitch-datapath-module MA_DIR ?= /usr/share/modass -include $(MA_DIR)/include/generic.make -include $(MA_DIR)/include/common-rules.make DATAPATH_CONFIGURE_OPTS = kdist_clean: dh_testdir dh_testroot dh_clean rm -rf openvswitch .PHONY: kdist_config kdist_config: prep-deb-files .PHONY: binary-modules binary-modules: DSTDIR = $(CURDIR)/debian/$(PKGNAME)/lib/modules/$(KVERS)/kernel binary-modules: prep-deb-files dh_testdir dh_testroot dh_clean -k tar xzf openvswitch.tar.gz cd openvswitch && ./configure --with-linux=$(KSRC) $(DATAPATH_CONFIGURE_OPTS) cd openvswitch && $(MAKE) -C datapath/linux install -d -m755 $(DSTDIR) install -m644 openvswitch/datapath/linux/*.ko $(DSTDIR)/ dh_installmodules dh_installdocs dh_installchangelogs dh_compress dh_fixperms dh_installdeb dh_gencontrol dh_md5sums dh_builddeb --destdir=$(DEB_DESTDIR) openvswitch-2.0.1+git20140120/debian/source/000077500000000000000000000000001226605124000201745ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/debian/source/format000066400000000000000000000000141226605124000214020ustar00rootroot000000000000003.0 (quilt) openvswitch-2.0.1+git20140120/include/000077500000000000000000000000001226605124000170755ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/include/.gitignore000066400000000000000000000000271226605124000210640ustar00rootroot00000000000000/Makefile /Makefile.in openvswitch-2.0.1+git20140120/include/automake.mk000066400000000000000000000002221226605124000212300ustar00rootroot00000000000000include include/linux/automake.mk include include/openflow/automake.mk include include/openvswitch/automake.mk include include/sparse/automake.mk openvswitch-2.0.1+git20140120/include/linux/000077500000000000000000000000001226605124000202345ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/include/linux/automake.mk000066400000000000000000000001461226605124000223740ustar00rootroot00000000000000noinst_HEADERS += \ include/linux/if_ether.h \ 
include/linux/openvswitch.h \ include/linux/types.h openvswitch-2.0.1+git20140120/include/linux/if_ether.h000066400000000000000000000021101226605124000221640ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LINUX_IF_ETHER_H #define LINUX_IF_ETHER_H 1 /* On Linux, this header file just includes . * * On other platforms, this header file implements just enough of * to allow to work. */ #if defined(HAVE_LINUX_IF_ETHER_H) || defined(__KERNEL__) #include_next #else /* no */ #define ETH_ALEN 6 /* Octets in one ethernet addr */ #endif #endif /* */ openvswitch-2.0.1+git20140120/include/linux/openvswitch.h000066400000000000000000000477731226605124000230000ustar00rootroot00000000000000/* * Copyright (c) 2007-2013 Nicira, Inc. * * This file is offered under your choice of two licenses: Apache 2.0 or GNU * GPL 2.0 or later. The permission statements for each of these licenses is * given below. You may license your modifications to this file under either * of these licenses or both. If you wish to license your modifications under * only one of these licenses, delete the permission text for the other * license. * * ---------------------------------------------------------------------- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ---------------------------------------------------------------------- * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA * ---------------------------------------------------------------------- */ #ifndef _LINUX_OPENVSWITCH_H #define _LINUX_OPENVSWITCH_H 1 #include #include /** * struct ovs_header - header for OVS Generic Netlink messages. * @dp_ifindex: ifindex of local port for datapath (0 to make a request not * specific to a datapath). * * Attributes following the header are specific to a particular OVS Generic * Netlink family, but all of the OVS families use this header. */ struct ovs_header { int dp_ifindex; }; /* Datapaths. */ #define OVS_DATAPATH_FAMILY "ovs_datapath" #define OVS_DATAPATH_MCGROUP "ovs_datapath" #define OVS_DATAPATH_VERSION 0x1 enum ovs_datapath_cmd { OVS_DP_CMD_UNSPEC, OVS_DP_CMD_NEW, OVS_DP_CMD_DEL, OVS_DP_CMD_GET, OVS_DP_CMD_SET }; /** * enum ovs_datapath_attr - attributes for %OVS_DP_* commands. 
* @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local * port". This is the name of the network device whose dp_ifindex is given in * the &struct ovs_header. Always present in notifications. Required in * %OVS_DP_NEW requests. May be used as an alternative to specifying * dp_ifindex in other requests (with a dp_ifindex of 0). * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially * set on the datapath port (for OVS_ACTION_ATTR_MISS). Only valid on * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should * not be sent. * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the * datapath. Always present in notifications. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_DP_* commands. */ enum ovs_datapath_attr { OVS_DP_ATTR_UNSPEC, OVS_DP_ATTR_NAME, /* name of dp_ifindex netdev */ OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */ OVS_DP_ATTR_STATS, /* struct ovs_dp_stats */ __OVS_DP_ATTR_MAX }; #define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1) struct ovs_dp_stats { __u64 n_hit; /* Number of flow table matches. */ __u64 n_missed; /* Number of flow table misses. */ __u64 n_lost; /* Number of misses not sent to userspace. */ __u64 n_flows; /* Number of flows present */ }; struct ovs_vport_stats { __u64 rx_packets; /* total packets received */ __u64 tx_packets; /* total packets transmitted */ __u64 rx_bytes; /* total bytes received */ __u64 tx_bytes; /* total bytes transmitted */ __u64 rx_errors; /* bad packets received */ __u64 tx_errors; /* packet transmit problems */ __u64 rx_dropped; /* no space in linux buffers */ __u64 tx_dropped; /* no space available in linux */ }; /* Fixed logical ports. */ #define OVSP_LOCAL ((__u32)0) /* Packet transfer. */ #define OVS_PACKET_FAMILY "ovs_packet" #define OVS_PACKET_VERSION 0x1 enum ovs_packet_cmd { OVS_PACKET_CMD_UNSPEC, /* Kernel-to-user notifications. */ OVS_PACKET_CMD_MISS, /* Flow table miss. 
*/ OVS_PACKET_CMD_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ /* Userspace commands. */ OVS_PACKET_CMD_EXECUTE /* Apply actions to a packet. */ }; /** * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands. * @OVS_PACKET_ATTR_PACKET: Present for all notifications. Contains the entire * packet as received, from the start of the Ethernet header onward. For * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is * the flow key extracted from the packet as originally received. * @OVS_PACKET_ATTR_KEY: Present for all notifications. Contains the flow key * extracted from the packet as nested %OVS_KEY_ATTR_* attributes. This allows * userspace to adapt its flow setup strategy by comparing its notion of the * flow key against the kernel's. * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet. Used * for %OVS_PACKET_CMD_EXECUTE. It has nested %OVS_ACTION_ATTR_* attributes. * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content * specified there. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_PACKET_* commands. */ enum ovs_packet_attr { OVS_PACKET_ATTR_UNSPEC, OVS_PACKET_ATTR_PACKET, /* Packet data. */ OVS_PACKET_ATTR_KEY, /* Nested OVS_KEY_ATTR_* attributes. */ OVS_PACKET_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ OVS_PACKET_ATTR_USERDATA, /* OVS_ACTION_ATTR_USERSPACE arg. */ __OVS_PACKET_ATTR_MAX }; #define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1) /* Virtual ports. 
*/ #define OVS_VPORT_FAMILY "ovs_vport" #define OVS_VPORT_MCGROUP "ovs_vport" #define OVS_VPORT_VERSION 0x1 enum ovs_vport_cmd { OVS_VPORT_CMD_UNSPEC, OVS_VPORT_CMD_NEW, OVS_VPORT_CMD_DEL, OVS_VPORT_CMD_GET, OVS_VPORT_CMD_SET }; enum ovs_vport_type { OVS_VPORT_TYPE_UNSPEC, OVS_VPORT_TYPE_NETDEV, /* network device */ OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ OVS_VPORT_TYPE_VXLAN, /* VXLAN tunnel */ OVS_VPORT_TYPE_GRE64 = 104, /* GRE tunnel with 64-bit keys */ OVS_VPORT_TYPE_LISP = 105, /* LISP tunnel */ __OVS_VPORT_TYPE_MAX }; #define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1) /** * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands. * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath. * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type * of vport. * @OVS_VPORT_ATTR_NAME: Name of vport. For a vport based on a network device * this is the name of the network device. Maximum length %IFNAMSIZ-1 bytes * plus a null terminator. * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information. * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on * this port. A value of zero indicates that upcalls should not be sent. * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for * packets sent or received through the vport. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_VPORT_* commands. * * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and * %OVS_VPORT_ATTR_NAME attributes are required. %OVS_VPORT_ATTR_PORT_NO is * optional; if not specified a free port number is automatically selected. * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type * of vport. %OVS_VPORT_ATTR_STATS is optional and other attributes are * ignored. 
* * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to * look up the vport to operate on; otherwise dp_idx from the &struct * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport. */ enum ovs_vport_attr { OVS_VPORT_ATTR_UNSPEC, OVS_VPORT_ATTR_PORT_NO, /* u32 port number within datapath */ OVS_VPORT_ATTR_TYPE, /* u32 OVS_VPORT_TYPE_* constant. */ OVS_VPORT_ATTR_NAME, /* string name, up to IFNAMSIZ bytes long */ OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */ OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */ OVS_VPORT_ATTR_STATS, /* struct ovs_vport_stats */ __OVS_VPORT_ATTR_MAX }; #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels. */ enum { OVS_TUNNEL_ATTR_UNSPEC, OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */ __OVS_TUNNEL_ATTR_MAX }; #define OVS_TUNNEL_ATTR_MAX (__OVS_TUNNEL_ATTR_MAX - 1) /* Flows. */ #define OVS_FLOW_FAMILY "ovs_flow" #define OVS_FLOW_MCGROUP "ovs_flow" #define OVS_FLOW_VERSION 0x1 enum ovs_flow_cmd { OVS_FLOW_CMD_UNSPEC, OVS_FLOW_CMD_NEW, OVS_FLOW_CMD_DEL, OVS_FLOW_CMD_GET, OVS_FLOW_CMD_SET }; struct ovs_flow_stats { __u64 n_packets; /* Number of matched packets. */ __u64 n_bytes; /* Number of matched bytes. */ }; enum ovs_key_attr { OVS_KEY_ATTR_UNSPEC, OVS_KEY_ATTR_ENCAP, /* Nested set of encapsulated attributes. 
*/ OVS_KEY_ATTR_PRIORITY, /* u32 skb->priority */ OVS_KEY_ATTR_IN_PORT, /* u32 OVS dp port number */ OVS_KEY_ATTR_ETHERNET, /* struct ovs_key_ethernet */ OVS_KEY_ATTR_VLAN, /* be16 VLAN TCI */ OVS_KEY_ATTR_ETHERTYPE, /* be16 Ethernet type */ OVS_KEY_ATTR_IPV4, /* struct ovs_key_ipv4 */ OVS_KEY_ATTR_IPV6, /* struct ovs_key_ipv6 */ OVS_KEY_ATTR_TCP, /* struct ovs_key_tcp */ OVS_KEY_ATTR_UDP, /* struct ovs_key_udp */ OVS_KEY_ATTR_ICMP, /* struct ovs_key_icmp */ OVS_KEY_ATTR_ICMPV6, /* struct ovs_key_icmpv6 */ OVS_KEY_ATTR_ARP, /* struct ovs_key_arp */ OVS_KEY_ATTR_ND, /* struct ovs_key_nd */ OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */ OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */ OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */ #ifdef __KERNEL__ OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */ #endif OVS_KEY_ATTR_MPLS = 62, /* array of struct ovs_key_mpls. * The implementation may restrict * the accepted length of the array. */ __OVS_KEY_ATTR_MAX }; #define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1) enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_ID, /* be64 Tunnel ID */ OVS_TUNNEL_KEY_ATTR_IPV4_SRC, /* be32 src IP address. */ OVS_TUNNEL_KEY_ATTR_IPV4_DST, /* be32 dst IP address. */ OVS_TUNNEL_KEY_ATTR_TOS, /* u8 Tunnel IP ToS. */ OVS_TUNNEL_KEY_ATTR_TTL, /* u8 Tunnel IP TTL. */ OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT, /* No argument, set DF. */ OVS_TUNNEL_KEY_ATTR_CSUM, /* No argument. CSUM packet. */ __OVS_TUNNEL_KEY_ATTR_MAX }; #define OVS_TUNNEL_KEY_ATTR_MAX (__OVS_TUNNEL_KEY_ATTR_MAX - 1) /** * enum ovs_frag_type - IPv4 and IPv6 fragment type * @OVS_FRAG_TYPE_NONE: Packet is not a fragment. * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0. * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset. * * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct * ovs_key_ipv6. 
*/ enum ovs_frag_type { OVS_FRAG_TYPE_NONE, OVS_FRAG_TYPE_FIRST, OVS_FRAG_TYPE_LATER, __OVS_FRAG_TYPE_MAX }; #define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1) struct ovs_key_ethernet { __u8 eth_src[ETH_ALEN]; __u8 eth_dst[ETH_ALEN]; }; struct ovs_key_mpls { __be32 mpls_lse; }; struct ovs_key_ipv4 { __be32 ipv4_src; __be32 ipv4_dst; __u8 ipv4_proto; __u8 ipv4_tos; __u8 ipv4_ttl; __u8 ipv4_frag; /* One of OVS_FRAG_TYPE_*. */ }; struct ovs_key_ipv6 { __be32 ipv6_src[4]; __be32 ipv6_dst[4]; __be32 ipv6_label; /* 20-bits in least-significant bits. */ __u8 ipv6_proto; __u8 ipv6_tclass; __u8 ipv6_hlimit; __u8 ipv6_frag; /* One of OVS_FRAG_TYPE_*. */ }; struct ovs_key_tcp { __be16 tcp_src; __be16 tcp_dst; }; struct ovs_key_udp { __be16 udp_src; __be16 udp_dst; }; struct ovs_key_sctp { __be16 sctp_src; __be16 sctp_dst; }; struct ovs_key_icmp { __u8 icmp_type; __u8 icmp_code; }; struct ovs_key_icmpv6 { __u8 icmpv6_type; __u8 icmpv6_code; }; struct ovs_key_arp { __be32 arp_sip; __be32 arp_tip; __be16 arp_op; __u8 arp_sha[ETH_ALEN]; __u8 arp_tha[ETH_ALEN]; }; struct ovs_key_nd { __u32 nd_target[4]; __u8 nd_sll[ETH_ALEN]; __u8 nd_tll[ETH_ALEN]; }; /** * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands. * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow * key. Always present in notifications. Required for all requests (except * dumps). * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying * the actions to take for packets that match the key. Always present in * notifications. Required for %OVS_FLOW_CMD_NEW requests, optional for * %OVS_FLOW_CMD_SET requests. * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this * flow. Present in notifications if the stats would be nonzero. Ignored in * requests. * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the * TCP flags seen on packets in this flow. Only present in notifications for * TCP flows, and only if it would be nonzero. 
Ignored in requests. * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on * the system monotonic clock, at which a packet was last processed for this * flow. Only present in notifications if a packet has been processed for this * flow. Ignored in requests. * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the * last-used time, accumulated TCP flags, and statistics for this flow. * Otherwise ignored in requests. Never present in notifications. * @OVS_FLOW_ATTR_MASK: Nested %OVS_KEY_ATTR_* attributes specifying the * mask bits for wildcarded flow match. Mask bit value '1' specifies exact * match with corresponding flow key bit, while mask bit value '0' specifies * a wildcarded match. Omitting attribute is treated as wildcarding all * corresponding fields. Optional for all requests. If not present, * all flow key bits are exact match bits. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_FLOW_* commands. */ enum ovs_flow_attr { OVS_FLOW_ATTR_UNSPEC, OVS_FLOW_ATTR_KEY, /* Sequence of OVS_KEY_ATTR_* attributes. */ OVS_FLOW_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ OVS_FLOW_ATTR_STATS, /* struct ovs_flow_stats. */ OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */ OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */ OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */ OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */ __OVS_FLOW_ATTR_MAX }; #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) /** * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with * @OVS_ACTION_ATTR_SAMPLE. A value of 0 samples no packets, a value of * %UINT32_MAX samples all packets and intermediate values sample intermediate * fractions of packets. * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event. * Actions are passed as nested attributes. 
* * Executes the specified actions with the given probability on a per-packet * basis. */ enum ovs_sample_attr { OVS_SAMPLE_ATTR_UNSPEC, OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */ OVS_SAMPLE_ATTR_ACTIONS, /* Nested OVS_ACTION_ATTR_* attributes. */ __OVS_SAMPLE_ATTR_MAX, }; #define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1) /** * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action. * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION * message should be sent. Required. * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA. */ enum ovs_userspace_attr { OVS_USERSPACE_ATTR_UNSPEC, OVS_USERSPACE_ATTR_PID, /* u32 Netlink PID to receive upcalls. */ OVS_USERSPACE_ATTR_USERDATA, /* Optional user-specified cookie. */ __OVS_USERSPACE_ATTR_MAX }; #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) /** * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument. * @mpls_lse: MPLS label stack entry to push. * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame. * * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other values are * rejected. */ struct ovs_action_push_mpls { __be32 mpls_lse; __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */ }; /** * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. * @vlan_tpid: Tag protocol identifier (TPID) to push. * @vlan_tci: Tag control information (TCI) to push. The CFI bit must be set * (but it will not be set in the 802.1Q header that is pushed). * * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID * values are those that the kernel module also parses as 802.1Q headers, to * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN * from having surprising results.
*/ struct ovs_action_push_vlan { __be16 vlan_tpid; /* 802.1Q TPID. */ __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ }; /** * enum ovs_action_attr - Action types. * * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested * %OVS_USERSPACE_ATTR_* attributes. * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the * packet. * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilistically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its * value. * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the * top of the packet's MPLS label stack. Set the ethertype of the * encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to * indicate the new packet contents. * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the * packet's MPLS label stack. Set the encapsulating frame's ethertype to * indicate the new packet contents. This could potentially still be * %ETH_P_MPLS_* if the resulting MPLS label stack is not empty. If there * is no MPLS label stack, as determined by ethertype, no action is taken. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment * type may not be changed. */ enum ovs_action_attr { OVS_ACTION_ATTR_UNSPEC, OVS_ACTION_ATTR_OUTPUT, /* u32 port number. */ OVS_ACTION_ATTR_USERSPACE, /* Nested OVS_USERSPACE_ATTR_*. */ OVS_ACTION_ATTR_SET, /* One nested OVS_KEY_ATTR_*. */ OVS_ACTION_ATTR_PUSH_VLAN, /* struct ovs_action_push_vlan. */ OVS_ACTION_ATTR_POP_VLAN, /* No argument. */ OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls.
*/ OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ __OVS_ACTION_ATTR_MAX }; #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) #endif /* _LINUX_OPENVSWITCH_H */ openvswitch-2.0.1+git20140120/include/linux/types.h000066400000000000000000000032111226605124000215460ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LINUX_TYPES_H #define LINUX_TYPES_H 1 /* On Linux, this header file just includes . * * On other platforms, this header file implements just enough of * to allow to work, that is, it defines * the __u and __be types. */ #ifdef __KERNEL__ #include_next #elif defined(HAVE_LINUX_TYPES_H) /* With some combinations of kernel and userspace headers, including both * and only works if you do so in that order, so * force it. 
*/ #ifdef __CHECKER__ #define __CHECK_ENDIAN__ #endif #include #include_next #else /* no */ #include #ifdef __CHECKER__ #define __bitwise__ __attribute__((bitwise)) #else #define __bitwise__ #endif typedef uint8_t __u8; typedef uint16_t __u16; typedef uint32_t __u32; typedef uint64_t __u64; typedef uint16_t __bitwise__ __be16; typedef uint32_t __bitwise__ __be32; typedef uint64_t __bitwise__ __be64; #endif /* no */ #endif /* */ openvswitch-2.0.1+git20140120/include/openflow/000077500000000000000000000000001226605124000207265ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/include/openflow/.gitignore000066400000000000000000000000111226605124000227060ustar00rootroot00000000000000*.hstamp openvswitch-2.0.1+git20140120/include/openflow/automake.mk000066400000000000000000000027171226605124000230740ustar00rootroot00000000000000noinst_HEADERS += \ include/openflow/nicira-ext.h \ include/openflow/openflow-1.0.h \ include/openflow/openflow-1.1.h \ include/openflow/openflow-1.2.h \ include/openflow/openflow-1.3.h \ include/openflow/openflow-common.h \ include/openflow/openflow.h if HAVE_PYTHON SUFFIXES += .h .hstamp .h.hstamp: $(run_python) $(srcdir)/build-aux/check-structs -I$(srcdir)/include $< touch $@ HSTAMP_FILES = \ include/openflow/nicira-ext.hstamp \ include/openflow/openflow-1.0.hstamp \ include/openflow/openflow-1.1.hstamp \ include/openflow/openflow-1.2.hstamp \ include/openflow/openflow-1.3.hstamp \ include/openflow/openflow-common.hstamp \ include/openflow/openflow.hstamp CLEANFILES += $(HSTAMP_FILES) ALL_LOCAL += $(HSTAMP_FILES) $(HSTAMP_FILES): build-aux/check-structs include/openflow/openflow-1.0.hstamp: \ include/openflow/openflow-common.h include/openflow/openflow-1.1.hstamp: \ include/openflow/openflow-common.h include/openflow/openflow-1.2.hstamp: \ include/openflow/openflow-common.h \ include/openflow/openflow-1.1.h include/openflow/openflow-1.3.hstamp: \ include/openflow/openflow-common.h \ include/openflow/openflow-1.1.h \ 
include/openflow/openflow-1.2.h include/openflow/nicira-ext.hstamp: \ include/openflow/openflow.h \ include/openflow/openflow-common.h \ include/openflow/openflow-1.0.h \ include/openflow/openflow-1.1.h \ include/openflow/openflow-1.2.h \ include/openflow/openflow-1.3.h endif EXTRA_DIST += build-aux/check-structs openvswitch-2.0.1+git20140120/include/openflow/nicira-ext.h000066400000000000000000003006361226605124000231520ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OPENFLOW_NICIRA_EXT_H #define OPENFLOW_NICIRA_EXT_H 1 #include "openflow/openflow.h" #include "openvswitch/types.h" /* The following vendor extensions, proposed by Nicira, are not yet * standardized, so they are not included in openflow.h. Some of them may be * suitable for standardization; others we never expect to standardize. */ /* Nicira vendor-specific error messages extension. * * OpenFlow 1.0 has a set of predefined error types (OFPET_*) and codes (which * are specific to each type). It does not have any provision for * vendor-specific error codes, and it does not even provide "generic" error * codes that can apply to problems not anticipated by the OpenFlow * specification authors. * * This extension attempts to address the problem by adding a generic "error * vendor extension". 
The extension works as follows: use NXET_VENDOR as type * and NXVC_VENDOR_ERROR as code, followed by struct nx_vendor_error with * vendor-specific details, followed by at least 64 bytes of the failed * request. * * It would be better to have a type-specific vendor extension, e.g. so that * OFPET_BAD_ACTION could be used with vendor-specific code values. But * OFPET_BAD_ACTION and most other standardized types already specify that * their 'data' values are (the start of) the OpenFlow message being replied * to, so there is no room to insert a vendor ID. * * Currently this extension is only implemented by Open vSwitch, but it seems * like a reasonable candidate for future standardization. */ /* This is a random number to avoid accidental collision with any other * vendor's extension. */ #define NXET_VENDOR 0xb0c2 /* ofp_error msg 'code' values for NXET_VENDOR. */ enum nx_vendor_code { NXVC_VENDOR_ERROR /* 'data' contains struct nx_vendor_error. */ }; /* 'data' for 'type' == NXET_VENDOR, 'code' == NXVC_VENDOR_ERROR. */ struct nx_vendor_error { ovs_be32 vendor; /* Vendor ID as in struct ofp_vendor_header. */ ovs_be16 type; /* Vendor-defined type. */ ovs_be16 code; /* Vendor-defined subtype. */ /* Followed by at least the first 64 bytes of the failed request. */ }; /* Nicira vendor requests and replies. */ /* Header for Nicira vendor requests and replies. */ struct nicira_header { struct ofp_header header; ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be32 subtype; /* See the NXT numbers in ofp-msgs.h. */ }; OFP_ASSERT(sizeof(struct nicira_header) == 16); /* Header for Nicira vendor stats request and reply messages in OpenFlow * 1.0. */ struct nicira10_stats_msg { struct ofp10_vendor_stats_msg vsm; /* Vendor NX_VENDOR_ID. */ ovs_be32 subtype; /* One of NXST_* below. */ uint8_t pad[4]; /* Align to 64-bits. */ }; OFP_ASSERT(sizeof(struct nicira10_stats_msg) == 24); /* Header for Nicira vendor stats request and reply messages in OpenFlow * 1.1. 
*/ struct nicira11_stats_msg { struct ofp11_vendor_stats_msg vsm; /* Vendor NX_VENDOR_ID. */ ovs_be32 subtype; /* One of NXST_* below. */ }; OFP_ASSERT(sizeof(struct nicira11_stats_msg) == 24); /* Fields to use when hashing flows. */ enum nx_hash_fields { /* Ethernet source address (NXM_OF_ETH_SRC) only. */ NX_HASH_FIELDS_ETH_SRC, /* L2 through L4, symmetric across src/dst. Specifically, each of the * following fields, if present, is hashed (slashes separate symmetric * pairs): * * - NXM_OF_ETH_DST / NXM_OF_ETH_SRC * - NXM_OF_ETH_TYPE * - The VID bits from NXM_OF_VLAN_TCI, ignoring PCP and CFI. * - NXM_OF_IP_PROTO * - NXM_OF_IP_SRC / NXM_OF_IP_DST * - NXM_OF_TCP_SRC / NXM_OF_TCP_DST */ NX_HASH_FIELDS_SYMMETRIC_L4 }; /* This command enables or disables an Open vSwitch extension that allows a * controller to specify the OpenFlow table to which a flow should be added, * instead of having the switch decide which table is most appropriate as * required by OpenFlow 1.0. Because NXM was designed as an extension to * OpenFlow 1.0, the extension applies equally to ofp10_flow_mod and * nx_flow_mod. By default, the extension is disabled. * * When this feature is enabled, Open vSwitch treats struct ofp10_flow_mod's * and struct nx_flow_mod's 16-bit 'command' member as two separate fields. * The upper 8 bits are used as the table ID, the lower 8 bits specify the * command as usual. A table ID of 0xff is treated like a wildcarded table ID. * * The specific treatment of the table ID depends on the type of flow mod: * * - OFPFC_ADD: Given a specific table ID, the flow is always placed in that * table. If an identical flow already exists in that table only, then it * is replaced. If the flow cannot be placed in the specified table, * either because the table is full or because the table cannot support * flows of the given type, the switch replies with an OFPFMFC_TABLE_FULL * error. 
(A controller can distinguish these cases by comparing the * current and maximum number of entries reported in ofp_table_stats.) * * If the table ID is wildcarded, the switch picks an appropriate table * itself. If an identical flow already exists in the selected flow table, * then it is replaced. The choice of table might depend on the flows * that are already in the switch; for example, if one table fills up then * the switch might fall back to another one. * * - OFPFC_MODIFY, OFPFC_DELETE: Given a specific table ID, only flows * within that table are matched and modified or deleted. If the table ID * is wildcarded, flows within any table may be matched and modified or * deleted. * * - OFPFC_MODIFY_STRICT, OFPFC_DELETE_STRICT: Given a specific table ID, * only a flow within that table may be matched and modified or deleted. * If the table ID is wildcarded and exactly one flow within any table * matches, then it is modified or deleted; if flows in more than one * table match, then none is modified or deleted. */ struct nx_flow_mod_table_id { uint8_t set; /* Nonzero to enable, zero to disable. */ uint8_t pad[7]; }; OFP_ASSERT(sizeof(struct nx_flow_mod_table_id) == 8); enum nx_packet_in_format { NXPIF_OPENFLOW10 = 0, /* Standard OpenFlow 1.0 compatible. */ NXPIF_NXM = 1 /* Nicira Extended. */ }; /* NXT_SET_PACKET_IN_FORMAT request. */ struct nx_set_packet_in_format { ovs_be32 format; /* One of NXPIF_*. */ }; OFP_ASSERT(sizeof(struct nx_set_packet_in_format) == 4); /* NXT_PACKET_IN (analogous to OFPT_PACKET_IN). * * NXT_PACKET_IN is similar to the OpenFlow 1.2 OFPT_PACKET_IN. The * differences are: * * - NXT_PACKET_IN includes the cookie of the rule that triggered the * message. (OpenFlow 1.3 OFPT_PACKET_IN also includes the cookie.) * * - The metadata fields use NXM (instead of OXM) field numbers. * * Open vSwitch 1.9.0 and later omits metadata fields that are zero (as allowed * by OpenFlow 1.2). Earlier versions included all implemented metadata * fields.
* * Open vSwitch does not include non-metadata in the nx_match, because by * definition that information can be found in the packet itself. The format * and the standards allow this, however, so controllers should be prepared to * tolerate future changes. * * The NXM format is convenient for reporting metadata values, but it is * important not to interpret the format as matching against a flow, because it * does not. Nothing is being matched; arbitrary metadata masks would not be * meaningful. * * Whereas in most cases a controller can expect to only get back NXM fields * that it set up itself (e.g. flow dumps will ordinarily report only NXM * fields from flows that the controller added), NXT_PACKET_IN messages might * contain fields that the controller does not understand, because the switch * might support fields (new registers, new protocols, etc.) that the * controller does not. The controller must be prepared to tolerate these. * * The 'cookie' and 'table_id' fields have no meaning when 'reason' is * OFPR_NO_MATCH. In this case they should be set to 0. */ struct nx_packet_in { ovs_be32 buffer_id; /* ID assigned by datapath. */ ovs_be16 total_len; /* Full length of frame. */ uint8_t reason; /* Reason packet is sent (one of OFPR_*). */ uint8_t table_id; /* ID of the table that was looked up. */ ovs_be64 cookie; /* Cookie of the rule that was looked up. */ ovs_be16 match_len; /* Size of nx_match. */ uint8_t pad[6]; /* Align to 64-bits. */ /* Followed by: * - Exactly match_len (possibly 0) bytes containing the nx_match, then * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of * all-zero bytes, then * - Exactly 2 all-zero padding bytes, then * - An Ethernet frame whose length is inferred from nxh.header.length. * * The padding bytes preceding the Ethernet frame ensure that the IP * header (if any) following the Ethernet header is 32-bit aligned. */ /* uint8_t nxm_fields[...]; */ /* NXM headers. */ /* uint8_t pad[2]; */ /* Align to 64 bit + 16 bit.
*/ /* uint8_t data[0]; */ /* Ethernet frame. */ }; OFP_ASSERT(sizeof(struct nx_packet_in) == 24); /* Configures the "role" of the sending controller. The default role is: * * - Other (NX_ROLE_OTHER), which allows the controller access to all * OpenFlow features. * * The other possible roles are a related pair: * * - Master (NX_ROLE_MASTER) is equivalent to Other, except that there may * be at most one Master controller at a time: when a controller * configures itself as Master, any existing Master is demoted to the * Slave role. * * - Slave (NX_ROLE_SLAVE) allows the controller read-only access to * OpenFlow features. In particular attempts to modify the flow table * will be rejected with an OFPBRC_EPERM error. * * Slave controllers do not receive OFPT_PACKET_IN or OFPT_FLOW_REMOVED * messages, but they do receive OFPT_PORT_STATUS messages. */ struct nx_role_request { ovs_be32 role; /* One of NX_ROLE_*. */ }; OFP_ASSERT(sizeof(struct nx_role_request) == 4); enum nx_role { NX_ROLE_OTHER, /* Default role, full access. */ NX_ROLE_MASTER, /* Full access, at most one. */ NX_ROLE_SLAVE /* Read-only access. */ }; /* NXT_SET_ASYNC_CONFIG. * * Sent by a controller, this message configures the asynchronous messages that * the controller wants to receive. Element 0 in each array specifies messages * of interest when the controller has an "other" or "master" role; element 1, * when the controller has a "slave" role. * * Each array element is a bitmask in which a 0-bit disables receiving a * particular message and a 1-bit enables receiving it. Each bit controls the * message whose 'reason' corresponds to the bit index. For example, the bit * with value 1<<2 == 4 in port_status_mask[1] determines whether the * controller will receive OFPT_PORT_STATUS messages with reason OFPPR_MODIFY * (value 2) when the controller has a "slave" role. * * As a side effect, for service controllers, this message changes the * miss_send_len from default of zero to OFP_DEFAULT_MISS_SEND_LEN (128). 
*/ struct nx_async_config { ovs_be32 packet_in_mask[2]; /* Bitmasks of OFPR_* values. */ ovs_be32 port_status_mask[2]; /* Bitmasks of OFPPR_* values. */ ovs_be32 flow_removed_mask[2]; /* Bitmasks of OFPRR_* values. */ }; OFP_ASSERT(sizeof(struct nx_async_config) == 24); /* Nicira vendor flow actions. */ enum nx_action_subtype { NXAST_SNAT__OBSOLETE, /* No longer used. */ NXAST_RESUBMIT, /* struct nx_action_resubmit */ NXAST_SET_TUNNEL, /* struct nx_action_set_tunnel */ NXAST_DROP_SPOOFED_ARP__OBSOLETE, NXAST_SET_QUEUE, /* struct nx_action_set_queue */ NXAST_POP_QUEUE, /* struct nx_action_pop_queue */ NXAST_REG_MOVE, /* struct nx_action_reg_move */ NXAST_REG_LOAD, /* struct nx_action_reg_load */ NXAST_NOTE, /* struct nx_action_note */ NXAST_SET_TUNNEL64, /* struct nx_action_set_tunnel64 */ NXAST_MULTIPATH, /* struct nx_action_multipath */ NXAST_AUTOPATH__OBSOLETE, /* No longer used. */ NXAST_BUNDLE, /* struct nx_action_bundle */ NXAST_BUNDLE_LOAD, /* struct nx_action_bundle */ NXAST_RESUBMIT_TABLE, /* struct nx_action_resubmit */ NXAST_OUTPUT_REG, /* struct nx_action_output_reg */ NXAST_LEARN, /* struct nx_action_learn */ NXAST_EXIT, /* struct nx_action_header */ NXAST_DEC_TTL, /* struct nx_action_header */ NXAST_FIN_TIMEOUT, /* struct nx_action_fin_timeout */ NXAST_CONTROLLER, /* struct nx_action_controller */ NXAST_DEC_TTL_CNT_IDS, /* struct nx_action_cnt_ids */ NXAST_WRITE_METADATA, /* struct nx_action_write_metadata */ NXAST_PUSH_MPLS, /* struct nx_action_push_mpls */ NXAST_POP_MPLS, /* struct nx_action_pop_mpls */ NXAST_SET_MPLS_TTL, /* struct nx_action_ttl */ NXAST_DEC_MPLS_TTL, /* struct nx_action_header */ NXAST_STACK_PUSH, /* struct nx_action_stack */ NXAST_STACK_POP, /* struct nx_action_stack */ NXAST_SAMPLE, /* struct nx_action_sample */ }; /* Header for Nicira-defined actions. */ struct nx_action_header { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_*.
*/ uint8_t pad[6]; }; OFP_ASSERT(sizeof(struct nx_action_header) == 16); /* Action structures for NXAST_RESUBMIT and NXAST_RESUBMIT_TABLE. * * These actions search one of the switch's flow tables: * * - For NXAST_RESUBMIT_TABLE only, if the 'table' member is not 255, then * it specifies the table to search. * * - Otherwise (for NXAST_RESUBMIT_TABLE with a 'table' of 255, or for * NXAST_RESUBMIT regardless of 'table'), it searches the current flow * table, that is, the OpenFlow flow table that contains the flow from * which this action was obtained. If this action did not come from a * flow table (e.g. it came from an OFPT_PACKET_OUT message), then table 0 * is the current table. * * The flow table lookup uses a flow that may be slightly modified from the * original lookup: * * - For NXAST_RESUBMIT, the 'in_port' member of struct nx_action_resubmit * is used as the flow's in_port. * * - For NXAST_RESUBMIT_TABLE, if the 'in_port' member is not OFPP_IN_PORT, * then its value is used as the flow's in_port. Otherwise, the original * in_port is used. * * - If actions that modify the flow (e.g. OFPAT_SET_VLAN_VID) precede the * resubmit action, then the flow is updated with the new values. * * Following the lookup, the original in_port is restored. * * If the modified flow matched in the flow table, then the corresponding * actions are executed. Afterward, actions following the resubmit in the * original set of actions, if any, are executed; any changes made to the * packet (e.g. changes to VLAN) by secondary actions persist when those * actions are executed, although the original in_port is restored. * * Resubmit actions may be used any number of times within a set of actions. * * Resubmit actions may nest to an implementation-defined depth. Beyond this * implementation-defined depth, further resubmit actions are simply ignored. * * NXAST_RESUBMIT ignores 'table' and 'pad'. NXAST_RESUBMIT_TABLE requires * 'pad' to be all-bits-zero. 
* * Open vSwitch 1.0.1 and earlier did not support recursion. Open vSwitch * before 1.2.90 did not support NXAST_RESUBMIT_TABLE. */ struct nx_action_resubmit { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_RESUBMIT. */ ovs_be16 in_port; /* New in_port for checking flow table. */ uint8_t table; /* NXAST_RESUBMIT_TABLE: table to use. */ uint8_t pad[3]; }; OFP_ASSERT(sizeof(struct nx_action_resubmit) == 16); /* Action structure for NXAST_SET_TUNNEL. * * Sets the encapsulating tunnel ID to a 32-bit value. The most-significant 32 * bits of the tunnel ID are set to 0. */ struct nx_action_set_tunnel { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_SET_TUNNEL. */ uint8_t pad[2]; ovs_be32 tun_id; /* Tunnel ID. */ }; OFP_ASSERT(sizeof(struct nx_action_set_tunnel) == 16); /* Action structure for NXAST_SET_TUNNEL64. * * Sets the encapsulating tunnel ID to a 64-bit value. */ struct nx_action_set_tunnel64 { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 24. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_SET_TUNNEL64. */ uint8_t pad[6]; ovs_be64 tun_id; /* Tunnel ID. */ }; OFP_ASSERT(sizeof(struct nx_action_set_tunnel64) == 24); /* Action structure for NXAST_SET_QUEUE. * * Set the queue that should be used when packets are output. This is similar * to the OpenFlow OFPAT_ENQUEUE action, but does not take the output port as * an argument. This allows the queue to be defined before the port is * known. */ struct nx_action_set_queue { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_SET_QUEUE. */ uint8_t pad[2]; ovs_be32 queue_id; /* Where to enqueue packets. */ }; OFP_ASSERT(sizeof(struct nx_action_set_queue) == 16); /* Action structure for NXAST_POP_QUEUE. 
* * Restores the queue to the value it was before any NXAST_SET_QUEUE actions * were used. Only the original queue can be restored this way; no stack is * maintained. */ struct nx_action_pop_queue { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_POP_QUEUE. */ uint8_t pad[6]; }; OFP_ASSERT(sizeof(struct nx_action_pop_queue) == 16); /* Action structure for NXAST_REG_MOVE. * * Copies src[src_ofs:src_ofs+n_bits] to dst[dst_ofs:dst_ofs+n_bits], where * a[b:c] denotes the bits within 'a' numbered 'b' through 'c' (not including * bit 'c'). Bit numbering starts at 0 for the least-significant bit, 1 for * the next most significant bit, and so on. * * 'src' and 'dst' are nxm_header values with nxm_hasmask=0. (It doesn't make * sense to use nxm_hasmask=1 because the action does not do any kind of * matching; it uses the actual value of a field.) * * The following nxm_header values are potentially acceptable as 'src': * * - NXM_OF_IN_PORT * - NXM_OF_ETH_DST * - NXM_OF_ETH_SRC * - NXM_OF_ETH_TYPE * - NXM_OF_VLAN_TCI * - NXM_OF_IP_TOS * - NXM_OF_IP_PROTO * - NXM_OF_IP_SRC * - NXM_OF_IP_DST * - NXM_OF_TCP_SRC * - NXM_OF_TCP_DST * - NXM_OF_UDP_SRC * - NXM_OF_UDP_DST * - NXM_OF_ICMP_TYPE * - NXM_OF_ICMP_CODE * - NXM_OF_ARP_OP * - NXM_OF_ARP_SPA * - NXM_OF_ARP_TPA * - NXM_NX_TUN_ID * - NXM_NX_ARP_SHA * - NXM_NX_ARP_THA * - NXM_NX_ICMPV6_TYPE * - NXM_NX_ICMPV6_CODE * - NXM_NX_ND_SLL * - NXM_NX_ND_TLL * - NXM_NX_REG(idx) for idx in the switch's accepted range. * - NXM_NX_PKT_MARK * - NXM_NX_TUN_IPV4_SRC * - NXM_NX_TUN_IPV4_DST * * The following nxm_header values are potentially acceptable as 'dst': * * - NXM_OF_ETH_DST * - NXM_OF_ETH_SRC * - NXM_OF_IP_TOS * - NXM_OF_IP_SRC * - NXM_OF_IP_DST * - NXM_OF_TCP_SRC * - NXM_OF_TCP_DST * - NXM_OF_UDP_SRC * - NXM_OF_UDP_DST * Modifying any of the above fields changes the corresponding packet * header. 
* * - NXM_OF_IN_PORT * * - NXM_NX_REG(idx) for idx in the switch's accepted range. * * - NXM_NX_PKT_MARK * * - NXM_OF_VLAN_TCI. Modifying this field's value has side effects on the * packet's 802.1Q header. Setting a value with CFI=0 removes the 802.1Q * header (if any), ignoring the other bits. Setting a value with CFI=1 * adds or modifies the 802.1Q header appropriately, setting the TCI field * to the field's new value (with the CFI bit masked out). * * - NXM_NX_TUN_ID, NXM_NX_TUN_IPV4_SRC, NXM_NX_TUN_IPV4_DST. Modifying * any of these values modifies the corresponding tunnel header field used * for the packet's next tunnel encapsulation, if allowed by the * configuration of the output tunnel port. * * A given nxm_header value may be used as 'src' or 'dst' only on a flow whose * nx_match satisfies its prerequisites. For example, NXM_OF_IP_TOS may be * used only if the flow's nx_match includes an nxm_entry that specifies * nxm_type=NXM_OF_ETH_TYPE, nxm_hasmask=0, and nxm_value=0x0800. * * The switch will reject actions for which src_ofs+n_bits is greater than the * width of 'src' or dst_ofs+n_bits is greater than the width of 'dst' with * error type OFPET_BAD_ACTION, code OFPBAC_BAD_ARGUMENT. * * This action behaves properly when 'src' overlaps with 'dst', that is, it * behaves as if 'src' were copied out to a temporary buffer, then the * temporary buffer copied to 'dst'. */ struct nx_action_reg_move { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 24. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_REG_MOVE. */ ovs_be16 n_bits; /* Number of bits. */ ovs_be16 src_ofs; /* Starting bit offset in source. */ ovs_be16 dst_ofs; /* Starting bit offset in destination. */ ovs_be32 src; /* Source register. */ ovs_be32 dst; /* Destination register. */ }; OFP_ASSERT(sizeof(struct nx_action_reg_move) == 24); /* Action structure for NXAST_REG_LOAD. 
* * Copies value[0:n_bits] to dst[ofs:ofs+n_bits], where a[b:c] denotes the bits * within 'a' numbered 'b' through 'c' (not including bit 'c'). Bit numbering * starts at 0 for the least-significant bit, 1 for the next most significant * bit, and so on. * * 'dst' is an nxm_header with nxm_hasmask=0. See the documentation for * NXAST_REG_MOVE, above, for the permitted fields and for the side effects of * loading them. * * The 'ofs' and 'n_bits' fields are combined into a single 'ofs_nbits' field * to avoid enlarging the structure by another 8 bytes. To allow 'n_bits' to * take a value between 1 and 64 (inclusive) while taking up only 6 bits, it is * also stored as one less than its true value: * * 15 6 5 0 * +------------------------------+------------------+ * | ofs | n_bits - 1 | * +------------------------------+------------------+ * * The switch will reject actions for which ofs+n_bits is greater than the * width of 'dst', or in which any bits in 'value' with value 2**n_bits or * greater are set to 1, with error type OFPET_BAD_ACTION, code * OFPBAC_BAD_ARGUMENT. */ struct nx_action_reg_load { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 24. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_REG_LOAD. */ ovs_be16 ofs_nbits; /* (ofs << 6) | (n_bits - 1). */ ovs_be32 dst; /* Destination register. */ ovs_be64 value; /* Immediate value. */ }; OFP_ASSERT(sizeof(struct nx_action_reg_load) == 24); /* Action structure for NXAST_STACK_PUSH and NXAST_STACK_POP. * * Pushes (or pops) field[offset: offset + n_bits] to (or from) * top of the stack. */ struct nx_action_stack { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 24. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_STACK_PUSH or NXAST_STACK_POP. */ ovs_be16 offset; /* Bit offset into the field. */ ovs_be32 field; /* The field used for push or pop. */ ovs_be16 n_bits; /* (n_bits + 1) bits of the field. NOTE(review): the summary above describes field[offset: offset + n_bits]; confirm whether this holds n_bits or n_bits - 1. */ uint8_t zero[6]; /* Reserved, must be zero.
*/ }; OFP_ASSERT(sizeof(struct nx_action_stack) == 24); /* Action structure for NXAST_NOTE. * * This action has no effect. It is variable length. The switch does not * attempt to interpret the user-defined 'note' data in any way. A controller * can use this action to attach arbitrary metadata to a flow. * * This action might go away in the future. */ struct nx_action_note { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* A multiple of 8, but at least 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_NOTE. */ uint8_t note[6]; /* Start of user-defined data. */ /* Possibly followed by additional user-defined data. */ }; OFP_ASSERT(sizeof(struct nx_action_note) == 16); /* Action structure for NXAST_MULTIPATH. * * This action performs the following steps in sequence: * * 1. Hashes the fields designated by 'fields', one of NX_HASH_FIELDS_*. * Refer to the definition of "enum nx_hash_fields" for details. * * The 'basis' value is used as a universal hash parameter, that is, * different values of 'basis' yield different hash functions. The * particular universal hash function used is implementation-defined. * * The hashed fields' values are drawn from the current state of the * flow, including all modifications that have been made by actions up to * this point. * * 2. Applies the multipath link choice algorithm specified by 'algorithm', * one of NX_MP_ALG_*. Refer to the definition of "enum nx_mp_algorithm" * for details. * * The output of the algorithm is 'link', an unsigned integer less than * or equal to 'max_link'. * * Some algorithms use 'arg' as an additional argument. * * 3. Stores 'link' in dst[ofs:ofs+n_bits]. The format and semantics of * 'dst' and 'ofs_nbits' are similar to those for the NXAST_REG_LOAD * action.
* * The switch will reject actions that have an unknown 'fields', or an unknown * 'algorithm', or in which ofs+n_bits is greater than the width of 'dst', or * in which 'max_link' is greater than or equal to 2**n_bits, with error type * OFPET_BAD_ACTION, code OFPBAC_BAD_ARGUMENT. */ struct nx_action_multipath { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 32. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_MULTIPATH. */ /* What fields to hash and how. */ ovs_be16 fields; /* One of NX_HASH_FIELDS_*. */ ovs_be16 basis; /* Universal hash parameter. */ ovs_be16 pad0; /* Padding. */ /* Multipath link choice algorithm to apply to hash value. */ ovs_be16 algorithm; /* One of NX_MP_ALG_*. */ ovs_be16 max_link; /* Number of output links, minus 1. */ ovs_be32 arg; /* Algorithm-specific argument. */ ovs_be16 pad1; /* Padding. */ /* Where to store the result. */ ovs_be16 ofs_nbits; /* (ofs << 6) | (n_bits - 1). */ ovs_be32 dst; /* Destination. */ }; OFP_ASSERT(sizeof(struct nx_action_multipath) == 32); /* NXAST_MULTIPATH: Multipath link choice algorithm to apply. * * In the descriptions below, 'n_links' is max_link + 1. */ enum nx_mp_algorithm { /* link = hash(flow) % n_links. * * Redistributes all traffic when n_links changes. O(1) performance. See * RFC 2992. * * Use UINT16_MAX for max_link to get a raw hash value. */ NX_MP_ALG_MODULO_N, /* link = hash(flow) / (MAX_HASH / n_links). * * Redistributes between one-quarter and one-half of traffic when n_links * changes. O(1) performance. See RFC 2992. */ NX_MP_ALG_HASH_THRESHOLD, /* for i in [0,n_links): * weights[i] = hash(flow, i) * link = { i such that weights[i] >= weights[j] for all j != i } * * Redistributes 1/n_links of traffic when n_links changes. O(n_links) * performance. If n_links is greater than a threshold (currently 64, but * subject to change), Open vSwitch will substitute another algorithm * automatically. See RFC 2992. */ NX_MP_ALG_HRW, /* Highest Random Weight.
*/ /* i = 0 * repeat: * i = i + 1 * link = hash(flow, i) % arg * while link > max_link * * Redistributes 1/n_links of traffic when n_links changes. O(1) * performance when arg/max_link is bounded by a constant. * * Redistributes all traffic when arg changes. * * arg must be greater than max_link and for best performance should be no * more than approximately max_link * 2. If arg is outside the acceptable * range, Open vSwitch will automatically substitute the least power of 2 * greater than max_link. * * This algorithm is specific to Open vSwitch. */ NX_MP_ALG_ITER_HASH /* Iterative Hash. */ }; /* Action structure for NXAST_LEARN. * * This action adds or modifies a flow in an OpenFlow table, similar to * OFPT_FLOW_MOD with OFPFC_MODIFY_STRICT as 'command'. The new flow has the * specified idle timeout, hard timeout, priority, cookie, and flags. The new * flow's match criteria and actions are built by applying each of the series * of flow_mod_spec elements included as part of the action. * * A flow_mod_spec starts with a 16-bit header. A header that is all-bits-0 is * a no-op used for padding the action as a whole to a multiple of 8 bytes in * length. Otherwise, the flow_mod_spec can be thought of as copying 'n_bits' * bits from a source to a destination. In this case, the header contains * multiple fields: * * 15 14 13 12 11 10 0 * +------+---+------+---------------------------------+ * | 0 |src| dst | n_bits | * +------+---+------+---------------------------------+ * * The meaning and format of a flow_mod_spec depend on 'src' and 'dst'. The * following table summarizes the meaning of each possible combination. * Details follow the table: * * src dst meaning * --- --- ---------------------------------------------------------- * 0 0 Add match criteria based on value in a field. * 1 0 Add match criteria based on an immediate value. * 0 1 Add NXAST_REG_LOAD action to copy field into a different field.
* 1 1 Add NXAST_REG_LOAD action to load immediate value into a field. * 0 2 Add OFPAT_OUTPUT action to output to port from specified field. * All other combinations are undefined and not allowed. * * The flow_mod_spec header is followed by a source specification and a * destination specification. The format and meaning of the source * specification depends on 'src': * * - If 'src' is 0, the source bits are taken from a field in the flow to * which this action is attached. (This should be a wildcarded field. If * its value is fully specified then the source bits being copied have * constant values.) * * The source specification is an ovs_be32 'field' and an ovs_be16 'ofs'. * 'field' is an nxm_header with nxm_hasmask=0, and 'ofs' the starting bit * offset within that field. The source bits are field[ofs:ofs+n_bits-1]. * 'field' and 'ofs' are subject to the same restrictions as the source * field in NXAST_REG_MOVE. * * - If 'src' is 1, the source bits are a constant value. The source * specification is (n_bits+15)/16*2 bytes long. Taking those bytes as a * number in network order, the source bits are the 'n_bits' * least-significant bits. The switch will report an error if other bits * in the constant are nonzero. * * The flow_mod_spec destination specification, for 'dst' of 0 or 1, is an * ovs_be32 'field' and an ovs_be16 'ofs'. 'field' is an nxm_header with * nxm_hasmask=0 and 'ofs' is a starting bit offset within that field. The * meaning of the flow_mod_spec depends on 'dst': * * - If 'dst' is 0, the flow_mod_spec specifies match criteria for the new * flow. The new flow matches only if bits field[ofs:ofs+n_bits-1] in a * packet equal the source bits. 'field' may be any nxm_header with * nxm_hasmask=0 that is allowed in NXT_FLOW_MOD. * * Order is significant. Earlier flow_mod_specs must satisfy any * prerequisites for matching fields specified later, by copying constant * values into prerequisite fields. 
* * The switch will reject flow_mod_specs that do not satisfy NXM masking * restrictions. * * - If 'dst' is 1, the flow_mod_spec specifies an NXAST_REG_LOAD action for * the new flow. The new flow copies the source bits into * field[ofs:ofs+n_bits-1]. Actions are executed in the same order as the * flow_mod_specs. * * A single NXAST_REG_LOAD action writes no more than 64 bits, so n_bits * greater than 64 yields multiple NXAST_REG_LOAD actions. * * The flow_mod_spec destination spec for 'dst' of 2 (when 'src' is 0) is * empty. It has the following meaning: * * - The flow_mod_spec specifies an OFPAT_OUTPUT action for the new flow. * The new flow outputs to the OpenFlow port specified by the source field. * Of the special output ports with value OFPP_MAX or larger, OFPP_IN_PORT, * OFPP_FLOOD, OFPP_LOCAL, and OFPP_ALL are supported. Other special ports * may not be used. * * Resource Management * ------------------- * * A switch has a finite amount of flow table space available for learning. * When this space is exhausted, no new learning table entries will be learned * until some existing flow table entries expire. The controller should be * prepared to handle this by flooding (which can be implemented as a * low-priority flow). * * If a learned flow matches a single TCP stream with a relatively long * timeout, one may make the best of resource constraints by setting * 'fin_idle_timeout' or 'fin_hard_timeout' (both measured in seconds), or * both, to shorter timeouts. When either of these is specified as a nonzero * value, OVS adds a NXAST_FIN_TIMEOUT action, with the specified timeouts, to * the learned flow. * * Examples * -------- * * The following examples give a prose description of the flow_mod_specs along * with informal notation for how those would be represented and a hex dump of * the bytes that would be required. * * These examples could work with various nx_action_learn parameters. 
Typical * values would be idle_timeout=OFP_FLOW_PERMANENT, hard_timeout=60, * priority=OFP_DEFAULT_PRIORITY, flags=0, table_id=10. * * 1. Learn input port based on the source MAC, with lookup into * NXM_NX_REG1[16:31] by resubmit to in_port=99: * * Match on in_port=99: * ovs_be16(src=1, dst=0, n_bits=16), 20 10 * ovs_be16(99), 00 63 * ovs_be32(NXM_OF_IN_PORT), ovs_be16(0) 00 00 00 02 00 00 * * Match Ethernet destination on Ethernet source from packet: * ovs_be16(src=0, dst=0, n_bits=48), 00 30 * ovs_be32(NXM_OF_ETH_SRC), ovs_be16(0) 00 00 04 06 00 00 * ovs_be32(NXM_OF_ETH_DST), ovs_be16(0) 00 00 02 06 00 00 * * Set NXM_NX_REG1[16:31] to the packet's input port: * ovs_be16(src=0, dst=1, n_bits=16), 08 10 * ovs_be32(NXM_OF_IN_PORT), ovs_be16(0) 00 00 00 02 00 00 * ovs_be32(NXM_NX_REG1), ovs_be16(16) 00 01 02 04 00 10 * * Given a packet that arrived on port A with Ethernet source address B, * this would set up the flow "in_port=99, dl_dst=B, * actions=load:A->NXM_NX_REG1[16..31]". * * In syntax accepted by ovs-ofctl, this action is: learn(in_port=99, * NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], * load:NXM_OF_IN_PORT[]->NXM_NX_REG1[16..31]) * * 2. Output to input port based on the source MAC and VLAN VID, with lookup * into NXM_NX_REG1[16:31]: * * Match on same VLAN ID as packet: * ovs_be16(src=0, dst=0, n_bits=12), 00 0c * ovs_be32(NXM_OF_VLAN_TCI), ovs_be16(0) 00 00 08 02 00 00 * ovs_be32(NXM_OF_VLAN_TCI), ovs_be16(0) 00 00 08 02 00 00 * * Match Ethernet destination on Ethernet source from packet: * ovs_be16(src=0, dst=0, n_bits=48), 00 30 * ovs_be32(NXM_OF_ETH_SRC), ovs_be16(0) 00 00 04 06 00 00 * ovs_be32(NXM_OF_ETH_DST), ovs_be16(0) 00 00 02 06 00 00 * * Output to the packet's input port: * ovs_be16(src=0, dst=2, n_bits=16), 10 10 * ovs_be32(NXM_OF_IN_PORT), ovs_be16(0) 00 00 00 02 00 00 * * Given a packet that arrived on port A with Ethernet source address B in * VLAN C, this would set up the flow "dl_dst=B, vlan_vid=C, * actions=output:A". 
* * In syntax accepted by ovs-ofctl, this action is: * learn(NXM_OF_VLAN_TCI[0..11], NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], * output:NXM_OF_IN_PORT[]) * * 3. Here's a recipe for a very simple-minded MAC learning switch. It uses a * 10-second MAC expiration time to make it easier to see what's going on * * ovs-vsctl del-controller br0 * ovs-ofctl del-flows br0 * ovs-ofctl add-flow br0 "table=0 actions=learn(table=1, \ hard_timeout=10, NXM_OF_VLAN_TCI[0..11], \ NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], \ output:NXM_OF_IN_PORT[]), resubmit(,1)" * ovs-ofctl add-flow br0 "table=1 priority=0 actions=flood" * * You can then dump the MAC learning table with: * * ovs-ofctl dump-flows br0 table=1 * * Usage Advice * ------------ * * For best performance, segregate learned flows into a table that is not used * for any other flows except possibly for a lowest-priority "catch-all" flow * (a flow with no match criteria). If different learning actions specify * different match criteria, use different tables for the learned flows. * * The meaning of 'hard_timeout' and 'idle_timeout' can be counterintuitive. * These timeouts apply to the flow that is added, which means that a flow with * an idle timeout will expire when no traffic has been sent *to* the learned * address. This is not usually the intent in MAC learning; instead, we want * the MAC learn entry to expire when no traffic has been sent *from* the * learned address. Use a hard timeout for that. */ struct nx_action_learn { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* At least 32 (the size of this structure). */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_LEARN. */ ovs_be16 idle_timeout; /* Idle time before discarding (seconds). */ ovs_be16 hard_timeout; /* Max time before discarding (seconds). */ ovs_be16 priority; /* Priority level of flow entry. */ ovs_be64 cookie; /* Cookie for new flow. */ ovs_be16 flags; /* Either 0 or OFPFF_SEND_FLOW_REM. */ uint8_t table_id; /* Table to insert flow entry. */ uint8_t pad; /* Must be zero.
*/ ovs_be16 fin_idle_timeout; /* Idle timeout after FIN, if nonzero. */ ovs_be16 fin_hard_timeout; /* Hard timeout after FIN, if nonzero. */ /* Followed by a sequence of flow_mod_spec elements, as described above, * until the end of the action is reached. */ }; OFP_ASSERT(sizeof(struct nx_action_learn) == 32); /* Bit fields of the 16-bit flow_mod_spec header. */ #define NX_LEARN_N_BITS_MASK 0x3ff /* NOTE(review): covers bits 0..9 only, whereas the flow_mod_spec header diagram in the NXAST_LEARN comment draws n_bits across bits 0..10 -- confirm which is intended. */ #define NX_LEARN_SRC_FIELD (0 << 13) /* Copy from field. */ #define NX_LEARN_SRC_IMMEDIATE (1 << 13) /* Copy from immediate value. */ #define NX_LEARN_SRC_MASK (1 << 13) #define NX_LEARN_DST_MATCH (0 << 11) /* Add match criterion. */ #define NX_LEARN_DST_LOAD (1 << 11) /* Add NXAST_REG_LOAD action. */ #define NX_LEARN_DST_OUTPUT (2 << 11) /* Add OFPAT_OUTPUT action. */ #define NX_LEARN_DST_RESERVED (3 << 11) /* Not yet defined. */ #define NX_LEARN_DST_MASK (3 << 11) /* Action structure for NXAST_FIN_TIMEOUT. * * This action changes the idle timeout or hard timeout, or both, of this * OpenFlow rule when the rule matches a TCP packet with the FIN or RST flag. * When such a packet is observed, the action reduces the rule's idle timeout * to 'fin_idle_timeout' and its hard timeout to 'fin_hard_timeout'. This * action has no effect on an existing timeout that is already shorter than the * one that the action specifies. A 'fin_idle_timeout' or 'fin_hard_timeout' * of zero has no effect on the respective timeout. * * 'fin_idle_timeout' and 'fin_hard_timeout' are measured in seconds. * 'fin_hard_timeout' specifies time since the flow's creation, not since the * receipt of the FIN or RST. * * This is useful for quickly discarding learned TCP flows that otherwise will * take a long time to expire. * * This action is intended for use with an OpenFlow rule that matches only a * single TCP flow. If the rule matches multiple TCP flows (e.g.
it wildcards * all TCP traffic, or all TCP traffic to a particular port), then any FIN or * RST in any of those flows will cause the entire OpenFlow rule to expire * early, which is not normally desirable. */ struct nx_action_fin_timeout { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_FIN_TIMEOUT. */ ovs_be16 fin_idle_timeout; /* New idle timeout, if nonzero. */ ovs_be16 fin_hard_timeout; /* New hard timeout, if nonzero. */ ovs_be16 pad; /* Must be zero. */ }; OFP_ASSERT(sizeof(struct nx_action_fin_timeout) == 16); /* Action structure for NXAST_BUNDLE and NXAST_BUNDLE_LOAD. * * The bundle actions choose a slave from a supplied list of options. * NXAST_BUNDLE outputs to its selection. NXAST_BUNDLE_LOAD writes its * selection to a register. * * The list of possible slaves follows the nx_action_bundle structure. The size * of each slave is governed by its type as indicated by the 'slave_type' * parameter. The list of slaves should be padded at its end with zeros to make * the total length of the action a multiple of 8. * * Switches infer from the 'slave_type' parameter the size of each slave. All * implementations must support the NXM_OF_IN_PORT 'slave_type' which indicates * that the slaves are OpenFlow port numbers with NXM_LENGTH(NXM_OF_IN_PORT) == * 2 byte width. Switches should reject actions which indicate unknown or * unsupported slave types. * * Switches use a strategy dictated by the 'algorithm' parameter to choose a * slave. If the switch does not support the specified 'algorithm' parameter, * it should reject the action. * * Several algorithms take into account liveness when selecting slaves. The * liveness of a slave is implementation defined (with one exception), but will * generally take into account things like its carrier status and the results * of any link monitoring protocols which happen to be running on it.
In order * to give controllers a place-holder value, the OFPP_NONE port is always * considered live. * * Some slave selection strategies require the use of a hash function, in which * case the 'fields' and 'basis' parameters should be populated. The 'fields' * parameter (one of NX_HASH_FIELDS_*) designates which parts of the flow to * hash. Refer to the definition of "enum nx_hash_fields" for details. The * 'basis' parameter is used as a universal hash parameter. Different values * of 'basis' yield different hash results. * * The 'zero' parameter at the end of the action structure is reserved for * future use. Switches are required to reject actions which have nonzero * bytes in the 'zero' field. * * NXAST_BUNDLE actions should have 'ofs_nbits' and 'dst' zeroed. Switches * should reject actions which have nonzero bytes in either of these fields. * * NXAST_BUNDLE_LOAD stores the OpenFlow port number of the selected slave in * dst[ofs:ofs+n_bits]. The format and semantics of 'dst' and 'ofs_nbits' are * similar to those for the NXAST_REG_LOAD action. */ struct nx_action_bundle { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length including slaves. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_BUNDLE or NXAST_BUNDLE_LOAD. */ /* Slave choice algorithm to apply to hash value. */ ovs_be16 algorithm; /* One of NX_BD_ALG_*. */ /* What fields to hash and how. */ ovs_be16 fields; /* One of NX_HASH_FIELDS_*. */ ovs_be16 basis; /* Universal hash parameter. */ ovs_be32 slave_type; /* NXM_OF_IN_PORT. */ ovs_be16 n_slaves; /* Number of slaves. */ ovs_be16 ofs_nbits; /* (ofs << 6) | (n_bits - 1). */ ovs_be32 dst; /* Destination. */ uint8_t zero[4]; /* Reserved. Must be zero. */ /* Followed by the list of 'n_slaves' slaves, zero-padded so that the total * length of the action is a multiple of 8. */ }; OFP_ASSERT(sizeof(struct nx_action_bundle) == 32); /* NXAST_BUNDLE: Bundle slave choice algorithm to apply. * * In the descriptions below, 'slaves' is the list of possible slaves in the * order they appear in the OpenFlow action.
*/ enum nx_bd_algorithm { /* Chooses the first live slave listed in the bundle. * * O(n_slaves) performance. */ NX_BD_ALG_ACTIVE_BACKUP, /* for i in [0,n_slaves): * weights[i] = hash(flow, i) * slave = { slaves[i] such that weights[i] >= weights[j] for all j != i } * * Redistributes 1/n_slaves of traffic when a slave's liveness changes. * O(n_slaves) performance. * * Uses the 'fields' and 'basis' parameters. */ NX_BD_ALG_HRW /* Highest Random Weight. */ }; /* Action structure for NXAST_DEC_TTL_CNT_IDS. * * If the packet is not IPv4 or IPv6, does nothing. For IPv4 or IPv6, if the * TTL or hop limit is at least 2, decrements it by 1. Otherwise, if TTL or * hop limit is 0 or 1, sends a packet-in to the controllers with each of the * 'n_controllers' controller IDs specified in 'cnt_ids'. * * (This differs from NXAST_DEC_TTL in that for NXAST_DEC_TTL the packet-in is * sent only to controllers with id 0.) */ struct nx_action_cnt_ids { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length including cnt_ids and padding. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_DEC_TTL_CNT_IDS. */ ovs_be16 n_controllers; /* Number of controllers. */ uint8_t zeros[4]; /* Must be zero. */ /* Followed by 1 or more controller ids. * * uint16_t cnt_ids[]; // Controller ids. * uint8_t pad[]; // Must be 0 to 8-byte align cnt_ids[]. */ }; OFP_ASSERT(sizeof(struct nx_action_cnt_ids) == 16); /* Action structure for NXAST_OUTPUT_REG. * * Outputs to the OpenFlow port number written to src[ofs:ofs+n_bits]. * * The format and semantics of 'src' and 'ofs_nbits' are similar to those for * the NXAST_REG_LOAD action. * * The acceptable nxm_header values for 'src' are the same as the acceptable * nxm_header values for the 'src' field of NXAST_REG_MOVE. * * The 'max_len' field indicates the number of bytes to send when the chosen * port is OFPP_CONTROLLER. Its semantics are equivalent to the 'max_len' * field of OFPAT_OUTPUT.
* * The 'zero' field is required to be zeroed for forward compatibility. */ struct nx_action_output_reg { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 24. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_OUTPUT_REG. */ ovs_be16 ofs_nbits; /* (ofs << 6) | (n_bits - 1). */ ovs_be32 src; /* Source. */ ovs_be16 max_len; /* Max length to send to controller. */ uint8_t zero[6]; /* Reserved, must be zero. */ }; OFP_ASSERT(sizeof(struct nx_action_output_reg) == 24); /* NXAST_EXIT * * Discontinues action processing. * * The NXAST_EXIT action causes the switch to immediately halt processing * actions for the flow. Any actions which have already been processed are * executed by the switch. However, any further actions, including those which * may be in different tables, or different levels of the NXAST_RESUBMIT * hierarchy, will be ignored. * * Uses the nx_action_header structure. */ /* Flexible flow specifications (aka NXM = Nicira Extended Match). * * OpenFlow 1.0 has "struct ofp10_match" for specifying flow matches. This * structure is fixed-length and hence difficult to extend. This section * describes a more flexible, variable-length flow match, called "nx_match" for * short, that is also supported by Open vSwitch. This section also defines a * replacement for each OpenFlow message that includes struct ofp10_match. * * * Format * ====== * * An nx_match is a sequence of zero or more "nxm_entry"s, which are * type-length-value (TLV) entries, each 5 to 259 (inclusive) bytes long. * "nxm_entry"s are not aligned on or padded to any multibyte boundary. The * first 4 bytes of an nxm_entry are its "header", followed by the entry's * "body".
* * An nxm_entry's header is interpreted as a 32-bit word in network byte order: * * |<-------------------- nxm_type ------------------>| * | | * |31 16 15 9| 8 7 0 * +----------------------------------+---------------+--+------------------+ * | nxm_vendor | nxm_field |hm| nxm_length | * +----------------------------------+---------------+--+------------------+ * * The most-significant 23 bits of the header are collectively "nxm_type". * Bits 16...31 are "nxm_vendor", one of the NXM_VENDOR_* values below. Bits * 9...15 are "nxm_field", which is a vendor-specific value. nxm_type normally * designates a protocol header, such as the Ethernet type, but it can also * refer to packet metadata, such as the switch port on which a packet arrived. * * Bit 8 is "nxm_hasmask" (labeled "hm" above for space reasons). The meaning * of this bit is explained later. * * The least-significant 8 bits are "nxm_length", a positive integer. The * length of the nxm_entry, including the header, is exactly 4 + nxm_length * bytes. * * For a given nxm_vendor, nxm_field, and nxm_hasmask value, nxm_length is a * constant. It is included only to allow software to minimally parse * "nxm_entry"s of unknown types. (Similarly, for a given nxm_vendor, * nxm_field, and nxm_length, nxm_hasmask is a constant.) * * * Semantics * ========= * * A zero-length nx_match (one with no "nxm_entry"s) matches every packet. * * An nxm_entry places a constraint on the packets matched by the nx_match: * * - If nxm_hasmask is 0, the nxm_entry's body contains a value for the * field, called "nxm_value". The nx_match matches only packets in which * the field equals nxm_value. * * - If nxm_hasmask is 1, then the nxm_entry's body contains a value for the * field (nxm_value), followed by a bitmask of the same length as the * value, called "nxm_mask". For each 1-bit in position J in nxm_mask, the * nx_match matches only packets for which bit J in the given field's value * matches bit J in nxm_value. 
A 0-bit in nxm_mask causes the * corresponding bit in nxm_value to be ignored (it should be 0; Open vSwitch * may enforce this someday), as is the corresponding bit in the field's * value. (The sense of the nxm_mask bits is the opposite of that used by * the "wildcards" member of struct ofp10_match.) * * When nxm_hasmask is 1, nxm_length is always even. * * An all-zero-bits nxm_mask is equivalent to omitting the nxm_entry * entirely. An all-one-bits nxm_mask is equivalent to specifying 0 for * nxm_hasmask. * * When there are multiple "nxm_entry"s, all of the constraints must be met. * * * Mask Restrictions * ================= * * Masks may be restricted: * * - Some nxm_types may not support masked wildcards, that is, nxm_hasmask * must always be 0 when these fields are specified. For example, the * field that identifies the port on which a packet was received may not be * masked. * * - Some nxm_types that do support masked wildcards may only support certain * nxm_mask patterns. For example, fields that have IPv4 address values * may be restricted to CIDR masks. * * These restrictions should be noted in specifications for individual fields. * A switch may accept an nxm_hasmask or nxm_mask value that the specification * disallows, if the switch correctly implements support for that nxm_hasmask * or nxm_mask value. A switch must reject an attempt to set up a flow that * contains a nxm_hasmask or nxm_mask value that it does not support. * * * Prerequisite Restrictions * ========================= * * The presence of an nxm_entry with a given nxm_type may be restricted based * on the presence of or values of other "nxm_entry"s. For example: * * - An nxm_entry for nxm_type=NXM_OF_IP_TOS is allowed only if it is * preceded by another entry with nxm_type=NXM_OF_ETH_TYPE, nxm_hasmask=0, * and nxm_value=0x0800. That is, matching on the IP type of service is * allowed only if the Ethernet type is explicitly set to IP.
* * - An nxm_entry for nxm_type=NXM_OF_TCP_SRC is allowed only if it is * preceded by an entry with nxm_type=NXM_OF_ETH_TYPE, nxm_hasmask=0, and * nxm_value either 0x0800 or 0x86dd, and another with * nxm_type=NXM_OF_IP_PROTO, nxm_hasmask=0, nxm_value=6, in that order. * That is, matching on the TCP source port is allowed only if the Ethernet * type is IP or IPv6 and the IP protocol is TCP. * * These restrictions should be noted in specifications for individual fields. * A switch may implement relaxed versions of these restrictions. A switch * must reject an attempt to set up a flow that violates its restrictions. * * * Ordering Restrictions * ===================== * * An nxm_entry that has prerequisite restrictions must appear after the * "nxm_entry"s for its prerequisites. Ordering of "nxm_entry"s within an * nx_match is not otherwise constrained. * * Any given nxm_type may appear in an nx_match at most once. * * * nxm_entry Examples * ================== * * These examples show the format of a single nxm_entry with particular * nxm_hasmask and nxm_length values. The diagrams are labeled with field * numbers and byte indexes. 
* * * 8-bit nxm_value, nxm_hasmask=1, nxm_length=2: * * 0 3 4 5 * +------------+---+---+ * | header | v | m | * +------------+---+---+ * * * 16-bit nxm_value, nxm_hasmask=0, nxm_length=2: * * 0 3 4 5 * +------------+------+ * | header | value| * +------------+------+ * * * 32-bit nxm_value, nxm_hasmask=0, nxm_length=4: * * 0 3 4 7 * +------------+-------------+ * | header | nxm_value | * +------------+-------------+ * * * 48-bit nxm_value, nxm_hasmask=0, nxm_length=6: * * 0 3 4 9 * +------------+------------------+ * | header | nxm_value | * +------------+------------------+ * * * 48-bit nxm_value, nxm_hasmask=1, nxm_length=12: * * 0 3 4 9 10 15 * +------------+------------------+------------------+ * | header | nxm_value | nxm_mask | * +------------+------------------+------------------+ * * * Error Reporting * =============== * * A switch should report an error in an nx_match using error type * OFPET_BAD_REQUEST and one of the NXBRC_NXM_* codes. Ideally the switch * should report a specific error code, if one is assigned for the particular * problem, but NXBRC_NXM_INVALID is also available to report a generic * nx_match error. */ /* Composes a 32-bit nxm_header from its vendor, field, hasmask, and length * parts. */ #define NXM_HEADER__(VENDOR, FIELD, HASMASK, LENGTH) \ (((VENDOR) << 16) | ((FIELD) << 9) | ((HASMASK) << 8) | (LENGTH)) /* Header for an entry without a mask (nxm_hasmask=0). */ #define NXM_HEADER(VENDOR, FIELD, LENGTH) \ NXM_HEADER__(VENDOR, FIELD, 0, LENGTH) /* Header for a masked entry: nxm_hasmask=1, with LENGTH (the width of the * value alone) doubled so that nxm_length covers both value and mask. */ #define NXM_HEADER_W(VENDOR, FIELD, LENGTH) \ NXM_HEADER__(VENDOR, FIELD, 1, (LENGTH) * 2) /* Accessors that extract the individual parts of an nxm_header. */ #define NXM_VENDOR(HEADER) ((HEADER) >> 16) #define NXM_FIELD(HEADER) (((HEADER) >> 9) & 0x7f) #define NXM_TYPE(HEADER) (((HEADER) >> 9) & 0x7fffff) #define NXM_HASMASK(HEADER) (((HEADER) >> 8) & 1) #define NXM_LENGTH(HEADER) ((HEADER) & 0xff) /* Builds the masked (NXM_HEADER_W) counterpart of HEADER. NXM_LENGTH(HEADER) * is doubled, so HEADER is expected to be an unmasked header. */ #define NXM_MAKE_WILD_HEADER(HEADER) \ NXM_HEADER_W(NXM_VENDOR(HEADER), NXM_FIELD(HEADER), NXM_LENGTH(HEADER)) /* ## ------------------------------- ## */ /* ## OpenFlow 1.0-compatible fields. ## */ /* ## ------------------------------- ## */ /* Physical or virtual port on which the packet was received.
* * Prereqs: None. * * Format: 16-bit integer in network byte order. * * Masking: Not maskable. */ #define NXM_OF_IN_PORT NXM_HEADER (0x0000, 0, 2) /* Source or destination address in Ethernet header. * * Prereqs: None. * * Format: 48-bit Ethernet MAC address. * * Masking: Fully maskable, in Open vSwitch 1.8 and later. Earlier versions only * supported the following masks for NXM_OF_ETH_DST_W: 00:00:00:00:00:00, * fe:ff:ff:ff:ff:ff, 01:00:00:00:00:00, ff:ff:ff:ff:ff:ff. */ #define NXM_OF_ETH_DST NXM_HEADER (0x0000, 1, 6) #define NXM_OF_ETH_DST_W NXM_HEADER_W(0x0000, 1, 6) #define NXM_OF_ETH_SRC NXM_HEADER (0x0000, 2, 6) #define NXM_OF_ETH_SRC_W NXM_HEADER_W(0x0000, 2, 6) /* Packet's Ethernet type. * * For an Ethernet II packet this is taken from the Ethernet header. For an * 802.2 LLC+SNAP header with OUI 00-00-00 this is taken from the SNAP header. * A packet that has neither format has value 0x05ff * (OFP_DL_TYPE_NOT_ETH_TYPE). * * For a packet with an 802.1Q header, this is the type of the encapsulated * frame. * * Prereqs: None. * * Format: 16-bit integer in network byte order. * * Masking: Not maskable. */ #define NXM_OF_ETH_TYPE NXM_HEADER (0x0000, 3, 2) /* 802.1Q TCI. * * For a packet with an 802.1Q header, this is the Tag Control Information * (TCI) field, with the CFI bit forced to 1. For a packet with no 802.1Q * header, this has value 0. * * Prereqs: None. * * Format: 16-bit integer in network byte order. * * Masking: Arbitrary masks. * * This field can be used in various ways: * * - If it is not constrained at all, the nx_match matches packets without * an 802.1Q header or with an 802.1Q header that has any TCI value. * * - Testing for an exact match with 0 matches only packets without an * 802.1Q header. * * - Testing for an exact match with a TCI value with CFI=1 matches packets * that have an 802.1Q header with a specified VID and PCP. * * - Testing for an exact match with a nonzero TCI value with CFI=0 does * not make sense.
The switch may reject this combination. * * - Testing with a specific VID and CFI=1, with nxm_mask=0x1fff, matches * packets that have an 802.1Q header with that VID (and any PCP). * * - Testing with a specific PCP and CFI=1, with nxm_mask=0xf000, matches * packets that have an 802.1Q header with that PCP (and any VID). * * - Testing with nxm_value=0, nxm_mask=0x0fff matches packets with no 802.1Q * header or with an 802.1Q header with a VID of 0. * * - Testing with nxm_value=0, nxm_mask=0xe000 matches packets with no 802.1Q * header or with an 802.1Q header with a PCP of 0. * * - Testing with nxm_value=0, nxm_mask=0xefff matches packets with no 802.1Q * header or with an 802.1Q header with both VID and PCP of 0. */ #define NXM_OF_VLAN_TCI NXM_HEADER (0x0000, 4, 2) #define NXM_OF_VLAN_TCI_W NXM_HEADER_W(0x0000, 4, 2) /* The "type of service" byte of the IP header, with the ECN bits forced to 0. * * Prereqs: NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd. * * Format: 8-bit integer with 2 least-significant bits forced to 0. * * Masking: Not maskable. */ #define NXM_OF_IP_TOS NXM_HEADER (0x0000, 5, 1) /* The "protocol" byte in the IP header. * * Prereqs: NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd. * * Format: 8-bit integer. * * Masking: Not maskable. */ #define NXM_OF_IP_PROTO NXM_HEADER (0x0000, 6, 1) /* The source or destination address in the IP header. * * Prereqs: NXM_OF_ETH_TYPE must match 0x0800 exactly. * * Format: 32-bit integer in network byte order. * * Masking: Fully maskable, in Open vSwitch 1.8 and later. In earlier * versions, only CIDR masks are allowed, that is, masks that consist of N * high-order bits set to 1 and the other 32-N bits set to 0. */ #define NXM_OF_IP_SRC NXM_HEADER (0x0000, 7, 4) #define NXM_OF_IP_SRC_W NXM_HEADER_W(0x0000, 7, 4) #define NXM_OF_IP_DST NXM_HEADER (0x0000, 8, 4) #define NXM_OF_IP_DST_W NXM_HEADER_W(0x0000, 8, 4) /* The source or destination port in the TCP header.
* * Prereqs: * NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd. * NXM_OF_IP_PROTO must match 6 exactly. * * Format: 16-bit integer in network byte order. * * Masking: Fully maskable, in Open vSwitch 1.6 and later. Not maskable, in * earlier versions. */ #define NXM_OF_TCP_SRC NXM_HEADER (0x0000, 9, 2) #define NXM_OF_TCP_SRC_W NXM_HEADER_W(0x0000, 9, 2) #define NXM_OF_TCP_DST NXM_HEADER (0x0000, 10, 2) #define NXM_OF_TCP_DST_W NXM_HEADER_W(0x0000, 10, 2) /* The source or destination port in the UDP header. * * Prereqs: * NXM_OF_ETH_TYPE must match either 0x0800 or 0x86dd. * NXM_OF_IP_PROTO must match 17 exactly. * * Format: 16-bit integer in network byte order. * * Masking: Fully maskable, in Open vSwitch 1.6 and later. Not maskable, in * earlier versions. */ #define NXM_OF_UDP_SRC NXM_HEADER (0x0000, 11, 2) #define NXM_OF_UDP_SRC_W NXM_HEADER_W(0x0000, 11, 2) #define NXM_OF_UDP_DST NXM_HEADER (0x0000, 12, 2) #define NXM_OF_UDP_DST_W NXM_HEADER_W(0x0000, 12, 2) /* The type or code in the ICMP header. * * Prereqs: * NXM_OF_ETH_TYPE must match 0x0800 exactly. * NXM_OF_IP_PROTO must match 1 exactly. * * Format: 8-bit integer. * * Masking: Not maskable. */ #define NXM_OF_ICMP_TYPE NXM_HEADER (0x0000, 13, 1) #define NXM_OF_ICMP_CODE NXM_HEADER (0x0000, 14, 1) /* ARP opcode. * * For an Ethernet+IP ARP packet, the opcode in the ARP header. Always 0 * otherwise. Only ARP opcodes between 1 and 255 should be specified for * matching. * * Prereqs: NXM_OF_ETH_TYPE must match either 0x0806 or 0x8035. * * Format: 16-bit integer in network byte order. * * Masking: Not maskable. */ #define NXM_OF_ARP_OP NXM_HEADER (0x0000, 15, 2) /* For an Ethernet+IP ARP packet, the source or target protocol address * in the ARP header. Always 0 otherwise. * * Prereqs: NXM_OF_ETH_TYPE must match either 0x0806 or 0x8035. * * Format: 32-bit integer in network byte order. * * Masking: Fully maskable, in Open vSwitch 1.8 and later.
In earlier * versions, only CIDR masks are allowed, that is, masks that consist of N * high-order bits set to 1 and the other 32-N bits set to 0. */ #define NXM_OF_ARP_SPA NXM_HEADER (0x0000, 16, 4) #define NXM_OF_ARP_SPA_W NXM_HEADER_W(0x0000, 16, 4) #define NXM_OF_ARP_TPA NXM_HEADER (0x0000, 17, 4) #define NXM_OF_ARP_TPA_W NXM_HEADER_W(0x0000, 17, 4) /* ## ------------------------ ## */ /* ## Nicira match extensions. ## */ /* ## ------------------------ ## */ /* Metadata registers. * * Registers initially have value 0. Actions allow register values to be * manipulated. * * Prereqs: None. * * Format: Array of 32-bit integer registers. Space is reserved for up to * NXM_NX_MAX_REGS registers, but switches may implement fewer. * * Masking: Arbitrary masks. */ #define NXM_NX_MAX_REGS 16 #define NXM_NX_REG(IDX) NXM_HEADER (0x0001, IDX, 4) #define NXM_NX_REG_W(IDX) NXM_HEADER_W(0x0001, IDX, 4) #define NXM_NX_REG_IDX(HEADER) NXM_FIELD(HEADER) #define NXM_IS_NX_REG(HEADER) (!((((HEADER) ^ NXM_NX_REG0)) & 0xffffe1ff)) #define NXM_IS_NX_REG_W(HEADER) (!((((HEADER) ^ NXM_NX_REG0_W)) & 0xffffe1ff)) #define NXM_NX_REG0 NXM_HEADER (0x0001, 0, 4) #define NXM_NX_REG0_W NXM_HEADER_W(0x0001, 0, 4) #define NXM_NX_REG1 NXM_HEADER (0x0001, 1, 4) #define NXM_NX_REG1_W NXM_HEADER_W(0x0001, 1, 4) #define NXM_NX_REG2 NXM_HEADER (0x0001, 2, 4) #define NXM_NX_REG2_W NXM_HEADER_W(0x0001, 2, 4) #define NXM_NX_REG3 NXM_HEADER (0x0001, 3, 4) #define NXM_NX_REG3_W NXM_HEADER_W(0x0001, 3, 4) #define NXM_NX_REG4 NXM_HEADER (0x0001, 4, 4) #define NXM_NX_REG4_W NXM_HEADER_W(0x0001, 4, 4) #define NXM_NX_REG5 NXM_HEADER (0x0001, 5, 4) #define NXM_NX_REG5_W NXM_HEADER_W(0x0001, 5, 4) #define NXM_NX_REG6 NXM_HEADER (0x0001, 6, 4) #define NXM_NX_REG6_W NXM_HEADER_W(0x0001, 6, 4) #define NXM_NX_REG7 NXM_HEADER (0x0001, 7, 4) #define NXM_NX_REG7_W NXM_HEADER_W(0x0001, 7, 4) /* Tunnel ID. 
* * For a packet received via a GRE, VXLAN or LISP tunnel including a (32-bit) * key, the key is stored in the low 32-bits and the high bits are zeroed. For * other packets, the value is 0. * * All zero bits, for packets not received via a keyed tunnel. * * Prereqs: None. * * Format: 64-bit integer in network byte order. * * Masking: Arbitrary masks. */ #define NXM_NX_TUN_ID NXM_HEADER (0x0001, 16, 8) #define NXM_NX_TUN_ID_W NXM_HEADER_W(0x0001, 16, 8) /* For an Ethernet+IP ARP packet, the source or target hardware address * in the ARP header. Always 0 otherwise. * * Prereqs: NXM_OF_ETH_TYPE must match either 0x0806 or 0x8035. * * Format: 48-bit Ethernet MAC address. * * Masking: Not maskable. */ #define NXM_NX_ARP_SHA NXM_HEADER (0x0001, 17, 6) #define NXM_NX_ARP_THA NXM_HEADER (0x0001, 18, 6) /* The source or destination address in the IPv6 header. * * Prereqs: NXM_OF_ETH_TYPE must match 0x86dd exactly. * * Format: 128-bit IPv6 address. * * Masking: Fully maskable, in Open vSwitch 1.8 and later. In previous * versions, only CIDR masks are allowed, that is, masks that consist of N * high-order bits set to 1 and the other 128-N bits set to 0. */ #define NXM_NX_IPV6_SRC NXM_HEADER (0x0001, 19, 16) #define NXM_NX_IPV6_SRC_W NXM_HEADER_W(0x0001, 19, 16) #define NXM_NX_IPV6_DST NXM_HEADER (0x0001, 20, 16) #define NXM_NX_IPV6_DST_W NXM_HEADER_W(0x0001, 20, 16) /* The type or code in the ICMPv6 header. * * Prereqs: * NXM_OF_ETH_TYPE must match 0x86dd exactly. * NXM_OF_IP_PROTO must match 58 exactly. * * Format: 8-bit integer. * * Masking: Not maskable. */ #define NXM_NX_ICMPV6_TYPE NXM_HEADER (0x0001, 21, 1) #define NXM_NX_ICMPV6_CODE NXM_HEADER (0x0001, 22, 1) /* The target address in an IPv6 Neighbor Discovery message. * * Prereqs: * NXM_OF_ETH_TYPE must match 0x86dd exactly. * NXM_OF_IP_PROTO must match 58 exactly. * NXM_NX_ICMPV6_TYPE must be either 135 or 136. * * Format: 128-bit IPv6 address. * * Masking: Fully maskable, in Open vSwitch 1.8 and later.
In previous * versions, only CIDR masks are allowed, that is, masks that consist of N * high-order bits set to 1 and the other 128-N bits set to 0. */ #define NXM_NX_ND_TARGET NXM_HEADER (0x0001, 23, 16) #define NXM_NX_ND_TARGET_W NXM_HEADER_W (0x0001, 23, 16) /* The source link-layer address option in an IPv6 Neighbor Discovery * message. * * Prereqs: * NXM_OF_ETH_TYPE must match 0x86dd exactly. * NXM_OF_IP_PROTO must match 58 exactly. * NXM_NX_ICMPV6_TYPE must be exactly 135. * * Format: 48-bit Ethernet MAC address. * * Masking: Not maskable. */ #define NXM_NX_ND_SLL NXM_HEADER (0x0001, 24, 6) /* The target link-layer address option in an IPv6 Neighbor Discovery * message. * * Prereqs: * NXM_OF_ETH_TYPE must match 0x86dd exactly. * NXM_OF_IP_PROTO must match 58 exactly. * NXM_NX_ICMPV6_TYPE must be exactly 136. * * Format: 48-bit Ethernet MAC address. * * Masking: Not maskable. */ #define NXM_NX_ND_TLL NXM_HEADER (0x0001, 25, 6) /* IP fragment information. * * Prereqs: * NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd. * * Format: 8-bit value with one of the values 0, 1, or 3, as described below. * * Masking: Fully maskable. * * This field has three possible values: * * - A packet that is not an IP fragment has value 0. * * - A packet that is an IP fragment with offset 0 (the first fragment) has * bit 0 set and thus value 1. * * - A packet that is an IP fragment with nonzero offset has bits 0 and 1 set * and thus value 3. * * NX_IP_FRAG_ANY and NX_IP_FRAG_LATER are declared to symbolically represent * the meanings of bits 0 and 1. * * The switch may reject matches against values that can never appear. * * It is important to understand how this field interacts with the OpenFlow IP * fragment handling mode: * * - In OFPC_FRAG_DROP mode, the OpenFlow switch drops all IP fragments * before they reach the flow table, so every packet that is available for * matching will have value 0 in this field.
* * - Open vSwitch does not implement OFPC_FRAG_REASM mode, but if it did then * IP fragments would be reassembled before they reached the flow table and * again every packet available for matching would always have value 0. * * - In OFPC_FRAG_NORMAL mode, all three values are possible, but OpenFlow * 1.0 says that fragments' transport ports are always 0, even for the * first fragment, so this does not provide much extra information. * * - In OFPC_FRAG_NX_MATCH mode, all three values are possible. For * fragments with offset 0, Open vSwitch makes L4 header information * available. */ #define NXM_NX_IP_FRAG NXM_HEADER (0x0001, 26, 1) #define NXM_NX_IP_FRAG_W NXM_HEADER_W(0x0001, 26, 1) /* Bits in the value of NXM_NX_IP_FRAG. */ #define NX_IP_FRAG_ANY (1 << 0) /* Is this a fragment? */ #define NX_IP_FRAG_LATER (1 << 1) /* Is this a fragment with nonzero offset? */ /* The flow label in the IPv6 header. * * Prereqs: NXM_OF_ETH_TYPE must match 0x86dd exactly. * * Format: 20-bit IPv6 flow label in least-significant bits. * * Masking: Not maskable. */ #define NXM_NX_IPV6_LABEL NXM_HEADER (0x0001, 27, 4) /* The ECN of the IP header. * * Prereqs: NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd. * * Format: ECN in the low-order 2 bits. * * Masking: Not maskable. */ #define NXM_NX_IP_ECN NXM_HEADER (0x0001, 28, 1) /* The time-to-live/hop limit of the IP header. * * Prereqs: NXM_OF_ETH_TYPE must be either 0x0800 or 0x86dd. * * Format: 8-bit integer. * * Masking: Not maskable. */ #define NXM_NX_IP_TTL NXM_HEADER (0x0001, 29, 1) /* Flow cookie. * * This may be used to gain the OpenFlow 1.1-like ability to restrict * certain NXM-based Flow Mod and Flow Stats Request messages to flows * with specific cookies. See the "nx_flow_mod" and "nx_flow_stats_request" * structure definitions for more details. This match is otherwise not * allowed. * * Prereqs: None. * * Format: 64-bit integer in network byte order. * * Masking: Arbitrary masks. 
*/ #define NXM_NX_COOKIE NXM_HEADER (0x0001, 30, 8) #define NXM_NX_COOKIE_W NXM_HEADER_W(0x0001, 30, 8) /* The source or destination address in the outer IP header of a tunneled * packet. * * For non-tunneled packets, the value is 0. * * Prereqs: None. * * Format: 32-bit integer in network byte order. * * Masking: Fully maskable. */ #define NXM_NX_TUN_IPV4_SRC NXM_HEADER (0x0001, 31, 4) #define NXM_NX_TUN_IPV4_SRC_W NXM_HEADER_W(0x0001, 31, 4) #define NXM_NX_TUN_IPV4_DST NXM_HEADER (0x0001, 32, 4) #define NXM_NX_TUN_IPV4_DST_W NXM_HEADER_W(0x0001, 32, 4) /* Metadata marked onto the packet in a system-dependent manner. * * The packet mark may be used to carry contextual information * to other parts of the system outside of Open vSwitch. As a * result, the semantics depend on system in use. * * Prereqs: None. * * Format: 32-bit integer in network byte order. * * Masking: Fully maskable. */ #define NXM_NX_PKT_MARK NXM_HEADER (0x0001, 33, 4) #define NXM_NX_PKT_MARK_W NXM_HEADER_W(0x0001, 33, 4) /* ## --------------------- ## */ /* ## Requests and replies. ## */ /* ## --------------------- ## */ enum nx_flow_format { NXFF_OPENFLOW10 = 0, /* Standard OpenFlow 1.0 compatible. */ NXFF_NXM = 2 /* Nicira extended match. */ }; /* NXT_SET_FLOW_FORMAT request. */ struct nx_set_flow_format { ovs_be32 format; /* One of NXFF_*. */ }; OFP_ASSERT(sizeof(struct nx_set_flow_format) == 4); /* NXT_FLOW_MOD (analogous to OFPT_FLOW_MOD). * * It is possible to limit flow deletions and modifications to certain * cookies by using the NXM_NX_COOKIE(_W) matches. The "cookie" field * is used only to add or modify flow cookies. */ struct nx_flow_mod { ovs_be64 cookie; /* Opaque controller-issued identifier. */ ovs_be16 command; /* OFPFC_* + possibly a table ID (see comment * on struct nx_flow_mod_table_id). */ ovs_be16 idle_timeout; /* Idle time before discarding (seconds). */ ovs_be16 hard_timeout; /* Max time before discarding (seconds). */ ovs_be16 priority; /* Priority level of flow entry. 
*/ ovs_be32 buffer_id; /* Buffered packet to apply to (or -1). Not meaningful for OFPFC_DELETE*. */ ovs_be16 out_port; /* For OFPFC_DELETE* commands, require matching entries to include this as an output port. A value of OFPP_NONE indicates no restriction. */ ovs_be16 flags; /* One of OFPFF_*. */ ovs_be16 match_len; /* Size of nx_match. */ uint8_t pad[6]; /* Align to 64-bits. */ /* Followed by: * - Exactly match_len (possibly 0) bytes containing the nx_match, then * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of * all-zero bytes, then * - Actions to fill out the remainder of the message length (always a * multiple of 8). */ }; OFP_ASSERT(sizeof(struct nx_flow_mod) == 32); /* NXT_FLOW_REMOVED (analogous to OFPT_FLOW_REMOVED). * * 'table_id' is present only in Open vSwitch 1.11 and later. In earlier * versions of Open vSwitch, this is a padding byte that is always zeroed. * Therefore, a 'table_id' value of 0 indicates that the table ID is not known, * and other values may be interpreted as one more than the flow's former table * ID. */ struct nx_flow_removed { ovs_be64 cookie; /* Opaque controller-issued identifier. */ ovs_be16 priority; /* Priority level of flow entry. */ uint8_t reason; /* One of OFPRR_*. */ uint8_t table_id; /* Flow's former table ID, plus one. */ ovs_be32 duration_sec; /* Time flow was alive in seconds. */ ovs_be32 duration_nsec; /* Time flow was alive in nanoseconds beyond duration_sec. */ ovs_be16 idle_timeout; /* Idle timeout from original flow mod. */ ovs_be16 match_len; /* Size of nx_match. */ ovs_be64 packet_count; ovs_be64 byte_count; /* Followed by: * - Exactly match_len (possibly 0) bytes containing the nx_match, then * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of * all-zero bytes. */ }; OFP_ASSERT(sizeof(struct nx_flow_removed) == 40); /* Nicira vendor stats request of type NXST_FLOW (analogous to OFPST_FLOW * request). 
* * It is possible to limit matches to certain cookies by using the * NXM_NX_COOKIE and NXM_NX_COOKIE_W matches. */ struct nx_flow_stats_request { ovs_be16 out_port; /* Require matching entries to include this as an output port. A value of OFPP_NONE indicates no restriction. */ ovs_be16 match_len; /* Length of nx_match. */ uint8_t table_id; /* ID of table to read (from ofp_table_stats) or 0xff for all tables. */ uint8_t pad[3]; /* Align to 64 bits. */ /* Followed by: * - Exactly match_len (possibly 0) bytes containing the nx_match, then * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of * all-zero bytes, which must also exactly fill out the length of the * message. */ }; OFP_ASSERT(sizeof(struct nx_flow_stats_request) == 8); /* Body for Nicira vendor stats reply of type NXST_FLOW (analogous to * OFPST_FLOW reply). * * The values of 'idle_age' and 'hard_age' are only meaningful when talking to * a switch that implements the NXT_FLOW_AGE extension. Zero means that the * true value is unknown, perhaps because hardware does not track the value. * (Zero is also the value that one should ordinarily expect to see talking to * a switch that does not implement NXT_FLOW_AGE, since those switches zero the * padding bytes that these fields replaced.) A nonzero value X represents X-1 * seconds. A value of 65535 represents 65534 or more seconds. * * 'idle_age' is the number of seconds that the flow has been idle, that is, * the number of seconds since a packet passed through the flow. 'hard_age' is * the number of seconds since the flow was last modified (e.g. OFPFC_MODIFY or * OFPFC_MODIFY_STRICT). (The 'duration_*' fields are the elapsed time since * the flow was added, regardless of subsequent modifications.) 
* * For a flow with an idle or hard timeout, 'idle_age' or 'hard_age', * respectively, will ordinarily be smaller than the timeout, but flow * expiration times are only approximate and so one must be prepared to * tolerate expirations that occur somewhat early or late. */ struct nx_flow_stats { ovs_be16 length; /* Length of this entry. */ uint8_t table_id; /* ID of table flow came from. */ uint8_t pad; ovs_be32 duration_sec; /* Time flow has been alive in seconds. */ ovs_be32 duration_nsec; /* Time flow has been alive in nanoseconds beyond duration_sec. */ ovs_be16 priority; /* Priority of the entry. */ ovs_be16 idle_timeout; /* Number of seconds idle before expiration. */ ovs_be16 hard_timeout; /* Number of seconds before expiration. */ ovs_be16 match_len; /* Length of nx_match. */ ovs_be16 idle_age; /* Seconds since last packet, plus one. */ ovs_be16 hard_age; /* Seconds since last modification, plus one. */ ovs_be64 cookie; /* Opaque controller-issued identifier. */ ovs_be64 packet_count; /* Number of packets, UINT64_MAX if unknown. */ ovs_be64 byte_count; /* Number of bytes, UINT64_MAX if unknown. */ /* Followed by: * - Exactly match_len (possibly 0) bytes containing the nx_match, then * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of * all-zero bytes, then * - Actions to fill out the remainder 'length' bytes (always a multiple * of 8). */ }; OFP_ASSERT(sizeof(struct nx_flow_stats) == 48); /* Nicira vendor stats request of type NXST_AGGREGATE (analogous to * OFPST_AGGREGATE request). * * The reply format is identical to the reply format for OFPST_AGGREGATE, * except for the header. */ struct nx_aggregate_stats_request { ovs_be16 out_port; /* Require matching entries to include this as an output port. A value of OFPP_NONE indicates no restriction. */ ovs_be16 match_len; /* Length of nx_match. */ uint8_t table_id; /* ID of table to read (from ofp_table_stats) or 0xff for all tables. */ uint8_t pad[3]; /* Align to 64 bits. 
*/ /* Followed by: * - Exactly match_len (possibly 0) bytes containing the nx_match, then * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of * all-zero bytes, which must also exactly fill out the length of the * message. */ }; OFP_ASSERT(sizeof(struct nx_aggregate_stats_request) == 8); /* NXT_SET_CONTROLLER_ID. * * Each OpenFlow controller connection has a 16-bit identifier that is * initially 0. This message changes the connection's ID to 'id'. * * Controller connection IDs need not be unique. * * The NXAST_CONTROLLER action is the only current user of controller * connection IDs. */ struct nx_controller_id { uint8_t zero[6]; /* Must be zero. */ ovs_be16 controller_id; /* New controller connection ID. */ }; OFP_ASSERT(sizeof(struct nx_controller_id) == 8); /* Action structure for NXAST_CONTROLLER. * * This generalizes using OFPAT_OUTPUT to send a packet to OFPP_CONTROLLER. In * addition to the 'max_len' that OFPAT_OUTPUT supports, it also allows * specifying: * * - 'reason': The reason code to use in the ofp_packet_in or nx_packet_in. * * - 'controller_id': The ID of the controller connection to which the * ofp_packet_in should be sent. The ofp_packet_in or nx_packet_in is * sent only to controllers that have the specified controller connection * ID. See "struct nx_controller_id" for more information. */ struct nx_action_controller { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_CONTROLLER. */ ovs_be16 max_len; /* Maximum length to send to controller. */ ovs_be16 controller_id; /* Controller ID to send packet-in. */ uint8_t reason; /* enum ofp_packet_in_reason (OFPR_*). */ uint8_t zero; /* Must be zero. */ }; OFP_ASSERT(sizeof(struct nx_action_controller) == 16); /* Flow Table Monitoring * ===================== * * NXST_FLOW_MONITOR allows a controller to keep track of changes to OpenFlow * flow table(s) or subsets of them, with the following workflow: * * 1. 
The controller sends an NXST_FLOW_MONITOR request to begin monitoring * flows. The 'id' in the request must be unique among all monitors that * the controller has started and not yet canceled on this OpenFlow * connection. * * 2. The switch responds with an NXST_FLOW_MONITOR reply. If the request's * 'flags' included NXFMF_INITIAL, the reply includes all the flows that * matched the request at the time of the request (with event NXFME_ADDED). * If 'flags' did not include NXFMF_INITIAL, the reply is empty. * * The reply uses the xid of the request (as do all replies to OpenFlow * requests). * * 3. Whenever a change to a flow table entry matches some outstanding monitor * request's criteria and flags, the switch sends a notification to the * controller as an additional NXST_FLOW_MONITOR reply with xid 0. * * When multiple outstanding monitors match a single change, only a single * notification is sent. This merged notification includes the information * requested in any of the individual monitors. That is, if any of the * matching monitors requests actions (NXFMF_ACTIONS), the notification * includes actions, and if any of the monitors request full changes for the * controller's own changes (NXFMF_OWN), the controller's own changes will * be included in full. * * 4. The controller may cancel a monitor with NXT_FLOW_MONITOR_CANCEL. No * further notifications will be sent on the basis of the canceled monitor * afterward. * * * Buffer Management * ================= * * OpenFlow messages for flow monitor notifications can overflow the buffer * space available to the switch, either temporarily (e.g. due to network * conditions slowing OpenFlow traffic) or more permanently (e.g. the sustained * rate of flow table change exceeds the network bandwidth between switch and * controller). * * When Open vSwitch's notification buffer space reaches a limiting threshold, * OVS reacts as follows: * * 1. 
OVS sends an NXT_FLOW_MONITOR_PAUSED message to the controller, following * all the already queued notifications. After it receives this message, * the controller knows that its view of the flow table, as represented by * flow monitor notifications, is incomplete. * * 2. As long as the notification buffer is not empty: * * - NXFME_ADDED and NXFME_MODIFIED notifications will not be sent. * * - NXFME_DELETED notifications will still be sent, but only for flows * that existed before OVS sent NXT_FLOW_MONITOR_PAUSED. * * - NXFME_ABBREV notifications will not be sent. They are treated as * the expanded version (and therefore only the NXFME_DELETED * components, if any, are sent). * * 3. When the notification buffer empties, OVS sends NXFME_ADDED notifications * for flows added since the buffer reached its limit and NXFME_MODIFIED * notifications for flows that existed before the limit was reached and * changed after the limit was reached. * * 4. OVS sends an NXT_FLOW_MONITOR_RESUMED message to the controller. After * it receives this message, the controller knows that its view of the flow * table, as represented by flow monitor notifications, is again complete. * * This allows the maximum buffer space requirement for notifications to be * bounded by the limit plus the maximum number of supported flows. * * * "Flow Removed" messages * ======================= * * The flow monitor mechanism is independent of OFPT_FLOW_REMOVED and * NXT_FLOW_REMOVED. Flow monitor updates for deletion are sent if * NXFMF_DELETE is set on a monitor, regardless of whether the * OFPFF_SEND_FLOW_REM flag was set when the flow was added. */ /* NXST_FLOW_MONITOR request. * * The NXST_FLOW_MONITOR request's body consists of an array of zero or more * instances of this structure. The request arranges to monitor the flows * that match the specified criteria, which are interpreted in the same way as * for NXST_FLOW.
* * 'id' identifies a particular monitor for the purpose of allowing it to be * canceled later with NXT_FLOW_MONITOR_CANCEL. 'id' must be unique among * existing monitors that have not already been canceled. * * The reply includes the initial flow matches for monitors that have the * NXFMF_INITIAL flag set. No single flow will be included in the reply more * than once, even if more than one requested monitor matches that flow. The * reply will be empty if none of the monitors has NXFMF_INITIAL set or if none * of the monitors initially matches any flows. * * For NXFMF_ADD, an event will be reported if 'out_port' matches against the * actions of the flow being added or, for a flow that is replacing an existing * flow, if 'out_port' matches against the actions of the flow being replaced. * For NXFMF_DELETE, 'out_port' matches against the actions of a flow being * deleted. For NXFMF_MODIFY, an event will be reported if 'out_port' matches * either the old or the new actions. */ struct nx_flow_monitor_request { ovs_be32 id; /* Controller-assigned ID for this monitor. */ ovs_be16 flags; /* NXFMF_*. */ ovs_be16 out_port; /* Required output port, if not OFPP_NONE. */ ovs_be16 match_len; /* Length of nx_match. */ uint8_t table_id; /* One table's ID or 0xff for all tables. */ uint8_t zeros[5]; /* Align to 64 bits (must be zero). */ /* Followed by: * - Exactly match_len (possibly 0) bytes containing the nx_match, then * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of * all-zero bytes. */ }; OFP_ASSERT(sizeof(struct nx_flow_monitor_request) == 16); /* 'flags' bits in struct nx_flow_monitor_request. */ enum nx_flow_monitor_flags { /* When to send updates. */ NXFMF_INITIAL = 1 << 0, /* Initially matching flows. */ NXFMF_ADD = 1 << 1, /* New matching flows as they are added. */ NXFMF_DELETE = 1 << 2, /* Old matching flows as they are removed. */ NXFMF_MODIFY = 1 << 3, /* Matching flows as they are changed. */ /* What to include in updates. 
*/ NXFMF_ACTIONS = 1 << 4, /* If set, actions are included. */ NXFMF_OWN = 1 << 5, /* If set, include own changes in full. */ }; /* NXST_FLOW_MONITOR reply header. * * The body of an NXST_FLOW_MONITOR reply is an array of variable-length * structures, each of which begins with this header. The 'length' member may * be used to traverse the array, and the 'event' member may be used to * determine the particular structure. * * Every instance is a multiple of 8 bytes long. */ struct nx_flow_update_header { ovs_be16 length; /* Length of this entry. */ ovs_be16 event; /* One of NXFME_*. */ /* ...other data depending on 'event'... */ }; OFP_ASSERT(sizeof(struct nx_flow_update_header) == 4); /* 'event' values in struct nx_flow_update_header. */ enum nx_flow_update_event { /* struct nx_flow_update_full. */ NXFME_ADDED = 0, /* Flow was added. */ NXFME_DELETED = 1, /* Flow was deleted. */ NXFME_MODIFIED = 2, /* Flow (generally its actions) was changed. */ /* struct nx_flow_update_abbrev. */ NXFME_ABBREV = 3, /* Abbreviated reply. */ }; /* NXST_FLOW_MONITOR reply for NXFME_ADDED, NXFME_DELETED, and * NXFME_MODIFIED. */ struct nx_flow_update_full { ovs_be16 length; /* Length is 24. */ ovs_be16 event; /* One of NXFME_*. */ ovs_be16 reason; /* OFPRR_* for NXFME_DELETED, else zero. */ ovs_be16 priority; /* Priority of the entry. */ ovs_be16 idle_timeout; /* Number of seconds idle before expiration. */ ovs_be16 hard_timeout; /* Number of seconds before expiration. */ ovs_be16 match_len; /* Length of nx_match. */ uint8_t table_id; /* ID of flow's table. */ uint8_t pad; /* Reserved, currently zeroed. */ ovs_be64 cookie; /* Opaque controller-issued identifier. */ /* Followed by: * - Exactly match_len (possibly 0) bytes containing the nx_match, then * - Exactly (match_len + 7)/8*8 - match_len (between 0 and 7) bytes of * all-zero bytes, then * - Actions to fill out the remainder 'length' bytes (always a multiple * of 8). 
If NXFMF_ACTIONS was not specified, or 'event' is * NXFME_DELETED, no actions are included. */ }; OFP_ASSERT(sizeof(struct nx_flow_update_full) == 24); /* NXST_FLOW_MONITOR reply for NXFME_ABBREV. * * When the controller does not specify NXFMF_OWN in a monitor request, any * flow table changes due to the controller's own requests (on the same * OpenFlow channel) will be abbreviated, when possible, to this form, which * simply specifies the 'xid' of the OpenFlow request (e.g. an OFPT_FLOW_MOD or * NXT_FLOW_MOD) that caused the change. * * Some changes cannot be abbreviated and will be sent in full: * * - Changes that only partially succeed. This can happen if, for example, * a flow_mod with type OFPFC_MODIFY affects multiple flows, but only some * of those modifications succeed (e.g. due to hardware limitations). * * This cannot occur with the current implementation of the Open vSwitch * software datapath. It could happen with other datapath implementations. * * - Changes that race with conflicting changes made by other controllers or * other flow_mods (not separated by barriers) by the same controller. * * This cannot occur with the current Open vSwitch implementation * (regardless of datapath) because Open vSwitch internally serializes * potentially conflicting changes. * * A flow_mod that does not change the flow table will not trigger any * notification, even an abbreviated one. For example, a "modify" or "delete" * flow_mod that does not match any flows will not trigger a notification. * Whether an "add" or "modify" that specifies all the same parameters that a * flow already has triggers a notification is unspecified and subject to * change in future versions of Open vSwitch. * * OVS will always send the notifications for a given flow table change before * the reply to an OFPT_BARRIER_REQUEST request that follows the flow table * change.
Thus, if the controller does not receive an abbreviated (or * unabbreviated) notification for a flow_mod before the next * OFPT_BARRIER_REPLY, it will never receive one. */ struct nx_flow_update_abbrev { ovs_be16 length; /* Length is 8. */ ovs_be16 event; /* NXFME_ABBREV. */ ovs_be32 xid; /* Controller-specified xid from flow_mod. */ }; OFP_ASSERT(sizeof(struct nx_flow_update_abbrev) == 8); /* NXT_FLOW_MONITOR_CANCEL. * * Used by a controller to cancel an outstanding monitor. */ struct nx_flow_monitor_cancel { ovs_be32 id; /* 'id' from nx_flow_monitor_request. */ }; OFP_ASSERT(sizeof(struct nx_flow_monitor_cancel) == 4); /* Action structure for NXAST_WRITE_METADATA. * * Modifies the 'mask' bits of the metadata value. */ struct nx_action_write_metadata { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 32. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_WRITE_METADATA. */ uint8_t zeros[6]; /* Must be zero. */ ovs_be64 metadata; /* Metadata register. */ ovs_be64 mask; /* Metadata mask. */ }; OFP_ASSERT(sizeof(struct nx_action_write_metadata) == 32); /* Action structure for NXAST_PUSH_MPLS. */ struct nx_action_push_mpls { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_PUSH_MPLS. */ ovs_be16 ethertype; /* Ethertype */ uint8_t pad[4]; }; OFP_ASSERT(sizeof(struct nx_action_push_mpls) == 16); /* Action structure for NXAST_POP_MPLS. */ struct nx_action_pop_mpls { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_POP_MPLS. */ ovs_be16 ethertype; /* Ethertype */ uint8_t pad[4]; }; OFP_ASSERT(sizeof(struct nx_action_pop_mpls) == 16); /* Action structure for NXAST_SET_MPLS_TTL. */ struct nx_action_mpls_ttl { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 16. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_SET_MPLS_TTL.
*/ uint8_t ttl; /* TTL */ uint8_t pad[5]; }; OFP_ASSERT(sizeof(struct nx_action_mpls_ttl) == 16); /* Action structure for NXAST_SAMPLE. * * Samples matching packets with the given probability and sends them * each to the set of collectors identified with the given ID. The * probability is expressed as a number of packets to be sampled out * of USHRT_MAX packets, and must be >0. * * When sending packet samples to IPFIX collectors, the IPFIX flow * record sent for each sampled packet is associated with the given * observation domain ID and observation point ID. Each IPFIX flow * record contain the sampled packet's headers when executing this * rule. If a sampled packet's headers are modified by previous * actions in the flow, those modified headers are sent. */ struct nx_action_sample { ovs_be16 type; /* OFPAT_VENDOR. */ ovs_be16 len; /* Length is 24. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_SAMPLE. */ ovs_be16 probability; /* Fraction of packets to sample. */ ovs_be32 collector_set_id; /* ID of collector set in OVSDB. */ ovs_be32 obs_domain_id; /* ID of sampling observation domain. */ ovs_be32 obs_point_id; /* ID of sampling observation point. */ }; OFP_ASSERT(sizeof(struct nx_action_sample) == 24); #endif /* openflow/nicira-ext.h */ openvswitch-2.0.1+git20140120/include/openflow/openflow-1.0.h000066400000000000000000000520701226605124000232300ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ /* OpenFlow: protocol between controller and datapath. */ #ifndef OPENFLOW_OPENFLOW10_H #define OPENFLOW_OPENFLOW10_H 1 #include "openflow/openflow-common.h" /* Port number(s) meaning * --------------- -------------------------------------- * 0x0000 not assigned a meaning by OpenFlow 1.0 * 0x0001...0xfeff "physical" ports * 0xff00...0xfff7 "reserved" but not assigned a meaning by OpenFlow 1.0 * 0xfff8...0xffff "reserved" OFPP_* ports with assigned meanings */ /* Ranges. */ #define OFPP_MAX OFP_PORT_C(0xff00) /* Max # of switch ports. */ #define OFPP_FIRST_RESV OFP_PORT_C(0xfff8) /* First assigned reserved port. */ #define OFPP_LAST_RESV OFP_PORT_C(0xffff) /* Last assigned reserved port. */ /* Reserved output "ports". */ #define OFPP_IN_PORT OFP_PORT_C(0xfff8) /* Where the packet came in. */ #define OFPP_TABLE OFP_PORT_C(0xfff9) /* Perform actions in flow table. */ #define OFPP_NORMAL OFP_PORT_C(0xfffa) /* Process with normal L2/L3. */ #define OFPP_FLOOD OFP_PORT_C(0xfffb) /* All ports except input port and * ports disabled by STP. */ #define OFPP_ALL OFP_PORT_C(0xfffc) /* All ports except input port. */ #define OFPP_CONTROLLER OFP_PORT_C(0xfffd) /* Send to controller. */ #define OFPP_LOCAL OFP_PORT_C(0xfffe) /* Local openflow "port". */ #define OFPP_NONE OFP_PORT_C(0xffff) /* Not associated with any port. */ /* OpenFlow 1.0 specific capabilities supported by the datapath (struct * ofp_switch_features, member capabilities). */ enum ofp10_capabilities { OFPC10_STP = 1 << 3, /* 802.1d spanning tree. */ OFPC10_RESERVED = 1 << 4, /* Reserved, must not be set. */ }; /* OpenFlow 1.0 specific flags to indicate behavior of the physical port. * These flags are used in ofp10_phy_port to describe the current * configuration. They are used in the ofp10_port_mod message to configure the * port's behavior. 
*/ enum ofp10_port_config { OFPPC10_NO_STP = 1 << 1, /* Disable 802.1D spanning tree on port. */ OFPPC10_NO_RECV_STP = 1 << 3, /* Drop received 802.1D STP packets. */ OFPPC10_NO_FLOOD = 1 << 4, /* Do not include port when flooding. */ #define OFPPC10_ALL (OFPPC_PORT_DOWN | OFPPC10_NO_STP | OFPPC_NO_RECV | \ OFPPC10_NO_RECV_STP | OFPPC10_NO_FLOOD | OFPPC_NO_FWD | \ OFPPC_NO_PACKET_IN) }; /* OpenFlow 1.0 specific current state of the physical port. These are not * configurable from the controller. */ enum ofp10_port_state { /* The OFPPS10_STP_* bits have no effect on switch operation. The * controller must adjust OFPPC_NO_RECV, OFPPC_NO_FWD, and * OFPPC_NO_PACKET_IN appropriately to fully implement an 802.1D spanning * tree. */ OFPPS10_STP_LISTEN = 0 << 8, /* Not learning or relaying frames. */ OFPPS10_STP_LEARN = 1 << 8, /* Learning but not relaying frames. */ OFPPS10_STP_FORWARD = 2 << 8, /* Learning and relaying frames. */ OFPPS10_STP_BLOCK = 3 << 8, /* Not part of spanning tree. */ OFPPS10_STP_MASK = 3 << 8 /* Bit mask for OFPPS10_STP_* values. */ #define OFPPS10_ALL (OFPPS_LINK_DOWN | OFPPS10_STP_MASK) }; /* OpenFlow 1.0 specific features of physical ports available in a datapath. */ enum ofp10_port_features { OFPPF10_COPPER = 1 << 7, /* Copper medium. */ OFPPF10_FIBER = 1 << 8, /* Fiber medium. */ OFPPF10_AUTONEG = 1 << 9, /* Auto-negotiation. */ OFPPF10_PAUSE = 1 << 10, /* Pause. */ OFPPF10_PAUSE_ASYM = 1 << 11 /* Asymmetric pause. */ }; /* Description of a physical port */ struct ofp10_phy_port { ovs_be16 port_no; uint8_t hw_addr[OFP_ETH_ALEN]; char name[OFP_MAX_PORT_NAME_LEN]; /* Null-terminated */ ovs_be32 config; /* Bitmap of OFPPC_* and OFPPC10_* flags. */ ovs_be32 state; /* Bitmap of OFPPS_* and OFPPS10_* flags. */ /* Bitmaps of OFPPF_* and OFPPF10_* that describe features. All bits * zeroed if unsupported or unavailable. */ ovs_be32 curr; /* Current features. */ ovs_be32 advertised; /* Features being advertised by the port. 
*/ ovs_be32 supported; /* Features supported by the port. */ ovs_be32 peer; /* Features advertised by peer. */ }; OFP_ASSERT(sizeof(struct ofp10_phy_port) == 48); /* Modify behavior of the physical port */ struct ofp10_port_mod { ovs_be16 port_no; uint8_t hw_addr[OFP_ETH_ALEN]; /* The hardware address is not configurable. This is used to sanity-check the request, so it must be the same as returned in an ofp10_phy_port struct. */ ovs_be32 config; /* Bitmap of OFPPC_* flags. */ ovs_be32 mask; /* Bitmap of OFPPC_* flags to be changed. */ ovs_be32 advertise; /* Bitmap of "ofp_port_features"s. Zero all bits to prevent any action taking place. */ uint8_t pad[4]; /* Pad to 64-bits. */ }; OFP_ASSERT(sizeof(struct ofp10_port_mod) == 24); /* Query for port queue configuration. */ struct ofp10_queue_get_config_request { ovs_be16 port; /* Port to be queried. Should refer to a valid physical port (i.e. < OFPP_MAX) */ uint8_t pad[2]; /* 32-bit alignment. */ }; OFP_ASSERT(sizeof(struct ofp10_queue_get_config_request) == 4); /* Queue configuration for a given port. */ struct ofp10_queue_get_config_reply { ovs_be16 port; uint8_t pad[6]; /* struct ofp10_packet_queue queues[0]; List of configured queues. */ }; OFP_ASSERT(sizeof(struct ofp10_queue_get_config_reply) == 8); /* Packet received on port (datapath -> controller). */ struct ofp10_packet_in { ovs_be32 buffer_id; /* ID assigned by datapath. */ ovs_be16 total_len; /* Full length of frame. */ ovs_be16 in_port; /* Port on which frame was received. */ uint8_t reason; /* Reason packet is being sent (one of OFPR_*) */ uint8_t pad; uint8_t data[0]; /* Ethernet frame, halfway through 32-bit word, so the IP header is 32-bit aligned. The amount of data is inferred from the length field in the header. Because of padding, offsetof(struct ofp_packet_in, data) == sizeof(struct ofp_packet_in) - 2. */ }; OFP_ASSERT(sizeof(struct ofp10_packet_in) == 12); enum ofp10_action_type { OFPAT10_OUTPUT, /* Output to switch port. 
*/ OFPAT10_SET_VLAN_VID, /* Set the 802.1q VLAN id. */ OFPAT10_SET_VLAN_PCP, /* Set the 802.1q priority. */ OFPAT10_STRIP_VLAN, /* Strip the 802.1q header. */ OFPAT10_SET_DL_SRC, /* Ethernet source address. */ OFPAT10_SET_DL_DST, /* Ethernet destination address. */ OFPAT10_SET_NW_SRC, /* IP source address. */ OFPAT10_SET_NW_DST, /* IP destination address. */ OFPAT10_SET_NW_TOS, /* IP ToS (DSCP field, 6 bits). */ OFPAT10_SET_TP_SRC, /* TCP/UDP source port. */ OFPAT10_SET_TP_DST, /* TCP/UDP destination port. */ OFPAT10_ENQUEUE, /* Output to queue. */ OFPAT10_VENDOR = 0xffff }; /* Action structure for OFPAT10_OUTPUT, which sends packets out 'port'. * When the 'port' is the OFPP_CONTROLLER, 'max_len' indicates the max * number of bytes to send. A 'max_len' of zero means no bytes of the * packet should be sent. */ struct ofp10_action_output { ovs_be16 type; /* OFPAT10_OUTPUT. */ ovs_be16 len; /* Length is 8. */ ovs_be16 port; /* Output port. */ ovs_be16 max_len; /* Max length to send to controller. */ }; OFP_ASSERT(sizeof(struct ofp10_action_output) == 8); /* OFPAT10_ENQUEUE action struct: send packets to given queue on port. */ struct ofp10_action_enqueue { ovs_be16 type; /* OFPAT10_ENQUEUE. */ ovs_be16 len; /* Len is 16. */ ovs_be16 port; /* Port that queue belongs. Should refer to a valid physical port (i.e. < OFPP_MAX) or OFPP_IN_PORT. */ uint8_t pad[6]; /* Pad for 64-bit alignment. */ ovs_be32 queue_id; /* Where to enqueue the packets. */ }; OFP_ASSERT(sizeof(struct ofp10_action_enqueue) == 16); union ofp_action { ovs_be16 type; struct ofp_action_header header; struct ofp_action_vendor_header vendor; struct ofp10_action_output output10; struct ofp_action_vlan_vid vlan_vid; struct ofp_action_vlan_pcp vlan_pcp; struct ofp_action_nw_addr nw_addr; struct ofp_action_nw_tos nw_tos; struct ofp_action_tp_port tp_port; }; OFP_ASSERT(sizeof(union ofp_action) == 8); /* Send packet (controller -> datapath). 
*/ struct ofp10_packet_out { ovs_be32 buffer_id; /* ID assigned by datapath or UINT32_MAX. */ ovs_be16 in_port; /* Packet's input port (OFPP_NONE if none). */ ovs_be16 actions_len; /* Size of action array in bytes. */ /* Followed by: * - Exactly 'actions_len' bytes (possibly 0 bytes, and always a multiple * of 8) containing actions. * - If 'buffer_id' == UINT32_MAX, packet data to fill out the remainder * of the message length. */ }; OFP_ASSERT(sizeof(struct ofp10_packet_out) == 8); /* Flow wildcards. */ enum ofp10_flow_wildcards { OFPFW10_IN_PORT = 1 << 0, /* Switch input port. */ OFPFW10_DL_VLAN = 1 << 1, /* VLAN vid. */ OFPFW10_DL_SRC = 1 << 2, /* Ethernet source address. */ OFPFW10_DL_DST = 1 << 3, /* Ethernet destination address. */ OFPFW10_DL_TYPE = 1 << 4, /* Ethernet frame type. */ OFPFW10_NW_PROTO = 1 << 5, /* IP protocol. */ OFPFW10_TP_SRC = 1 << 6, /* TCP/UDP source port. */ OFPFW10_TP_DST = 1 << 7, /* TCP/UDP destination port. */ /* IP source address wildcard bit count. 0 is exact match, 1 ignores the * LSB, 2 ignores the 2 least-significant bits, ..., 32 and higher wildcard * the entire field. This is the *opposite* of the usual convention where * e.g. /24 indicates that 8 bits (not 24 bits) are wildcarded. */ OFPFW10_NW_SRC_SHIFT = 8, OFPFW10_NW_SRC_BITS = 6, OFPFW10_NW_SRC_MASK = (((1 << OFPFW10_NW_SRC_BITS) - 1) << OFPFW10_NW_SRC_SHIFT), OFPFW10_NW_SRC_ALL = 32 << OFPFW10_NW_SRC_SHIFT, /* IP destination address wildcard bit count. Same format as source. */ OFPFW10_NW_DST_SHIFT = 14, OFPFW10_NW_DST_BITS = 6, OFPFW10_NW_DST_MASK = (((1 << OFPFW10_NW_DST_BITS) - 1) << OFPFW10_NW_DST_SHIFT), OFPFW10_NW_DST_ALL = 32 << OFPFW10_NW_DST_SHIFT, OFPFW10_DL_VLAN_PCP = 1 << 20, /* VLAN priority. */ OFPFW10_NW_TOS = 1 << 21, /* IP ToS (DSCP field, 6 bits). */ /* Wildcard all fields. */ OFPFW10_ALL = ((1 << 22) - 1) }; /* The wildcards for ICMP type and code fields use the transport source * and destination port fields, respectively. 
*/ #define OFPFW10_ICMP_TYPE OFPFW10_TP_SRC #define OFPFW10_ICMP_CODE OFPFW10_TP_DST /* The VLAN id is 12-bits, so we can use the entire 16 bits to indicate * special conditions. All ones indicates that 802.1Q header is not present. */ #define OFP10_VLAN_NONE 0xffff /* Fields to match against flows */ struct ofp10_match { ovs_be32 wildcards; /* Wildcard fields. */ ovs_be16 in_port; /* Input switch port. */ uint8_t dl_src[OFP_ETH_ALEN]; /* Ethernet source address. */ uint8_t dl_dst[OFP_ETH_ALEN]; /* Ethernet destination address. */ ovs_be16 dl_vlan; /* Input VLAN. */ uint8_t dl_vlan_pcp; /* Input VLAN priority. */ uint8_t pad1[1]; /* Align to 64-bits. */ ovs_be16 dl_type; /* Ethernet frame type. */ uint8_t nw_tos; /* IP ToS (DSCP field, 6 bits). */ uint8_t nw_proto; /* IP protocol or lower 8 bits of ARP opcode. */ uint8_t pad2[2]; /* Align to 64-bits. */ ovs_be32 nw_src; /* IP source address. */ ovs_be32 nw_dst; /* IP destination address. */ ovs_be16 tp_src; /* TCP/UDP source port. */ ovs_be16 tp_dst; /* TCP/UDP destination port. */ }; OFP_ASSERT(sizeof(struct ofp10_match) == 40); enum ofp10_flow_mod_flags { OFPFF10_EMERG = 1 << 2 /* Part of "emergency flow cache". */ }; /* Flow setup and teardown (controller -> datapath). */ struct ofp10_flow_mod { struct ofp10_match match; /* Fields to match */ ovs_be64 cookie; /* Opaque controller-issued identifier. */ /* Flow actions. */ ovs_be16 command; /* One of OFPFC_*. */ ovs_be16 idle_timeout; /* Idle time before discarding (seconds). */ ovs_be16 hard_timeout; /* Max time before discarding (seconds). */ ovs_be16 priority; /* Priority level of flow entry. */ ovs_be32 buffer_id; /* Buffered packet to apply to (or -1). Not meaningful for OFPFC_DELETE*. */ ovs_be16 out_port; /* For OFPFC_DELETE* commands, require matching entries to include this as an output port. A value of OFPP_NONE indicates no restriction. */ ovs_be16 flags; /* One of OFPFF_*. 
*/ struct ofp_action_header actions[0]; /* The action length is inferred from the length field in the header. */ }; OFP_ASSERT(sizeof(struct ofp10_flow_mod) == 64); /* Flow removed (datapath -> controller). */ struct ofp10_flow_removed { struct ofp10_match match; /* Description of fields. */ ovs_be64 cookie; /* Opaque controller-issued identifier. */ ovs_be16 priority; /* Priority level of flow entry. */ uint8_t reason; /* One of OFPRR_*. */ uint8_t pad[1]; /* Align to 32-bits. */ ovs_be32 duration_sec; /* Time flow was alive in seconds. */ ovs_be32 duration_nsec; /* Time flow was alive in nanoseconds beyond duration_sec. */ ovs_be16 idle_timeout; /* Idle timeout from original flow mod. */ uint8_t pad2[2]; /* Align to 64-bits. */ ovs_be64 packet_count; ovs_be64 byte_count; }; OFP_ASSERT(sizeof(struct ofp10_flow_removed) == 80); /* Statistics request or reply message. */ struct ofp10_stats_msg { struct ofp_header header; ovs_be16 type; /* One of the OFPST_* constants. */ ovs_be16 flags; /* Requests: always 0. * Replies: 0 or OFPSF_REPLY_MORE. */ }; OFP_ASSERT(sizeof(struct ofp10_stats_msg) == 12); /* Stats request of type OFPST_AGGREGATE or OFPST_FLOW. */ struct ofp10_flow_stats_request { struct ofp10_match match; /* Fields to match. */ uint8_t table_id; /* ID of table to read (from ofp_table_stats) or 0xff for all tables. */ uint8_t pad; /* Align to 32 bits. */ ovs_be16 out_port; /* Require matching entries to include this as an output port. A value of OFPP_NONE indicates no restriction. */ }; OFP_ASSERT(sizeof(struct ofp10_flow_stats_request) == 44); /* Body of reply to OFPST_FLOW request. */ struct ofp10_flow_stats { ovs_be16 length; /* Length of this entry. */ uint8_t table_id; /* ID of table flow came from. */ uint8_t pad; struct ofp10_match match; /* Description of fields. */ ovs_be32 duration_sec; /* Time flow has been alive in seconds. */ ovs_be32 duration_nsec; /* Time flow has been alive in nanoseconds beyond duration_sec. 
*/ ovs_be16 priority; /* Priority of the entry. Only meaningful when this is not an exact-match entry. */ ovs_be16 idle_timeout; /* Number of seconds idle before expiration. */ ovs_be16 hard_timeout; /* Number of seconds before expiration. */ uint8_t pad2[6]; /* Align to 64 bits. */ ovs_32aligned_be64 cookie; /* Opaque controller-issued identifier. */ ovs_32aligned_be64 packet_count; /* Number of packets in flow. */ ovs_32aligned_be64 byte_count; /* Number of bytes in flow. */ struct ofp_action_header actions[0]; /* Actions. */ }; OFP_ASSERT(sizeof(struct ofp10_flow_stats) == 88); /* Body of reply to OFPST_TABLE request. */ struct ofp10_table_stats { uint8_t table_id; /* Identifier of table. Lower numbered tables are consulted first. */ uint8_t pad[3]; /* Align to 32-bits. */ char name[OFP_MAX_TABLE_NAME_LEN]; ovs_be32 wildcards; /* Bitmap of OFPFW10_* wildcards that are supported by the table. */ ovs_be32 max_entries; /* Max number of entries supported. */ ovs_be32 active_count; /* Number of active entries. */ ovs_32aligned_be64 lookup_count; /* # of packets looked up in table. */ ovs_32aligned_be64 matched_count; /* Number of packets that hit table. */ }; OFP_ASSERT(sizeof(struct ofp10_table_stats) == 64); /* Stats request of type OFPST_PORT. */ struct ofp10_port_stats_request { ovs_be16 port_no; /* OFPST_PORT message may request statistics for a single port (specified with port_no) or for all ports (port_no == OFPP_NONE). */ uint8_t pad[6]; }; OFP_ASSERT(sizeof(struct ofp10_port_stats_request) == 8); /* Body of reply to OFPST_PORT request. If a counter is unsupported, set * the field to all ones. */ struct ofp10_port_stats { ovs_be16 port_no; uint8_t pad[6]; /* Align to 64-bits. */ ovs_32aligned_be64 rx_packets; /* Number of received packets. */ ovs_32aligned_be64 tx_packets; /* Number of transmitted packets. */ ovs_32aligned_be64 rx_bytes; /* Number of received bytes. */ ovs_32aligned_be64 tx_bytes; /* Number of transmitted bytes. 
*/ ovs_32aligned_be64 rx_dropped; /* Number of packets dropped by RX. */ ovs_32aligned_be64 tx_dropped; /* Number of packets dropped by TX. */ ovs_32aligned_be64 rx_errors; /* Number of receive errors. This is a super-set of receive errors and should be greater than or equal to the sum of all rx_*_err values. */ ovs_32aligned_be64 tx_errors; /* Number of transmit errors. This is a super-set of transmit errors. */ ovs_32aligned_be64 rx_frame_err; /* Number of frame alignment errors. */ ovs_32aligned_be64 rx_over_err; /* Number of packets with RX overrun. */ ovs_32aligned_be64 rx_crc_err; /* Number of CRC errors. */ ovs_32aligned_be64 collisions; /* Number of collisions. */ }; OFP_ASSERT(sizeof(struct ofp10_port_stats) == 104); /* All ones is used to indicate all queues in a port (for stats retrieval). */ #define OFPQ_ALL 0xffffffff /* Body for stats request of type OFPST_QUEUE. */ struct ofp10_queue_stats_request { ovs_be16 port_no; /* All ports if OFPP_ALL. */ uint8_t pad[2]; /* Align to 32-bits. */ ovs_be32 queue_id; /* All queues if OFPQ_ALL. */ }; OFP_ASSERT(sizeof(struct ofp10_queue_stats_request) == 8); /* Body for stats reply of type OFPST_QUEUE consists of an array of this * structure type. */ struct ofp10_queue_stats { ovs_be16 port_no; uint8_t pad[2]; /* Align to 32-bits. */ ovs_be32 queue_id; /* Queue id. */ ovs_32aligned_be64 tx_bytes; /* Number of transmitted bytes. */ ovs_32aligned_be64 tx_packets; /* Number of transmitted packets. */ ovs_32aligned_be64 tx_errors; /* # of packets dropped due to overrun. */ }; OFP_ASSERT(sizeof(struct ofp10_queue_stats) == 32); /* Vendor extension stats message. */ struct ofp10_vendor_stats_msg { struct ofp10_stats_msg osm; /* Type OFPST_VENDOR. */ ovs_be32 vendor; /* Vendor ID: * - MSB 0: low-order bytes are IEEE OUI. * - MSB != 0: defined by OpenFlow * consortium. */ /* Followed by vendor-defined arbitrary additional data. 
*/ }; OFP_ASSERT(sizeof(struct ofp10_vendor_stats_msg) == 16); #endif /* openflow/openflow-1.0.h */ openvswitch-2.0.1+git20140120/include/openflow/openflow-1.1.h000066400000000000000000001064121226605124000232310ustar00rootroot00000000000000/* Copyright (c) 2008, 2011, 2012 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation * (Software) available for public use and benefit with the expectation * that others will use, modify and enhance the Software and contribute * those enhancements back to the community. However, since we would * like to make the Software available for broadest use, with as few * restrictions as possible permission is hereby granted, free of * charge, to any person obtaining a copy of this Software to deal in * the Software under the copyrights without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * The name and trademarks of copyright holder(s) may NOT be used in * advertising or publicity pertaining to the Software or any * derivatives without specific, written prior permission. */ /* * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* OpenFlow: protocol between controller and datapath. */ #ifndef OPENFLOW_11_H #define OPENFLOW_11_H 1 #include "openflow/openflow-common.h" /* OpenFlow 1.1 uses 32-bit port numbers. Open vSwitch, for now, uses OpenFlow * 1.0 port numbers internally. We map them to OpenFlow 1.0 as follows: * * OF1.1 <=> OF1.0 * ----------------------- --------------- * 0x00000000...0x0000feff <=> 0x0000...0xfeff "physical" ports * 0x0000ff00...0xfffffeff <=> not supported * 0xffffff00...0xffffffff <=> 0xff00...0xffff "reserved" OFPP_* ports * * OFPP11_OFFSET is the value that must be added or subtracted to convert * an OpenFlow 1.0 reserved port number to or from, respectively, the * corresponding OpenFlow 1.1 reserved port number. */ #define OFPP11_MAX OFP11_PORT_C(0xffffff00) #define OFPP11_OFFSET 0xffff0000 /* OFPP11_MAX - OFPP_MAX */ /* Reserved wildcard port used only for flow mod (delete) and flow stats * requests. Selects all flows regardless of output port * (including flows with no output port) * * Define it via OFPP_NONE (0xFFFF) so that OFPP_ANY is still an enum ofp_port */ #define OFPP_ANY OFPP_NONE /* OpenFlow 1.1 port config flags are just the common flags. */ #define OFPPC11_ALL \ (OFPPC_PORT_DOWN | OFPPC_NO_RECV | OFPPC_NO_FWD | OFPPC_NO_PACKET_IN) /* OpenFlow 1.1 specific current state of the physical port. These are not * configurable from the controller. 
*/ enum ofp11_port_state { OFPPS11_BLOCKED = 1 << 1, /* Port is blocked */ OFPPS11_LIVE = 1 << 2, /* Live for Fast Failover Group. */ #define OFPPS11_ALL (OFPPS_LINK_DOWN | OFPPS11_BLOCKED | OFPPS11_LIVE) }; /* OpenFlow 1.1 specific features of ports available in a datapath. */ enum ofp11_port_features { OFPPF11_40GB_FD = 1 << 7, /* 40 Gb full-duplex rate support. */ OFPPF11_100GB_FD = 1 << 8, /* 100 Gb full-duplex rate support. */ OFPPF11_1TB_FD = 1 << 9, /* 1 Tb full-duplex rate support. */ OFPPF11_OTHER = 1 << 10, /* Other rate, not in the list. */ OFPPF11_COPPER = 1 << 11, /* Copper medium. */ OFPPF11_FIBER = 1 << 12, /* Fiber medium. */ OFPPF11_AUTONEG = 1 << 13, /* Auto-negotiation. */ OFPPF11_PAUSE = 1 << 14, /* Pause. */ OFPPF11_PAUSE_ASYM = 1 << 15 /* Asymmetric pause. */ #define OFPPF11_ALL ((1 << 16) - 1) }; /* Description of a port */ struct ofp11_port { ovs_be32 port_no; uint8_t pad[4]; uint8_t hw_addr[OFP_ETH_ALEN]; uint8_t pad2[2]; /* Align to 64 bits. */ char name[OFP_MAX_PORT_NAME_LEN]; /* Null-terminated */ ovs_be32 config; /* Bitmap of OFPPC_* flags. */ ovs_be32 state; /* Bitmap of OFPPS_* and OFPPS11_* flags. */ /* Bitmaps of OFPPF_* and OFPPF11_* that describe features. All bits * zeroed if unsupported or unavailable. */ ovs_be32 curr; /* Current features. */ ovs_be32 advertised; /* Features being advertised by the port. */ ovs_be32 supported; /* Features supported by the port. */ ovs_be32 peer; /* Features advertised by peer. */ ovs_be32 curr_speed; /* Current port bitrate in kbps. */ ovs_be32 max_speed; /* Max port bitrate in kbps */ }; OFP_ASSERT(sizeof(struct ofp11_port) == 64); /* Modify behavior of the physical port */ struct ofp11_port_mod { ovs_be32 port_no; uint8_t pad[4]; uint8_t hw_addr[OFP_ETH_ALEN]; /* The hardware address is not configurable. This is used to sanity-check the request, so it must be the same as returned in an ofp11_port struct. */ uint8_t pad2[2]; /* Pad to 64 bits. */ ovs_be32 config; /* Bitmap of OFPPC_* flags. 
*/ ovs_be32 mask; /* Bitmap of OFPPC_* flags to be changed. */ ovs_be32 advertise; /* Bitmap of OFPPF_* and OFPPF11_*. Zero all bits to prevent any action taking place. */ uint8_t pad3[4]; /* Pad to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp11_port_mod) == 32); /* Group setup and teardown (controller -> datapath). */ struct ofp11_group_mod { ovs_be16 command; /* One of OFPGC_*. */ uint8_t type; /* One of OFPGT_*. */ uint8_t pad; /* Pad to 64 bits. */ ovs_be32 group_id; /* Group identifier. */ /* struct ofp11_bucket buckets[0]; The bucket length is inferred from the length field in the header. */ }; OFP_ASSERT(sizeof(struct ofp11_group_mod) == 8); /* Query for port queue configuration. */ struct ofp11_queue_get_config_request { ovs_be32 port; /* Port to be queried. Should refer to a valid physical port (i.e. < OFPP_MAX) */ uint8_t pad[4]; }; OFP_ASSERT(sizeof(struct ofp11_queue_get_config_request) == 8); /* Group commands */ enum ofp11_group_mod_command { OFPGC11_ADD, /* New group. */ OFPGC11_MODIFY, /* Modify all matching groups. */ OFPGC11_DELETE, /* Delete all matching groups. */ }; /* OpenFlow 1.1 specific capabilities supported by the datapath (struct * ofp_switch_features, member capabilities). */ enum ofp11_capabilities { OFPC11_GROUP_STATS = 1 << 3, /* Group statistics. */ }; enum ofp11_action_type { OFPAT11_OUTPUT, /* Output to switch port. */ OFPAT11_SET_VLAN_VID, /* Set the 802.1q VLAN id. */ OFPAT11_SET_VLAN_PCP, /* Set the 802.1q priority. */ OFPAT11_SET_DL_SRC, /* Ethernet source address. */ OFPAT11_SET_DL_DST, /* Ethernet destination address. */ OFPAT11_SET_NW_SRC, /* IP source address. */ OFPAT11_SET_NW_DST, /* IP destination address. */ OFPAT11_SET_NW_TOS, /* IP ToS (DSCP field, 6 bits). */ OFPAT11_SET_NW_ECN, /* IP ECN (2 bits). */ OFPAT11_SET_TP_SRC, /* TCP/UDP/SCTP source port. */ OFPAT11_SET_TP_DST, /* TCP/UDP/SCTP destination port. 
*/ OFPAT11_COPY_TTL_OUT, /* Copy TTL "outwards" -- from next-to-outermost to outermost */ OFPAT11_COPY_TTL_IN, /* Copy TTL "inwards" -- from outermost to next-to-outermost */ OFPAT11_SET_MPLS_LABEL, /* MPLS label */ OFPAT11_SET_MPLS_TC, /* MPLS TC */ OFPAT11_SET_MPLS_TTL, /* MPLS TTL */ OFPAT11_DEC_MPLS_TTL, /* Decrement MPLS TTL */ OFPAT11_PUSH_VLAN, /* Push a new VLAN tag */ OFPAT11_POP_VLAN, /* Pop the outer VLAN tag */ OFPAT11_PUSH_MPLS, /* Push a new MPLS Label Stack Entry */ OFPAT11_POP_MPLS, /* Pop the outer MPLS Label Stack Entry */ OFPAT11_SET_QUEUE, /* Set queue id when outputting to a port */ OFPAT11_GROUP, /* Apply group. */ OFPAT11_SET_NW_TTL, /* IP TTL. */ OFPAT11_DEC_NW_TTL, /* Decrement IP TTL. */ OFPAT11_EXPERIMENTER = 0xffff }; #define OFPMT11_STANDARD_LENGTH 88 struct ofp11_match_header { ovs_be16 type; /* One of OFPMT_* */ ovs_be16 length; /* Length of match */ }; OFP_ASSERT(sizeof(struct ofp11_match_header) == 4); /* Fields to match against flows */ struct ofp11_match { struct ofp11_match_header omh; ovs_be32 in_port; /* Input switch port. */ ovs_be32 wildcards; /* Wildcard fields. */ uint8_t dl_src[OFP_ETH_ALEN]; /* Ethernet source address. */ uint8_t dl_src_mask[OFP_ETH_ALEN]; /* Ethernet source address mask. */ uint8_t dl_dst[OFP_ETH_ALEN]; /* Ethernet destination address. */ uint8_t dl_dst_mask[OFP_ETH_ALEN]; /* Ethernet destination address mask. */ ovs_be16 dl_vlan; /* Input VLAN id. */ uint8_t dl_vlan_pcp; /* Input VLAN priority. */ uint8_t pad1[1]; /* Align to 32-bits */ ovs_be16 dl_type; /* Ethernet frame type. */ uint8_t nw_tos; /* IP ToS (actually DSCP field, 6 bits). */ uint8_t nw_proto; /* IP protocol or lower 8 bits of ARP opcode. */ ovs_be32 nw_src; /* IP source address. */ ovs_be32 nw_src_mask; /* IP source address mask. */ ovs_be32 nw_dst; /* IP destination address. */ ovs_be32 nw_dst_mask; /* IP destination address mask. */ ovs_be16 tp_src; /* TCP/UDP/SCTP source port. */ ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port. 
*/ ovs_be32 mpls_label; /* MPLS label. */ uint8_t mpls_tc; /* MPLS TC. */ uint8_t pad2[3]; /* Align to 64-bits */ ovs_be64 metadata; /* Metadata passed between tables. */ ovs_be64 metadata_mask; /* Mask for metadata. */ }; OFP_ASSERT(sizeof(struct ofp11_match) == OFPMT11_STANDARD_LENGTH); /* Flow wildcards. */ enum ofp11_flow_wildcards { OFPFW11_IN_PORT = 1 << 0, /* Switch input port. */ OFPFW11_DL_VLAN = 1 << 1, /* VLAN id. */ OFPFW11_DL_VLAN_PCP = 1 << 2, /* VLAN priority. */ OFPFW11_DL_TYPE = 1 << 3, /* Ethernet frame type. */ OFPFW11_NW_TOS = 1 << 4, /* IP ToS (DSCP field, 6 bits). */ OFPFW11_NW_PROTO = 1 << 5, /* IP protocol. */ OFPFW11_TP_SRC = 1 << 6, /* TCP/UDP/SCTP source port. */ OFPFW11_TP_DST = 1 << 7, /* TCP/UDP/SCTP destination port. */ OFPFW11_MPLS_LABEL = 1 << 8, /* MPLS label. */ OFPFW11_MPLS_TC = 1 << 9, /* MPLS TC. */ /* Wildcard all fields. */ OFPFW11_ALL = ((1 << 10) - 1) }; /* The VLAN id is 12-bits, so we can use the entire 16 bits to indicate * special conditions. */ enum ofp11_vlan_id { OFPVID11_ANY = 0xfffe, /* Indicate that a VLAN id is set but don't care about its value. Note: only valid when specifying the VLAN id in a match */ OFPVID11_NONE = 0xffff, /* No VLAN id was set. */ }; enum ofp11_instruction_type { OFPIT11_GOTO_TABLE = 1, /* Setup the next table in the lookup pipeline */ OFPIT11_WRITE_METADATA = 2, /* Setup the metadata field for use later in pipeline */ OFPIT11_WRITE_ACTIONS = 3, /* Write the action(s) onto the datapath action set */ OFPIT11_APPLY_ACTIONS = 4, /* Applies the action(s) immediately */ OFPIT11_CLEAR_ACTIONS = 5, /* Clears all actions from the datapath action set */ OFPIT11_EXPERIMENTER = 0xFFFF /* Experimenter instruction */ }; /* NOTE(review): the OFPIT11_* values above are sequential (1..5) rather than single-bit flags, so this OR evaluates to 7; confirm that users of OFPIT11_ALL expect that value and not a per-instruction bitmap. */ #define OFPIT11_ALL (OFPIT11_GOTO_TABLE | OFPIT11_WRITE_METADATA | \ OFPIT11_WRITE_ACTIONS | OFPIT11_APPLY_ACTIONS | \ OFPIT11_CLEAR_ACTIONS) #define OFP11_INSTRUCTION_ALIGN 8 /* Generic ofp_instruction structure. 
*/ struct ofp11_instruction { ovs_be16 type; /* Instruction type */ ovs_be16 len; /* Length of this struct in bytes. */ uint8_t pad[4]; /* Align to 64-bits */ }; OFP_ASSERT(sizeof(struct ofp11_instruction) == 8); /* Instruction structure for OFPIT_GOTO_TABLE */ struct ofp11_instruction_goto_table { ovs_be16 type; /* OFPIT_GOTO_TABLE */ ovs_be16 len; /* Length of this struct in bytes. */ uint8_t table_id; /* Set next table in the lookup pipeline */ uint8_t pad[3]; /* Pad to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp11_instruction_goto_table) == 8); /* Instruction structure for OFPIT_WRITE_METADATA */ struct ofp11_instruction_write_metadata { ovs_be16 type; /* OFPIT_WRITE_METADATA */ ovs_be16 len; /* Length of this struct in bytes. */ uint8_t pad[4]; /* Align to 64-bits */ ovs_be64 metadata; /* Metadata value to write */ ovs_be64 metadata_mask; /* Metadata write bitmask */ }; OFP_ASSERT(sizeof(struct ofp11_instruction_write_metadata) == 24); /* Instruction structure for OFPIT_WRITE/APPLY/CLEAR_ACTIONS */ struct ofp11_instruction_actions { ovs_be16 type; /* One of OFPIT_*_ACTIONS */ ovs_be16 len; /* Length of this struct in bytes. */ uint8_t pad[4]; /* Align to 64-bits */ /* struct ofp_action_header actions[0]; Actions associated with OFPIT_WRITE_ACTIONS and OFPIT_APPLY_ACTIONS */ }; OFP_ASSERT(sizeof(struct ofp11_instruction_actions) == 8); /* Instruction structure for experimental instructions */ struct ofp11_instruction_experimenter { ovs_be16 type; /* OFPIT11_EXPERIMENTER */ ovs_be16 len; /* Length of this struct in bytes */ ovs_be32 experimenter; /* Experimenter ID which takes the same form as in struct ofp_vendor_header. */ /* Experimenter-defined arbitrary additional data. */ }; OFP_ASSERT(sizeof(struct ofp11_instruction_experimenter) == 8); /* Action structure for OFPAT_OUTPUT, which sends packets out 'port'. * When the 'port' is the OFPP_CONTROLLER, 'max_len' indicates the max * number of bytes to send. 
A 'max_len' of zero means no bytes of the * packet should be sent. */ struct ofp11_action_output { ovs_be16 type; /* OFPAT11_OUTPUT. */ ovs_be16 len; /* Length is 16. */ ovs_be32 port; /* Output port. */ ovs_be16 max_len; /* Max length to send to controller. */ uint8_t pad[6]; /* Pad to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp11_action_output) == 16); /* Action structure for OFPAT_GROUP. */ struct ofp11_action_group { ovs_be16 type; /* OFPAT11_GROUP. */ ovs_be16 len; /* Length is 8. */ ovs_be32 group_id; /* Group identifier. */ }; OFP_ASSERT(sizeof(struct ofp11_action_group) == 8); /* OFPAT_SET_QUEUE action struct: send packets to given queue on port. */ struct ofp11_action_set_queue { ovs_be16 type; /* OFPAT11_SET_QUEUE. */ ovs_be16 len; /* Len is 8. */ ovs_be32 queue_id; /* Queue id for the packets. */ }; OFP_ASSERT(sizeof(struct ofp11_action_set_queue) == 8); /* Action structure for OFPAT11_SET_MPLS_LABEL. */ struct ofp11_action_mpls_label { ovs_be16 type; /* OFPAT11_SET_MPLS_LABEL. */ ovs_be16 len; /* Length is 8. */ ovs_be32 mpls_label; /* MPLS label */ }; OFP_ASSERT(sizeof(struct ofp11_action_mpls_label) == 8); /* Action structure for OFPAT11_SET_MPLS_TC. */ struct ofp11_action_mpls_tc { ovs_be16 type; /* OFPAT11_SET_MPLS_TC. */ ovs_be16 len; /* Length is 8. */ uint8_t mpls_tc; /* MPLS TC */ uint8_t pad[3]; /* Pad to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp11_action_mpls_tc) == 8); /* Action structure for OFPAT11_SET_MPLS_TTL. */ struct ofp11_action_mpls_ttl { ovs_be16 type; /* OFPAT11_SET_MPLS_TTL. */ ovs_be16 len; /* Length is 8. */ uint8_t mpls_ttl; /* MPLS TTL */ uint8_t pad[3]; /* Pad to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp11_action_mpls_ttl) == 8); /* Action structure for OFPAT11_SET_NW_ECN. */ struct ofp11_action_nw_ecn { ovs_be16 type; /* OFPAT11_SET_NW_ECN. */ ovs_be16 len; /* Length is 8. */ uint8_t nw_ecn; /* IP ECN (2 bits). */ uint8_t pad[3]; /* Pad to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp11_action_nw_ecn) == 8); /* Action structure for OFPAT11_SET_NW_TTL.
*/ struct ofp11_action_nw_ttl { ovs_be16 type; /* OFPAT11_SET_NW_TTL. */ ovs_be16 len; /* Length is 8. */ uint8_t nw_ttl; /* IP TTL */ uint8_t pad[3]; }; OFP_ASSERT(sizeof(struct ofp11_action_nw_ttl) == 8); /* Action structure for OFPAT11_PUSH_VLAN/MPLS. */ struct ofp11_action_push { ovs_be16 type; /* OFPAT11_PUSH_VLAN/MPLS. */ ovs_be16 len; /* Length is 8. */ ovs_be16 ethertype; /* Ethertype */ uint8_t pad[2]; }; OFP_ASSERT(sizeof(struct ofp11_action_push) == 8); /* Action structure for OFPAT11_POP_MPLS. */ struct ofp11_action_pop_mpls { ovs_be16 type; /* OFPAT11_POP_MPLS. */ ovs_be16 len; /* Length is 8. */ ovs_be16 ethertype; /* Ethertype */ uint8_t pad[2]; }; OFP_ASSERT(sizeof(struct ofp11_action_pop_mpls) == 8); /* Configure/Modify behavior of a flow table */ struct ofp11_table_mod { uint8_t table_id; /* ID of the table, 0xFF indicates all tables */ uint8_t pad[3]; /* Pad to 32 bits */ ovs_be32 config; /* Bitmap of OFPTC_* flags */ }; OFP_ASSERT(sizeof(struct ofp11_table_mod) == 8); /* Flags to indicate behavior of the flow table for unmatched packets. These flags are used in ofp_table_stats messages to describe the current configuration and in ofp_table_mod messages to configure table behavior. */ enum ofp11_table_config { OFPTC11_TABLE_MISS_CONTROLLER = 0, /* Send to controller. */ OFPTC11_TABLE_MISS_CONTINUE = 1 << 0, /* Continue to the next table in the pipeline (OpenFlow 1.0 behavior). */ OFPTC11_TABLE_MISS_DROP = 1 << 1, /* Drop the packet. */ OFPTC11_TABLE_MISS_MASK = 3 }; /* Flow setup and teardown (controller -> datapath). */ struct ofp11_flow_mod { ovs_be64 cookie; /* Opaque controller-issued identifier. */ ovs_be64 cookie_mask; /* Mask used to restrict the cookie bits that must match when the command is OFPFC_MODIFY* or OFPFC_DELETE*. A value of 0 indicates no restriction. */ /* Flow actions. */ uint8_t table_id; /* ID of the table to put the flow in */ uint8_t command; /* One of OFPFC_*. 
*/ ovs_be16 idle_timeout; /* Idle time before discarding (seconds). */ ovs_be16 hard_timeout; /* Max time before discarding (seconds). */ ovs_be16 priority; /* Priority level of flow entry. */ ovs_be32 buffer_id; /* Buffered packet to apply to (or -1). Not meaningful for OFPFC_DELETE*. */ ovs_be32 out_port; /* For OFPFC_DELETE* commands, require matching entries to include this as an output port. A value of OFPP_ANY indicates no restriction. */ ovs_be32 out_group; /* For OFPFC_DELETE* commands, require matching entries to include this as an output group. A value of OFPG11_ANY indicates no restriction. */ ovs_be16 flags; /* One of OFPFF_*. */ uint8_t pad[2]; /* Followed by an ofp11_match structure. */ /* Followed by an instruction set. */ }; OFP_ASSERT(sizeof(struct ofp11_flow_mod) == 40); /* Group types. Values in the range [128, 255] are reserved for experimental * use. */ enum ofp11_group_type { OFPGT11_ALL, /* All (multicast/broadcast) group. */ OFPGT11_SELECT, /* Select group. */ OFPGT11_INDIRECT, /* Indirect group. */ OFPGT11_FF /* Fast failover group. */ }; /* Group numbering. Groups can use any number up to OFPG11_MAX. */ enum ofp11_group { /* Last usable group number. */ OFPG11_MAX = 0xffffff00, /* Fake groups. */ OFPG11_ALL = 0xfffffffc, /* Represents all groups for group delete commands. */ OFPG11_ANY = 0xffffffff /* Wildcard group used only for flow stats requests. Selects all flows regardless of group (including flows with no group). */ }; /* Bucket for use in groups. */ struct ofp11_bucket { ovs_be16 len; /* Length of the bucket in bytes, including this header and any padding to make it 64-bit aligned. */ ovs_be16 weight; /* Relative weight of bucket. Only defined for select groups. */ ovs_be32 watch_port; /* Port whose state affects whether this bucket is live. Only required for fast failover groups. */ ovs_be32 watch_group; /* Group whose state affects whether this bucket is live. Only required for fast failover groups.
*/ uint8_t pad[4]; /* struct ofp_action_header actions[0]; The action length is inferred from the length field in the header. */ }; OFP_ASSERT(sizeof(struct ofp11_bucket) == 16); /* Queue configuration for a given port. */ struct ofp11_queue_get_config_reply { ovs_be32 port; uint8_t pad[4]; /* struct ofp_packet_queue queues[0]; List of configured queues. */ }; OFP_ASSERT(sizeof(struct ofp11_queue_get_config_reply) == 8); struct ofp11_stats_msg { struct ofp_header header; ovs_be16 type; /* One of the OFPST_* constants. */ ovs_be16 flags; /* OFPSF_REQ_* flags (none yet defined). */ uint8_t pad[4]; /* Followed by the body of the request. */ }; OFP_ASSERT(sizeof(struct ofp11_stats_msg) == 16); /* Vendor extension stats message. */ struct ofp11_vendor_stats_msg { struct ofp11_stats_msg osm; /* Type OFPST_VENDOR. */ ovs_be32 vendor; /* Vendor ID: * - MSB 0: low-order bytes are IEEE OUI. * - MSB != 0: defined by OpenFlow * consortium. */ /* Followed by vendor-defined arbitrary additional data. */ }; OFP_ASSERT(sizeof(struct ofp11_vendor_stats_msg) == 20); /* Stats request of type OFPST_FLOW. */ struct ofp11_flow_stats_request { uint8_t table_id; /* ID of table to read (from ofp_table_stats), 0xff for all tables. */ uint8_t pad[3]; /* Align to 64 bits. */ ovs_be32 out_port; /* Require matching entries to include this as an output port. A value of OFPP_ANY indicates no restriction. */ ovs_be32 out_group; /* Require matching entries to include this as an output group. A value of OFPG11_ANY indicates no restriction. */ uint8_t pad2[4]; /* Align to 64 bits. */ ovs_be64 cookie; /* Require matching entries to contain this cookie value */ ovs_be64 cookie_mask; /* Mask used to restrict the cookie bits that must match. A value of 0 indicates no restriction. */ /* Followed by an ofp11_match structure. */ }; OFP_ASSERT(sizeof(struct ofp11_flow_stats_request) == 32); /* Body of reply to OFPST_FLOW request. */ struct ofp11_flow_stats { ovs_be16 length; /* Length of this entry. 
*/ uint8_t table_id; /* ID of table flow came from. */ uint8_t pad; ovs_be32 duration_sec; /* Time flow has been alive in seconds. */ ovs_be32 duration_nsec; /* Time flow has been alive in nanoseconds beyond duration_sec. */ ovs_be16 priority; /* Priority of the entry. Only meaningful when this is not an exact-match entry. */ ovs_be16 idle_timeout; /* Number of seconds idle before expiration. */ ovs_be16 hard_timeout; /* Number of seconds before expiration. */ ovs_be16 flags; /* OF 1.3: Set of OFPFF*. */ uint8_t pad2[4]; /* Align to 64-bits. */ ovs_be64 cookie; /* Opaque controller-issued identifier. */ ovs_be64 packet_count; /* Number of packets in flow. */ ovs_be64 byte_count; /* Number of bytes in flow. */ /* Open Flow version specific match */ /* struct ofp11_instruction instructions[0]; Instruction set. */ }; OFP_ASSERT(sizeof(struct ofp11_flow_stats) == 48); /* Body for ofp_stats_request of type OFPST_AGGREGATE. */ /* Identical to ofp11_flow_stats_request */ /* Flow match fields. */ enum ofp11_flow_match_fields { OFPFMF11_IN_PORT = 1 << 0, /* Switch input port. */ OFPFMF11_DL_VLAN = 1 << 1, /* VLAN id. */ OFPFMF11_DL_VLAN_PCP = 1 << 2, /* VLAN priority. */ OFPFMF11_DL_TYPE = 1 << 3, /* Ethernet frame type. */ OFPFMF11_NW_TOS = 1 << 4, /* IP ToS (DSCP field, 6 bits). */ OFPFMF11_NW_PROTO = 1 << 5, /* IP protocol. */ OFPFMF11_TP_SRC = 1 << 6, /* TCP/UDP/SCTP source port. */ OFPFMF11_TP_DST = 1 << 7, /* TCP/UDP/SCTP destination port. */ OFPFMF11_MPLS_LABEL = 1 << 8, /* MPLS label. */ OFPFMF11_MPLS_TC = 1 << 9, /* MPLS TC. */ OFPFMF11_TYPE = 1 << 10, /* Match type. */ OFPFMF11_DL_SRC = 1 << 11, /* Ethernet source address. */ OFPFMF11_DL_DST = 1 << 12, /* Ethernet destination address. */ OFPFMF11_NW_SRC = 1 << 13, /* IP source address. */ OFPFMF11_NW_DST = 1 << 14, /* IP destination address. */ OFPFMF11_METADATA = 1 << 15, /* Metadata passed between tables. */ }; /* Body of reply to OFPST_TABLE request. 
*/ struct ofp11_table_stats { uint8_t table_id; /* Identifier of table. Lower numbered tables are consulted first. */ uint8_t pad[7]; /* Align to 64-bits. */ char name[OFP_MAX_TABLE_NAME_LEN]; ovs_be32 wildcards; /* Bitmap of OFPFMF_* wildcards that are supported by the table. */ ovs_be32 match; /* Bitmap of OFPFMF_* that indicate the fields the table can match on. */ ovs_be32 instructions; /* Bitmap of OFPIT_* values supported. */ ovs_be32 write_actions; /* Bitmap of OFPAT_* that are supported by the table with OFPIT_WRITE_ACTIONS. */ ovs_be32 apply_actions; /* Bitmap of OFPAT_* that are supported by the table with OFPIT_APPLY_ACTIONS. */ ovs_be32 config; /* Bitmap of OFPTC_* values */ ovs_be32 max_entries; /* Max number of entries supported. */ ovs_be32 active_count; /* Number of active entries. */ ovs_be64 lookup_count; /* Number of packets looked up in table. */ ovs_be64 matched_count; /* Number of packets that hit table. */ }; OFP_ASSERT(sizeof(struct ofp11_table_stats) == 88); /* Body for ofp_stats_request of type OFPST_PORT. */ struct ofp11_port_stats_request { ovs_be32 port_no; /* OFPST_PORT message must request statistics * either for a single port (specified in * port_no) or for all ports (if port_no == * OFPP_ANY). */ uint8_t pad[4]; }; OFP_ASSERT(sizeof(struct ofp11_port_stats_request) == 8); /* Body of reply to OFPST_PORT request. If a counter is unsupported, set * the field to all ones. */ struct ofp11_port_stats { ovs_be32 port_no; uint8_t pad[4]; /* Align to 64-bits. */ ovs_be64 rx_packets; /* Number of received packets. */ ovs_be64 tx_packets; /* Number of transmitted packets. */ ovs_be64 rx_bytes; /* Number of received bytes. */ ovs_be64 tx_bytes; /* Number of transmitted bytes. */ ovs_be64 rx_dropped; /* Number of packets dropped by RX. */ ovs_be64 tx_dropped; /* Number of packets dropped by TX. */ ovs_be64 rx_errors; /* Number of receive errors. 
This is a super-set of receive errors and should be greater than or equal to the sum of all rx_*_err values. */ ovs_be64 tx_errors; /* Number of transmit errors. This is a super-set of transmit errors. */ ovs_be64 rx_frame_err; /* Number of frame alignment errors. */ ovs_be64 rx_over_err; /* Number of packets with RX overrun. */ ovs_be64 rx_crc_err; /* Number of CRC errors. */ ovs_be64 collisions; /* Number of collisions. */ }; OFP_ASSERT(sizeof(struct ofp11_port_stats) == 104); struct ofp11_queue_stats_request { ovs_be32 port_no; /* All ports if OFPP_ANY. */ ovs_be32 queue_id; /* All queues if OFPQ_ALL. */ }; OFP_ASSERT(sizeof(struct ofp11_queue_stats_request) == 8); struct ofp11_queue_stats { ovs_be32 port_no; ovs_be32 queue_id; /* Queue id. */ ovs_be64 tx_bytes; /* Number of transmitted bytes. */ ovs_be64 tx_packets; /* Number of transmitted packets. */ ovs_be64 tx_errors; /* # of packets dropped due to overrun. */ }; OFP_ASSERT(sizeof(struct ofp11_queue_stats) == 32); struct ofp11_group_stats_request { ovs_be32 group_id; /* All groups if OFPG11_ALL. */ uint8_t pad[4]; /* Align to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp11_group_stats_request) == 8); /* Body of reply to OFPST11_GROUP request */ struct ofp11_group_stats { ovs_be16 length; /* Length of this entry. */ uint8_t pad[2]; /* Align to 64 bits. */ ovs_be32 group_id; /* Group identifier. */ ovs_be32 ref_count; /* Number of flows or groups that directly forward to this group. */ uint8_t pad2[4]; /* Align to 64 bits. */ ovs_be64 packet_count; /* Number of packets processed by group. */ ovs_be64 byte_count; /* Number of bytes processed by group. */ /* struct ofp11_bucket_counter bucket_stats[0]; */ }; OFP_ASSERT(sizeof(struct ofp11_group_stats) == 32); /* Used in group stats replies. */ struct ofp11_bucket_counter { ovs_be64 packet_count; /* Number of packets processed by bucket. */ ovs_be64 byte_count; /* Number of bytes processed by bucket.
*/ }; OFP_ASSERT(sizeof(struct ofp11_bucket_counter) == 16); /* Body of reply to OFPST11_GROUP_DESC request. */ struct ofp11_group_desc_stats { ovs_be16 length; /* Length of this entry. */ uint8_t type; /* One of OFPGT_*. */ uint8_t pad; /* Pad to 64 bits. */ ovs_be32 group_id; /* Group identifier. */ /* struct ofp11_bucket buckets[0]; */ }; OFP_ASSERT(sizeof(struct ofp11_group_desc_stats) == 8); /* Send packet (controller -> datapath). */ struct ofp11_packet_out { ovs_be32 buffer_id; /* ID assigned by datapath (-1 if none). */ ovs_be32 in_port; /* Packet's input port or OFPP_CONTROLLER. */ ovs_be16 actions_len; /* Size of action array in bytes. */ uint8_t pad[6]; /* struct ofp_action_header actions[0]; Action list. */ /* uint8_t data[0]; */ /* Packet data. The length is inferred from the length field in the header. (Only meaningful if buffer_id == -1.) */ }; OFP_ASSERT(sizeof(struct ofp11_packet_out) == 16); /* Packet received on port (datapath -> controller). */ struct ofp11_packet_in { ovs_be32 buffer_id; /* ID assigned by datapath. */ ovs_be32 in_port; /* Port on which frame was received. */ ovs_be32 in_phy_port; /* Physical Port on which frame was received. */ ovs_be16 total_len; /* Full length of frame. */ uint8_t reason; /* Reason packet is being sent (one of OFPR_*) */ uint8_t table_id; /* ID of the table that was looked up */ /* uint8_t data[0]; Ethernet frame, halfway through 32-bit word, so the IP header is 32-bit aligned. The amount of data is inferred from the length field in the header. Because of padding, offsetof(struct ofp_packet_in, data) == sizeof(struct ofp_packet_in) - 2. */ }; OFP_ASSERT(sizeof(struct ofp11_packet_in) == 16); /* Flow removed (datapath -> controller). */ struct ofp11_flow_removed { ovs_be64 cookie; /* Opaque controller-issued identifier. */ ovs_be16 priority; /* Priority level of flow entry. */ uint8_t reason; /* One of OFPRR_*. */ uint8_t table_id; /* ID of the table */ ovs_be32 duration_sec; /* Time flow was alive in seconds. 
*/ ovs_be32 duration_nsec; /* Time flow was alive in nanoseconds beyond duration_sec. */ ovs_be16 idle_timeout; /* Idle timeout from original flow mod. */ uint8_t pad2[2]; /* Align to 64-bits. */ ovs_be64 packet_count; ovs_be64 byte_count; /* Followed by an ofp11_match structure. */ }; OFP_ASSERT(sizeof(struct ofp11_flow_removed) == 40); #endif /* openflow/openflow-1.1.h */ openvswitch-2.0.1+git20140120/include/openflow/openflow-1.2.h000066400000000000000000000444461226605124000232420ustar00rootroot00000000000000/* Copyright (c) 2008, 2011, 2012, 2013 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation * (Software) available for public use and benefit with the expectation * that others will use, modify and enhance the Software and contribute * those enhancements back to the community. However, since we would * like to make the Software available for broadest use, with as few * restrictions as possible permission is hereby granted, free of * charge, to any person obtaining a copy of this Software to deal in * the Software under the copyrights without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * The name and trademarks of copyright holder(s) may NOT be used in * advertising or publicity pertaining to the Software or any * derivatives without specific, written prior permission. */ /* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * Copyright (c) 2012 Horms Solutions Ltd. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* OpenFlow: protocol between controller and datapath. */ #ifndef OPENFLOW_12_H #define OPENFLOW_12_H 1 #include "openflow/openflow-1.1.h" /* Error type for experimenter error messages. */ #define OFPET12_EXPERIMENTER 0xffff /* * OXM Class IDs. * The high order bit differentiate reserved classes from member classes. * Classes 0x0000 to 0x7FFF are member classes, allocated by ONF. * Classes 0x8000 to 0xFFFE are reserved classes, reserved for standardisation. */ enum ofp12_oxm_class { OFPXMC12_NXM_0 = 0x0000, /* Backward compatibility with NXM */ OFPXMC12_NXM_1 = 0x0001, /* Backward compatibility with NXM */ OFPXMC12_OPENFLOW_BASIC = 0x8000, /* Basic class for OpenFlow */ OFPXMC12_EXPERIMENTER = 0xffff, /* Experimenter class */ }; /* OXM Flow match field types for OpenFlow basic class. */ enum oxm12_ofb_match_fields { OFPXMT12_OFB_IN_PORT, /* Switch input port. 
*/ OFPXMT12_OFB_IN_PHY_PORT, /* Switch physical input port. */ OFPXMT12_OFB_METADATA, /* Metadata passed between tables. */ OFPXMT12_OFB_ETH_DST, /* Ethernet destination address. */ OFPXMT12_OFB_ETH_SRC, /* Ethernet source address. */ OFPXMT12_OFB_ETH_TYPE, /* Ethernet frame type. */ OFPXMT12_OFB_VLAN_VID, /* VLAN id. */ OFPXMT12_OFB_VLAN_PCP, /* VLAN priority. */ OFPXMT12_OFB_IP_DSCP, /* IP DSCP (6 bits in ToS field). */ OFPXMT12_OFB_IP_ECN, /* IP ECN (2 bits in ToS field). */ OFPXMT12_OFB_IP_PROTO, /* IP protocol. */ OFPXMT12_OFB_IPV4_SRC, /* IPv4 source address. */ OFPXMT12_OFB_IPV4_DST, /* IPv4 destination address. */ OFPXMT12_OFB_TCP_SRC, /* TCP source port. */ OFPXMT12_OFB_TCP_DST, /* TCP destination port. */ OFPXMT12_OFB_UDP_SRC, /* UDP source port. */ OFPXMT12_OFB_UDP_DST, /* UDP destination port. */ OFPXMT12_OFB_SCTP_SRC, /* SCTP source port. */ OFPXMT12_OFB_SCTP_DST, /* SCTP destination port. */ OFPXMT12_OFB_ICMPV4_TYPE, /* ICMP type. */ OFPXMT12_OFB_ICMPV4_CODE, /* ICMP code. */ OFPXMT12_OFB_ARP_OP, /* ARP opcode. */ OFPXMT12_OFB_ARP_SPA, /* ARP source IPv4 address. */ OFPXMT12_OFB_ARP_TPA, /* ARP target IPv4 address. */ OFPXMT12_OFB_ARP_SHA, /* ARP source hardware address. */ OFPXMT12_OFB_ARP_THA, /* ARP target hardware address. */ OFPXMT12_OFB_IPV6_SRC, /* IPv6 source address. */ OFPXMT12_OFB_IPV6_DST, /* IPv6 destination address. */ OFPXMT12_OFB_IPV6_FLABEL, /* IPv6 Flow Label */ OFPXMT12_OFB_ICMPV6_TYPE, /* ICMPv6 type. */ OFPXMT12_OFB_ICMPV6_CODE, /* ICMPv6 code. */ OFPXMT12_OFB_IPV6_ND_TARGET, /* Target address for ND. */ OFPXMT12_OFB_IPV6_ND_SLL, /* Source link-layer for ND. */ OFPXMT12_OFB_IPV6_ND_TLL, /* Target link-layer for ND. */ OFPXMT12_OFB_MPLS_LABEL, /* MPLS label. */ OFPXMT12_OFB_MPLS_TC, /* MPLS TC. */ /* Following added in OpenFlow 1.3 */ OFPXMT12_OFB_MPLS_BOS, /* MPLS BoS bit. */ OFPXMT12_OFB_PBB_ISID, /* PBB I-SID. 
*/ OFPXMT12_OFB_TUNNEL_ID, /* Logical Port Metadata */ OFPXMT12_OFB_IPV6_EXTHDR, /* IPv6 Extension Header pseudo-field */ /* End Marker */ OFPXMT12_OFB_MAX, }; #define OFPXMT12_MASK ((1ULL << OFPXMT12_OFB_MAX) - 1) /* OXM implementation makes use of NXM as they are the same format * with different field definitions */ #define OXM_HEADER(FIELD, LENGTH) \ NXM_HEADER(OFPXMC12_OPENFLOW_BASIC, FIELD, LENGTH) #define OXM_HEADER_W(FIELD, LENGTH) \ NXM_HEADER_W(OFPXMC12_OPENFLOW_BASIC, FIELD, LENGTH) #define IS_OXM_HEADER(header) (NXM_VENDOR(header) == OFPXMC12_OPENFLOW_BASIC) #define OXM_OF_IN_PORT OXM_HEADER (OFPXMT12_OFB_IN_PORT, 4) #define OXM_OF_IN_PHY_PORT OXM_HEADER (OFPXMT12_OFB_IN_PHY_PORT, 4) #define OXM_OF_METADATA OXM_HEADER (OFPXMT12_OFB_METADATA, 8) #define OXM_OF_ETH_DST OXM_HEADER (OFPXMT12_OFB_ETH_DST, 6) #define OXM_OF_ETH_DST_W OXM_HEADER_W (OFPXMT12_OFB_ETH_DST, 6) #define OXM_OF_ETH_SRC OXM_HEADER (OFPXMT12_OFB_ETH_SRC, 6) #define OXM_OF_ETH_SRC_W OXM_HEADER_W (OFPXMT12_OFB_ETH_SRC, 6) #define OXM_OF_ETH_TYPE OXM_HEADER (OFPXMT12_OFB_ETH_TYPE, 2) #define OXM_OF_VLAN_VID OXM_HEADER (OFPXMT12_OFB_VLAN_VID, 2) #define OXM_OF_VLAN_VID_W OXM_HEADER_W (OFPXMT12_OFB_VLAN_VID, 2) #define OXM_OF_VLAN_PCP OXM_HEADER (OFPXMT12_OFB_VLAN_PCP, 1) #define OXM_OF_IP_DSCP OXM_HEADER (OFPXMT12_OFB_IP_DSCP, 1) #define OXM_OF_IP_ECN OXM_HEADER (OFPXMT12_OFB_IP_ECN, 1) #define OXM_OF_IP_PROTO OXM_HEADER (OFPXMT12_OFB_IP_PROTO, 1) #define OXM_OF_IPV4_SRC OXM_HEADER (OFPXMT12_OFB_IPV4_SRC, 4) #define OXM_OF_IPV4_SRC_W OXM_HEADER_W (OFPXMT12_OFB_IPV4_SRC, 4) #define OXM_OF_IPV4_DST OXM_HEADER (OFPXMT12_OFB_IPV4_DST, 4) #define OXM_OF_IPV4_DST_W OXM_HEADER_W (OFPXMT12_OFB_IPV4_DST, 4) #define OXM_OF_TCP_SRC OXM_HEADER (OFPXMT12_OFB_TCP_SRC, 2) #define OXM_OF_TCP_DST OXM_HEADER (OFPXMT12_OFB_TCP_DST, 2) #define OXM_OF_UDP_SRC OXM_HEADER (OFPXMT12_OFB_UDP_SRC, 2) #define OXM_OF_UDP_DST OXM_HEADER (OFPXMT12_OFB_UDP_DST, 2) #define OXM_OF_SCTP_SRC OXM_HEADER 
(OFPXMT12_OFB_SCTP_SRC, 2) #define OXM_OF_SCTP_DST OXM_HEADER (OFPXMT12_OFB_SCTP_DST, 2) #define OXM_OF_ICMPV4_TYPE OXM_HEADER (OFPXMT12_OFB_ICMPV4_TYPE, 1) #define OXM_OF_ICMPV4_CODE OXM_HEADER (OFPXMT12_OFB_ICMPV4_CODE, 1) #define OXM_OF_ARP_OP OXM_HEADER (OFPXMT12_OFB_ARP_OP, 2) #define OXM_OF_ARP_SPA OXM_HEADER (OFPXMT12_OFB_ARP_SPA, 4) #define OXM_OF_ARP_SPA_W OXM_HEADER_W (OFPXMT12_OFB_ARP_SPA, 4) #define OXM_OF_ARP_TPA OXM_HEADER (OFPXMT12_OFB_ARP_TPA, 4) #define OXM_OF_ARP_TPA_W OXM_HEADER_W (OFPXMT12_OFB_ARP_TPA, 4) #define OXM_OF_ARP_SHA OXM_HEADER (OFPXMT12_OFB_ARP_SHA, 6) #define OXM_OF_ARP_SHA_W OXM_HEADER_W (OFPXMT12_OFB_ARP_SHA, 6) #define OXM_OF_ARP_THA OXM_HEADER (OFPXMT12_OFB_ARP_THA, 6) #define OXM_OF_ARP_THA_W OXM_HEADER_W (OFPXMT12_OFB_ARP_THA, 6) #define OXM_OF_IPV6_SRC OXM_HEADER (OFPXMT12_OFB_IPV6_SRC, 16) #define OXM_OF_IPV6_SRC_W OXM_HEADER_W (OFPXMT12_OFB_IPV6_SRC, 16) #define OXM_OF_IPV6_DST OXM_HEADER (OFPXMT12_OFB_IPV6_DST, 16) #define OXM_OF_IPV6_DST_W OXM_HEADER_W (OFPXMT12_OFB_IPV6_DST, 16) #define OXM_OF_IPV6_FLABEL OXM_HEADER (OFPXMT12_OFB_IPV6_FLABEL, 4) #define OXM_OF_IPV6_FLABEL_W OXM_HEADER_W (OFPXMT12_OFB_IPV6_FLABEL, 4) #define OXM_OF_ICMPV6_TYPE OXM_HEADER (OFPXMT12_OFB_ICMPV6_TYPE, 1) #define OXM_OF_ICMPV6_CODE OXM_HEADER (OFPXMT12_OFB_ICMPV6_CODE, 1) #define OXM_OF_IPV6_ND_TARGET OXM_HEADER (OFPXMT12_OFB_IPV6_ND_TARGET, 16) #define OXM_OF_IPV6_ND_SLL OXM_HEADER (OFPXMT12_OFB_IPV6_ND_SLL, 6) #define OXM_OF_IPV6_ND_TLL OXM_HEADER (OFPXMT12_OFB_IPV6_ND_TLL, 6) #define OXM_OF_MPLS_LABEL OXM_HEADER (OFPXMT12_OFB_MPLS_LABEL, 4) #define OXM_OF_MPLS_TC OXM_HEADER (OFPXMT12_OFB_MPLS_TC, 1) #define OXM_OF_MPLS_BOS OXM_HEADER (OFPXMT12_OFB_MPLS_BOS, 1) #define OXM_OF_PBB_ISID OXM_HEADER (OFPXMT12_OFB_PBB_ISID, 4) #define OXM_OF_PBB_ISID_W OXM_HEADER_W (OFPXMT12_OFB_PBB_ISID, 4) #define OXM_OF_TUNNEL_ID OXM_HEADER (OFPXMT12_OFB_TUNNEL_ID, 8) #define OXM_OF_TUNNEL_ID_W OXM_HEADER_W (OFPXMT12_OFB_TUNNEL_ID, 8) #define 
OXM_OF_IPV6_EXTHDR OXM_HEADER (OFPXMT12_OFB_IPV6_EXTHDR, 2) #define OXM_OF_IPV6_EXTHDR_W OXM_HEADER_W (OFPXMT12_OFB_IPV6_EXTHDR, 2) /* The VLAN id is 12-bits, so we can use the entire 16 bits to indicate * special conditions. */ enum ofp12_vlan_id { OFPVID12_PRESENT = 0x1000, /* Bit that indicates that a VLAN id is set */ OFPVID12_NONE = 0x0000, /* No VLAN id was set. */ }; /* Bit definitions for IPv6 Extension Header pseudo-field. */ enum ofp12_ipv6exthdr_flags { OFPIEH12_NONEXT = 1 << 0, /* "No next header" encountered. */ OFPIEH12_ESP = 1 << 1, /* Encrypted Sec Payload header present. */ OFPIEH12_AUTH = 1 << 2, /* Authentication header present. */ OFPIEH12_DEST = 1 << 3, /* 1 or 2 dest headers present. */ OFPIEH12_FRAG = 1 << 4, /* Fragment header present. */ OFPIEH12_ROUTER = 1 << 5, /* Router header present. */ OFPIEH12_HOP = 1 << 6, /* Hop-by-hop header present. */ OFPIEH12_UNREP = 1 << 7, /* Unexpected repeats encountered. */ OFPIEH12_UNSEQ = 1 << 8 /* Unexpected sequencing encountered. */ }; /* Header for OXM experimenter match fields. */ struct ofp12_oxm_experimenter_header { ovs_be32 oxm_header; /* oxm_class = OFPXMC12_EXPERIMENTER */ ovs_be32 experimenter; /* Experimenter ID which takes the same form as in struct ofp11_experimenter_header. */ }; OFP_ASSERT(sizeof(struct ofp12_oxm_experimenter_header) == 8); enum ofp12_action_type { OFPAT12_SET_FIELD = 25, /* Set a header field using OXM TLV format. */ }; enum ofp12_controller_max_len { OFPCML12_MAX = 0xffe5, /* maximum max_len value which can be used * to request a specific byte length. */ OFPCML12_NO_BUFFER = 0xffff /* indicates that no buffering should be * applied and the whole packet is to be * sent to the controller. */ }; /* Action structure for OFPAT12_SET_FIELD. */ struct ofp12_action_set_field { ovs_be16 type; /* OFPAT12_SET_FIELD. */ ovs_be16 len; /* Length is padded to 64 bits.
*/ ovs_be32 dst; /* OXM TLV header */ /* Followed by: * - Exactly ((oxm_len + 4) + 7)/8*8 - (oxm_len + 4) (between 0 and 7) * bytes of all-zero bytes */ }; OFP_ASSERT(sizeof(struct ofp12_action_set_field) == 8); /* OpenFlow 1.2 specific flags * (struct ofp12_flow_mod, member flags). */ enum ofp12_flow_mod_flags { OFPFF12_RESET_COUNTS = 1 << 2 /* Reset flow packet and byte counts. */ }; /* OpenFlow 1.2 specific capabilities * (struct ofp_switch_features, member capabilities). */ enum ofp12_capabilities { OFPC12_PORT_BLOCKED = 1 << 8 /* Switch will block looping ports. */ }; /* OpenFlow 1.2 specific types * (struct ofp11_stats_request/reply, member type). */ enum ofp12_stats_types { /* Group features. * The request body is empty. * The reply body is struct ofp12_group_features_stats. */ OFPST12_GROUP_FEATURES = 8 }; /* OpenFlow 1.2 specific properties * (struct ofp_queue_prop_header member property). */ enum ofp12_queue_properties { OFPQT12_MIN_RATE = 1, /* Minimum datarate guaranteed. */ OFPQT12_MAX_RATE = 2, /* Maximum datarate. */ OFPQT12_EXPERIMENTER = 0xffff /* Experimenter defined property. */ }; /* Body of reply to OFPST_TABLE request. */ struct ofp12_table_stats { uint8_t table_id; /* Identifier of table. Lower numbered tables are consulted first. */ uint8_t pad[7]; /* Align to 64-bits. */ char name[OFP_MAX_TABLE_NAME_LEN]; ovs_be64 match; /* Bitmap of (1 << OFPXMT_*) that indicate the fields the table can match on. */ ovs_be64 wildcards; /* Bitmap of (1 << OFPXMT_*) wildcards that are supported by the table. */ ovs_be32 write_actions; /* Bitmap of OFPAT_* that are supported by the table with OFPIT_WRITE_ACTIONS. */ ovs_be32 apply_actions; /* Bitmap of OFPAT_* that are supported by the table with OFPIT_APPLY_ACTIONS. */ ovs_be64 write_setfields;/* Bitmap of (1 << OFPXMT_*) header fields that can be set with OFPIT_WRITE_ACTIONS. */ ovs_be64 apply_setfields;/* Bitmap of (1 << OFPXMT_*) header fields that can be set with OFPIT_APPLY_ACTIONS. 
*/ ovs_be64 metadata_match; /* Bits of metadata table can match. */ ovs_be64 metadata_write; /* Bits of metadata table can write. */ ovs_be32 instructions; /* Bitmap of OFPIT_* values supported. */ ovs_be32 config; /* Bitmap of OFPTC_* values */ ovs_be32 max_entries; /* Max number of entries supported. */ ovs_be32 active_count; /* Number of active entries. */ ovs_be64 lookup_count; /* Number of packets looked up in table. */ ovs_be64 matched_count; /* Number of packets that hit table. */ }; OFP_ASSERT(sizeof(struct ofp12_table_stats) == 128); /* Body of reply to OFPST12_GROUP_FEATURES request. Group features. */ struct ofp12_group_features_stats { ovs_be32 types; /* Bitmap of OFPGT_* values supported. */ ovs_be32 capabilities; /* Bitmap of OFPGFC12_* capability supported. */ ovs_be32 max_groups[4]; /* Maximum number of groups for each type. */ ovs_be32 actions[4]; /* Bitmaps of OFPAT_* that are supported. */ }; OFP_ASSERT(sizeof(struct ofp12_group_features_stats) == 40); /* Group configuration flags */ enum ofp12_group_capabilities { OFPGFC12_SELECT_WEIGHT = 1 << 0, /* Support weight for select groups */ OFPGFC12_SELECT_LIVENESS = 1 << 1, /* Support liveness for select groups */ OFPGFC12_CHAINING = 1 << 2, /* Support chaining groups */ OFPGFC12_CHAINING_CHECKS = 1 << 3, /* Check chaining for loops and delete */ }; /* Body for ofp12_stats_request/reply of type OFPST_EXPERIMENTER. */ struct ofp12_experimenter_stats_header { ovs_be32 experimenter; /* Experimenter ID which takes the same form as in struct ofp_experimenter_header. */ ovs_be32 exp_type; /* Experimenter defined. */ /* Experimenter-defined arbitrary additional data. */ }; OFP_ASSERT(sizeof(struct ofp12_experimenter_stats_header) == 8); /* Role request and reply message. */ struct ofp12_role_request { ovs_be32 role; /* One of OFPCR12_ROLE_*. */ uint8_t pad[4]; /* Align to 64 bits. 
*/ ovs_be64 generation_id; /* Master Election Generation Id */ }; OFP_ASSERT(sizeof(struct ofp12_role_request) == 16); /* Controller roles. */ enum ofp12_controller_role { OFPCR12_ROLE_NOCHANGE, /* Don't change current role. */ OFPCR12_ROLE_EQUAL, /* Default role, full access. */ OFPCR12_ROLE_MASTER, /* Full access, at most one master. */ OFPCR12_ROLE_SLAVE, /* Read-only access. */ }; /* Packet received on port (datapath -> controller). */ struct ofp12_packet_in { ovs_be32 buffer_id; /* ID assigned by datapath. */ ovs_be16 total_len; /* Full length of frame. */ uint8_t reason; /* Reason packet is being sent (one of OFPR_*) */ uint8_t table_id; /* ID of the table that was looked up */ /* Followed by: * - Match * - Exactly 2 all-zero padding bytes, then * - An Ethernet frame whose length is inferred from header.length. * The padding bytes preceding the Ethernet frame ensure that the IP * header (if any) following the Ethernet header is 32-bit aligned. */ /* struct ofp12_match match; */ /* uint8_t pad[2]; Align to 64 bit + 16 bit */ /* uint8_t data[0]; Ethernet frame */ }; OFP_ASSERT(sizeof(struct ofp12_packet_in) == 8); /* Flow removed (datapath -> controller). */ struct ofp12_flow_removed { ovs_be64 cookie; /* Opaque controller-issued identifier. */ ovs_be16 priority; /* Priority level of flow entry. */ uint8_t reason; /* One of OFPRR_*. */ uint8_t table_id; /* ID of the table */ ovs_be32 duration_sec; /* Time flow was alive in seconds. */ ovs_be32 duration_nsec; /* Time flow was alive in nanoseconds beyond duration_sec. */ ovs_be16 idle_timeout; /* Idle timeout from original flow mod. */ ovs_be16 hard_timeout; /* Hard timeout from original flow mod. */ ovs_be64 packet_count; ovs_be64 byte_count; /* struct ofp12_match match; Description of fields. Variable size. 
*/ }; OFP_ASSERT(sizeof(struct ofp12_flow_removed) == 40); #endif /* openflow/openflow-1.2.h */ openvswitch-2.0.1+git20140120/include/openflow/openflow-1.3.h000066400000000000000000000472001226605124000232320ustar00rootroot00000000000000/* Copyright (c) 2008 The Board of Trustees of The Leland Stanford * Junior University * Copyright (c) 2011, 2012 Open Networking Foundation * * We are making the OpenFlow specification and associated documentation * (Software) available for public use and benefit with the expectation * that others will use, modify and enhance the Software and contribute * those enhancements back to the community. However, since we would * like to make the Software available for broadest use, with as few * restrictions as possible permission is hereby granted, free of * charge, to any person obtaining a copy of this Software to deal in * the Software under the copyrights without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * The name and trademarks of copyright holder(s) may NOT be used in * advertising or publicity pertaining to the Software or any * derivatives without specific, written prior permission. 
*/ /* OpenFlow: protocol between controller and datapath. */ #ifndef OPENFLOW_13_H #define OPENFLOW_13_H 1 #include "openflow/openflow-1.2.h" /* * OpenFlow 1.3 modifies the syntax of the following message types: * * OFPT_FEATURES_REPLY = 6 (opf13_switch_features) * - new field: auxiliary_id * - removed: ofp_ports at the end * * OFPT_PACKET_IN = 10 (ofp13_packet_in) new field: cookie * * OpenFlow 1.3 adds following new message types: * * * Asynchronous message configuration. * * OFPT13_GET_ASYNC_REQUEST = 26 (void) * OFPT13_GET_ASYNC_REPLY = 27 (ofp13_async_config) * OFPT13_SET_ASYNC = 28 (ofp13_async_config) * * * Meters and rate limiters configuration messages. * * OFPT13_METER_MOD = 29 (ofp13_meter_mod) * * OpenFlow 1.3 modifies the syntax of the following statistics message types * (now called multipart message types): * * OFPMP13_FLOW_REPLY = 1 (struct ofp13_flow_stats[]) * OFPMP13_TABLE_REPLY = 3 (struct ofp13_table_stats[]) * OFPMP13_PORT_REPLY = 4 (struct ofp13_port_stats[]) * OFPMP13_QUEUE_REPLY = 5, (struct ofp13_queue_stats[]) * OFPMP13_GROUP_REPLY = 6, (struct ofp13_group_stats[]) * * OpenFlow 1.3 adds the following multipart message types * * Meter statistics: * OFPMP13_METER_REQUEST = 9, (struct ofp13_meter_multipart_request) * OFPMP13_METER_REPLY = 9, (struct ofp13_meter_stats[]) * * Meter configuration: * OFPMP13_METER_CONFIG_REQUEST = 10, (struct ofp13_meter_multipart_request) * OFPMP13_METER_CONFIG_REPLY = 10, (struct ofp13_meter_config[]) * * Meter features: * OFPMP13_METER_FEATURES_REQUEST = 11 (void) * OFPMP13_METER_FEATURES_REPLY = 11 (struct ofp13_meter_features) * * Table features: * OFPMP13_TABLE_FEATURES_REQUEST = 12, (struct ofp13_table_features[]) * OFPMP13_TABLE_FEATURES_REPLY = 12, (struct ofp13_table_features[]) * */ enum ofp13_instruction_type { OFPIT13_METER = 6 /* Apply meter (rate limiter) */ }; #define OFPIT13_ALL (OFPIT11_GOTO_TABLE | OFPIT11_WRITE_METADATA | \ OFPIT11_WRITE_ACTIONS | OFPIT11_APPLY_ACTIONS | \ 
OFPIT11_CLEAR_ACTIONS | OFPIT13_METER) /* Instruction structure for OFPIT_METER */ struct ofp13_instruction_meter { ovs_be16 type; /* OFPIT13_METER */ ovs_be16 len; /* Length is 8. */ ovs_be32 meter_id; /* Meter instance. */ }; OFP_ASSERT(sizeof(struct ofp13_instruction_meter) == 8); enum ofp13_action_type { OFPAT13_PUSH_PBB = 26, /* Push a new PBB service tag (I-TAG) */ OFPAT13_POP_PBB = 27 /* Pop the outer PBB service tag (I-TAG) */ }; /* enum ofp_config_flags value OFPC_INVALID_TTL_TO_CONTROLLER * is deprecated in OpenFlow 1.3 */ /* Flags to configure the table. Reserved for future use. */ enum ofp13_table_config { OFPTC13_DEPRECATED_MASK = 3 /* Deprecated bits */ }; /* OpenFlow 1.3 specific flags * (struct ofp12_flow_mod, member flags). */ enum ofp13_flow_mod_flags { OFPFF13_NO_PKT_COUNTS = 1 << 3, /* Don't keep track of packet count. */ OFPFF13_NO_BYT_COUNTS = 1 << 4 /* Don't keep track of byte count. */ }; /* Common header for all meter bands */ struct ofp13_meter_band_header { ovs_be16 type; /* One of OFPMBT_*. */ ovs_be16 len; /* Length in bytes of this band. */ ovs_be32 rate; /* Rate for this band. */ ovs_be32 burst_size; /* Size of bursts. */ }; OFP_ASSERT(sizeof(struct ofp13_meter_band_header) == 12); /* Meter configuration. OFPT_METER_MOD. */ struct ofp13_meter_mod { ovs_be16 command; /* One of OFPMC_*. */ ovs_be16 flags; /* Set of OFPMF_*. */ ovs_be32 meter_id; /* Meter instance. */ /* struct ofp13_meter_band_header bands[0]; The bands length is inferred from the length field in the header. */ }; OFP_ASSERT(sizeof(struct ofp13_meter_mod) == 8); /* Meter numbering. Flow meters can use any number up to OFPM_MAX. */ enum ofp13_meter { /* Last usable meter. */ OFPM13_MAX = 0xffff0000, /* Virtual meters. */ OFPM13_SLOWPATH = 0xfffffffd, /* Meter for slow datapath. */ OFPM13_CONTROLLER = 0xfffffffe, /* Meter for controller connection. */ OFPM13_ALL = 0xffffffff, /* Represents all meters for stat requests commands. 
*/ }; /* Meter commands */ enum ofp13_meter_mod_command { OFPMC13_ADD, /* New meter. */ OFPMC13_MODIFY, /* Modify specified meter. */ OFPMC13_DELETE /* Delete specified meter. */ }; /* Meter configuration flags */ enum ofp13_meter_flags { OFPMF13_KBPS = 1 << 0, /* Rate value in kb/s (kilo-bit per second). */ OFPMF13_PKTPS = 1 << 1, /* Rate value in packet/sec. */ OFPMF13_BURST = 1 << 2, /* Do burst size. */ OFPMF13_STATS = 1 << 3 /* Collect statistics. */ }; /* Meter band types */ enum ofp13_meter_band_type { OFPMBT13_DROP = 1, /* Drop packet. */ OFPMBT13_DSCP_REMARK = 2, /* Remark DSCP in the IP header. */ OFPMBT13_EXPERIMENTER = 0xFFFF /* Experimenter meter band. */ }; /* OFPMBT_DROP band - drop packets */ struct ofp13_meter_band_drop { ovs_be16 type; /* OFPMBT_DROP. */ ovs_be16 len; /* Length in bytes of this band. */ ovs_be32 rate; /* Rate for dropping packets. */ ovs_be32 burst_size; /* Size of bursts. */ uint8_t pad[4]; }; OFP_ASSERT(sizeof(struct ofp13_meter_band_drop) == 16); /* OFPMBT_DSCP_REMARK band - Remark DSCP in the IP header */ struct ofp13_meter_band_dscp_remark { ovs_be16 type; /* OFPMBT_DSCP_REMARK. */ ovs_be16 len; /* Length in bytes of this band. */ ovs_be32 rate; /* Rate for remarking packets. */ ovs_be32 burst_size; /* Size of bursts. */ uint8_t prec_level; /* Number of drop precedence level to add. */ uint8_t pad[3]; }; OFP_ASSERT(sizeof(struct ofp13_meter_band_dscp_remark) == 16); /* OFPMBT_EXPERIMENTER band - Write actions in action set */ struct ofp13_meter_band_experimenter { ovs_be16 type; /* OFPMBT_EXPERIMENTER. */ ovs_be16 len; /* Length in bytes of this band. */ ovs_be32 rate; /* Rate for dropping packets. */ ovs_be32 burst_size; /* Size of bursts. */ ovs_be32 experimenter; /* Experimenter ID which takes the same form as in struct ofp_experimenter_header. 
*/ }; OFP_ASSERT(sizeof(struct ofp13_meter_band_experimenter) == 16); /* OF 1.3 adds MORE flag also for requests */ enum ofp13_multipart_request_flags { OFPMPF13_REQ_MORE = 1 << 0 /* More requests to follow. */ }; /* OF 1.3 splits table features off the ofp_table_stats */ /* Body of reply to OFPMP13_TABLE request. */ struct ofp13_table_stats { uint8_t table_id; /* Identifier of table. Lower numbered tables are consulted first. */ uint8_t pad[3]; /* Align to 32-bits. */ ovs_be32 active_count; /* Number of active entries. */ ovs_be64 lookup_count; /* Number of packets looked up in table. */ ovs_be64 matched_count; /* Number of packets that hit table. */ }; OFP_ASSERT(sizeof(struct ofp13_table_stats) == 24); /* Common header for all Table Feature Properties */ struct ofp13_table_feature_prop_header { ovs_be16 type; /* One of OFPTFPT_*. */ ovs_be16 length; /* Length in bytes of this property. */ }; OFP_ASSERT(sizeof(struct ofp13_table_feature_prop_header) == 4); /* Body for ofp_multipart_request of type OFPMP_TABLE_FEATURES./ * Body of reply to OFPMP_TABLE_FEATURES request. */ struct ofp13_table_features { ovs_be16 length; /* Length is padded to 64 bits. */ uint8_t table_id; /* Identifier of table. Lower numbered tables are consulted first. */ uint8_t pad[5]; /* Align to 64-bits. */ char name[OFP_MAX_TABLE_NAME_LEN]; ovs_be64 metadata_match; /* Bits of metadata table can match. */ ovs_be64 metadata_write; /* Bits of metadata table can write. */ ovs_be32 config; /* Bitmap of OFPTC_* values */ ovs_be32 max_entries; /* Max number of entries supported. */ /* Table Feature Property list */ /* struct ofp13_table_feature_prop_header properties[0]; */ }; OFP_ASSERT(sizeof(struct ofp13_table_features) == 64); /* Table Feature property types. * Low order bit cleared indicates a property for a regular Flow Entry. * Low order bit set indicates a property for the Table-Miss Flow Entry. */ enum ofp13_table_feature_prop_type { OFPTFPT13_INSTRUCTIONS = 0, /* Instructions property. 
*/ OFPTFPT13_INSTRUCTIONS_MISS = 1, /* Instructions for table-miss. */ OFPTFPT13_NEXT_TABLES = 2, /* Next Table property. */ OFPTFPT13_NEXT_TABLES_MISS = 3, /* Next Table for table-miss. */ OFPTFPT13_WRITE_ACTIONS = 4, /* Write Actions property. */ OFPTFPT13_WRITE_ACTIONS_MISS = 5, /* Write Actions for table-miss. */ OFPTFPT13_APPLY_ACTIONS = 6, /* Apply Actions property. */ OFPTFPT13_APPLY_ACTIONS_MISS = 7, /* Apply Actions for table-miss. */ OFPTFPT13_MATCH = 8, /* Match property. */ OFPTFPT13_WILDCARDS = 10, /* Wildcards property. */ OFPTFPT13_WRITE_SETFIELD = 12, /* Write Set-Field property. */ OFPTFPT13_WRITE_SETFIELD_MISS = 13, /* Write Set-Field for table-miss. */ OFPTFPT13_APPLY_SETFIELD = 14, /* Apply Set-Field property. */ OFPTFPT13_APPLY_SETFIELD_MISS = 15, /* Apply Set-Field for table-miss. */ OFPTFPT13_EXPERIMENTER = 0xFFFE, /* Experimenter property. */ OFPTFPT13_EXPERIMENTER_MISS = 0xFFFF, /* Experimenter for table-miss. */ }; /* Instructions property */ struct ofp13_table_feature_prop_instructions { ovs_be16 type; /* One of OFPTFPT13_INSTRUCTIONS, OFPTFPT13_INSTRUCTIONS_MISS. */ ovs_be16 length; /* Length in bytes of this property. */ /* Followed by: * - Exactly (length - 4) bytes containing the instruction ids, then * - Exactly (length + 7)/8*8 - (length) (between 0 and 7) * bytes of all-zero bytes */ /* struct ofp11_instruction instruction_ids[0]; List of instructions without any data */ }; OFP_ASSERT(sizeof(struct ofp13_table_feature_prop_instructions) == 4); /* Next Tables property */ struct ofp13_table_feature_prop_next_tables { ovs_be16 type; /* One of OFPTFPT13_NEXT_TABLES, OFPTFPT13_NEXT_TABLES_MISS. */ ovs_be16 length; /* Length in bytes of this property. 
*/ /* Followed by: * - Exactly (length - 4) bytes containing the table_ids, then * - Exactly (length + 7)/8*8 - (length) (between 0 and 7) * bytes of all-zero bytes */ /* uint8_t next_table_ids[0]; */ }; OFP_ASSERT(sizeof(struct ofp13_table_feature_prop_next_tables) == 4); /* Actions property */ struct ofp13_table_feature_prop_actions { ovs_be16 type; /* One of OFPTFPT13_WRITE_ACTIONS, OFPTFPT13_WRITE_ACTIONS_MISS, OFPTFPT13_APPLY_ACTIONS, OFPTFPT13_APPLY_ACTIONS_MISS. */ ovs_be16 length; /* Length in bytes of this property. */ /* Followed by: * - Exactly (length - 4) bytes containing the action_ids, then * - Exactly (length + 7)/8*8 - (length) (between 0 and 7) * bytes of all-zero bytes */ /* struct ofp_action_header action_ids[0]; List of actions without any data */ }; OFP_ASSERT(sizeof(struct ofp13_table_feature_prop_actions) == 4); /* Match, Wildcard or Set-Field property */ struct ofp13_table_feature_prop_oxm { ovs_be16 type; /* One of OFPTFPT13_MATCH, OFPTFPT13_WILDCARDS, OFPTFPT13_WRITE_SETFIELD, OFPTFPT13_WRITE_SETFIELD_MISS, OFPTFPT13_APPLY_SETFIELD, OFPTFPT13_APPLY_SETFIELD_MISS. */ ovs_be16 length; /* Length in bytes of this property. */ /* Followed by: * - Exactly (length - 4) bytes containing the oxm_ids, then * - Exactly (length + 7)/8*8 - (length) (between 0 and 7) * bytes of all-zero bytes */ /* ovs_be32 oxm_ids[0]; Array of OXM headers */ }; OFP_ASSERT(sizeof(struct ofp13_table_feature_prop_oxm) == 4); /* Experimenter table feature property */ struct ofp13_table_feature_prop_experimenter { ovs_be16 type; /* One of OFPTFPT13_EXPERIMENTER, OFPTFPT13_EXPERIMENTER_MISS. */ ovs_be16 length; /* Length in bytes of this property. */ ovs_be32 experimenter; /* Experimenter ID which takes the same form as in struct ofp_experimenter_header. */ ovs_be32 exp_type; /* Experimenter defined. 
*/ /* Followed by: * - Exactly (length - 12) bytes containing the experimenter data, then * - Exactly (length + 7)/8*8 - (length) (between 0 and 7) * bytes of all-zero bytes */ /* ovs_be32 experimenter_data[0]; */ }; OFP_ASSERT(sizeof(struct ofp13_table_feature_prop_experimenter) == 12); /* Body of reply to OFPMP13_PORT request. If a counter is unsupported, set * the field to all ones. */ struct ofp13_port_stats { struct ofp11_port_stats ps; ovs_be32 duration_sec; /* Time port has been alive in seconds. */ ovs_be32 duration_nsec; /* Time port has been alive in nanoseconds beyond duration_sec. */ }; OFP_ASSERT(sizeof(struct ofp13_port_stats) == 112); /* Body of reply to OFPMP13_QUEUE request */ struct ofp13_queue_stats { struct ofp11_queue_stats qs; ovs_be32 duration_sec; /* Time queue has been alive in seconds. */ ovs_be32 duration_nsec; /* Time queue has been alive in nanoseconds beyond duration_sec. */ }; OFP_ASSERT(sizeof(struct ofp13_queue_stats) == 40); /* Body of reply to OFPMP13_GROUP request */ struct ofp13_group_stats { struct ofp11_group_stats gs; ovs_be32 duration_sec; /* NEW: Time group has been alive in seconds. */ ovs_be32 duration_nsec; /* NEW: Time group has been alive in nanoseconds beyond duration_sec. */ /* struct ofp11_bucket_counter bucket_stats[0]; */ }; OFP_ASSERT(sizeof(struct ofp13_group_stats) == 40); /* Body of OFPMP13_METER and OFPMP13_METER_CONFIG requests. */ struct ofp13_meter_multipart_request { ovs_be32 meter_id; /* Meter instance, or OFPM_ALL. */ uint8_t pad[4]; /* Align to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp13_meter_multipart_request) == 8); /* Statistics for each meter band */ struct ofp13_meter_band_stats { ovs_be64 packet_band_count; /* Number of packets in band. */ ovs_be64 byte_band_count; /* Number of bytes in band. */ }; OFP_ASSERT(sizeof(struct ofp13_meter_band_stats) == 16); /* Body of reply to OFPMP13_METER request. Meter statistics. */ struct ofp13_meter_stats { ovs_be32 meter_id; /* Meter instance. 
*/ ovs_be16 len; /* Length in bytes of this stats. */ uint8_t pad[6]; ovs_be32 flow_count; /* Number of flows bound to meter. */ ovs_be64 packet_in_count; /* Number of packets in input. */ ovs_be64 byte_in_count; /* Number of bytes in input. */ ovs_be32 duration_sec; /* Time meter has been alive in seconds. */ ovs_be32 duration_nsec; /* Time meter has been alive in nanoseconds beyond duration_sec. */ struct ofp13_meter_band_stats band_stats[0]; /* The band_stats length is inferred from the length field. */ }; OFP_ASSERT(sizeof(struct ofp13_meter_stats) == 40); /* Body of reply to OFPMP13_METER_CONFIG request. Meter configuration. */ struct ofp13_meter_config { ovs_be16 length; /* Length of this entry. */ ovs_be16 flags; /* Set of OFPMC_* that apply. */ ovs_be32 meter_id; /* Meter instance. */ /* struct ofp13_meter_band_header bands[0]; The bands length is inferred from the length field. */ }; OFP_ASSERT(sizeof(struct ofp13_meter_config) == 8); /* Body of reply to OFPMP13_METER_FEATURES request. Meter features. */ struct ofp13_meter_features { ovs_be32 max_meter; /* Maximum number of meters. */ ovs_be32 band_types; /* Bitmaps of OFPMBT13_* values supported. */ ovs_be32 capabilities; /* Bitmaps of "ofp13_meter_flags". */ uint8_t max_bands; /* Maximum bands per meters */ uint8_t max_color; /* Maximum color value */ uint8_t pad[2]; }; OFP_ASSERT(sizeof(struct ofp13_meter_features) == 16); /* Asynchronous message configuration. */ /* The body of this is the same as nx_async_config */ /* OFPT_GET_ASYNC_REPLY or OFPT_SET_ASYNC. */ struct ofp13_async_config { ovs_be32 packet_in_mask[2]; /* Bitmasks of OFPR_* values. */ ovs_be32 port_status_mask[2]; /* Bitmasks of OFPPR_* values. */ ovs_be32 flow_removed_mask[2];/* Bitmasks of OFPRR_* values. */ }; OFP_ASSERT(sizeof(struct ofp13_async_config) == 24); /* Packet received on port (datapath -> controller). 
*/ struct ofp13_packet_in { struct ofp12_packet_in pi; ovs_be64 cookie; /* Cookie of the flow entry that was looked up */ /* Followed by: * - Match * - Exactly 2 all-zero padding bytes, then * - An Ethernet frame whose length is inferred from header.length. * The padding bytes preceding the Ethernet frame ensure that the IP * header (if any) following the Ethernet header is 32-bit aligned. */ /* struct ofp12_match match; */ /* uint8_t pad[2]; Align to 64 bit + 16 bit */ /* uint8_t data[0]; Ethernet frame */ }; OFP_ASSERT(sizeof(struct ofp13_packet_in) == 16); #endif /* openflow/openflow-1.3.h */ openvswitch-2.0.1+git20140120/include/openflow/openflow-common.h000066400000000000000000000450341226605124000242240ustar00rootroot00000000000000/* Copyright (c) 2008, 2011, 2012, 2013 The Board of Trustees of The Leland Stanford * Junior University * * We are making the OpenFlow specification and associated documentation * (Software) available for public use and benefit with the expectation * that others will use, modify and enhance the Software and contribute * those enhancements back to the community. However, since we would * like to make the Software available for broadest use, with as few * restrictions as possible permission is hereby granted, free of * charge, to any person obtaining a copy of this Software to deal in * the Software under the copyrights without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * The name and trademarks of copyright holder(s) may NOT be used in * advertising or publicity pertaining to the Software or any * derivatives without specific, written prior permission. */ /* * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OPENFLOW_COMMON_H #define OPENFLOW_COMMON_H 1 #include "openvswitch/types.h" #ifdef SWIG #define OFP_ASSERT(EXPR) /* SWIG can't handle OFP_ASSERT. */ #elif !defined(__cplusplus) /* Build-time assertion for use in a declaration context. */ #define OFP_ASSERT(EXPR) \ extern int (*build_assert(void))[ sizeof(struct { \ unsigned int build_assert_failed : (EXPR) ? 1 : -1; })] #else /* __cplusplus */ #include #define OFP_ASSERT BOOST_STATIC_ASSERT #endif /* __cplusplus */ /* Version number: * Non-experimental versions released: 0x01 0x02 * Experimental versions released: 0x81 -- 0x99 */ /* The most significant bit being set in the version field indicates an * experimental OpenFlow version. */ enum ofp_version { OFP10_VERSION = 0x01, OFP11_VERSION = 0x02, OFP12_VERSION = 0x03, OFP13_VERSION = 0x04 }; /* Vendor (aka experimenter) IDs. 
* * These are used in various places in OpenFlow to identify an extension * defined by some vendor, as opposed to a standardized part of the core * OpenFlow protocol. * * Vendor IDs whose top 8 bits are 0 hold an Ethernet OUI in their low 24 bits. * The Open Networking Foundation assigns vendor IDs whose top 8 bits are * nonzero. * * A few vendor IDs are special: * * - OF_VENDOR_ID is not a real vendor ID and does not appear in the * OpenFlow protocol itself. It can occasionally be useful within Open * vSwitch to identify a standardized part of OpenFlow. * * - ONF_VENDOR_ID is being used within the ONF "extensibility" working * group to identify extensions being proposed for standardization. */ #define OF_VENDOR_ID 0 #define NX_VENDOR_ID 0x00002320 /* Nicira. */ #define ONF_VENDOR_ID 0x4f4e4600 /* Open Networking Foundation. */ #define OFP_MAX_TABLE_NAME_LEN 32 #define OFP_MAX_PORT_NAME_LEN 16 #define OFP_TCP_PORT 6633 #define OFP_SSL_PORT 6633 #define OFP_ETH_ALEN 6 /* Bytes in an Ethernet address. */ #define OFP_DEFAULT_MISS_SEND_LEN 128 /* Values below this cutoff are 802.3 packets and the two bytes * following MAC addresses are used as a frame length. Otherwise, the * two bytes are used as the Ethernet type. */ #define OFP_DL_TYPE_ETH2_CUTOFF 0x0600 /* Value of dl_type to indicate that the frame does not include an * Ethernet type. */ #define OFP_DL_TYPE_NOT_ETH_TYPE 0x05ff /* Value used in "idle_timeout" and "hard_timeout" to indicate that the entry * is permanent. */ #define OFP_FLOW_PERMANENT 0 /* By default, choose a priority in the middle. */ #define OFP_DEFAULT_PRIORITY 0x8000 /* Header on all OpenFlow packets. */ struct ofp_header { uint8_t version; /* An OpenFlow version number, e.g. OFP10_VERSION. */ uint8_t type; /* One of the OFPT_ constants. */ ovs_be16 length; /* Length including this ofp_header. */ ovs_be32 xid; /* Transaction id associated with this packet. Replies use the same id as was in the request to facilitate pairing. 
*/ }; OFP_ASSERT(sizeof(struct ofp_header) == 8); /* OFPT_ERROR: Error message (datapath -> controller). */ struct ofp_error_msg { ovs_be16 type; ovs_be16 code; uint8_t data[0]; /* Variable-length data. Interpreted based on the type and code. */ }; OFP_ASSERT(sizeof(struct ofp_error_msg) == 4); enum ofp_config_flags { /* Handling of IP fragments. */ OFPC_FRAG_NORMAL = 0, /* No special handling for fragments. */ OFPC_FRAG_DROP = 1, /* Drop fragments. */ OFPC_FRAG_REASM = 2, /* Reassemble (only if OFPC_IP_REASM set). */ OFPC_FRAG_NX_MATCH = 3, /* Make first fragments available for matching. */ OFPC_FRAG_MASK = 3, /* OFPC_INVALID_TTL_TO_CONTROLLER is deprecated in OpenFlow 1.3 */ /* TTL processing - applicable for IP and MPLS packets. */ OFPC_INVALID_TTL_TO_CONTROLLER = 1 << 2, /* Send packets with invalid TTL to the controller. */ }; /* Switch configuration. */ struct ofp_switch_config { ovs_be16 flags; /* OFPC_* flags. */ ovs_be16 miss_send_len; /* Max bytes of new flow that datapath should send to the controller. */ }; OFP_ASSERT(sizeof(struct ofp_switch_config) == 4); /* Common flags to indicate behavior of the physical port. These flags are * used in ofp_port to describe the current configuration. They are used in * the ofp_port_mod message to configure the port's behavior. */ enum ofp_port_config { OFPPC_PORT_DOWN = 1 << 0, /* Port is administratively down. */ OFPPC_NO_RECV = 1 << 2, /* Drop all packets received by port. */ OFPPC_NO_FWD = 1 << 5, /* Drop packets forwarded to port. */ OFPPC_NO_PACKET_IN = 1 << 6 /* Do not send packet-in msgs for port. */ }; /* Common current state of the physical port. These are not configurable from * the controller. */ enum ofp_port_state { OFPPS_LINK_DOWN = 1 << 0, /* No physical link present. */ }; /* Common features of physical ports available in a datapath. */ enum ofp_port_features { OFPPF_10MB_HD = 1 << 0, /* 10 Mb half-duplex rate support. */ OFPPF_10MB_FD = 1 << 1, /* 10 Mb full-duplex rate support. 
*/ OFPPF_100MB_HD = 1 << 2, /* 100 Mb half-duplex rate support. */ OFPPF_100MB_FD = 1 << 3, /* 100 Mb full-duplex rate support. */ OFPPF_1GB_HD = 1 << 4, /* 1 Gb half-duplex rate support. */ OFPPF_1GB_FD = 1 << 5, /* 1 Gb full-duplex rate support. */ OFPPF_10GB_FD = 1 << 6, /* 10 Gb full-duplex rate support. */ }; struct ofp_packet_queue { ovs_be32 queue_id; /* id for the specific queue. */ ovs_be16 len; /* Length in bytes of this queue desc. */ uint8_t pad[2]; /* 64-bit alignment. */ /* struct ofp_queue_prop_header properties[0]; List of properties. */ }; OFP_ASSERT(sizeof(struct ofp_packet_queue) == 8); enum ofp_queue_properties { OFPQT_NONE = 0, /* No property defined for queue (default). */ OFPQT_MIN_RATE, /* Minimum datarate guaranteed. */ /* Other types should be added here * (i.e. max rate, precedence, etc). */ }; /* Common description for a queue. */ struct ofp_queue_prop_header { ovs_be16 property; /* One of OFPQT_. */ ovs_be16 len; /* Length of property, including this header. */ uint8_t pad[4]; /* 64-bit alignemnt. */ }; OFP_ASSERT(sizeof(struct ofp_queue_prop_header) == 8); /* Min-Rate queue property description. */ struct ofp_queue_prop_min_rate { struct ofp_queue_prop_header prop_header; /* prop: OFPQT_MIN, len: 16. */ ovs_be16 rate; /* In 1/10 of a percent; >1000 -> disabled. */ uint8_t pad[6]; /* 64-bit alignment */ }; OFP_ASSERT(sizeof(struct ofp_queue_prop_min_rate) == 16); /* Switch features. */ struct ofp_switch_features { ovs_be64 datapath_id; /* Datapath unique ID. The lower 48-bits are for a MAC address, while the upper 16-bits are implementer-defined. */ ovs_be32 n_buffers; /* Max packets buffered at once. */ uint8_t n_tables; /* Number of tables supported by datapath. */ uint8_t auxiliary_id; /* OF 1.3: Identify auxiliary connections */ uint8_t pad[2]; /* Align to 64-bits. */ /* Features. */ ovs_be32 capabilities; /* OFPC_*, OFPC10_*, OFPC11_*, OFPC12_*. */ ovs_be32 actions; /* Bitmap of supported "ofp_action_type"s. 
* DEPRECATED in OpenFlow 1.1 */ /* Followed by an array of struct ofp10_phy_port or struct ofp11_port * structures. The number is inferred from header.length. * REMOVED in OpenFlow 1.3 */ }; OFP_ASSERT(sizeof(struct ofp_switch_features) == 24); /* Common capabilities supported by the datapath (struct ofp_switch_features, * member capabilities). */ enum ofp_capabilities { OFPC_FLOW_STATS = 1 << 0, /* Flow statistics. */ OFPC_TABLE_STATS = 1 << 1, /* Table statistics. */ OFPC_PORT_STATS = 1 << 2, /* Port statistics. */ OFPC_IP_REASM = 1 << 5, /* Can reassemble IP fragments. */ OFPC_QUEUE_STATS = 1 << 6, /* Queue statistics. */ OFPC_ARP_MATCH_IP = 1 << 7 /* Match IP addresses in ARP pkts. */ }; /* Why is this packet being sent to the controller? */ enum ofp_packet_in_reason { OFPR_NO_MATCH, /* No matching flow. */ OFPR_ACTION, /* Action explicitly output to controller. */ OFPR_INVALID_TTL /* Packet has invalid TTL. */, OFPR_N_REASONS }; enum ofp_flow_mod_command { OFPFC_ADD, /* New flow. */ OFPFC_MODIFY, /* Modify all matching flows. */ OFPFC_MODIFY_STRICT, /* Modify entry strictly matching wildcards */ OFPFC_DELETE, /* Delete all matching flows. */ OFPFC_DELETE_STRICT /* Strictly match wildcards and priority. */ }; enum ofp_flow_mod_flags { OFPFF_SEND_FLOW_REM = 1 << 0, /* Send flow removed message when flow * expires or is deleted. */ OFPFF_CHECK_OVERLAP = 1 << 1, /* Check for overlapping entries first. */ }; /* Action header for OFPAT10_VENDOR and OFPAT11_EXPERIMEMNTER. * The rest of the body is vendor-defined. */ struct ofp_action_vendor_header { ovs_be16 type; /* OFPAT10_VENDOR. */ ovs_be16 len; /* Length is a multiple of 8. */ ovs_be32 vendor; /* Vendor ID, which takes the same form as in "struct ofp_vendor_header". */ }; OFP_ASSERT(sizeof(struct ofp_action_vendor_header) == 8); /* Action header that is common to all actions. The length includes the * header and any padding used to make the action 64-bit aligned. 
* NB: The length of an action *must* always be a multiple of eight. */ struct ofp_action_header { ovs_be16 type; /* One of OFPAT10_*. */ ovs_be16 len; /* Length of action, including this header. This is the length of action, including any padding to make it 64-bit aligned. */ uint8_t pad[4]; }; OFP_ASSERT(sizeof(struct ofp_action_header) == 8); /* Action structure for OFPAT10_SET_VLAN_VID and OFPAT11_SET_VLAN_VID. */ struct ofp_action_vlan_vid { ovs_be16 type; /* Type. */ ovs_be16 len; /* Length is 8. */ ovs_be16 vlan_vid; /* VLAN id. */ uint8_t pad[2]; }; OFP_ASSERT(sizeof(struct ofp_action_vlan_vid) == 8); /* Action structure for OFPAT10_SET_VLAN_PCP and OFPAT11_SET_VLAN_PCP. */ struct ofp_action_vlan_pcp { ovs_be16 type; /* Type. */ ovs_be16 len; /* Length is 8. */ uint8_t vlan_pcp; /* VLAN priority. */ uint8_t pad[3]; }; OFP_ASSERT(sizeof(struct ofp_action_vlan_pcp) == 8); /* Action structure for OFPAT10_SET_DL_SRC/DST and OFPAT11_SET_DL_SRC/DST. */ struct ofp_action_dl_addr { ovs_be16 type; /* Type. */ ovs_be16 len; /* Length is 16. */ uint8_t dl_addr[OFP_ETH_ALEN]; /* Ethernet address. */ uint8_t pad[6]; }; OFP_ASSERT(sizeof(struct ofp_action_dl_addr) == 16); /* Action structure for OFPAT10_SET_NW_SRC/DST and OFPAT11_SET_NW_SRC/DST. */ struct ofp_action_nw_addr { ovs_be16 type; /* Type. */ ovs_be16 len; /* Length is 8. */ ovs_be32 nw_addr; /* IP address. */ }; OFP_ASSERT(sizeof(struct ofp_action_nw_addr) == 8); /* Action structure for OFPAT10_SET_NW_TOS and OFPAT11_SET_NW_TOS. */ struct ofp_action_nw_tos { ovs_be16 type; /* Type.. */ ovs_be16 len; /* Length is 8. */ uint8_t nw_tos; /* DSCP in high 6 bits, rest ignored. */ uint8_t pad[3]; }; OFP_ASSERT(sizeof(struct ofp_action_nw_tos) == 8); /* Action structure for OFPAT10_SET_TP_SRC/DST and OFPAT11_SET_TP_SRC/DST. */ struct ofp_action_tp_port { ovs_be16 type; /* Type. */ ovs_be16 len; /* Length is 8. */ ovs_be16 tp_port; /* TCP/UDP port. 
*/ uint8_t pad[2]; }; OFP_ASSERT(sizeof(struct ofp_action_tp_port) == 8); /* Why was this flow removed? */ enum ofp_flow_removed_reason { OFPRR_IDLE_TIMEOUT, /* Flow idle time exceeded idle_timeout. */ OFPRR_HARD_TIMEOUT, /* Time exceeded hard_timeout. */ OFPRR_DELETE, /* Evicted by a DELETE flow mod. */ OFPRR_GROUP_DELETE, /* Group was removed. */ OFPRR_METER_DELETE, /* Meter was removed. */ OFPRR_EVICTION, /* Switch eviction to free resources. */ }; /* What changed about the physical port */ enum ofp_port_reason { OFPPR_ADD, /* The port was added. */ OFPPR_DELETE, /* The port was removed. */ OFPPR_MODIFY /* Some attribute of the port has changed. */ }; /* A physical port has changed in the datapath */ struct ofp_port_status { uint8_t reason; /* One of OFPPR_*. */ uint8_t pad[7]; /* Align to 64-bits. */ /* Followed by struct ofp10_phy_port or struct ofp11_port. */ }; OFP_ASSERT(sizeof(struct ofp_port_status) == 8); enum ofp_stats_reply_flags { OFPSF_REPLY_MORE = 1 << 0 /* More replies to follow. */ }; #define DESC_STR_LEN 256 #define SERIAL_NUM_LEN 32 /* Body of reply to OFPST_DESC request. Each entry is a NULL-terminated ASCII * string. */ struct ofp_desc_stats { char mfr_desc[DESC_STR_LEN]; /* Manufacturer description. */ char hw_desc[DESC_STR_LEN]; /* Hardware description. */ char sw_desc[DESC_STR_LEN]; /* Software description. */ char serial_num[SERIAL_NUM_LEN]; /* Serial number. */ char dp_desc[DESC_STR_LEN]; /* Human readable description of the datapath. */ }; OFP_ASSERT(sizeof(struct ofp_desc_stats) == 1056); /* Reply to OFPST_AGGREGATE request. */ struct ofp_aggregate_stats_reply { ovs_32aligned_be64 packet_count; /* Number of packets in flows. */ ovs_32aligned_be64 byte_count; /* Number of bytes in flows. */ ovs_be32 flow_count; /* Number of flows. */ uint8_t pad[4]; /* Align to 64 bits. */ }; OFP_ASSERT(sizeof(struct ofp_aggregate_stats_reply) == 24); /* The match type indicates the match structure (set of fields that compose the * match) in use. 
The match type is placed in the type field at the beginning * of all match structures. The "OpenFlow Extensible Match" type corresponds * to OXM TLV format described below and must be supported by all OpenFlow * switches. Extensions that define other match types may be published on the * ONF wiki. Support for extensions is optional. */ enum ofp_match_type { OFPMT_STANDARD = 0, /* The match fields defined in the ofp11_match structure apply */ OFPMT_OXM = 1, /* OpenFlow Extensible Match */ }; /* Group numbering. Groups can use any number up to OFPG_MAX. */ enum ofp_group { /* Last usable group number. */ OFPG_MAX = 0xffffff00, /* Fake groups. */ OFPG_ALL = 0xfffffffc, /* All groups, for group delete commands. */ OFPG_ANY = 0xffffffff /* Wildcard, for flow stats requests. */ }; enum ofp_hello_elem_type { OFPHET_VERSIONBITMAP = 1, /* Bitmap of version supported. */ }; /* Common header for all Hello Elements */ struct ofp_hello_elem_header { ovs_be16 type; /* One of OFPHET_*. */ ovs_be16 length; /* Length in bytes of this element. */ }; OFP_ASSERT(sizeof(struct ofp_hello_elem_header) == 4); /* Vendor extension. */ struct ofp_vendor_header { struct ofp_header header; /* Type OFPT_VENDOR or OFPT_EXPERIMENTER. */ ovs_be32 vendor; /* Vendor ID: * - MSB 0: low-order bytes are IEEE OUI. * - MSB != 0: defined by OpenFlow * consortium. */ /* Vendor-defined arbitrary additional data. */ }; OFP_ASSERT(sizeof(struct ofp_vendor_header) == 12); #endif /* openflow/openflow-common.h */ openvswitch-2.0.1+git20140120/include/openflow/openflow.h000066400000000000000000000015331226605124000227320ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OPENFLOW_OPENFLOW_H #define OPENFLOW_OPENFLOW_H 1 #include "openflow/openflow-1.0.h" #include "openflow/openflow-1.1.h" #include "openflow/openflow-1.2.h" #include "openflow/openflow-1.3.h" #endif /* openflow/openflow.h */ openvswitch-2.0.1+git20140120/include/openvswitch/000077500000000000000000000000001226605124000214465ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/include/openvswitch/automake.mk000066400000000000000000000000621226605124000236030ustar00rootroot00000000000000noinst_HEADERS += \ include/openvswitch/types.h openvswitch-2.0.1+git20140120/include/openvswitch/types.h000066400000000000000000000065241226605124000227720ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef OPENVSWITCH_TYPES_H #define OPENVSWITCH_TYPES_H 1 #include #include #include #ifdef __CHECKER__ #define OVS_BITWISE __attribute__((bitwise)) #define OVS_FORCE __attribute__((force)) #else #define OVS_BITWISE #define OVS_FORCE #endif /* The ovs_be types indicate that an object is in big-endian, not * native-endian, byte order. They are otherwise equivalent to uint_t. * * We bootstrap these from the Linux __be types. If we instead define our * own independently then __be and ovs_be become mutually * incompatible. */ typedef __be16 ovs_be16; typedef __be32 ovs_be32; typedef __be64 ovs_be64; /* These types help with a few funny situations: * * - The Ethernet header is 14 bytes long, which misaligns everything after * that. One can put 2 "shim" bytes before the Ethernet header, but this * helps only if there is exactly one Ethernet header. If there are two, * as with GRE and VXLAN (and if the inner header doesn't use this * trick--GRE and VXLAN don't) then you have the choice of aligning the * inner data or the outer data. So it seems better to treat 32-bit fields * in protocol headers as aligned only on 16-bit boundaries. * * - ARP headers contain misaligned 32-bit fields. * * - Netlink and OpenFlow contain 64-bit values that are only guaranteed to * be aligned on 32-bit boundaries. * * lib/unaligned.h has helper functions for accessing these. */ /* A 32-bit value, in host byte order, that is only aligned on a 16-bit * boundary. */ typedef struct { #ifdef WORDS_BIGENDIAN uint16_t hi, lo; #else uint16_t lo, hi; #endif } ovs_16aligned_u32; /* A 32-bit value, in network byte order, that is only aligned on a 16-bit * boundary. */ typedef struct { ovs_be16 hi, lo; } ovs_16aligned_be32; /* A 64-bit value, in host byte order, that is only aligned on a 32-bit * boundary. */ typedef struct { #ifdef WORDS_BIGENDIAN uint32_t hi, lo; #else uint32_t lo, hi; #endif } ovs_32aligned_u64; /* A 64-bit value, in network byte order, that is only aligned on a 32-bit * boundary. 
*/ typedef struct { ovs_be32 hi, lo; } ovs_32aligned_be64; /* ofp_port_t represents the port number of a OpenFlow switch. * odp_port_t represents the port number on the datapath. * ofp11_port_t represents the OpenFlow-1.1 port number. */ typedef uint16_t OVS_BITWISE ofp_port_t; typedef uint32_t OVS_BITWISE odp_port_t; typedef uint32_t OVS_BITWISE ofp11_port_t; /* Macro functions that cast int types to ofp/odp/ofp11 types. */ #define OFP_PORT_C(X) ((OVS_FORCE ofp_port_t) (X)) #define ODP_PORT_C(X) ((OVS_FORCE odp_port_t) (X)) #define OFP11_PORT_C(X) ((OVS_FORCE ofp11_port_t) (X)) #endif /* openvswitch/types.h */ openvswitch-2.0.1+git20140120/include/sparse/000077500000000000000000000000001226605124000203725ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/include/sparse/arpa/000077500000000000000000000000001226605124000213155ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/include/sparse/arpa/inet.h000066400000000000000000000013361226605124000224300ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CHECKER__ #error "Use this header only with sparse. It is not a correct implementation." #endif #include openvswitch-2.0.1+git20140120/include/sparse/assert.h000066400000000000000000000014331226605124000220450ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CHECKER__ #error "Use this header only with sparse. It is not a correct implementation." #endif extern void __ovs_assert(_Bool); #define assert(EXPRESSION) __ovs_assert(EXPRESSION) openvswitch-2.0.1+git20140120/include/sparse/automake.mk000066400000000000000000000004631226605124000225340ustar00rootroot00000000000000noinst_HEADERS += \ include/sparse/arpa/inet.h \ include/sparse/assert.h \ include/sparse/math.h \ include/sparse/netinet/in.h \ include/sparse/netinet/ip6.h \ include/sparse/pthread.h \ include/sparse/sys/socket.h \ include/sparse/sys/wait.h openvswitch-2.0.1+git20140120/include/sparse/math.h000066400000000000000000000131321226605124000214740ustar00rootroot00000000000000/* * Copyright (c) 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CHECKER__ #error "Use this header only with sparse. It is not a correct implementation." 
#endif #ifndef __SYS_MATH_SPARSE #define __SYS_MATH_SPARSE 1 double acos(double); float acosf(float); double acosh(double); float acoshf(float); long double acoshl(long double); long double acosl(long double); double asin(double); float asinf(float); double asinh(double); float asinhf(float); long double asinhl(long double); long double asinl(long double); double atan(double); double atan2(double, double); float atan2f(float, float); long double atan2l(long double, long double); float atanf(float); double atanh(double); float atanhf(float); long double atanhl(long double); long double atanl(long double); double cbrt(double); float cbrtf(float); long double cbrtl(long double); double ceil(double); float ceilf(float); long double ceill(long double); double copysign(double, double); float copysignf(float, float); long double copysignl(long double, long double); double cos(double); float cosf(float); double cosh(double); float coshf(float); long double coshl(long double); long double cosl(long double); double erf(double); double erfc(double); float erfcf(float); long double erfcl(long double); float erff(float); long double erfl(long double); double exp(double); double exp2(double); float exp2f(float); long double exp2l(long double); float expf(float); long double expl(long double); double expm1(double); float expm1f(float); long double expm1l(long double); double fabs(double); float fabsf(float); long double fabsl(long double); double fdim(double, double); float fdimf(float, float); long double fdiml(long double, long double); double floor(double); float floorf(float); long double floorl(long double); double fma(double, double, double); float fmaf(float, float, float); long double fmal(long double, long double, long double); double fmax(double, double); float fmaxf(float, float); long double fmaxl(long double, long double); double fmin(double, double); float fminf(float, float); long double fminl(long double, long double); double fmod(double, double); float 
fmodf(float, float); long double fmodl(long double, long double); double frexp(double, int *); float frexpf(float value, int *); long double frexpl(long double value, int *); double hypot(double, double); float hypotf(float, float); long double hypotl(long double, long double); int ilogb(double); int ilogbf(float); int ilogbl(long double); double j0(double); double j1(double); double jn(int, double); double ldexp(double, int); float ldexpf(float, int); long double ldexpl(long double, int); long long llrint(double); long long llrintf(float); long long llrintl(long double); long long llround(double); long long llroundf(float); long long llroundl(long double); double log(double); double log10(double); float log10f(float); long double log10l(long double); double log1p(double); float log1pf(float); long double log1pl(long double); double log2(double); float log2f(float); long double log2l(long double); double logb(double); float logbf(float); long double logbl(long double); float logf(float); long double logl(long double); long lrint(double); long lrintf(float); long lrintl(long double); long lround(double); long lroundf(float); long lroundl(long double); double modf(double, double *); float modff(float, float *); long double modfl(long double, long double *); double nan(const char *); float nanf(const char *); long double nanl(const char *); double nearbyint(double); float nearbyintf(float); long double nearbyintl(long double); double nextafter(double, double); float nextafterf(float, float); long double nextafterl(long double, long double); double nexttoward(double, long double); float nexttowardf(float, long double); long double nexttowardl(long double, long double); double pow(double, double); float powf(float, float); long double powl(long double, long double); double remainder(double, double); float remainderf(float, float); long double remainderl(long double, long double); double remquo(double, double, int *); float remquof(float, float, int *); long double 
remquol(long double, long double, int *); double rint(double); float rintf(float); long double rintl(long double); double round(double); float roundf(float); long double roundl(long double); double scalb(double, double); double scalbln(double, long); float scalblnf(float, long); long double scalblnl(long double, long); double scalbn(double, int); float scalbnf(float, int); long double scalbnl(long double, int); double sin(double); float sinf(float); double sinh(double); float sinhf(float); long double sinhl(long double); long double sinl(long double); double sqrt(double); float sqrtf(float); long double sqrtl(long double); double tan(double); float tanf(float); double tanh(double); float tanhf(float); long double tanhl(long double); long double tanl(long double); double tgamma(double); float tgammaf(float); long double tgammal(long double); double trunc(double); float truncf(float); long double truncl(long double); double y0(double); double y1(double); double yn(int, double); #endif /* for sparse */ openvswitch-2.0.1+git20140120/include/sparse/netinet/000077500000000000000000000000001226605124000220405ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/include/sparse/netinet/in.h000066400000000000000000000052131226605124000226200ustar00rootroot00000000000000/* * Copyright (c) 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CHECKER__ #error "Use this header only with sparse. It is not a correct implementation." 
#endif #ifndef __NETINET_IN_SPARSE #define __NETINET_IN_SPARSE 1 #include "openvswitch/types.h" #include #include typedef ovs_be16 in_port_t; typedef ovs_be32 in_addr_t; struct in_addr { in_addr_t s_addr; }; struct sockaddr_in { sa_family_t sin_family; in_port_t sin_port; struct in_addr sin_addr; }; struct in6_addr { union { uint8_t u_s6_addr[16]; } u; }; #define s6_addr u.u_s6_addr extern const struct in6_addr in6addr_any; #define IPPROTO_IP 0 #define IPPROTO_HOPOPTS 0 #define IPPROTO_ICMP 1 #define IPPROTO_TCP 6 #define IPPROTO_UDP 17 #define IPPROTO_ROUTING 43 #define IPPROTO_FRAGMENT 44 #define IPPROTO_AH 51 #define IPPROTO_ICMPV6 58 #define IPPROTO_NONE 59 #define IPPROTO_DSTOPTS 60 #define IPPROTO_SCTP 132 /* All the IP options documented in Linux ip(7). */ #define IP_ADD_MEMBERSHIP 0 #define IP_DROP_MEMBERSHIP 1 #define IP_HDRINCL 2 #define IP_MTU 3 #define IP_MTU_DISCOVER 4 #define IP_MULTICAST_IF 5 #define IP_MULTICAST_LOOP 6 #define IP_MULTICAST_TTL 7 #define IP_NODEFRAG 8 #define IP_OPTIONS 9 #define IP_PKTINFO 10 #define IP_RECVERR 11 #define IP_RECVOPTS 12 #define IP_RECVTOS 13 #define IP_RECVTTL 14 #define IP_RETOPTS 15 #define IP_ROUTER_ALERT 16 #define IP_TOS 17 #define IP_TTL 18 #define INADDR_ANY 0x00000000 #define INADDR_BROADCAST 0xffffffff #define INADDR_NONE 0xffffffff #define INET6_ADDRSTRLEN 46 static inline ovs_be32 htonl(uint32_t x) { return (OVS_FORCE ovs_be32) x; } static inline ovs_be16 htons(uint16_t x) { return (OVS_FORCE ovs_be16) x; } static inline uint32_t ntohl(ovs_be32 x) { return (OVS_FORCE uint32_t) x; } static inline uint16_t ntohs(ovs_be16 x) { return (OVS_FORCE uint16_t) x; } in_addr_t inet_addr(const char *); int inet_aton (const char *, struct in_addr *); const char *inet_ntop(int, const void *, char *, socklen_t); int inet_pton(int, const char *, void *); #endif /* sparse */ openvswitch-2.0.1+git20140120/include/sparse/netinet/ip6.h000066400000000000000000000033771226605124000227210ustar00rootroot00000000000000/* * 
Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CHECKER__ #error "Use this header only with sparse. It is not a correct implementation." #endif #ifndef __NETINET_IP6_SPARSE #define __NETINET_IP6_SPARSE 1 #include struct ip6_hdr { union { struct ip6_hdrctl { ovs_be32 ip6_un1_flow; ovs_be16 ip6_un1_plen; uint8_t ip6_un1_nxt; uint8_t ip6_un1_hlim; } ip6_un1; uint8_t ip6_un2_vfc; } ip6_ctlun; struct in6_addr ip6_src; struct in6_addr ip6_dst; }; #define ip6_vfc ip6_ctlun.ip6_un2_vfc #define ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow #define ip6_plen ip6_ctlun.ip6_un1.ip6_un1_plen #define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt #define ip6_hlim ip6_ctlun.ip6_un1.ip6_un1_hlim #define ip6_hops ip6_ctlun.ip6_un1.ip6_un1_hlim struct ip6_rthdr { uint8_t ip6r_nxt; uint8_t ip6r_len; uint8_t ip6r_type; uint8_t ip6r_segleft; }; struct ip6_ext { uint8_t ip6e_nxt; uint8_t ip6e_len; }; struct ip6_frag { uint8_t ip6f_nxt; uint8_t ip6f_reserved; ovs_be16 ip6f_offlg; ovs_be32 ip6f_ident; }; #define IP6F_OFF_MASK ((OVS_FORCE ovs_be16) 0xfff8) #endif /* netinet/ip6.h sparse */ openvswitch-2.0.1+git20140120/include/sparse/pthread.h000066400000000000000000000022761226605124000222010ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CHECKER__ #error "Use this header only with sparse. It is not a correct implementation." #endif /* Get actual definitions for us to annotate and build on. */ #include_next /* Sparse complains about the proper PTHREAD_*_INITIALIZER definitions. * Luckily, it's not a real compiler so we can overwrite it with something * simple. */ #undef PTHREAD_MUTEX_INITIALIZER #define PTHREAD_MUTEX_INITIALIZER {} #undef PTHREAD_RWLOCK_INITIALIZER #define PTHREAD_RWLOCK_INITIALIZER {} #undef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP #define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP {} openvswitch-2.0.1+git20140120/include/sparse/sys/000077500000000000000000000000001226605124000212105ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/include/sparse/sys/socket.h000066400000000000000000000076111226605124000226560ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CHECKER__ #error "Use this header only with sparse. It is not a correct implementation." 
#endif #ifndef __SYS_SOCKET_SPARSE #define __SYS_SOCKET_SPARSE 1 #include "openvswitch/types.h" #include typedef unsigned short int sa_family_t; typedef __socklen_t socklen_t; struct sockaddr { sa_family_t sa_family; char sa_data[64]; }; struct sockaddr_storage { sa_family_t ss_family; char sa_data[64]; }; struct msghdr { void *msg_name; socklen_t msg_namelen; struct iovec *msg_iov; int msg_iovlen; void *msg_control; socklen_t msg_controllen; int msg_flags; }; struct cmsghdr { size_t cmsg_len; int cmsg_level; int cmsg_type; unsigned char cmsg_data[]; }; #define __CMSG_ALIGNTO sizeof(size_t) #define CMSG_ALIGN(LEN) \ (((LEN) + __CMSG_ALIGNTO - 1) / __CMSG_ALIGNTO * __CMSG_ALIGNTO) #define CMSG_DATA(CMSG) ((CMSG)->cmsg_data) #define CMSG_LEN(LEN) (sizeof(struct cmsghdr) + (LEN)) #define CMSG_SPACE(LEN) CMSG_ALIGN(CMSG_LEN(LEN)) #define CMSG_FIRSTHDR(MSG) \ ((MSG)->msg_controllen ? (struct cmsghdr *) (MSG)->msg_control : NULL) #define CMSG_NXTHDR(MSG, CMSG) __cmsg_nxthdr(MSG, CMSG) static inline struct cmsghdr * __cmsg_nxthdr(struct msghdr *msg, struct cmsghdr *cmsg) { size_t ofs = (char *) cmsg - (char *) msg->msg_control; size_t next_ofs = ofs + CMSG_ALIGN(cmsg->cmsg_len); return (next_ofs < msg->msg_controllen ? 
(void *) ((char *) msg->msg_control + next_ofs) : NULL); } enum { SCM_RIGHTS = 1 }; enum { SOCK_DGRAM, SOCK_RAW, SOCK_SEQPACKET, SOCK_STREAM }; enum { SOL_SOCKET }; enum { SO_ACCEPTCONN, SO_BROADCAST, SO_DEBUG, SO_DONTROUTE, SO_ERROR, SO_KEEPALIVE, SO_LINGER, SO_OOBINLINE, SO_RCVBUF, SO_RCVLOWAT, SO_RCVTIMEO, SO_REUSEADDR, SO_SNDBUF, SO_SNDLOWAT, SO_SNDTIMEO, SO_TYPE, SO_RCVBUFFORCE, SO_ATTACH_FILTER }; enum { MSG_CTRUNC, MSG_DONTROUTE, MSG_EOR, MSG_OOB, MSG_NOSIGNAL, MSG_PEEK, MSG_TRUNC, MSG_WAITALL, MSG_DONTWAIT }; enum { AF_UNSPEC, PF_UNSPEC = AF_UNSPEC, AF_INET, PF_INET = AF_INET, AF_INET6, PF_INET6 = AF_INET6, AF_UNIX, PF_UNIX = AF_UNIX, AF_NETLINK, PF_NETLINK = AF_NETLINK, AF_PACKET, PF_PACKET = AF_PACKET }; enum { SHUT_RD, SHUT_RDWR, SHUT_WR }; int accept(int, struct sockaddr *, socklen_t *); int bind(int, const struct sockaddr *, socklen_t); int connect(int, const struct sockaddr *, socklen_t); int getpeername(int, struct sockaddr *, socklen_t *); int getsockname(int, struct sockaddr *, socklen_t *); int getsockopt(int, int, int, void *, socklen_t *); int listen(int, int); ssize_t recv(int, void *, size_t, int); ssize_t recvfrom(int, void *, size_t, int, struct sockaddr *, socklen_t *); ssize_t recvmsg(int, struct msghdr *, int); ssize_t send(int, const void *, size_t, int); ssize_t sendmsg(int, const struct msghdr *, int); ssize_t sendto(int, const void *, size_t, int, const struct sockaddr *, socklen_t); int setsockopt(int, int, int, const void *, socklen_t); int shutdown(int, int); int sockatmark(int); int socket(int, int, int); int socketpair(int, int, int, int[2]); #endif /* for sparse */ openvswitch-2.0.1+git20140120/include/sparse/sys/wait.h000066400000000000000000000017421226605124000223310ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef __CHECKER__ #error "Use this header only with sparse. It is not a correct implementation." #endif #ifndef __SYS_WAIT_SPARSE #define __SYS_WAIT_SPARSE 1 #include_next #undef wait #define wait(a) rpl_wait(a) pid_t rpl_wait(int *); #undef waitpid #define waitpid(a, b, c) rpl_waitpid(a, b, c) pid_t rpl_waitpid(pid_t, int *, int); #endif /* for sparse */ openvswitch-2.0.1+git20140120/lib/000077500000000000000000000000001226605124000162205ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/lib/.gitignore000066400000000000000000000002201226605124000202020ustar00rootroot00000000000000/Makefile /Makefile.in /dhparams.c /dirs.c /coverage-counters.c /ofp-errors.inc /ofp-msgs.inc /vswitch-idl.c /vswitch-idl.h /vswitch-idl.ovsidl openvswitch-2.0.1+git20140120/lib/aes128.c000066400000000000000000001211601226605124000173700ustar00rootroot00000000000000/* * Copyright (c) 2009, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Based on rijndael.txt by Philip J. Erdelsky, downloaded from * http://www.efgh.com/software/rijndael.htm on September 24, 2009. 
The * license information there is: "Public domain; no restrictions on use." * The Apache license above applies only to Nicira's modifications to the * original code. */ #include #include "aes128.h" #include "util.h" static const uint32_t Te0[256] = { 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU, 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U, 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU, 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU, 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU, 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU, 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU, 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U, 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU, 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU, 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U, 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU, 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU, 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU, 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU, 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU, 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U, 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU, 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU, 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU, 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU, 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U, 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U, 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U, 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U, 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU, 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U, 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U, 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU, 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU, 0xbe5f5fe1U, 0x359797a2U, 
0x884444ccU, 0x2e171739U, 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U, 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U, 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU, 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U, 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU, 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U, 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU, 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U, 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U, 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU, 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U, 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U, 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U, 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U, 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U, 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U, 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U, 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U, 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU, 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U, 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U, 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U, 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U, 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U, 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U, 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU, 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U, 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U, 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U, 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU, }; static const uint32_t Te1[256] = { 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU, 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U, 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU, 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U, 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU, 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U, 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU, 
0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U, 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U, 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU, 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U, 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U, 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U, 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU, 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U, 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U, 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU, 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U, 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U, 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U, 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU, 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU, 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U, 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU, 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU, 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U, 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU, 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U, 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU, 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U, 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U, 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U, 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU, 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U, 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU, 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U, 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU, 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U, 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U, 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU, 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU, 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU, 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U, 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U, 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU, 0x8c018d8dU, 
0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U, 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU, 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U, 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU, 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U, 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU, 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU, 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U, 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU, 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U, 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU, 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U, 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U, 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U, 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU, 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU, 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U, 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU, 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U, }; static const uint32_t Te2[256] = { 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU, 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U, 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU, 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U, 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU, 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U, 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU, 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U, 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U, 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU, 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U, 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U, 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U, 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU, 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U, 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U, 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU, 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U, 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 
0xb3ce7db3U, 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U, 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU, 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU, 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U, 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU, 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU, 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U, 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU, 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U, 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU, 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U, 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U, 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U, 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU, 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U, 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU, 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U, 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU, 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U, 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U, 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU, 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU, 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU, 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U, 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U, 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU, 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U, 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU, 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U, 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU, 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U, 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU, 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU, 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U, 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU, 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U, 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU, 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U, 
0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U, 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U, 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU, 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU, 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U, 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU, 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U, }; static const uint32_t Te3[256] = { 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U, 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U, 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U, 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU, 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU, 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU, 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U, 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU, 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU, 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U, 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U, 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU, 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU, 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU, 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU, 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU, 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U, 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU, 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU, 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U, 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U, 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U, 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U, 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U, 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU, 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U, 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU, 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU, 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U, 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U, 0xbcbcdf63U, 0xb6b6c177U, 
0xdada75afU, 0x21216342U, 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU, 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U, 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU, 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU, 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U, 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U, 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU, 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U, 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU, 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U, 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U, 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U, 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U, 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU, 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U, 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU, 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U, 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU, 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U, 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU, 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU, 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU, 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU, 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U, 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U, 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U, 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U, 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U, 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U, 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU, 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, }; static const uint32_t Te4[256] = { 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U, 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U, 
0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U, 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U, 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU, 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U, 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU, 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U, 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U, 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U, 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U, 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU, 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU, 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U, 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U, 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U, 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U, 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U, 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U, 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U, 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU, 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU, 0xc2c2c2c2U, 
0xd3d3d3d3U, 0xacacacacU, 0x62626262U, 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U, 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U, 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U, 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U, 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU, 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU, 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU, 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U, 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU, 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U, 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, }; static const uint32_t Td0[256] = { 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU, 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U, 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U, 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU, 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U, 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU, 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U, 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U, 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U, 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U, 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU, 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U, 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 
0xed16825cU, 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U, 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU, 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U, 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U, 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U, 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU, 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U, 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU, 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U, 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU, 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U, 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU, 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU, 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U, 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU, 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U, 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU, 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U, 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U, 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U, 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU, 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U, 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U, 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU, 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U, 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U, 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U, 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U, 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U, 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU, 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U, 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U, 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U, 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U, 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U, 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU, 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU, 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU, 
0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU, 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U, 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U, 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU, 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU, 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U, 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU, 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U, 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U, 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U, }; static const uint32_t Td1[256] = { 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU, 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U, 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU, 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U, 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U, 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U, 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U, 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U, 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U, 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU, 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU, 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU, 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U, 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU, 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U, 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U, 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U, 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU, 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU, 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U, 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU, 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U, 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU, 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU, 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U, 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U, 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U, 0x640a0fd9U, 0x21685ca6U, 
0xd19b5b54U, 0x3a24362eU, 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U, 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU, 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U, 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U, 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U, 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU, 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U, 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U, 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U, 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U, 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U, 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U, 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU, 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU, 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U, 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU, 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U, 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU, 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU, 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U, 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU, 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U, 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U, 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U, 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U, 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U, 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U, 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U, 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU, 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U, 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U, 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU, 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U, 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U, 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U, 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U, }; static const uint32_t Td2[256] = { 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U, 
0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U, 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U, 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U, 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU, 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U, 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U, 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U, 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U, 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU, 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U, 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U, 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU, 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U, 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U, 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U, 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U, 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U, 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U, 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU, 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U, 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U, 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U, 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U, 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U, 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU, 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU, 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U, 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU, 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U, 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU, 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU, 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU, 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU, 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U, 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U, 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U, 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U, 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U, 0x8cc4a8fcU, 
0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U, 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U, 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU, 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU, 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U, 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U, 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU, 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU, 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U, 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U, 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U, 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U, 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U, 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U, 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U, 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU, 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U, 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U, 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U, 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U, 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U, 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U, 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU, 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U, 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U, }; static const uint32_t Td3[256] = { 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU, 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU, 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U, 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U, 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU, 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU, 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U, 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU, 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U, 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU, 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U, 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U, 0x48685870U, 0x45fd198fU, 0xde6c8794U, 
0x7bf8b752U, 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U, 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U, 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU, 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU, 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U, 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U, 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU, 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU, 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U, 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U, 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U, 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U, 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU, 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U, 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U, 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU, 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU, 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U, 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U, 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U, 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU, 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U, 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U, 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U, 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U, 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U, 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U, 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U, 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU, 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U, 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U, 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU, 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU, 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U, 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU, 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U, 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U, 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U, 
0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U, 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U, 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U, 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU, 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU, 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU, 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU, 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U, 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U, 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U, 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, }; static const uint32_t Td4[256] = { 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, 0x90909090U, 0xd8d8d8d8U, 
0xababababU, 0x00000000U, 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU, 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 
0x63636363U, 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
};

/* Round constants: aes128_schedule() XORs rcon[i] into the first word that
 * it derives in iteration i of its key-expansion loop. */
static const uint32_t rcon[] = {
    0x01000000, 0x02000000, 0x04000000, 0x08000000,
    0x10000000, 0x20000000, 0x40000000, 0x80000000,
    0x1B000000, 0x36000000,
};

/* Returns the 32-bit value formed from the 4 bytes at 'p', most significant
 * byte first (big-endian).  Reading byte by byte means 'p' does not need to
 * be aligned. */
static uint32_t
get_u32(const uint8_t *p)
{
    /* Widen each byte to 32 bits before shifting so that no bits are lost. */
    uint32_t p0 = p[0];
    uint32_t p1 = p[1];
    uint32_t p2 = p[2];
    uint32_t p3 = p[3];
    return (p0 << 24) | (p1 << 16) | (p2 << 8) | p3;
}

/* Stores 'x' into the 4 bytes at 'p', most significant byte first
 * (big-endian); the inverse of get_u32(). */
static void
put_u32(uint8_t *p, uint32_t x)
{
    /* Each assignment to a uint8_t keeps only the low 8 bits of the shifted
     * value. */
    p[0] = x >> 24;
    p[1] = x >> 16;
    p[2] = x >> 8;
    p[3] = x;
}

/* Expands the 128-bit 'key' into the encryption key schedule stored in
 * 'aes->rk'.
 *
 * Fills in aes->rk[0] through aes->rk[43]: the four words of 'key' itself
 * (read big-endian) followed by ten groups of four derived words. */
void
aes128_schedule(struct aes128 *aes, const uint8_t key[16])
{
    uint32_t *rk = aes->rk;
    int i;

    /* The first four words are the key itself. */
    rk[0] = get_u32(key);
    rk[1] = get_u32(key + 4);
    rk[2] = get_u32(key + 8);
    rk[3] = get_u32(key + 12);

    /* Each iteration derives words rk[4..7] from rk[0..3], then slides the
     * window forward by four words. */
    for (i = 0; i < 10; i++, rk += 4) {
        uint32_t temp = rk[3];

        /* Every byte of 'temp' is looked up in Te4 (whose entries repeat one
         * byte in all four positions, so the mask picks the destination
         * position), and the bytes are rotated left by one position: byte 1
         * of 'temp' lands in the top byte and the top byte of 'temp' lands
         * in the bottom byte. */
        rk[4] = (rk[0]
                 ^ (Te4[(temp >> 16) & 0xff] & 0xff000000)
                 ^ (Te4[(temp >>  8) & 0xff] & 0x00ff0000)
                 ^ (Te4[(temp      ) & 0xff] & 0x0000ff00)
                 ^ (Te4[(temp >> 24)       ] & 0x000000ff)
                 ^ rcon[i]);
        /* The remaining three words chain off the word just produced. */
        rk[5] = rk[1] ^ rk[4];
        rk[6] = rk[2] ^ rk[5];
        rk[7] = rk[3] ^ rk[6];
    }
    /* Ten iterations of 'rk += 4' must leave 'rk' at word 40; the final
     * iteration wrote words 40 through 43. */
    ovs_assert(rk == &aes->rk[40]);
}

/* Encrypts the 16-byte block at 'input_' using the key schedule in 'aes'
 * (as filled in by aes128_schedule()) and stores the 16-byte result at
 * 'output_'.
 *
 * All 16 input bytes are loaded into locals before the first output byte is
 * stored. */
void
aes128_encrypt(const struct aes128 *aes, const void *input_, void *output_)
{
    const uint8_t *input = input_;
    uint8_t *output = output_;
    const uint32_t *rk = aes->rk;
    uint32_t s0, s1, s2, s3;
    uint32_t t0, t1, t2, t3;
    int r;

    /* Map byte array block to cipher state and add initial round key. */
    s0 = get_u32(input     ) ^ rk[0];
    s1 = get_u32(input +  4) ^ rk[1];
    s2 = get_u32(input +  8) ^ rk[2];
    s3 = get_u32(input + 12) ^ rk[3];

    /* 9 table-driven rounds, unrolled two per loop iteration: the first half
     * computes t0..t3 from s0..s3 using rk[4..7], the second half computes
     * s0..s3 from t0..t3 using the next four words.  'r' counts the five
     * first-half steps; the loop exits after the fifth one, leaving the
     * state in t0..t3 and 'rk' pointing at the last four round-key words.
     * The 10th (last) round is applied after the loop.
     */
    r = 10 / 2;
    for (;;) {
        t0 = (Te0[(s0 >> 24)       ]
              ^ Te1[(s1 >> 16) & 0xff]
              ^ Te2[(s2 >>  8) & 0xff]
              ^ Te3[(s3      ) & 0xff]
              ^ rk[4]);
        t1 = (Te0[(s1 >> 24)       ]
              ^ Te1[(s2 >> 16) & 0xff]
              ^ Te2[(s3 >>  8) & 0xff]
              ^ Te3[(s0      ) & 0xff]
              ^ rk[5]);
        t2 = (Te0[(s2 >> 24)       ]
              ^ Te1[(s3 >> 16) & 0xff]
              ^ Te2[(s0 >>  8) & 0xff]
              ^ Te3[(s1      ) & 0xff]
              ^ rk[6]);
        t3 = (Te0[(s3 >> 24)       ]
              ^ Te1[(s0 >> 16) & 0xff]
              ^ Te2[(s1 >>  8) & 0xff]
              ^ Te3[(s2      ) & 0xff]
              ^ rk[7]);

        /* Advance past the eight round-key words of this iteration (four
         * just used above, four used below unless this is the exit). */
        rk += 8;
        if (--r == 0) {
            break;
        }

        s0 = (Te0[(t0 >> 24)       ]
              ^ Te1[(t1 >> 16) & 0xff]
              ^ Te2[(t2 >>  8) & 0xff]
              ^ Te3[(t3      ) & 0xff]
              ^ rk[0]);
        s1 = (Te0[(t1 >> 24)       ]
              ^ Te1[(t2 >> 16) & 0xff]
              ^ Te2[(t3 >>  8) & 0xff]
              ^ Te3[(t0      ) & 0xff]
              ^ rk[1]);
        s2 = (Te0[(t2 >> 24)       ]
              ^ Te1[(t3 >> 16) & 0xff]
              ^ Te2[(t0 >>  8) & 0xff]
              ^ Te3[(t1      ) & 0xff]
              ^ rk[2]);
        s3 = (Te0[(t3 >> 24)       ]
              ^ Te1[(t0 >> 16) & 0xff]
              ^ Te2[(t1 >>  8) & 0xff]
              ^ Te3[(t2      ) & 0xff]
              ^ rk[3]);
    }

    /* Apply last round and map cipher state to byte array block.  This
     * round uses only Te4, masked to one byte per lookup, and the last four
     * round-key words. */
    s0 = ((Te4[(t0 >> 24)       ] & 0xff000000)
          ^ (Te4[(t1 >> 16) & 0xff] & 0x00ff0000)
          ^ (Te4[(t2 >>  8) & 0xff] & 0x0000ff00)
          ^ (Te4[(t3      ) & 0xff] & 0x000000ff)
          ^ rk[0]);
    put_u32(output     , s0);
    s1 = ((Te4[(t1 >> 24)       ] & 0xff000000)
          ^ (Te4[(t2 >> 16) & 0xff] & 0x00ff0000)
          ^ (Te4[(t3 >>  8) & 0xff] & 0x0000ff00)
          ^ (Te4[(t0      ) & 0xff] & 0x000000ff)
          ^ rk[1]);
    put_u32(output +  4, s1);
    s2 = ((Te4[(t2 >> 24)       ] & 0xff000000)
          ^ (Te4[(t3 >> 16) & 0xff] & 0x00ff0000)
          ^ (Te4[(t0 >>  8) & 0xff] & 0x0000ff00)
          ^ (Te4[(t1      ) & 0xff] & 0x000000ff)
          ^ rk[2]);
    put_u32(output +  8, s2);
    s3 = ((Te4[(t3 >> 24)       ] & 0xff000000)
          ^ (Te4[(t0 >> 16) & 0xff] & 0x00ff0000)
          ^ (Te4[(t1 >>  8) & 0xff] & 0x0000ff00)
          ^ (Te4[(t2      ) & 0xff] & 0x000000ff)
          ^ rk[3]);
    put_u32(output + 12, s3);
}
openvswitch-2.0.1+git20140120/lib/aes128.h000066400000000000000000000022221226605124000173720ustar00rootroot00000000000000/* * Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Based on rijndael.txt by Philip J. Erdelsky, downloaded from
 * http://www.efgh.com/software/rijndael.htm on September 24, 2009.  The
 * license information there is: "Public domain; no restrictions on use."
 * The Apache license above applies only to Nicira's modifications to the
 * original code.
 */

#ifndef AES128_H
#define AES128_H

/* NOTE(review): the header name after the next #include is missing in this
 * copy of the file.  The declarations below use uint32_t and uint8_t, so
 * presumably it named the fixed-width integer header -- confirm against the
 * upstream file. */
#include

/* AES-128 encryption key schedule.
 *
 * 'rk' has 128/8 + 28 == 44 words; aes128_schedule() fills in all of them
 * and aes128_encrypt() reads them. */
struct aes128 {
    uint32_t rk[128/8 + 28];
};

/* Expands a 16-byte key into the key schedule. */
void aes128_schedule(struct aes128 *, const uint8_t key[16]);

/* Encrypts one 16-byte block (second argument) into the third argument. */
void aes128_encrypt(const struct aes128 *, const void *, void *);

#endif  /* aes128.h */
openvswitch-2.0.1+git20140120/lib/async-append-aio.c000066400000000000000000000077341226605124000215270ustar00rootroot00000000000000/* Copyright (c) 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

/* NOTE(review): this #include and the four bare ones further down have lost
 * their header names in this copy of the file -- restore them from the
 * upstream source rather than guessing. */
#include

/* This implementation of the async-append.h interface uses the POSIX
 * asynchronous I/O interface. */

#include "async-append.h"

#include
#include
#include
#include

#include "byteq.h"
#include "ovs-thread.h"
#include "util.h"

/* Maximum number of bytes of buffered data. */
enum { BUFFER_SIZE = 65536 };

/* Maximum number of aiocbs to use.
 *
 * aiocbs are big (144 bytes with glibc 2.11 on i386) so we try to allow for a
 * reasonable number by basing the number we allocate on the amount of buffer
 * space.
 *
 * The value is rounded down to a power of 2 so that a counter can be turned
 * into an index into the aiocb array with '& (MAX_CBS - 1)', as
 * async_append_wait() does. */
enum { MAX_CBS = ROUND_DOWN_POW2(BUFFER_SIZE / sizeof(struct aiocb)) };
BUILD_ASSERT_DECL(IS_POW2(MAX_CBS));

/* State for asynchronously appending data to a file descriptor. */
struct async_append {
    int fd;                     /* Descriptor passed to async_append_create(). */

    struct aiocb *aiocbs;       /* Array of MAX_CBS control blocks. */
    /* Free-running counters, not array indexes: their difference is the
     * number of control blocks in use (see async_append_is_full()), and
     * 'aiocb_tail & (MAX_CBS - 1)' is the slot of the oldest one.
     * Presumably 'aiocb_head' is advanced where requests are submitted,
     * which is outside the visible part of this file -- confirm. */
    unsigned int aiocb_head, aiocb_tail;

    uint8_t *buffer;            /* BUFFER_SIZE bytes of storage for 'byteq'. */
    struct byteq byteq;         /* Byte queue layered over 'buffer'. */
};

/* Allocates and returns a new, empty async_append for 'fd'.  Nothing is
 * written to 'fd' here.  Release the result with async_append_destroy(). */
struct async_append *
async_append_create(int fd)
{
    struct async_append *ap;

    ap = xmalloc(sizeof *ap);
    ap->fd = fd;
    ap->aiocbs = xmalloc(MAX_CBS * sizeof *ap->aiocbs);
    ap->aiocb_head = ap->aiocb_tail = 0;
    ap->buffer = xmalloc(BUFFER_SIZE);
    byteq_init(&ap->byteq, ap->buffer, BUFFER_SIZE);

    return ap;
}

/* Flushes 'ap' and then frees it along with its control blocks and buffer.
 * Does nothing if 'ap' is null.  The file descriptor is not closed here. */
void
async_append_destroy(struct async_append *ap)
{
    if (ap) {
        /* Flush before freeing: the buffer and control blocks are about to
         * be released. */
        async_append_flush(ap);
        free(ap->aiocbs);
        free(ap->buffer);
        free(ap);
    }
}

/* Returns true if 'ap' cannot accept more data right now: either all MAX_CBS
 * control blocks are in use or the byte queue is full.  The subtraction is
 * done on unsigned counters, so it stays correct when they wrap around. */
static bool
async_append_is_full(const struct async_append *ap)
{
    return (ap->aiocb_head - ap->aiocb_tail >= MAX_CBS
            || byteq_is_full(&ap->byteq));
}

/* Returns true if the byte queue of 'ap' holds no data. */
static bool
async_append_is_empty(const struct async_append *ap)
{
    return byteq_is_empty(&ap->byteq);
}

/* Retires completed requests of 'ap', oldest first, releasing their control
 * blocks and their bytes in the byte queue.
 *
 * If the oldest request is still in progress and nothing has been retired
 * yet in this call, blocks in aio_suspend() until it completes.  Returns
 * when the byte queue is empty, or when at least one request has been
 * retired and the next one is still in progress. */
static void
async_append_wait(struct async_append *ap)
{
    int n = 0;                  /* Number of requests retired so far. */

    while (!async_append_is_empty(ap)) {
        /* Oldest control block still in use. */
        struct aiocb *aiocb = &ap->aiocbs[ap->aiocb_tail & (MAX_CBS - 1)];
        int error = aio_error(aiocb);

        if (error == EINPROGRESS) {
            const struct aiocb *p = aiocb;
            if (n > 0) {
                /* Already made progress; no need to block. */
                return;
            }
            /* NULL timeout: wait without a time limit.  The return value is
             * not checked; the loop re-examines the request either way. */
            aio_suspend(&p, 1, NULL);
        } else {
            /* The request finished, successfully or not.  Its result is
             * deliberately discarded, so a failed write is not reported. */
            ignore(aio_return(aiocb));
            ap->aiocb_tail++;
            byteq_advance_tail(&ap->byteq, aiocb->aio_nbytes);
            n++;
        }
    }
}

void
async_append_write(struct async_append *ap, const void *data_, size_t size)
{
    const uint8_t *data = data_;

    while (size > 0) {
        struct aiocb *aiocb;
        size_t chunk_size;
        void *chunk;

        while (async_append_is_full(ap)) {
            async_append_wait(ap);
        }

        chunk = byteq_head(&ap->byteq);
        chunk_size = byteq_headroom(&ap->byteq);
        if (chunk_size > size) {
            chunk_size = size;
        }
        memcpy(chunk, data, chunk_size);

        aiocb =
&ap->aiocbs[ap->aiocb_head & (MAX_CBS - 1)]; memset(aiocb, 0, sizeof *aiocb); aiocb->aio_fildes = ap->fd; aiocb->aio_offset = 0; aiocb->aio_buf = chunk; aiocb->aio_nbytes = chunk_size; aiocb->aio_sigevent.sigev_notify = SIGEV_NONE; if (aio_write(aiocb) == -1) { async_append_flush(ap); ignore(write(ap->fd, data, size)); return; } data += chunk_size; size -= chunk_size; byteq_advance_head(&ap->byteq, chunk_size); ap->aiocb_head++; } } void async_append_flush(struct async_append *ap) { while (!async_append_is_empty(ap)) { async_append_wait(ap); } } openvswitch-2.0.1+git20140120/lib/async-append-null.c000066400000000000000000000023331226605124000217170ustar00rootroot00000000000000/* Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include /* This is a null implementation of the asynchronous I/O interface for systems * that don't have a form of asynchronous I/O. */ #include "async-append.h" #include #include #include "util.h" struct async_append * async_append_create(int fd OVS_UNUSED) { return NULL; } void async_append_destroy(struct async_append *ap) { ovs_assert(ap == NULL); } void async_append_write(struct async_append *ap OVS_UNUSED, const void *data OVS_UNUSED, size_t size OVS_UNUSED) { NOT_REACHED(); } void async_append_flush(struct async_append *ap OVS_UNUSED) { NOT_REACHED(); } openvswitch-2.0.1+git20140120/lib/async-append.h000066400000000000000000000040361226605124000207560ustar00rootroot00000000000000/* Copyright (c) 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ASYNC_APPEND_H #define ASYNC_APPEND_H 1 #include /* This module defines a simple, abstract interface to asynchronous file I/O. * It is currently used only for logging. Thus, for now the interface only * supports appending to a file. Multiple implementations are possible * depending on the operating system's degree and form of support for * asynchronous I/O. * * The comments below document the requirements on any implementation. * * Thread-safety * ============= * * Only a single thread may use a given 'struct async_append' at one time. */ /* Creates and returns a new asynchronous appender for file descriptor 'fd', * which the caller must have opened in append mode (O_APPEND). If the system * is for some reason unable to support asynchronous I/O on 'fd' this function * may return NULL. */ struct async_append *async_append_create(int fd); /* Destroys 'ap', without closing its underlying file descriptor. */ void async_append_destroy(struct async_append *ap); /* Appends the 'size' bytes of 'data' to 'ap', asynchronously if possible. */ void async_append_write(struct async_append *ap, const void *data, size_t size); /* Blocks until all data asynchronously written to 'ap' with * async_append_write() has been committed to the point that it will be written * to disk barring an operating system or hardware failure. 
*/ void async_append_flush(struct async_append *ap); #endif /* async-append.h */ openvswitch-2.0.1+git20140120/lib/automake.mk000066400000000000000000000215611226605124000203640ustar00rootroot00000000000000# Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. noinst_LIBRARIES += lib/libopenvswitch.a lib_libopenvswitch_a_SOURCES = \ lib/aes128.c \ lib/aes128.h \ lib/async-append.h \ lib/backtrace.c \ lib/backtrace.h \ lib/bfd.c \ lib/bfd.h \ lib/bitmap.c \ lib/bitmap.h \ lib/bond.c \ lib/bond.h \ lib/bundle.c \ lib/bundle.h \ lib/byte-order.h \ lib/byteq.c \ lib/byteq.h \ lib/cfm.c \ lib/cfm.h \ lib/classifier.c \ lib/classifier.h \ lib/command-line.c \ lib/command-line.h \ lib/compiler.h \ lib/coverage.c \ lib/coverage.h \ lib/crc32c.c \ lib/crc32c.h \ lib/csum.c \ lib/csum.h \ lib/daemon.c \ lib/daemon.h \ lib/dhcp.h \ lib/dummy.c \ lib/dummy.h \ lib/dhparams.h \ lib/dirs.h \ lib/dpif-netdev.c \ lib/dpif-provider.h \ lib/dpif.c \ lib/dpif.h \ lib/heap.c \ lib/heap.h \ lib/dynamic-string.c \ lib/dynamic-string.h \ lib/entropy.c \ lib/entropy.h \ lib/fatal-signal.c \ lib/fatal-signal.h \ lib/flow.c \ lib/flow.h \ lib/guarded-list.c \ lib/guarded-list.h \ lib/hash.c \ lib/hash.h \ lib/hindex.c \ lib/hindex.h \ lib/hmap.c \ lib/hmap.h \ lib/hmapx.c \ lib/hmapx.h \ lib/jhash.c \ lib/jhash.h \ lib/json.c \ lib/json.h \ lib/jsonrpc.c \ lib/jsonrpc.h \ lib/lacp.c \ lib/lacp.h \ lib/latch.c \ lib/latch.h \ lib/learn.c \ lib/learn.h \ lib/learning-switch.c \ lib/learning-switch.h \ lib/list.c \ lib/list.h \ lib/lockfile.c \ lib/lockfile.h \ lib/mac-learning.c \ lib/mac-learning.h \ lib/match.c \ lib/match.h \ lib/memory.c \ lib/memory.h \ lib/meta-flow.c \ lib/meta-flow.h \ lib/multipath.c \ lib/multipath.h \ lib/netdev-dummy.c \ 
lib/netdev-provider.h \ lib/netdev-vport.c \ lib/netdev-vport.h \ lib/netdev.c \ lib/netdev.h \ lib/netflow.h \ lib/netlink.c \ lib/netlink.h \ lib/nx-match.c \ lib/nx-match.h \ lib/odp-execute.c \ lib/odp-execute.h \ lib/odp-util.c \ lib/odp-util.h \ lib/ofp-actions.c \ lib/ofp-actions.h \ lib/ofp-errors.c \ lib/ofp-errors.h \ lib/ofp-msgs.c \ lib/ofp-msgs.h \ lib/ofp-parse.c \ lib/ofp-parse.h \ lib/ofp-print.c \ lib/ofp-print.h \ lib/ofp-util.c \ lib/ofp-util.def \ lib/ofp-util.h \ lib/ofp-version-opt.h \ lib/ofp-version-opt.c \ lib/ofpbuf.c \ lib/ofpbuf.h \ lib/ovs-atomic-c11.h \ lib/ovs-atomic-clang.h \ lib/ovs-atomic-flag-gcc4.7+.h \ lib/ovs-atomic-gcc4+.c \ lib/ovs-atomic-gcc4+.h \ lib/ovs-atomic-gcc4.7+.h \ lib/ovs-atomic-pthreads.c \ lib/ovs-atomic-pthreads.h \ lib/ovs-atomic.h \ lib/ovs-thread.c \ lib/ovs-thread.h \ lib/ovsdb-data.c \ lib/ovsdb-data.h \ lib/ovsdb-error.c \ lib/ovsdb-error.h \ lib/ovsdb-idl-provider.h \ lib/ovsdb-idl.c \ lib/ovsdb-idl.h \ lib/ovsdb-parser.c \ lib/ovsdb-parser.h \ lib/ovsdb-types.c \ lib/ovsdb-types.h \ lib/packets.c \ lib/packets.h \ lib/pcap-file.c \ lib/pcap-file.h \ lib/poll-loop.c \ lib/poll-loop.h \ lib/process.c \ lib/process.h \ lib/random.c \ lib/random.h \ lib/rconn.c \ lib/rconn.h \ lib/reconnect.c \ lib/reconnect.h \ lib/sat-math.h \ lib/seq.c \ lib/seq.h \ lib/sha1.c \ lib/sha1.h \ lib/shash.c \ lib/shash.h \ lib/simap.c \ lib/simap.h \ lib/signals.c \ lib/signals.h \ lib/smap.c \ lib/smap.h \ lib/socket-util.c \ lib/socket-util.h \ lib/sort.c \ lib/sort.h \ lib/sset.c \ lib/sset.h \ lib/stp.c \ lib/stp.h \ lib/stream-fd.c \ lib/stream-fd.h \ lib/stream-provider.h \ lib/stream-ssl.h \ lib/stream-tcp.c \ lib/stream-unix.c \ lib/stream.c \ lib/stream.h \ lib/string.c \ lib/string.h \ lib/svec.c \ lib/svec.h \ lib/table.c \ lib/table.h \ lib/timer.c \ lib/timer.h \ lib/timeval.c \ lib/timeval.h \ lib/token-bucket.c \ lib/token-bucket.h \ lib/type-props.h \ lib/unaligned.h \ lib/unicode.c \ lib/unicode.h \ 
lib/unixctl.c \ lib/unixctl.h \ lib/util.c \ lib/util.h \ lib/uuid.c \ lib/uuid.h \ lib/valgrind.h \ lib/vconn-provider.h \ lib/vconn-stream.c \ lib/vconn.c \ lib/vconn.h \ lib/vlan-bitmap.c \ lib/vlan-bitmap.h \ lib/vlandev.c \ lib/vlandev.h \ lib/vlog.c \ lib/vlog.h \ lib/vswitch-idl.c \ lib/vswitch-idl.h nodist_lib_libopenvswitch_a_SOURCES = \ lib/dirs.c CLEANFILES += $(nodist_lib_libopenvswitch_a_SOURCES) noinst_LIBRARIES += lib/libsflow.a lib_libsflow_a_SOURCES = \ lib/sflow_api.h \ lib/sflow.h \ lib/sflow_agent.c \ lib/sflow_sampler.c \ lib/sflow_poller.c \ lib/sflow_receiver.c lib_libsflow_a_CFLAGS = $(AM_CFLAGS) if HAVE_WNO_UNUSED lib_libsflow_a_CFLAGS += -Wno-unused endif if HAVE_WNO_UNUSED_PARAMETER lib_libsflow_a_CFLAGS += -Wno-unused-parameter endif if LINUX_DATAPATH lib_libopenvswitch_a_SOURCES += \ lib/dpif-linux.c \ lib/dpif-linux.h \ lib/netdev-linux.c \ lib/netdev-linux.h \ lib/netlink-notifier.c \ lib/netlink-notifier.h \ lib/netlink-protocol.h \ lib/netlink-socket.c \ lib/netlink-socket.h \ lib/rtnetlink-link.c \ lib/rtnetlink-link.h \ lib/route-table.c \ lib/route-table.h endif if HAVE_POSIX_AIO lib_libopenvswitch_a_SOURCES += lib/async-append-aio.c else lib_libopenvswitch_a_SOURCES += lib/async-append-null.c endif if ESX lib_libopenvswitch_a_SOURCES += \ lib/route-table-stub.c endif if HAVE_IF_DL lib_libopenvswitch_a_SOURCES += \ lib/netdev-bsd.c \ lib/rtbsd.c \ lib/rtbsd.h \ lib/route-table-bsd.c endif if HAVE_OPENSSL lib_libopenvswitch_a_SOURCES += lib/stream-ssl.c nodist_lib_libopenvswitch_a_SOURCES += lib/dhparams.c lib/dhparams.c: lib/dh1024.pem lib/dh2048.pem lib/dh4096.pem (echo '#include "lib/dhparams.h"' && \ openssl dhparam -C -in $(srcdir)/lib/dh1024.pem -noout && \ openssl dhparam -C -in $(srcdir)/lib/dh2048.pem -noout && \ openssl dhparam -C -in $(srcdir)/lib/dh4096.pem -noout) \ | sed 's/\(get_dh[0-9]*\)()/\1(void)/' > lib/dhparams.c.tmp mv lib/dhparams.c.tmp lib/dhparams.c else lib_libopenvswitch_a_SOURCES += lib/stream-nossl.c 
endif EXTRA_DIST += \ lib/dh1024.pem \ lib/dh2048.pem \ lib/dh4096.pem \ lib/dirs.c.in MAN_FRAGMENTS += \ lib/common.man \ lib/common-syn.man \ lib/coverage-unixctl.man \ lib/daemon.man \ lib/daemon-syn.man \ lib/memory-unixctl.man \ lib/ofp-version.man \ lib/ovs.tmac \ lib/ssl-bootstrap.man \ lib/ssl-bootstrap-syn.man \ lib/ssl-peer-ca-cert.man \ lib/ssl.man \ lib/ssl-syn.man \ lib/table.man \ lib/unixctl.man \ lib/unixctl-syn.man \ lib/vconn-active.man \ lib/vconn-passive.man \ lib/vlog-unixctl.man \ lib/vlog-syn.man \ lib/vlog.man # vswitch IDL OVSIDL_BUILT += \ $(srcdir)/lib/vswitch-idl.c \ $(srcdir)/lib/vswitch-idl.h \ $(srcdir)/lib/vswitch-idl.ovsidl EXTRA_DIST += $(srcdir)/lib/vswitch-idl.ann VSWITCH_IDL_FILES = \ $(srcdir)/vswitchd/vswitch.ovsschema \ $(srcdir)/lib/vswitch-idl.ann $(srcdir)/lib/vswitch-idl.ovsidl: $(VSWITCH_IDL_FILES) $(OVSDB_IDLC) annotate $(VSWITCH_IDL_FILES) > $@.tmp mv $@.tmp $@ lib/dirs.c: lib/dirs.c.in Makefile ($(ro_c) && sed < $(srcdir)/lib/dirs.c.in \ -e 's,[@]srcdir[@],$(srcdir),g' \ -e 's,[@]LOGDIR[@],"$(LOGDIR)",g' \ -e 's,[@]RUNDIR[@],"$(RUNDIR)",g' \ -e 's,[@]DBDIR[@],"$(DBDIR)",g' \ -e 's,[@]bindir[@],"$(bindir)",g' \ -e 's,[@]sysconfdir[@],"$(sysconfdir)",g' \ -e 's,[@]pkgdatadir[@],"$(pkgdatadir)",g') \ > lib/dirs.c.tmp mv lib/dirs.c.tmp lib/dirs.c $(srcdir)/lib/ofp-errors.inc: \ lib/ofp-errors.h include/openflow/openflow-common.h \ $(srcdir)/build-aux/extract-ofp-errors $(run_python) $(srcdir)/build-aux/extract-ofp-errors \ $(srcdir)/lib/ofp-errors.h \ $(srcdir)/include/openflow/openflow-common.h > $@.tmp mv $@.tmp $@ $(srcdir)/lib/ofp-errors.c: $(srcdir)/lib/ofp-errors.inc EXTRA_DIST += build-aux/extract-ofp-errors lib/ofp-errors.inc $(srcdir)/lib/ofp-msgs.inc: \ lib/ofp-msgs.h $(srcdir)/build-aux/extract-ofp-msgs $(run_python) $(srcdir)/build-aux/extract-ofp-msgs \ $(srcdir)/lib/ofp-msgs.h $@ > $@.tmp && mv $@.tmp $@ $(srcdir)/lib/ofp-msgs.c: $(srcdir)/lib/ofp-msgs.inc EXTRA_DIST += build-aux/extract-ofp-msgs 
lib/ofp-msgs.inc INSTALL_DATA_LOCAL += lib-install-data-local lib-install-data-local: $(MKDIR_P) $(DESTDIR)$(RUNDIR) $(MKDIR_P) $(DESTDIR)$(PKIDIR) $(MKDIR_P) $(DESTDIR)$(LOGDIR) $(MKDIR_P) $(DESTDIR)$(DBDIR) if !USE_LINKER_SECTIONS # All distributed sources, with names adjust properly for referencing # from $(builddir). all_sources = \ `for file in $(DIST_SOURCES); do \ if test -f $$file; then \ echo $$file; \ else \ echo $(VPATH)/$$file; \ fi; \ done` lib/coverage.$(OBJEXT): lib/coverage.def lib/coverage.def: $(DIST_SOURCES) sed -n 's|^COVERAGE_DEFINE(\([_a-zA-Z0-9]\{1,\}\)).*$$|COVERAGE_COUNTER(\1)|p' $(all_sources) | LC_ALL=C sort -u > $@ CLEANFILES += lib/coverage.def lib/vlog.$(OBJEXT): lib/vlog-modules.def lib/vlog-modules.def: $(DIST_SOURCES) sed -n 's|^VLOG_DEFINE_\(THIS_\)\{0,1\}MODULE(\([_a-zA-Z0-9]\{1,\}\)).*$$|VLOG_MODULE(\2)|p' $(all_sources) | LC_ALL=C sort -u > $@ CLEANFILES += lib/vlog-modules.def endif openvswitch-2.0.1+git20140120/lib/backtrace.c000066400000000000000000000020501226605124000203000ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "backtrace.h" #ifdef HAVE_BACKTRACE #include void backtrace_capture(struct backtrace *b) { void *frames[BACKTRACE_MAX_FRAMES]; int i; b->n_frames = backtrace(frames, BACKTRACE_MAX_FRAMES); for (i = 0; i < b->n_frames; i++) { b->frames[i] = (uintptr_t) frames[i]; } } #else void backtrace_capture(struct backtrace *backtrace) { backtrace->n_frames = 0; } #endif openvswitch-2.0.1+git20140120/lib/backtrace.h000066400000000000000000000015261226605124000203140ustar00rootroot00000000000000/* * Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef BACKTRACE_H #define BACKTRACE_H 1 #include #define BACKTRACE_MAX_FRAMES 31 struct backtrace { int n_frames; uintptr_t frames[BACKTRACE_MAX_FRAMES]; }; void backtrace_capture(struct backtrace *); #endif /* backtrace.h */ openvswitch-2.0.1+git20140120/lib/bfd.c000066400000000000000000001175471226605124000171360ustar00rootroot00000000000000/* Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include "bfd.h" #include #include #include #include #include "byte-order.h" #include "csum.h" #include "dpif.h" #include "dynamic-string.h" #include "flow.h" #include "hash.h" #include "hmap.h" #include "list.h" #include "netdev.h" #include "netlink.h" #include "odp-util.h" #include "ofpbuf.h" #include "ovs-thread.h" #include "openvswitch/types.h" #include "packets.h" #include "poll-loop.h" #include "random.h" #include "smap.h" #include "timeval.h" #include "unaligned.h" #include "unixctl.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(bfd); /* XXX Finish BFD. * * The goal of this module is to replace CFM with something both more flexible * and standards compliant. In service of this goal, the following needs to be * done. * * - Compliance * * Implement Demand mode. * * Go through the RFC line by line and verify we comply. * * Test against a hardware implementation. Preferably a popular one. * * Delete BFD packets with nw_ttl != 255 in the datapath to prevent DOS * attacks. * * - Unit tests. * * - Set TOS/PCP on the outer tunnel header when encapped. * * - Sending BFD messages should be in its own thread/process. * * - Scale testing. How does it operate when there are large number of bfd * sessions? Do we ever have random flaps? What's the CPU utilization? * * - Rely on data traffic for liveness by using BFD demand mode. * If we're receiving traffic on a port, we can safely assume it's up (modulo * unidrectional failures). BFD has a demand mode in which it can stay quiet * unless it feels the need to check the status of the port. Using this, we * can implement a strategy in which BFD only sends control messages on dark * interfaces. * * - Depending on how one interprets the spec, it appears that a BFD session * can never change bfd.LocalDiag to "No Diagnostic". We should verify that * this is what hardware implementations actually do. 
Seems like "No
 * Diagnostic" should be set once a BFD session state goes UP. */

/* Value placed in the version bits of 'vers_diag' in outgoing messages and
 * required of incoming ones. */
#define BFD_VERSION 1

/* Flag bits carried in the low six bits of the 'flags' byte of struct msg
 * (see FLAGS_MASK). */
enum flags {
    FLAG_MULTIPOINT = 1 << 0,
    FLAG_DEMAND = 1 << 1,
    FLAG_AUTH = 1 << 2,
    FLAG_CTL = 1 << 3,
    FLAG_FINAL = 1 << 4,
    FLAG_POLL = 1 << 5
};

/* Session states.  The values are already shifted into the top two bits of
 * the 'flags' byte of struct msg (see STATE_MASK), so a state can be OR'ed
 * directly with 'enum flags' bits. */
enum state {
    STATE_ADMIN_DOWN = 0 << 6,
    STATE_DOWN = 1 << 6,
    STATE_INIT = 2 << 6,
    STATE_UP = 3 << 6
};

/* Diagnostic codes carried in the low bits of 'vers_diag' (see DIAG_MASK). */
enum diag {
    DIAG_NONE = 0,                /* No Diagnostic. */
    DIAG_EXPIRED = 1,             /* Control Detection Time Expired. */
    DIAG_ECHO_FAILED = 2,         /* Echo Function Failed. */
    DIAG_RMT_DOWN = 3,            /* Neighbor Signaled Session Down. */
    DIAG_FWD_RESET = 4,           /* Forwarding Plane Reset. */
    DIAG_PATH_DOWN = 5,           /* Path Down. */
    DIAG_CPATH_DOWN = 6,          /* Concatenated Path Down. */
    DIAG_ADMIN_DOWN = 7,          /* Administratively Down. */
    DIAG_RCPATH_DOWN = 8          /* Reverse Concatenated Path Down. */
};

/* RFC 5880 Section 4.1
 *  0                   1                   2                   3
 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |Vers |  Diag   |Sta|P|F|C|A|D|M|  Detect Mult  |    Length     |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                       My Discriminator                        |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                      Your Discriminator                       |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                    Desired Min TX Interval                    |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                   Required Min RX Interval                    |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                 Required Min Echo RX Interval                 |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */
struct msg {
    uint8_t vers_diag;    /* Version and diagnostic. */
    uint8_t flags;        /* 2bit State field followed by flags. */
    uint8_t mult;         /* Fault detection multiplier. */
    uint8_t length;       /* Length of this BFD message. */
    ovs_be32 my_disc;     /* My discriminator. */
    ovs_be32 your_disc;   /* Your discriminator. */
    ovs_be32 min_tx;      /* Desired minimum tx interval. */
    ovs_be32 min_rx;      /* Required minimum rx interval. */
    ovs_be32 min_rx_echo; /* Required minimum echo rx interval. */
};
/* The wire format above must be exactly BFD_PACKET_LEN bytes. */
BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg));

/* Field extraction for the first two bytes of struct msg. */
#define DIAG_MASK 0x1f          /* Diagnostic bits of 'vers_diag'. */
#define VERS_SHIFT 5            /* Version is 'vers_diag' >> VERS_SHIFT. */
#define STATE_MASK 0xC0         /* 'enum state' bits of 'flags'. */
#define FLAGS_MASK 0x3f         /* 'enum flags' bits of 'flags'. */

/* One BFD session. */
struct bfd {
    struct hmap_node node;        /* In 'all_bfds'. */
    uint32_t disc;                /* bfd.LocalDiscr. Key in 'all_bfds' hmap. */

    char *name;                   /* Name used for logging. */

    bool cpath_down;              /* Concatenated Path Down. */
    uint8_t mult;                 /* bfd.DetectMult. */

    struct netdev *netdev;
    uint64_t rx_packets;          /* Packets received by 'netdev'. */

    enum state state;             /* bfd.SessionState. */
    enum state rmt_state;         /* bfd.RemoteSessionState. */

    enum diag diag;               /* bfd.LocalDiag. */
    enum diag rmt_diag;           /* Remote diagnostic. */

    enum flags flags;             /* Flags sent on messages. */
    enum flags rmt_flags;         /* Flags last received. */

    uint32_t rmt_disc;            /* bfd.RemoteDiscr. */

    uint8_t eth_dst[ETH_ADDR_LEN];/* Ethernet destination address. */
    bool eth_dst_set;             /* 'eth_dst' set through database. */

    uint16_t udp_src;             /* UDP source port. */

    /* All timers in milliseconds. */
    long long int rmt_min_rx;     /* bfd.RemoteMinRxInterval. */
    long long int rmt_min_tx;     /* Remote minimum TX interval. */

    long long int cfg_min_tx;     /* Configured minimum TX rate. */
    long long int cfg_min_rx;     /* Configured required minimum RX rate. */
    long long int poll_min_tx;    /* Min TX negotiating in a poll sequence. */
    long long int poll_min_rx;    /* Min RX negotiating in a poll sequence. */
    long long int min_tx;         /* bfd.DesiredMinTxInterval. */
    long long int min_rx;         /* bfd.RequiredMinRxInterval. */

    long long int last_tx;        /* Last TX time. */
    long long int next_tx;        /* Next TX time. */
    long long int detect_time;    /* RFC 5880 6.8.4 Detection time. */

    int forwarding_override;      /* Manual override of 'forwarding' status. */

    atomic_bool check_tnl_key;    /* Verify tunnel key of inbound packets? */
    atomic_int ref_cnt;

    /* When 'forwarding_if_rx' is true, bfd_forwarding() will return
     * true as long as there are incoming packets received.
     * Note, forwarding_override still has higher priority. */
    bool forwarding_if_rx;
    long long int forwarding_if_rx_detect_time;

    /* BFD decay related variables. */
    bool in_decay;                /* True when bfd is in decay. */
    int decay_min_rx;             /* min_rx is set to decay_min_rx when */
                                  /* in decay. */
    int decay_rx_ctl;             /* Count bfd packets received within decay */
                                  /* detect interval. */
    uint64_t decay_rx_packets;    /* Packets received by 'netdev'. */
    long long int decay_detect_time; /* Decay detection time. */
};

/* Single file-wide lock.  It guards 'all_bfds', and every helper declared
 * OVS_REQUIRES(mutex) below must be called with it held. */
static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
static struct hmap all_bfds__ = HMAP_INITIALIZER(&all_bfds__);
static struct hmap *const all_bfds OVS_GUARDED_BY(mutex) = &all_bfds__;

static bool bfd_forwarding__(const struct bfd *) OVS_REQUIRES(mutex);
static bool bfd_in_poll(const struct bfd *) OVS_REQUIRES(mutex);
static void bfd_poll(struct bfd *bfd) OVS_REQUIRES(mutex);
static const char *bfd_diag_str(enum diag) OVS_REQUIRES(mutex);
static const char *bfd_state_str(enum state) OVS_REQUIRES(mutex);
static long long int bfd_min_tx(const struct bfd *) OVS_REQUIRES(mutex);
static long long int bfd_tx_interval(const struct bfd *)
    OVS_REQUIRES(mutex);
static long long int bfd_rx_interval(const struct bfd *)
    OVS_REQUIRES(mutex);
static void bfd_set_next_tx(struct bfd *) OVS_REQUIRES(mutex);
static void bfd_set_state(struct bfd *, enum state, enum diag)
    OVS_REQUIRES(mutex);
static uint32_t generate_discriminator(void) OVS_REQUIRES(mutex);
static void bfd_put_details(struct ds *, const struct bfd *)
    OVS_REQUIRES(mutex);
static uint64_t bfd_rx_packets(const struct bfd *) OVS_REQUIRES(mutex);
static void bfd_try_decay(struct bfd *) OVS_REQUIRES(mutex);
static void bfd_decay_update(struct bfd *) OVS_REQUIRES(mutex);
static void bfd_check_rx(struct bfd *) OVS_REQUIRES(mutex);
static void bfd_forwarding_if_rx_update(struct bfd *) OVS_REQUIRES(mutex);
static void bfd_unixctl_show(struct unixctl_conn *, int argc,
                             const char *argv[], void *aux OVS_UNUSED);
static void
bfd_unixctl_set_forwarding_override(struct unixctl_conn *, int argc, const char *argv[], void *aux OVS_UNUSED); static void log_msg(enum vlog_level, const struct msg *, const char *message, const struct bfd *) OVS_REQUIRES(mutex); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 20); /* Returns true if the interface on which 'bfd' is running may be used to * forward traffic according to the BFD session state. */ bool bfd_forwarding(const struct bfd *bfd) OVS_EXCLUDED(mutex) { bool ret; ovs_mutex_lock(&mutex); ret = bfd_forwarding__(bfd); ovs_mutex_unlock(&mutex); return ret; } /* Returns a 'smap' of key value pairs representing the status of 'bfd' * intended for the OVS database. */ void bfd_get_status(const struct bfd *bfd, struct smap *smap) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); smap_add(smap, "forwarding", bfd_forwarding__(bfd)? "true" : "false"); smap_add(smap, "state", bfd_state_str(bfd->state)); smap_add(smap, "diagnostic", bfd_diag_str(bfd->diag)); if (bfd->state != STATE_DOWN) { smap_add(smap, "remote_state", bfd_state_str(bfd->rmt_state)); smap_add(smap, "remote_diagnostic", bfd_diag_str(bfd->rmt_diag)); } ovs_mutex_unlock(&mutex); } /* Initializes, destroys, or reconfigures the BFD session 'bfd' (named 'name'), * according to the database configuration contained in 'cfg'. Takes ownership * of 'bfd', which may be NULL. Returns a BFD object which may be used as a * handle for the session, or NULL if BFD is not enabled according to 'cfg'. * Also returns NULL if cfg is NULL. 
*/
struct bfd *
bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg,
              struct netdev *netdev) OVS_EXCLUDED(mutex)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    /* Counter shared by all sessions; each new session takes the next value
     * and maps it into the UDP source port range below. */
    static atomic_uint16_t udp_src = ATOMIC_VAR_INIT(0);

    int decay_min_rx;
    long long int min_tx, min_rx;
    bool need_poll = false;
    bool cfg_min_rx_changed = false;
    bool cpath_down, forwarding_if_rx;
    const char *hwaddr;
    uint8_t ea[ETH_ADDR_LEN];

    /* Register the unixctl commands the first time through only. */
    if (ovsthread_once_start(&once)) {
        unixctl_command_register("bfd/show", "[interface]", 0, 1,
                                 bfd_unixctl_show, NULL);
        unixctl_command_register("bfd/set-forwarding",
                                 "[interface] normal|false|true", 1, 2,
                                 bfd_unixctl_set_forwarding_override, NULL);
        ovsthread_once_done(&once);
    }

    /* BFD disabled (or no configuration at all): drop the caller's reference
     * to any existing session and report that there is none. */
    if (!cfg || !smap_get_bool(cfg, "enable", false)) {
        bfd_unref(bfd);
        return NULL;
    }

    ovs_mutex_lock(&mutex);
    if (!bfd) {
        /* New session: xzalloc() zeroes every field not set explicitly. */
        bfd = xzalloc(sizeof *bfd);
        bfd->name = xstrdup(name);
        bfd->forwarding_override = -1;
        bfd->disc = generate_discriminator();
        hmap_insert(all_bfds, &bfd->node, bfd->disc);

        bfd->diag = DIAG_NONE;
        bfd->min_tx = 1000;
        bfd->mult = 3;
        atomic_init(&bfd->ref_cnt, 1);
        bfd->netdev = netdev_ref(netdev);
        bfd->rx_packets = bfd_rx_packets(bfd);
        bfd->in_decay = false;

        /* RFC 5881 section 4
         * The source port MUST be in the range 49152 through 65535.  The same
         * UDP source port number MUST be used for all BFD Control packets
         * associated with a particular session.  The source port number SHOULD
         * be unique among all BFD sessions on the system. */
        atomic_add(&udp_src, 1, &bfd->udp_src);
        bfd->udp_src = (bfd->udp_src % 16384) + 49152;

        bfd_set_state(bfd, STATE_DOWN, DIAG_NONE);

        memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN);
    }

    atomic_store(&bfd->check_tnl_key,
                 smap_get_bool(cfg, "check_tnl_key", false));

    /* "min_tx" defaults to 100 and is never allowed below 100.  A changed
     * value takes effect immediately if the session is not up, or if it is
     * smaller than the current value and no poll sequence is in progress. */
    min_tx = smap_get_int(cfg, "min_tx", 100);
    min_tx = MAX(min_tx, 100);
    if (bfd->cfg_min_tx != min_tx) {
        bfd->cfg_min_tx = min_tx;
        if (bfd->state != STATE_UP
            || (!bfd_in_poll(bfd) && bfd->cfg_min_tx < bfd->min_tx)) {
            bfd->min_tx = bfd->cfg_min_tx;
        }
        need_poll = true;
    }

    /* "min_rx" defaults to 1000 and is never allowed below 100.  A changed
     * value takes effect immediately if the session is not up, or if it is
     * larger than the current value and no poll sequence is in progress. */
    min_rx = smap_get_int(cfg, "min_rx", 1000);
    min_rx = MAX(min_rx, 100);
    if (bfd->cfg_min_rx != min_rx) {
        bfd->cfg_min_rx = min_rx;
        if (bfd->state != STATE_UP
            || (!bfd_in_poll(bfd) && bfd->cfg_min_rx > bfd->min_rx)) {
            bfd->min_rx = bfd->cfg_min_rx;
        }
        cfg_min_rx_changed = true;
        need_poll = true;
    }

    /* "decay_min_rx" is re-validated whenever it or "min_rx" changes: a
     * positive value below the configured min_rx is rejected and decay is
     * turned off by storing 0. */
    decay_min_rx = smap_get_int(cfg, "decay_min_rx", 0);
    if (bfd->decay_min_rx != decay_min_rx || cfg_min_rx_changed) {
        if (decay_min_rx > 0 && decay_min_rx < bfd->cfg_min_rx) {
            VLOG_WARN("%s: decay_min_rx cannot be less than %lld ms",
                      bfd->name, bfd->cfg_min_rx);
            bfd->decay_min_rx = 0;
        } else {
            bfd->decay_min_rx = decay_min_rx;
        }
        /* Resets decay. */
        bfd->in_decay = false;
        bfd_decay_update(bfd);
        need_poll = true;
    }

    cpath_down = smap_get_bool(cfg, "cpath_down", false);
    if (bfd->cpath_down != cpath_down) {
        bfd->cpath_down = cpath_down;
        /* Same state, but passes through bfd_set_state() again with
         * DIAG_NONE. */
        bfd_set_state(bfd, bfd->state, DIAG_NONE);
        need_poll = true;
    }

    /* A parseable, nonzero "bfd_dst_mac" overrides the destination MAC;
     * otherwise a previous override is undone by restoring 'eth_addr_bfd'. */
    hwaddr = smap_get(cfg, "bfd_dst_mac");
    if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) {
        memcpy(bfd->eth_dst, ea, ETH_ADDR_LEN);
        bfd->eth_dst_set = true;
    } else if (bfd->eth_dst_set) {
        memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN);
        bfd->eth_dst_set = false;
    }

    forwarding_if_rx = smap_get_bool(cfg, "forwarding_if_rx", false);
    if (bfd->forwarding_if_rx != forwarding_if_rx) {
        bfd->forwarding_if_rx = forwarding_if_rx;
        if (bfd->state == STATE_UP && bfd->forwarding_if_rx) {
            bfd_forwarding_if_rx_update(bfd);
        } else {
            bfd->forwarding_if_rx_detect_time = 0;
        }
    }

    if (need_poll) {
        bfd_poll(bfd);
    }
    ovs_mutex_unlock(&mutex);
    return bfd;
}

/* Takes an additional reference to 'bfd_' and returns it as a non-const
 * pointer.  A null 'bfd_' is passed through unchanged.  Asserts that the
 * reference count was positive beforehand. */
struct bfd *
bfd_ref(const struct bfd *bfd_)
{
    struct bfd *bfd = CONST_CAST(struct bfd *, bfd_);
    if (bfd) {
        int orig;
        atomic_add(&bfd->ref_cnt, 1, &orig);
        ovs_assert(orig > 0);
    }
    return bfd;
}

/* Releases one reference to 'bfd', which may be null.  When the last
 * reference goes away the session is removed from 'all_bfds', its netdev is
 * closed and its memory is freed. */
void
bfd_unref(struct bfd *bfd) OVS_EXCLUDED(mutex)
{
    if (bfd) {
        int orig;

        atomic_sub(&bfd->ref_cnt, 1, &orig);
        ovs_assert(orig > 0);
        if (orig == 1) {
            /* 'orig' is the count before the decrement, so 1 means this was
             * the final reference. */
            ovs_mutex_lock(&mutex);
            hmap_remove(all_bfds, &bfd->node);
            netdev_close(bfd->netdev);
            free(bfd->name);
            free(bfd);
            ovs_mutex_unlock(&mutex);
        }
    }
}

/* Arranges for the poll loop to wake up when 'bfd' next needs attention:
 * immediately if FLAG_FINAL is set in its flags, at its next transmit time,
 * and, if the session state is above STATE_DOWN, at its detection time. */
void
bfd_wait(const struct bfd *bfd) OVS_EXCLUDED(mutex)
{
    ovs_mutex_lock(&mutex);
    if (bfd->flags & FLAG_FINAL) {
        poll_immediate_wake();
    }

    poll_timer_wait_until(bfd->next_tx);
    if (bfd->state > STATE_DOWN) {
        poll_timer_wait_until(bfd->detect_time);
    }
    ovs_mutex_unlock(&mutex);
}

void
bfd_run(struct bfd *bfd) OVS_EXCLUDED(mutex)
{
    long long int now;
    bool old_in_decay;

    ovs_mutex_lock(&mutex);
    now = time_msec();
    old_in_decay = bfd->in_decay;

    if (bfd->state > STATE_DOWN && now >= bfd->detect_time) {
        bfd_set_state(bfd, STATE_DOWN, DIAG_EXPIRED);
    }

    /* Decay may only happen when state is STATE_UP,
bfd->decay_min_rx is * configured, and decay_detect_time is reached. */ if (bfd->state == STATE_UP && bfd->decay_min_rx > 0 && now >= bfd->decay_detect_time) { bfd_try_decay(bfd); } /* Always checks the reception of any packet. */ bfd_check_rx(bfd); if (bfd->min_tx != bfd->cfg_min_tx || (bfd->min_rx != bfd->cfg_min_rx && bfd->min_rx != bfd->decay_min_rx) || bfd->in_decay != old_in_decay) { bfd_poll(bfd); } ovs_mutex_unlock(&mutex); } bool bfd_should_send_packet(const struct bfd *bfd) OVS_EXCLUDED(mutex) { bool ret; ovs_mutex_lock(&mutex); ret = bfd->flags & FLAG_FINAL || time_msec() >= bfd->next_tx; ovs_mutex_unlock(&mutex); return ret; } void bfd_put_packet(struct bfd *bfd, struct ofpbuf *p, uint8_t eth_src[ETH_ADDR_LEN]) OVS_EXCLUDED(mutex) { long long int min_tx, min_rx; struct udp_header *udp; struct eth_header *eth; struct ip_header *ip; struct msg *msg; ovs_mutex_lock(&mutex); if (bfd->next_tx) { long long int delay = time_msec() - bfd->next_tx; long long int interval = bfd_tx_interval(bfd); if (delay > interval * 3 / 2) { VLOG_INFO("%s: long delay of %lldms (expected %lldms) sending BFD" " control message", bfd->name, delay, interval); } } /* RFC 5880 Section 6.5 * A BFD Control packet MUST NOT have both the Poll (P) and Final (F) bits * set. */ ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL)); ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */ eth = ofpbuf_put_uninit(p, sizeof *eth); memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN); eth->eth_type = htons(ETH_TYPE_IP); ip = ofpbuf_put_zeros(p, sizeof *ip); ip->ip_ihl_ver = IP_IHL_VER(5, 4); ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg); ip->ip_ttl = MAXTTL; ip->ip_tos = IPTOS_LOWDELAY | IPTOS_THROUGHPUT; ip->ip_proto = IPPROTO_UDP; /* Use link local addresses: */ put_16aligned_be32(&ip->ip_src, htonl(0xA9FE0100)); /* 169.254.1.0. */ put_16aligned_be32(&ip->ip_dst, htonl(0xA9FE0101)); /* 169.254.1.1. 
*/ ip->ip_csum = csum(ip, sizeof *ip); udp = ofpbuf_put_zeros(p, sizeof *udp); udp->udp_src = htons(bfd->udp_src); udp->udp_dst = htons(BFD_DEST_PORT); udp->udp_len = htons(sizeof *udp + sizeof *msg); msg = ofpbuf_put_uninit(p, sizeof *msg); msg->vers_diag = (BFD_VERSION << 5) | bfd->diag; msg->flags = (bfd->state & STATE_MASK) | bfd->flags; msg->mult = bfd->mult; msg->length = BFD_PACKET_LEN; msg->my_disc = htonl(bfd->disc); msg->your_disc = htonl(bfd->rmt_disc); msg->min_rx_echo = htonl(0); if (bfd_in_poll(bfd)) { min_tx = bfd->poll_min_tx; min_rx = bfd->poll_min_rx; } else { min_tx = bfd_min_tx(bfd); min_rx = bfd->min_rx; } msg->min_tx = htonl(min_tx * 1000); msg->min_rx = htonl(min_rx * 1000); bfd->flags &= ~FLAG_FINAL; log_msg(VLL_DBG, msg, "Sending BFD Message", bfd); bfd->last_tx = time_msec(); bfd_set_next_tx(bfd); ovs_mutex_unlock(&mutex); } bool bfd_should_process_flow(const struct bfd *bfd_, const struct flow *flow, struct flow_wildcards *wc) { struct bfd *bfd = CONST_CAST(struct bfd *, bfd_); bool check_tnl_key; memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); if (bfd->eth_dst_set && memcmp(bfd->eth_dst, flow->dl_dst, ETH_ADDR_LEN)) { return false; } memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst); atomic_read(&bfd->check_tnl_key, &check_tnl_key); if (check_tnl_key) { memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id); } return (flow->dl_type == htons(ETH_TYPE_IP) && flow->nw_proto == IPPROTO_UDP && flow->tp_dst == htons(BFD_DEST_PORT) && (!check_tnl_key || flow->tunnel.tun_id == htonll(0))); } void bfd_process_packet(struct bfd *bfd, const struct flow *flow, const struct ofpbuf *p) OVS_EXCLUDED(mutex) { uint32_t rmt_min_rx, pkt_your_disc; enum state rmt_state; enum flags flags; uint8_t version; struct msg *msg; /* This function is designed to follow section RFC 5880 6.8.6 closely. */ ovs_mutex_lock(&mutex); /* Increments the decay rx counter. 
*/ bfd->decay_rx_ctl++; if (flow->nw_ttl != 255) { /* XXX Should drop in the kernel to prevent DOS. */ goto out; } msg = ofpbuf_at(p, (uint8_t *)p->l7 - (uint8_t *)p->data, BFD_PACKET_LEN); if (!msg) { VLOG_INFO_RL(&rl, "%s: Received unparseable BFD control message.", bfd->name); goto out; } /* RFC 5880 Section 6.8.6 * If the Length field is greater than the payload of the encapsulating * protocol, the packet MUST be discarded. * * Note that we make this check implicity. Above we use ofpbuf_at() to * ensure that there are at least BFD_PACKET_LEN bytes in the payload of * the encapsulating protocol. Below we require msg->length to be exactly * BFD_PACKET_LEN bytes. */ flags = msg->flags & FLAGS_MASK; rmt_state = msg->flags & STATE_MASK; version = msg->vers_diag >> VERS_SHIFT; log_msg(VLL_DBG, msg, "Received BFD control message", bfd); if (version != BFD_VERSION) { log_msg(VLL_WARN, msg, "Incorrect version", bfd); goto out; } /* Technically this should happen after the length check. We don't support * authentication however, so it's simpler to do the check first. */ if (flags & FLAG_AUTH) { log_msg(VLL_WARN, msg, "Authenticated control message with" " authentication disabled", bfd); goto out; } if (msg->length != BFD_PACKET_LEN) { log_msg(VLL_WARN, msg, "Unexpected length", bfd); if (msg->length < BFD_PACKET_LEN) { goto out; } } if (!msg->mult) { log_msg(VLL_WARN, msg, "Zero multiplier", bfd); goto out; } if (flags & FLAG_MULTIPOINT) { log_msg(VLL_WARN, msg, "Unsupported multipoint flag", bfd); goto out; } if (!msg->my_disc) { log_msg(VLL_WARN, msg, "NULL my_disc", bfd); goto out; } pkt_your_disc = ntohl(msg->your_disc); if (pkt_your_disc) { /* Technically, we should use the your discriminator field to figure * out which 'struct bfd' this packet is destined towards. That way a * bfd session could migrate from one interface to another * transparently. This doesn't fit in with the OVS structure very * well, so in this respect, we are not compliant. 
*/ if (pkt_your_disc != bfd->disc) { log_msg(VLL_WARN, msg, "Incorrect your_disc", bfd); goto out; } } else if (rmt_state > STATE_DOWN) { log_msg(VLL_WARN, msg, "Null your_disc", bfd); goto out; } bfd->rmt_disc = ntohl(msg->my_disc); bfd->rmt_state = rmt_state; bfd->rmt_flags = flags; bfd->rmt_diag = msg->vers_diag & DIAG_MASK; if (flags & FLAG_FINAL && bfd_in_poll(bfd)) { bfd->min_tx = bfd->poll_min_tx; bfd->min_rx = bfd->poll_min_rx; bfd->flags &= ~FLAG_POLL; log_msg(VLL_INFO, msg, "Poll sequence terminated", bfd); } if (flags & FLAG_POLL) { /* RFC 5880 Section 6.5 * When the other system receives a Poll, it immediately transmits a * BFD Control packet with the Final (F) bit set, independent of any * periodic BFD Control packets it may be sending * (see section 6.8.7). */ bfd->flags &= ~FLAG_POLL; bfd->flags |= FLAG_FINAL; } rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1); if (bfd->rmt_min_rx != rmt_min_rx) { bfd->rmt_min_rx = rmt_min_rx; bfd_set_next_tx(bfd); log_msg(VLL_INFO, msg, "New remote min_rx", bfd); } bfd->rmt_min_tx = MAX(ntohl(msg->min_tx) / 1000, 1); bfd->detect_time = bfd_rx_interval(bfd) * bfd->mult + time_msec(); if (bfd->state == STATE_ADMIN_DOWN) { VLOG_DBG_RL(&rl, "Administratively down, dropping control message."); goto out; } if (rmt_state == STATE_ADMIN_DOWN) { if (bfd->state != STATE_DOWN) { bfd_set_state(bfd, STATE_DOWN, DIAG_RMT_DOWN); } } else { switch (bfd->state) { case STATE_DOWN: if (rmt_state == STATE_DOWN) { bfd_set_state(bfd, STATE_INIT, bfd->diag); } else if (rmt_state == STATE_INIT) { bfd_set_state(bfd, STATE_UP, bfd->diag); } break; case STATE_INIT: if (rmt_state > STATE_DOWN) { bfd_set_state(bfd, STATE_UP, bfd->diag); } break; case STATE_UP: if (rmt_state <= STATE_DOWN) { bfd_set_state(bfd, STATE_DOWN, DIAG_RMT_DOWN); log_msg(VLL_INFO, msg, "Remote signaled STATE_DOWN", bfd); } break; case STATE_ADMIN_DOWN: default: NOT_REACHED(); } } /* XXX: RFC 5880 Section 6.8.6 Demand mode related calculations here. 
*/ out: ovs_mutex_unlock(&mutex); } /* Must be called when the netdev owned by 'bfd' should change. */ void bfd_set_netdev(struct bfd *bfd, const struct netdev *netdev) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); if (bfd->netdev != netdev) { netdev_close(bfd->netdev); bfd->netdev = netdev_ref(netdev); if (bfd->decay_min_rx && bfd->state == STATE_UP) { bfd_decay_update(bfd); } if (bfd->forwarding_if_rx && bfd->state == STATE_UP) { bfd_forwarding_if_rx_update(bfd); } bfd->rx_packets = bfd_rx_packets(bfd); } ovs_mutex_unlock(&mutex); } static bool bfd_forwarding__(const struct bfd *bfd) OVS_REQUIRES(mutex) { long long int time; if (bfd->forwarding_override != -1) { return bfd->forwarding_override == 1; } time = bfd->forwarding_if_rx_detect_time; return (bfd->state == STATE_UP || (bfd->forwarding_if_rx && time > time_msec())) && bfd->rmt_diag != DIAG_PATH_DOWN && bfd->rmt_diag != DIAG_CPATH_DOWN && bfd->rmt_diag != DIAG_RCPATH_DOWN; } /* Helpers. */ static bool bfd_in_poll(const struct bfd *bfd) OVS_REQUIRES(mutex) { return (bfd->flags & FLAG_POLL) != 0; } static void bfd_poll(struct bfd *bfd) OVS_REQUIRES(mutex) { if (bfd->state > STATE_DOWN && !bfd_in_poll(bfd) && !(bfd->flags & FLAG_FINAL)) { bfd->poll_min_tx = bfd->cfg_min_tx; bfd->poll_min_rx = bfd->in_decay ? bfd->decay_min_rx : bfd->cfg_min_rx; bfd->flags |= FLAG_POLL; bfd->next_tx = 0; VLOG_INFO_RL(&rl, "%s: Initiating poll sequence", bfd->name); } } static long long int bfd_min_tx(const struct bfd *bfd) OVS_REQUIRES(mutex) { /* RFC 5880 Section 6.8.3 * When bfd.SessionState is not Up, the system MUST set * bfd.DesiredMinTxInterval to a value of not less than one second * (1,000,000 microseconds). This is intended to ensure that the * bandwidth consumed by BFD sessions that are not Up is negligible, * particularly in the case where a neighbor may not be running BFD. */ return (bfd->state == STATE_UP ? 
bfd->min_tx : MAX(bfd->min_tx, 1000)); } static long long int bfd_tx_interval(const struct bfd *bfd) OVS_REQUIRES(mutex) { long long int interval = bfd_min_tx(bfd); return MAX(interval, bfd->rmt_min_rx); } static long long int bfd_rx_interval(const struct bfd *bfd) OVS_REQUIRES(mutex) { return MAX(bfd->min_rx, bfd->rmt_min_tx); } static void bfd_set_next_tx(struct bfd *bfd) OVS_REQUIRES(mutex) { long long int interval = bfd_tx_interval(bfd); interval -= interval * random_range(26) / 100; bfd->next_tx = bfd->last_tx + interval; } static const char * bfd_flag_str(enum flags flags) { struct ds ds = DS_EMPTY_INITIALIZER; static char flag_str[128]; if (!flags) { return "none"; } if (flags & FLAG_MULTIPOINT) { ds_put_cstr(&ds, "multipoint "); } if (flags & FLAG_DEMAND) { ds_put_cstr(&ds, "demand "); } if (flags & FLAG_AUTH) { ds_put_cstr(&ds, "auth "); } if (flags & FLAG_CTL) { ds_put_cstr(&ds, "ctl "); } if (flags & FLAG_FINAL) { ds_put_cstr(&ds, "final "); } if (flags & FLAG_POLL) { ds_put_cstr(&ds, "poll "); } /* Do not copy the trailing whitespace. 
*/ ds_chomp(&ds, ' '); ovs_strlcpy(flag_str, ds_cstr(&ds), sizeof flag_str); ds_destroy(&ds); return flag_str; } static const char * bfd_state_str(enum state state) { switch (state) { case STATE_ADMIN_DOWN: return "admin_down"; case STATE_DOWN: return "down"; case STATE_INIT: return "init"; case STATE_UP: return "up"; default: return "invalid"; } } static const char * bfd_diag_str(enum diag diag) { switch (diag) { case DIAG_NONE: return "No Diagnostic"; case DIAG_EXPIRED: return "Control Detection Time Expired"; case DIAG_ECHO_FAILED: return "Echo Function Failed"; case DIAG_RMT_DOWN: return "Neighbor Signaled Session Down"; case DIAG_FWD_RESET: return "Forwarding Plane Reset"; case DIAG_PATH_DOWN: return "Path Down"; case DIAG_CPATH_DOWN: return "Concatenated Path Down"; case DIAG_ADMIN_DOWN: return "Administratively Down"; case DIAG_RCPATH_DOWN: return "Reverse Concatenated Path Down"; default: return "Invalid Diagnostic"; } }; static void log_msg(enum vlog_level level, const struct msg *p, const char *message, const struct bfd *bfd) OVS_REQUIRES(mutex) { struct ds ds = DS_EMPTY_INITIALIZER; if (vlog_should_drop(THIS_MODULE, level, &rl)) { return; } ds_put_format(&ds, "%s: %s." 
"\n\tvers:%"PRIu8" diag:\"%s\" state:%s mult:%"PRIu8 " length:%"PRIu8 "\n\tflags: %s" "\n\tmy_disc:0x%"PRIx32" your_disc:0x%"PRIx32 "\n\tmin_tx:%"PRIu32"us (%"PRIu32"ms)" "\n\tmin_rx:%"PRIu32"us (%"PRIu32"ms)" "\n\tmin_rx_echo:%"PRIu32"us (%"PRIu32"ms)", bfd->name, message, p->vers_diag >> VERS_SHIFT, bfd_diag_str(p->vers_diag & DIAG_MASK), bfd_state_str(p->flags & STATE_MASK), p->mult, p->length, bfd_flag_str(p->flags & FLAGS_MASK), ntohl(p->my_disc), ntohl(p->your_disc), ntohl(p->min_tx), ntohl(p->min_tx) / 1000, ntohl(p->min_rx), ntohl(p->min_rx) / 1000, ntohl(p->min_rx_echo), ntohl(p->min_rx_echo) / 1000); bfd_put_details(&ds, bfd); VLOG(level, "%s", ds_cstr(&ds)); ds_destroy(&ds); } static void bfd_set_state(struct bfd *bfd, enum state state, enum diag diag) OVS_REQUIRES(mutex) { if (bfd->cpath_down) { diag = DIAG_CPATH_DOWN; } if (bfd->state != state || bfd->diag != diag) { if (!VLOG_DROP_INFO(&rl)) { struct ds ds = DS_EMPTY_INITIALIZER; ds_put_format(&ds, "%s: BFD state change: %s->%s" " \"%s\"->\"%s\".\n", bfd->name, bfd_state_str(bfd->state), bfd_state_str(state), bfd_diag_str(bfd->diag), bfd_diag_str(diag)); bfd_put_details(&ds, bfd); VLOG_INFO("%s", ds_cstr(&ds)); ds_destroy(&ds); } bfd->state = state; bfd->diag = diag; if (bfd->state <= STATE_DOWN) { bfd->rmt_state = STATE_DOWN; bfd->rmt_diag = DIAG_NONE; bfd->rmt_min_rx = 1; bfd->rmt_flags = 0; bfd->rmt_disc = 0; bfd->rmt_min_tx = 0; /* Resets the min_rx if in_decay. */ if (bfd->in_decay) { bfd->min_rx = bfd->cfg_min_rx; bfd->in_decay = false; } } /* Resets the decay when state changes to STATE_UP * and decay_min_rx is configured. */ if (bfd->state == STATE_UP && bfd->decay_min_rx) { bfd_decay_update(bfd); } } } static uint64_t bfd_rx_packets(const struct bfd *bfd) OVS_REQUIRES(mutex) { struct netdev_stats stats; if (!netdev_get_stats(bfd->netdev, &stats)) { return stats.rx_packets; } else { return 0; } } /* Decays the bfd->min_rx to bfd->decay_min_rx when 'diff' is less than * the 'expect' value. 
*/ static void bfd_try_decay(struct bfd *bfd) OVS_REQUIRES(mutex) { int64_t diff, expect; /* The 'diff' is the difference between current interface rx_packets * stats and last-time check. The 'expect' is the recorded number of * bfd control packets received within an approximately decay_min_rx * (2000 ms if decay_min_rx is less than 2000 ms) interval. * * Since the update of rx_packets stats at interface happens * asynchronously to the bfd_rx_packets() function, the 'diff' value * can be jittered. Thusly, we double the decay_rx_ctl to provide * more wiggle room. */ diff = bfd_rx_packets(bfd) - bfd->decay_rx_packets; expect = 2 * MAX(bfd->decay_rx_ctl, 1); bfd->in_decay = diff <= expect ? true : false; bfd_decay_update(bfd); } /* Updates the rx_packets, decay_rx_ctl and decay_detect_time. */ static void bfd_decay_update(struct bfd * bfd) OVS_REQUIRES(mutex) { bfd->decay_rx_packets = bfd_rx_packets(bfd); bfd->decay_rx_ctl = 0; bfd->decay_detect_time = MAX(bfd->decay_min_rx, 2000) + time_msec(); } /* Checks if there are packets received during the time since last call. * If forwarding_if_rx is enabled and packets are received, updates the * forwarding_if_rx_detect_time. */ static void bfd_check_rx(struct bfd *bfd) OVS_REQUIRES(mutex) { uint64_t rx_packets = bfd_rx_packets(bfd); int64_t diff; diff = rx_packets - bfd->rx_packets; bfd->rx_packets = rx_packets; if (diff < 0) { VLOG_INFO_RL(&rl, "rx_packets count is smaller than last time."); } if (bfd->forwarding_if_rx && diff > 0) { bfd_forwarding_if_rx_update(bfd); } } /* Updates the forwarding_if_rx_detect_time. */ static void bfd_forwarding_if_rx_update(struct bfd *bfd) OVS_REQUIRES(mutex) { int64_t incr = bfd_rx_interval(bfd) * bfd->mult; bfd->forwarding_if_rx_detect_time = MAX(incr, 2000) + time_msec(); } static uint32_t generate_discriminator(void) { uint32_t disc = 0; /* RFC 5880 Section 6.8.1 * It SHOULD be set to a random (but still unique) value to improve * security. 
The value is otherwise outside the scope of this * specification. */ while (!disc) { struct bfd *bfd; /* 'disc' is by definition random, so there's no reason to waste time * hashing it. */ disc = random_uint32(); HMAP_FOR_EACH_IN_BUCKET (bfd, node, disc, all_bfds) { if (bfd->disc == disc) { disc = 0; break; } } } return disc; } static struct bfd * bfd_find_by_name(const char *name) OVS_REQUIRES(mutex) { struct bfd *bfd; HMAP_FOR_EACH (bfd, node, all_bfds) { if (!strcmp(bfd->name, name)) { return bfd; } } return NULL; } static void bfd_put_details(struct ds *ds, const struct bfd *bfd) OVS_REQUIRES(mutex) { ds_put_format(ds, "\tForwarding: %s\n", bfd_forwarding__(bfd) ? "true" : "false"); ds_put_format(ds, "\tDetect Multiplier: %d\n", bfd->mult); ds_put_format(ds, "\tConcatenated Path Down: %s\n", bfd->cpath_down ? "true" : "false"); ds_put_format(ds, "\tTX Interval: Approx %lldms\n", bfd_tx_interval(bfd)); ds_put_format(ds, "\tRX Interval: Approx %lldms\n", bfd_rx_interval(bfd)); ds_put_format(ds, "\tDetect Time: now %+lldms\n", time_msec() - bfd->detect_time); ds_put_format(ds, "\tNext TX Time: now %+lldms\n", time_msec() - bfd->next_tx); ds_put_format(ds, "\tLast TX Time: now %+lldms\n", time_msec() - bfd->last_tx); ds_put_cstr(ds, "\n"); ds_put_format(ds, "\tLocal Flags: %s\n", bfd_flag_str(bfd->flags)); ds_put_format(ds, "\tLocal Session State: %s\n", bfd_state_str(bfd->state)); ds_put_format(ds, "\tLocal Diagnostic: %s\n", bfd_diag_str(bfd->diag)); ds_put_format(ds, "\tLocal Discriminator: 0x%"PRIx32"\n", bfd->disc); ds_put_format(ds, "\tLocal Minimum TX Interval: %lldms\n", bfd_min_tx(bfd)); ds_put_format(ds, "\tLocal Minimum RX Interval: %lldms\n", bfd->min_rx); ds_put_cstr(ds, "\n"); ds_put_format(ds, "\tRemote Flags: %s\n", bfd_flag_str(bfd->rmt_flags)); ds_put_format(ds, "\tRemote Session State: %s\n", bfd_state_str(bfd->rmt_state)); ds_put_format(ds, "\tRemote Diagnostic: %s\n", bfd_diag_str(bfd->rmt_diag)); ds_put_format(ds, "\tRemote Discriminator: 
0x%"PRIx32"\n", bfd->rmt_disc); ds_put_format(ds, "\tRemote Minimum TX Interval: %lldms\n", bfd->rmt_min_tx); ds_put_format(ds, "\tRemote Minimum RX Interval: %lldms\n", bfd->rmt_min_rx); } static void bfd_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) OVS_EXCLUDED(mutex) { struct ds ds = DS_EMPTY_INITIALIZER; struct bfd *bfd; ovs_mutex_lock(&mutex); if (argc > 1) { bfd = bfd_find_by_name(argv[1]); if (!bfd) { unixctl_command_reply_error(conn, "no such bfd object"); goto out; } bfd_put_details(&ds, bfd); } else { HMAP_FOR_EACH (bfd, node, all_bfds) { ds_put_format(&ds, "---- %s ----\n", bfd->name); bfd_put_details(&ds, bfd); } } unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); out: ovs_mutex_unlock(&mutex); } static void bfd_unixctl_set_forwarding_override(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) OVS_EXCLUDED(mutex) { const char *forward_str = argv[argc - 1]; int forwarding_override; struct bfd *bfd; ovs_mutex_lock(&mutex); if (!strcasecmp("true", forward_str)) { forwarding_override = 1; } else if (!strcasecmp("false", forward_str)) { forwarding_override = 0; } else if (!strcasecmp("normal", forward_str)) { forwarding_override = -1; } else { unixctl_command_reply_error(conn, "unknown fault string"); goto out; } if (argc > 2) { bfd = bfd_find_by_name(argv[1]); if (!bfd) { unixctl_command_reply_error(conn, "no such BFD object"); goto out; } bfd->forwarding_override = forwarding_override; } else { HMAP_FOR_EACH (bfd, node, all_bfds) { bfd->forwarding_override = forwarding_override; } } unixctl_command_reply(conn, "OK"); out: ovs_mutex_unlock(&mutex); } openvswitch-2.0.1+git20140120/lib/bfd.h000066400000000000000000000032121226605124000171220ustar00rootroot00000000000000/* Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef BFD_H #define BFD_H 1 #define BFD_PACKET_LEN 24 #define BFD_DEST_PORT 3784 #include #include struct bfd; struct flow; struct flow_wildcards; struct netdev; struct ofpbuf; struct smap; void bfd_wait(const struct bfd *); void bfd_run(struct bfd *); bool bfd_should_send_packet(const struct bfd *); void bfd_put_packet(struct bfd *bfd, struct ofpbuf *packet, uint8_t eth_src[6]); bool bfd_should_process_flow(const struct bfd *, const struct flow *, struct flow_wildcards *); void bfd_process_packet(struct bfd *, const struct flow *, const struct ofpbuf *); struct bfd *bfd_configure(struct bfd *, const char *name, const struct smap *smap, struct netdev *netdev); struct bfd *bfd_ref(const struct bfd *); void bfd_unref(struct bfd *); bool bfd_forwarding(const struct bfd *); void bfd_get_status(const struct bfd *, struct smap *); void bfd_set_netdev(struct bfd *, const struct netdev *); #endif /* bfd.h */ openvswitch-2.0.1+git20140120/lib/bitmap.c000066400000000000000000000053301226605124000176410ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include "bitmap.h" #include /* Allocates and returns a bitmap initialized to all-1-bits. */ unsigned long * bitmap_allocate1(size_t n_bits) { size_t n_bytes = bitmap_n_bytes(n_bits); size_t n_longs = bitmap_n_longs(n_bits); size_t r_bits = n_bits % BITMAP_ULONG_BITS; unsigned long *bitmap; /* Allocate and initialize most of the bitmap. */ bitmap = xmalloc(n_bytes); memset(bitmap, 0xff, n_bytes); /* Ensure that the last "unsigned long" in the bitmap only has as many * 1-bits as there actually should be. */ if (r_bits) { bitmap[n_longs - 1] = (1UL << r_bits) - 1; } return bitmap; } /* Sets 'count' consecutive bits in 'bitmap', starting at bit offset 'start', * to 'value'. */ void bitmap_set_multiple(unsigned long *bitmap, size_t start, size_t count, bool value) { for (; count && start % BITMAP_ULONG_BITS; count--) { bitmap_set(bitmap, start++, value); } for (; count >= BITMAP_ULONG_BITS; count -= BITMAP_ULONG_BITS) { *bitmap_unit__(bitmap, start) = -(unsigned long) value; start += BITMAP_ULONG_BITS; } for (; count; count--) { bitmap_set(bitmap, start++, value); } } /* Compares the 'n' bits in bitmaps 'a' and 'b'. Returns true if all bits are * equal, false otherwise. */ bool bitmap_equal(const unsigned long *a, const unsigned long *b, size_t n) { size_t i; if (memcmp(a, b, n / BITMAP_ULONG_BITS * sizeof(unsigned long))) { return false; } for (i = ROUND_DOWN(n, BITMAP_ULONG_BITS); i < n; i++) { if (bitmap_is_set(a, i) != bitmap_is_set(b, i)) { return false; } } return true; } /* Scans 'bitmap' from bit offset 'start' to 'end', excluding 'end' itself. * Returns the bit offset of the lowest-numbered bit set to 1, or 'end' if * all of the bits are set to 0. 
*/ size_t bitmap_scan(const unsigned long int *bitmap, size_t start, size_t end) { /* XXX slow */ size_t i; for (i = start; i < end; i++) { if (bitmap_is_set(bitmap, i)) { break; } } return i; } openvswitch-2.0.1+git20140120/lib/bitmap.h000066400000000000000000000052601226605124000176500ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef BITMAP_H #define BITMAP_H 1 #include #include #include "util.h" #define BITMAP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT) static inline unsigned long * bitmap_unit__(const unsigned long *bitmap, size_t offset) { return CONST_CAST(unsigned long *, &bitmap[offset / BITMAP_ULONG_BITS]); } static inline unsigned long bitmap_bit__(size_t offset) { return 1UL << (offset % BITMAP_ULONG_BITS); } static inline size_t bitmap_n_longs(size_t n_bits) { return DIV_ROUND_UP(n_bits, BITMAP_ULONG_BITS); } static inline size_t bitmap_n_bytes(size_t n_bits) { return bitmap_n_longs(n_bits) * sizeof(unsigned long int); } static inline unsigned long * bitmap_allocate(size_t n_bits) { return xzalloc(bitmap_n_bytes(n_bits)); } unsigned long *bitmap_allocate1(size_t n_bits); static inline unsigned long * bitmap_clone(const unsigned long *bitmap, size_t n_bits) { return xmemdup(bitmap, bitmap_n_bytes(n_bits)); } static inline void bitmap_free(unsigned long *bitmap) { free(bitmap); } static inline bool bitmap_is_set(const unsigned long *bitmap, size_t offset) { return 
(*bitmap_unit__(bitmap, offset) & bitmap_bit__(offset)) != 0; } static inline void bitmap_set1(unsigned long *bitmap, size_t offset) { *bitmap_unit__(bitmap, offset) |= bitmap_bit__(offset); } static inline void bitmap_set0(unsigned long *bitmap, size_t offset) { *bitmap_unit__(bitmap, offset) &= ~bitmap_bit__(offset); } static inline void bitmap_set(unsigned long *bitmap, size_t offset, bool value) { if (value) { bitmap_set1(bitmap, offset); } else { bitmap_set0(bitmap, offset); } } void bitmap_set_multiple(unsigned long *, size_t start, size_t count, bool value); bool bitmap_equal(const unsigned long *, const unsigned long *, size_t n); size_t bitmap_scan(const unsigned long int *, size_t start, size_t end); #define BITMAP_FOR_EACH_1(IDX, SIZE, BITMAP) \ for ((IDX) = bitmap_scan(BITMAP, 0, SIZE); (IDX) < (SIZE); \ (IDX) = bitmap_scan(BITMAP, (IDX) + 1, SIZE)) #endif /* bitmap.h */ openvswitch-2.0.1+git20140120/lib/bond.c000066400000000000000000001357111226605124000173160ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "bond.h" #include #include #include #include #include "coverage.h" #include "dynamic-string.h" #include "flow.h" #include "hmap.h" #include "lacp.h" #include "list.h" #include "netdev.h" #include "odp-util.h" #include "ofpbuf.h" #include "packets.h" #include "poll-loop.h" #include "shash.h" #include "timeval.h" #include "unixctl.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(bond); /* Bit-mask for hashing a flow down to a bucket. * There are (BOND_MASK + 1) buckets. */ #define BOND_MASK 0xff /* A hash bucket for mapping a flow to a slave. * "struct bond" has an array of (BOND_MASK + 1) of these. */ struct bond_entry { struct bond_slave *slave; /* Assigned slave, NULL if unassigned. */ uint64_t tx_bytes; /* Count of bytes recently transmitted. */ struct list list_node; /* In bond_slave's 'entries' list. */ }; /* A bond slave, that is, one of the links comprising a bond. */ struct bond_slave { struct hmap_node hmap_node; /* In struct bond's slaves hmap. */ struct bond *bond; /* The bond that contains this slave. */ void *aux; /* Client-provided handle for this slave. */ struct netdev *netdev; /* Network device, owned by the client. */ unsigned int change_seq; /* Tracks changes in 'netdev'. */ char *name; /* Name (a copy of netdev_get_name(netdev)). */ /* Link status. */ long long delay_expires; /* Time after which 'enabled' may change. */ bool enabled; /* May be chosen for flows? */ bool may_enable; /* Client considers this slave bondable. */ /* Rebalancing info. Used only by bond_rebalance(). */ struct list bal_node; /* In bond_rebalance()'s 'bals' list. */ struct list entries; /* 'struct bond_entry's assigned here. */ uint64_t tx_bytes; /* Sum across 'tx_bytes' of entries. */ }; /* A bond, that is, a set of network devices grouped to improve performance or * robustness. */ struct bond { struct hmap_node hmap_node; /* In 'all_bonds' hmap. */ char *name; /* Name provided by client. */ /* Slaves. */ struct hmap slaves; /* Bonding info. 
*/ enum bond_mode balance; /* Balancing mode, one of BM_*. */ struct bond_slave *active_slave; int updelay, downdelay; /* Delay before slave goes up/down, in ms. */ enum lacp_status lacp_status; /* Status of LACP negotiations. */ bool bond_revalidate; /* True if flows need revalidation. */ uint32_t basis; /* Basis for flow hash function. */ /* SLB specific bonding info. */ struct bond_entry *hash; /* An array of (BOND_MASK + 1) elements. */ int rebalance_interval; /* Interval between rebalances, in ms. */ long long int next_rebalance; /* Next rebalancing time. */ bool send_learning_packets; /* Legacy compatibility. */ long long int next_fake_iface_update; /* LLONG_MAX if disabled. */ atomic_int ref_cnt; }; static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER; static struct hmap all_bonds__ = HMAP_INITIALIZER(&all_bonds__); static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__; static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock); static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_) OVS_REQ_RDLOCK(rwlock); static void bond_enable_slave(struct bond_slave *, bool enable) OVS_REQ_WRLOCK(rwlock); static void bond_link_status_update(struct bond_slave *) OVS_REQ_WRLOCK(rwlock); static void bond_choose_active_slave(struct bond *) OVS_REQ_WRLOCK(rwlock);; static unsigned int bond_hash_src(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan, uint32_t basis); static unsigned int bond_hash_tcp(const struct flow *, uint16_t vlan, uint32_t basis); static struct bond_entry *lookup_bond_entry(const struct bond *, const struct flow *, uint16_t vlan) OVS_REQ_RDLOCK(rwlock); static struct bond_slave *choose_output_slave(const struct bond *, const struct flow *, struct flow_wildcards *, uint16_t vlan) OVS_REQ_RDLOCK(rwlock); static void bond_update_fake_slave_stats(struct bond *) OVS_REQ_RDLOCK(rwlock); /* Attempts to parse 's' as the name of a bond balancing mode. 
If successful, * stores the mode in '*balance' and returns true. Otherwise returns false * without modifying '*balance'. */ bool bond_mode_from_string(enum bond_mode *balance, const char *s) { if (!strcmp(s, bond_mode_to_string(BM_TCP))) { *balance = BM_TCP; } else if (!strcmp(s, bond_mode_to_string(BM_SLB))) { *balance = BM_SLB; } else if (!strcmp(s, bond_mode_to_string(BM_AB))) { *balance = BM_AB; } else { return false; } return true; } /* Returns a string representing 'balance'. */ const char * bond_mode_to_string(enum bond_mode balance) { switch (balance) { case BM_TCP: return "balance-tcp"; case BM_SLB: return "balance-slb"; case BM_AB: return "active-backup"; } NOT_REACHED(); } /* Creates and returns a new bond whose configuration is initially taken from * 's'. * * The caller should register each slave on the new bond by calling * bond_slave_register(). */ struct bond * bond_create(const struct bond_settings *s) { struct bond *bond; bond = xzalloc(sizeof *bond); hmap_init(&bond->slaves); bond->next_fake_iface_update = LLONG_MAX; atomic_init(&bond->ref_cnt, 1); bond_reconfigure(bond, s); return bond; } struct bond * bond_ref(const struct bond *bond_) { struct bond *bond = CONST_CAST(struct bond *, bond_); if (bond) { int orig; atomic_add(&bond->ref_cnt, 1, &orig); ovs_assert(orig > 0); } return bond; } /* Frees 'bond'. */ void bond_unref(struct bond *bond) { struct bond_slave *slave, *next_slave; int orig; if (!bond) { return; } atomic_sub(&bond->ref_cnt, 1, &orig); ovs_assert(orig > 0); if (orig != 1) { return; } ovs_rwlock_wrlock(&rwlock); hmap_remove(all_bonds, &bond->hmap_node); ovs_rwlock_unlock(&rwlock); HMAP_FOR_EACH_SAFE (slave, next_slave, hmap_node, &bond->slaves) { hmap_remove(&bond->slaves, &slave->hmap_node); /* Client owns 'slave->netdev'. */ free(slave->name); free(slave); } hmap_destroy(&bond->slaves); free(bond->hash); free(bond->name); free(bond); } /* Updates 'bond''s overall configuration to 's'. 
* * The caller should register each slave on 'bond' by calling * bond_slave_register(). This is optional if none of the slaves' * configuration has changed. In any case it can't hurt. * * Returns true if the configuration has changed in such a way that requires * flow revalidation. * */ bool bond_reconfigure(struct bond *bond, const struct bond_settings *s) { bool revalidate = false; ovs_rwlock_wrlock(&rwlock); if (!bond->name || strcmp(bond->name, s->name)) { if (bond->name) { hmap_remove(all_bonds, &bond->hmap_node); free(bond->name); } bond->name = xstrdup(s->name); hmap_insert(all_bonds, &bond->hmap_node, hash_string(bond->name, 0)); } bond->updelay = s->up_delay; bond->downdelay = s->down_delay; if (bond->rebalance_interval != s->rebalance_interval) { bond->rebalance_interval = s->rebalance_interval; revalidate = true; } if (bond->balance != s->balance) { bond->balance = s->balance; revalidate = true; } if (bond->basis != s->basis) { bond->basis = s->basis; revalidate = true; } if (s->fake_iface) { if (bond->next_fake_iface_update == LLONG_MAX) { bond->next_fake_iface_update = time_msec(); } } else { bond->next_fake_iface_update = LLONG_MAX; } if (bond->bond_revalidate) { revalidate = true; bond->bond_revalidate = false; } if (bond->balance == BM_AB || !bond->hash || revalidate) { bond_entry_reset(bond); } ovs_rwlock_unlock(&rwlock); return revalidate; } static void bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev) OVS_REQ_WRLOCK(rwlock) { if (slave->netdev != netdev) { slave->netdev = netdev; slave->change_seq = 0; } } /* Registers 'slave_' as a slave of 'bond'. The 'slave_' pointer is an * arbitrary client-provided pointer that uniquely identifies a slave within a * bond. If 'slave_' already exists within 'bond' then this function * reconfigures the existing slave. * * 'netdev' must be the network device that 'slave_' represents. 
It is owned * by the client, so the client must not close it before either unregistering * 'slave_' or destroying 'bond'. */
void
bond_slave_register(struct bond *bond, void *slave_, struct netdev *netdev)
{
    struct bond_slave *slave;

    ovs_rwlock_wrlock(&rwlock);
    slave = bond_slave_lookup(bond, slave_);
    if (!slave) {
        slave = xzalloc(sizeof *slave);

        hmap_insert(&bond->slaves, &slave->hmap_node, hash_pointer(slave_, 0));
        slave->bond = bond;
        slave->aux = slave_;
        slave->delay_expires = LLONG_MAX;

        /* The name must be set before bond_enable_slave() below, which logs
         * it when the enabled state changes. */
        slave->name = xstrdup(netdev_get_name(netdev));
        bond->bond_revalidate = true;

        /* A new slave starts out enabled exactly when its carrier is up. */
        slave->enabled = false;
        bond_enable_slave(slave, netdev_get_carrier(netdev));
    }

    bond_slave_set_netdev__(slave, netdev);

    /* Refresh the name unconditionally so that an existing slave picks up
     * the name of the netdev it is being re-registered with. */
    free(slave->name);
    slave->name = xstrdup(netdev_get_name(netdev));
    ovs_rwlock_unlock(&rwlock);
}

/* Updates the network device to be used with 'slave_' to 'netdev'.
 *
 * This is useful if the caller closes and re-opens the network device
 * registered with bond_slave_register() but doesn't need to change anything
 * else.
 *
 * Has no effect if 'slave_' is not registered on 'bond'. */
void
bond_slave_set_netdev(struct bond *bond, void *slave_, struct netdev *netdev)
{
    struct bond_slave *slave;

    ovs_rwlock_wrlock(&rwlock);
    slave = bond_slave_lookup(bond, slave_);
    if (slave) {
        bond_slave_set_netdev__(slave, netdev);
    }
    ovs_rwlock_unlock(&rwlock);
}

/* Unregisters 'slave_' from 'bond'. If 'bond' does not contain such a slave
 * then this function has no effect.
 *
 * Unregistering a slave invalidates all flows.
*/ void bond_slave_unregister(struct bond *bond, const void *slave_) { struct bond_slave *slave; bool del_active; ovs_rwlock_wrlock(&rwlock); slave = bond_slave_lookup(bond, slave_); if (!slave) { goto out; } bond->bond_revalidate = true; bond_enable_slave(slave, false); del_active = bond->active_slave == slave; if (bond->hash) { struct bond_entry *e; for (e = bond->hash; e <= &bond->hash[BOND_MASK]; e++) { if (e->slave == slave) { e->slave = NULL; } } } free(slave->name); hmap_remove(&bond->slaves, &slave->hmap_node); /* Client owns 'slave->netdev'. */ free(slave); if (del_active) { bond_choose_active_slave(bond); bond->send_learning_packets = true; } out: ovs_rwlock_unlock(&rwlock); } /* Should be called on each slave in 'bond' before bond_run() to indicate * whether or not 'slave_' may be enabled. This function is intended to allow * other protocols to have some impact on bonding decisions. For example LACP * or high level link monitoring protocols may decide that a given slave should * not be able to send traffic. */ void bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable) { ovs_rwlock_wrlock(&rwlock); bond_slave_lookup(bond, slave_)->may_enable = may_enable; ovs_rwlock_unlock(&rwlock); } /* Performs periodic maintenance on 'bond'. * * Returns true if the caller should revalidate its flows. * * The caller should check bond_should_send_learning_packets() afterward. */ bool bond_run(struct bond *bond, enum lacp_status lacp_status) { struct bond_slave *slave; bool revalidate; ovs_rwlock_wrlock(&rwlock); if (bond->lacp_status != lacp_status) { bond->lacp_status = lacp_status; bond->bond_revalidate = true; } /* Enable slaves based on link status and LACP feedback. */ HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { bond_link_status_update(slave); slave->change_seq = netdev_change_seq(slave->netdev); } if (!bond->active_slave || !bond->active_slave->enabled) { bond_choose_active_slave(bond); } /* Update fake bond interface stats. 
*/ if (time_msec() >= bond->next_fake_iface_update) { bond_update_fake_slave_stats(bond); bond->next_fake_iface_update = time_msec() + 1000; } revalidate = bond->bond_revalidate; bond->bond_revalidate = false; ovs_rwlock_unlock(&rwlock); return revalidate; } /* Causes poll_block() to wake up when 'bond' needs something to be done. */ void bond_wait(struct bond *bond) { struct bond_slave *slave; ovs_rwlock_rdlock(&rwlock); HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { if (slave->delay_expires != LLONG_MAX) { poll_timer_wait_until(slave->delay_expires); } if (slave->change_seq != netdev_change_seq(slave->netdev)) { poll_immediate_wake(); } } if (bond->next_fake_iface_update != LLONG_MAX) { poll_timer_wait_until(bond->next_fake_iface_update); } if (bond->bond_revalidate) { poll_immediate_wake(); } ovs_rwlock_unlock(&rwlock); /* We don't wait for bond->next_rebalance because rebalancing can only run * at a flow account checkpoint. ofproto does checkpointing on its own * schedule and bond_rebalance() gets called afterward, so we'd just be * waking up for no purpose. */ } /* MAC learning table interaction. */ static bool may_send_learning_packets(const struct bond *bond) { return bond->lacp_status == LACP_DISABLED && (bond->balance == BM_SLB || bond->balance == BM_AB) && bond->active_slave; } /* Returns true if 'bond' needs the client to send out packets to assist with * MAC learning on 'bond'. If this function returns true, then the client * should iterate through its MAC learning table for the bridge on which 'bond' * is located. For each MAC that has been learned on a port other than 'bond', * it should call bond_compose_learning_packet(). * * This function will only return true if 'bond' is in SLB or active-backup * mode and LACP is not negotiated. Otherwise sending learning packets isn't * necessary. * * Calling this function resets the state that it checks. 
*/
bool
bond_should_send_learning_packets(struct bond *bond)
{
    bool send;

    ovs_rwlock_wrlock(&rwlock);
    send = bond->send_learning_packets && may_send_learning_packets(bond);

    /* The request flag is cleared whether or not packets may be sent. */
    bond->send_learning_packets = false;
    ovs_rwlock_unlock(&rwlock);
    return send;
}

/* Sends a gratuitous learning packet on 'bond' from 'eth_src' on 'vlan'.
 *
 * See bond_should_send_learning_packets() for description of usage.  The
 * caller should send the composed packet on the port associated with
 * port_aux and takes ownership of the returned ofpbuf.
 *
 * Asserts that may_send_learning_packets() holds for 'bond'. */
struct ofpbuf *
bond_compose_learning_packet(struct bond *bond,
                             const uint8_t eth_src[ETH_ADDR_LEN],
                             uint16_t vlan, void **port_aux)
{
    struct bond_slave *slave;
    struct ofpbuf *packet;
    struct flow flow;

    ovs_rwlock_rdlock(&rwlock);
    ovs_assert(may_send_learning_packets(bond));

    /* Build a flow that carries only the source MAC, and let the normal
     * output selection pick the slave for it. */
    memset(&flow, 0, sizeof flow);
    memcpy(flow.dl_src, eth_src, ETH_ADDR_LEN);
    slave = choose_output_slave(bond, &flow, NULL, vlan);

    packet = ofpbuf_new(0);
    compose_rarp(packet, eth_src);
    if (vlan) {
        eth_push_vlan(packet, htons(vlan));
    }

    /* NOTE(review): 'slave' is dereferenced without a NULL check; this
     * appears to rely on the assertion above guaranteeing an active
     * slave -- confirm. */
    *port_aux = slave->aux;
    ovs_rwlock_unlock(&rwlock);
    return packet;
}

/* Checks whether a packet that arrived on 'slave_' within 'bond', with an
 * Ethernet destination address of 'eth_dst', should be admitted.
 *
 * The return value is one of the following:
 *
 *    - BV_ACCEPT: Admit the packet.
 *
 *    - BV_DROP: Drop the packet.
 *
 *    - BV_DROP_IF_MOVED: Consult the MAC learning table for the packet's
 *      Ethernet source address and VLAN. If there is none, or if the packet
 *      is on the learned port, then admit the packet. If a different port has
 *      been learned, however, drop the packet (and do not use it for MAC
 *      learning).
*/
enum bond_verdict
bond_check_admissibility(struct bond *bond, const void *slave_,
                         const uint8_t eth_dst[ETH_ADDR_LEN])
{
    /* Every early exit below that does not set 'verdict' drops the
     * packet. */
    enum bond_verdict verdict = BV_DROP;
    struct bond_slave *slave;

    ovs_rwlock_rdlock(&rwlock);
    slave = bond_slave_lookup(bond, slave_);
    if (!slave) {
        goto out;
    }

    /* LACP bonds have very loose admissibility restrictions because we can
     * assume the remote switch is aware of the bond and will "do the right
     * thing".  However, as a precaution we drop packets on disabled slaves
     * because no correctly implemented partner switch should be sending
     * packets to them.
     *
     * If LACP is configured, but LACP negotiations have been unsuccessful, we
     * drop all incoming traffic. */
    switch (bond->lacp_status) {
    case LACP_NEGOTIATED:
        verdict = slave->enabled ? BV_ACCEPT : BV_DROP;
        goto out;
    case LACP_CONFIGURED:
        goto out;
    case LACP_DISABLED:
        break;
    }

    /* Drop all multicast packets on inactive slaves. */
    if (eth_addr_is_multicast(eth_dst)) {
        if (bond->active_slave != slave) {
            goto out;
        }
    }

    switch (bond->balance) {
    case BM_AB:
        /* Drop all packets which arrive on backup slaves.  This is similar to
         * how Linux bonding handles active-backup bonds. */
        if (bond->active_slave != slave) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

            VLOG_DBG_RL(&rl, "active-backup bond received packet on backup"
                        " slave (%s) destined for " ETH_ADDR_FMT,
                        slave->name, ETH_ADDR_ARGS(eth_dst));
            goto out;
        }
        verdict = BV_ACCEPT;
        goto out;

    case BM_TCP:
        /* TCP balanced bonds require successful LACP negotiation.  Based on
         * the above check, LACP is off on this bond.  Therefore, we drop all
         * incoming traffic. */
        goto out;

    case BM_SLB:
        /* Drop all packets for which we have learned a different input port,
         * because we probably sent the packet on one slave and got it back on
         * the other.  Gratuitous ARP packets are an exception to this rule:
         * the host has moved to another switch.  The exception to the
         * exception is if we locked the learning table to avoid reflections on
         * bond slaves. */
        verdict = BV_DROP_IF_MOVED;
        goto out;
    }

    NOT_REACHED();
out:
    ovs_rwlock_unlock(&rwlock);
    return verdict;

}

/* Returns the slave (registered on 'bond' by bond_slave_register()) to which
 * a packet with the given 'flow' and 'vlan' should be forwarded.  Returns
 * NULL if the packet should be dropped because no slaves are enabled.
 *
 * 'vlan' is not necessarily the same as 'flow->vlan_tci'.  First, 'vlan'
 * should be a VID only (i.e. excluding the PCP bits).  Second,
 * 'flow->vlan_tci' is the VLAN TCI that appeared on the packet (so it will be
 * nonzero only for trunk ports), whereas 'vlan' is the logical VLAN that the
 * packet belongs to (so for an access port it will be the access port's VLAN).
 *
 * If 'wc' is non-NULL, bitwise-OR's 'wc' with the set of bits that were
 * significant in the selection.  At some point earlier, 'wc' should
 * have been initialized (e.g., by flow_wildcards_init_catchall()). */
void *
bond_choose_output_slave(struct bond *bond, const struct flow *flow,
                         struct flow_wildcards *wc, uint16_t vlan)
{
    struct bond_slave *slave;
    void *aux;

    ovs_rwlock_rdlock(&rwlock);
    slave = choose_output_slave(bond, flow, wc, vlan);

    /* Hand back the client's own identifier for the slave, not the internal
     * 'struct bond_slave'. */
    aux = slave ? slave->aux : NULL;
    ovs_rwlock_unlock(&rwlock);

    return aux;
}

/* Rebalancing. */

/* Returns true if 'bond' takes part in rebalancing: its rebalance interval
 * is nonzero and its mode is SLB or TCP. */
static bool
bond_is_balanced(const struct bond *bond) OVS_REQ_RDLOCK(rwlock)
{
    return bond->rebalance_interval
        && (bond->balance == BM_SLB || bond->balance == BM_TCP);
}

/* Notifies 'bond' that 'n_bytes' bytes were sent in 'flow' within 'vlan'.
*/
void
bond_account(struct bond *bond, const struct flow *flow, uint16_t vlan,
             uint64_t n_bytes)
{
    ovs_rwlock_wrlock(&rwlock);

    /* Byte counts are only tracked for bonds that rebalance. */
    if (bond_is_balanced(bond)) {
        lookup_bond_entry(bond, flow, vlan)->tx_bytes += n_bytes;
    }
    ovs_rwlock_unlock(&rwlock);
}

/* Returns the bond_slave that embeds list node 'bal' as its 'bal_node'
 * member. */
static struct bond_slave *
bond_slave_from_bal_node(struct list *bal) OVS_REQ_RDLOCK(rwlock)
{
    return CONTAINER_OF(bal, struct bond_slave, bal_node);
}

/* If debug logging is enabled, logs one line for 'bond' listing each slave
 * in 'bals' with its load in kB and, for slaves that carry hash entries, the
 * per-hash breakdown. */
static void
log_bals(struct bond *bond, const struct list *bals)
{
    if (VLOG_IS_DBG_ENABLED()) {
        struct ds ds = DS_EMPTY_INITIALIZER;
        const struct bond_slave *slave;

        LIST_FOR_EACH (slave, bal_node, bals) {
            if (ds.length) {
                ds_put_char(&ds, ',');
            }
            ds_put_format(&ds, " %s %"PRIu64"kB",
                          slave->name, slave->tx_bytes / 1024);

            if (!slave->enabled) {
                ds_put_cstr(&ds, " (disabled)");
            }
            if (!list_is_empty(&slave->entries)) {
                struct bond_entry *e;

                ds_put_cstr(&ds, " (");
                LIST_FOR_EACH (e, list_node, &slave->entries) {
                    /* Separator before every entry except the first. */
                    if (&e->list_node != list_front(&slave->entries)) {
                        ds_put_cstr(&ds, " + ");
                    }
                    ds_put_format(&ds, "h%td: %"PRIu64"kB",
                                  e - bond->hash, e->tx_bytes / 1024);
                }
                ds_put_cstr(&ds, ")");
            }
        }
        VLOG_DBG("bond %s:%s", bond->name, ds_cstr(&ds));
        ds_destroy(&ds);
    }
}

/* Shifts 'hash' from its current slave to 'to'. */
static void
bond_shift_load(struct bond_entry *hash, struct bond_slave *to)
{
    struct bond_slave *from = hash->slave;
    struct bond *bond = from->bond;
    uint64_t delta = hash->tx_bytes;

    VLOG_INFO("bond %s: shift %"PRIu64"kB of load (with hash %td) "
              "from %s to %s (now carrying %"PRIu64"kB and "
              "%"PRIu64"kB load, respectively)",
              bond->name, delta / 1024, hash - bond->hash,
              from->name, to->name,
              (from->tx_bytes - delta) / 1024,
              (to->tx_bytes + delta) / 1024);

    /* Shift load away from 'from' to 'to'. */
    from->tx_bytes -= delta;
    to->tx_bytes += delta;

    /* Arrange for flows to be revalidated. */
    hash->slave = to;
    bond->bond_revalidate = true;
}

/* Picks and returns a bond_entry to migrate from 'from' (the most heavily
 * loaded bond slave) to a bond slave that has 'to_tx_bytes' bytes of load,
 * given that doing so must decrease the ratio of the load on the two slaves by
 * at least 0.1.  Returns NULL if there is no appropriate entry.
 *
 * The list of entries isn't sorted.  I don't know of a reason to prefer to
 * shift away small hashes or large hashes. */
static struct bond_entry *
choose_entry_to_migrate(const struct bond_slave *from, uint64_t to_tx_bytes)
{
    struct bond_entry *e;

    if (list_is_short(&from->entries)) {
        /* 'from' carries no more than one MAC hash, so shifting load away from
         * it would be pointless. */
        return NULL;
    }

    LIST_FOR_EACH (e, list_node, &from->entries) {
        double old_ratio, new_ratio;
        uint64_t delta;

        if (to_tx_bytes == 0) {
            /* Nothing on the new slave, move it. */
            return e;
        }

        delta = e->tx_bytes;
        old_ratio = (double)from->tx_bytes / to_tx_bytes;
        new_ratio = (double)(from->tx_bytes - delta) / (to_tx_bytes + delta);
        if (old_ratio - new_ratio > 0.1
            && fabs(new_ratio - 1.0) < fabs(old_ratio - 1.0)) {
            /* We're aiming for an ideal ratio of 1, meaning both the 'from'
               and 'to' slave have the same load.  Therefore, we only move an
               entry if it decreases the load on 'from', and brings us closer
               to equal traffic load. */
            return e;
        }
    }

    return NULL;
}

/* Inserts 'slave' into 'bals' so that descending order of 'tx_bytes' is
 * maintained. */
static void
insert_bal(struct list *bals, struct bond_slave *slave)
{
    struct bond_slave *pos;

    /* Stop at the first element carrying less load than 'slave'. */
    LIST_FOR_EACH (pos, bal_node, bals) {
        if (slave->tx_bytes > pos->tx_bytes) {
            break;
        }
    }

    /* NOTE(review): when the loop runs to completion, 'pos' is whatever
     * LIST_FOR_EACH leaves behind; presumably its 'bal_node' is then the
     * list head, making this an append -- confirm against the list
     * macros. */
    list_insert(&pos->bal_node, &slave->bal_node);
}

/* Removes 'slave' from its current list and then inserts it into 'bals' so
 * that descending order of 'tx_bytes' is maintained.
*/ static void reinsert_bal(struct list *bals, struct bond_slave *slave) { list_remove(&slave->bal_node); insert_bal(bals, slave); } /* If 'bond' needs rebalancing, does so. * * The caller should have called bond_account() for each active flow, to ensure * that flow data is consistently accounted at this point. */ void bond_rebalance(struct bond *bond) { struct bond_slave *slave; struct bond_entry *e; struct list bals; ovs_rwlock_wrlock(&rwlock); if (!bond_is_balanced(bond) || time_msec() < bond->next_rebalance) { ovs_rwlock_unlock(&rwlock); return; } bond->next_rebalance = time_msec() + bond->rebalance_interval; /* Add each bond_entry to its slave's 'entries' list. * Compute each slave's tx_bytes as the sum of its entries' tx_bytes. */ HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { slave->tx_bytes = 0; list_init(&slave->entries); } for (e = &bond->hash[0]; e <= &bond->hash[BOND_MASK]; e++) { if (e->slave && e->tx_bytes) { e->slave->tx_bytes += e->tx_bytes; list_push_back(&e->slave->entries, &e->list_node); } } /* Add enabled slaves to 'bals' in descending order of tx_bytes. * * XXX This is O(n**2) in the number of slaves but it could be O(n lg n) * with a proper list sort algorithm. */ list_init(&bals); HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { if (slave->enabled) { insert_bal(&bals, slave); } } log_bals(bond, &bals); /* Shift load from the most-loaded slaves to the least-loaded slaves. */ while (!list_is_short(&bals)) { struct bond_slave *from = bond_slave_from_bal_node(list_front(&bals)); struct bond_slave *to = bond_slave_from_bal_node(list_back(&bals)); uint64_t overload; overload = from->tx_bytes - to->tx_bytes; if (overload < to->tx_bytes >> 5 || overload < 100000) { /* The extra load on 'from' (and all less-loaded slaves), compared * to that of 'to' (the least-loaded slave), is less than ~3%, or * it is less than ~1Mbps. No point in rebalancing. */ break; } /* 'from' is carrying significantly more load than 'to'. 
Pick a hash * to move from 'from' to 'to'. */ e = choose_entry_to_migrate(from, to->tx_bytes); if (e) { bond_shift_load(e, to); /* Delete element from from->entries. * * We don't add the element to to->hashes. That would only allow * 'e' to be migrated to another slave in this rebalancing run, and * there is no point in doing that. */ list_remove(&e->list_node); /* Re-sort 'bals'. */ reinsert_bal(&bals, from); reinsert_bal(&bals, to); } else { /* Can't usefully migrate anything away from 'from'. * Don't reconsider it. */ list_remove(&from->bal_node); } } /* Implement exponentially weighted moving average. A weight of 1/2 causes * historical data to decay to <1% in 7 rebalancing runs. 1,000,000 bytes * take 20 rebalancing runs to decay to 0 and get deleted entirely. */ for (e = &bond->hash[0]; e <= &bond->hash[BOND_MASK]; e++) { e->tx_bytes /= 2; if (!e->tx_bytes) { e->slave = NULL; } } ovs_rwlock_unlock(&rwlock); } /* Bonding unixctl user interface functions. */ static struct bond * bond_find(const char *name) OVS_REQ_RDLOCK(rwlock) { struct bond *bond; HMAP_FOR_EACH_WITH_HASH (bond, hmap_node, hash_string(name, 0), all_bonds) { if (!strcmp(bond->name, name)) { return bond; } } return NULL; } static struct bond_slave * bond_lookup_slave(struct bond *bond, const char *slave_name) { struct bond_slave *slave; HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { if (!strcmp(slave->name, slave_name)) { return slave; } } return NULL; } static void bond_unixctl_list(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct bond *bond; ds_put_cstr(&ds, "bond\ttype\tslaves\n"); ovs_rwlock_rdlock(&rwlock); HMAP_FOR_EACH (bond, hmap_node, all_bonds) { const struct bond_slave *slave; size_t i; ds_put_format(&ds, "%s\t%s\t", bond->name, bond_mode_to_string(bond->balance)); i = 0; HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { if (i++ > 0) { ds_put_cstr(&ds, ", "); } ds_put_cstr(&ds, 
slave->name); } ds_put_char(&ds, '\n'); } ovs_rwlock_unlock(&rwlock); unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } static void bond_print_details(struct ds *ds, const struct bond *bond) OVS_REQ_RDLOCK(rwlock) { struct shash slave_shash = SHASH_INITIALIZER(&slave_shash); const struct shash_node **sorted_slaves = NULL; const struct bond_slave *slave; int i; ds_put_format(ds, "---- %s ----\n", bond->name); ds_put_format(ds, "bond_mode: %s\n", bond_mode_to_string(bond->balance)); ds_put_format(ds, "bond-hash-basis: %"PRIu32"\n", bond->basis); ds_put_format(ds, "updelay: %d ms\n", bond->updelay); ds_put_format(ds, "downdelay: %d ms\n", bond->downdelay); if (bond_is_balanced(bond)) { ds_put_format(ds, "next rebalance: %lld ms\n", bond->next_rebalance - time_msec()); } ds_put_cstr(ds, "lacp_status: "); switch (bond->lacp_status) { case LACP_NEGOTIATED: ds_put_cstr(ds, "negotiated\n"); break; case LACP_CONFIGURED: ds_put_cstr(ds, "configured\n"); break; case LACP_DISABLED: ds_put_cstr(ds, "off\n"); break; default: ds_put_cstr(ds, "\n"); break; } HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { shash_add(&slave_shash, slave->name, slave); } sorted_slaves = shash_sort(&slave_shash); for (i = 0; i < shash_count(&slave_shash); i++) { struct bond_entry *be; slave = sorted_slaves[i]->data; /* Basic info. */ ds_put_format(ds, "\nslave %s: %s\n", slave->name, slave->enabled ? "enabled" : "disabled"); if (slave == bond->active_slave) { ds_put_cstr(ds, "\tactive slave\n"); } if (slave->delay_expires != LLONG_MAX) { ds_put_format(ds, "\t%s expires in %lld ms\n", slave->enabled ? "downdelay" : "updelay", slave->delay_expires - time_msec()); } ds_put_format(ds, "\tmay_enable: %s\n", slave->may_enable ? "true" : "false"); if (!bond_is_balanced(bond)) { continue; } /* Hashes. 
*/ for (be = bond->hash; be <= &bond->hash[BOND_MASK]; be++) { int hash = be - bond->hash; if (be->slave != slave) { continue; } ds_put_format(ds, "\thash %d: %"PRIu64" kB load\n", hash, be->tx_bytes / 1024); /* XXX How can we list the MACs assigned to hashes of SLB bonds? */ } } shash_destroy(&slave_shash); free(sorted_slaves); ds_put_cstr(ds, "\n"); } static void bond_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; ovs_rwlock_rdlock(&rwlock); if (argc > 1) { const struct bond *bond = bond_find(argv[1]); if (!bond) { unixctl_command_reply_error(conn, "no such bond"); goto out; } bond_print_details(&ds, bond); } else { const struct bond *bond; HMAP_FOR_EACH (bond, hmap_node, all_bonds) { bond_print_details(&ds, bond); } } unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); out: ovs_rwlock_unlock(&rwlock); } static void bond_unixctl_migrate(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { const char *bond_s = argv[1]; const char *hash_s = argv[2]; const char *slave_s = argv[3]; struct bond *bond; struct bond_slave *slave; struct bond_entry *entry; int hash; ovs_rwlock_wrlock(&rwlock); bond = bond_find(bond_s); if (!bond) { unixctl_command_reply_error(conn, "no such bond"); goto out; } if (bond->balance != BM_SLB) { unixctl_command_reply_error(conn, "not an SLB bond"); goto out; } if (strspn(hash_s, "0123456789") == strlen(hash_s)) { hash = atoi(hash_s) & BOND_MASK; } else { unixctl_command_reply_error(conn, "bad hash"); goto out; } slave = bond_lookup_slave(bond, slave_s); if (!slave) { unixctl_command_reply_error(conn, "no such slave"); goto out; } if (!slave->enabled) { unixctl_command_reply_error(conn, "cannot migrate to disabled slave"); goto out; } entry = &bond->hash[hash]; bond->bond_revalidate = true; entry->slave = slave; unixctl_command_reply(conn, "migrated"); out: ovs_rwlock_unlock(&rwlock); } static void 
bond_unixctl_set_active_slave(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { const char *bond_s = argv[1]; const char *slave_s = argv[2]; struct bond *bond; struct bond_slave *slave; ovs_rwlock_wrlock(&rwlock); bond = bond_find(bond_s); if (!bond) { unixctl_command_reply_error(conn, "no such bond"); goto out; } slave = bond_lookup_slave(bond, slave_s); if (!slave) { unixctl_command_reply_error(conn, "no such slave"); goto out; } if (!slave->enabled) { unixctl_command_reply_error(conn, "cannot make disabled slave active"); goto out; } if (bond->active_slave != slave) { bond->bond_revalidate = true; bond->active_slave = slave; VLOG_INFO("bond %s: active interface is now %s", bond->name, slave->name); bond->send_learning_packets = true; unixctl_command_reply(conn, "done"); } else { unixctl_command_reply(conn, "no change"); } out: ovs_rwlock_unlock(&rwlock); } static void enable_slave(struct unixctl_conn *conn, const char *argv[], bool enable) { const char *bond_s = argv[1]; const char *slave_s = argv[2]; struct bond *bond; struct bond_slave *slave; ovs_rwlock_wrlock(&rwlock); bond = bond_find(bond_s); if (!bond) { unixctl_command_reply_error(conn, "no such bond"); goto out; } slave = bond_lookup_slave(bond, slave_s); if (!slave) { unixctl_command_reply_error(conn, "no such slave"); goto out; } bond_enable_slave(slave, enable); unixctl_command_reply(conn, enable ? "enabled" : "disabled"); out: ovs_rwlock_unlock(&rwlock); } static void bond_unixctl_enable_slave(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { enable_slave(conn, argv, true); } static void bond_unixctl_disable_slave(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { enable_slave(conn, argv, false); } static void bond_unixctl_hash(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { const char *mac_s = argv[1]; const char *vlan_s = argc > 2 ? 
argv[2] : NULL; const char *basis_s = argc > 3 ? argv[3] : NULL; uint8_t mac[ETH_ADDR_LEN]; uint8_t hash; char *hash_cstr; unsigned int vlan; uint32_t basis; if (vlan_s) { if (sscanf(vlan_s, "%u", &vlan) != 1) { unixctl_command_reply_error(conn, "invalid vlan"); return; } } else { vlan = 0; } if (basis_s) { if (sscanf(basis_s, "%"PRIu32, &basis) != 1) { unixctl_command_reply_error(conn, "invalid basis"); return; } } else { basis = 0; } if (sscanf(mac_s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac)) == ETH_ADDR_SCAN_COUNT) { hash = bond_hash_src(mac, vlan, basis) & BOND_MASK; hash_cstr = xasprintf("%u", hash); unixctl_command_reply(conn, hash_cstr); free(hash_cstr); } else { unixctl_command_reply_error(conn, "invalid mac"); } } void bond_init(void) { unixctl_command_register("bond/list", "", 0, 0, bond_unixctl_list, NULL); unixctl_command_register("bond/show", "[port]", 0, 1, bond_unixctl_show, NULL); unixctl_command_register("bond/migrate", "port hash slave", 3, 3, bond_unixctl_migrate, NULL); unixctl_command_register("bond/set-active-slave", "port slave", 2, 2, bond_unixctl_set_active_slave, NULL); unixctl_command_register("bond/enable-slave", "port slave", 2, 2, bond_unixctl_enable_slave, NULL); unixctl_command_register("bond/disable-slave", "port slave", 2, 2, bond_unixctl_disable_slave, NULL); unixctl_command_register("bond/hash", "mac [vlan] [basis]", 1, 3, bond_unixctl_hash, NULL); } static void bond_entry_reset(struct bond *bond) { if (bond->balance != BM_AB) { size_t hash_len = (BOND_MASK + 1) * sizeof *bond->hash; if (!bond->hash) { bond->hash = xmalloc(hash_len); } memset(bond->hash, 0, hash_len); bond->next_rebalance = time_msec() + bond->rebalance_interval; } else { free(bond->hash); bond->hash = NULL; } } static struct bond_slave * bond_slave_lookup(struct bond *bond, const void *slave_) { struct bond_slave *slave; HMAP_FOR_EACH_IN_BUCKET (slave, hmap_node, hash_pointer(slave_, 0), &bond->slaves) { if (slave->aux == slave_) { return slave; } } return 
NULL; } static void bond_enable_slave(struct bond_slave *slave, bool enable) { slave->delay_expires = LLONG_MAX; if (enable != slave->enabled) { slave->bond->bond_revalidate = true; slave->enabled = enable; VLOG_INFO("interface %s: %s", slave->name, slave->enabled ? "enabled" : "disabled"); } } static void bond_link_status_update(struct bond_slave *slave) { struct bond *bond = slave->bond; bool up; up = netdev_get_carrier(slave->netdev) && slave->may_enable; if ((up == slave->enabled) != (slave->delay_expires == LLONG_MAX)) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); VLOG_INFO_RL(&rl, "interface %s: link state %s", slave->name, up ? "up" : "down"); if (up == slave->enabled) { slave->delay_expires = LLONG_MAX; VLOG_INFO_RL(&rl, "interface %s: will not be %s", slave->name, up ? "disabled" : "enabled"); } else { int delay = (bond->lacp_status != LACP_DISABLED ? 0 : up ? bond->updelay : bond->downdelay); slave->delay_expires = time_msec() + delay; if (delay) { VLOG_INFO_RL(&rl, "interface %s: will be %s if it stays %s " "for %d ms", slave->name, up ? "enabled" : "disabled", up ? "up" : "down", delay); } } } if (time_msec() >= slave->delay_expires) { bond_enable_slave(slave, up); } } static unsigned int bond_hash_src(const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan, uint32_t basis) { return hash_3words(hash_bytes(mac, ETH_ADDR_LEN, 0), vlan, basis); } static unsigned int bond_hash_tcp(const struct flow *flow, uint16_t vlan, uint32_t basis) { struct flow hash_flow = *flow; hash_flow.vlan_tci = htons(vlan); /* The symmetric quality of this hash function is not required, but * flow_hash_symmetric_l4 already exists, and is sufficient for our * purposes, so we use it out of convenience. */ return flow_hash_symmetric_l4(&hash_flow, basis); } static unsigned int bond_hash(const struct bond *bond, const struct flow *flow, uint16_t vlan) { ovs_assert(bond->balance == BM_TCP || bond->balance == BM_SLB); return (bond->balance == BM_TCP ? 
bond_hash_tcp(flow, vlan, bond->basis) : bond_hash_src(flow->dl_src, vlan, bond->basis)); } static struct bond_entry * lookup_bond_entry(const struct bond *bond, const struct flow *flow, uint16_t vlan) { return &bond->hash[bond_hash(bond, flow, vlan) & BOND_MASK]; } static struct bond_slave * choose_output_slave(const struct bond *bond, const struct flow *flow, struct flow_wildcards *wc, uint16_t vlan) { struct bond_entry *e; if (bond->lacp_status == LACP_CONFIGURED) { /* LACP has been configured on this bond but negotiations were * unsuccussful. Drop all traffic. */ return NULL; } switch (bond->balance) { case BM_AB: return bond->active_slave; case BM_TCP: if (bond->lacp_status != LACP_NEGOTIATED) { /* Must have LACP negotiations for TCP balanced bonds. */ return NULL; } if (wc) { flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_SYMMETRIC_L4); } /* Fall Through. */ case BM_SLB: if (wc) { flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_ETH_SRC); } e = lookup_bond_entry(bond, flow, vlan); if (!e->slave || !e->slave->enabled) { e->slave = CONTAINER_OF(hmap_random_node(&bond->slaves), struct bond_slave, hmap_node); if (!e->slave->enabled) { e->slave = bond->active_slave; } } return e->slave; default: NOT_REACHED(); } } static struct bond_slave * bond_choose_slave(const struct bond *bond) { struct bond_slave *slave, *best; /* Find an enabled slave. */ HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { if (slave->enabled) { return slave; } } /* All interfaces are disabled. Find an interface that will be enabled * after its updelay expires. 
*/ best = NULL; HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { if (slave->delay_expires != LLONG_MAX && slave->may_enable && (!best || slave->delay_expires < best->delay_expires)) { best = slave; } } return best; } static void bond_choose_active_slave(struct bond *bond) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); struct bond_slave *old_active_slave = bond->active_slave; bond->active_slave = bond_choose_slave(bond); if (bond->active_slave) { if (bond->active_slave->enabled) { VLOG_INFO_RL(&rl, "bond %s: active interface is now %s", bond->name, bond->active_slave->name); } else { VLOG_INFO_RL(&rl, "bond %s: active interface is now %s, skipping " "remaining %lld ms updelay (since no interface was " "enabled)", bond->name, bond->active_slave->name, bond->active_slave->delay_expires - time_msec()); bond_enable_slave(bond->active_slave, true); } bond->send_learning_packets = true; } else if (old_active_slave) { VLOG_INFO_RL(&rl, "bond %s: all interfaces disabled", bond->name); } } /* Attempts to make the sum of the bond slaves' statistics appear on the fake * bond interface. */ static void bond_update_fake_slave_stats(struct bond *bond) { struct netdev_stats bond_stats; struct bond_slave *slave; struct netdev *bond_dev; memset(&bond_stats, 0, sizeof bond_stats); HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { struct netdev_stats slave_stats; if (!netdev_get_stats(slave->netdev, &slave_stats)) { /* XXX: We swap the stats here because they are swapped back when * reported by the internal device. The reason for this is * internal devices normally represent packets going into the * system but when used as fake bond device they represent packets * leaving the system. We really should do this in the internal * device itself because changing it here reverses the counts from * the perspective of the switch. However, the internal device * doesn't know what type of device it represents so we have to do * it here for now. 
*/ bond_stats.tx_packets += slave_stats.rx_packets; bond_stats.tx_bytes += slave_stats.rx_bytes; bond_stats.rx_packets += slave_stats.tx_packets; bond_stats.rx_bytes += slave_stats.tx_bytes; } } if (!netdev_open(bond->name, "system", &bond_dev)) { netdev_set_stats(bond_dev, &bond_stats); netdev_close(bond_dev); } } openvswitch-2.0.1+git20140120/lib/bond.h000066400000000000000000000065451226605124000173250ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef BOND_H #define BOND_H 1 #include #include #include "packets.h" struct flow; struct netdev; struct ofpbuf; enum lacp_status; /* How flows are balanced among bond slaves. */ enum bond_mode { BM_TCP, /* Transport Layer Load Balance. */ BM_SLB, /* Source Load Balance. */ BM_AB /* Active Backup. */ }; bool bond_mode_from_string(enum bond_mode *, const char *); const char *bond_mode_to_string(enum bond_mode); /* Configuration for a bond as a whole. */ struct bond_settings { char *name; /* Bond's name, for log messages. */ uint32_t basis; /* Flow hashing basis. */ /* Balancing configuration. */ enum bond_mode balance; int rebalance_interval; /* Milliseconds between rebalances. Zero to disable rebalancing. */ /* Link status detection. */ int up_delay; /* ms before enabling an up slave. */ int down_delay; /* ms before disabling a down slave. */ /* Legacy compatibility. */ bool fake_iface; /* Update fake stats for netdev 'name'? 
*/ }; /* Program startup. */ void bond_init(void); /* Basics. */ struct bond *bond_create(const struct bond_settings *); void bond_unref(struct bond *); struct bond *bond_ref(const struct bond *); bool bond_reconfigure(struct bond *, const struct bond_settings *); void bond_slave_register(struct bond *, void *slave_, struct netdev *); void bond_slave_set_netdev(struct bond *, void *slave_, struct netdev *); void bond_slave_unregister(struct bond *, const void *slave); bool bond_run(struct bond *, enum lacp_status); void bond_wait(struct bond *); void bond_slave_set_may_enable(struct bond *, void *slave_, bool may_enable); /* Special MAC learning support for SLB bonding. */ bool bond_should_send_learning_packets(struct bond *); struct ofpbuf *bond_compose_learning_packet(struct bond *, const uint8_t eth_src[ETH_ADDR_LEN], uint16_t vlan, void **port_aux); /* Packet processing. */ enum bond_verdict { BV_ACCEPT, /* Accept this packet. */ BV_DROP, /* Drop this packet. */ BV_DROP_IF_MOVED /* Drop if we've learned a different port. */ }; enum bond_verdict bond_check_admissibility(struct bond *, const void *slave_, const uint8_t dst[ETH_ADDR_LEN]); void *bond_choose_output_slave(struct bond *, const struct flow *, struct flow_wildcards *, uint16_t vlan); /* Rebalancing. */ void bond_account(struct bond *, const struct flow *, uint16_t vlan, uint64_t n_bytes); void bond_rebalance(struct bond *); #endif /* bond.h */ openvswitch-2.0.1+git20140120/lib/bundle.c000066400000000000000000000317371226605124000176500ustar00rootroot00000000000000/* Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "bundle.h" #include #include #include "dynamic-string.h" #include "multipath.h" #include "meta-flow.h" #include "nx-match.h" #include "ofpbuf.h" #include "ofp-actions.h" #include "ofp-errors.h" #include "ofp-util.h" #include "openflow/nicira-ext.h" #include "vlog.h" #define BUNDLE_MAX_SLAVES 2048 VLOG_DEFINE_THIS_MODULE(bundle); static ofp_port_t execute_ab(const struct ofpact_bundle *bundle, bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), void *aux) { size_t i; for (i = 0; i < bundle->n_slaves; i++) { ofp_port_t slave = bundle->slaves[i]; if (slave_enabled(slave, aux)) { return slave; } } return OFPP_NONE; } static ofp_port_t execute_hrw(const struct ofpact_bundle *bundle, const struct flow *flow, struct flow_wildcards *wc, bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), void *aux) { uint32_t flow_hash, best_hash; int best, i; if (bundle->n_slaves > 1) { flow_mask_hash_fields(flow, wc, bundle->fields); } flow_hash = flow_hash_fields(flow, bundle->fields, bundle->basis); best = -1; best_hash = 0; for (i = 0; i < bundle->n_slaves; i++) { if (slave_enabled(bundle->slaves[i], aux)) { uint32_t hash = hash_2words(i, flow_hash); if (best < 0 || hash > best_hash) { best_hash = hash; best = i; } } } return best >= 0 ? bundle->slaves[best] : OFPP_NONE; } /* Executes 'bundle' on 'flow'. Sets fields in 'wc' that were used to * calculate the result. Uses 'slave_enabled' to determine if the slave * designated by 'ofp_port' is up. Returns the chosen slave, or * OFPP_NONE if none of the slaves are acceptable. 
*/ ofp_port_t bundle_execute(const struct ofpact_bundle *bundle, const struct flow *flow, struct flow_wildcards *wc, bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), void *aux) { switch (bundle->algorithm) { case NX_BD_ALG_HRW: return execute_hrw(bundle, flow, wc, slave_enabled, aux); case NX_BD_ALG_ACTIVE_BACKUP: return execute_ab(bundle, slave_enabled, aux); default: NOT_REACHED(); } } /* Checks that 'nab' specifies a bundle action which is supported by this * bundle module. Uses the 'max_ports' parameter to validate each port using * ofputil_check_output_port(). Returns 0 if 'nab' is supported, otherwise an * OFPERR_* error code. */ enum ofperr bundle_from_openflow(const struct nx_action_bundle *nab, struct ofpbuf *ofpacts) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); struct ofpact_bundle *bundle; uint16_t subtype; uint32_t slave_type; size_t slaves_size, i; enum ofperr error; bundle = ofpact_put_BUNDLE(ofpacts); subtype = ntohs(nab->subtype); bundle->n_slaves = ntohs(nab->n_slaves); bundle->basis = ntohs(nab->basis); bundle->fields = ntohs(nab->fields); bundle->algorithm = ntohs(nab->algorithm); slave_type = ntohl(nab->slave_type); slaves_size = ntohs(nab->len) - sizeof *nab; error = OFPERR_OFPBAC_BAD_ARGUMENT; if (!flow_hash_fields_valid(bundle->fields)) { VLOG_WARN_RL(&rl, "unsupported fields %d", (int) bundle->fields); } else if (bundle->n_slaves > BUNDLE_MAX_SLAVES) { VLOG_WARN_RL(&rl, "too may slaves"); } else if (bundle->algorithm != NX_BD_ALG_HRW && bundle->algorithm != NX_BD_ALG_ACTIVE_BACKUP) { VLOG_WARN_RL(&rl, "unsupported algorithm %d", (int) bundle->algorithm); } else if (slave_type != NXM_OF_IN_PORT) { VLOG_WARN_RL(&rl, "unsupported slave type %"PRIu16, slave_type); } else { error = 0; } if (!is_all_zeros(nab->zero, sizeof nab->zero)) { VLOG_WARN_RL(&rl, "reserved field is nonzero"); error = OFPERR_OFPBAC_BAD_ARGUMENT; } if (subtype == NXAST_BUNDLE && (nab->ofs_nbits || nab->dst)) { VLOG_WARN_RL(&rl, "bundle action has 
nonzero reserved fields"); error = OFPERR_OFPBAC_BAD_ARGUMENT; } if (subtype == NXAST_BUNDLE_LOAD) { bundle->dst.field = mf_from_nxm_header(ntohl(nab->dst)); bundle->dst.ofs = nxm_decode_ofs(nab->ofs_nbits); bundle->dst.n_bits = nxm_decode_n_bits(nab->ofs_nbits); if (bundle->dst.n_bits < 16) { VLOG_WARN_RL(&rl, "bundle_load action requires at least 16 bit " "destination."); error = OFPERR_OFPBAC_BAD_ARGUMENT; } } if (slaves_size < bundle->n_slaves * sizeof(ovs_be16)) { VLOG_WARN_RL(&rl, "Nicira action %"PRIu16" only has %zu bytes " "allocated for slaves. %zu bytes are required for " "%"PRIu16" slaves.", subtype, slaves_size, bundle->n_slaves * sizeof(ovs_be16), bundle->n_slaves); error = OFPERR_OFPBAC_BAD_LEN; } for (i = 0; i < bundle->n_slaves; i++) { uint16_t ofp_port = ntohs(((ovs_be16 *)(nab + 1))[i]); ofpbuf_put(ofpacts, &ofp_port, sizeof ofp_port); } bundle = ofpacts->l2; ofpact_update_len(ofpacts, &bundle->ofpact); if (!error) { error = bundle_check(bundle, OFPP_MAX, NULL); } return error; } enum ofperr bundle_check(const struct ofpact_bundle *bundle, ofp_port_t max_ports, const struct flow *flow) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); size_t i; if (bundle->dst.field) { enum ofperr error = mf_check_dst(&bundle->dst, flow); if (error) { return error; } } for (i = 0; i < bundle->n_slaves; i++) { ofp_port_t ofp_port = bundle->slaves[i]; enum ofperr error; error = ofputil_check_output_port(ofp_port, max_ports); if (error) { VLOG_WARN_RL(&rl, "invalid slave %"PRIu16, ofp_port); return error; } /* Controller slaves are unsupported due to the lack of a max_len * argument. This may or may not change in the future. There doesn't * seem to be a real-world use-case for supporting it. 
*/ if (ofp_port == OFPP_CONTROLLER) { VLOG_WARN_RL(&rl, "unsupported controller slave"); return OFPERR_OFPBAC_BAD_OUT_PORT; } } return 0; } void bundle_to_nxast(const struct ofpact_bundle *bundle, struct ofpbuf *openflow) { int slaves_len = ROUND_UP(2 * bundle->n_slaves, OFP_ACTION_ALIGN); struct nx_action_bundle *nab; ovs_be16 *slaves; size_t i; nab = (bundle->dst.field ? ofputil_put_NXAST_BUNDLE_LOAD(openflow) : ofputil_put_NXAST_BUNDLE(openflow)); nab->len = htons(ntohs(nab->len) + slaves_len); nab->algorithm = htons(bundle->algorithm); nab->fields = htons(bundle->fields); nab->basis = htons(bundle->basis); nab->slave_type = htonl(NXM_OF_IN_PORT); nab->n_slaves = htons(bundle->n_slaves); if (bundle->dst.field) { nab->ofs_nbits = nxm_encode_ofs_nbits(bundle->dst.ofs, bundle->dst.n_bits); nab->dst = htonl(bundle->dst.field->nxm_header); } slaves = ofpbuf_put_zeros(openflow, slaves_len); for (i = 0; i < bundle->n_slaves; i++) { slaves[i] = htons(ofp_to_u16(bundle->slaves[i])); } } /* Helper for bundle_parse and bundle_parse_load. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. 
The caller is responsible for freeing the returned string.*/
static char * WARN_UNUSED_RESULT
bundle_parse__(const char *s, char **save_ptr,
               const char *fields, const char *basis, const char *algorithm,
               const char *slave_type, const char *dst,
               const char *slave_delim, struct ofpbuf *ofpacts)
{
    struct ofpact_bundle *bundle;
    char *token;

    /* The "slaves" keyword must be present before the port list. */
    if (!slave_delim) {
        return xasprintf("%s: not enough arguments to bundle action", s);
    }
    if (strcasecmp(slave_delim, "slaves") != 0) {
        return xasprintf("%s: missing slave delimiter, expected `slaves' "
                         "got `%s'", s, slave_delim);
    }

    bundle = ofpact_put_BUNDLE(ofpacts);

    /* Consume port tokens until the input runs out or the bundle is full.
     * The next token is always fetched before the capacity test. */
    while ((token = strtok_r(NULL, ", []", save_ptr)) != NULL
           && bundle->n_slaves < BUNDLE_MAX_SLAVES) {
        ofp_port_t port;

        if (!ofputil_port_from_string(token, &port)) {
            return xasprintf("%s: bad port number", token);
        }
        ofpbuf_put(ofpacts, &port, sizeof port);

        /* Appending may have moved the buffer, so re-fetch the action before
         * touching it again. */
        bundle = ofpacts->l2;
        bundle->n_slaves++;
    }
    ofpact_update_len(ofpacts, &bundle->ofpact);

    bundle->basis = atoi(basis);

    if (strcasecmp(fields, "eth_src") == 0) {
        bundle->fields = NX_HASH_FIELDS_ETH_SRC;
    } else if (strcasecmp(fields, "symmetric_l4") == 0) {
        bundle->fields = NX_HASH_FIELDS_SYMMETRIC_L4;
    } else {
        return xasprintf("%s: unknown fields `%s'", s, fields);
    }

    if (strcasecmp(algorithm, "active_backup") == 0) {
        bundle->algorithm = NX_BD_ALG_ACTIVE_BACKUP;
    } else if (strcasecmp(algorithm, "hrw") == 0) {
        bundle->algorithm = NX_BD_ALG_HRW;
    } else {
        return xasprintf("%s: unknown algorithm `%s'", s, algorithm);
    }

    if (strcasecmp(slave_type, "ofport") != 0) {
        return xasprintf("%s: unknown slave_type `%s'", s, slave_type);
    }

    /* Only bundle_load supplies a destination; its parser's result (NULL on
     * success) is the final result. */
    return dst ? mf_parse_subfield(&bundle->dst, dst) : NULL;
}

/* Converts a bundle action string contained in 's' to an nx_action_bundle and
 * stores it in 'b'.  Sets 'b''s l2 pointer to NULL.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string.
*/ char * WARN_UNUSED_RESULT bundle_parse(const char *s, struct ofpbuf *ofpacts) { char *fields, *basis, *algorithm, *slave_type, *slave_delim; char *tokstr, *save_ptr; char *error; save_ptr = NULL; tokstr = xstrdup(s); fields = strtok_r(tokstr, ", ", &save_ptr); basis = strtok_r(NULL, ", ", &save_ptr); algorithm = strtok_r(NULL, ", ", &save_ptr); slave_type = strtok_r(NULL, ", ", &save_ptr); slave_delim = strtok_r(NULL, ": ", &save_ptr); error = bundle_parse__(s, &save_ptr, fields, basis, algorithm, slave_type, NULL, slave_delim, ofpacts); free(tokstr); return error; } /* Converts a bundle_load action string contained in 's' to an nx_action_bundle * and stores it in 'b'. Sets 'b''s l2 pointer to NULL. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string.*/ char * WARN_UNUSED_RESULT bundle_parse_load(const char *s, struct ofpbuf *ofpacts) { char *fields, *basis, *algorithm, *slave_type, *dst, *slave_delim; char *tokstr, *save_ptr; char *error; save_ptr = NULL; tokstr = xstrdup(s); fields = strtok_r(tokstr, ", ", &save_ptr); basis = strtok_r(NULL, ", ", &save_ptr); algorithm = strtok_r(NULL, ", ", &save_ptr); slave_type = strtok_r(NULL, ", ", &save_ptr); dst = strtok_r(NULL, ", ", &save_ptr); slave_delim = strtok_r(NULL, ": ", &save_ptr); error = bundle_parse__(s, &save_ptr, fields, basis, algorithm, slave_type, dst, slave_delim, ofpacts); free(tokstr); return error; } /* Appends a human-readable representation of 'nab' to 's'. */ void bundle_format(const struct ofpact_bundle *bundle, struct ds *s) { const char *action, *fields, *algorithm; size_t i; fields = flow_hash_fields_to_str(bundle->fields); switch (bundle->algorithm) { case NX_BD_ALG_HRW: algorithm = "hrw"; break; case NX_BD_ALG_ACTIVE_BACKUP: algorithm = "active_backup"; break; default: algorithm = ""; } action = bundle->dst.field ? 
"bundle_load" : "bundle"; ds_put_format(s, "%s(%s,%"PRIu16",%s,%s,", action, fields, bundle->basis, algorithm, "ofport"); if (bundle->dst.field) { mf_format_subfield(&bundle->dst, s); ds_put_cstr(s, ","); } ds_put_cstr(s, "slaves:"); for (i = 0; i < bundle->n_slaves; i++) { if (i) { ds_put_cstr(s, ","); } ofputil_format_port(bundle->slaves[i], s); } ds_put_cstr(s, ")"); } openvswitch-2.0.1+git20140120/lib/bundle.h000066400000000000000000000034731226605124000176510ustar00rootroot00000000000000/* Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef BUNDLE_H #define BUNDLE_H 1 #include #include #include #include #include "compiler.h" #include "ofp-errors.h" #include "openflow/nicira-ext.h" #include "openvswitch/types.h" struct ds; struct flow; struct flow_wildcards; struct ofpact_bundle; struct ofpbuf; /* NXAST_BUNDLE helper functions. * * See include/openflow/nicira-ext.h for NXAST_BUNDLE specification. 
*/ ofp_port_t bundle_execute(const struct ofpact_bundle *, const struct flow *, struct flow_wildcards *wc, bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), void *aux); enum ofperr bundle_from_openflow(const struct nx_action_bundle *, struct ofpbuf *ofpact); enum ofperr bundle_check(const struct ofpact_bundle *, ofp_port_t max_ports, const struct flow *); void bundle_to_nxast(const struct ofpact_bundle *, struct ofpbuf *of10); char *bundle_parse(const char *, struct ofpbuf *ofpacts) WARN_UNUSED_RESULT; char *bundle_parse_load(const char *, struct ofpbuf *ofpacts) WARN_UNUSED_RESULT; void bundle_format(const struct ofpact_bundle *, struct ds *); #endif /* bundle.h */ openvswitch-2.0.1+git20140120/lib/byte-order.h000066400000000000000000000061401226605124000204460ustar00rootroot00000000000000/* * Copyright (c) 2008, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef BYTE_ORDER_H #define BYTE_ORDER_H 1 #include #include #include #include "openvswitch/types.h" #ifndef __CHECKER__ static inline ovs_be64 htonll(uint64_t n) { return htonl(1) == 1 ? n : ((uint64_t) htonl(n) << 32) | htonl(n >> 32); } static inline uint64_t ntohll(ovs_be64 n) { return htonl(1) == 1 ? n : ((uint64_t) ntohl(n) << 32) | ntohl(n >> 32); } #else /* Making sparse happy with these functions also makes them unreadable, so * don't bother to show it their implementations. 
*/ ovs_be64 htonll(uint64_t); uint64_t ntohll(ovs_be64); #endif #if defined(WORDS_BIGENDIAN) static inline uint32_t uint32_byteswap(uint32_t crc) { return (((crc & 0x000000ff) << 24) | ((crc & 0x0000ff00) << 8) | ((crc & 0x00ff0000) >> 8) | ((crc & 0xff000000) >> 24)); } #endif /* These macros may substitute for htons(), htonl(), and htonll() in contexts * where function calls are not allowed, such as case labels. They should not * be used elsewhere because all of them evaluate their argument many times. */ #if defined(WORDS_BIGENDIAN) || __CHECKER__ #define CONSTANT_HTONS(VALUE) ((OVS_FORCE ovs_be16) ((VALUE) & 0xffff)) #define CONSTANT_HTONL(VALUE) ((OVS_FORCE ovs_be32) ((VALUE) & 0xffffffff)) #define CONSTANT_HTONLL(VALUE) \ ((OVS_FORCE ovs_be64) ((VALUE) & UINT64_C(0xffffffffffffffff))) #else #define CONSTANT_HTONS(VALUE) \ (((((ovs_be16) (VALUE)) & 0xff00) >> 8) | \ ((((ovs_be16) (VALUE)) & 0x00ff) << 8)) #define CONSTANT_HTONL(VALUE) \ (((((ovs_be32) (VALUE)) & 0x000000ff) << 24) | \ ((((ovs_be32) (VALUE)) & 0x0000ff00) << 8) | \ ((((ovs_be32) (VALUE)) & 0x00ff0000) >> 8) | \ ((((ovs_be32) (VALUE)) & 0xff000000) >> 24)) #define CONSTANT_HTONLL(VALUE) \ (((((ovs_be64) (VALUE)) & UINT64_C(0x00000000000000ff)) << 56) | \ ((((ovs_be64) (VALUE)) & UINT64_C(0x000000000000ff00)) << 40) | \ ((((ovs_be64) (VALUE)) & UINT64_C(0x0000000000ff0000)) << 24) | \ ((((ovs_be64) (VALUE)) & UINT64_C(0x00000000ff000000)) << 8) | \ ((((ovs_be64) (VALUE)) & UINT64_C(0x000000ff00000000)) >> 8) | \ ((((ovs_be64) (VALUE)) & UINT64_C(0x0000ff0000000000)) >> 24) | \ ((((ovs_be64) (VALUE)) & UINT64_C(0x00ff000000000000)) >> 40) | \ ((((ovs_be64) (VALUE)) & UINT64_C(0xff00000000000000)) >> 56)) #endif #endif /* byte-order.h */ openvswitch-2.0.1+git20140120/lib/byteq.c000066400000000000000000000122311226605124000175070ustar00rootroot00000000000000/* Copyright (c) 2008, 2009, 2012, 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "byteq.h" #include #include #include #include "util.h" /* Initializes 'q' as an empty byteq that uses the 'size' bytes of 'buffer' to * store data. 'size' must be a power of 2. * * The caller must ensure that 'buffer' remains available to the byteq as long * as 'q' is in use. */ void byteq_init(struct byteq *q, uint8_t *buffer, size_t size) { ovs_assert(is_pow2(size)); q->buffer = buffer; q->size = size; q->head = q->tail = 0; } /* Returns the number of bytes current queued in 'q'. */ int byteq_used(const struct byteq *q) { return q->head - q->tail; } /* Returns the number of bytes that can be added to 'q' without overflow. */ int byteq_avail(const struct byteq *q) { return q->size - byteq_used(q); } /* Returns true if no bytes are queued in 'q', * false if at least one byte is queued. */ bool byteq_is_empty(const struct byteq *q) { return !byteq_used(q); } /* Returns true if 'q' has no room to queue additional bytes, * false if 'q' has room for at least one more byte. */ bool byteq_is_full(const struct byteq *q) { return !byteq_avail(q); } /* Adds 'c' at the head of 'q', which must not be full. */ void byteq_put(struct byteq *q, uint8_t c) { ovs_assert(!byteq_is_full(q)); *byteq_head(q) = c; q->head++; } /* Adds the 'n' bytes in 'p' at the head of 'q', which must have at least 'n' * bytes of free space. 
*/ void byteq_putn(struct byteq *q, const void *p_, size_t n) { const uint8_t *p = p_; ovs_assert(byteq_avail(q) >= n); while (n > 0) { size_t chunk = MIN(n, byteq_headroom(q)); memcpy(byteq_head(q), p, chunk); byteq_advance_head(q, chunk); p += chunk; n -= chunk; } } /* Appends null-terminated string 's' to the head of 'q', which must have * enough space. The null terminator is not added to 'q'. */ void byteq_put_string(struct byteq *q, const char *s) { byteq_putn(q, s, strlen(s)); } /* Removes a byte from the tail of 'q' and returns it. 'q' must not be * empty. */ uint8_t byteq_get(struct byteq *q) { uint8_t c; ovs_assert(!byteq_is_empty(q)); c = *byteq_tail(q); q->tail++; return c; } /* Writes as much of 'q' as possible to 'fd'. Returns 0 if 'q' is fully * drained by the write, otherwise a positive errno value (e.g. EAGAIN if a * socket or tty buffer filled up). */ int byteq_write(struct byteq *q, int fd) { while (!byteq_is_empty(q)) { ssize_t n = write(fd, byteq_tail(q), byteq_tailroom(q)); if (n > 0) { byteq_advance_tail(q, n); } else { ovs_assert(n < 0); return errno; } } return 0; } /* Reads as much possible from 'fd' into 'q'. Returns 0 if 'q' is completely * filled up by the read, EOF if end-of-file was reached before 'q' was filled, * and otherwise a positive errno value (e.g. EAGAIN if a socket or tty buffer * was drained). */ int byteq_read(struct byteq *q, int fd) { while (!byteq_is_full(q)) { ssize_t n = read(fd, byteq_head(q), byteq_headroom(q)); if (n > 0) { byteq_advance_head(q, n); } else { return !n ? EOF : errno; } } return 0; } /* Returns the number of contiguous bytes of in-use space starting at the tail * of 'q'. */ int byteq_tailroom(const struct byteq *q) { int used = byteq_used(q); int tail_to_end = q->size - (q->tail & (q->size - 1)); return MIN(used, tail_to_end); } /* Returns the first in-use byte of 'q', the point at which data is removed * from 'q'. 
*/ const uint8_t * byteq_tail(const struct byteq *q) { return &q->buffer[q->tail & (q->size - 1)]; } /* Removes 'n' bytes from the tail of 'q', which must have at least 'n' bytes * of tailroom. */ void byteq_advance_tail(struct byteq *q, unsigned int n) { ovs_assert(byteq_tailroom(q) >= n); q->tail += n; } /* Returns the byte after the last in-use byte of 'q', the point at which new * data will be added to 'q'. */ uint8_t * byteq_head(struct byteq *q) { return &q->buffer[q->head & (q->size - 1)]; } /* Returns the number of contiguous bytes of free space starting at the head * of 'q'. */ int byteq_headroom(const struct byteq *q) { int avail = byteq_avail(q); int head_to_end = q->size - (q->head & (q->size - 1)); return MIN(avail, head_to_end); } /* Adds to 'q' the 'n' bytes after the last currently in-use byte of 'q'. 'q' * must have at least 'n' bytes of headroom. */ void byteq_advance_head(struct byteq *q, unsigned int n) { ovs_assert(byteq_headroom(q) >= n); q->head += n; } openvswitch-2.0.1+git20140120/lib/byteq.h000066400000000000000000000034271226605124000175230ustar00rootroot00000000000000/* Copyright (c) 2008, 2009, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef BYTEQ_H #define BYTEQ_H 1 #include #include #include /* General-purpose circular queue of bytes. */ struct byteq { uint8_t *buffer; /* Circular queue. */ unsigned int size; /* Number of bytes allocated for 'buffer'. */ unsigned int head; /* Head of queue. 
*/ unsigned int tail; /* Chases the head. */ }; void byteq_init(struct byteq *, uint8_t *buffer, size_t size); int byteq_used(const struct byteq *); int byteq_avail(const struct byteq *); bool byteq_is_empty(const struct byteq *); bool byteq_is_full(const struct byteq *); void byteq_put(struct byteq *, uint8_t c); void byteq_putn(struct byteq *, const void *, size_t n); void byteq_put_string(struct byteq *, const char *); uint8_t byteq_get(struct byteq *); int byteq_write(struct byteq *, int fd); int byteq_read(struct byteq *, int fd); uint8_t *byteq_head(struct byteq *); int byteq_headroom(const struct byteq *); void byteq_advance_head(struct byteq *, unsigned int n); int byteq_tailroom(const struct byteq *); const uint8_t *byteq_tail(const struct byteq *); void byteq_advance_tail(struct byteq *, unsigned int n); #endif /* byteq.h */ openvswitch-2.0.1+git20140120/lib/cfm.c000066400000000000000000000750101226605124000171340ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

/* NOTE(review): the bare '#include' directives below carry no header name in
 * this copy of the file; the names need to be restored before this builds. */
#include
#include "cfm.h"
#include
#include
#include
#include "byte-order.h"
#include "dynamic-string.h"
#include "flow.h"
#include "hash.h"
#include "hmap.h"
#include "netdev.h"
#include "ofpbuf.h"
#include "packets.h"
#include "poll-loop.h"
#include "random.h"
#include "timer.h"
#include "timeval.h"
#include "unixctl.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(cfm);

#define CFM_MAX_RMPS 256

/* Ethernet destination address of CCM packets. */
static const uint8_t eth_addr_ccm[6] = {
    0x01, 0x80, 0xC2, 0x00, 0x00, 0x30 };
/* Destination address used instead when extended mode is on (see
 * cfm_ccm_addr()). */
static const uint8_t eth_addr_ccm_x[6] = {
    0x01, 0x23, 0x20, 0x00, 0x00, 0x30
};

#define ETH_TYPE_CFM 0x8902

/* A 'ccm' represents a Continuity Check Message from the 802.1ag
 * specification.  Continuity Check Messages are broadcast periodically so that
 * hosts can determine whom they have connectivity to.
 *
 * The minimum length of a CCM as specified by IEEE 802.1ag is 75 bytes.
 * Previous versions of Open vSwitch generated 74-byte CCM messages, so we
 * accept such messages too. */
#define CCM_LEN 75
#define CCM_ACCEPT_LEN 74
#define CCM_MAID_LEN 48
#define CCM_OPCODE 1 /* CFM message opcode meaning CCM. */
#define CCM_RDI_MASK 0x80
#define CFM_HEALTH_INTERVAL 6

/* Wire layout of a CCM.  Packed so that its size is exactly CCM_LEN, which the
 * BUILD_ASSERT_DECL below enforces at compile time. */
OVS_PACKED(
struct ccm {
    uint8_t mdlevel_version; /* MD Level and Version */
    uint8_t opcode;
    uint8_t flags;
    uint8_t tlv_offset;
    ovs_be32 seq;
    ovs_be16 mpid;
    uint8_t maid[CCM_MAID_LEN];

    /* Defined by ITU-T Y.1731 should be zero */
    ovs_be16 interval_ms_x;      /* Transmission interval in ms. */
    ovs_be64 mpid64;             /* MPID in extended mode. */
    uint8_t opdown;              /* Operationally down. */
    uint8_t zero[5];

    /* TLV space. */
    uint8_t end_tlv;
});
BUILD_ASSERT_DECL(CCM_LEN == sizeof(struct ccm));

/* State of one CFM instance.  Instances are reference counted ('ref_cnt') and
 * registered in 'all_cfms' by cfm_create(). */
struct cfm {
    const char *name;           /* Name of this CFM object. */
    struct hmap_node hmap_node; /* Node in all_cfms list. */

    struct netdev *netdev;
    uint64_t rx_packets;        /* Packets received by 'netdev'. */

    uint64_t mpid;
    bool demand;                /* Demand mode. */
    bool booted;                /* A full fault interval has occurred. */
    enum cfm_fault_reason fault;        /* Connectivity fault status. */
    enum cfm_fault_reason recv_fault;   /* Bit mask of faults occurring on
                                           receive. */
    bool opup;                  /* Operational State. */
    bool remote_opup;           /* Remote Operational State. */

    int fault_override;         /* Manual override of 'fault' status.
                                   Ignored if negative. */

    uint32_t seq;               /* The sequence number of our last CCM. */
    uint8_t ccm_interval;       /* The CCM transmission interval. */
    int ccm_interval_ms;        /* 'ccm_interval' in milliseconds. */
    uint16_t ccm_vlan;          /* Vlan tag of CCM PDUs.  CFM_RANDOM_VLAN if
                                   random. */
    uint8_t ccm_pcp;            /* Priority of CCM PDUs. */
    uint8_t maid[CCM_MAID_LEN]; /* The MAID of this CFM. */

    struct timer tx_timer;      /* Send CCM when expired. */
    struct timer fault_timer;   /* Check for faults when expired. */

    struct hmap remote_mps;     /* Remote MPs. */

    /* Result of cfm_get_remote_mpids(). Updated only during fault check to
     * avoid flapping. */
    uint64_t *rmps_array;       /* Cache of remote_mps. */
    size_t rmps_array_len;      /* Number of rmps in 'rmps_array'. */

    int health;                 /* Percentage of the number of CCM frames
                                   received. */
    int health_interval;        /* Number of fault_intervals since health was
                                   recomputed. */
    long long int last_tx;      /* Last CCM transmission time. */

    atomic_bool check_tnl_key;  /* Verify the tunnel key of inbound packets? */
    atomic_bool extended;       /* Extended mode. */
    atomic_int ref_cnt;         /* Reference count; see cfm_ref() and
                                   cfm_unref(). */
};

/* Remote MPs represent foreign network entities that are configured to have
 * the same MAID as this CFM instance. */
struct remote_mp {
    uint64_t mpid;         /* The Maintenance Point ID of this 'remote_mp'. */
    struct hmap_node node; /* Node in 'remote_mps' map. */

    bool recv;           /* CCM was received since last fault check. */
    bool opup;           /* Operational State. */
    uint32_t seq;        /* Most recently received sequence number. */
    uint8_t num_health_ccm; /* Number of received ccm frames every
                               CFM_HEALTH_INTERVAL * 'fault_interval'. */
    long long int last_rx; /* Last CCM reception time. */

};

static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(20, 30);

/* 'mutex' guards 'all_cfms' and is required by the helpers below that are
 * annotated OVS_REQUIRES(mutex). */
static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
static struct hmap all_cfms__ = HMAP_INITIALIZER(&all_cfms__);
static struct hmap *const all_cfms OVS_GUARDED_BY(mutex) = &all_cfms__;

static unixctl_cb_func cfm_unixctl_show;
static unixctl_cb_func cfm_unixctl_set_fault;

/* Returns the rx_packets counter that netdev_get_stats() reports for
 * 'cfm->netdev', or 0 if netdev_get_stats() returns nonzero. */
static uint64_t
cfm_rx_packets(const struct cfm *cfm) OVS_REQUIRES(mutex)
{
    struct netdev_stats stats;

    if (!netdev_get_stats(cfm->netdev, &stats)) {
        return stats.rx_packets;
    } else {
        return 0;
    }
}

/* Returns the Ethernet destination address to use for 'cfm''s CCMs:
 * 'eth_addr_ccm_x' if extended mode is set, otherwise 'eth_addr_ccm'. */
static const uint8_t *
cfm_ccm_addr(struct cfm *cfm)
{
    bool extended;
    atomic_read(&cfm->extended, &extended);
    return extended ? eth_addr_ccm_x : eth_addr_ccm;
}

/* Returns the string representation of the given cfm_fault_reason 'reason'. */
const char *
cfm_fault_reason_to_str(int reason)
{
    switch (reason) {
    /* One 'case' per entry of the CFM_FAULT_REASONS list. */
#define CFM_FAULT_REASON(NAME, STR) case CFM_FAULT_##NAME: return #STR;
        CFM_FAULT_REASONS
#undef CFM_FAULT_REASON
    default: return "";
    }
}

/* Appends to 'ds' the name of every fault reason whose bit is set in 'fault',
 * separated by single spaces, with no trailing space. */
static void
ds_put_cfm_fault(struct ds *ds, int fault)
{
    int i;

    for (i = 0; i < CFM_FAULT_N_REASONS; i++) {
        int reason = 1 << i;

        if (fault & reason) {
            ds_put_format(ds, "%s ", cfm_fault_reason_to_str(reason));
        }
    }

    /* Drop the separator left after the last reason, if any. */
    ds_chomp(ds, ' ');
}

/* Fills in 'cfm->maid' from the fixed MD name "ovs" and MA name "ovs".
 *
 * The resulting layout is: format byte 4, MD name length, MD name, then
 * format byte 2, MA name length, MA name.  The remainder of the MAID stays
 * zero. */
static void
cfm_generate_maid(struct cfm *cfm) OVS_REQUIRES(mutex)
{
    const char *ovs_md_name = "ovs";
    const char *ovs_ma_name = "ovs";
    uint8_t *ma_p;
    size_t md_len, ma_len;

    memset(cfm->maid, 0, CCM_MAID_LEN);

    md_len = strlen(ovs_md_name);
    ma_len = strlen(ovs_ma_name);

    /* Both names plus their four header bytes must fit in the MAID. */
    ovs_assert(md_len && ma_len && md_len + ma_len + 4 <= CCM_MAID_LEN);

    cfm->maid[0] = 4;                           /* MD name string format. */
    cfm->maid[1] = md_len;                      /* MD name size. */
    memcpy(&cfm->maid[2], ovs_md_name, md_len); /* MD name. */

    ma_p = cfm->maid + 2 + md_len;
    ma_p[0] = 2;                           /* MA name string format. */
    ma_p[1] = ma_len;                      /* MA name size. */
    memcpy(&ma_p[2], ovs_ma_name, ma_len); /* MA name. */
}

/* Converts the CCM interval code 'interval' (1 through 7) to milliseconds.
 * Any other value reaches NOT_REACHED(). */
static int
ccm_interval_to_ms(uint8_t interval)
{
    switch (interval) {
    case 0:  NOT_REACHED(); /* Explicitly not supported by 802.1ag. */
    case 1:  return 3;      /* Not recommended due to timer resolution. */
    case 2:  return 10;     /* Not recommended due to timer resolution. */
    case 3:  return 100;
    case 4:  return 1000;
    case 5:  return 10000;
    case 6:  return 60000;
    case 7:  return 600000;
    default: NOT_REACHED(); /* Explicitly not supported by 802.1ag. */
    }

    NOT_REACHED();
}

/* Returns the time in milliseconds between fault checks for 'cfm'. */
static long long int
cfm_fault_interval(struct cfm *cfm) OVS_REQUIRES(mutex)
{
    /* According to the 802.1ag specification we should assume every other MP
     * with the same MAID has the same transmission interval that we have.  If
     * an MP has a different interval, cfm_process_heartbeat will register it
     * as a fault (likely due to a configuration error).  Thus we can check all
     * MPs at once making this quite a bit simpler.
     *
     * When cfm is not in demand mode, we check when (ccm_interval_ms * 3.5) ms
     * have passed.  When cfm is in demand mode, we check when
     * (MAX(ccm_interval_ms, 500) * 3.5) ms have passed.  This ensures that
     * ovs-vswitchd has enough time to pull statistics from the datapath. */

    return (MAX(cfm->ccm_interval_ms, cfm->demand ? 500 : cfm->ccm_interval_ms)
            * 7) / 2;
}

/* Returns the largest CCM interval code, searching from 7 down to 1, whose
 * duration does not exceed 'interval_ms'.  Returns 1 if even code 1 is longer
 * than 'interval_ms'. */
static uint8_t
ms_to_ccm_interval(int interval_ms)
{
    uint8_t i;

    for (i = 7; i > 0; i--) {
        if (ccm_interval_to_ms(i) <= interval_ms) {
            return i;
        }
    }

    return 1;
}

/* Returns the hash of 'mpid' used to key the 'remote_mps' map. */
static uint32_t
hash_mpid(uint64_t mpid)
{
    return hash_bytes(&mpid, sizeof mpid, 0);
}

/* Returns true if 'mpid' is acceptable given whether 'extended' mode is on. */
static bool
cfm_is_valid_mpid(bool extended, uint64_t mpid)
{
    /* 802.1ag specification requires MPIDs to be within the range [1, 8191].
     * In extended mode we relax this requirement. */
    return mpid >= 1 && (extended || mpid <= 8191);
}

/* Returns the remote MP of 'cfm' whose MPID is 'mpid', or NULL if there is
 * none. */
static struct remote_mp *
lookup_remote_mp(const struct cfm *cfm, uint64_t mpid) OVS_REQUIRES(mutex)
{
    struct remote_mp *rmp;

    HMAP_FOR_EACH_IN_BUCKET (rmp, node, hash_mpid(mpid), &cfm->remote_mps) {
        if (rmp->mpid == mpid) {
            return rmp;
        }
    }

    return NULL;
}

/* Registers the "cfm/show" and "cfm/set-fault" unixctl commands. */
void
cfm_init(void)
{
    unixctl_command_register("cfm/show", "[interface]", 0, 1, cfm_unixctl_show,
                             NULL);
    unixctl_command_register("cfm/set-fault", "[interface] normal|false|true",
                             1, 2, cfm_unixctl_set_fault, NULL);
}

/* Allocates a 'cfm' object for 'netdev', named after it, with an initial
 * reference count of 1, and adds it to 'all_cfms'.  A reference to 'netdev'
 * is taken.  'cfm' should be initialized by cfm_configure() before use. */
struct cfm *
cfm_create(const struct netdev *netdev) OVS_EXCLUDED(mutex)
{
    struct cfm *cfm;

    cfm = xzalloc(sizeof *cfm);
    cfm->netdev = netdev_ref(netdev);
    cfm->name = netdev_get_name(cfm->netdev);
    hmap_init(&cfm->remote_mps);
    cfm->remote_opup = true;
    cfm->fault_override = -1;   /* Negative: no manual override. */
    cfm->health = -1;
    cfm->last_tx = 0;

    atomic_init(&cfm->extended, false);
    atomic_init(&cfm->check_tnl_key, false);
    atomic_init(&cfm->ref_cnt, 1);

    ovs_mutex_lock(&mutex);
    cfm_generate_maid(cfm);
    hmap_insert(all_cfms, &cfm->hmap_node, hash_string(cfm->name, 0));
    ovs_mutex_unlock(&mutex);
    return cfm;
}

/* Drops one reference to 'cfm'; a null 'cfm' is ignored.
 *
 * When the last reference is dropped, 'cfm' is removed from 'all_cfms', its
 * remote MPs are freed, its netdev is closed, and 'cfm' itself is freed. */
void
cfm_unref(struct cfm *cfm) OVS_EXCLUDED(mutex)
{
    struct remote_mp *rmp, *rmp_next;
    int orig;

    if (!cfm) {
        return;
    }

    /* 'orig' receives the reference count from before the decrement. */
    atomic_sub(&cfm->ref_cnt, 1, &orig);
    ovs_assert(orig > 0);
    if (orig != 1) {
        return;
    }

    ovs_mutex_lock(&mutex);
    hmap_remove(all_cfms, &cfm->hmap_node);
    ovs_mutex_unlock(&mutex);

    HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->remote_mps) {
        hmap_remove(&cfm->remote_mps, &rmp->node);
        free(rmp);
    }

    hmap_destroy(&cfm->remote_mps);
    netdev_close(cfm->netdev);
    free(cfm->rmps_array);
    free(cfm);
}

/* Takes an additional reference to 'cfm_', if it is nonnull, and returns it
 * as a non-const pointer. */
struct cfm *
cfm_ref(const struct cfm *cfm_)
{
    struct cfm *cfm = CONST_CAST(struct cfm *, cfm_);
    if (cfm) {
        int orig;
        atomic_add(&cfm->ref_cnt, 1, &orig);
        ovs_assert(orig > 0);
    }
    return cfm;
}

/* Should be run periodically to update fault statistics messages.
*/ void cfm_run(struct cfm *cfm) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); if (timer_expired(&cfm->fault_timer)) { long long int interval = cfm_fault_interval(cfm); struct remote_mp *rmp, *rmp_next; bool old_cfm_fault = cfm->fault; bool demand_override; bool rmp_set_opup = false; bool rmp_set_opdown = false; cfm->fault = cfm->recv_fault; cfm->recv_fault = 0; cfm->rmps_array_len = 0; free(cfm->rmps_array); cfm->rmps_array = xmalloc(hmap_count(&cfm->remote_mps) * sizeof *cfm->rmps_array); if (cfm->health_interval == CFM_HEALTH_INTERVAL) { /* Calculate the cfm health of the interface. If the number of * remote_mpids of a cfm interface is > 1, the cfm health is * undefined. If the number of remote_mpids is 1, the cfm health is * the percentage of the ccm frames received in the * (CFM_HEALTH_INTERVAL * 3.5)ms, else it is 0. */ if (hmap_count(&cfm->remote_mps) > 1) { cfm->health = -1; } else if (hmap_is_empty(&cfm->remote_mps)) { cfm->health = 0; } else { int exp_ccm_recvd; rmp = CONTAINER_OF(hmap_first(&cfm->remote_mps), struct remote_mp, node); exp_ccm_recvd = (CFM_HEALTH_INTERVAL * 7) / 2; /* Calculate the percentage of healthy ccm frames received. * Since the 'fault_interval' is (3.5 * cfm_interval), and * 1 CCM packet must be received every cfm_interval, * the 'remote_mpid' health reports the percentage of * healthy CCM frames received every * 'CFM_HEALTH_INTERVAL'th 'fault_interval'. 
*/ cfm->health = (rmp->num_health_ccm * 100) / exp_ccm_recvd; cfm->health = MIN(cfm->health, 100); rmp->num_health_ccm = 0; ovs_assert(cfm->health >= 0 && cfm->health <= 100); } cfm->health_interval = 0; } cfm->health_interval++; demand_override = false; if (cfm->demand) { uint64_t rx_packets = cfm_rx_packets(cfm); demand_override = hmap_count(&cfm->remote_mps) == 1 && rx_packets > cfm->rx_packets; cfm->rx_packets = rx_packets; } HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->remote_mps) { if (!rmp->recv) { VLOG_INFO("%s: Received no CCM from RMP %"PRIu64" in the last" " %lldms", cfm->name, rmp->mpid, time_msec() - rmp->last_rx); if (!demand_override) { hmap_remove(&cfm->remote_mps, &rmp->node); free(rmp); } } else { rmp->recv = false; if (rmp->opup) { rmp_set_opup = true; } else { rmp_set_opdown = true; } cfm->rmps_array[cfm->rmps_array_len++] = rmp->mpid; } } if (rmp_set_opdown) { cfm->remote_opup = false; } else if (rmp_set_opup) { cfm->remote_opup = true; } if (hmap_is_empty(&cfm->remote_mps)) { cfm->fault |= CFM_FAULT_RECV; } if (old_cfm_fault != cfm->fault && !VLOG_DROP_INFO(&rl)) { struct ds ds = DS_EMPTY_INITIALIZER; ds_put_cstr(&ds, "from ["); ds_put_cfm_fault(&ds, old_cfm_fault); ds_put_cstr(&ds, "] to ["); ds_put_cfm_fault(&ds, cfm->fault); ds_put_char(&ds, ']'); VLOG_INFO("%s: CFM faults changed %s.", cfm->name, ds_cstr(&ds)); ds_destroy(&ds); } cfm->booted = true; timer_set_duration(&cfm->fault_timer, interval); VLOG_DBG("%s: new fault interval", cfm->name); } ovs_mutex_unlock(&mutex); } /* Should be run periodically to check if the CFM module has a CCM message it * wishes to send. */ bool cfm_should_send_ccm(struct cfm *cfm) OVS_EXCLUDED(mutex) { bool ret; ovs_mutex_lock(&mutex); ret = timer_expired(&cfm->tx_timer); ovs_mutex_unlock(&mutex); return ret; } /* Composes a CCM message into 'packet'. Messages generated with this function * should be sent whenever cfm_should_send_ccm() indicates. 
*/ void cfm_compose_ccm(struct cfm *cfm, struct ofpbuf *packet, uint8_t eth_src[ETH_ADDR_LEN]) OVS_EXCLUDED(mutex) { uint16_t ccm_vlan; struct ccm *ccm; bool extended; ovs_mutex_lock(&mutex); timer_set_duration(&cfm->tx_timer, cfm->ccm_interval_ms); eth_compose(packet, cfm_ccm_addr(cfm), eth_src, ETH_TYPE_CFM, sizeof *ccm); ccm_vlan = (cfm->ccm_vlan != CFM_RANDOM_VLAN ? cfm->ccm_vlan : random_uint16()); ccm_vlan = ccm_vlan & VLAN_VID_MASK; if (ccm_vlan || cfm->ccm_pcp) { uint16_t tci = ccm_vlan | (cfm->ccm_pcp << VLAN_PCP_SHIFT); eth_push_vlan(packet, htons(tci)); } ccm = packet->l3; ccm->mdlevel_version = 0; ccm->opcode = CCM_OPCODE; ccm->tlv_offset = 70; ccm->seq = htonl(++cfm->seq); ccm->flags = cfm->ccm_interval; memcpy(ccm->maid, cfm->maid, sizeof ccm->maid); memset(ccm->zero, 0, sizeof ccm->zero); ccm->end_tlv = 0; atomic_read(&cfm->extended, &extended); if (extended) { ccm->mpid = htons(hash_mpid(cfm->mpid)); ccm->mpid64 = htonll(cfm->mpid); ccm->opdown = !cfm->opup; } else { ccm->mpid = htons(cfm->mpid); ccm->mpid64 = htonll(0); ccm->opdown = 0; } if (cfm->ccm_interval == 0) { ovs_assert(extended); ccm->interval_ms_x = htons(cfm->ccm_interval_ms); } else { ccm->interval_ms_x = htons(0); } if (cfm->booted && hmap_is_empty(&cfm->remote_mps)) { ccm->flags |= CCM_RDI_MASK; } if (cfm->last_tx) { long long int delay = time_msec() - cfm->last_tx; if (delay > (cfm->ccm_interval_ms * 3 / 2)) { VLOG_WARN("%s: long delay of %lldms (expected %dms) sending CCM" " seq %"PRIu32, cfm->name, delay, cfm->ccm_interval_ms, cfm->seq); } } cfm->last_tx = time_msec(); ovs_mutex_unlock(&mutex); } void cfm_wait(struct cfm *cfm) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); timer_wait(&cfm->tx_timer); timer_wait(&cfm->fault_timer); ovs_mutex_unlock(&mutex); } /* Configures 'cfm' with settings from 's'. 
*/ bool cfm_configure(struct cfm *cfm, const struct cfm_settings *s) OVS_EXCLUDED(mutex) { uint8_t interval; int interval_ms; if (!cfm_is_valid_mpid(s->extended, s->mpid) || s->interval <= 0) { return false; } ovs_mutex_lock(&mutex); cfm->mpid = s->mpid; cfm->opup = s->opup; interval = ms_to_ccm_interval(s->interval); interval_ms = ccm_interval_to_ms(interval); atomic_store(&cfm->check_tnl_key, s->check_tnl_key); atomic_store(&cfm->extended, s->extended); cfm->ccm_vlan = s->ccm_vlan; cfm->ccm_pcp = s->ccm_pcp & (VLAN_PCP_MASK >> VLAN_PCP_SHIFT); if (s->extended && interval_ms != s->interval) { interval = 0; interval_ms = MIN(s->interval, UINT16_MAX); } if (s->extended && s->demand) { if (!cfm->demand) { cfm->demand = true; cfm->rx_packets = cfm_rx_packets(cfm); } } else { cfm->demand = false; } if (interval != cfm->ccm_interval || interval_ms != cfm->ccm_interval_ms) { cfm->ccm_interval = interval; cfm->ccm_interval_ms = interval_ms; timer_set_expired(&cfm->tx_timer); timer_set_duration(&cfm->fault_timer, cfm_fault_interval(cfm)); } ovs_mutex_unlock(&mutex); return true; } /* Must be called when the netdev owned by 'cfm' should change. */ void cfm_set_netdev(struct cfm *cfm, const struct netdev *netdev) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); if (cfm->netdev != netdev) { netdev_close(cfm->netdev); cfm->netdev = netdev_ref(netdev); } ovs_mutex_unlock(&mutex); } /* Returns true if 'cfm' should process packets from 'flow'. Sets * fields in 'wc' that were used to make the determination. 
*/ bool cfm_should_process_flow(const struct cfm *cfm_, const struct flow *flow, struct flow_wildcards *wc) { struct cfm *cfm = CONST_CAST(struct cfm *, cfm_); bool check_tnl_key; atomic_read(&cfm->check_tnl_key, &check_tnl_key); memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); if (check_tnl_key) { memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id); } return (ntohs(flow->dl_type) == ETH_TYPE_CFM && eth_addr_equals(flow->dl_dst, cfm_ccm_addr(cfm)) && (!check_tnl_key || flow->tunnel.tun_id == htonll(0))); } /* Updates internal statistics relevant to packet 'p'. Should be called on * every packet whose flow returned true when passed to * cfm_should_process_flow. */ void cfm_process_heartbeat(struct cfm *cfm, const struct ofpbuf *p) OVS_EXCLUDED(mutex) { struct ccm *ccm; struct eth_header *eth; ovs_mutex_lock(&mutex); eth = p->l2; ccm = ofpbuf_at(p, (uint8_t *)p->l3 - (uint8_t *)p->data, CCM_ACCEPT_LEN); if (!ccm) { VLOG_INFO_RL(&rl, "%s: Received an unparseable 802.1ag CCM heartbeat.", cfm->name); goto out; } if (ccm->opcode != CCM_OPCODE) { VLOG_INFO_RL(&rl, "%s: Received an unsupported 802.1ag message. " "(opcode %u)", cfm->name, ccm->opcode); goto out; } /* According to the 802.1ag specification, reception of a CCM with an * incorrect ccm_interval, unexpected MAID, or unexpected MPID should * trigger a fault. We ignore this requirement for several reasons. * * Faults can cause a controller or Open vSwitch to make potentially * expensive changes to the network topology. It seems prudent to trigger * them judiciously, especially when CFM is used to check slave status of * bonds. Furthermore, faults can be maliciously triggered by crafting * unexpected CCMs. 
*/ if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) { cfm->recv_fault |= CFM_FAULT_MAID; VLOG_WARN_RL(&rl, "%s: Received unexpected remote MAID from MAC " ETH_ADDR_FMT, cfm->name, ETH_ADDR_ARGS(eth->eth_src)); } else { uint8_t ccm_interval = ccm->flags & 0x7; bool ccm_rdi = ccm->flags & CCM_RDI_MASK; uint16_t ccm_interval_ms_x = ntohs(ccm->interval_ms_x); struct remote_mp *rmp; uint64_t ccm_mpid; uint32_t ccm_seq; bool ccm_opdown; bool extended; enum cfm_fault_reason cfm_fault = 0; atomic_read(&cfm->extended, &extended); if (extended) { ccm_mpid = ntohll(ccm->mpid64); ccm_opdown = ccm->opdown; } else { ccm_mpid = ntohs(ccm->mpid); ccm_opdown = false; } ccm_seq = ntohl(ccm->seq); if (ccm_interval != cfm->ccm_interval) { VLOG_WARN_RL(&rl, "%s: received a CCM with an unexpected interval" " (%"PRIu8") from RMP %"PRIu64, cfm->name, ccm_interval, ccm_mpid); } if (extended && ccm_interval == 0 && ccm_interval_ms_x != cfm->ccm_interval_ms) { VLOG_WARN_RL(&rl, "%s: received a CCM with an unexpected extended" " interval (%"PRIu16"ms) from RMP %"PRIu64, cfm->name, ccm_interval_ms_x, ccm_mpid); } rmp = lookup_remote_mp(cfm, ccm_mpid); if (!rmp) { if (hmap_count(&cfm->remote_mps) < CFM_MAX_RMPS) { rmp = xzalloc(sizeof *rmp); hmap_insert(&cfm->remote_mps, &rmp->node, hash_mpid(ccm_mpid)); } else { cfm_fault |= CFM_FAULT_OVERFLOW; VLOG_WARN_RL(&rl, "%s: dropped CCM with MPID %"PRIu64" from MAC " ETH_ADDR_FMT, cfm->name, ccm_mpid, ETH_ADDR_ARGS(eth->eth_src)); } } if (ccm_rdi) { cfm_fault |= CFM_FAULT_RDI; VLOG_DBG("%s: RDI bit flagged from RMP %"PRIu64, cfm->name, ccm_mpid); } VLOG_DBG("%s: received CCM (seq %"PRIu32") (mpid %"PRIu64")" " (interval %"PRIu8") (RDI %s)", cfm->name, ccm_seq, ccm_mpid, ccm_interval, ccm_rdi ? 
"true" : "false"); if (rmp) { if (rmp->mpid == cfm->mpid) { cfm_fault |= CFM_FAULT_LOOPBACK; VLOG_WARN_RL(&rl,"%s: received CCM with local MPID" " %"PRIu64, cfm->name, rmp->mpid); } if (rmp->seq && ccm_seq != (rmp->seq + 1)) { VLOG_WARN_RL(&rl, "%s: (mpid %"PRIu64") detected sequence" " numbers which indicate possible connectivity" " problems (previous %"PRIu32") (current %"PRIu32 ")", cfm->name, ccm_mpid, rmp->seq, ccm_seq); } rmp->mpid = ccm_mpid; if (!cfm_fault) { rmp->num_health_ccm++; } rmp->recv = true; cfm->recv_fault |= cfm_fault; rmp->seq = ccm_seq; rmp->opup = !ccm_opdown; rmp->last_rx = time_msec(); } } out: ovs_mutex_unlock(&mutex); } static int cfm_get_fault__(const struct cfm *cfm) OVS_REQUIRES(mutex) { if (cfm->fault_override >= 0) { return cfm->fault_override ? CFM_FAULT_OVERRIDE : 0; } return cfm->fault; } /* Gets the fault status of 'cfm'. Returns a bit mask of 'cfm_fault_reason's * indicating the cause of the connectivity fault, or zero if there is no * fault. */ int cfm_get_fault(const struct cfm *cfm) OVS_EXCLUDED(mutex) { int fault; ovs_mutex_lock(&mutex); fault = cfm_get_fault__(cfm); ovs_mutex_unlock(&mutex); return fault; } /* Gets the health of 'cfm'. Returns an integer between 0 and 100 indicating * the health of the link as a percentage of ccm frames received in * CFM_HEALTH_INTERVAL * 'fault_interval' if there is only 1 remote_mpid, * returns 0 if there are no remote_mpids, and returns -1 if there are more * than 1 remote_mpids. */ int cfm_get_health(const struct cfm *cfm) OVS_EXCLUDED(mutex) { int health; ovs_mutex_lock(&mutex); health = cfm->health; ovs_mutex_unlock(&mutex); return health; } /* Gets the operational state of 'cfm'. 'cfm' is considered operationally down * if it has received a CCM with the operationally down bit set from any of its * remote maintenance points. Returns 1 if 'cfm' is operationally up, 0 if * 'cfm' is operationally down, or -1 if 'cfm' has no operational state * (because it isn't in extended mode). 
*/ int cfm_get_opup(const struct cfm *cfm_) OVS_EXCLUDED(mutex) { struct cfm *cfm = CONST_CAST(struct cfm *, cfm_); bool extended; int opup; ovs_mutex_lock(&mutex); atomic_read(&cfm->extended, &extended); opup = extended ? cfm->remote_opup : -1; ovs_mutex_unlock(&mutex); return opup; } /* Populates 'rmps' with an array of remote maintenance points reachable by * 'cfm'. The number of remote maintenance points is written to 'n_rmps'. * 'cfm' retains ownership of the array written to 'rmps' */ void cfm_get_remote_mpids(const struct cfm *cfm, uint64_t **rmps, size_t *n_rmps) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); *rmps = xmemdup(cfm->rmps_array, cfm->rmps_array_len * sizeof **rmps); *n_rmps = cfm->rmps_array_len; ovs_mutex_unlock(&mutex); } static struct cfm * cfm_find(const char *name) OVS_REQUIRES(mutex) { struct cfm *cfm; HMAP_FOR_EACH_WITH_HASH (cfm, hmap_node, hash_string(name, 0), all_cfms) { if (!strcmp(cfm->name, name)) { return cfm; } } return NULL; } static void cfm_print_details(struct ds *ds, struct cfm *cfm) OVS_REQUIRES(mutex) { struct remote_mp *rmp; bool extended; int fault; atomic_read(&cfm->extended, &extended); ds_put_format(ds, "---- %s ----\n", cfm->name); ds_put_format(ds, "MPID %"PRIu64":%s%s\n", cfm->mpid, extended ? " extended" : "", cfm->fault_override >= 0 ? " fault_override" : ""); fault = cfm_get_fault__(cfm); if (fault) { ds_put_cstr(ds, "\tfault: "); ds_put_cfm_fault(ds, fault); ds_put_cstr(ds, "\n"); } if (cfm->health == -1) { ds_put_format(ds, "\taverage health: undefined\n"); } else { ds_put_format(ds, "\taverage health: %d\n", cfm->health); } ds_put_format(ds, "\topstate: %s\n", cfm->opup ? "up" : "down"); ds_put_format(ds, "\tremote_opstate: %s\n", cfm->remote_opup ? 
"up" : "down"); ds_put_format(ds, "\tinterval: %dms\n", cfm->ccm_interval_ms); ds_put_format(ds, "\tnext CCM tx: %lldms\n", timer_msecs_until_expired(&cfm->tx_timer)); ds_put_format(ds, "\tnext fault check: %lldms\n", timer_msecs_until_expired(&cfm->fault_timer)); HMAP_FOR_EACH (rmp, node, &cfm->remote_mps) { ds_put_format(ds, "Remote MPID %"PRIu64"\n", rmp->mpid); ds_put_format(ds, "\trecv since check: %s\n", rmp->recv ? "true" : "false"); ds_put_format(ds, "\topstate: %s\n", rmp->opup? "up" : "down"); } } static void cfm_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) OVS_EXCLUDED(mutex) { struct ds ds = DS_EMPTY_INITIALIZER; struct cfm *cfm; ovs_mutex_lock(&mutex); if (argc > 1) { cfm = cfm_find(argv[1]); if (!cfm) { unixctl_command_reply_error(conn, "no such CFM object"); goto out; } cfm_print_details(&ds, cfm); } else { HMAP_FOR_EACH (cfm, hmap_node, all_cfms) { cfm_print_details(&ds, cfm); } } unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); out: ovs_mutex_unlock(&mutex); } static void cfm_unixctl_set_fault(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) OVS_EXCLUDED(mutex) { const char *fault_str = argv[argc - 1]; int fault_override; struct cfm *cfm; ovs_mutex_lock(&mutex); if (!strcasecmp("true", fault_str)) { fault_override = 1; } else if (!strcasecmp("false", fault_str)) { fault_override = 0; } else if (!strcasecmp("normal", fault_str)) { fault_override = -1; } else { unixctl_command_reply_error(conn, "unknown fault string"); goto out; } if (argc > 2) { cfm = cfm_find(argv[1]); if (!cfm) { unixctl_command_reply_error(conn, "no such CFM object"); goto out; } cfm->fault_override = fault_override; } else { HMAP_FOR_EACH (cfm, hmap_node, all_cfms) { cfm->fault_override = fault_override; } } unixctl_command_reply(conn, "OK"); out: ovs_mutex_unlock(&mutex); } 
openvswitch-2.0.1+git20140120/lib/cfm.h000066400000000000000000000055771226605124000171540ustar00rootroot00000000000000/* Copyright (c) 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef CFM_H #define CFM_H 1 #include #include "hmap.h" #include "openvswitch/types.h" struct flow; struct ofpbuf; struct netdev; struct flow_wildcards; #define CFM_RANDOM_VLAN UINT16_MAX #define CFM_FAULT_REASONS \ CFM_FAULT_REASON(RECV, recv) \ CFM_FAULT_REASON(RDI, rdi) \ CFM_FAULT_REASON(MAID, maid) \ CFM_FAULT_REASON(LOOPBACK, loopback) \ CFM_FAULT_REASON(OVERFLOW, overflow) \ CFM_FAULT_REASON(OVERRIDE, override) enum cfm_fault_bit_index { #define CFM_FAULT_REASON(NAME, STR) CFM_FAULT_INDEX_##NAME, CFM_FAULT_REASONS #undef CFM_FAULT_REASON CFM_FAULT_N_REASONS }; enum cfm_fault_reason { #define CFM_FAULT_REASON(NAME, STR) \ CFM_FAULT_##NAME = 1 << CFM_FAULT_INDEX_##NAME, CFM_FAULT_REASONS #undef CFM_FAULT_REASON }; struct cfm_settings { uint64_t mpid; /* The MPID of this CFM. */ int interval; /* The requested transmission interval. */ bool extended; /* Run in extended mode. */ bool demand; /* Run in demand mode. */ bool opup; /* Operational State. */ uint16_t ccm_vlan; /* CCM Vlan tag. Zero if none. CFM_RANDOM_VLAN if random. */ uint8_t ccm_pcp; /* CCM Priority. Zero if none. */ bool check_tnl_key; /* Verify inbound packet key? 
*/ }; void cfm_init(void); struct cfm *cfm_create(const struct netdev *); struct cfm *cfm_ref(const struct cfm *); void cfm_unref(struct cfm *); void cfm_run(struct cfm *); bool cfm_should_send_ccm(struct cfm *); void cfm_compose_ccm(struct cfm *, struct ofpbuf *packet, uint8_t eth_src[6]); void cfm_wait(struct cfm *); bool cfm_configure(struct cfm *, const struct cfm_settings *); void cfm_set_netdev(struct cfm *, const struct netdev *); bool cfm_should_process_flow(const struct cfm *cfm, const struct flow *, struct flow_wildcards *); void cfm_process_heartbeat(struct cfm *, const struct ofpbuf *packet); int cfm_get_fault(const struct cfm *); int cfm_get_health(const struct cfm *); int cfm_get_opup(const struct cfm *); void cfm_get_remote_mpids(const struct cfm *, uint64_t **rmps, size_t *n_rmps); const char *cfm_fault_reason_to_str(int fault); #endif /* cfm.h */ openvswitch-2.0.1+git20140120/lib/classifier.c000066400000000000000000000607021226605124000205150ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "classifier.h" #include #include #include "byte-order.h" #include "dynamic-string.h" #include "flow.h" #include "hash.h" #include "odp-util.h" #include "ofp-util.h" #include "packets.h" #include "ovs-thread.h" static struct cls_table *find_table(const struct classifier *, const struct minimask *); static struct cls_table *insert_table(struct classifier *, const struct minimask *); static void destroy_table(struct classifier *, struct cls_table *); static void update_tables_after_insertion(struct classifier *, struct cls_table *, unsigned int new_priority); static void update_tables_after_removal(struct classifier *, struct cls_table *, unsigned int del_priority); static struct cls_rule *find_match(const struct cls_table *, const struct flow *); static struct cls_rule *find_equal(struct cls_table *, const struct miniflow *, uint32_t hash); static struct cls_rule *insert_rule(struct classifier *, struct cls_table *, struct cls_rule *); /* Iterates RULE over HEAD and all of the cls_rules on HEAD->list. */ #define FOR_EACH_RULE_IN_LIST(RULE, HEAD) \ for ((RULE) = (HEAD); (RULE) != NULL; (RULE) = next_rule_in_list(RULE)) #define FOR_EACH_RULE_IN_LIST_SAFE(RULE, NEXT, HEAD) \ for ((RULE) = (HEAD); \ (RULE) != NULL && ((NEXT) = next_rule_in_list(RULE), true); \ (RULE) = (NEXT)) static struct cls_rule *next_rule_in_list__(struct cls_rule *); static struct cls_rule *next_rule_in_list(struct cls_rule *); /* cls_rule. */ /* Initializes 'rule' to match packets specified by 'match' at the given * 'priority'. 'match' must satisfy the invariant described in the comment at * the definition of struct match. * * The caller must eventually destroy 'rule' with cls_rule_destroy(). * * (OpenFlow uses priorities between 0 and UINT16_MAX, inclusive, but * internally Open vSwitch supports a wider range.) 
*/ void cls_rule_init(struct cls_rule *rule, const struct match *match, unsigned int priority) { minimatch_init(&rule->match, match); rule->priority = priority; } /* Same as cls_rule_init() for initialization from a "struct minimatch". */ void cls_rule_init_from_minimatch(struct cls_rule *rule, const struct minimatch *match, unsigned int priority) { minimatch_clone(&rule->match, match); rule->priority = priority; } /* Initializes 'dst' as a copy of 'src'. * * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ void cls_rule_clone(struct cls_rule *dst, const struct cls_rule *src) { minimatch_clone(&dst->match, &src->match); dst->priority = src->priority; } /* Initializes 'dst' with the data in 'src', destroying 'src'. * * The caller must eventually destroy 'dst' with cls_rule_destroy(). */ void cls_rule_move(struct cls_rule *dst, struct cls_rule *src) { minimatch_move(&dst->match, &src->match); dst->priority = src->priority; } /* Frees memory referenced by 'rule'. Doesn't free 'rule' itself (it's * normally embedded into a larger structure). * * ('rule' must not currently be in a classifier.) */ void cls_rule_destroy(struct cls_rule *rule) { minimatch_destroy(&rule->match); } /* Returns true if 'a' and 'b' match the same packets at the same priority, * false if they differ in some way. */ bool cls_rule_equal(const struct cls_rule *a, const struct cls_rule *b) { return a->priority == b->priority && minimatch_equal(&a->match, &b->match); } /* Returns a hash value for 'rule', folding in 'basis'. */ uint32_t cls_rule_hash(const struct cls_rule *rule, uint32_t basis) { return minimatch_hash(&rule->match, hash_int(rule->priority, basis)); } /* Appends a string describing 'rule' to 's'. */ void cls_rule_format(const struct cls_rule *rule, struct ds *s) { minimatch_format(&rule->match, s, rule->priority); } /* Returns true if 'rule' matches every packet, false otherwise. 
*/ bool cls_rule_is_catchall(const struct cls_rule *rule) { return minimask_is_catchall(&rule->match.mask); } /* Initializes 'cls' as a classifier that initially contains no classification * rules. */ void classifier_init(struct classifier *cls) { cls->n_rules = 0; hmap_init(&cls->tables); list_init(&cls->tables_priority); ovs_rwlock_init(&cls->rwlock); } /* Destroys 'cls'. Rules within 'cls', if any, are not freed; this is the * caller's responsibility. */ void classifier_destroy(struct classifier *cls) { if (cls) { struct cls_table *table, *next_table; HMAP_FOR_EACH_SAFE (table, next_table, hmap_node, &cls->tables) { destroy_table(cls, table); } hmap_destroy(&cls->tables); ovs_rwlock_destroy(&cls->rwlock); } } /* Returns true if 'cls' contains no classification rules, false otherwise. */ bool classifier_is_empty(const struct classifier *cls) { return cls->n_rules == 0; } /* Returns the number of rules in 'cls'. */ int classifier_count(const struct classifier *cls) { return cls->n_rules; } /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller * must not modify or free it. * * If 'cls' already contains an identical rule (including wildcards, values of * fixed fields, and priority), replaces the old rule by 'rule' and returns the * rule that was replaced. The caller takes ownership of the returned rule and * is thus responsible for destroying it with cls_rule_destroy(), freeing the * memory block in which it resides, etc., as necessary. * * Returns NULL if 'cls' does not contain a rule with an identical key, after * inserting the new rule. In this case, no rules are displaced by the new * rule, even rules that cannot have any effect because the new rule matches a * superset of their flows and has higher priority. 
*/ struct cls_rule * classifier_replace(struct classifier *cls, struct cls_rule *rule) { struct cls_rule *old_rule; struct cls_table *table; table = find_table(cls, &rule->match.mask); if (!table) { table = insert_table(cls, &rule->match.mask); } old_rule = insert_rule(cls, table, rule); if (!old_rule) { table->n_table_rules++; cls->n_rules++; } return old_rule; } /* Inserts 'rule' into 'cls'. Until 'rule' is removed from 'cls', the caller * must not modify or free it. * * 'cls' must not contain an identical rule (including wildcards, values of * fixed fields, and priority). Use classifier_find_rule_exactly() to find * such a rule. */ void classifier_insert(struct classifier *cls, struct cls_rule *rule) { struct cls_rule *displaced_rule = classifier_replace(cls, rule); ovs_assert(!displaced_rule); } /* Removes 'rule' from 'cls'. It is the caller's responsibility to destroy * 'rule' with cls_rule_destroy(), freeing the memory block in which 'rule' * resides, etc., as necessary. */ void classifier_remove(struct classifier *cls, struct cls_rule *rule) { struct cls_rule *head; struct cls_table *table; table = find_table(cls, &rule->match.mask); head = find_equal(table, &rule->match.flow, rule->hmap_node.hash); if (head != rule) { list_remove(&rule->list); } else if (list_is_empty(&rule->list)) { hmap_remove(&table->rules, &rule->hmap_node); } else { struct cls_rule *next = CONTAINER_OF(rule->list.next, struct cls_rule, list); list_remove(&rule->list); hmap_replace(&table->rules, &rule->hmap_node, &next->hmap_node); } if (--table->n_table_rules == 0) { destroy_table(cls, table); } else { update_tables_after_removal(cls, table, rule->priority); } cls->n_rules--; } /* Finds and returns the highest-priority rule in 'cls' that matches 'flow'. * Returns a null pointer if no rules in 'cls' match 'flow'. If multiple rules * of equal priority match 'flow', returns one arbitrarily. 
* * If a rule is found and 'wc' is non-null, bitwise-OR's 'wc' with the * set of bits that were significant in the lookup. At some point * earlier, 'wc' should have been initialized (e.g., by * flow_wildcards_init_catchall()). */ struct cls_rule * classifier_lookup(const struct classifier *cls, const struct flow *flow, struct flow_wildcards *wc) { struct cls_table *table; struct cls_rule *best; best = NULL; LIST_FOR_EACH (table, list_node, &cls->tables_priority) { struct cls_rule *rule = find_match(table, flow); if (wc) { flow_wildcards_fold_minimask(wc, &table->mask); } if (rule) { best = rule; LIST_FOR_EACH_CONTINUE (table, list_node, &cls->tables_priority) { if (table->max_priority <= best->priority) { /* Tables in descending priority order, * can not find anything better. */ return best; } rule = find_match(table, flow); if (wc) { flow_wildcards_fold_minimask(wc, &table->mask); } if (rule && rule->priority > best->priority) { best = rule; } } break; } } return best; } /* Finds and returns a rule in 'cls' with exactly the same priority and * matching criteria as 'target'. Returns a null pointer if 'cls' doesn't * contain an exact match. */ struct cls_rule * classifier_find_rule_exactly(const struct classifier *cls, const struct cls_rule *target) { struct cls_rule *head, *rule; struct cls_table *table; table = find_table(cls, &target->match.mask); if (!table) { return NULL; } /* Skip if there is no hope. */ if (target->priority > table->max_priority) { return NULL; } head = find_equal(table, &target->match.flow, miniflow_hash_in_minimask(&target->match.flow, &target->match.mask, 0)); FOR_EACH_RULE_IN_LIST (rule, head) { if (target->priority >= rule->priority) { return target->priority == rule->priority ? rule : NULL; } } return NULL; } /* Finds and returns a rule in 'cls' with priority 'priority' and exactly the * same matching criteria as 'target'. Returns a null pointer if 'cls' doesn't * contain an exact match. 
*/ struct cls_rule * classifier_find_match_exactly(const struct classifier *cls, const struct match *target, unsigned int priority) { struct cls_rule *retval; struct cls_rule cr; cls_rule_init(&cr, target, priority); retval = classifier_find_rule_exactly(cls, &cr); cls_rule_destroy(&cr); return retval; } /* Checks if 'target' would overlap any other rule in 'cls'. Two rules are * considered to overlap if both rules have the same priority and a packet * could match both. */ bool classifier_rule_overlaps(const struct classifier *cls, const struct cls_rule *target) { struct cls_table *table; /* Iterate tables in the descending max priority order. */ LIST_FOR_EACH (table, list_node, &cls->tables_priority) { uint32_t storage[FLOW_U32S]; struct minimask mask; struct cls_rule *head; if (target->priority > table->max_priority) { break; /* Can skip this and the rest of the tables. */ } minimask_combine(&mask, &target->match.mask, &table->mask, storage); HMAP_FOR_EACH (head, hmap_node, &table->rules) { struct cls_rule *rule; FOR_EACH_RULE_IN_LIST (rule, head) { if (rule->priority < target->priority) { break; /* Rules in descending priority order. */ } if (rule->priority == target->priority && miniflow_equal_in_minimask(&target->match.flow, &rule->match.flow, &mask)) { return true; } } } } return false; } /* Returns true if 'rule' exactly matches 'criteria' or if 'rule' is more * specific than 'criteria'. That is, 'rule' matches 'criteria' and this * function returns true if, for every field: * * - 'criteria' and 'rule' specify the same (non-wildcarded) value for the * field, or * * - 'criteria' wildcards the field, * * Conversely, 'rule' does not match 'criteria' and this function returns false * if, for at least one field: * * - 'criteria' and 'rule' specify different values for the field, or * * - 'criteria' specifies a value for the field but 'rule' wildcards it. 
*
 * Equivalently, the truth table for whether a field matches is:
 *
 *                                     rule
 *
 *                   c         wildcard    exact
 *                   r        +---------+---------+
 *                   i   wild |   yes   |   yes   |
 *                   t   card |         |         |
 *                   e        +---------+---------+
 *                   r  exact |    no   |if values|
 *                   i        |         |are equal|
 *                   a        +---------+---------+
 *
 * This is the matching rule used by OpenFlow 1.0 non-strict OFPT_FLOW_MOD
 * commands and by OpenFlow 1.0 aggregate and flow stats.
 *
 * Ignores rule->priority. */
bool
cls_rule_is_loose_match(const struct cls_rule *rule,
                        const struct minimatch *criteria)
{
    /* Two conditions: the mask comparison must not report "extra" bits
     * between the rule's mask and the criteria's mask, and the two flows
     * must be equal under the criteria's mask. */
    return (!minimask_has_extra(&rule->match.mask, &criteria->mask)
            && miniflow_equal_in_minimask(&rule->match.flow, &criteria->flow,
                                          &criteria->mask));
}

/* Iteration. */

/* Returns true if 'target' is null (no filter) or if 'rule''s flow equals
 * 'target''s flow under 'target''s mask.  The masks themselves are not
 * compared here; search_table() does that once per table. */
static bool
rule_matches(const struct cls_rule *rule, const struct cls_rule *target)
{
    return (!target
            || miniflow_equal_in_minimask(&rule->match.flow,
                                          &target->match.flow,
                                          &target->match.mask));
}

/* Returns the first rule in 'table''s hmap that satisfies rule_matches()
 * against 'target', or a null pointer if there is none.  The table is
 * skipped entirely, without visiting its rules, when 'target' is nonnull
 * and the mask check on the table's mask fails. */
static struct cls_rule *
search_table(const struct cls_table *table, const struct cls_rule *target)
{
    if (!target || !minimask_has_extra(&table->mask, &target->match.mask)) {
        struct cls_rule *rule;

        HMAP_FOR_EACH (rule, hmap_node, &table->rules) {
            if (rule_matches(rule, target)) {
                return rule;
            }
        }
    }
    return NULL;
}

/* Initializes 'cursor' for iterating through rules in 'cls':
 *
 *     - If 'target' is null, the cursor will visit every rule in 'cls'.
 *
 *     - If 'target' is nonnull, the cursor will visit each 'rule' in 'cls'
 *       such that cls_rule_is_loose_match(rule, target) returns true.
 *
 * Ignores target->priority. */
void
cls_cursor_init(struct cls_cursor *cursor, const struct classifier *cls,
                const struct cls_rule *target)
{
    cursor->cls = cls;
    /* A catch-all target is stored as null, i.e. it is treated exactly like
     * "no target".  'cursor->table' is not set here; cls_cursor_first()
     * assigns it. */
    cursor->target = target && !cls_rule_is_catchall(target) ? target : NULL;
}

/* Returns the first matching cls_rule in 'cursor''s iteration, or a null
 * pointer if there are no matches.
*/
struct cls_rule *
cls_cursor_first(struct cls_cursor *cursor)
{
    struct cls_table *table;

    HMAP_FOR_EACH (table, hmap_node, &cursor->cls->tables) {
        struct cls_rule *rule = search_table(table, cursor->target);
        if (rule) {
            /* Remember the table so that cls_cursor_next() can resume. */
            cursor->table = table;
            return rule;
        }
    }

    return NULL;
}

/* Returns the next matching cls_rule in 'cursor''s iteration, or a null
 * pointer if there are no more matches. */
struct cls_rule *
cls_cursor_next(struct cls_cursor *cursor, const struct cls_rule *rule_)
{
    struct cls_rule *rule = CONST_CAST(struct cls_rule *, rule_);
    const struct cls_table *table;
    struct cls_rule *next;

    /* Step 1: stay within the current list of same-match rules while the
     * next element has a lower priority than the current one. */
    next = next_rule_in_list__(rule);
    if (next->priority < rule->priority) {
        return next;
    }

    /* 'next' is the head of the list, that is, the rule that is included in
     * the table's hmap.  (This is important when the classifier contains rules
     * that differ only in priority.) */
    rule = next;
    /* Step 2: continue with the remaining list heads of the current
     * table. */
    HMAP_FOR_EACH_CONTINUE (rule, hmap_node, &cursor->table->rules) {
        if (rule_matches(rule, cursor->target)) {
            return rule;
        }
    }

    /* Step 3: move on to the tables after the current one. */
    table = cursor->table;
    HMAP_FOR_EACH_CONTINUE (table, hmap_node, &cursor->cls->tables) {
        rule = search_table(table, cursor->target);
        if (rule) {
            cursor->table = table;
            return rule;
        }
    }

    return NULL;
}

/* Returns the table in 'cls' whose mask equals 'mask', or a null pointer if
 * there is none.  Only the hash bucket for minimask_hash(mask, 0) is
 * searched, the same hash that insert_table() files tables under. */
static struct cls_table *
find_table(const struct classifier *cls, const struct minimask *mask)
{
    struct cls_table *table;

    HMAP_FOR_EACH_IN_BUCKET (table, hmap_node, minimask_hash(mask, 0),
                             &cls->tables) {
        if (minimask_equal(mask, &table->mask)) {
            return table;
        }
    }
    return NULL;
}

/* Allocates a new, empty table for 'mask' (a private clone of 'mask' is
 * stored), adds it to 'cls->tables' and appends it to the back of
 * 'cls->tables_priority'.  Returns the new table.
 *
 * NOTE(review): 'max_priority' and 'max_count' are not set explicitly;
 * presumably xzalloc() zero-fills the allocation -- confirm. */
static struct cls_table *
insert_table(struct classifier *cls, const struct minimask *mask)
{
    struct cls_table *table;

    table = xzalloc(sizeof *table);
    hmap_init(&table->rules);
    minimask_clone(&table->mask, mask);
    hmap_insert(&cls->tables, &table->hmap_node, minimask_hash(mask, 0));
    list_push_back(&cls->tables_priority, &table->list_node);

    return table;
}

/* Unlinks 'table' from both 'cls->tables' and the priority list, releases
 * its mask and rule hmap, and frees 'table' itself.  The rules are not
 * touched here. */
static void
destroy_table(struct classifier *cls, struct cls_table *table)
{
    minimask_destroy(&table->mask);
    hmap_remove(&cls->tables, &table->hmap_node);
    hmap_destroy(&table->rules);
    list_remove(&table->list_node);
    free(table);
}

/* This function performs the following updates for 'table' in 'cls' following
 * the addition of a new rule with priority 'new_priority' to 'table':
 *
 *    - Update 'table->max_priority' and 'table->max_count' if necessary.
 *
 *    - Update 'table''s position in 'cls->tables_priority' if necessary.
 *
 * This function should only be called after adding a new rule, not after
 * replacing a rule by an identical one or modifying a rule in-place. */
static void
update_tables_after_insertion(struct classifier *cls, struct cls_table *table,
                              unsigned int new_priority)
{
    if (new_priority == table->max_priority) {
        ++table->max_count;
    } else if (new_priority > table->max_priority) {
        struct cls_table *iter;

        table->max_priority = new_priority;
        table->max_count = 1;

        /* Possibly move 'table' earlier in the priority list.  If we break out
         * of the loop, then 'table' should be moved just after that 'iter'.
         * If the loop terminates normally, then 'iter' will be the list head
         * and we'll move table just after that (i.e. to the front of the
         * list). */
        iter = table;
        LIST_FOR_EACH_REVERSE_CONTINUE (iter, list_node,
                                        &cls->tables_priority) {
            if (iter->max_priority >= table->max_priority) {
                break;
            }
        }

        /* Move 'table' just after 'iter' (unless it's already there). */
        if (iter->list_node.next != &table->list_node) {
            list_splice(iter->list_node.next,
                        &table->list_node, table->list_node.next);
        }
    }
    /* A lower 'new_priority' changes neither the maximum nor the order. */
}

/* This function performs the following updates for 'table' in 'cls' following
 * the deletion of a rule with priority 'del_priority' from 'table':
 *
 *    - Update 'table->max_priority' and 'table->max_count' if necessary.
 *
 *    - Update 'table''s position in 'cls->tables_priority' if necessary.
 *
 * This function should only be called after removing a rule, not after
 * replacing a rule by an identical one or modifying a rule in-place.
*/
static void
update_tables_after_removal(struct classifier *cls, struct cls_table *table,
                            unsigned int del_priority)
{
    struct cls_table *iter;

    /* Work is only needed when the last rule at the table's maximum
     * priority has just been removed. */
    if (del_priority == table->max_priority && --table->max_count == 0) {
        struct cls_rule *head;

        /* Recompute the maximum from the list heads only; each head is the
         * highest-priority rule of its list (see insert_rule()). */
        table->max_priority = 0;
        HMAP_FOR_EACH (head, hmap_node, &table->rules) {
            if (head->priority > table->max_priority) {
                table->max_priority = head->priority;
                table->max_count = 1;
            } else if (head->priority == table->max_priority) {
                ++table->max_count;
            }
        }

        /* Possibly move 'table' later in the priority list.  If we break out
         * of the loop, then 'table' should be moved just before that 'iter'.
         * If the loop terminates normally, then 'iter' will be the list head
         * and we'll move table just before that (i.e. to the back of the
         * list). */
        iter = table;
        LIST_FOR_EACH_CONTINUE (iter, list_node, &cls->tables_priority) {
            if (iter->max_priority <= table->max_priority) {
                break;
            }
        }

        /* Move 'table' just before 'iter' (unless it's already there). */
        if (iter->list_node.prev != &table->list_node) {
            list_splice(&iter->list_node,
                        &table->list_node, table->list_node.next);
        }
    }
}

/* Returns the list head in 'table' whose flow equals 'flow' under the
 * table's mask, or a null pointer if there is none.  Only rules stored under
 * flow_hash_in_minimask(flow, &table->mask, 0) are compared. */
static struct cls_rule *
find_match(const struct cls_table *table, const struct flow *flow)
{
    uint32_t hash = flow_hash_in_minimask(flow, &table->mask, 0);
    struct cls_rule *rule;

    HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &table->rules) {
        if (miniflow_equal_flow_in_minimask(&rule->match.flow, flow,
                                            &table->mask)) {
            return rule;
        }
    }

    return NULL;
}

/* Returns the list head in 'table' whose miniflow is equal to 'flow', or a
 * null pointer if there is none.  'hash' must be the hash the rule was
 * inserted under; insert_rule() uses miniflow_hash_in_minimask(). */
static struct cls_rule *
find_equal(struct cls_table *table, const struct miniflow *flow, uint32_t hash)
{
    struct cls_rule *head;

    HMAP_FOR_EACH_WITH_HASH (head, hmap_node, hash, &table->rules) {
        if (miniflow_equal(&head->match.flow, flow)) {
            return head;
        }
    }
    return NULL;
}

/* Inserts 'new' into 'table'.  If 'table' already holds a rule with the same
 * flow and the same priority, that rule is replaced in the list by 'new' and
 * returned; otherwise 'new' is added and a null pointer is returned.
 *
 * The table's max-priority bookkeeping is refreshed only when no rule was
 * replaced. */
static struct cls_rule *
insert_rule(struct classifier *cls,
            struct cls_table *table, struct cls_rule *new)
{
    struct cls_rule *head;
    struct cls_rule *old = NULL;

    new->hmap_node.hash = miniflow_hash_in_minimask(&new->match.flow,
                                                    &new->match.mask, 0);

    head = find_equal(table, &new->match.flow, new->hmap_node.hash);
    if (!head) {
        /* First rule with this flow: it becomes a list head of its own. */
        hmap_insert(&table->rules, &new->hmap_node, new->hmap_node.hash);
        list_init(&new->list);
        goto out;
    } else {
        /* Scan the list for the insertion point that will keep the list in
         * order of decreasing priority. */
        struct cls_rule *rule;

        FOR_EACH_RULE_IN_LIST (rule, head) {
            if (new->priority >= rule->priority) {
                if (rule == head) {
                    /* 'new' is the new highest-priority flow in the list. */
                    hmap_replace(&table->rules,
                                 &rule->hmap_node, &new->hmap_node);
                }

                if (new->priority == rule->priority) {
                    /* Same flow and same priority: 'new' takes the list
                     * position of 'rule', which is handed back to the
                     * caller. */
                    list_replace(&new->list, &rule->list);
                    old = rule;
                    goto out;
                } else {
                    list_insert(&rule->list, &new->list);
                    goto out;
                }
            }
        }

        /* Insert 'new' at the end of the list. */
        list_push_back(&head->list, &new->list);
    }

 out:
    if (!old) {
        update_tables_after_insertion(cls, table, new->priority);
    }
    return old;
}

/* Returns the rule that follows 'rule' in its list, with no end-of-list
 * check: per the comment in cls_cursor_next(), the result may be the list
 * head again.  Callers compare priorities to detect that. */
static struct cls_rule *
next_rule_in_list__(struct cls_rule *rule)
{
    struct cls_rule *next = OBJECT_CONTAINING(rule->list.next, next, list);
    return next;
}

/* Returns the rule after 'rule' in its list if that rule has a strictly
 * lower priority, otherwise a null pointer. */
static struct cls_rule *
next_rule_in_list(struct cls_rule *rule)
{
    struct cls_rule *next = next_rule_in_list__(rule);
    return next->priority < rule->priority ? next : NULL;
}
openvswitch-2.0.1+git20140120/lib/classifier.h000066400000000000000000000147431226605124000205260ustar00rootroot00000000000000/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef CLASSIFIER_H
#define CLASSIFIER_H 1

/* Flow classifier.
* * A classifier is a "struct classifier", * a hash map from a set of wildcards to a "struct cls_table", * a hash map from fixed field values to "struct cls_rule", * which can contain a list of otherwise identical rules * with lower priorities. * * Thread-safety * ============= * * When locked properly, the classifier is thread safe as long as the following * conditions are satisfied. * - Only the main thread calls functions requiring a write lock. * - Only the main thread is allowed to iterate over rules. */ #include "flow.h" #include "hmap.h" #include "list.h" #include "match.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "ovs-thread.h" #include "util.h" #ifdef __cplusplus extern "C" { #endif /* Needed only for the lock annotation in struct classifier. */ extern struct ovs_mutex ofproto_mutex; /* A flow classifier. */ struct classifier { int n_rules; /* Total number of rules. */ struct hmap tables; /* Contains "struct cls_table"s. */ struct list tables_priority; /* Tables in descending priority order */ struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex); }; /* A set of rules that all have the same fields wildcarded. */ struct cls_table { struct hmap_node hmap_node; /* Within struct classifier 'tables' hmap. */ struct list list_node; /* Within classifier 'tables_priority_list' */ struct hmap rules; /* Contains "struct cls_rule"s. */ struct minimask mask; /* Wildcards for fields. */ int n_table_rules; /* Number of rules, including duplicates. */ unsigned int max_priority; /* Max priority of any rule in the table. */ unsigned int max_count; /* Count of max_priority rules. */ }; /* Returns true if 'table' is a "catch-all" table that will match every * packet (if there is no higher-priority match). */ static inline bool cls_table_is_catchall(const struct cls_table *table) { return minimask_is_catchall(&table->mask); } /* A rule in a "struct classifier". */ struct cls_rule { struct hmap_node hmap_node; /* Within struct cls_table 'rules'. 
*/ struct list list; /* List of identical, lower-priority rules. */ struct minimatch match; /* Matching rule. */ unsigned int priority; /* Larger numbers are higher priorities. */ }; void cls_rule_init(struct cls_rule *, const struct match *, unsigned int priority); void cls_rule_init_from_minimatch(struct cls_rule *, const struct minimatch *, unsigned int priority); void cls_rule_clone(struct cls_rule *, const struct cls_rule *); void cls_rule_move(struct cls_rule *dst, struct cls_rule *src); void cls_rule_destroy(struct cls_rule *); bool cls_rule_equal(const struct cls_rule *, const struct cls_rule *); uint32_t cls_rule_hash(const struct cls_rule *, uint32_t basis); void cls_rule_format(const struct cls_rule *, struct ds *); bool cls_rule_is_catchall(const struct cls_rule *); bool cls_rule_is_loose_match(const struct cls_rule *rule, const struct minimatch *criteria); void classifier_init(struct classifier *cls); void classifier_destroy(struct classifier *); bool classifier_is_empty(const struct classifier *cls) OVS_REQ_RDLOCK(cls->rwlock); int classifier_count(const struct classifier *cls) OVS_REQ_RDLOCK(cls->rwlock); void classifier_insert(struct classifier *cls, struct cls_rule *) OVS_REQ_WRLOCK(cls->rwlock); struct cls_rule *classifier_replace(struct classifier *cls, struct cls_rule *) OVS_REQ_WRLOCK(cls->rwlock); void classifier_remove(struct classifier *cls, struct cls_rule *) OVS_REQ_WRLOCK(cls->rwlock); struct cls_rule *classifier_lookup(const struct classifier *cls, const struct flow *, struct flow_wildcards *) OVS_REQ_RDLOCK(cls->rwlock); bool classifier_rule_overlaps(const struct classifier *cls, const struct cls_rule *) OVS_REQ_RDLOCK(cls->rwlock); typedef void cls_cb_func(struct cls_rule *, void *aux); struct cls_rule *classifier_find_rule_exactly(const struct classifier *cls, const struct cls_rule *) OVS_REQ_RDLOCK(cls->rwlock); struct cls_rule *classifier_find_match_exactly(const struct classifier *cls, const struct match *, unsigned int priority) 
OVS_REQ_RDLOCK(cls->rwlock); /* Iteration. */ struct cls_cursor { const struct classifier *cls; const struct cls_table *table; const struct cls_rule *target; }; void cls_cursor_init(struct cls_cursor *cursor, const struct classifier *cls, const struct cls_rule *match) OVS_REQ_RDLOCK(cls->rwlock); struct cls_rule *cls_cursor_first(struct cls_cursor *cursor); struct cls_rule *cls_cursor_next(struct cls_cursor *cursor, const struct cls_rule *); #define CLS_CURSOR_FOR_EACH(RULE, MEMBER, CURSOR) \ for (ASSIGN_CONTAINER(RULE, cls_cursor_first(CURSOR), MEMBER); \ RULE != OBJECT_CONTAINING(NULL, RULE, MEMBER); \ ASSIGN_CONTAINER(RULE, cls_cursor_next(CURSOR, &(RULE)->MEMBER), \ MEMBER)) #define CLS_CURSOR_FOR_EACH_SAFE(RULE, NEXT, MEMBER, CURSOR) \ for (ASSIGN_CONTAINER(RULE, cls_cursor_first(CURSOR), MEMBER); \ (RULE != OBJECT_CONTAINING(NULL, RULE, MEMBER) \ ? ASSIGN_CONTAINER(NEXT, cls_cursor_next(CURSOR, &(RULE)->MEMBER), \ MEMBER), 1 \ : 0); \ (RULE) = (NEXT)) #ifdef __cplusplus } #endif #endif /* classifier.h */ openvswitch-2.0.1+git20140120/lib/command-line.c000066400000000000000000000151151226605124000207320ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "command-line.h" #include #include #include #include "ovs-thread.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(command_line); /* Given the GNU-style long options in 'options', returns a string that may be * passed to getopt() with the corresponding short options. The caller is * responsible for freeing the string. */ char * long_options_to_short_options(const struct option options[]) { char short_options[UCHAR_MAX * 3 + 1]; char *p = short_options; for (; options->name; options++) { const struct option *o = options; if (o->flag == NULL && o->val > 0 && o->val <= UCHAR_MAX) { *p++ = o->val; if (o->has_arg == required_argument) { *p++ = ':'; } else if (o->has_arg == optional_argument) { *p++ = ':'; *p++ = ':'; } } } *p = '\0'; return xstrdup(short_options); } /* Runs the command designated by argv[0] within the command table specified by * 'commands', which must be terminated by a command whose 'name' member is a * null pointer. * * Command-line options should be stripped off, so that a typical invocation * looks like "run_command(argc - optind, argv + optind, my_commands);". */ void run_command(int argc, char *argv[], const struct command commands[]) { const struct command *p; if (argc < 1) { ovs_fatal(0, "missing command name; use --help for help"); } for (p = commands; p->name != NULL; p++) { if (!strcmp(p->name, argv[0])) { int n_arg = argc - 1; if (n_arg < p->min_args) { VLOG_FATAL( "'%s' command requires at least %d arguments", p->name, p->min_args); } else if (n_arg > p->max_args) { VLOG_FATAL("'%s' command takes at most %d arguments", p->name, p->max_args); } else { p->handler(argc, argv); if (ferror(stdout)) { VLOG_FATAL("write to stdout failed"); } if (ferror(stderr)) { VLOG_FATAL("write to stderr failed"); } return; } } } VLOG_FATAL("unknown command '%s'; use --help for help", argv[0]); } /* Process title. 
*/

#ifdef LINUX_DATAPATH
static struct ovs_mutex proctitle_mutex = OVS_MUTEX_INITIALIZER;

/* Start of command-line arguments in memory. */
static char *argv_start OVS_GUARDED_BY(proctitle_mutex);

/* Number of bytes of command-line arguments. */
static size_t argv_size OVS_GUARDED_BY(proctitle_mutex);

/* Saved command-line arguments. */
static char *saved_proctitle OVS_GUARDED_BY(proctitle_mutex);

/* Prepares the process so that proctitle_set() can later succeed.
 *
 * This modifies the argv[] array so that it no longer points into the memory
 * that it originally does.  Later, proctitle_set() might overwrite that
 * memory.  That means that this function should be called before anything else
 * that accesses the process's argv[] array.  Ideally, it should be called
 * before anything else, period, at the very beginning of program
 * execution. */
void
proctitle_init(int argc, char **argv)
{
    int i;

    assert_single_threaded();
    if (!argc || !argv[0]) {
        /* This situation should never occur, but... */
        return;
    }

    ovs_mutex_lock(&proctitle_mutex);
    /* Specialized version of first loop iteration below. */
    argv_start = argv[0];
    argv_size = strlen(argv[0]) + 1;
    argv[0] = xstrdup(argv[0]);

    for (i = 1; i < argc; i++) {
        size_t size = strlen(argv[i]) + 1;

        /* Add (argv[i], strlen(argv[i])+1) to (argv_start, argv_size). */
        if (argv[i] + size == argv_start) {
            /* Arguments grow downward in memory. */
            argv_start -= size;
            argv_size += size;
        } else if (argv[i] == argv_start + argv_size) {
            /* Arguments grow upward in memory. */
            argv_size += size;
        } else {
            /* Arguments not contiguous.  (Is this really Linux?) */
            /* The region is simply not extended by this argument. */
        }

        /* Copy out the old argument so we can reuse the space. */
        argv[i] = xstrdup(argv[i]);
    }
    ovs_mutex_unlock(&proctitle_mutex);
}

/* Changes the name of the process, as shown by "ps", to the program name
 * followed by 'format', which is formatted as if by printf().
 *
 * Does nothing if proctitle_init() has not recorded an argument area or if
 * that area is smaller than 8 bytes. */
void
proctitle_set(const char *format, ...)
{
    va_list args;
    int n;

    ovs_mutex_lock(&proctitle_mutex);
    /* The 8-byte minimum also guarantees that the 4-byte "..." write at
     * 'argv_size - 4' below stays inside the area. */
    if (!argv_start || argv_size < 8) {
        goto out;
    }

    /* Save the original contents once, for proctitle_restore(). */
    if (!saved_proctitle) {
        saved_proctitle = xmemdup(argv_start, argv_size);
    }

    va_start(args, format);
    n = snprintf(argv_start, argv_size, "%s: ", program_name);
    /* 'n' counts the bytes the complete title would need; only format the
     * user part if the prefix itself fit. */
    if (n < argv_size) {
        n += vsnprintf(argv_start + n, argv_size - n, format, args);
    }
    if (n >= argv_size) {
        /* The name is too long, so add an ellipsis at the end. */
        strcpy(&argv_start[argv_size - 4], "...");
    } else {
        /* Fill the extra space with null bytes, so that trailing bytes don't
         * show up in the command line. */
        memset(&argv_start[n], '\0', argv_size - n);
    }
    va_end(args);

out:
    ovs_mutex_unlock(&proctitle_mutex);
}

/* Restores the process's original command line, as seen by "ps". */
void
proctitle_restore(void)
{
    ovs_mutex_lock(&proctitle_mutex);
    if (saved_proctitle) {
        memcpy(argv_start, saved_proctitle, argv_size);
        free(saved_proctitle);
        /* Null again so that the next proctitle_set() takes a new copy. */
        saved_proctitle = NULL;
    }
    ovs_mutex_unlock(&proctitle_mutex);
}
#else  /* !LINUX_DATAPATH*/
/* Stubs that don't do anything on non-Linux systems. */

void
proctitle_init(int argc OVS_UNUSED, char **argv OVS_UNUSED)
{
}

#if !(defined(__FreeBSD__) || defined(__NetBSD__))
/* On these platforms we #define this to setproctitle. */
void
proctitle_set(const char *format OVS_UNUSED, ...)
{
}
#endif

void
proctitle_restore(void)
{
}
#endif  /* !LINUX_DATAPATH */
openvswitch-2.0.1+git20140120/lib/command-line.h000066400000000000000000000023611226605124000207360ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2010 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef COMMAND_LINE_H #define COMMAND_LINE_H 1 /* Utilities for command-line parsing. */ #include "compiler.h" struct option; struct command { const char *name; int min_args; int max_args; void (*handler)(int argc, char *argv[]); }; char *long_options_to_short_options(const struct option *options); void run_command(int argc, char *argv[], const struct command[]); void proctitle_init(int argc, char **argv); #if defined(__FreeBSD__) || defined(__NetBSD__) #define proctitle_set setproctitle #else void proctitle_set(const char *, ...) PRINTF_FORMAT(1, 2); #endif void proctitle_restore(void); #endif /* command-line.h */ openvswitch-2.0.1+git20140120/lib/common-syn.man000066400000000000000000000001241226605124000210110ustar00rootroot00000000000000.IP "Common options:" [\fB\-h\fR | \fB\-\-help\fR] [\fB\-V\fR | \fB\-\-version\fR] openvswitch-2.0.1+git20140120/lib/common.man000066400000000000000000000003111226605124000202000ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .IP "\fB\-h\fR" .IQ "\fB\-\-help\fR" Prints a brief help message to the console. . .IP "\fB\-V\fR" .IQ "\fB\-\-version\fR" Prints version information to the console. openvswitch-2.0.1+git20140120/lib/compiler.h000066400000000000000000000157441226605124000202160ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

#ifndef COMPILER_H
#define COMPILER_H 1

/* Fallbacks so that the __has_feature() / __has_extension() tests below
 * evaluate to 0 on compilers that do not provide them. */
#ifndef __has_feature
  #define __has_feature(x) 0
#endif
#ifndef __has_extension
  #define __has_extension(x) 0
#endif

/* Function and variable attributes.  Each macro expands to a GNU C
 * __attribute__ when building with a GNU-compatible compiler outside of
 * __CHECKER__ runs, and to nothing (or a plain boolean test for
 * OVS_LIKELY/OVS_UNLIKELY) otherwise. */
#if __GNUC__ && !__CHECKER__
#define NO_RETURN __attribute__((__noreturn__))
#define OVS_UNUSED __attribute__((__unused__))
#define PRINTF_FORMAT(FMT, ARG1) __attribute__((__format__(printf, FMT, ARG1)))
#define STRFTIME_FORMAT(FMT) __attribute__((__format__(__strftime__, FMT, 0)))
#define MALLOC_LIKE __attribute__((__malloc__))
#define ALWAYS_INLINE __attribute__((always_inline))
#define WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
#define SENTINEL(N) __attribute__((sentinel(N)))
#define OVS_LIKELY(CONDITION) __builtin_expect(!!(CONDITION), 1)
#define OVS_UNLIKELY(CONDITION) __builtin_expect(!!(CONDITION), 0)
#else
#define NO_RETURN
#define OVS_UNUSED
#define PRINTF_FORMAT(FMT, ARG1)
#define STRFTIME_FORMAT(FMT)
#define MALLOC_LIKE
#define ALWAYS_INLINE
#define WARN_UNUSED_RESULT
#define SENTINEL(N)
#define OVS_LIKELY(CONDITION) (!!(CONDITION))
#define OVS_UNLIKELY(CONDITION) (!!(CONDITION))
#endif

#if __has_feature(c_thread_safety_attributes)
/* "clang" annotations for thread safety check.
 *
 * OVS_LOCKABLE indicates that the struct contains mutex element
 * which can be locked by functions like ovs_mutex_lock().
 *
 * Below, the word MUTEX stands for the name of an object with an OVS_LOCKABLE
 * struct type.  It can also be a comma-separated list of multiple structs,
 * e.g. to require a function to hold multiple locks while invoked.
 *
 *
 * On a variable:
 *
 *    - OVS_GUARDED indicates that the variable may only be accessed while
 *      some mutex is held.
 *
 *    - OVS_GUARDED_BY(MUTEX) indicates that the variable may only be accessed
 *      while the specific MUTEX is held.
 *
 *
 * On a variable A of mutex type:
 *
 *    - OVS_ACQ_BEFORE(B), where B is a mutex or a comma-separated list of
 *      mutexes, declare that if both A and B are acquired at the same time,
 *      then A must be acquired before B.  That is, B nests inside A.
*
 *    - OVS_ACQ_AFTER(B) is the opposite of OVS_ACQ_BEFORE(B), that is, it
 *      declares that A nests inside B.
 *
 *
 * On a function, the following attributes apply to mutexes:
 *
 *    - OVS_ACQUIRES(MUTEX) indicates that the function must be called without
 *      holding MUTEX and that it returns holding MUTEX.
 *
 *    - OVS_RELEASES(MUTEX) indicates that the function may only be called with
 *      MUTEX held and that it returns with MUTEX released.  It can be used for
 *      all types of MUTEX.
 *
 *    - OVS_TRY_LOCK(RETVAL, MUTEX) indicates that the function will try to
 *      acquire MUTEX.  RETVAL is an integer or boolean value specifying the
 *      return value of a successful lock acquisition.
 *
 *    - OVS_REQUIRES(MUTEX) indicates that the function may only be called with
 *      MUTEX held and that the function does not release MUTEX.
 *
 *    - OVS_EXCLUDED(MUTEX) indicates that the function may only be
 *      called when MUTEX is not held.
 *
 *
 * The following variants, with the same syntax, apply to reader-writer locks:
 *
 *    mutex                rwlock, for reading  rwlock, for writing
 *    -------------------  -------------------  -------------------
 *    OVS_ACQUIRES         OVS_ACQ_RDLOCK       OVS_ACQ_WRLOCK
 *    OVS_RELEASES         OVS_RELEASES         OVS_RELEASES
 *    OVS_TRY_LOCK         OVS_TRY_RDLOCK       OVS_TRY_WRLOCK
 *    OVS_REQUIRES         OVS_REQ_RDLOCK       OVS_REQ_WRLOCK
 *    OVS_EXCLUDED         OVS_EXCLUDED         OVS_EXCLUDED
 */
#define OVS_LOCKABLE __attribute__((lockable))
#define OVS_REQ_RDLOCK(...) __attribute__((shared_locks_required(__VA_ARGS__)))
#define OVS_ACQ_RDLOCK(...) __attribute__((shared_lock_function(__VA_ARGS__)))
#define OVS_REQ_WRLOCK(...) \
    __attribute__((exclusive_locks_required(__VA_ARGS__)))
#define OVS_ACQ_WRLOCK(...) \
    __attribute__((exclusive_lock_function(__VA_ARGS__)))
/* For a plain mutex, OVS_REQUIRES and OVS_ACQUIRES expand to the same
 * attributes as the write-lock variants above. */
#define OVS_REQUIRES(...) \
    __attribute__((exclusive_locks_required(__VA_ARGS__)))
#define OVS_ACQUIRES(...) \
    __attribute__((exclusive_lock_function(__VA_ARGS__)))
#define OVS_TRY_WRLOCK(RETVAL, ...)
\ __attribute__((exclusive_trylock_function(RETVAL, __VA_ARGS__))) #define OVS_TRY_RDLOCK(RETVAL, ...) \ __attribute__((shared_trylock_function(RETVAL, __VA_ARGS__))) #define OVS_TRY_LOCK(RETVAL, ...) \ __attribute__((exclusive_trylock_function(RETVAL, __VA_ARGS__))) #define OVS_GUARDED __attribute__((guarded_var)) #define OVS_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) #define OVS_RELEASES(...) __attribute__((unlock_function(__VA_ARGS__))) #define OVS_EXCLUDED(...) __attribute__((locks_excluded(__VA_ARGS__))) #define OVS_ACQ_BEFORE(...) __attribute__((acquired_before(__VA_ARGS__))) #define OVS_ACQ_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) #define OVS_NO_THREAD_SAFETY_ANALYSIS \ __attribute__((no_thread_safety_analysis)) #else /* not Clang */ #define OVS_LOCKABLE #define OVS_REQ_RDLOCK(...) #define OVS_ACQ_RDLOCK(...) #define OVS_REQ_WRLOCK(...) #define OVS_ACQ_WRLOCK(...) #define OVS_REQUIRES(...) #define OVS_ACQUIRES(...) #define OVS_TRY_WRLOCK(...) #define OVS_TRY_RDLOCK(...) #define OVS_TRY_LOCK(...) #define OVS_GUARDED #define OVS_GUARDED_BY(...) #define OVS_EXCLUDED(...) #define OVS_RELEASES(...) #define OVS_ACQ_BEFORE(...) #define OVS_ACQ_AFTER(...) #define OVS_NO_THREAD_SAFETY_ANALYSIS #endif /* ISO C says that a C implementation may choose any integer type for an enum * that is sufficient to hold all of its values. Common ABIs (such as the * System V ABI used on i386 GNU/Linux) always use a full-sized "int", even * when a smaller type would suffice. * * In GNU C, "enum __attribute__((packed)) name { ... }" defines 'name' as an * enum compatible with a type that is no bigger than necessary. This is the * intended use of OVS_PACKED_ENUM. * * OVS_PACKED_ENUM is intended for use only as a space optimization, since it * only works with GCC. That means that it must not be used in wire protocols * or otherwise exposed outside of a single process. 
*/ #if __GNUC__ && !__CHECKER__ #define OVS_PACKED_ENUM __attribute__((__packed__)) #else #define OVS_PACKED_ENUM #endif #ifndef _MSC_VER #define OVS_PACKED(DECL) DECL __attribute__((__packed__)) #else #define OVS_PACKED(DECL) __pragma(pack(push, 1)) DECL __pragma(pack(pop)) #endif #endif /* compiler.h */ openvswitch-2.0.1+git20140120/lib/coverage-unixctl.man000066400000000000000000000007721226605124000222020ustar00rootroot00000000000000.SS "COVERAGE COMMANDS" These commands manage \fB\*(PN\fR's ``coverage counters,'' which count the number of times particular events occur during a daemon's runtime. In addition to these commands, \fB\*(PN\fR automatically logs coverage counter values, at \fBINFO\fR level, when it detects that the daemon's main loop takes unusually long to run. .PP Coverage counters are useful mainly for performance analysis and debugging. .IP "\fBcoverage/show\fR" Displays the values of all of the coverage counters. openvswitch-2.0.1+git20140120/lib/coverage.c000066400000000000000000000167731226605124000201750ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "coverage.h" #include #include #include "dynamic-string.h" #include "hash.h" #include "svec.h" #include "timeval.h" #include "unixctl.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(coverage); /* The coverage counters. 
*/
#if USE_LINKER_SECTIONS
/* The counter table is whatever lies between the '__start_coverage' and
 * '__stop_coverage' symbols.
 * NOTE(review): presumably these are linker-provided bounds of a "coverage"
 * section -- confirm against coverage.h. */
extern struct coverage_counter *__start_coverage[];
extern struct coverage_counter *__stop_coverage[];
#define coverage_counters __start_coverage
#define n_coverage_counters  (__stop_coverage - __start_coverage)
#else  /* !USE_LINKER_SECTIONS */
/* Without linker sections, "coverage.def" is included twice with different
 * definitions of COVERAGE_COUNTER: first to define, for every counter, its
 * per-thread storage, a COUNTER##_count() function that returns the calling
 * thread's count and resets it to zero, and the 'struct coverage_counter'
 * itself; then to build the 'coverage_counters' array of pointers. */
#define COVERAGE_COUNTER(COUNTER)                                       \
        DECLARE_EXTERN_PER_THREAD_DATA(unsigned int,                    \
                                       counter_##COUNTER);              \
        DEFINE_EXTERN_PER_THREAD_DATA(counter_##COUNTER, 0);            \
        static unsigned int COUNTER##_count(void)                       \
        {                                                               \
            unsigned int *countp = counter_##COUNTER##_get();           \
            unsigned int count = *countp;                               \
            *countp = 0;                                                \
            return count;                                               \
        }                                                               \
        extern struct coverage_counter counter_##COUNTER;               \
        struct coverage_counter counter_##COUNTER                       \
            = { #COUNTER, COUNTER##_count, 0 };
#include "coverage.def"
#undef COVERAGE_COUNTER

extern struct coverage_counter *coverage_counters[];
struct coverage_counter *coverage_counters[] = {
#define COVERAGE_COUNTER(NAME) &counter_##NAME,
#include "coverage.def"
#undef COVERAGE_COUNTER
};
#define n_coverage_counters ARRAY_SIZE(coverage_counters)
#endif  /* !USE_LINKER_SECTIONS */

/* Held by the functions in this file while they copy the counter table or
 * read and update the counters' 'total' members. */
static struct ovs_mutex coverage_mutex = OVS_MUTEX_INITIALIZER;

static void coverage_read(struct svec *);

/* unixctl handler for "coverage/show": replies on 'conn' with the lines
 * produced by coverage_read(), each terminated by a new-line. */
static void
coverage_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                     const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    struct svec lines;
    char *reply;

    svec_init(&lines);
    coverage_read(&lines);
    reply = svec_join(&lines, "\n", "\n");
    unixctl_command_reply(conn, reply);
    free(reply);
    svec_destroy(&lines);
}

/* Registers the "coverage/show" unixctl command, which takes no
 * arguments. */
void
coverage_init(void)
{
    unixctl_command_register("coverage/show", "", 0, 0,
                             coverage_unixctl_show, NULL);
}

/* Sorts coverage counters in descending order by total, within equal
 * totals alphabetically by name.
*/ static int compare_coverage_counters(const void *a_, const void *b_) { const struct coverage_counter *const *ap = a_; const struct coverage_counter *const *bp = b_; const struct coverage_counter *a = *ap; const struct coverage_counter *b = *bp; if (a->total != b->total) { return a->total < b->total ? 1 : -1; } else { return strcmp(a->name, b->name); } } static uint32_t coverage_hash(void) { struct coverage_counter **c; uint32_t hash = 0; int n_groups, i; /* Sort coverage counters into groups with equal totals. */ c = xmalloc(n_coverage_counters * sizeof *c); ovs_mutex_lock(&coverage_mutex); for (i = 0; i < n_coverage_counters; i++) { c[i] = coverage_counters[i]; } ovs_mutex_unlock(&coverage_mutex); qsort(c, n_coverage_counters, sizeof *c, compare_coverage_counters); /* Hash the names in each group along with the rank. */ n_groups = 0; for (i = 0; i < n_coverage_counters; ) { int j; if (!c[i]->total) { break; } n_groups++; hash = hash_int(i, hash); for (j = i; j < n_coverage_counters; j++) { if (c[j]->total != c[i]->total) { break; } hash = hash_string(c[j]->name, hash); } i = j; } free(c); return hash_int(n_groups, hash); } static bool coverage_hit(uint32_t hash) { enum { HIT_BITS = 1024, BITS_PER_WORD = 32 }; static uint32_t hit[HIT_BITS / BITS_PER_WORD]; BUILD_ASSERT_DECL(IS_POW2(HIT_BITS)); static long long int next_clear = LLONG_MIN; unsigned int bit_index = hash & (HIT_BITS - 1); unsigned int word_index = bit_index / BITS_PER_WORD; unsigned int word_mask = 1u << (bit_index % BITS_PER_WORD); /* Expire coverage hash suppression once a day. */ if (time_msec() >= next_clear) { memset(hit, 0, sizeof hit); next_clear = time_msec() + 60 * 60 * 24 * 1000LL; } if (hit[word_index] & word_mask) { return true; } else { hit[word_index] |= word_mask; return false; } } /* Logs the coverage counters, unless a similar set of events has already been * logged. * * This function logs at log level VLL_INFO. 
Use care before adjusting this * level, because depending on its configuration, syslogd can write changes * synchronously, which can cause the coverage messages to take several seconds * to write. */ void coverage_log(void) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 3); if (!VLOG_DROP_INFO(&rl)) { uint32_t hash = coverage_hash(); if (coverage_hit(hash)) { VLOG_INFO("Skipping details of duplicate event coverage for " "hash=%08"PRIx32, hash); } else { struct svec lines; const char *line; size_t i; svec_init(&lines); coverage_read(&lines); SVEC_FOR_EACH (i, line, &lines) { VLOG_INFO("%s", line); } svec_destroy(&lines); } } } /* Adds coverage counter information to 'lines'. */ static void coverage_read(struct svec *lines) { unsigned long long int *totals; size_t n_never_hit; uint32_t hash; size_t i; hash = coverage_hash(); n_never_hit = 0; svec_add_nocopy(lines, xasprintf("Event coverage, hash=%08"PRIx32":", hash)); totals = xmalloc(n_coverage_counters * sizeof *totals); ovs_mutex_lock(&coverage_mutex); for (i = 0; i < n_coverage_counters; i++) { totals[i] = coverage_counters[i]->total; } ovs_mutex_unlock(&coverage_mutex); for (i = 0; i < n_coverage_counters; i++) { if (totals[i]) { svec_add_nocopy(lines, xasprintf("%-24s %9llu", coverage_counters[i]->name, totals[i])); } else { n_never_hit++; } } svec_add_nocopy(lines, xasprintf("%zu events never hit", n_never_hit)); free(totals); } void coverage_clear(void) { size_t i; ovs_mutex_lock(&coverage_mutex); for (i = 0; i < n_coverage_counters; i++) { struct coverage_counter *c = coverage_counters[i]; c->total += c->count(); } ovs_mutex_unlock(&coverage_mutex); } openvswitch-2.0.1+git20140120/lib/coverage.h000066400000000000000000000076751226605124000202030ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef COVERAGE_H #define COVERAGE_H 1 /* This file implements a simple form of coverage instrumentation. Points in * source code that are of interest must be explicitly annotated with * COVERAGE_INC. The coverage counters may be logged at any time with * coverage_log(). * * This form of coverage instrumentation is intended to be so lightweight that * it can be enabled in production builds. It is obviously not a substitute * for traditional coverage instrumentation with e.g. "gcov", but it is still * a useful debugging tool. */ #include "ovs-thread.h" #include "vlog.h" /* A coverage counter. */ struct coverage_counter { const char *const name; /* Textual name. */ unsigned int (*const count)(void); /* Gets, zeros this thread's count. */ unsigned long long int total; /* Total count. */ }; /* Defines COUNTER. There must be exactly one such definition at file scope * within a program. 
*/ #if USE_LINKER_SECTIONS #define COVERAGE_DEFINE(COUNTER) \ DEFINE_STATIC_PER_THREAD_DATA(unsigned int, \ counter_##COUNTER, 0); \ static unsigned int COUNTER##_count(void) \ { \ unsigned int *countp = counter_##COUNTER##_get(); \ unsigned int count = *countp; \ *countp = 0; \ return count; \ } \ static inline void COUNTER##_add(unsigned int n) \ { \ *counter_##COUNTER##_get() += n; \ } \ extern struct coverage_counter counter_##COUNTER; \ struct coverage_counter counter_##COUNTER \ = { #COUNTER, COUNTER##_count, 0 }; \ extern struct coverage_counter *counter_ptr_##COUNTER; \ struct coverage_counter *counter_ptr_##COUNTER \ __attribute__((section("coverage"))) = &counter_##COUNTER #else #define COVERAGE_DEFINE(COUNTER) \ DECLARE_EXTERN_PER_THREAD_DATA(unsigned int, \ counter_##COUNTER); \ static inline void COUNTER##_add(unsigned int n) \ { \ *counter_##COUNTER##_get() += n; \ } \ extern struct coverage_counter counter_##COUNTER #endif /* Adds 1 to COUNTER. */ #define COVERAGE_INC(COUNTER) COVERAGE_ADD(COUNTER, 1) /* Adds AMOUNT to COUNTER. */ #define COVERAGE_ADD(COUNTER, AMOUNT) COUNTER##_add(AMOUNT) void coverage_init(void); void coverage_log(void); void coverage_clear(void); /* Implementation detail. */ #define COVERAGE_DEFINE__(COUNTER) \ #endif /* coverage.h */ openvswitch-2.0.1+git20140120/lib/crc32c.c000066400000000000000000000172601226605124000174510ustar00rootroot00000000000000/*- * COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or * code or tables extracted from it, as desired without restriction. */ /* * First, the polynomial itself and its table of feedback terms. The * polynomial is * X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0 * * Note that we take it "backwards" and put the highest-order term in * the lowest-order bit. The X^32 term is "implied"; the LSB is the * X^31 term, etc. 
The X^0 term (usually shown as "+1") results in * the MSB being 1. * * Note that the usual hardware shift register implementation, which * is what we're using (we're merely optimizing it by doing eight-bit * chunks at a time) shifts bits into the lowest-order term. In our * implementation, that means shifting towards the right. Why do we * do it this way? Because the calculated CRC must be transmitted in * order from highest-order term to lowest-order term. UARTs transmit * characters in order from LSB to MSB. By storing the CRC this way * we hand it to the UART in the order low-byte to high-byte; the UART * sends each low-bit to high-bit; and the result is transmission bit * by bit from highest- to lowest-order term without requiring any bit * shuffling on our part. Reception works similarly. * * The feedback terms table consists of 256, 32-bit entries. Notes: * * The table can be generated at runtime if desired; code to do so * can be found in FreeBSD. It might not be obvious, but the feedback * terms simply represent the results of eight shift/xor operations * for all combinations of data and CRC register values. * * The values must be right-shifted by eight bits by the "updcrc" * logic; the shift must be unsigned (bring in zeroes). On some * hardware you could probably optimize the shift in assembler by * using byte-swap instructions. * polynomial $edb88320 * * * CRC32 code derived from work by Gary S. Brown. */ #include #include "crc32c.h" #include "byte-order.h" /*****************************************************************/ /* */ /* CRC LOOKUP TABLE */ /* ================ */ /* The following CRC lookup table was generated automagically */ /* by the Rocksoft^tm Model CRC Algorithm Table Generation */ /* Program V1.0 using the following model parameters: */ /* */ /* Width : 4 bytes. */ /* Poly : 0x1EDC6F41L */ /* Reverse : TRUE. 
*/ /* */ /* For more information on the Rocksoft^tm Model CRC Algorithm, */ /* see the document titled "A Painless Guide to CRC Error */ /* Detection Algorithms" by Ross Williams */ /* (ross@guest.adelaide.edu.au.). This document is likely to be */ /* in the FTP archive "ftp.adelaide.edu.au/pub/rocksoft". */ /* */ /*****************************************************************/ static const uint32_t crc32Table[256] = { 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL, 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL, 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L, 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, 0xFB410CC2L, 
0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL, 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L }; /* * Compute a CRC32c checksum as per the SCTP requirements in RFC4960. This * includes beginning with a checksum of all ones, and returning the negated * CRC. Unlike the RFC, we return the checksum in network byte-order. 
*/ ovs_be32 crc32c(const uint8_t *data, size_t size) { uint32_t crc = 0xffffffffL; while (size--) { crc = crc32Table[(crc ^ *data++) & 0xff] ^ (crc >> 8); } /* The result of this CRC calculation provides us a value in the reverse * byte-order as compared with our architecture. On big-endian systems, * this is opposite to our return type. So, to return a big-endian * value, we must swap the byte-order. */ #if defined(WORDS_BIGENDIAN) crc = uint32_byteswap(crc); #endif /* Our value is in network byte-order. OVS_FORCE keeps sparse happy. */ return (OVS_FORCE ovs_be32) ~crc; } openvswitch-2.0.1+git20140120/lib/crc32c.h000066400000000000000000000014121226605124000174460ustar00rootroot00000000000000/* * Copyright (c) 2012 The University of Waikato. * Author: Joe Stringer * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef CRC32C_H #define CRC32C_H 1 #include "openvswitch/types.h" ovs_be32 crc32c(const uint8_t *data, size_t); #endif /* crc32c.h */ openvswitch-2.0.1+git20140120/lib/csum.c000066400000000000000000000111441226605124000173340ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "csum.h" #include "unaligned.h" #ifndef __CHECKER__ /* Returns the IP checksum of the 'n' bytes in 'data'. * * The return value has the same endianness as the data. That is, if 'data' * consists of a packet in network byte order, then the return value is a value * in network byte order, and if 'data' consists of a data structure in host * byte order, then the return value is in host byte order. */ ovs_be16 csum(const void *data, size_t n) { return csum_finish(csum_continue(0, data, n)); } /* Adds the 16 bits in 'new' to the partial IP checksum 'partial' and returns * the updated checksum. (To start a new checksum, pass 0 for 'partial'. To * obtain the finished checksum, pass the return value to csum_finish().) */ uint32_t csum_add16(uint32_t partial, ovs_be16 new) { return partial + new; } /* Adds the 32 bits in 'new' to the partial IP checksum 'partial' and returns * the updated checksum. (To start a new checksum, pass 0 for 'partial'. To * obtain the finished checksum, pass the return value to csum_finish().) */ uint32_t csum_add32(uint32_t partial, ovs_be32 new) { return partial + (new >> 16) + (new & 0xffff); } /* Adds the 'n' bytes in 'data' to the partial IP checksum 'partial' and * returns the updated checksum. (To start a new checksum, pass 0 for * 'partial'. To obtain the finished checksum, pass the return value to * csum_finish().) 
*/ uint32_t csum_continue(uint32_t partial, const void *data_, size_t n) { const ovs_be16 *data = data_; for (; n > 1; n -= 2, data++) { partial = csum_add16(partial, get_unaligned_be16(data)); } if (n) { partial += *(uint8_t *) data; } return partial; } /* Returns the IP checksum corresponding to 'partial', which is a value updated * by some combination of csum_add16(), csum_add32(), and csum_continue(). * * The return value has the same endianness as the checksummed data. That is, * if the data consist of a packet in network byte order, then the return value * is a value in network byte order, and if the data are a data structure in * host byte order, then the return value is in host byte order. */ ovs_be16 csum_finish(uint32_t partial) { while (partial >> 16) { partial = (partial & 0xffff) + (partial >> 16); } return ~partial; } /* Returns the new checksum for a packet in which the checksum field previously * contained 'old_csum' and in which a field that contained 'old_u16' was * changed to contain 'new_u16'. */ ovs_be16 recalc_csum16(ovs_be16 old_csum, ovs_be16 old_u16, ovs_be16 new_u16) { /* Ones-complement arithmetic is endian-independent, so this code does not * use htons() or ntohs(). * * See RFC 1624 for formula and explanation. */ uint16_t hc_complement = ~old_csum; uint16_t m_complement = ~old_u16; uint16_t m_prime = new_u16; uint32_t sum = hc_complement + m_complement + m_prime; return csum_finish(sum); } /* Returns the new checksum for a packet in which the checksum field previously * contained 'old_csum' and in which a field that contained 'old_u32' was * changed to contain 'new_u32'. */ ovs_be16 recalc_csum32(ovs_be16 old_csum, ovs_be32 old_u32, ovs_be32 new_u32) { return recalc_csum16(recalc_csum16(old_csum, old_u32, new_u32), old_u32 >> 16, new_u32 >> 16); } /* Returns the new checksum for a packet in which the checksum field previously * contained 'old_csum' and in which a field that contained 'old_u32[4]' was * changed to contain 'new_u32[4]'. 
*/ ovs_be16 recalc_csum128(ovs_be16 old_csum, ovs_16aligned_be32 old_u32[4], const ovs_be32 new_u32[4]) { ovs_be16 new_csum = old_csum; int i; for (i = 0; i < 4; ++i) { new_csum = recalc_csum32(new_csum, get_16aligned_be32(&old_u32[i]), new_u32[i]); } return new_csum; } #else /* __CHECKER__ */ /* Making sparse happy with these functions also makes them unreadable, so * don't bother to show it their implementations. */ #endif openvswitch-2.0.1+git20140120/lib/csum.h000066400000000000000000000023511226605124000173410ustar00rootroot00000000000000/* * Copyright (c) 2008, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef CSUM_H #define CSUM_H 1 #include #include #include "openvswitch/types.h" ovs_be16 csum(const void *, size_t); uint32_t csum_add16(uint32_t partial, ovs_be16); uint32_t csum_add32(uint32_t partial, ovs_be32); uint32_t csum_continue(uint32_t partial, const void *, size_t); ovs_be16 csum_finish(uint32_t partial); ovs_be16 recalc_csum16(ovs_be16 old_csum, ovs_be16 old_u16, ovs_be16 new_u16); ovs_be16 recalc_csum32(ovs_be16 old_csum, ovs_be32 old_u32, ovs_be32 new_u32); ovs_be16 recalc_csum128(ovs_be16 old_csum, ovs_16aligned_be32 old_u32[4], const ovs_be32 new_u32[4]); #endif /* csum.h */ openvswitch-2.0.1+git20140120/lib/daemon-syn.man000066400000000000000000000002051226605124000207640ustar00rootroot00000000000000.IP "Daemon options:" [\fB\-\-pidfile\fR[\fB=\fIpidfile\fR]] [\fB\-\-overwrite\-pidfile\fR] [\fB\-\-detach\fR] [\fB\-\-no\-chdir\fR] openvswitch-2.0.1+git20140120/lib/daemon.c000066400000000000000000000530511226605124000176330ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "daemon.h" #include #include #include #include #include #include #include #include #include #include "command-line.h" #include "fatal-signal.h" #include "dirs.h" #include "lockfile.h" #include "ovs-thread.h" #include "process.h" #include "socket-util.h" #include "timeval.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(daemon); /* --detach: Should we run in the background? */ static bool detach; /* Was --detach specified? */ static bool detached; /* Have we already detached? */ /* --pidfile: Name of pidfile (null if none). */ static char *pidfile; /* Device and inode of pidfile, so we can avoid reopening it. */ static dev_t pidfile_dev; static ino_t pidfile_ino; /* --overwrite-pidfile: Create pidfile even if one already exists and is locked? */ static bool overwrite_pidfile; /* --no-chdir: Should we chdir to "/"? */ static bool chdir_ = true; /* File descriptor used by daemonize_start() and daemonize_complete(). */ static int daemonize_fd = -1; /* --monitor: Should a supervisory process monitor the daemon and restart it if * it dies due to an error signal? */ static bool monitor; /* For each of the standard file descriptors, whether to replace it by * /dev/null (if false) or keep it for the daemon to use (if true). */ static bool save_fds[3]; static void check_already_running(void); static int lock_pidfile(FILE *, int command); /* Returns the file name that would be used for a pidfile if 'name' were * provided to set_pidfile(). The caller must free the returned string. */ char * make_pidfile_name(const char *name) { return (!name ? xasprintf("%s/%s.pid", ovs_rundir(), program_name) : abs_file_name(ovs_rundir(), name)); } /* Sets up a following call to daemonize() to create a pidfile named 'name'. * If 'name' begins with '/', then it is treated as an absolute path. * Otherwise, it is taken relative to RUNDIR, which is $(prefix)/var/run by * default. * * If 'name' is null, then program_name followed by ".pid" is used. 
*/ void set_pidfile(const char *name) { assert_single_threaded(); free(pidfile); pidfile = make_pidfile_name(name); } /* Returns an absolute path to the configured pidfile, or a null pointer if no * pidfile is configured. The caller must not modify or free the returned * string. */ const char * get_pidfile(void) { return pidfile; } /* Requests that daemonizing not chdir to "/". */ void set_no_chdir(void) { chdir_ = false; } /* Will we chdir to "/" as part of daemonizing? */ bool is_chdir_enabled(void) { return chdir_; } /* Normally, daemonize() or daemonize_start() will terminate the program with a * message if a locked pidfile already exists. If this function is called, an * existing pidfile will be replaced, with a warning. */ void ignore_existing_pidfile(void) { overwrite_pidfile = true; } /* Sets up a following call to daemonize() to detach from the foreground * session, running this process in the background. */ void set_detach(void) { detach = true; } /* Will daemonize() really detach? */ bool get_detach(void) { return detach; } /* Sets up a following call to daemonize() to fork a supervisory process to * monitor the daemon and restart it if it dies due to an error signal. */ void daemon_set_monitor(void) { monitor = true; } /* A daemon doesn't normally have any use for the file descriptors for stdin, * stdout, and stderr after it detaches. To keep these file descriptors from * e.g. holding an SSH session open, by default detaching replaces each of * these file descriptors by /dev/null. But a few daemons expect the user to * redirect stdout or stderr to a file, in which case it is desirable to keep * these file descriptors. This function, therefore, disables replacing 'fd' * by /dev/null when the daemon detaches. * * 'fd' must be STDIN_FILENO, STDOUT_FILENO, or STDERR_FILENO. */ void daemon_save_fd(int fd) { ovs_assert(fd == STDIN_FILENO || fd == STDOUT_FILENO || fd == STDERR_FILENO); save_fds[fd] = true; } /* Unregisters pidfile from being unlinked when the program terminates via * exit() or a fatal signal. 
*/ void remove_pidfile_from_unlink(void) { if (pidfile) { fatal_signal_remove_file_to_unlink(pidfile); } } /* Registers pidfile to be unlinked when the program terminates via exit() or a * fatal signal. */ void add_pidfile_to_unlink(void) { if (pidfile) { fatal_signal_add_file_to_unlink(pidfile); } } /* If a pidfile has been configured, creates it and stores the running * process's pid in it. Ensures that the pidfile will be deleted when the * process exits. * * The pid is written to a locked temporary file that is then renamed to * 'pidfile'. Any failure along the way is fatal. */ static void make_pidfile(void) { long int pid = getpid(); struct stat s; char *tmpfile; FILE *file; int error; /* Create a temporary pidfile. */ if (overwrite_pidfile) { tmpfile = xasprintf("%s.tmp%ld", pidfile, pid); fatal_signal_add_file_to_unlink(tmpfile); } else { /* Everyone shares the same file which will be treated as a lock. To * avoid some uncomfortable race conditions, we can't set up the fatal * signal unlink until we've acquired it. */ tmpfile = xasprintf("%s.tmp", pidfile); } /* "a+" creates the file if necessary without truncating it; it is only * truncated further down, after the lock has been acquired. */ file = fopen(tmpfile, "a+"); if (!file) { VLOG_FATAL("%s: create failed (%s)", tmpfile, ovs_strerror(errno)); } error = lock_pidfile(file, F_SETLK); if (error) { /* Looks like we failed to acquire the lock. Note that, if we failed * for some other reason (and '!overwrite_pidfile'), we will have * left 'tmpfile' as garbage in the file system. */ VLOG_FATAL("%s: fcntl(F_SETLK) failed (%s)", tmpfile, ovs_strerror(error)); } if (!overwrite_pidfile) { /* We acquired the lock. Make sure to clean up on exit, and verify * that we're allowed to create the actual pidfile. 
*/ fatal_signal_add_file_to_unlink(tmpfile); check_already_running(); } /* 's' supplies the device and inode numbers that are saved in * 'pidfile_dev' and 'pidfile_ino' at the end of this function. */ if (fstat(fileno(file), &s) == -1) { VLOG_FATAL("%s: fstat failed (%s)", tmpfile, ovs_strerror(errno)); } if (ftruncate(fileno(file), 0) == -1) { VLOG_FATAL("%s: truncate failed (%s)", tmpfile, ovs_strerror(errno)); } fprintf(file, "%ld\n", pid); if (fflush(file) == EOF) { VLOG_FATAL("%s: write failed (%s)", tmpfile, ovs_strerror(errno)); } error = rename(tmpfile, pidfile); /* Due to a race, 'tmpfile' may be owned by a different process, so we * shouldn't delete it on exit. */ fatal_signal_remove_file_to_unlink(tmpfile); if (error < 0) { VLOG_FATAL("failed to rename \"%s\" to \"%s\" (%s)", tmpfile, pidfile, ovs_strerror(errno)); } /* Ensure that the pidfile will get deleted on exit. */ fatal_signal_add_file_to_unlink(pidfile); /* Clean up. * * We don't close 'file' because its file descriptor must remain open to * hold the lock. */ pidfile_dev = s.st_dev; pidfile_ino = s.st_ino; free(tmpfile); } /* If configured with set_pidfile() or set_detach(), creates the pid file and * detaches from the foreground session. */ void daemonize(void) { daemonize_start(); daemonize_complete(); } /* Calls fork() and on success returns its return value. On failure, logs an * error and exits unsuccessfully. * * Post-fork, but before returning, this function calls a few other functions * that are generally useful if the child isn't planning to exec a new * process. */ pid_t fork_and_clean_up(void) { pid_t pid = xfork(); if (pid > 0) { /* Running in parent process. */ fatal_signal_fork(); } else if (!pid) { /* Running in child process. */ time_postfork(); lockfile_postfork(); } return pid; } /* Forks, then: * * - In the parent, waits for the child to signal that it has completed its * startup sequence. Then stores -1 in '*fdp' and returns the child's pid. * * - In the child, stores a fd in '*fdp' and returns 0. The caller should * pass the fd to fork_notify_startup() after it finishes its startup * sequence. 
* * If something goes wrong with the fork, logs a critical error and aborts the * process. */ static pid_t fork_and_wait_for_startup(int *fdp) { int fds[2]; pid_t pid; xpipe(fds); pid = fork_and_clean_up(); if (pid > 0) { /* Running in parent process. */ size_t bytes_read; char c; close(fds[1]); /* A failed read means the child never wrote its startup byte; reap * the child to find out how it ended. */ if (read_fully(fds[0], &c, 1, &bytes_read) != 0) { int retval; int status; do { retval = waitpid(pid, &status, 0); } while (retval == -1 && errno == EINTR); if (retval == pid) { if (WIFEXITED(status) && WEXITSTATUS(status)) { /* Child exited with an error. Convey the same error * to our parent process as a courtesy. */ exit(WEXITSTATUS(status)); } else { char *status_msg = process_status_msg(status); VLOG_FATAL("fork child died before signaling startup (%s)", status_msg); } } else if (retval < 0) { VLOG_FATAL("waitpid failed (%s)", ovs_strerror(errno)); } else { NOT_REACHED(); } } close(fds[0]); *fdp = -1; } else if (!pid) { /* Running in child process. */ close(fds[0]); *fdp = fds[1]; } return pid; } /* If 'fd' is not -1, writes a single byte to it, which is what the parent * blocked in fork_and_wait_for_startup() is reading for, and then closes it. * A failed write is fatal. Does nothing if 'fd' is -1. */ static void fork_notify_startup(int fd) { if (fd != -1) { size_t bytes_written; int error; error = write_fully(fd, "", 1, &bytes_written); if (error) { VLOG_FATAL("pipe write failed (%s)", ovs_strerror(error)); } close(fd); } } /* Returns true if wait status 'status' reports that the process was killed by * one of the signals listed in 'error_signals', false otherwise. */ static bool should_restart(int status) { if (WIFSIGNALED(status)) { static const int error_signals[] = { SIGABRT, SIGALRM, SIGBUS, SIGFPE, SIGILL, SIGPIPE, SIGSEGV, SIGXCPU, SIGXFSZ }; size_t i; for (i = 0; i < ARRAY_SIZE(error_signals); i++) { if (error_signals[i] == WTERMSIG(status)) { return true; } } } return false; } /* Waits for the process 'daemon_pid' to die. If should_restart() accepts its * wait status, forks a replacement with fork_and_wait_for_startup(), no sooner * than 10 seconds after the previous restart, and goes on waiting for the * replacement; otherwise logs the death and exits with status 0. * * Returns only in a newly forked replacement process, where * fork_and_wait_for_startup() returned 0. */ static void monitor_daemon(pid_t daemon_pid) { /* XXX Should log daemon's stderr output at startup time. 
*/ time_t last_restart; char *status_msg; int crashes; set_subprogram_name("monitor"); status_msg = xstrdup("healthy"); last_restart = TIME_MIN; crashes = 0; for (;;) { int retval; int status; proctitle_set("monitoring pid %lu (%s)", (unsigned long int) daemon_pid, status_msg); do { retval = waitpid(daemon_pid, &status, 0); } while (retval == -1 && errno == EINTR); if (retval == -1) { VLOG_FATAL("waitpid failed (%s)", ovs_strerror(errno)); } else if (retval == daemon_pid) { char *s = process_status_msg(status); if (should_restart(status)) { free(status_msg); status_msg = xasprintf("%d crashes: pid %lu died, %s", ++crashes, (unsigned long int) daemon_pid, s); free(s); if (WCOREDUMP(status)) { /* Disable further core dumps to save disk space. */ struct rlimit r; r.rlim_cur = 0; r.rlim_max = 0; if (setrlimit(RLIMIT_CORE, &r) == -1) { VLOG_WARN("failed to disable core dumps: %s", ovs_strerror(errno)); } } /* Throttle restarts to no more than once every 10 seconds. */ if (time(NULL) < last_restart + 10) { VLOG_WARN("%s, waiting until 10 seconds since last " "restart", status_msg); /* Loop because sleep() may return before the full interval * has elapsed. */ for (;;) { time_t now = time(NULL); time_t wakeup = last_restart + 10; if (now >= wakeup) { break; } sleep(wakeup - now); } } last_restart = time(NULL); VLOG_ERR("%s, restarting", status_msg); daemon_pid = fork_and_wait_for_startup(&daemonize_fd); if (!daemon_pid) { /* In the new child: leave the monitoring loop. */ break; } } else { VLOG_INFO("pid %lu died, %s, exiting", (unsigned long int) daemon_pid, s); free(s); exit(0); } } } free(status_msg); /* Running in new daemon process. */ proctitle_restore(); set_subprogram_name(""); } /* Close standard file descriptors (except any that the client has requested we * leave open by calling daemon_save_fd()). If we're started from e.g. an SSH * session, then this keeps us from holding that session open artificially. 
*/ static void close_standard_fds(void) { int null_fd = get_null_fd(); if (null_fd >= 0) { int fd; for (fd = 0; fd < 3; fd++) { if (!save_fds[fd]) { dup2(null_fd, fd); } } } /* Disable logging to stderr to avoid wasting CPU time. */ vlog_set_levels(NULL, VLF_CONSOLE, VLL_OFF); } /* If daemonization is configured, then starts daemonization, by forking and * returning in the child process. The parent process hangs around until the * child lets it know either that it completed startup successfully (by calling * daemon_complete()) or that it failed to start up (by exiting with a nonzero * exit code). */ void daemonize_start(void) { assert_single_threaded(); daemonize_fd = -1; if (detach) { if (fork_and_wait_for_startup(&daemonize_fd) > 0) { /* Running in parent process. */ exit(0); } /* Running in daemon or monitor process. */ setsid(); } if (monitor) { int saved_daemonize_fd = daemonize_fd; pid_t daemon_pid; daemon_pid = fork_and_wait_for_startup(&daemonize_fd); if (daemon_pid > 0) { /* Running in monitor process. */ fork_notify_startup(saved_daemonize_fd); close_standard_fds(); monitor_daemon(daemon_pid); } /* Running in daemon process. */ } forbid_forking("running in daemon process"); if (pidfile) { make_pidfile(); } /* Make sure that the unixctl commands for vlog get registered in a * daemon, even before the first log message. */ vlog_init(); } /* If daemonization is configured, then this function notifies the parent * process that the child process has completed startup successfully. It also * call daemonize_post_detach(). * * Calling this function more than once has no additional effect. 
*/ void daemonize_complete(void) { if (pidfile) { free(pidfile); pidfile = NULL; } if (!detached) { detached = true; fork_notify_startup(daemonize_fd); daemonize_fd = -1; daemonize_post_detach(); } } /* If daemonization is configured, then this function does traditional Unix * daemonization behavior: join a new session, chdir to the root (if not * disabled), and close the standard file descriptors. * * It only makes sense to call this function as part of an implementation of a * special daemon subprocess. A normal daemon should just call * daemonize_complete(). */ void daemonize_post_detach(void) { if (detach) { if (chdir_) { ignore(chdir("/")); } close_standard_fds(); } } void daemon_usage(void) { printf( "\nDaemon options:\n" " --detach run in background as daemon\n" " --no-chdir do not chdir to '/'\n" " --pidfile[=FILE] create pidfile (default: %s/%s.pid)\n" " --overwrite-pidfile with --pidfile, start even if already " "running\n", ovs_rundir(), program_name); } static int lock_pidfile__(FILE *file, int command, struct flock *lck) { int error; lck->l_type = F_WRLCK; lck->l_whence = SEEK_SET; lck->l_start = 0; lck->l_len = 0; lck->l_pid = 0; do { error = fcntl(fileno(file), command, lck) == -1 ? errno : 0; } while (error == EINTR); return error; } static int lock_pidfile(FILE *file, int command) { struct flock lck; return lock_pidfile__(file, command, &lck); } static pid_t read_pidfile__(const char *pidfile, bool delete_if_stale) { struct stat s, s2; struct flock lck; char line[128]; FILE *file; int error; if ((pidfile_ino || pidfile_dev) && !stat(pidfile, &s) && s.st_ino == pidfile_ino && s.st_dev == pidfile_dev) { /* It's our own pidfile. We can't afford to open it, because closing * *any* fd for a file that a process has locked also releases all the * locks on that file. 
* * Fortunately, we know the associated pid anyhow: */ return getpid(); } file = fopen(pidfile, "r+"); if (!file) { if (errno == ENOENT && delete_if_stale) { return 0; } error = errno; VLOG_WARN("%s: open: %s", pidfile, ovs_strerror(error)); goto error; } error = lock_pidfile__(file, F_GETLK, &lck); if (error) { VLOG_WARN("%s: fcntl: %s", pidfile, ovs_strerror(error)); goto error; } if (lck.l_type == F_UNLCK) { /* pidfile exists but it isn't locked by anyone. We need to delete it * so that a new pidfile can go in its place. But just calling * unlink(pidfile) makes a nasty race: what if someone else unlinks it * before we do and then replaces it by a valid pidfile? We'd unlink * their valid pidfile. We do a little dance to avoid the race, by * locking the invalid pidfile. Only one process can have the invalid * pidfile locked, and only that process has the right to unlink it. */ if (!delete_if_stale) { error = ESRCH; VLOG_DBG("%s: pid file is stale", pidfile); goto error; } /* Get the lock. */ error = lock_pidfile(file, F_SETLK); if (error) { /* We lost a race with someone else doing the same thing. */ VLOG_WARN("%s: lost race to lock pidfile", pidfile); goto error; } /* Is the file we have locked still named 'pidfile'? */ if (stat(pidfile, &s) || fstat(fileno(file), &s2) || s.st_ino != s2.st_ino || s.st_dev != s2.st_dev) { /* No. We lost a race with someone else who got the lock before * us, deleted the pidfile, and closed it (releasing the lock). */ error = EALREADY; VLOG_WARN("%s: lost race to delete pidfile", pidfile); goto error; } /* We won the right to delete the stale pidfile. 
*/ if (unlink(pidfile)) { error = errno; VLOG_WARN("%s: failed to delete stale pidfile (%s)", pidfile, ovs_strerror(error)); goto error; } VLOG_DBG("%s: deleted stale pidfile", pidfile); fclose(file); return 0; } if (!fgets(line, sizeof line, file)) { if (ferror(file)) { error = errno; VLOG_WARN("%s: read: %s", pidfile, ovs_strerror(error)); } else { error = ESRCH; VLOG_WARN("%s: read: unexpected end of file", pidfile); } goto error; } if (lck.l_pid != strtoul(line, NULL, 10)) { /* The process that has the pidfile locked is not the process that * created it. It must be stale, with the process that has it locked * preparing to delete it. */ error = ESRCH; VLOG_WARN("%s: stale pidfile for pid %s being deleted by pid %ld", pidfile, line, (long int) lck.l_pid); goto error; } fclose(file); return lck.l_pid; error: if (file) { fclose(file); } return -error; } /* Opens and reads a PID from 'pidfile'. Returns the positive PID if * successful, otherwise a negative errno value. */ pid_t read_pidfile(const char *pidfile) { return read_pidfile__(pidfile, false); } /* Checks whether a process with the given 'pidfile' is already running and, * if so, aborts. If 'pidfile' is stale, deletes it. */ static void check_already_running(void) { long int pid = read_pidfile__(pidfile, true); if (pid > 0) { VLOG_FATAL("%s: already running as pid %ld, aborting", pidfile, pid); } else if (pid < 0) { VLOG_FATAL("%s: pidfile check failed (%s), aborting", pidfile, ovs_strerror(-pid)); } } openvswitch-2.0.1+git20140120/lib/daemon.h000066400000000000000000000057631226605124000176470ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef DAEMON_H #define DAEMON_H 1 #include #include #include #define DAEMON_OPTION_ENUMS \ OPT_DETACH, \ OPT_NO_CHDIR, \ OPT_OVERWRITE_PIDFILE, \ OPT_PIDFILE, \ OPT_MONITOR #define DAEMON_LONG_OPTIONS \ {"detach", no_argument, NULL, OPT_DETACH}, \ {"no-chdir", no_argument, NULL, OPT_NO_CHDIR}, \ {"pidfile", optional_argument, NULL, OPT_PIDFILE}, \ {"overwrite-pidfile", no_argument, NULL, OPT_OVERWRITE_PIDFILE}, \ {"monitor", no_argument, NULL, OPT_MONITOR} #define DAEMON_OPTION_HANDLERS \ case OPT_DETACH: \ set_detach(); \ break; \ \ case OPT_NO_CHDIR: \ set_no_chdir(); \ break; \ \ case OPT_PIDFILE: \ set_pidfile(optarg); \ break; \ \ case OPT_OVERWRITE_PIDFILE: \ ignore_existing_pidfile(); \ break; \ \ case OPT_MONITOR: \ daemon_set_monitor(); \ break; char *make_pidfile_name(const char *name); void set_pidfile(const char *name); const char *get_pidfile(void); void set_no_chdir(void); bool is_chdir_enabled(void); void set_detach(void); bool get_detach(void); void daemon_set_monitor(void); void daemon_save_fd(int fd); void remove_pidfile_from_unlink(void); void add_pidfile_to_unlink(void); void daemonize(void); void daemonize_start(void); void daemonize_complete(void); void ignore_existing_pidfile(void); void daemon_usage(void); pid_t read_pidfile(const char *name); pid_t read_pidfile_if_exists(const char *name); pid_t fork_and_clean_up(void); void daemonize_post_detach(void); #endif /* daemon.h */ openvswitch-2.0.1+git20140120/lib/daemon.man000066400000000000000000000036171226605124000201670ustar00rootroot00000000000000.TP 
\fB\-\-pidfile\fR[\fB=\fIpidfile\fR] Causes a file (by default, \fB\*(PN.pid\fR) to be created indicating the PID of the running process. If the \fIpidfile\fR argument is not specified, or if it does not begin with \fB/\fR, then it is created in \fB@RUNDIR@\fR. .IP If \fB\-\-pidfile\fR is not specified, no pidfile is created. . .TP \fB\-\-overwrite\-pidfile\fR By default, when \fB\-\-pidfile\fR is specified and the specified pidfile already exists and is locked by a running process, \fB\*(PN\fR refuses to start. Specify \fB\-\-overwrite\-pidfile\fR to cause it to instead overwrite the pidfile. .IP When \fB\-\-pidfile\fR is not specified, this option has no effect. . .TP \fB\-\-detach\fR Causes \fB\*(PN\fR to detach itself from the foreground session and run as a background process. \*(DD . .TP \fB\-\-monitor\fR Creates an additional process to monitor the \fB\*(PN\fR daemon. If the daemon dies due to a signal that indicates a programming error (e.g. \fBSIGSEGV\fR, \fBSIGABRT\fR), then the monitor process starts a new copy of it. If the daemon dies or exits for another reason, the monitor process exits. .IP This option is normally used with \fB\-\-detach\fR, but it also functions without it. . .TP \fB\-\-no\-chdir\fR By default, when \fB\-\-detach\fR is specified, \fB\*(PN\fR changes its current working directory to the root directory after it detaches. Otherwise, invoking \fB\*(PN\fR from a carelessly chosen directory would prevent the administrator from unmounting the file system that holds that directory. .IP Specifying \fB\-\-no\-chdir\fR suppresses this behavior, preventing \fB\*(PN\fR from changing its current working directory. This may be useful for collecting core files, since it is common behavior to write core dumps into the current working directory and the root directory is not a good directory to use. .IP This option has no effect when \fB\-\-detach\fR is not specified.
openvswitch-2.0.1+git20140120/lib/dh1024.pem000066400000000000000000000007541226605124000176330ustar00rootroot00000000000000-----BEGIN DH PARAMETERS----- MIGHAoGBAPSI/VhOSdvNILSd5JEHNmszbDgNRR0PfIizHHxbLY7288kjwEPwpVsY jY67VYy4XTjTNP18F1dDox0YbN4zISy1Kv884bEpQBgRjXyEpwpy1obEAxnIByl6 ypUM2Zafq9AKUJsCRtMIPWakXUGfnHy9iUsiGSa6q6Jew1XpL3jHAgEC -----END DH PARAMETERS----- These are the 1024 bit DH parameters from "Assigned Number for SKIP Protocols" (http://www.skip-vpn.org/spec/numbers.html). See there for how they were generated. Note that g is not a generator, but this is not a problem since p is a safe prime. openvswitch-2.0.1+git20140120/lib/dh2048.pem000066400000000000000000000011141226605124000176310ustar00rootroot00000000000000-----BEGIN DH PARAMETERS----- MIIBCAKCAQEA9kJXtwh/CBdyorrWqULzBej5UxE5T7bxbrlLOCDaAadWoxTpj0BV 89AHxstDqZSt90xkhkn4DIO9ZekX1KHTUPj1WV/cdlJPPT2N286Z4VeSWc39uK50 T8X8dryDxUcwYc58yWb/Ffm7/ZFexwGq01uejaClcjrUGvC/RgBYK+X0iP1YTknb zSC0neSRBzZrM2w4DUUdD3yIsxx8Wy2O9vPJI8BD8KVbGI2Ou1WMuF040zT9fBdX Q6MdGGzeMyEstSr/POGxKUAYEY18hKcKctaGxAMZyAcpesqVDNmWn6vQClCbAkbT CD1mpF1Bn5x8vYlLIhkmuquiXsNV6TILOwIBAg== -----END DH PARAMETERS----- These are the 2048 bit DH parameters from "Assigned Number for SKIP Protocols" (http://www.skip-vpn.org/spec/numbers.html). See there for how they were generated. 
openvswitch-2.0.1+git20140120/lib/dh4096.pem000066400000000000000000000017701226605124000176460ustar00rootroot00000000000000-----BEGIN DH PARAMETERS----- MIICCAKCAgEA+hRyUsFN4VpJ1O8JLcCo/VWr19k3BCgJ4uk+d+KhehjdRqNDNyOQ l/MOyQNQfWXPeGKmOmIig6Ev/nm6Nf9Z2B1h3R4hExf+zTiHnvVPeRBhjdQi81rt Xeoh6TNrSBIKIHfUJWBh3va0TxxjQIs6IZOLeVNRLMqzeylWqMf49HsIXqbcokUS Vt1BkvLdW48j8PPv5DsKRN3tloTxqDJGo9tKvj1Fuk74A+Xda1kNhB7KFlqMyN98 VETEJ6c7KpfOo30mnK30wqw3S8OtaIR/maYX72tGOno2ehFDkq3pnPtEbD2CScxc alJC+EL7RPk5c/tgeTvCngvc1KZn92Y//EI7G9tPZtylj2b56sHtMftIoYJ9+ODM sccD5Piz/rejE3Ome8EOOceUSCYAhXn8b3qvxVI1ddd1pED6FHRhFvLrZxFvBEM9 ERRMp5QqOaHJkM+Dxv8Cj6MqrCbfC4u+ZErxodzuusgDgvZiLF22uxMZbobFWyte OvOzKGtwcTqO/1wV5gKkzu1ZVswVUQd5Gg8lJicwqRWyyNRczDDoG9jVDxmogKTH AaqLulO7R8Ifa1SwF2DteSGVtgWEN8gDpN3RBmmPTDngyF2DHb5qmpnznwtFKdTL KWbuHn491xNO25CQWMtem80uKw+pTnisBRF/454n1Jnhub144YRBoN8CAQI= -----END DH PARAMETERS----- These are the 4096 bit DH parameters from "Assigned Number for SKIP Protocols" (http://www.skip-vpn.org/spec/numbers.html). See there for how they were generated. Note that g is not a generator, but this is not a problem since p is a safe prime. openvswitch-2.0.1+git20140120/lib/dhcp.h000066400000000000000000000036671226605124000173230ustar00rootroot00000000000000/* * Copyright (c) 2008, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef DHCP_H #define DHCP_H 1 #include #include "packets.h" #include "util.h" /* Ports used by DHCP. 
*/ #define DHCP_SERVER_PORT 67 /* Port used by DHCP server. */ #define DHCP_CLIENT_PORT 68 /* Port used by DHCP client. */ #define DHCP_HEADER_LEN 236 struct dhcp_header { uint8_t op; /* DHCP_BOOTREQUEST or DHCP_BOOTREPLY. */ uint8_t htype; /* ARP_HRD_ETHERNET (typically). */ uint8_t hlen; /* ETH_ADDR_LEN (typically). */ uint8_t hops; /* Hop count; set to 0 by client. */ ovs_be32 xid; /* Transaction ID. */ ovs_be16 secs; /* Since client started address acquisition. */ ovs_be16 flags; /* DHCP_FLAGS_*. */ ovs_be32 ciaddr; /* Client IP, if it has a lease for one. */ ovs_be32 yiaddr; /* Client ("your") IP address. */ ovs_be32 siaddr; /* Next server IP address. */ ovs_be32 giaddr; /* Relay agent IP address. */ uint8_t chaddr[16]; /* Client hardware address. */ char sname[64]; /* Optional server host name. */ char file[128]; /* Boot file name. */ /* Followed by variable-length options field. */ }; BUILD_ASSERT_DECL(DHCP_HEADER_LEN == sizeof(struct dhcp_header)); #endif /* dhcp.h */ openvswitch-2.0.1+git20140120/lib/dhparams.h000066400000000000000000000013671226605124000201770ustar00rootroot00000000000000/* * Copyright (c) 2008 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef DHPARAMS_H #define DHPARAMS_H 1 #include DH *get_dh1024(void); DH *get_dh2048(void); DH *get_dh4096(void); #endif /* dhparams.h */ openvswitch-2.0.1+git20140120/lib/dirs.c.in000066400000000000000000000052461226605124000177410ustar00rootroot00000000000000#line 2 "@srcdir@/lib/dirs.c.in" /* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "dirs.h" #include #include "ovs-thread.h" #include "util.h" struct directory { const char *value; /* Actual value; NULL if not yet determined. */ const char *default_value; /* Default value. */ const char *var_name; /* Environment variable to override default. */ struct ovsthread_once once; /* Ensures 'value' gets initialized once. 
*/ }; static const char * get_dir(struct directory *d) { if (ovsthread_once_start(&d->once)) { d->value = getenv(d->var_name); if (!d->value || !d->value[0]) { d->value = d->default_value; } ovsthread_once_done(&d->once); } return d->value; } const char * ovs_sysconfdir(void) { static struct directory d = { NULL, @sysconfdir@, "OVS_SYSCONFDIR", OVSTHREAD_ONCE_INITIALIZER }; return get_dir(&d); } const char * ovs_pkgdatadir(void) { static struct directory d = { NULL, @pkgdatadir@, "OVS_PKGDATADIR", OVSTHREAD_ONCE_INITIALIZER }; return get_dir(&d); } const char * ovs_rundir(void) { static struct directory d = { NULL, @RUNDIR@, "OVS_RUNDIR", OVSTHREAD_ONCE_INITIALIZER }; return get_dir(&d); } const char * ovs_logdir(void) { static struct directory d = { NULL, @LOGDIR@, "OVS_LOGDIR", OVSTHREAD_ONCE_INITIALIZER }; return get_dir(&d); } const char * ovs_dbdir(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static const char *dbdir; if (ovsthread_once_start(&once)) { dbdir = getenv("OVS_DBDIR"); if (!dbdir || !dbdir[0]) { char *sysconfdir = getenv("OVS_SYSCONFDIR"); dbdir = (sysconfdir ? xasprintf("%s/openvswitch", sysconfdir) : @DBDIR@); } ovsthread_once_done(&once); } return dbdir; } const char * ovs_bindir(void) { static struct directory d = { NULL, @bindir@, "OVS_BINDIR", OVSTHREAD_ONCE_INITIALIZER }; return get_dir(&d); } openvswitch-2.0.1+git20140120/lib/dirs.h000066400000000000000000000020541226605124000173330ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef DIRS_H #define DIRS_H 1 const char *ovs_sysconfdir(void); /* /usr/local/etc */ const char *ovs_pkgdatadir(void); /* /usr/local/share/openvswitch */ const char *ovs_rundir(void); /* /usr/local/var/run/openvswitch */ const char *ovs_logdir(void); /* /usr/local/var/log/openvswitch */ const char *ovs_dbdir(void); /* /usr/local/etc/openvswitch */ const char *ovs_bindir(void); /* /usr/local/bin */ #endif /* dirs.h */ openvswitch-2.0.1+git20140120/lib/dpif-linux.c000066400000000000000000001733071226605124000204560ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "dpif-linux.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "bitmap.h" #include "dpif-provider.h" #include "dynamic-string.h" #include "flow.h" #include "netdev.h" #include "netdev-linux.h" #include "netdev-vport.h" #include "netlink-notifier.h" #include "netlink-socket.h" #include "netlink.h" #include "odp-util.h" #include "ofpbuf.h" #include "packets.h" #include "poll-loop.h" #include "random.h" #include "shash.h" #include "sset.h" #include "timeval.h" #include "unaligned.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(dpif_linux); enum { MAX_PORTS = USHRT_MAX }; /* This ethtool flag was introduced in Linux 2.6.24, so it might be * missing if we have old headers. */ #define ETH_FLAG_LRO (1 << 15) /* LRO is enabled */ struct dpif_linux_dp { /* Generic Netlink header. */ uint8_t cmd; /* struct ovs_header. */ int dp_ifindex; /* Attributes. */ const char *name; /* OVS_DP_ATTR_NAME. */ const uint32_t *upcall_pid; /* OVS_DP_UPCALL_PID. */ struct ovs_dp_stats stats; /* OVS_DP_ATTR_STATS. */ }; static void dpif_linux_dp_init(struct dpif_linux_dp *); static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *, const struct ofpbuf *); static void dpif_linux_dp_dump_start(struct nl_dump *); static int dpif_linux_dp_transact(const struct dpif_linux_dp *request, struct dpif_linux_dp *reply, struct ofpbuf **bufp); static int dpif_linux_dp_get(const struct dpif *, struct dpif_linux_dp *reply, struct ofpbuf **bufp); struct dpif_linux_flow { /* Generic Netlink header. */ uint8_t cmd; /* struct ovs_header. */ unsigned int nlmsg_flags; int dp_ifindex; /* Attributes. * * The 'stats' member points to 64-bit data that might only be aligned on * 32-bit boundaries, so get_unaligned_u64() should be used to access its * values. 
* * If 'actions' is nonnull then OVS_FLOW_ATTR_ACTIONS will be included in * the Netlink version of the command, even if actions_len is zero. */ const struct nlattr *key; /* OVS_FLOW_ATTR_KEY. */ size_t key_len; const struct nlattr *mask; /* OVS_FLOW_ATTR_MASK. */ size_t mask_len; const struct nlattr *actions; /* OVS_FLOW_ATTR_ACTIONS. */ size_t actions_len; const struct ovs_flow_stats *stats; /* OVS_FLOW_ATTR_STATS. */ const uint8_t *tcp_flags; /* OVS_FLOW_ATTR_TCP_FLAGS. */ const ovs_32aligned_u64 *used; /* OVS_FLOW_ATTR_USED. */ bool clear; /* OVS_FLOW_ATTR_CLEAR. */ }; static void dpif_linux_flow_init(struct dpif_linux_flow *); static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *, const struct ofpbuf *); static void dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *, struct ofpbuf *); static int dpif_linux_flow_transact(struct dpif_linux_flow *request, struct dpif_linux_flow *reply, struct ofpbuf **bufp); static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *, struct dpif_flow_stats *); /* One of the dpif channels between the kernel and userspace. */ struct dpif_channel { struct nl_sock *sock; /* Netlink socket. */ long long int last_poll; /* Last time this channel was polled. */ }; static void report_loss(struct dpif *, struct dpif_channel *); /* Datapath interface for the openvswitch Linux kernel module. */ struct dpif_linux { struct dpif dpif; int dp_ifindex; /* Upcall messages. */ struct ovs_mutex upcall_lock; int uc_array_size; /* Size of 'channels' and 'epoll_events'. */ struct dpif_channel *channels; struct epoll_event *epoll_events; int epoll_fd; /* epoll fd that includes channel socks. */ int n_events; /* Num events returned by epoll_wait(). */ int event_offset; /* Offset into 'epoll_events'. */ /* Change notification. */ struct nl_sock *port_notifier; /* vport multicast group subscriber. */ }; static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(9999, 5); /* Generic Netlink family numbers for OVS. 
*
 * Initialized by dpif_linux_init(). */
static int ovs_datapath_family;
static int ovs_vport_family;
static int ovs_flow_family;
static int ovs_packet_family;

/* Generic Netlink multicast groups for OVS.
 *
 * Initialized by dpif_linux_init(). */
static unsigned int ovs_vport_mcgroup;

static int dpif_linux_init(void);
static int open_dpif(const struct dpif_linux_dp *, struct dpif **);
static uint32_t dpif_linux_port_get_pid(const struct dpif *,
                                        odp_port_t port_no);

static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *,
                                       struct ofpbuf *);
static int dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *,
                                        const struct ofpbuf *);

/* Returns the 'struct dpif_linux' that embeds 'dpif', after asserting that
 * 'dpif' really belongs to dpif_linux_class. */
static struct dpif_linux *
dpif_linux_cast(const struct dpif *dpif)
{
    dpif_assert_class(dpif, &dpif_linux_class);
    return CONTAINER_OF(dpif, struct dpif_linux, dpif);
}

/* Adds to 'all_dps' the name of every datapath found by a datapath dump.
 * Dump entries that dpif_linux_dp_from_ofpbuf() cannot parse are skipped.
 *
 * Returns the error from dpif_linux_init() if initialization fails,
 * otherwise the result of nl_dump_done(). */
static int
dpif_linux_enumerate(struct sset *all_dps)
{
    struct nl_dump dump;
    struct ofpbuf msg;
    int error;

    error = dpif_linux_init();
    if (error) {
        return error;
    }

    dpif_linux_dp_dump_start(&dump);
    while (nl_dump_next(&dump, &msg)) {
        struct dpif_linux_dp dp;

        if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) {
            sset_add(all_dps, dp.name);
        }
    }
    return nl_dump_done(&dump);
}

/* Opens the datapath named 'name', first creating it (OVS_DP_CMD_NEW) if
 * 'create' is true or else looking it up (OVS_DP_CMD_GET).  On success
 * stores the new dpif in '*dpifp' and returns 0; otherwise returns the error
 * from initialization, from the Netlink transaction, or from open_dpif(). */
static int
dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
                bool create, struct dpif **dpifp)
{
    struct dpif_linux_dp dp_request, dp;
    struct ofpbuf *buf;
    uint32_t upcall_pid;
    int error;

    error = dpif_linux_init();
    if (error) {
        return error;
    }

    /* Create or look up datapath. */
    dpif_linux_dp_init(&dp_request);
    if (create) {
        dp_request.cmd = OVS_DP_CMD_NEW;
        /* A new datapath is requested with an upcall pid of 0. */
        upcall_pid = 0;
        dp_request.upcall_pid = &upcall_pid;
    } else {
        dp_request.cmd = OVS_DP_CMD_GET;
    }
    dp_request.name = name;
    error = dpif_linux_dp_transact(&dp_request, &dp, &buf);
    if (error) {
        return error;
    }

    /* 'buf' is deleted only once open_dpif() is done with 'dp'.
     * NOTE(review): presumably 'dp' points into 'buf' -- confirm. */
    error = open_dpif(&dp, dpifp);
    ofpbuf_delete(buf);
    return error;
}

/* Allocates and initializes a 'struct dpif_linux' for the datapath described
 * by 'dp' and stores it in '*dpifp'.  The new dpif starts with no epoll fd
 * (-1) and no port notifier.  Always returns 0. */
static int
open_dpif(const struct dpif_linux_dp *dp, struct dpif **dpifp)
{
    struct dpif_linux *dpif;

    dpif = xzalloc(sizeof *dpif);
    dpif->port_notifier = NULL;
    ovs_mutex_init(&dpif->upcall_lock);
    dpif->epoll_fd = -1;

    dpif_init(&dpif->dpif, &dpif_linux_class, dp->name,
              dp->dp_ifindex, dp->dp_ifindex);

    dpif->dp_ifindex = dp->dp_ifindex;
    *dpifp = &dpif->dpif;

    return 0;
}

/* Tears down every upcall channel of 'dpif'.  For each channel that has a
 * socket: removes the socket from the epoll set, sets the port's upcall pid
 * to 0 with an OVS_VPORT_CMD_SET request, and destroys the socket.  Then
 * frees the channel and epoll event arrays and resets the related counters.
 *
 * Does nothing if 'dpif' has no epoll fd. */
static void
destroy_channels(struct dpif_linux *dpif)
{
    unsigned int i;

    if (dpif->epoll_fd < 0) {
        return;
    }

    for (i = 0; i < dpif->uc_array_size; i++ ) {
        struct dpif_linux_vport vport_request;
        struct dpif_channel *ch = &dpif->channels[i];
        uint32_t upcall_pid = 0;

        if (!ch->sock) {
            continue;
        }

        epoll_ctl(dpif->epoll_fd, EPOLL_CTL_DEL, nl_sock_fd(ch->sock), NULL);

        /* Turn off upcalls. */
        dpif_linux_vport_init(&vport_request);
        vport_request.cmd = OVS_VPORT_CMD_SET;
        vport_request.dp_ifindex = dpif->dp_ifindex;
        /* The array index doubles as the datapath port number. */
        vport_request.port_no = u32_to_odp(i);
        vport_request.upcall_pid = &upcall_pid;
        dpif_linux_vport_transact(&vport_request, NULL, NULL);

        nl_sock_destroy(ch->sock);
    }

    free(dpif->channels);
    dpif->channels = NULL;
    dpif->uc_array_size = 0;

    free(dpif->epoll_events);
    dpif->epoll_events = NULL;
    dpif->n_events = dpif->event_offset = 0;

    /* Don't close dpif->epoll_fd since that would cause other threads that
     * call dpif_recv_wait(dpif) to wait on an arbitrary fd or a closed fd.
*/ } static int add_channel(struct dpif_linux *dpif, odp_port_t port_no, struct nl_sock *sock) { struct epoll_event event; uint32_t port_idx = odp_to_u32(port_no); if (dpif->epoll_fd < 0) { return 0; } /* We assume that the datapath densely chooses port numbers, which * can therefore be used as an index into an array of channels. */ if (port_idx >= dpif->uc_array_size) { uint32_t new_size = port_idx + 1; uint32_t i; if (new_size > MAX_PORTS) { VLOG_WARN_RL(&error_rl, "%s: datapath port %"PRIu32" too big", dpif_name(&dpif->dpif), port_no); return EFBIG; } dpif->channels = xrealloc(dpif->channels, new_size * sizeof *dpif->channels); for (i = dpif->uc_array_size; i < new_size; i++) { dpif->channels[i].sock = NULL; } dpif->epoll_events = xrealloc(dpif->epoll_events, new_size * sizeof *dpif->epoll_events); dpif->uc_array_size = new_size; } memset(&event, 0, sizeof event); event.events = EPOLLIN; event.data.u32 = port_idx; if (epoll_ctl(dpif->epoll_fd, EPOLL_CTL_ADD, nl_sock_fd(sock), &event) < 0) { return errno; } nl_sock_destroy(dpif->channels[port_idx].sock); dpif->channels[port_idx].sock = sock; dpif->channels[port_idx].last_poll = LLONG_MIN; return 0; } static void del_channel(struct dpif_linux *dpif, odp_port_t port_no) { struct dpif_channel *ch; uint32_t port_idx = odp_to_u32(port_no); if (dpif->epoll_fd < 0 || port_idx >= dpif->uc_array_size) { return; } ch = &dpif->channels[port_idx]; if (!ch->sock) { return; } epoll_ctl(dpif->epoll_fd, EPOLL_CTL_DEL, nl_sock_fd(ch->sock), NULL); dpif->event_offset = dpif->n_events = 0; nl_sock_destroy(ch->sock); ch->sock = NULL; } static void dpif_linux_close(struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); nl_sock_destroy(dpif->port_notifier); destroy_channels(dpif); if (dpif->epoll_fd >= 0) { close(dpif->epoll_fd); } ovs_mutex_destroy(&dpif->upcall_lock); free(dpif); } static int dpif_linux_destroy(struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_dp dp; 
dpif_linux_dp_init(&dp);
    dp.cmd = OVS_DP_CMD_DEL;
    dp.dp_ifindex = dpif->dp_ifindex;
    return dpif_linux_dp_transact(&dp, NULL, NULL);
}

/* Fetches the datapath's statistics with dpif_linux_dp_get() and copies the
 * hit, missed, lost and flow counts into '*stats'.  Returns 0 if successful,
 * otherwise the error from dpif_linux_dp_get(), in which case '*stats' is
 * left untouched. */
static int
dpif_linux_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats)
{
    struct dpif_linux_dp dp;
    struct ofpbuf *buf;
    int error;

    error = dpif_linux_dp_get(dpif_, &dp, &buf);
    if (!error) {
        stats->n_hit    = dp.stats.n_hit;
        stats->n_missed = dp.stats.n_missed;
        stats->n_lost   = dp.stats.n_lost;
        stats->n_flows  = dp.stats.n_flows;
        ofpbuf_delete(buf);
    }
    return error;
}

/* Returns the netdev type name that corresponds to 'vport->type'.  For a
 * type with no known name, logs a rate-limited warning and returns
 * "unknown". */
static const char *
get_vport_type(const struct dpif_linux_vport *vport)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

    switch (vport->type) {
    case OVS_VPORT_TYPE_NETDEV:
        return "system";

    case OVS_VPORT_TYPE_INTERNAL:
        return "internal";

    case OVS_VPORT_TYPE_GRE:
        return "gre";

    case OVS_VPORT_TYPE_GRE64:
        return "gre64";

    case OVS_VPORT_TYPE_VXLAN:
        return "vxlan";

    case OVS_VPORT_TYPE_LISP:
        return "lisp";

    case OVS_VPORT_TYPE_UNSPEC:
    case __OVS_VPORT_TYPE_MAX:
        break;
    }

    VLOG_WARN_RL(&rl, "dp%d: port `%s' has unsupported type %u",
                 vport->dp_ifindex, vport->name, (unsigned int) vport->type);
    return "unknown";
}

/* Returns the datapath vport type to use for 'netdev', chosen by its type
 * name, or OVS_VPORT_TYPE_UNSPEC if the type is not supported.
 *
 * The two GRE tests use strstr(), so they accept any type name that merely
 * contains "gre64" or "gre"; "gre64" has to be tested first because it also
 * contains "gre". */
static enum ovs_vport_type
netdev_to_ovs_vport_type(const struct netdev *netdev)
{
    const char *type = netdev_get_type(netdev);

    if (!strcmp(type, "tap") || !strcmp(type, "system")) {
        return OVS_VPORT_TYPE_NETDEV;
    } else if (!strcmp(type, "internal")) {
        return OVS_VPORT_TYPE_INTERNAL;
    } else if (strstr(type, "gre64")) {
        return OVS_VPORT_TYPE_GRE64;
    } else if (strstr(type, "gre")) {
        return OVS_VPORT_TYPE_GRE;
    } else if (!strcmp(type, "vxlan")) {
        return OVS_VPORT_TYPE_VXLAN;
    } else if (!strcmp(type, "lisp")) {
        return OVS_VPORT_TYPE_LISP;
    } else {
        return OVS_VPORT_TYPE_UNSPEC;
    }
}

/* Adds 'netdev' to the datapath as a new vport.  '*port_nop' is passed to the
 * datapath as the requested port number and, on success, is updated to the
 * port number in the reply.
 *
 * If 'dpif_' has an epoll fd, a Netlink socket is created for the port and
 * registered as its upcall channel.  If that registration fails, the port is
 * deleted again.
 *
 * Returns 0 if successful, EINVAL if the type of 'netdev' is not supported,
 * otherwise the error from the step that failed.  dpif_linux_port_add()
 * calls this with 'upcall_lock' held. */
static int
dpif_linux_port_add__(struct dpif *dpif_, struct netdev *netdev,
                      odp_port_t *port_nop)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    const struct netdev_tunnel_config *tnl_cfg;
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *name =
netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    const char *type = netdev_get_type(netdev);
    struct dpif_linux_vport request, reply;
    struct nl_sock *sock = NULL;
    uint32_t upcall_pid;
    struct ofpbuf *buf;
    uint64_t options_stub[64 / 8];
    struct ofpbuf options;
    int error;

    /* A socket for upcalls is needed only if there is an epoll fd. */
    if (dpif->epoll_fd >= 0) {
        error = nl_sock_create(NETLINK_GENERIC, &sock);
        if (error) {
            return error;
        }
    }

    dpif_linux_vport_init(&request);
    request.cmd = OVS_VPORT_CMD_NEW;
    request.dp_ifindex = dpif->dp_ifindex;
    request.type = netdev_to_ovs_vport_type(netdev);
    if (request.type == OVS_VPORT_TYPE_UNSPEC) {
        VLOG_WARN_RL(&error_rl, "%s: cannot create port `%s' because it has "
                     "unsupported type `%s'",
                     dpif_name(dpif_), name, type);
        nl_sock_destroy(sock);
        return EINVAL;
    }
    request.name = name;

    /* Ask for LRO to be turned off on "system" and "tap" devices. */
    if (request.type == OVS_VPORT_TYPE_NETDEV) {
        netdev_linux_ethtool_set_flag(netdev, ETH_FLAG_LRO, "LRO", false);
    }

    /* Pass a configured (nonzero) tunnel destination port as an option. */
    tnl_cfg = netdev_get_tunnel_config(netdev);
    if (tnl_cfg && tnl_cfg->dst_port != 0) {
        ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
        nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
                       ntohs(tnl_cfg->dst_port));
        request.options = options.data;
        request.options_len = options.size;
    }

    request.port_no = *port_nop;
    /* Without a socket the port is created with an upcall pid of 0. */
    upcall_pid = sock ? nl_sock_pid(sock) : 0;
    request.upcall_pid = &upcall_pid;

    error = dpif_linux_vport_transact(&request, &reply, &buf);
    if (!error) {
        *port_nop = reply.port_no;
        VLOG_DBG("%s: assigning port %"PRIu32" to netlink pid %"PRIu32,
                 dpif_name(dpif_), reply.port_no, upcall_pid);
    } else {
        if (error == EBUSY && *port_nop != ODPP_NONE) {
            VLOG_INFO("%s: requested port %"PRIu32" is in use",
                      dpif_name(dpif_), *port_nop);
        }
        nl_sock_destroy(sock);
        ofpbuf_delete(buf);
        return error;
    }
    ofpbuf_delete(buf);

    if (sock) {
        error = add_channel(dpif, *port_nop, sock);
        if (error) {
            VLOG_INFO("%s: could not add channel for port %s",
                      dpif_name(dpif_), name);

            /* Delete the port. */
            dpif_linux_vport_init(&request);
            request.cmd = OVS_VPORT_CMD_DEL;
            request.dp_ifindex = dpif->dp_ifindex;
            request.port_no = *port_nop;
            dpif_linux_vport_transact(&request, NULL, NULL);
            nl_sock_destroy(sock);
            return error;
        }
    }

    return 0;
}

/* Calls dpif_linux_port_add__() with 'upcall_lock' held and returns its
 * result. */
static int
dpif_linux_port_add(struct dpif *dpif_, struct netdev *netdev,
                    odp_port_t *port_nop)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    int error;

    ovs_mutex_lock(&dpif->upcall_lock);
    error = dpif_linux_port_add__(dpif_, netdev, port_nop);
    ovs_mutex_unlock(&dpif->upcall_lock);

    return error;
}

/* Deletes port 'port_no' from the datapath with an OVS_VPORT_CMD_DEL request
 * and returns the result of that transaction.  The port's upcall channel is
 * removed whether or not the request succeeds.  dpif_linux_port_del() calls
 * this with 'upcall_lock' held. */
static int
dpif_linux_port_del__(struct dpif *dpif_, odp_port_t port_no)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    struct dpif_linux_vport vport;
    int error;

    dpif_linux_vport_init(&vport);
    vport.cmd = OVS_VPORT_CMD_DEL;
    vport.dp_ifindex = dpif->dp_ifindex;
    vport.port_no = port_no;
    error = dpif_linux_vport_transact(&vport, NULL, NULL);

    del_channel(dpif, port_no);

    return error;
}

/* Calls dpif_linux_port_del__() with 'upcall_lock' held and returns its
 * result. */
static int
dpif_linux_port_del(struct dpif *dpif_, odp_port_t port_no)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    int error;

    ovs_mutex_lock(&dpif->upcall_lock);
    error = dpif_linux_port_del__(dpif_, port_no);
    ovs_mutex_unlock(&dpif->upcall_lock);

    return error;
}

/* Looks up a port in 'dpif' with an OVS_VPORT_CMD_GET request that carries
 * both 'port_no' and 'port_name'.  The wrappers below pass a null name to
 * query by number, or port number 0 together with a name to query by name.
 *
 * Returns 0 if the port was found in 'dpif', ENODEV if the reply describes a
 * port in a different datapath, otherwise the error from the transaction.
 * On success, if 'dpif_port' is nonnull, fills it with copies of the port's
 * name and type (made with xstrdup()) and with its port number. */
static int
dpif_linux_port_query__(const struct dpif *dpif, odp_port_t port_no,
                        const char *port_name, struct dpif_port *dpif_port)
{
    struct dpif_linux_vport request;
    struct dpif_linux_vport reply;
    struct ofpbuf *buf;
    int error;

    dpif_linux_vport_init(&request);
    request.cmd = OVS_VPORT_CMD_GET;
    request.dp_ifindex = dpif_linux_cast(dpif)->dp_ifindex;
    request.port_no = port_no;
    request.name = port_name;

    error = dpif_linux_vport_transact(&request, &reply, &buf);
    if (!error) {
        if (reply.dp_ifindex != request.dp_ifindex) {
            /* A query by name reported that 'port_name' is in some datapath
             * other than 'dpif', but the caller wants to know about 'dpif'. */
            error = ENODEV;
        } else if (dpif_port) {
            dpif_port->name = xstrdup(reply.name);
            dpif_port->type = xstrdup(get_vport_type(&reply));
            dpif_port->port_no = reply.port_no;
        }
        ofpbuf_delete(buf);
    }
    return error;
}

/* Queries 'dpif' for the port numbered 'port_no'; see
 * dpif_linux_port_query__() for the return value and for 'dpif_port'. */
static int
dpif_linux_port_query_by_number(const struct dpif *dpif, odp_port_t port_no,
                                struct dpif_port *dpif_port)
{
    return dpif_linux_port_query__(dpif, port_no, NULL, dpif_port);
}

/* Queries 'dpif' for the port named 'devname'; see
 * dpif_linux_port_query__() for the return value and for 'dpif_port'. */
static int
dpif_linux_port_query_by_name(const struct dpif *dpif, const char *devname,
                              struct dpif_port *dpif_port)
{
    return dpif_linux_port_query__(dpif, 0, devname, dpif_port);
}

/* Returns MAX_PORTS, the same limit that add_channel() applies to port
 * numbers. */
static uint32_t
dpif_linux_get_max_ports(const struct dpif *dpif OVS_UNUSED)
{
    return MAX_PORTS;
}

static uint32_t
dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
{
    struct dpif_linux *dpif = dpif_linux_cast(dpif_);
    uint32_t port_idx = odp_to_u32(port_no);
    uint32_t pid = 0;

    ovs_mutex_lock(&dpif->upcall_lock);
    if (dpif->epoll_fd >= 0) {
        /* The ODPP_NONE "reserved" port number uses the "ovs-system"'s
         * channel, since it is not heavily loaded. */
        uint32_t idx = port_idx >= dpif->uc_array_size ?
0 : port_idx; pid = nl_sock_pid(dpif->channels[idx].sock); } ovs_mutex_unlock(&dpif->upcall_lock); return pid; } static int dpif_linux_flow_flush(struct dpif *dpif_) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_flow flow; dpif_linux_flow_init(&flow); flow.cmd = OVS_FLOW_CMD_DEL; flow.dp_ifindex = dpif->dp_ifindex; return dpif_linux_flow_transact(&flow, NULL, NULL); } struct dpif_linux_port_state { struct nl_dump dump; }; static int dpif_linux_port_dump_start(const struct dpif *dpif_, void **statep) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_port_state *state; struct dpif_linux_vport request; struct ofpbuf *buf; *statep = state = xmalloc(sizeof *state); dpif_linux_vport_init(&request); request.cmd = OVS_DP_CMD_GET; request.dp_ifindex = dpif->dp_ifindex; buf = ofpbuf_new(1024); dpif_linux_vport_to_ofpbuf(&request, buf); nl_dump_start(&state->dump, NETLINK_GENERIC, buf); ofpbuf_delete(buf); return 0; } static int dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_, struct dpif_port *dpif_port) { struct dpif_linux_port_state *state = state_; struct dpif_linux_vport vport; struct ofpbuf buf; int error; if (!nl_dump_next(&state->dump, &buf)) { return EOF; } error = dpif_linux_vport_from_ofpbuf(&vport, &buf); if (error) { return error; } dpif_port->name = CONST_CAST(char *, vport.name); dpif_port->type = CONST_CAST(char *, get_vport_type(&vport)); dpif_port->port_no = vport.port_no; return 0; } static int dpif_linux_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_) { struct dpif_linux_port_state *state = state_; int error = nl_dump_done(&state->dump); free(state); return error; } static int dpif_linux_port_poll(const struct dpif *dpif_, char **devnamep) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); /* Lazily create the Netlink socket to listen for notifications. 
*/ if (!dpif->port_notifier) { struct nl_sock *sock; int error; error = nl_sock_create(NETLINK_GENERIC, &sock); if (error) { return error; } error = nl_sock_join_mcgroup(sock, ovs_vport_mcgroup); if (error) { nl_sock_destroy(sock); return error; } dpif->port_notifier = sock; /* We have no idea of the current state so report that everything * changed. */ return ENOBUFS; } for (;;) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); uint64_t buf_stub[4096 / 8]; struct ofpbuf buf; int error; ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); error = nl_sock_recv(dpif->port_notifier, &buf, false); if (!error) { struct dpif_linux_vport vport; error = dpif_linux_vport_from_ofpbuf(&vport, &buf); if (!error) { if (vport.dp_ifindex == dpif->dp_ifindex && (vport.cmd == OVS_VPORT_CMD_NEW || vport.cmd == OVS_VPORT_CMD_DEL || vport.cmd == OVS_VPORT_CMD_SET)) { VLOG_DBG("port_changed: dpif:%s vport:%s cmd:%"PRIu8, dpif->dpif.full_name, vport.name, vport.cmd); *devnamep = xstrdup(vport.name); ofpbuf_uninit(&buf); return 0; } } } else if (error != EAGAIN) { VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)", ovs_strerror(error)); nl_sock_drain(dpif->port_notifier); error = ENOBUFS; } ofpbuf_uninit(&buf); if (error) { return error; } } } static void dpif_linux_port_poll_wait(const struct dpif *dpif_) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); if (dpif->port_notifier) { nl_sock_wait(dpif->port_notifier, POLLIN); } else { poll_immediate_wake(); } } static int dpif_linux_flow_get__(const struct dpif *dpif_, const struct nlattr *key, size_t key_len, struct dpif_linux_flow *reply, struct ofpbuf **bufp) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_flow request; dpif_linux_flow_init(&request); request.cmd = OVS_FLOW_CMD_GET; request.dp_ifindex = dpif->dp_ifindex; request.key = key; request.key_len = key_len; return dpif_linux_flow_transact(&request, reply, bufp); } static int dpif_linux_flow_get(const struct dpif *dpif_, 
const struct nlattr *key, size_t key_len, struct ofpbuf **actionsp, struct dpif_flow_stats *stats) { struct dpif_linux_flow reply; struct ofpbuf *buf; int error; error = dpif_linux_flow_get__(dpif_, key, key_len, &reply, &buf); if (!error) { if (stats) { dpif_linux_flow_get_stats(&reply, stats); } if (actionsp) { buf->data = CONST_CAST(struct nlattr *, reply.actions); buf->size = reply.actions_len; *actionsp = buf; } else { ofpbuf_delete(buf); } } return error; } static void dpif_linux_init_flow_put(struct dpif *dpif_, const struct dpif_flow_put *put, struct dpif_linux_flow *request) { static const struct nlattr dummy_action; const struct dpif_linux *dpif = dpif_linux_cast(dpif_); dpif_linux_flow_init(request); request->cmd = (put->flags & DPIF_FP_CREATE ? OVS_FLOW_CMD_NEW : OVS_FLOW_CMD_SET); request->dp_ifindex = dpif->dp_ifindex; request->key = put->key; request->key_len = put->key_len; request->mask = put->mask; request->mask_len = put->mask_len; /* Ensure that OVS_FLOW_ATTR_ACTIONS will always be included. */ request->actions = (put->actions ? put->actions : CONST_CAST(struct nlattr *, &dummy_action)); request->actions_len = put->actions_len; if (put->flags & DPIF_FP_ZERO_STATS) { request->clear = true; } request->nlmsg_flags = put->flags & DPIF_FP_MODIFY ? 0 : NLM_F_CREATE; } static int dpif_linux_flow_put(struct dpif *dpif_, const struct dpif_flow_put *put) { struct dpif_linux_flow request, reply; struct ofpbuf *buf; int error; dpif_linux_init_flow_put(dpif_, put, &request); error = dpif_linux_flow_transact(&request, put->stats ? &reply : NULL, put->stats ? 
&buf : NULL); if (!error && put->stats) { dpif_linux_flow_get_stats(&reply, put->stats); ofpbuf_delete(buf); } return error; } static void dpif_linux_init_flow_del(struct dpif *dpif_, const struct dpif_flow_del *del, struct dpif_linux_flow *request) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); dpif_linux_flow_init(request); request->cmd = OVS_FLOW_CMD_DEL; request->dp_ifindex = dpif->dp_ifindex; request->key = del->key; request->key_len = del->key_len; } static int dpif_linux_flow_del(struct dpif *dpif_, const struct dpif_flow_del *del) { struct dpif_linux_flow request, reply; struct ofpbuf *buf; int error; dpif_linux_init_flow_del(dpif_, del, &request); error = dpif_linux_flow_transact(&request, del->stats ? &reply : NULL, del->stats ? &buf : NULL); if (!error && del->stats) { dpif_linux_flow_get_stats(&reply, del->stats); ofpbuf_delete(buf); } return error; } struct dpif_linux_flow_state { struct nl_dump dump; struct dpif_linux_flow flow; struct dpif_flow_stats stats; struct ofpbuf *buf; }; static int dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_flow_state *state; struct dpif_linux_flow request; struct ofpbuf *buf; *statep = state = xmalloc(sizeof *state); dpif_linux_flow_init(&request); request.cmd = OVS_DP_CMD_GET; request.dp_ifindex = dpif->dp_ifindex; buf = ofpbuf_new(1024); dpif_linux_flow_to_ofpbuf(&request, buf); nl_dump_start(&state->dump, NETLINK_GENERIC, buf); ofpbuf_delete(buf); state->buf = NULL; return 0; } static int dpif_linux_flow_dump_next(const struct dpif *dpif_ OVS_UNUSED, void *state_, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats) { struct dpif_linux_flow_state *state = state_; struct ofpbuf buf; int error; do { ofpbuf_delete(state->buf); state->buf = NULL; if (!nl_dump_next(&state->dump, &buf)) { 
return EOF; } error = dpif_linux_flow_from_ofpbuf(&state->flow, &buf); if (error) { return error; } if (actions && !state->flow.actions) { error = dpif_linux_flow_get__(dpif_, state->flow.key, state->flow.key_len, &state->flow, &state->buf); if (error == ENOENT) { VLOG_DBG("dumped flow disappeared on get"); } else if (error) { VLOG_WARN("error fetching dumped flow: %s", ovs_strerror(error)); } } } while (error); if (actions) { *actions = state->flow.actions; *actions_len = state->flow.actions_len; } if (key) { *key = state->flow.key; *key_len = state->flow.key_len; } if (mask) { *mask = state->flow.mask; *mask_len = state->flow.mask ? state->flow.mask_len : 0; } if (stats) { dpif_linux_flow_get_stats(&state->flow, &state->stats); *stats = &state->stats; } return error; } static int dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) { struct dpif_linux_flow_state *state = state_; int error = nl_dump_done(&state->dump); ofpbuf_delete(state->buf); free(state); return error; } static void dpif_linux_encode_execute(int dp_ifindex, const struct dpif_execute *d_exec, struct ofpbuf *buf) { struct ovs_header *k_exec; ofpbuf_prealloc_tailroom(buf, (64 + d_exec->packet->size + d_exec->key_len + d_exec->actions_len)); nl_msg_put_genlmsghdr(buf, 0, ovs_packet_family, NLM_F_REQUEST, OVS_PACKET_CMD_EXECUTE, OVS_PACKET_VERSION); k_exec = ofpbuf_put_uninit(buf, sizeof *k_exec); k_exec->dp_ifindex = dp_ifindex; nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET, d_exec->packet->data, d_exec->packet->size); nl_msg_put_unspec(buf, OVS_PACKET_ATTR_KEY, d_exec->key, d_exec->key_len); nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS, d_exec->actions, d_exec->actions_len); } static int dpif_linux_execute__(int dp_ifindex, const struct dpif_execute *execute) { uint64_t request_stub[1024 / 8]; struct ofpbuf request; int error; ofpbuf_use_stub(&request, request_stub, sizeof request_stub); dpif_linux_encode_execute(dp_ifindex, execute, &request); error = 
nl_transact(NETLINK_GENERIC, &request, NULL); ofpbuf_uninit(&request); return error; } static int dpif_linux_execute(struct dpif *dpif_, const struct dpif_execute *execute) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); return dpif_linux_execute__(dpif->dp_ifindex, execute); } #define MAX_OPS 50 static void dpif_linux_operate__(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) { const struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct op_auxdata { struct nl_transaction txn; struct ofpbuf request; uint64_t request_stub[1024 / 8]; struct ofpbuf reply; uint64_t reply_stub[1024 / 8]; } auxes[MAX_OPS]; struct nl_transaction *txnsp[MAX_OPS]; size_t i; ovs_assert(n_ops <= MAX_OPS); for (i = 0; i < n_ops; i++) { struct op_auxdata *aux = &auxes[i]; struct dpif_op *op = ops[i]; struct dpif_flow_put *put; struct dpif_flow_del *del; struct dpif_execute *execute; struct dpif_linux_flow flow; ofpbuf_use_stub(&aux->request, aux->request_stub, sizeof aux->request_stub); aux->txn.request = &aux->request; ofpbuf_use_stub(&aux->reply, aux->reply_stub, sizeof aux->reply_stub); aux->txn.reply = NULL; switch (op->type) { case DPIF_OP_FLOW_PUT: put = &op->u.flow_put; dpif_linux_init_flow_put(dpif_, put, &flow); if (put->stats) { flow.nlmsg_flags |= NLM_F_ECHO; aux->txn.reply = &aux->reply; } dpif_linux_flow_to_ofpbuf(&flow, &aux->request); break; case DPIF_OP_FLOW_DEL: del = &op->u.flow_del; dpif_linux_init_flow_del(dpif_, del, &flow); if (del->stats) { flow.nlmsg_flags |= NLM_F_ECHO; aux->txn.reply = &aux->reply; } dpif_linux_flow_to_ofpbuf(&flow, &aux->request); break; case DPIF_OP_EXECUTE: execute = &op->u.execute; dpif_linux_encode_execute(dpif->dp_ifindex, execute, &aux->request); break; default: NOT_REACHED(); } } for (i = 0; i < n_ops; i++) { txnsp[i] = &auxes[i].txn; } nl_transact_multiple(NETLINK_GENERIC, txnsp, n_ops); for (i = 0; i < n_ops; i++) { struct op_auxdata *aux = &auxes[i]; struct nl_transaction *txn = &auxes[i].txn; struct dpif_op *op = ops[i]; 
struct dpif_flow_put *put; struct dpif_flow_del *del; op->error = txn->error; switch (op->type) { case DPIF_OP_FLOW_PUT: put = &op->u.flow_put; if (put->stats) { if (!op->error) { struct dpif_linux_flow reply; op->error = dpif_linux_flow_from_ofpbuf(&reply, txn->reply); if (!op->error) { dpif_linux_flow_get_stats(&reply, put->stats); } } if (op->error) { memset(put->stats, 0, sizeof *put->stats); } } break; case DPIF_OP_FLOW_DEL: del = &op->u.flow_del; if (del->stats) { if (!op->error) { struct dpif_linux_flow reply; op->error = dpif_linux_flow_from_ofpbuf(&reply, txn->reply); if (!op->error) { dpif_linux_flow_get_stats(&reply, del->stats); } } if (op->error) { memset(del->stats, 0, sizeof *del->stats); } } break; case DPIF_OP_EXECUTE: break; default: NOT_REACHED(); } ofpbuf_uninit(&aux->request); ofpbuf_uninit(&aux->reply); } } static void dpif_linux_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) { while (n_ops > 0) { size_t chunk = MIN(n_ops, MAX_OPS); dpif_linux_operate__(dpif, ops, chunk); ops += chunk; n_ops -= chunk; } } static int dpif_linux_recv_set__(struct dpif *dpif_, bool enable) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); if ((dpif->epoll_fd >= 0) == enable) { return 0; } if (!enable) { destroy_channels(dpif); } else { struct dpif_port_dump port_dump; struct dpif_port port; if (dpif->epoll_fd < 0) { dpif->epoll_fd = epoll_create(10); if (dpif->epoll_fd < 0) { return errno; } } DPIF_PORT_FOR_EACH (&port, &port_dump, &dpif->dpif) { struct dpif_linux_vport vport_request; struct nl_sock *sock; uint32_t upcall_pid; int error; error = nl_sock_create(NETLINK_GENERIC, &sock); if (error) { return error; } upcall_pid = nl_sock_pid(sock); dpif_linux_vport_init(&vport_request); vport_request.cmd = OVS_VPORT_CMD_SET; vport_request.dp_ifindex = dpif->dp_ifindex; vport_request.port_no = port.port_no; vport_request.upcall_pid = &upcall_pid; error = dpif_linux_vport_transact(&vport_request, NULL, NULL); if (!error) { VLOG_DBG("%s: assigning port 
%"PRIu32" to netlink pid %"PRIu32, dpif_name(&dpif->dpif), vport_request.port_no, upcall_pid); } else { VLOG_WARN_RL(&error_rl, "%s: failed to set upcall pid on port: %s", dpif_name(&dpif->dpif), ovs_strerror(error)); nl_sock_destroy(sock); if (error == ENODEV || error == ENOENT) { /* This device isn't there, but keep trying the others. */ continue; } else { return error; } } error = add_channel(dpif, port.port_no, sock); if (error) { VLOG_INFO("%s: could not add channel for port %s", dpif_name(dpif_), port.name); nl_sock_destroy(sock); return error; } } } return 0; } static int dpif_linux_recv_set(struct dpif *dpif_, bool enable) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); int error; ovs_mutex_lock(&dpif->upcall_lock); error = dpif_linux_recv_set__(dpif_, enable); ovs_mutex_unlock(&dpif->upcall_lock); return error; } static int dpif_linux_queue_to_priority(const struct dpif *dpif OVS_UNUSED, uint32_t queue_id, uint32_t *priority) { if (queue_id < 0xf000) { *priority = TC_H_MAKE(1 << 16, queue_id + 1); return 0; } else { return EINVAL; } } static int parse_odp_packet(struct ofpbuf *buf, struct dpif_upcall *upcall, int *dp_ifindex) { static const struct nl_policy ovs_packet_policy[] = { /* Always present. */ [OVS_PACKET_ATTR_PACKET] = { .type = NL_A_UNSPEC, .min_len = ETH_HEADER_LEN }, [OVS_PACKET_ATTR_KEY] = { .type = NL_A_NESTED }, /* OVS_PACKET_CMD_ACTION only. 
*/ [OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true }, }; struct ovs_header *ovs_header; struct nlattr *a[ARRAY_SIZE(ovs_packet_policy)]; struct nlmsghdr *nlmsg; struct genlmsghdr *genl; struct ofpbuf b; int type; ofpbuf_use_const(&b, buf->data, buf->size); nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); genl = ofpbuf_try_pull(&b, sizeof *genl); ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); if (!nlmsg || !genl || !ovs_header || nlmsg->nlmsg_type != ovs_packet_family || !nl_policy_parse(&b, 0, ovs_packet_policy, a, ARRAY_SIZE(ovs_packet_policy))) { return EINVAL; } type = (genl->cmd == OVS_PACKET_CMD_MISS ? DPIF_UC_MISS : genl->cmd == OVS_PACKET_CMD_ACTION ? DPIF_UC_ACTION : -1); if (type < 0) { return EINVAL; } memset(upcall, 0, sizeof *upcall); upcall->type = type; upcall->packet = buf; upcall->packet->data = CONST_CAST(struct nlattr *, nl_attr_get(a[OVS_PACKET_ATTR_PACKET])); upcall->packet->size = nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]); upcall->key = CONST_CAST(struct nlattr *, nl_attr_get(a[OVS_PACKET_ATTR_KEY])); upcall->key_len = nl_attr_get_size(a[OVS_PACKET_ATTR_KEY]); upcall->userdata = a[OVS_PACKET_ATTR_USERDATA]; *dp_ifindex = ovs_header->dp_ifindex; return 0; } static int dpif_linux_recv__(struct dpif *dpif_, struct dpif_upcall *upcall, struct ofpbuf *buf) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); int read_tries = 0; if (dpif->epoll_fd < 0) { return EAGAIN; } if (dpif->event_offset >= dpif->n_events) { int retval; dpif->event_offset = dpif->n_events = 0; do { retval = epoll_wait(dpif->epoll_fd, dpif->epoll_events, dpif->uc_array_size, 0); } while (retval < 0 && errno == EINTR); if (retval < 0) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_WARN_RL(&rl, "epoll_wait failed (%s)", ovs_strerror(errno)); } else if (retval > 0) { dpif->n_events = retval; } } while (dpif->event_offset < dpif->n_events) { int idx = dpif->epoll_events[dpif->event_offset].data.u32; struct dpif_channel *ch = 
&dpif->channels[idx]; dpif->event_offset++; for (;;) { int dp_ifindex; int error; if (++read_tries > 50) { return EAGAIN; } error = nl_sock_recv(ch->sock, buf, false); if (error == ENOBUFS) { /* ENOBUFS typically means that we've received so many * packets that the buffer overflowed. Try again * immediately because there's almost certainly a packet * waiting for us. */ report_loss(dpif_, ch); continue; } ch->last_poll = time_msec(); if (error) { if (error == EAGAIN) { break; } return error; } error = parse_odp_packet(buf, upcall, &dp_ifindex); if (!error && dp_ifindex == dpif->dp_ifindex) { return 0; } else if (error) { return error; } } } return EAGAIN; } static int dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall, struct ofpbuf *buf) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); int error; ovs_mutex_lock(&dpif->upcall_lock); error = dpif_linux_recv__(dpif_, upcall, buf); ovs_mutex_unlock(&dpif->upcall_lock); return error; } static void dpif_linux_recv_wait(struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); ovs_mutex_lock(&dpif->upcall_lock); if (dpif->epoll_fd >= 0) { poll_fd_wait(dpif->epoll_fd, POLLIN); } ovs_mutex_unlock(&dpif->upcall_lock); } static void dpif_linux_recv_purge(struct dpif *dpif_) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); ovs_mutex_lock(&dpif->upcall_lock); if (dpif->epoll_fd >= 0) { struct dpif_channel *ch; for (ch = dpif->channels; ch < &dpif->channels[dpif->uc_array_size]; ch++) { if (ch->sock) { nl_sock_drain(ch->sock); } } } ovs_mutex_unlock(&dpif->upcall_lock); } const struct dpif_class dpif_linux_class = { "system", dpif_linux_enumerate, NULL, dpif_linux_open, dpif_linux_close, dpif_linux_destroy, NULL, /* run */ NULL, /* wait */ dpif_linux_get_stats, dpif_linux_port_add, dpif_linux_port_del, dpif_linux_port_query_by_number, dpif_linux_port_query_by_name, dpif_linux_get_max_ports, dpif_linux_port_get_pid, dpif_linux_port_dump_start, dpif_linux_port_dump_next, dpif_linux_port_dump_done, 
dpif_linux_port_poll, dpif_linux_port_poll_wait, dpif_linux_flow_get, dpif_linux_flow_put, dpif_linux_flow_del, dpif_linux_flow_flush, dpif_linux_flow_dump_start, dpif_linux_flow_dump_next, dpif_linux_flow_dump_done, dpif_linux_execute, dpif_linux_operate, dpif_linux_recv_set, dpif_linux_queue_to_priority, dpif_linux_recv, dpif_linux_recv_wait, dpif_linux_recv_purge, }; static int dpif_linux_init(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static int error; if (ovsthread_once_start(&once)) { error = nl_lookup_genl_family(OVS_DATAPATH_FAMILY, &ovs_datapath_family); if (error) { VLOG_ERR("Generic Netlink family '%s' does not exist. " "The Open vSwitch kernel module is probably not loaded.", OVS_DATAPATH_FAMILY); } if (!error) { error = nl_lookup_genl_family(OVS_VPORT_FAMILY, &ovs_vport_family); } if (!error) { error = nl_lookup_genl_family(OVS_FLOW_FAMILY, &ovs_flow_family); } if (!error) { error = nl_lookup_genl_family(OVS_PACKET_FAMILY, &ovs_packet_family); } if (!error) { error = nl_lookup_genl_mcgroup(OVS_VPORT_FAMILY, OVS_VPORT_MCGROUP, &ovs_vport_mcgroup); } ovsthread_once_done(&once); } return error; } bool dpif_linux_is_internal_device(const char *name) { struct dpif_linux_vport reply; struct ofpbuf *buf; int error; error = dpif_linux_vport_get(name, &reply, &buf); if (!error) { ofpbuf_delete(buf); } else if (error != ENODEV && error != ENOENT) { VLOG_WARN_RL(&error_rl, "%s: vport query failed (%s)", name, ovs_strerror(error)); } return reply.type == OVS_VPORT_TYPE_INTERNAL; } /* Parses the contents of 'buf', which contains a "struct ovs_header" followed * by Netlink attributes, into 'vport'. Returns 0 if successful, otherwise a * positive errno value. * * 'vport' will contain pointers into 'buf', so the caller should not free * 'buf' while 'vport' is still in use. 
*/
static int
dpif_linux_vport_from_ofpbuf(struct dpif_linux_vport *vport,
                             const struct ofpbuf *buf)
{
    /* Attributes accepted in a vport message.  Entries not marked optional
     * are required by the policy. */
    static const struct nl_policy ovs_vport_policy[] = {
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_TYPE] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NL_A_U32 },
        [OVS_VPORT_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_vport_stats),
                                   .optional = true },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NL_A_NESTED, .optional = true },
    };

    struct nlattr *attrs[ARRAY_SIZE(ovs_vport_policy)];
    struct genlmsghdr *genl;
    struct ovs_header *hdr;
    struct nlmsghdr *nlmsg;
    struct ofpbuf msg;

    /* Reset 'vport' first, so every field that the message does not supply
     * keeps its "empty" value. */
    dpif_linux_vport_init(vport);

    /* Walk a read-only view of 'buf': the three fixed headers come first,
     * the attributes follow. */
    ofpbuf_use_const(&msg, buf->data, buf->size);
    nlmsg = ofpbuf_try_pull(&msg, sizeof *nlmsg);
    genl = ofpbuf_try_pull(&msg, sizeof *genl);
    hdr = ofpbuf_try_pull(&msg, sizeof *hdr);

    if (!nlmsg || !genl || !hdr) {
        /* Message too short to hold the headers. */
        return EINVAL;
    }
    if (nlmsg->nlmsg_type != ovs_vport_family) {
        /* Not a vport-family message. */
        return EINVAL;
    }
    if (!nl_policy_parse(&msg, 0, ovs_vport_policy, attrs,
                         ARRAY_SIZE(ovs_vport_policy))) {
        return EINVAL;
    }

    vport->cmd = genl->cmd;
    vport->dp_ifindex = hdr->dp_ifindex;
    vport->port_no = nl_attr_get_odp_port(attrs[OVS_VPORT_ATTR_PORT_NO]);
    vport->type = nl_attr_get_u32(attrs[OVS_VPORT_ATTR_TYPE]);
    vport->name = nl_attr_get_string(attrs[OVS_VPORT_ATTR_NAME]);

    /* The remaining fields are filled in only for attributes that are
     * actually present; they end up pointing into 'buf'. */
    if (attrs[OVS_VPORT_ATTR_UPCALL_PID]) {
        vport->upcall_pid = nl_attr_get(attrs[OVS_VPORT_ATTR_UPCALL_PID]);
    }
    if (attrs[OVS_VPORT_ATTR_STATS]) {
        vport->stats = nl_attr_get(attrs[OVS_VPORT_ATTR_STATS]);
    }
    if (attrs[OVS_VPORT_ATTR_OPTIONS]) {
        vport->options = nl_attr_get(attrs[OVS_VPORT_ATTR_OPTIONS]);
        vport->options_len = nl_attr_get_size(attrs[OVS_VPORT_ATTR_OPTIONS]);
    }

    return 0;
}

/* Appends to 'buf' (which must initially be empty) a "struct ovs_header"
 * followed by Netlink attributes corresponding to 'vport'.
*/ static void dpif_linux_vport_to_ofpbuf(const struct dpif_linux_vport *vport, struct ofpbuf *buf) { struct ovs_header *ovs_header; nl_msg_put_genlmsghdr(buf, 0, ovs_vport_family, NLM_F_REQUEST | NLM_F_ECHO, vport->cmd, OVS_VPORT_VERSION); ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header); ovs_header->dp_ifindex = vport->dp_ifindex; if (vport->port_no != ODPP_NONE) { nl_msg_put_odp_port(buf, OVS_VPORT_ATTR_PORT_NO, vport->port_no); } if (vport->type != OVS_VPORT_TYPE_UNSPEC) { nl_msg_put_u32(buf, OVS_VPORT_ATTR_TYPE, vport->type); } if (vport->name) { nl_msg_put_string(buf, OVS_VPORT_ATTR_NAME, vport->name); } if (vport->upcall_pid) { nl_msg_put_u32(buf, OVS_VPORT_ATTR_UPCALL_PID, *vport->upcall_pid); } if (vport->stats) { nl_msg_put_unspec(buf, OVS_VPORT_ATTR_STATS, vport->stats, sizeof *vport->stats); } if (vport->options) { nl_msg_put_nested(buf, OVS_VPORT_ATTR_OPTIONS, vport->options, vport->options_len); } } /* Clears 'vport' to "empty" values. */ void dpif_linux_vport_init(struct dpif_linux_vport *vport) { memset(vport, 0, sizeof *vport); vport->port_no = ODPP_NONE; } /* Executes 'request' in the kernel datapath. If the command fails, returns a * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the * result of the command is expected to be an ovs_vport also, which is decoded * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the * reply is no longer needed ('reply' will contain pointers into '*bufp'). 
*/ int dpif_linux_vport_transact(const struct dpif_linux_vport *request, struct dpif_linux_vport *reply, struct ofpbuf **bufp) { struct ofpbuf *request_buf; int error; ovs_assert((reply != NULL) == (bufp != NULL)); error = dpif_linux_init(); if (error) { if (reply) { *bufp = NULL; dpif_linux_vport_init(reply); } return error; } request_buf = ofpbuf_new(1024); dpif_linux_vport_to_ofpbuf(request, request_buf); error = nl_transact(NETLINK_GENERIC, request_buf, bufp); ofpbuf_delete(request_buf); if (reply) { if (!error) { error = dpif_linux_vport_from_ofpbuf(reply, *bufp); } if (error) { dpif_linux_vport_init(reply); ofpbuf_delete(*bufp); *bufp = NULL; } } return error; } /* Obtains information about the kernel vport named 'name' and stores it into * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no * longer needed ('reply' will contain pointers into '*bufp'). */ int dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply, struct ofpbuf **bufp) { struct dpif_linux_vport request; dpif_linux_vport_init(&request); request.cmd = OVS_VPORT_CMD_GET; request.name = name; return dpif_linux_vport_transact(&request, reply, bufp); } /* Parses the contents of 'buf', which contains a "struct ovs_header" followed * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a * positive errno value. * * 'dp' will contain pointers into 'buf', so the caller should not free 'buf' * while 'dp' is still in use. 
*/ static int dpif_linux_dp_from_ofpbuf(struct dpif_linux_dp *dp, const struct ofpbuf *buf) { static const struct nl_policy ovs_datapath_policy[] = { [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .max_len = IFNAMSIZ }, [OVS_DP_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_dp_stats), .optional = true }, }; struct nlattr *a[ARRAY_SIZE(ovs_datapath_policy)]; struct ovs_header *ovs_header; struct nlmsghdr *nlmsg; struct genlmsghdr *genl; struct ofpbuf b; dpif_linux_dp_init(dp); ofpbuf_use_const(&b, buf->data, buf->size); nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); genl = ofpbuf_try_pull(&b, sizeof *genl); ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); if (!nlmsg || !genl || !ovs_header || nlmsg->nlmsg_type != ovs_datapath_family || !nl_policy_parse(&b, 0, ovs_datapath_policy, a, ARRAY_SIZE(ovs_datapath_policy))) { return EINVAL; } dp->cmd = genl->cmd; dp->dp_ifindex = ovs_header->dp_ifindex; dp->name = nl_attr_get_string(a[OVS_DP_ATTR_NAME]); if (a[OVS_DP_ATTR_STATS]) { /* Can't use structure assignment because Netlink doesn't ensure * sufficient alignment for 64-bit members. */ memcpy(&dp->stats, nl_attr_get(a[OVS_DP_ATTR_STATS]), sizeof dp->stats); } return 0; } /* Appends to 'buf' the Generic Netlink message described by 'dp'. */ static void dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf) { struct ovs_header *ovs_header; nl_msg_put_genlmsghdr(buf, 0, ovs_datapath_family, NLM_F_REQUEST | NLM_F_ECHO, dp->cmd, OVS_DATAPATH_VERSION); ovs_header = ofpbuf_put_uninit(buf, sizeof *ovs_header); ovs_header->dp_ifindex = dp->dp_ifindex; if (dp->name) { nl_msg_put_string(buf, OVS_DP_ATTR_NAME, dp->name); } if (dp->upcall_pid) { nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid); } /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */ } /* Clears 'dp' to "empty" values. 
*/ static void dpif_linux_dp_init(struct dpif_linux_dp *dp) { memset(dp, 0, sizeof *dp); } static void dpif_linux_dp_dump_start(struct nl_dump *dump) { struct dpif_linux_dp request; struct ofpbuf *buf; dpif_linux_dp_init(&request); request.cmd = OVS_DP_CMD_GET; buf = ofpbuf_new(1024); dpif_linux_dp_to_ofpbuf(&request, buf); nl_dump_start(dump, NETLINK_GENERIC, buf); ofpbuf_delete(buf); } /* Executes 'request' in the kernel datapath. If the command fails, returns a * positive errno value. Otherwise, if 'reply' and 'bufp' are null, returns 0 * without doing anything else. If 'reply' and 'bufp' are nonnull, then the * result of the command is expected to be of the same form, which is decoded * and stored in '*reply' and '*bufp'. The caller must free '*bufp' when the * reply is no longer needed ('reply' will contain pointers into '*bufp'). */ static int dpif_linux_dp_transact(const struct dpif_linux_dp *request, struct dpif_linux_dp *reply, struct ofpbuf **bufp) { struct ofpbuf *request_buf; int error; ovs_assert((reply != NULL) == (bufp != NULL)); request_buf = ofpbuf_new(1024); dpif_linux_dp_to_ofpbuf(request, request_buf); error = nl_transact(NETLINK_GENERIC, request_buf, bufp); ofpbuf_delete(request_buf); if (reply) { if (!error) { error = dpif_linux_dp_from_ofpbuf(reply, *bufp); } if (error) { dpif_linux_dp_init(reply); ofpbuf_delete(*bufp); *bufp = NULL; } } return error; } /* Obtains information about 'dpif_' and stores it into '*reply' and '*bufp'. * The caller must free '*bufp' when the reply is no longer needed ('reply' * will contain pointers into '*bufp'). 
*/ static int dpif_linux_dp_get(const struct dpif *dpif_, struct dpif_linux_dp *reply, struct ofpbuf **bufp) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); struct dpif_linux_dp request; dpif_linux_dp_init(&request); request.cmd = OVS_DP_CMD_GET; request.dp_ifindex = dpif->dp_ifindex; return dpif_linux_dp_transact(&request, reply, bufp); } /* Parses the contents of 'buf', which contains a "struct ovs_header" followed * by Netlink attributes, into 'flow'. Returns 0 if successful, otherwise a * positive errno value. * * 'flow' will contain pointers into 'buf', so the caller should not free 'buf' * while 'flow' is still in use. */ static int dpif_linux_flow_from_ofpbuf(struct dpif_linux_flow *flow, const struct ofpbuf *buf) { static const struct nl_policy ovs_flow_policy[] = { [OVS_FLOW_ATTR_KEY] = { .type = NL_A_NESTED }, [OVS_FLOW_ATTR_MASK] = { .type = NL_A_NESTED, .optional = true }, [OVS_FLOW_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true }, [OVS_FLOW_ATTR_STATS] = { NL_POLICY_FOR(struct ovs_flow_stats), .optional = true }, [OVS_FLOW_ATTR_TCP_FLAGS] = { .type = NL_A_U8, .optional = true }, [OVS_FLOW_ATTR_USED] = { .type = NL_A_U64, .optional = true }, /* The kernel never uses OVS_FLOW_ATTR_CLEAR. 
*/ }; struct nlattr *a[ARRAY_SIZE(ovs_flow_policy)]; struct ovs_header *ovs_header; struct nlmsghdr *nlmsg; struct genlmsghdr *genl; struct ofpbuf b; dpif_linux_flow_init(flow); ofpbuf_use_const(&b, buf->data, buf->size); nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); genl = ofpbuf_try_pull(&b, sizeof *genl); ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header); if (!nlmsg || !genl || !ovs_header || nlmsg->nlmsg_type != ovs_flow_family || !nl_policy_parse(&b, 0, ovs_flow_policy, a, ARRAY_SIZE(ovs_flow_policy))) { return EINVAL; } flow->nlmsg_flags = nlmsg->nlmsg_flags; flow->dp_ifindex = ovs_header->dp_ifindex; flow->key = nl_attr_get(a[OVS_FLOW_ATTR_KEY]); flow->key_len = nl_attr_get_size(a[OVS_FLOW_ATTR_KEY]); if (a[OVS_FLOW_ATTR_MASK]) { flow->mask = nl_attr_get(a[OVS_FLOW_ATTR_MASK]); flow->mask_len = nl_attr_get_size(a[OVS_FLOW_ATTR_MASK]); } if (a[OVS_FLOW_ATTR_ACTIONS]) { flow->actions = nl_attr_get(a[OVS_FLOW_ATTR_ACTIONS]); flow->actions_len = nl_attr_get_size(a[OVS_FLOW_ATTR_ACTIONS]); } if (a[OVS_FLOW_ATTR_STATS]) { flow->stats = nl_attr_get(a[OVS_FLOW_ATTR_STATS]); } if (a[OVS_FLOW_ATTR_TCP_FLAGS]) { flow->tcp_flags = nl_attr_get(a[OVS_FLOW_ATTR_TCP_FLAGS]); } if (a[OVS_FLOW_ATTR_USED]) { flow->used = nl_attr_get(a[OVS_FLOW_ATTR_USED]); } return 0; } /* Appends to 'buf' (which must initially be empty) a "struct ovs_header" * followed by Netlink attributes corresponding to 'flow'. 
*/
static void
dpif_linux_flow_to_ofpbuf(const struct dpif_linux_flow *flow,
                          struct ofpbuf *buf)
{
    struct ovs_header *hdr;

    nl_msg_put_genlmsghdr(buf, 0, ovs_flow_family,
                          NLM_F_REQUEST | flow->nlmsg_flags, flow->cmd,
                          OVS_FLOW_VERSION);

    hdr = ofpbuf_put_uninit(buf, sizeof *hdr);
    hdr->dp_ifindex = flow->dp_ifindex;

    if (flow->key_len) {
        nl_msg_put_unspec(buf, OVS_FLOW_ATTR_KEY, flow->key, flow->key_len);
    }
    if (flow->mask_len) {
        nl_msg_put_unspec(buf, OVS_FLOW_ATTR_MASK, flow->mask,
                          flow->mask_len);
    }
    /* Unlike key and mask, actions are emitted when either the pointer or
     * the length is nonzero, so a nonnull 'actions' of length 0 still
     * produces an (empty) attribute. */
    if (flow->actions || flow->actions_len) {
        nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS, flow->actions,
                          flow->actions_len);
    }

    /* These attributes are only ever received, never sent. */
    ovs_assert(!flow->stats);
    ovs_assert(!flow->tcp_flags);
    ovs_assert(!flow->used);

    if (flow->clear) {
        nl_msg_put_flag(buf, OVS_FLOW_ATTR_CLEAR);
    }
}

/* Resets every byte of '*flow' to zero. */
static void
dpif_linux_flow_init(struct dpif_linux_flow *flow)
{
    memset(flow, 0, sizeof *flow);
}

/* Executes 'request' in the kernel datapath.  If the command fails, returns a
 * positive errno value.  Otherwise, if 'reply' and 'bufp' are null, returns 0
 * without doing anything else.  If 'reply' and 'bufp' are nonnull, then the
 * result of the command is expected to be a flow also, which is decoded and
 * stored in '*reply' and '*bufp'.  The caller must free '*bufp' when the reply
 * is no longer needed ('reply' will contain pointers into '*bufp').
*/ static int dpif_linux_flow_transact(struct dpif_linux_flow *request, struct dpif_linux_flow *reply, struct ofpbuf **bufp) { struct ofpbuf *request_buf; int error; ovs_assert((reply != NULL) == (bufp != NULL)); if (reply) { request->nlmsg_flags |= NLM_F_ECHO; } request_buf = ofpbuf_new(1024); dpif_linux_flow_to_ofpbuf(request, request_buf); error = nl_transact(NETLINK_GENERIC, request_buf, bufp); ofpbuf_delete(request_buf); if (reply) { if (!error) { error = dpif_linux_flow_from_ofpbuf(reply, *bufp); } if (error) { dpif_linux_flow_init(reply); ofpbuf_delete(*bufp); *bufp = NULL; } } return error; } static void dpif_linux_flow_get_stats(const struct dpif_linux_flow *flow, struct dpif_flow_stats *stats) { if (flow->stats) { stats->n_packets = get_unaligned_u64(&flow->stats->n_packets); stats->n_bytes = get_unaligned_u64(&flow->stats->n_bytes); } else { stats->n_packets = 0; stats->n_bytes = 0; } stats->used = flow->used ? get_32aligned_u64(flow->used) : 0; stats->tcp_flags = flow->tcp_flags ? *flow->tcp_flags : 0; } /* Logs information about a packet that was recently lost in 'ch' (in * 'dpif_'). */ static void report_loss(struct dpif *dpif_, struct dpif_channel *ch) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); struct ds s; if (VLOG_DROP_WARN(&rl)) { return; } ds_init(&s); if (ch->last_poll != LLONG_MIN) { ds_put_format(&s, " (last polled %lld ms ago)", time_msec() - ch->last_poll); } VLOG_WARN("%s: lost packet on channel %td%s", dpif_name(dpif_), ch - dpif->channels, ds_cstr(&s)); ds_destroy(&s); } openvswitch-2.0.1+git20140120/lib/dpif-linux.h000066400000000000000000000036501226605124000204540ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef DPIF_LINUX_H #define DPIF_LINUX_H 1 #include #include #include #include #include "flow.h" struct ofpbuf; struct dpif_linux_vport { /* Generic Netlink header. */ uint8_t cmd; /* ovs_vport header. */ int dp_ifindex; odp_port_t port_no; /* ODPP_NONE if unknown. */ enum ovs_vport_type type; /* Attributes. * * The 'stats' member points to 64-bit data that might only be aligned on * 32-bit boundaries, so use get_unaligned_u64() to access its values. */ const char *name; /* OVS_VPORT_ATTR_NAME. */ const uint32_t *upcall_pid; /* OVS_VPORT_ATTR_UPCALL_PID. */ const struct ovs_vport_stats *stats; /* OVS_VPORT_ATTR_STATS. */ const struct nlattr *options; /* OVS_VPORT_ATTR_OPTIONS. */ size_t options_len; }; void dpif_linux_vport_init(struct dpif_linux_vport *); int dpif_linux_vport_transact(const struct dpif_linux_vport *request, struct dpif_linux_vport *reply, struct ofpbuf **bufp); int dpif_linux_vport_get(const char *name, struct dpif_linux_vport *reply, struct ofpbuf **bufp); bool dpif_linux_is_internal_device(const char *name); #endif /* dpif-linux.h */ openvswitch-2.0.1+git20140120/lib/dpif-netdev.c000066400000000000000000001126021226605124000205730ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "dpif.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "csum.h" #include "dpif.h" #include "dpif-provider.h" #include "dummy.h" #include "dynamic-string.h" #include "flow.h" #include "hmap.h" #include "list.h" #include "netdev.h" #include "netdev-vport.h" #include "netlink.h" #include "odp-execute.h" #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" #include "packets.h" #include "poll-loop.h" #include "random.h" #include "seq.h" #include "shash.h" #include "sset.h" #include "timeval.h" #include "unixctl.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(dpif_netdev); /* Configuration parameters. */ enum { MAX_PORTS = 256 }; /* Maximum number of ports. */ enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP * headers to be aligned on a 4-byte boundary. */ enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN }; /* Queues. */ enum { N_QUEUES = 2 }; /* Number of queues for dpif_recv(). */ enum { MAX_QUEUE_LEN = 128 }; /* Maximum number of packets per queue. */ enum { QUEUE_MASK = MAX_QUEUE_LEN - 1 }; BUILD_ASSERT_DECL(IS_POW2(MAX_QUEUE_LEN)); struct dp_netdev_upcall { struct dpif_upcall upcall; /* Queued upcall information. */ struct ofpbuf buf; /* ofpbuf instance for upcall.packet. */ }; struct dp_netdev_queue { struct dp_netdev_upcall upcalls[MAX_QUEUE_LEN]; unsigned int head, tail; }; /* Datapath based on the network device interface from netdev.h. 
*/ struct dp_netdev { const struct dpif_class *class; char *name; int open_cnt; bool destroyed; int max_mtu; /* Maximum MTU of any port added so far. */ struct dp_netdev_queue queues[N_QUEUES]; struct hmap flow_table; /* Flow table. */ struct seq *queue_seq; /* Incremented whenever a packet is queued. */ /* Statistics. */ long long int n_hit; /* Number of flow table matches. */ long long int n_missed; /* Number of flow table misses. */ long long int n_lost; /* Number of misses not passed to client. */ /* Ports. */ struct dp_netdev_port *ports[MAX_PORTS]; struct list port_list; struct seq *port_seq; /* Incremented whenever a port changes. */ }; /* A port in a netdev-based datapath. */ struct dp_netdev_port { odp_port_t port_no; /* Index into dp_netdev's 'ports'. */ struct list node; /* Element in dp_netdev's 'port_list'. */ struct netdev *netdev; struct netdev_saved_flags *sf; struct netdev_rx *rx; char *type; /* Port type as requested by user. */ }; /* A flow in dp_netdev's 'flow_table'. */ struct dp_netdev_flow { struct hmap_node node; /* Element in dp_netdev's 'flow_table'. */ struct flow key; /* Statistics. */ long long int used; /* Last used time, in monotonic msecs. */ long long int packet_count; /* Number of packets matched. */ long long int byte_count; /* Number of bytes matched. */ uint8_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */ /* Actions. */ struct nlattr *actions; size_t actions_len; }; /* Interface to netdev-based datapath. */ struct dpif_netdev { struct dpif dpif; struct dp_netdev *dp; uint64_t last_port_seq; }; /* All netdev-based datapaths. */ static struct shash dp_netdevs = SHASH_INITIALIZER(&dp_netdevs); /* Global lock for all data. 
*/ static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; static int get_port_by_number(struct dp_netdev *, odp_port_t port_no, struct dp_netdev_port **portp); static int get_port_by_name(struct dp_netdev *, const char *devname, struct dp_netdev_port **portp); static void dp_netdev_free(struct dp_netdev *); static void dp_netdev_flow_flush(struct dp_netdev *); static int do_add_port(struct dp_netdev *, const char *devname, const char *type, odp_port_t port_no); static int do_del_port(struct dp_netdev *, odp_port_t port_no); static int dpif_netdev_open(const struct dpif_class *, const char *name, bool create, struct dpif **); static int dp_netdev_output_userspace(struct dp_netdev *, const struct ofpbuf *, int queue_no, const struct flow *, const struct nlattr *userdata); static void dp_netdev_execute_actions(struct dp_netdev *, struct ofpbuf *, struct flow *, const struct nlattr *actions, size_t actions_len); static void dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port, struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark, const struct flow_tnl *tnl); static struct dpif_netdev * dpif_netdev_cast(const struct dpif *dpif) { ovs_assert(dpif->dpif_class->open == dpif_netdev_open); return CONTAINER_OF(dpif, struct dpif_netdev, dpif); } static struct dp_netdev * get_dp_netdev(const struct dpif *dpif) { return dpif_netdev_cast(dpif)->dp; } static int dpif_netdev_enumerate(struct sset *all_dps) { struct shash_node *node; ovs_mutex_lock(&dp_netdev_mutex); SHASH_FOR_EACH(node, &dp_netdevs) { sset_add(all_dps, node->name); } ovs_mutex_unlock(&dp_netdev_mutex); return 0; } static bool dpif_netdev_class_is_dummy(const struct dpif_class *class) { return class != &dpif_netdev_class; } static const char * dpif_netdev_port_open_type(const struct dpif_class *class, const char *type) { return strcmp(type, "internal") ? type : dpif_netdev_class_is_dummy(class) ? 
"dummy" : "tap"; } static struct dpif * create_dpif_netdev(struct dp_netdev *dp) { uint16_t netflow_id = hash_string(dp->name, 0); struct dpif_netdev *dpif; dp->open_cnt++; dpif = xmalloc(sizeof *dpif); dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id); dpif->dp = dp; dpif->last_port_seq = seq_read(dp->port_seq); return &dpif->dpif; } /* Choose an unused, non-zero port number and return it on success. * Return ODPP_NONE on failure. */ static odp_port_t choose_port(struct dp_netdev *dp, const char *name) { uint32_t port_no; if (dp->class != &dpif_netdev_class) { const char *p; int start_no = 0; /* If the port name begins with "br", start the number search at * 100 to make writing tests easier. */ if (!strncmp(name, "br", 2)) { start_no = 100; } /* If the port name contains a number, try to assign that port number. * This can make writing unit tests easier because port numbers are * predictable. */ for (p = name; *p != '\0'; p++) { if (isdigit((unsigned char) *p)) { port_no = start_no + strtol(p, NULL, 10); if (port_no > 0 && port_no < MAX_PORTS && !dp->ports[port_no]) { return u32_to_odp(port_no); } break; } } } for (port_no = 1; port_no < MAX_PORTS; port_no++) { if (!dp->ports[port_no]) { return u32_to_odp(port_no); } } return ODPP_NONE; } static int create_dp_netdev(const char *name, const struct dpif_class *class, struct dp_netdev **dpp) { struct dp_netdev *dp; int error; int i; dp = xzalloc(sizeof *dp); dp->class = class; dp->name = xstrdup(name); dp->open_cnt = 0; dp->max_mtu = ETH_PAYLOAD_MAX; for (i = 0; i < N_QUEUES; i++) { dp->queues[i].head = dp->queues[i].tail = 0; } dp->queue_seq = seq_create(); hmap_init(&dp->flow_table); list_init(&dp->port_list); dp->port_seq = seq_create(); error = do_add_port(dp, name, "internal", ODPP_LOCAL); if (error) { dp_netdev_free(dp); return error; } shash_add(&dp_netdevs, name, dp); *dpp = dp; return 0; } static int dpif_netdev_open(const struct dpif_class *class, const char *name, bool create, struct 
dpif **dpifp) { struct dp_netdev *dp; int error; ovs_mutex_lock(&dp_netdev_mutex); dp = shash_find_data(&dp_netdevs, name); if (!dp) { error = create ? create_dp_netdev(name, class, &dp) : ENODEV; } else { error = (dp->class != class ? EINVAL : create ? EEXIST : 0); } if (!error) { *dpifp = create_dpif_netdev(dp); } ovs_mutex_unlock(&dp_netdev_mutex); return error; } static void dp_netdev_purge_queues(struct dp_netdev *dp) { int i; for (i = 0; i < N_QUEUES; i++) { struct dp_netdev_queue *q = &dp->queues[i]; while (q->tail != q->head) { struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK]; ofpbuf_uninit(&u->buf); } } } static void dp_netdev_free(struct dp_netdev *dp) { struct dp_netdev_port *port, *next; dp_netdev_flow_flush(dp); LIST_FOR_EACH_SAFE (port, next, node, &dp->port_list) { do_del_port(dp, port->port_no); } dp_netdev_purge_queues(dp); seq_destroy(dp->queue_seq); hmap_destroy(&dp->flow_table); seq_destroy(dp->port_seq); free(dp->name); free(dp); } static void dpif_netdev_close(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); ovs_mutex_lock(&dp_netdev_mutex); ovs_assert(dp->open_cnt > 0); if (--dp->open_cnt == 0 && dp->destroyed) { shash_find_and_delete(&dp_netdevs, dp->name); dp_netdev_free(dp); } free(dpif); ovs_mutex_unlock(&dp_netdev_mutex); } static int dpif_netdev_destroy(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); ovs_mutex_lock(&dp_netdev_mutex); dp->destroyed = true; ovs_mutex_unlock(&dp_netdev_mutex); return 0; } static int dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) { struct dp_netdev *dp = get_dp_netdev(dpif); ovs_mutex_lock(&dp_netdev_mutex); stats->n_flows = hmap_count(&dp->flow_table); stats->n_hit = dp->n_hit; stats->n_missed = dp->n_missed; stats->n_lost = dp->n_lost; ovs_mutex_unlock(&dp_netdev_mutex); return 0; } static int do_add_port(struct dp_netdev *dp, const char *devname, const char *type, odp_port_t port_no) { struct netdev_saved_flags *sf; struct 
dp_netdev_port *port; struct netdev *netdev; struct netdev_rx *rx; const char *open_type; int mtu; int error; /* XXX reject devices already in some dp_netdev. */ /* Open and validate network device. */ open_type = dpif_netdev_port_open_type(dp->class, type); error = netdev_open(devname, open_type, &netdev); if (error) { return error; } /* XXX reject loopback devices */ /* XXX reject non-Ethernet devices */ error = netdev_rx_open(netdev, &rx); if (error && !(error == EOPNOTSUPP && dpif_netdev_class_is_dummy(dp->class))) { VLOG_ERR("%s: cannot receive packets on this network device (%s)", devname, ovs_strerror(errno)); netdev_close(netdev); return error; } error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, &sf); if (error) { netdev_rx_close(rx); netdev_close(netdev); return error; } port = xmalloc(sizeof *port); port->port_no = port_no; port->netdev = netdev; port->sf = sf; port->rx = rx; port->type = xstrdup(type); error = netdev_get_mtu(netdev, &mtu); if (!error && mtu > dp->max_mtu) { dp->max_mtu = mtu; } list_push_back(&dp->port_list, &port->node); dp->ports[odp_to_u32(port_no)] = port; seq_change(dp->port_seq); return 0; } static int dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop) { struct dp_netdev *dp = get_dp_netdev(dpif); char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; const char *dpif_port; odp_port_t port_no; int error; ovs_mutex_lock(&dp_netdev_mutex); dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); if (*port_nop != ODPP_NONE) { uint32_t port_idx = odp_to_u32(*port_nop); if (port_idx >= MAX_PORTS) { error = EFBIG; } else if (dp->ports[port_idx]) { error = EBUSY; } else { error = 0; port_no = *port_nop; } } else { port_no = choose_port(dp, dpif_port); error = port_no == ODPP_NONE ? 
EFBIG : 0; } if (!error) { *port_nop = port_no; error = do_add_port(dp, dpif_port, netdev_get_type(netdev), port_no); } ovs_mutex_unlock(&dp_netdev_mutex); return error; } static int dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no) { struct dp_netdev *dp = get_dp_netdev(dpif); int error; ovs_mutex_lock(&dp_netdev_mutex); error = port_no == ODPP_LOCAL ? EINVAL : do_del_port(dp, port_no); ovs_mutex_unlock(&dp_netdev_mutex); return error; } static bool is_valid_port_number(odp_port_t port_no) { return odp_to_u32(port_no) < MAX_PORTS; } static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no, struct dp_netdev_port **portp) { if (!is_valid_port_number(port_no)) { *portp = NULL; return EINVAL; } else { *portp = dp->ports[odp_to_u32(port_no)]; return *portp ? 0 : ENOENT; } } static int get_port_by_name(struct dp_netdev *dp, const char *devname, struct dp_netdev_port **portp) { struct dp_netdev_port *port; LIST_FOR_EACH (port, node, &dp->port_list) { if (!strcmp(netdev_get_name(port->netdev), devname)) { *portp = port; return 0; } } return ENOENT; } static int do_del_port(struct dp_netdev *dp, odp_port_t port_no) { struct dp_netdev_port *port; int error; error = get_port_by_number(dp, port_no, &port); if (error) { return error; } list_remove(&port->node); dp->ports[odp_to_u32(port_no)] = NULL; seq_change(dp->port_seq); netdev_close(port->netdev); netdev_restore_flags(port->sf); netdev_rx_close(port->rx); free(port->type); free(port); return 0; } static void answer_port_query(const struct dp_netdev_port *port, struct dpif_port *dpif_port) { dpif_port->name = xstrdup(netdev_get_name(port->netdev)); dpif_port->type = xstrdup(port->type); dpif_port->port_no = port->port_no; } static int dpif_netdev_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, struct dpif_port *dpif_port) { struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_port *port; int error; ovs_mutex_lock(&dp_netdev_mutex); error = get_port_by_number(dp, port_no, 
&port); if (!error && dpif_port) { answer_port_query(port, dpif_port); } ovs_mutex_unlock(&dp_netdev_mutex); return error; } static int dpif_netdev_port_query_by_name(const struct dpif *dpif, const char *devname, struct dpif_port *dpif_port) { struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_port *port; int error; ovs_mutex_lock(&dp_netdev_mutex); error = get_port_by_name(dp, devname, &port); if (!error && dpif_port) { answer_port_query(port, dpif_port); } ovs_mutex_unlock(&dp_netdev_mutex); return error; } static uint32_t dpif_netdev_get_max_ports(const struct dpif *dpif OVS_UNUSED) { return MAX_PORTS; } static void dp_netdev_free_flow(struct dp_netdev *dp, struct dp_netdev_flow *flow) { hmap_remove(&dp->flow_table, &flow->node); free(flow->actions); free(flow); } static void dp_netdev_flow_flush(struct dp_netdev *dp) { struct dp_netdev_flow *flow, *next; HMAP_FOR_EACH_SAFE (flow, next, node, &dp->flow_table) { dp_netdev_free_flow(dp, flow); } } static int dpif_netdev_flow_flush(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); ovs_mutex_lock(&dp_netdev_mutex); dp_netdev_flow_flush(dp); ovs_mutex_unlock(&dp_netdev_mutex); return 0; } struct dp_netdev_port_state { odp_port_t port_no; char *name; }; static int dpif_netdev_port_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep) { *statep = xzalloc(sizeof(struct dp_netdev_port_state)); return 0; } static int dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_, struct dpif_port *dpif_port) { struct dp_netdev_port_state *state = state_; struct dp_netdev *dp = get_dp_netdev(dpif); uint32_t port_idx; ovs_mutex_lock(&dp_netdev_mutex); for (port_idx = odp_to_u32(state->port_no); port_idx < MAX_PORTS; port_idx++) { struct dp_netdev_port *port = dp->ports[port_idx]; if (port) { free(state->name); state->name = xstrdup(netdev_get_name(port->netdev)); dpif_port->name = state->name; dpif_port->type = port->type; dpif_port->port_no = port->port_no; state->port_no = 
u32_to_odp(port_idx + 1); ovs_mutex_unlock(&dp_netdev_mutex); return 0; } } ovs_mutex_unlock(&dp_netdev_mutex); return EOF; } static int dpif_netdev_port_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) { struct dp_netdev_port_state *state = state_; free(state->name); free(state); return 0; } static int dpif_netdev_port_poll(const struct dpif *dpif_, char **devnamep OVS_UNUSED) { struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); uint64_t new_port_seq; int error; ovs_mutex_lock(&dp_netdev_mutex); new_port_seq = seq_read(dpif->dp->port_seq); if (dpif->last_port_seq != new_port_seq) { dpif->last_port_seq = new_port_seq; error = ENOBUFS; } else { error = EAGAIN; } ovs_mutex_unlock(&dp_netdev_mutex); return error; } static void dpif_netdev_port_poll_wait(const struct dpif *dpif_) { struct dpif_netdev *dpif = dpif_netdev_cast(dpif_); ovs_mutex_lock(&dp_netdev_mutex); seq_wait(dpif->dp->port_seq, dpif->last_port_seq); ovs_mutex_unlock(&dp_netdev_mutex); } static struct dp_netdev_flow * dp_netdev_lookup_flow(const struct dp_netdev *dp, const struct flow *key) { struct dp_netdev_flow *flow; HMAP_FOR_EACH_WITH_HASH (flow, node, flow_hash(key, 0), &dp->flow_table) { if (flow_equal(&flow->key, key)) { return flow; } } return NULL; } static void get_dpif_flow_stats(struct dp_netdev_flow *flow, struct dpif_flow_stats *stats) { stats->n_packets = flow->packet_count; stats->n_bytes = flow->byte_count; stats->used = flow->used; stats->tcp_flags = flow->tcp_flags; } static int dpif_netdev_flow_from_nlattrs(const struct nlattr *key, uint32_t key_len, struct flow *flow) { odp_port_t in_port; if (odp_flow_key_to_flow(key, key_len, flow) != ODP_FIT_PERFECT) { /* This should not happen: it indicates that odp_flow_key_from_flow() * and odp_flow_key_to_flow() disagree on the acceptable form of a * flow. Log the problem as an error, with enough details to enable * debugging. 
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (!VLOG_DROP_ERR(&rl)) { struct ds s; ds_init(&s); odp_flow_key_format(key, key_len, &s); VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s)); ds_destroy(&s); } return EINVAL; } in_port = flow->in_port.odp_port; if (!is_valid_port_number(in_port) && in_port != ODPP_NONE) { return EINVAL; } return 0; } static int dpif_netdev_flow_get(const struct dpif *dpif, const struct nlattr *nl_key, size_t nl_key_len, struct ofpbuf **actionsp, struct dpif_flow_stats *stats) { struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *flow; struct flow key; int error; error = dpif_netdev_flow_from_nlattrs(nl_key, nl_key_len, &key); if (error) { return error; } ovs_mutex_lock(&dp_netdev_mutex); flow = dp_netdev_lookup_flow(dp, &key); if (flow) { if (stats) { get_dpif_flow_stats(flow, stats); } if (actionsp) { *actionsp = ofpbuf_clone_data(flow->actions, flow->actions_len); } } else { error = ENOENT; } ovs_mutex_unlock(&dp_netdev_mutex); return error; } static int set_flow_actions(struct dp_netdev_flow *flow, const struct nlattr *actions, size_t actions_len) { flow->actions = xrealloc(flow->actions, actions_len); flow->actions_len = actions_len; memcpy(flow->actions, actions, actions_len); return 0; } static int dp_netdev_flow_add(struct dp_netdev *dp, const struct flow *key, const struct nlattr *actions, size_t actions_len) { struct dp_netdev_flow *flow; int error; flow = xzalloc(sizeof *flow); flow->key = *key; error = set_flow_actions(flow, actions, actions_len); if (error) { free(flow); return error; } hmap_insert(&dp->flow_table, &flow->node, flow_hash(&flow->key, 0)); return 0; } static void clear_stats(struct dp_netdev_flow *flow) { flow->used = 0; flow->packet_count = 0; flow->byte_count = 0; flow->tcp_flags = 0; } static int dpif_netdev_flow_put(struct dpif *dpif, const struct dpif_flow_put *put) { struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *flow; struct flow key; int 
error; error = dpif_netdev_flow_from_nlattrs(put->key, put->key_len, &key); if (error) { return error; } ovs_mutex_lock(&dp_netdev_mutex); flow = dp_netdev_lookup_flow(dp, &key); if (!flow) { if (put->flags & DPIF_FP_CREATE) { if (hmap_count(&dp->flow_table) < MAX_FLOWS) { if (put->stats) { memset(put->stats, 0, sizeof *put->stats); } error = dp_netdev_flow_add(dp, &key, put->actions, put->actions_len); } else { error = EFBIG; } } else { error = ENOENT; } } else { if (put->flags & DPIF_FP_MODIFY) { error = set_flow_actions(flow, put->actions, put->actions_len); if (!error) { if (put->stats) { get_dpif_flow_stats(flow, put->stats); } if (put->flags & DPIF_FP_ZERO_STATS) { clear_stats(flow); } } } else { error = EEXIST; } } ovs_mutex_unlock(&dp_netdev_mutex); return error; } static int dpif_netdev_flow_del(struct dpif *dpif, const struct dpif_flow_del *del) { struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *flow; struct flow key; int error; error = dpif_netdev_flow_from_nlattrs(del->key, del->key_len, &key); if (error) { return error; } ovs_mutex_lock(&dp_netdev_mutex); flow = dp_netdev_lookup_flow(dp, &key); if (flow) { if (del->stats) { get_dpif_flow_stats(flow, del->stats); } dp_netdev_free_flow(dp, flow); } else { error = ENOENT; } ovs_mutex_unlock(&dp_netdev_mutex); return error; } struct dp_netdev_flow_state { uint32_t bucket; uint32_t offset; struct nlattr *actions; struct odputil_keybuf keybuf; struct dpif_flow_stats stats; }; static int dpif_netdev_flow_dump_start(const struct dpif *dpif OVS_UNUSED, void **statep) { struct dp_netdev_flow_state *state; *statep = state = xmalloc(sizeof *state); state->bucket = 0; state->offset = 0; state->actions = NULL; return 0; } static int dpif_netdev_flow_dump_next(const struct dpif *dpif, void *state_, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats) { struct 
dp_netdev_flow_state *state = state_; struct dp_netdev *dp = get_dp_netdev(dpif); struct dp_netdev_flow *flow; struct hmap_node *node; ovs_mutex_lock(&dp_netdev_mutex); node = hmap_at_position(&dp->flow_table, &state->bucket, &state->offset); if (!node) { ovs_mutex_unlock(&dp_netdev_mutex); return EOF; } flow = CONTAINER_OF(node, struct dp_netdev_flow, node); if (key) { struct ofpbuf buf; ofpbuf_use_stack(&buf, &state->keybuf, sizeof state->keybuf); odp_flow_key_from_flow(&buf, &flow->key, flow->key.in_port.odp_port); *key = buf.data; *key_len = buf.size; } if (mask) { *mask = NULL; *mask_len = 0; } if (actions) { free(state->actions); state->actions = xmemdup(flow->actions, flow->actions_len); *actions = state->actions; *actions_len = flow->actions_len; } if (stats) { get_dpif_flow_stats(flow, &state->stats); *stats = &state->stats; } ovs_mutex_unlock(&dp_netdev_mutex); return 0; } static int dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) { struct dp_netdev_flow_state *state = state_; free(state->actions); free(state); return 0; } static int dpif_netdev_execute(struct dpif *dpif, const struct dpif_execute *execute) { struct dp_netdev *dp = get_dp_netdev(dpif); struct ofpbuf copy; struct flow key; int error; if (execute->packet->size < ETH_HEADER_LEN || execute->packet->size > UINT16_MAX) { return EINVAL; } /* Make a deep copy of 'packet', because we might modify its data. 
*/ ofpbuf_init(©, DP_NETDEV_HEADROOM + execute->packet->size); ofpbuf_reserve(©, DP_NETDEV_HEADROOM); ofpbuf_put(©, execute->packet->data, execute->packet->size); flow_extract(©, 0, 0, NULL, NULL, &key); error = dpif_netdev_flow_from_nlattrs(execute->key, execute->key_len, &key); if (!error) { ovs_mutex_lock(&dp_netdev_mutex); dp_netdev_execute_actions(dp, ©, &key, execute->actions, execute->actions_len); ovs_mutex_unlock(&dp_netdev_mutex); } ofpbuf_uninit(©); return error; } static int dpif_netdev_recv_set(struct dpif *dpif OVS_UNUSED, bool enable OVS_UNUSED) { return 0; } static int dpif_netdev_queue_to_priority(const struct dpif *dpif OVS_UNUSED, uint32_t queue_id, uint32_t *priority) { *priority = queue_id; return 0; } static struct dp_netdev_queue * find_nonempty_queue(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); int i; for (i = 0; i < N_QUEUES; i++) { struct dp_netdev_queue *q = &dp->queues[i]; if (q->head != q->tail) { return q; } } return NULL; } static int dpif_netdev_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf) { struct dp_netdev_queue *q; int error; ovs_mutex_lock(&dp_netdev_mutex); q = find_nonempty_queue(dpif); if (q) { struct dp_netdev_upcall *u = &q->upcalls[q->tail++ & QUEUE_MASK]; *upcall = u->upcall; upcall->packet = buf; ofpbuf_uninit(buf); *buf = u->buf; error = 0; } else { error = EAGAIN; } ovs_mutex_unlock(&dp_netdev_mutex); return error; } static void dpif_netdev_recv_wait(struct dpif *dpif) { struct dp_netdev *dp = get_dp_netdev(dpif); uint64_t seq; ovs_mutex_lock(&dp_netdev_mutex); seq = seq_read(dp->queue_seq); if (find_nonempty_queue(dpif)) { poll_immediate_wake(); } else { seq_wait(dp->queue_seq, seq); } ovs_mutex_unlock(&dp_netdev_mutex); } static void dpif_netdev_recv_purge(struct dpif *dpif) { struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif); ovs_mutex_lock(&dp_netdev_mutex); dp_netdev_purge_queues(dpif_netdev->dp); ovs_mutex_unlock(&dp_netdev_mutex); } static void 
dp_netdev_flow_used(struct dp_netdev_flow *flow, const struct ofpbuf *packet) { flow->used = time_msec(); flow->packet_count++; flow->byte_count += packet->size; flow->tcp_flags |= packet_get_tcp_flags(packet, &flow->key); } static void dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port, struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark, const struct flow_tnl *tnl) { struct dp_netdev_flow *flow; struct flow key; union flow_in_port in_port_; if (packet->size < ETH_HEADER_LEN) { return; } in_port_.odp_port = port->port_no; flow_extract(packet, skb_priority, pkt_mark, tnl, &in_port_, &key); flow = dp_netdev_lookup_flow(dp, &key); if (flow) { dp_netdev_flow_used(flow, packet); dp_netdev_execute_actions(dp, packet, &key, flow->actions, flow->actions_len); dp->n_hit++; } else { dp->n_missed++; dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL); } } static void dpif_netdev_run(struct dpif *dpif) { struct dp_netdev_port *port; struct dp_netdev *dp; struct ofpbuf packet; ovs_mutex_lock(&dp_netdev_mutex); dp = get_dp_netdev(dpif); ofpbuf_init(&packet, DP_NETDEV_HEADROOM + VLAN_ETH_HEADER_LEN + dp->max_mtu); LIST_FOR_EACH (port, node, &dp->port_list) { int error; /* Reset packet contents. */ ofpbuf_clear(&packet); ofpbuf_reserve(&packet, DP_NETDEV_HEADROOM); error = port->rx ? netdev_rx_recv(port->rx, &packet) : EOPNOTSUPP; if (!error) { dp_netdev_port_input(dp, port, &packet, 0, 0, NULL); } else if (error != EAGAIN && error != EOPNOTSUPP) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_ERR_RL(&rl, "error receiving data from %s: %s", netdev_get_name(port->netdev), ovs_strerror(error)); } } ofpbuf_uninit(&packet); ovs_mutex_unlock(&dp_netdev_mutex); } static void dpif_netdev_wait(struct dpif *dpif) { struct dp_netdev_port *port; /* There is a race here, if thread A calls dpif_netdev_wait(dpif) and * thread B calls dpif_port_add(dpif) or dpif_port_remove(dpif) before * A makes it to poll_block(). 
* * But I think it doesn't matter: * * - In the dpif_port_add() case, A will not wake up when a packet * arrives on the new port, but this would also happen if the * ordering were reversed. * * - In the dpif_port_remove() case, A might wake up spuriously, but * that is harmless. */ ovs_mutex_lock(&dp_netdev_mutex); LIST_FOR_EACH (port, node, &get_dp_netdev(dpif)->port_list) { if (port->rx) { netdev_rx_wait(port->rx); } } ovs_mutex_unlock(&dp_netdev_mutex); } static void dp_netdev_output_port(void *dp_, struct ofpbuf *packet, uint32_t out_port) { struct dp_netdev *dp = dp_; struct dp_netdev_port *p = dp->ports[out_port]; if (p) { netdev_send(p->netdev, packet); } } static int dp_netdev_output_userspace(struct dp_netdev *dp, const struct ofpbuf *packet, int queue_no, const struct flow *flow, const struct nlattr *userdata) { struct dp_netdev_queue *q = &dp->queues[queue_no]; if (q->head - q->tail < MAX_QUEUE_LEN) { struct dp_netdev_upcall *u = &q->upcalls[q->head++ & QUEUE_MASK]; struct dpif_upcall *upcall = &u->upcall; struct ofpbuf *buf = &u->buf; size_t buf_size; upcall->type = queue_no; /* Allocate buffer big enough for everything. */ buf_size = ODPUTIL_FLOW_KEY_BYTES + 2 + packet->size; if (userdata) { buf_size += NLA_ALIGN(userdata->nla_len); } ofpbuf_init(buf, buf_size); /* Put ODP flow. */ odp_flow_key_from_flow(buf, flow, flow->in_port.odp_port); upcall->key = buf->data; upcall->key_len = buf->size; /* Put userdata. */ if (userdata) { upcall->userdata = ofpbuf_put(buf, userdata, NLA_ALIGN(userdata->nla_len)); } /* Put packet. * * We adjust 'data' and 'size' in 'buf' so that only the packet itself * is visible in 'upcall->packet'. The ODP flow and (if present) * userdata become part of the headroom. 
*/ ofpbuf_put_zeros(buf, 2); buf->data = ofpbuf_put(buf, packet->data, packet->size); buf->size = packet->size; upcall->packet = buf; seq_change(dp->queue_seq); return 0; } else { dp->n_lost++; return ENOBUFS; } } static void dp_netdev_action_userspace(void *dp, struct ofpbuf *packet, const struct flow *key, const struct nlattr *userdata) { dp_netdev_output_userspace(dp, packet, DPIF_UC_ACTION, key, userdata); } static void dp_netdev_execute_actions(struct dp_netdev *dp, struct ofpbuf *packet, struct flow *key, const struct nlattr *actions, size_t actions_len) { odp_execute_actions(dp, packet, key, actions, actions_len, dp_netdev_output_port, dp_netdev_action_userspace); } const struct dpif_class dpif_netdev_class = { "netdev", dpif_netdev_enumerate, dpif_netdev_port_open_type, dpif_netdev_open, dpif_netdev_close, dpif_netdev_destroy, dpif_netdev_run, dpif_netdev_wait, dpif_netdev_get_stats, dpif_netdev_port_add, dpif_netdev_port_del, dpif_netdev_port_query_by_number, dpif_netdev_port_query_by_name, dpif_netdev_get_max_ports, NULL, /* port_get_pid */ dpif_netdev_port_dump_start, dpif_netdev_port_dump_next, dpif_netdev_port_dump_done, dpif_netdev_port_poll, dpif_netdev_port_poll_wait, dpif_netdev_flow_get, dpif_netdev_flow_put, dpif_netdev_flow_del, dpif_netdev_flow_flush, dpif_netdev_flow_dump_start, dpif_netdev_flow_dump_next, dpif_netdev_flow_dump_done, dpif_netdev_execute, NULL, /* operate */ dpif_netdev_recv_set, dpif_netdev_queue_to_priority, dpif_netdev_recv, dpif_netdev_recv_wait, dpif_netdev_recv_purge, }; static void dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { struct dp_netdev_port *port; struct dp_netdev *dp; int port_no; dp = shash_find_data(&dp_netdevs, argv[1]); if (!dp || !dpif_netdev_class_is_dummy(dp->class)) { unixctl_command_reply_error(conn, "unknown datapath or not a dummy"); return; } if (get_port_by_name(dp, argv[2], &port)) { unixctl_command_reply_error(conn, 
"unknown port"); return; } port_no = atoi(argv[3]); if (port_no <= 0 || port_no >= MAX_PORTS) { unixctl_command_reply_error(conn, "bad port number"); return; } if (dp->ports[port_no]) { unixctl_command_reply_error(conn, "port number already in use"); return; } dp->ports[odp_to_u32(port->port_no)] = NULL; dp->ports[port_no] = port; port->port_no = u32_to_odp(port_no); seq_change(dp->port_seq); unixctl_command_reply(conn, NULL); } static void dpif_dummy_register__(const char *type) { struct dpif_class *class; class = xmalloc(sizeof *class); *class = dpif_netdev_class; class->type = xstrdup(type); dp_register_provider(class); } void dpif_dummy_register(bool override) { if (override) { struct sset types; const char *type; sset_init(&types); dp_enumerate_types(&types); SSET_FOR_EACH (type, &types) { if (!dp_unregister_provider(type)) { dpif_dummy_register__(type); } } sset_destroy(&types); } dpif_dummy_register__("dummy"); unixctl_command_register("dpif-dummy/change-port-number", "DP PORT NEW-NUMBER", 3, 3, dpif_dummy_change_port_number, NULL); } openvswitch-2.0.1+git20140120/lib/dpif-provider.h000066400000000000000000000416731226605124000211560ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef DPIF_PROVIDER_H #define DPIF_PROVIDER_H 1 /* Provider interface to dpifs, which provide an interface to an Open vSwitch * datapath. 
/* Open vSwitch datapath interface.
 *
 * This structure should be treated as opaque by dpif implementations. */
struct dpif {
    /* Provider class of this datapath; dpif_assert_class() checks it. */
    const struct dpif_class *dpif_class;

    /* Name without the type, as returned by dpif_base_name(). */
    char *base_name;

    /* Name prefixed with the type, as returned by dpif_name(). */
    char *full_name;

    /* NetFlow engine type and ID, as passed to dpif_init(). */
    uint8_t netflow_engine_type;
    uint8_t netflow_engine_id;
};
* * Some kinds of datapaths might not be practically enumerable, in which * case this function may be a null pointer. */ int (*enumerate)(struct sset *all_dps); /* Returns the type to pass to netdev_open() when a dpif of class * 'dpif_class' has a port of type 'type', for a few special cases * when a netdev type differs from a port type. For example, when * using the userspace datapath, a port of type "internal" needs to * be opened as "tap". * * Returns either 'type' itself or a string literal, which must not * be freed. */ const char *(*port_open_type)(const struct dpif_class *dpif_class, const char *type); /* Attempts to open an existing dpif called 'name', if 'create' is false, * or to open an existing dpif or create a new one, if 'create' is true. * * 'dpif_class' is the class of dpif to open. * * If successful, stores a pointer to the new dpif in '*dpifp', which must * have class 'dpif_class'. On failure there are no requirements on what * is stored in '*dpifp'. */ int (*open)(const struct dpif_class *dpif_class, const char *name, bool create, struct dpif **dpifp); /* Closes 'dpif' and frees associated memory. */ void (*close)(struct dpif *dpif); /* Attempts to destroy the dpif underlying 'dpif'. * * If successful, 'dpif' will not be used again except as an argument for * the 'close' member function. */ int (*destroy)(struct dpif *dpif); /* Performs periodic work needed by 'dpif', if any is necessary. */ void (*run)(struct dpif *dpif); /* Arranges for poll_block() to wake up if the "run" member function needs * to be called for 'dpif'. */ void (*wait)(struct dpif *dpif); /* Retrieves statistics for 'dpif' into 'stats'. */ int (*get_stats)(const struct dpif *dpif, struct dpif_dp_stats *stats); /* Adds 'netdev' as a new port in 'dpif'. If '*port_no' is not * UINT32_MAX, attempts to use that as the port's port number. * * If port is successfully added, sets '*port_no' to the new port's * port number. 
Returns EBUSY if caller attempted to choose a port * number, and it was in use. */ int (*port_add)(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_no); /* Removes port numbered 'port_no' from 'dpif'. */ int (*port_del)(struct dpif *dpif, odp_port_t port_no); /* Queries 'dpif' for a port with the given 'port_no' or 'devname'. * If 'port' is not null, stores information about the port into * '*port' if successful. * * If 'port' is not null, the caller takes ownership of data in * 'port' and must free it with dpif_port_destroy() when it is no * longer needed. */ int (*port_query_by_number)(const struct dpif *dpif, odp_port_t port_no, struct dpif_port *port); int (*port_query_by_name)(const struct dpif *dpif, const char *devname, struct dpif_port *port); /* Returns one greater than the largest port number accepted in flow * actions. */ uint32_t (*get_max_ports)(const struct dpif *dpif); /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE * actions as the OVS_USERSPACE_ATTR_PID attribute's value, for use in * flows whose packets arrived on port 'port_no'. * * A 'port_no' of UINT32_MAX should be treated as a special case. The * implementation should return a reserved PID, not allocated to any port, * that the client may use for special purposes. * * The return value only needs to be meaningful when DPIF_UC_ACTION has * been enabled in the 'dpif''s listen mask, and it is allowed to change * when DPIF_UC_ACTION is disabled and then re-enabled. * * A dpif provider that doesn't have meaningful Netlink PIDs can use NULL * for this function. This is equivalent to always returning 0. */ uint32_t (*port_get_pid)(const struct dpif *dpif, odp_port_t port_no); /* Attempts to begin dumping the ports in a dpif. On success, returns 0 * and initializes '*statep' with any data needed for iteration. On * failure, returns a positive errno value. 
*/ int (*port_dump_start)(const struct dpif *dpif, void **statep); /* Attempts to retrieve another port from 'dpif' for 'state', which was * initialized by a successful call to the 'port_dump_start' function for * 'dpif'. On success, stores a new dpif_port into 'port' and returns 0. * Returns EOF if the end of the port table has been reached, or a positive * errno value on error. This function will not be called again once it * returns nonzero once for a given iteration (but the 'port_dump_done' * function will be called afterward). * * The dpif provider retains ownership of the data stored in 'port'. It * must remain valid until at least the next call to 'port_dump_next' or * 'port_dump_done' for 'state'. */ int (*port_dump_next)(const struct dpif *dpif, void *state, struct dpif_port *port); /* Releases resources from 'dpif' for 'state', which was initialized by a * successful call to the 'port_dump_start' function for 'dpif'. */ int (*port_dump_done)(const struct dpif *dpif, void *state); /* Polls for changes in the set of ports in 'dpif'. If the set of ports in * 'dpif' has changed, then this function should do one of the * following: * * - Preferably: store the name of the device that was added to or deleted * from 'dpif' in '*devnamep' and return 0. The caller is responsible * for freeing '*devnamep' (with free()) when it no longer needs it. * * - Alternatively: return ENOBUFS, without indicating the device that was * added or deleted. * * Occasional 'false positives', in which the function returns 0 while * indicating a device that was not actually added or deleted or returns * ENOBUFS without any change, are acceptable. * * If the set of ports in 'dpif' has not changed, returns EAGAIN. May also * return other positive errno values to indicate that something has gone * wrong. */ int (*port_poll)(const struct dpif *dpif, char **devnamep); /* Arranges for the poll loop to wake up when 'port_poll' will return a * value other than EAGAIN. 
*/ void (*port_poll_wait)(const struct dpif *dpif); /* Queries 'dpif' for a flow entry. The flow is specified by the Netlink * attributes with types OVS_KEY_ATTR_* in the 'key_len' bytes starting at * 'key'. * * Returns 0 if successful. If no flow matches, returns ENOENT. On other * failure, returns a positive errno value. * * If 'actionsp' is nonnull, then on success '*actionsp' must be set to an * ofpbuf owned by the caller that contains the Netlink attributes for the * flow's actions. The caller must free the ofpbuf (with ofpbuf_delete()) * when it is no longer needed. * * If 'stats' is nonnull, then on success it must be updated with the * flow's statistics. */ int (*flow_get)(const struct dpif *dpif, const struct nlattr *key, size_t key_len, struct ofpbuf **actionsp, struct dpif_flow_stats *stats); /* Adds or modifies a flow in 'dpif'. The flow is specified by the Netlink * attributes with types OVS_KEY_ATTR_* in the 'put->key_len' bytes * starting at 'put->key'. The associated actions are specified by the * Netlink attributes with types OVS_ACTION_ATTR_* in the * 'put->actions_len' bytes starting at 'put->actions'. * * - If the flow's key does not exist in 'dpif', then the flow will be * added if 'put->flags' includes DPIF_FP_CREATE. Otherwise the * operation will fail with ENOENT. * * If the operation succeeds, then 'put->stats', if nonnull, must be * zeroed. * * - If the flow's key does exist in 'dpif', then the flow's actions will * be updated if 'put->flags' includes DPIF_FP_MODIFY. Otherwise the * operation will fail with EEXIST. If the flow's actions are updated, * then its statistics will be zeroed if 'put->flags' includes * DPIF_FP_ZERO_STATS, and left as-is otherwise. * * If the operation succeeds, then 'put->stats', if nonnull, must be set * to the flow's statistics before the update. 
*/ int (*flow_put)(struct dpif *dpif, const struct dpif_flow_put *put); /* Deletes a flow from 'dpif' and returns 0, or returns ENOENT if 'dpif' * does not contain such a flow. The flow is specified by the Netlink * attributes with types OVS_KEY_ATTR_* in the 'del->key_len' bytes * starting at 'del->key'. * * If the operation succeeds, then 'del->stats', if nonnull, must be set to * the flow's statistics before its deletion. */ int (*flow_del)(struct dpif *dpif, const struct dpif_flow_del *del); /* Deletes all flows from 'dpif' and clears all of its queues of received * packets. */ int (*flow_flush)(struct dpif *dpif); /* Attempts to begin dumping the flows in a dpif. On success, returns 0 * and initializes '*statep' with any data needed for iteration. On * failure, returns a positive errno value. */ int (*flow_dump_start)(const struct dpif *dpif, void **statep); /* Attempts to retrieve another flow from 'dpif' for 'state', which was * initialized by a successful call to the 'flow_dump_start' function for * 'dpif'. On success, updates the output parameters as described below * and returns 0. Returns EOF if the end of the flow table has been * reached, or a positive errno value on error. This function will not be * called again once it returns nonzero within a given iteration (but the * 'flow_dump_done' function will be called afterward). * * On success: * * - If 'key' and 'key_len' are nonnull, then '*key' and '*key_len' * must be set to Netlink attributes with types OVS_KEY_ATTR_* * representing the dumped flow's key. * * - If 'mask' and 'mask_len' are nonnull then '*mask' and '*mask_len' * must be set to Netlink attributes with types of OVS_KEY_ATTR_* * representing the dumped flow's mask. * * - If 'actions' and 'actions_len' are nonnull then they should be set * to Netlink attributes with types OVS_ACTION_ATTR_* representing * the dumped flow's actions. * * - If 'stats' is nonnull then it should be set to the dumped flow's * statistics. 
* * All of the returned data is owned by 'dpif', not by the caller, and the * caller must not modify or free it. 'dpif' must guarantee that it * remains accessible and unchanging until at least the next call to * 'flow_dump_next' or 'flow_dump_done' for 'state'. */ int (*flow_dump_next)(const struct dpif *dpif, void *state, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **stats); /* Releases resources from 'dpif' for 'state', which was initialized by a * successful call to the 'flow_dump_start' function for 'dpif'. */ int (*flow_dump_done)(const struct dpif *dpif, void *state); /* Performs the 'execute->actions_len' bytes of actions in * 'execute->actions' on the Ethernet frame specified in 'execute->packet' * taken from the flow specified in the 'execute->key_len' bytes of * 'execute->key'. ('execute->key' is mostly redundant with * 'execute->packet', but it contains some metadata that cannot be * recovered from 'execute->packet', such as tunnel and in_port.) */ int (*execute)(struct dpif *dpif, const struct dpif_execute *execute); /* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order * in which they are specified, placing each operation's results in the * "output" members documented in comments. * * This function is optional. It is only worthwhile to implement it if * 'dpif' can perform operations in batch faster than individually. */ void (*operate)(struct dpif *dpif, struct dpif_op **ops, size_t n_ops); /* Enables or disables receiving packets with dpif_recv() for 'dpif'. * Turning packet receive off and then back on is allowed to change Netlink * PID assignments (see ->port_get_pid()). The client is responsible for * updating flows as necessary if it does this. 
*/ int (*recv_set)(struct dpif *dpif, bool enable); /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a * priority value used for setting packet priority. */ int (*queue_to_priority)(const struct dpif *dpif, uint32_t queue_id, uint32_t *priority); /* Polls for an upcall from 'dpif'. If successful, stores the upcall into * '*upcall', using 'buf' for storage. Should only be called if 'recv_set' * has been used to enable receiving packets from 'dpif'. * * The implementation should point 'upcall->packet' and 'upcall->key' into * data in the caller-provided 'buf'. If necessary to make room, the * implementation may expand the data in 'buf'. (This is hardly a great * way to do things but it works out OK for the dpif providers that exist * so far.) * * This function must not block. If no upcall is pending when it is * called, it should return EAGAIN without blocking. */ int (*recv)(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf); /* Arranges for the poll loop to wake up when 'dpif' has a message queued * to be received with the recv member function. */ void (*recv_wait)(struct dpif *dpif); /* Throws away any queued upcalls that 'dpif' currently has ready to * return. */ void (*recv_purge)(struct dpif *dpif); }; extern const struct dpif_class dpif_linux_class; extern const struct dpif_class dpif_netdev_class; #ifdef __cplusplus } #endif #endif /* dpif-provider.h */ openvswitch-2.0.1+git20140120/lib/dpif.c000066400000000000000000001311471226605124000173150ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
/* An entry in 'dpif_classes': one registered datapath provider. */
struct registered_dpif_class {
    /* The provider's class, as passed to dp_register_provider(). */
    const struct dpif_class *dpif_class;

    /* Number of outstanding references.  Incremented by dp_class_lookup() and
     * decremented by dp_class_unref(); while it is nonzero,
     * dp_unregister_provider() fails with EBUSY. */
    int refcount;
};
*/ static struct vlog_rate_limit dpmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600); /* Not really much point in logging many dpif errors. */ static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5); static void log_flow_message(const struct dpif *dpif, int error, const char *operation, const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, const struct dpif_flow_stats *stats, const struct nlattr *actions, size_t actions_len); static void log_operation(const struct dpif *, const char *operation, int error); static bool should_log_flow_message(int error); static void log_flow_put_message(struct dpif *, const struct dpif_flow_put *, int error); static void log_flow_del_message(struct dpif *, const struct dpif_flow_del *, int error); static void log_execute_message(struct dpif *, const struct dpif_execute *, int error); static void dp_initialize(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { int i; for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) { dp_register_provider(base_dpif_classes[i]); } ovsthread_once_done(&once); } } static int dp_register_provider__(const struct dpif_class *new_class) { struct registered_dpif_class *registered_class; if (sset_contains(&dpif_blacklist, new_class->type)) { VLOG_DBG("attempted to register blacklisted provider: %s", new_class->type); return EINVAL; } if (shash_find(&dpif_classes, new_class->type)) { VLOG_WARN("attempted to register duplicate datapath provider: %s", new_class->type); return EEXIST; } registered_class = xmalloc(sizeof *registered_class); registered_class->dpif_class = new_class; registered_class->refcount = 0; shash_add(&dpif_classes, new_class->type, registered_class); return 0; } /* Registers a new datapath provider. After successful registration, new * datapaths of that type can be opened using dpif_open(). 
*/ int dp_register_provider(const struct dpif_class *new_class) { int error; ovs_mutex_lock(&dpif_mutex); error = dp_register_provider__(new_class); ovs_mutex_unlock(&dpif_mutex); return error; } /* Unregisters a datapath provider. 'type' must have been previously * registered and not currently be in use by any dpifs. After unregistration * new datapaths of that type cannot be opened using dpif_open(). */ static int dp_unregister_provider__(const char *type) { struct shash_node *node; struct registered_dpif_class *registered_class; node = shash_find(&dpif_classes, type); if (!node) { VLOG_WARN("attempted to unregister a datapath provider that is not " "registered: %s", type); return EAFNOSUPPORT; } registered_class = node->data; if (registered_class->refcount) { VLOG_WARN("attempted to unregister in use datapath provider: %s", type); return EBUSY; } shash_delete(&dpif_classes, node); free(registered_class); return 0; } /* Unregisters a datapath provider. 'type' must have been previously * registered and not currently be in use by any dpifs. After unregistration * new datapaths of that type cannot be opened using dpif_open(). */ int dp_unregister_provider(const char *type) { int error; dp_initialize(); ovs_mutex_lock(&dpif_mutex); error = dp_unregister_provider__(type); ovs_mutex_unlock(&dpif_mutex); return error; } /* Blacklists a provider. Causes future calls of dp_register_provider() with * a dpif_class which implements 'type' to fail. */ void dp_blacklist_provider(const char *type) { ovs_mutex_lock(&dpif_mutex); sset_add(&dpif_blacklist, type); ovs_mutex_unlock(&dpif_mutex); } /* Clears 'types' and enumerates the types of all currently registered datapath * providers into it. The caller must first initialize the sset. 
*/ void dp_enumerate_types(struct sset *types) { struct shash_node *node; dp_initialize(); sset_clear(types); ovs_mutex_lock(&dpif_mutex); SHASH_FOR_EACH(node, &dpif_classes) { const struct registered_dpif_class *registered_class = node->data; sset_add(types, registered_class->dpif_class->type); } ovs_mutex_unlock(&dpif_mutex); } static void dp_class_unref(struct registered_dpif_class *rc) { ovs_mutex_lock(&dpif_mutex); ovs_assert(rc->refcount); rc->refcount--; ovs_mutex_unlock(&dpif_mutex); } static struct registered_dpif_class * dp_class_lookup(const char *type) { struct registered_dpif_class *rc; ovs_mutex_lock(&dpif_mutex); rc = shash_find_data(&dpif_classes, type); if (rc) { rc->refcount++; } ovs_mutex_unlock(&dpif_mutex); return rc; } /* Clears 'names' and enumerates the names of all known created datapaths with * the given 'type'. The caller must first initialize the sset. Returns 0 if * successful, otherwise a positive errno value. * * Some kinds of datapaths might not be practically enumerable. This is not * considered an error. */ int dp_enumerate_names(const char *type, struct sset *names) { struct registered_dpif_class *registered_class; const struct dpif_class *dpif_class; int error; dp_initialize(); sset_clear(names); registered_class = dp_class_lookup(type); if (!registered_class) { VLOG_WARN("could not enumerate unknown type: %s", type); return EAFNOSUPPORT; } dpif_class = registered_class->dpif_class; error = dpif_class->enumerate ? dpif_class->enumerate(names) : 0; if (error) { VLOG_WARN("failed to enumerate %s datapaths: %s", dpif_class->type, ovs_strerror(error)); } dp_class_unref(registered_class); return error; } /* Parses 'datapath_name_', which is of the form [type@]name into its * component pieces. 'name' and 'type' must be freed by the caller. * * The returned 'type' is normalized, as if by dpif_normalize_type(). 
/* Parses 'datapath_name_', which is of the form [type@]name, into its
 * component pieces.  Both '*name' and '*type' are newly allocated strings
 * that the caller must free.
 *
 * The returned 'type' is normalized, as if by dpif_normalize_type(): a
 * missing or empty type ("name" or "@name") yields the default type.  The
 * returned 'name' is the text after the first '@' (or the whole string if
 * there is none), copied verbatim. */
void
dp_parse_name(const char *datapath_name_, char **name, char **type)
{
    char *datapath_name = xstrdup(datapath_name_);
    char *separator;

    separator = strchr(datapath_name, '@');
    if (separator) {
        *separator = '\0';

        /* Normalize the type, not the name: normalizing the name would leave
         * an empty type as "" and turn an empty name into "system". */
        *type = xstrdup(dpif_normalize_type(datapath_name));
        *name = xstrdup(separator + 1);
        free(datapath_name);
    } else {
        *name = datapath_name;
        *type = xstrdup(dpif_normalize_type(NULL));
    }
}
*/ int dpif_create(const char *name, const char *type, struct dpif **dpifp) { return do_open(name, type, true, dpifp); } /* Tries to open a datapath with the given 'name' and 'type', creating it if it * does not exist. 'type' may be either NULL or the empty string to specify * the default system type. Returns 0 if successful, otherwise a positive * errno value. On success stores a pointer to the datapath in '*dpifp', * otherwise a null pointer. */ int dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp) { int error; error = dpif_create(name, type, dpifp); if (error == EEXIST || error == EBUSY) { error = dpif_open(name, type, dpifp); if (error) { VLOG_WARN("datapath %s already exists but cannot be opened: %s", name, ovs_strerror(error)); } } else if (error) { VLOG_WARN("failed to create datapath %s: %s", name, ovs_strerror(error)); } return error; } /* Closes and frees the connection to 'dpif'. Does not destroy the datapath * itself; call dpif_delete() first, instead, if that is desirable. */ void dpif_close(struct dpif *dpif) { if (dpif) { struct registered_dpif_class *rc; rc = shash_find_data(&dpif_classes, dpif->dpif_class->type); dpif_uninit(dpif, true); dp_class_unref(rc); } } /* Performs periodic work needed by 'dpif'. */ void dpif_run(struct dpif *dpif) { if (dpif->dpif_class->run) { dpif->dpif_class->run(dpif); } } /* Arranges for poll_block() to wake up when dp_run() needs to be called for * 'dpif'. */ void dpif_wait(struct dpif *dpif) { if (dpif->dpif_class->wait) { dpif->dpif_class->wait(dpif); } } /* Returns the name of datapath 'dpif' prefixed with the type * (for use in log messages). */ const char * dpif_name(const struct dpif *dpif) { return dpif->full_name; } /* Returns the name of datapath 'dpif' without the type * (for use in device names). */ const char * dpif_base_name(const struct dpif *dpif) { return dpif->base_name; } /* Returns the type of datapath 'dpif'. 
*/ const char * dpif_type(const struct dpif *dpif) { return dpif->dpif_class->type; } /* Returns the fully spelled out name for the given datapath 'type'. * * Normalized type string can be compared with strcmp(). Unnormalized type * string might be the same even if they have different spellings. */ const char * dpif_normalize_type(const char *type) { return type && type[0] ? type : "system"; } /* Destroys the datapath that 'dpif' is connected to, first removing all of its * ports. After calling this function, it does not make sense to pass 'dpif' * to any functions other than dpif_name() or dpif_close(). */ int dpif_delete(struct dpif *dpif) { int error; COVERAGE_INC(dpif_destroy); error = dpif->dpif_class->destroy(dpif); log_operation(dpif, "delete", error); return error; } /* Retrieves statistics for 'dpif' into 'stats'. Returns 0 if successful, * otherwise a positive errno value. */ int dpif_get_dp_stats(const struct dpif *dpif, struct dpif_dp_stats *stats) { int error = dpif->dpif_class->get_stats(dpif, stats); if (error) { memset(stats, 0, sizeof *stats); } log_operation(dpif, "get_stats", error); return error; } const char * dpif_port_open_type(const char *datapath_type, const char *port_type) { struct registered_dpif_class *rc; datapath_type = dpif_normalize_type(datapath_type); ovs_mutex_lock(&dpif_mutex); rc = shash_find_data(&dpif_classes, datapath_type); if (rc && rc->dpif_class->port_open_type) { port_type = rc->dpif_class->port_open_type(rc->dpif_class, port_type); } ovs_mutex_unlock(&dpif_mutex); return port_type; } /* Attempts to add 'netdev' as a port on 'dpif'. If 'port_nop' is * non-null and its value is not ODPP_NONE, then attempts to use the * value as the port number. * * If successful, returns 0 and sets '*port_nop' to the new port's port * number (if 'port_nop' is non-null). On failure, returns a positive * errno value and sets '*port_nop' to ODPP_NONE (if 'port_nop' is * non-null). 
*/ int dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop) { const char *netdev_name = netdev_get_name(netdev); odp_port_t port_no = ODPP_NONE; int error; COVERAGE_INC(dpif_port_add); if (port_nop) { port_no = *port_nop; } error = dpif->dpif_class->port_add(dpif, netdev, &port_no); if (!error) { VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu32, dpif_name(dpif), netdev_name, port_no); } else { VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", dpif_name(dpif), netdev_name, ovs_strerror(error)); port_no = ODPP_NONE; } if (port_nop) { *port_nop = port_no; } return error; } /* Attempts to remove 'dpif''s port number 'port_no'. Returns 0 if successful, * otherwise a positive errno value. */ int dpif_port_del(struct dpif *dpif, odp_port_t port_no) { int error; COVERAGE_INC(dpif_port_del); error = dpif->dpif_class->port_del(dpif, port_no); if (!error) { VLOG_DBG_RL(&dpmsg_rl, "%s: port_del(%"PRIu32")", dpif_name(dpif), port_no); } else { log_operation(dpif, "port_del", error); } return error; } /* Makes a deep copy of 'src' into 'dst'. */ void dpif_port_clone(struct dpif_port *dst, const struct dpif_port *src) { dst->name = xstrdup(src->name); dst->type = xstrdup(src->type); dst->port_no = src->port_no; } /* Frees memory allocated to members of 'dpif_port'. * * Do not call this function on a dpif_port obtained from * dpif_port_dump_next(): that function retains ownership of the data in the * dpif_port. */ void dpif_port_destroy(struct dpif_port *dpif_port) { free(dpif_port->name); free(dpif_port->type); } /* Checks if port named 'devname' exists in 'dpif'. If so, returns * true; otherwise, returns false. 
*/ bool dpif_port_exists(const struct dpif *dpif, const char *devname) { int error = dpif->dpif_class->port_query_by_name(dpif, devname, NULL); if (error != 0 && error != ENOENT && error != ENODEV) { VLOG_WARN_RL(&error_rl, "%s: failed to query port %s: %s", dpif_name(dpif), devname, ovs_strerror(error)); } return !error; } /* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and * initializes '*port' appropriately; on failure, returns a positive errno * value. * * The caller owns the data in 'port' and must free it with * dpif_port_destroy() when it is no longer needed. */ int dpif_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, struct dpif_port *port) { int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port); if (!error) { VLOG_DBG_RL(&dpmsg_rl, "%s: port %"PRIu32" is device %s", dpif_name(dpif), port_no, port->name); } else { memset(port, 0, sizeof *port); VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu32": %s", dpif_name(dpif), port_no, ovs_strerror(error)); } return error; } /* Looks up port named 'devname' in 'dpif'. On success, returns 0 and * initializes '*port' appropriately; on failure, returns a positive errno * value. * * The caller owns the data in 'port' and must free it with * dpif_port_destroy() when it is no longer needed. */ int dpif_port_query_by_name(const struct dpif *dpif, const char *devname, struct dpif_port *port) { int error = dpif->dpif_class->port_query_by_name(dpif, devname, port); if (!error) { VLOG_DBG_RL(&dpmsg_rl, "%s: device %s is on port %"PRIu32, dpif_name(dpif), devname, port->port_no); } else { memset(port, 0, sizeof *port); /* For ENOENT or ENODEV we use DBG level because the caller is probably * interested in whether 'dpif' actually has a port 'devname', so that * it's not an issue worth logging if it doesn't. Other errors are * uncommon and more likely to indicate a real problem. */ VLOG_RL(&error_rl, error == ENOENT || error == ENODEV ? 
VLL_DBG : VLL_WARN, "%s: failed to query port %s: %s", dpif_name(dpif), devname, ovs_strerror(error)); } return error; } /* Returns one greater than the maximum port number accepted in flow * actions. */ uint32_t dpif_get_max_ports(const struct dpif *dpif) { return dpif->dpif_class->get_max_ports(dpif); } /* Returns the Netlink PID value to supply in OVS_ACTION_ATTR_USERSPACE actions * as the OVS_USERSPACE_ATTR_PID attribute's value, for use in flows whose * packets arrived on port 'port_no'. * * A 'port_no' of ODPP_NONE is a special case: it returns a reserved PID, not * allocated to any port, that the client may use for special purposes. * * The return value is only meaningful when DPIF_UC_ACTION has been enabled in * the 'dpif''s listen mask. It is allowed to change when DPIF_UC_ACTION is * disabled and then re-enabled, so a client that does that must be prepared to * update all of the flows that it installed that contain * OVS_ACTION_ATTR_USERSPACE actions. */ uint32_t dpif_port_get_pid(const struct dpif *dpif, odp_port_t port_no) { return (dpif->dpif_class->port_get_pid ? (dpif->dpif_class->port_get_pid)(dpif, port_no) : 0); } /* Looks up port number 'port_no' in 'dpif'. On success, returns 0 and copies * the port's name into the 'name_size' bytes in 'name', ensuring that the * result is null-terminated. On failure, returns a positive errno value and * makes 'name' the empty string. */ int dpif_port_get_name(struct dpif *dpif, odp_port_t port_no, char *name, size_t name_size) { struct dpif_port port; int error; ovs_assert(name_size > 0); error = dpif_port_query_by_number(dpif, port_no, &port); if (!error) { ovs_strlcpy(name, port.name, name_size); dpif_port_destroy(&port); } else { *name = '\0'; } return error; } /* Initializes 'dump' to begin dumping the ports in a dpif. * * This function provides no status indication. An error status for the entire * dump operation is provided when it is completed by calling * dpif_port_dump_done(). 
*/
void
dpif_port_dump_start(struct dpif_port_dump *dump, const struct dpif *dpif)
{
    dump->dpif = dpif;
    /* The start status is kept in 'dump->error'; dpif_port_dump_next() and
     * dpif_port_dump_done() check it before doing anything else. */
    dump->error = dpif->dpif_class->port_dump_start(dpif, &dump->state);
    log_operation(dpif, "port_dump_start", dump->error);
}

/* Attempts to retrieve another port from 'dump', which must have been
 * initialized with dpif_port_dump_start().  On success, stores a new dpif_port
 * into 'port' and returns true.  On failure, returns false.
 *
 * Failure might indicate an actual error or merely that the last port has been
 * dumped.  An error status for the entire dump operation is provided when it
 * is completed by calling dpif_port_dump_done().
 *
 * The dpif owns the data stored in 'port'.  It will remain valid until at
 * least the next time 'dump' is passed to dpif_port_dump_next() or
 * dpif_port_dump_done(). */
bool
dpif_port_dump_next(struct dpif_port_dump *dump, struct dpif_port *port)
{
    const struct dpif *dpif = dump->dpif;

    /* Once an error (or EOF) has been recorded, the provider is not called
     * again. */
    if (dump->error) {
        return false;
    }

    dump->error = dpif->dpif_class->port_dump_next(dpif, dump->state, port);
    if (dump->error == EOF) {
        /* EOF marks the end of the port table, so it is only logged at
         * debug level. */
        VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all ports", dpif_name(dpif));
    } else {
        log_operation(dpif, "port_dump_next", dump->error);
    }

    if (dump->error) {
        /* The provider's 'port_dump_done' is called right here on EOF or
         * error; dpif_port_dump_done() will then skip it because
         * 'dump->error' is nonzero. */
        dpif->dpif_class->port_dump_done(dpif, dump->state);
        return false;
    }
    return true;
}

/* Completes port table dump operation 'dump', which must have been initialized
 * with dpif_port_dump_start().  Returns 0 if the dump operation was
 * error-free, otherwise a positive errno value describing the problem. */
int
dpif_port_dump_done(struct dpif_port_dump *dump)
{
    const struct dpif *dpif = dump->dpif;

    /* Only call the provider's 'port_dump_done' if no error or EOF was
     * recorded earlier in the dump. */
    if (!dump->error) {
        dump->error = dpif->dpif_class->port_dump_done(dpif, dump->state);
        log_operation(dpif, "port_dump_done", dump->error);
    }
    /* EOF is the normal end of a dump, so it is reported as success. */
    return dump->error == EOF ? 0 : dump->error;
}

/* Polls for changes in the set of ports in 'dpif'.
If the set of ports in * 'dpif' has changed, this function does one of the following: * * - Stores the name of the device that was added to or deleted from 'dpif' in * '*devnamep' and returns 0. The caller is responsible for freeing * '*devnamep' (with free()) when it no longer needs it. * * - Returns ENOBUFS and sets '*devnamep' to NULL. * * This function may also return 'false positives', where it returns 0 and * '*devnamep' names a device that was not actually added or deleted or it * returns ENOBUFS without any change. * * Returns EAGAIN if the set of ports in 'dpif' has not changed. May also * return other positive errno values to indicate that something has gone * wrong. */ int dpif_port_poll(const struct dpif *dpif, char **devnamep) { int error = dpif->dpif_class->port_poll(dpif, devnamep); if (error) { *devnamep = NULL; } return error; } /* Arranges for the poll loop to wake up when port_poll(dpif) will return a * value other than EAGAIN. */ void dpif_port_poll_wait(const struct dpif *dpif) { dpif->dpif_class->port_poll_wait(dpif); } /* Extracts the flow stats for a packet. The 'flow' and 'packet' * arguments must have been initialized through a call to flow_extract(). * 'used' is stored into stats->used. */ void dpif_flow_stats_extract(const struct flow *flow, const struct ofpbuf *packet, long long int used, struct dpif_flow_stats *stats) { stats->tcp_flags = packet_get_tcp_flags(packet, flow); stats->n_bytes = packet->size; stats->n_packets = 1; stats->used = used; } /* Appends a human-readable representation of 'stats' to 's'. 
*/ void dpif_flow_stats_format(const struct dpif_flow_stats *stats, struct ds *s) { ds_put_format(s, "packets:%"PRIu64", bytes:%"PRIu64", used:", stats->n_packets, stats->n_bytes); if (stats->used) { ds_put_format(s, "%.3fs", (time_msec() - stats->used) / 1000.0); } else { ds_put_format(s, "never"); } if (stats->tcp_flags) { ds_put_cstr(s, ", flags:"); packet_format_tcp_flags(s, stats->tcp_flags); } } /* Deletes all flows from 'dpif'. Returns 0 if successful, otherwise a * positive errno value. */ int dpif_flow_flush(struct dpif *dpif) { int error; COVERAGE_INC(dpif_flow_flush); error = dpif->dpif_class->flow_flush(dpif); log_operation(dpif, "flow_flush", error); return error; } /* Queries 'dpif' for a flow entry. The flow is specified by the Netlink * attributes with types OVS_KEY_ATTR_* in the 'key_len' bytes starting at * 'key'. * * Returns 0 if successful. If no flow matches, returns ENOENT. On other * failure, returns a positive errno value. * * If 'actionsp' is nonnull, then on success '*actionsp' will be set to an * ofpbuf owned by the caller that contains the Netlink attributes for the * flow's actions. The caller must free the ofpbuf (with ofpbuf_delete()) when * it is no longer needed. * * If 'stats' is nonnull, then on success it will be updated with the flow's * statistics. 
*/ int dpif_flow_get(const struct dpif *dpif, const struct nlattr *key, size_t key_len, struct ofpbuf **actionsp, struct dpif_flow_stats *stats) { int error; COVERAGE_INC(dpif_flow_get); error = dpif->dpif_class->flow_get(dpif, key, key_len, actionsp, stats); if (error) { if (actionsp) { *actionsp = NULL; } if (stats) { memset(stats, 0, sizeof *stats); } } if (should_log_flow_message(error)) { const struct nlattr *actions; size_t actions_len; if (!error && actionsp) { actions = (*actionsp)->data; actions_len = (*actionsp)->size; } else { actions = NULL; actions_len = 0; } log_flow_message(dpif, error, "flow_get", key, key_len, NULL, 0, stats, actions, actions_len); } return error; } static int dpif_flow_put__(struct dpif *dpif, const struct dpif_flow_put *put) { int error; COVERAGE_INC(dpif_flow_put); ovs_assert(!(put->flags & ~(DPIF_FP_CREATE | DPIF_FP_MODIFY | DPIF_FP_ZERO_STATS))); error = dpif->dpif_class->flow_put(dpif, put); if (error && put->stats) { memset(put->stats, 0, sizeof *put->stats); } log_flow_put_message(dpif, put, error); return error; } /* Adds or modifies a flow in 'dpif'. The flow is specified by the Netlink * attribute OVS_FLOW_ATTR_KEY with types OVS_KEY_ATTR_* in the 'key_len' bytes * starting at 'key', and OVS_FLOW_ATTR_MASK with types of OVS_KEY_ATTR_* in the * 'mask_len' bytes starting at 'mask'. The associated actions are specified by * the Netlink attributes with types OVS_ACTION_ATTR_* in the 'actions_len' * bytes starting at 'actions'. * * - If the flow's key does not exist in 'dpif', then the flow will be added if * 'flags' includes DPIF_FP_CREATE. Otherwise the operation will fail with * ENOENT. * * If the operation succeeds, then 'stats', if nonnull, will be zeroed. * * - If the flow's key does exist in 'dpif', then the flow's actions will be * updated if 'flags' includes DPIF_FP_MODIFY. Otherwise the operation will * fail with EEXIST. 
If the flow's actions are updated, then its statistics * will be zeroed if 'flags' includes DPIF_FP_ZERO_STATS, and left as-is * otherwise. * * If the operation succeeds, then 'stats', if nonnull, will be set to the * flow's statistics before the update. */ int dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags, const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, const struct nlattr *actions, size_t actions_len, struct dpif_flow_stats *stats) { struct dpif_flow_put put; put.flags = flags; put.key = key; put.key_len = key_len; put.mask = mask; put.mask_len = mask_len; put.actions = actions; put.actions_len = actions_len; put.stats = stats; return dpif_flow_put__(dpif, &put); } static int dpif_flow_del__(struct dpif *dpif, struct dpif_flow_del *del) { int error; COVERAGE_INC(dpif_flow_del); error = dpif->dpif_class->flow_del(dpif, del); if (error && del->stats) { memset(del->stats, 0, sizeof *del->stats); } log_flow_del_message(dpif, del, error); return error; } /* Deletes a flow from 'dpif' and returns 0, or returns ENOENT if 'dpif' does * not contain such a flow. The flow is specified by the Netlink attributes * with types OVS_KEY_ATTR_* in the 'key_len' bytes starting at 'key'. * * If the operation succeeds, then 'stats', if nonnull, will be set to the * flow's statistics before its deletion. */ int dpif_flow_del(struct dpif *dpif, const struct nlattr *key, size_t key_len, struct dpif_flow_stats *stats) { struct dpif_flow_del del; del.key = key; del.key_len = key_len; del.stats = stats; return dpif_flow_del__(dpif, &del); } /* Initializes 'dump' to begin dumping the flows in a dpif. * * This function provides no status indication. An error status for the entire * dump operation is provided when it is completed by calling * dpif_flow_dump_done(). 
*/
void
dpif_flow_dump_start(struct dpif_flow_dump *dump, const struct dpif *dpif)
{
    const struct dpif_class *dc = dpif->dpif_class;

    dump->dpif = dpif;
    /* The start status is stored in 'dump' rather than returned. */
    dump->error = dc->flow_dump_start(dpif, &dump->state);
    log_operation(dpif, "flow_dump_start", dump->error);
}

/* Attempts to retrieve another flow from 'dump', which must have been
 * initialized with dpif_flow_dump_start().  On success, updates the output
 * parameters as described below and returns true.  Otherwise, returns false.
 * Failure might indicate an actual error or merely the end of the flow table.
 * An error status for the entire dump operation is provided when it is
 * completed by calling dpif_flow_dump_done().
 *
 * On success, if 'key' and 'key_len' are nonnull then '*key' and '*key_len'
 * will be set to Netlink attributes with types OVS_KEY_ATTR_* representing the
 * dumped flow's key.  If 'actions' and 'actions_len' are nonnull then they are
 * set to Netlink attributes with types OVS_ACTION_ATTR_* representing the
 * dumped flow's actions.  If 'stats' is nonnull then it will be set to the
 * dumped flow's statistics.
 *
 * All of the returned data is owned by 'dpif', not by the caller, and the
 * caller must not modify or free it.  'dpif' guarantees that it remains
 * accessible and unchanging until at least the next call to 'flow_dump_next'
 * or 'flow_dump_done' for 'dump'.
*/
bool
dpif_flow_dump_next(struct dpif_flow_dump *dump,
                    const struct nlattr **key, size_t *key_len,
                    const struct nlattr **mask, size_t *mask_len,
                    const struct nlattr **actions, size_t *actions_len,
                    const struct dpif_flow_stats **stats)
{
    const struct dpif *dpif = dump->dpif;
    /* 'error' starts as the status recorded by earlier calls; 'dump->error'
     * itself is only updated at the very end so that the logging below can
     * tell whether this call is the one that hit the error or EOF. */
    int error = dump->error;

    if (!error) {
        error = dpif->dpif_class->flow_dump_next(dpif, dump->state,
                                                 key, key_len,
                                                 mask, mask_len,
                                                 actions, actions_len,
                                                 stats);
        if (error) {
            /* The provider's 'flow_dump_done' is called as soon as the dump
             * ends or fails; dpif_flow_dump_done() then skips it. */
            dpif->dpif_class->flow_dump_done(dpif, dump->state);
        }
    }
    if (error) {
        /* Clear every output the caller asked for.  Each length pointer is
         * dereferenced whenever its companion data pointer is nonnull. */
        if (key) {
            *key = NULL;
            *key_len = 0;
        }
        if (mask) {
            *mask = NULL;
            *mask_len = 0;
        }
        if (actions) {
            *actions = NULL;
            *actions_len = 0;
        }
        if (stats) {
            *stats = NULL;
        }
    }
    /* Log only if the dump was still healthy when this call began, so a
     * finished or failed dump is not reported again on later calls. */
    if (!dump->error) {
        if (error == EOF) {
            VLOG_DBG_RL(&dpmsg_rl, "%s: dumped all flows", dpif_name(dpif));
        } else if (should_log_flow_message(error)) {
            log_flow_message(dpif, error, "flow_dump",
                             key ? *key : NULL, key ? *key_len : 0,
                             mask ? *mask : NULL, mask ? *mask_len : 0,
                             stats ? *stats : NULL, actions ? *actions : NULL,
                             actions ? *actions_len : 0);
        }
    }
    dump->error = error;
    return !error;
}

/* Completes flow table dump operation 'dump', which must have been initialized
 * with dpif_flow_dump_start().  Returns 0 if the dump operation was
 * error-free, otherwise a positive errno value describing the problem. */
int
dpif_flow_dump_done(struct dpif_flow_dump *dump)
{
    const struct dpif *dpif = dump->dpif;

    /* Only call the provider's 'flow_dump_done' if no error or EOF was
     * recorded earlier in the dump. */
    if (!dump->error) {
        dump->error = dpif->dpif_class->flow_dump_done(dpif, dump->state);
        log_operation(dpif, "flow_dump_done", dump->error);
    }
    /* EOF is the normal end of a dump, so it is reported as success. */
    return dump->error == EOF ? 0 : dump->error;
}

/* Passes 'execute' to the 'execute' callback of 'dpif' and logs the outcome.
 * An empty action list ('actions_len' of 0) is not passed to the provider
 * and counts as success.  Returns 0 or the callback's error value. */
static int
dpif_execute__(struct dpif *dpif, const struct dpif_execute *execute)
{
    int error;

    COVERAGE_INC(dpif_execute);
    if (execute->actions_len > 0) {
        error = dpif->dpif_class->execute(dpif, execute);
    } else {
        error = 0;
    }

    log_execute_message(dpif, execute, error);

    return error;
}

/* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on
 * the Ethernet frame specified in 'packet' taken from the flow specified in
 * the 'key_len' bytes of 'key'.  ('key' is mostly redundant with 'packet', but
 * it contains some metadata that cannot be recovered from 'packet', such as
 * tunnel and in_port.)
 *
 * The frame is passed in as 'buf' and stored in the request's 'packet'
 * member.
 *
 * Returns 0 if successful, otherwise a positive errno value. */
int
dpif_execute(struct dpif *dpif,
             const struct nlattr *key, size_t key_len,
             const struct nlattr *actions, size_t actions_len,
             const struct ofpbuf *buf)
{
    struct dpif_execute execute;

    execute.key = key;
    execute.key_len = key_len;
    execute.actions = actions;
    execute.actions_len = actions_len;
    execute.packet = buf;
    return dpif_execute__(dpif, &execute);
}

/* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in
 * which they are specified, placing each operation's results in the "output"
 * members documented in comments.
 *
 * This function exists because some datapaths can perform batched operations
 * faster than individual operations.
*/ void dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) { size_t i; if (dpif->dpif_class->operate) { dpif->dpif_class->operate(dpif, ops, n_ops); for (i = 0; i < n_ops; i++) { struct dpif_op *op = ops[i]; switch (op->type) { case DPIF_OP_FLOW_PUT: log_flow_put_message(dpif, &op->u.flow_put, op->error); break; case DPIF_OP_FLOW_DEL: log_flow_del_message(dpif, &op->u.flow_del, op->error); break; case DPIF_OP_EXECUTE: log_execute_message(dpif, &op->u.execute, op->error); break; } } return; } for (i = 0; i < n_ops; i++) { struct dpif_op *op = ops[i]; switch (op->type) { case DPIF_OP_FLOW_PUT: op->error = dpif_flow_put__(dpif, &op->u.flow_put); break; case DPIF_OP_FLOW_DEL: op->error = dpif_flow_del__(dpif, &op->u.flow_del); break; case DPIF_OP_EXECUTE: op->error = dpif_execute__(dpif, &op->u.execute); break; default: NOT_REACHED(); } } } /* Returns a string that represents 'type', for use in log messages. */ const char * dpif_upcall_type_to_string(enum dpif_upcall_type type) { switch (type) { case DPIF_UC_MISS: return "miss"; case DPIF_UC_ACTION: return "action"; case DPIF_N_UC_TYPES: default: return ""; } } /* Enables or disables receiving packets with dpif_recv() on 'dpif'. Returns 0 * if successful, otherwise a positive errno value. * * Turning packet receive off and then back on may change the Netlink PID * assignments returned by dpif_port_get_pid(). If the client does this, it * must update all of the flows that have OVS_ACTION_ATTR_USERSPACE actions * using the new PID assignment. */ int dpif_recv_set(struct dpif *dpif, bool enable) { int error = dpif->dpif_class->recv_set(dpif, enable); log_operation(dpif, "recv_set", error); return error; } /* Polls for an upcall from 'dpif'. If successful, stores the upcall into * '*upcall', using 'buf' for storage. Should only be called if * dpif_recv_set() has been used to enable receiving packets on 'dpif'. 
* * 'upcall->packet' and 'upcall->key' point into data in the caller-provided * 'buf', so their memory cannot be freed separately from 'buf'. (This is * hardly a great way to do things but it works out OK for the dpif providers * and clients that exist so far.) * * Returns 0 if successful, otherwise a positive errno value. Returns EAGAIN * if no upcall is immediately available. */ int dpif_recv(struct dpif *dpif, struct dpif_upcall *upcall, struct ofpbuf *buf) { int error = dpif->dpif_class->recv(dpif, upcall, buf); if (!error && !VLOG_DROP_DBG(&dpmsg_rl)) { struct ds flow; char *packet; packet = ofp_packet_to_string(upcall->packet->data, upcall->packet->size); ds_init(&flow); odp_flow_key_format(upcall->key, upcall->key_len, &flow); VLOG_DBG("%s: %s upcall:\n%s\n%s", dpif_name(dpif), dpif_upcall_type_to_string(upcall->type), ds_cstr(&flow), packet); ds_destroy(&flow); free(packet); } else if (error && error != EAGAIN) { log_operation(dpif, "recv", error); } return error; } /* Discards all messages that would otherwise be received by dpif_recv() on * 'dpif'. */ void dpif_recv_purge(struct dpif *dpif) { COVERAGE_INC(dpif_purge); if (dpif->dpif_class->recv_purge) { dpif->dpif_class->recv_purge(dpif); } } /* Arranges for the poll loop to wake up when 'dpif' has a message queued to be * received with dpif_recv(). */ void dpif_recv_wait(struct dpif *dpif) { dpif->dpif_class->recv_wait(dpif); } /* Obtains the NetFlow engine type and engine ID for 'dpif' into '*engine_type' * and '*engine_id', respectively. */ void dpif_get_netflow_ids(const struct dpif *dpif, uint8_t *engine_type, uint8_t *engine_id) { *engine_type = dpif->netflow_engine_type; *engine_id = dpif->netflow_engine_id; } /* Translates OpenFlow queue ID 'queue_id' (in host byte order) into a priority * value used for setting packet priority. * On success, returns 0 and stores the priority into '*priority'. * On failure, returns a positive errno value and stores 0 into '*priority'. 
*/
int
dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
                       uint32_t *priority)
{
    /* A class without a 'queue_to_priority' callback yields EOPNOTSUPP. */
    int error = (dpif->dpif_class->queue_to_priority
                 ? dpif->dpif_class->queue_to_priority(dpif, queue_id,
                                                       priority)
                 : EOPNOTSUPP);
    if (error) {
        *priority = 0;
    }
    log_operation(dpif, "queue_to_priority", error);
    return error;
}

/* Initializes the common members of 'dpif': its class, a copy of 'name' as
 * the base name, a full name of the form "<class type>@<name>", and the
 * NetFlow engine type and ID.  The two name strings are heap-allocated and
 * are released by dpif_uninit(). */
void
dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
          const char *name,
          uint8_t netflow_engine_type, uint8_t netflow_engine_id)
{
    dpif->dpif_class = dpif_class;
    dpif->base_name = xstrdup(name);
    dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
    dpif->netflow_engine_type = netflow_engine_type;
    dpif->netflow_engine_id = netflow_engine_id;
}

/* Undoes the results of initialization.
 *
 * Normally this function only needs to be called from dpif_close().
 * However, it may be called by providers due to an error on opening
 * that occurs after initialization.  In this case dpif_close() would
 * never be called.
 *
 * If 'close' is true, the class's 'close' callback is invoked on 'dpif'
 * before the name strings are freed. */
void
dpif_uninit(struct dpif *dpif, bool close)
{
    /* Save the string pointers first so that 'dpif' is not read again after
     * the 'close' callback has run. */
    char *base_name = dpif->base_name;
    char *full_name = dpif->full_name;

    if (close) {
        dpif->dpif_class->close(dpif);
    }

    free(base_name);
    free(full_name);
}

/* Logs the outcome of 'operation' on 'dpif': success at debug level
 * (rate-limited by 'dpmsg_rl'), failure as a warning (rate-limited by
 * 'error_rl').  A failing 'error' is rendered with ofperr_get_name() when
 * ofperr_is_valid() accepts it, otherwise with ovs_strerror(). */
static void
log_operation(const struct dpif *dpif, const char *operation, int error)
{
    if (!error) {
        VLOG_DBG_RL(&dpmsg_rl, "%s: %s success", dpif_name(dpif), operation);
    } else if (ofperr_is_valid(error)) {
        VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
                     dpif_name(dpif), operation, ofperr_get_name(error));
    } else {
        VLOG_WARN_RL(&error_rl, "%s: %s failed (%s)",
                     dpif_name(dpif), operation, ovs_strerror(error));
    }
}

/* Returns the log level for a flow message with status 'error': VLL_WARN
 * for errors other than EEXIST, VLL_DBG for success and for EEXIST. */
static enum vlog_level
flow_message_log_level(int error)
{
    /* If flows arrive in a batch, userspace may push down multiple
     * unique flow definitions that overlap when wildcards are applied.
     * Kernels that support flow wildcarding will reject these flows as
     * duplicates (EEXIST), so lower the log level to debug for these
     * types of messages. */
    return (error && error != EEXIST) ?
VLL_WARN : VLL_DBG;
}

/* Returns true if a flow message with status 'error' should be logged,
 * false if vlog_should_drop() says to drop it.  Errors are rate-limited by
 * 'error_rl', successes by 'dpmsg_rl'. */
static bool
should_log_flow_message(int error)
{
    return !vlog_should_drop(THIS_MODULE, flow_message_log_level(error),
                             error ? &error_rl : &dpmsg_rl);
}

/* Logs one line describing flow 'operation' on 'dpif', at the level chosen
 * by flow_message_log_level().  The line holds the dpif name, a
 * "failed to" prefix and the error text when 'error' is nonzero, the
 * formatted flow ('key'/'mask'), then 'stats' if nonnull and the actions if
 * 'actions' or 'actions_len' is nonzero.
 *
 * This function applies no rate limit itself; callers in this file check
 * should_log_flow_message() first. */
static void
log_flow_message(const struct dpif *dpif, int error, const char *operation,
                 const struct nlattr *key, size_t key_len,
                 const struct nlattr *mask, size_t mask_len,
                 const struct dpif_flow_stats *stats,
                 const struct nlattr *actions, size_t actions_len)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    ds_put_format(&ds, "%s: ", dpif_name(dpif));
    if (error) {
        ds_put_cstr(&ds, "failed to ");
    }
    ds_put_format(&ds, "%s ", operation);
    if (error) {
        ds_put_format(&ds, "(%s) ", ovs_strerror(error));
    }
    odp_flow_format(key, key_len, mask, mask_len, &ds, true);
    if (stats) {
        ds_put_cstr(&ds, ", ");
        dpif_flow_stats_format(stats, &ds);
    }
    if (actions || actions_len) {
        ds_put_cstr(&ds, ", actions:");
        format_odp_actions(&ds, actions, actions_len);
    }
    vlog(THIS_MODULE, flow_message_log_level(error), "%s", ds_cstr(&ds));
    ds_destroy(&ds);
}

/* Logs the result of flow put request 'put' on 'dpif', subject to rate
 * limiting.  The operation name is "put" followed by "[create]", "[modify]"
 * and "[zero]" for each corresponding flag set in 'put->flags'. */
static void
log_flow_put_message(struct dpif *dpif, const struct dpif_flow_put *put,
                     int error)
{
    if (should_log_flow_message(error)) {
        struct ds s;

        ds_init(&s);
        ds_put_cstr(&s, "put");
        if (put->flags & DPIF_FP_CREATE) {
            ds_put_cstr(&s, "[create]");
        }
        if (put->flags & DPIF_FP_MODIFY) {
            ds_put_cstr(&s, "[modify]");
        }
        if (put->flags & DPIF_FP_ZERO_STATS) {
            ds_put_cstr(&s, "[zero]");
        }
        log_flow_message(dpif, error, ds_cstr(&s),
                         put->key, put->key_len, put->mask, put->mask_len,
                         put->stats, put->actions, put->actions_len);
        ds_destroy(&s);
    }
}

/* Logs the result of flow delete request 'del' on 'dpif', subject to rate
 * limiting.  'del->stats' is included only when the deletion succeeded. */
static void
log_flow_del_message(struct dpif *dpif, const struct dpif_flow_del *del,
                     int error)
{
    if (should_log_flow_message(error)) {
        log_flow_message(dpif, error, "flow_del", del->key, del->key_len,
                         NULL, 0, !error ? del->stats : NULL, NULL, 0);
    }
}

/* Logs the result of executing 'execute' on 'dpif'.  Failures are checked
 * against the warning rate limiter 'error_rl', successes against the debug
 * rate limiter 'dpmsg_rl'. */
static void
log_execute_message(struct dpif *dpif, const struct dpif_execute *execute,
                    int error)
{
    if (!(error ?
VLOG_DROP_WARN(&error_rl) : VLOG_DROP_DBG(&dpmsg_rl))) { struct ds ds = DS_EMPTY_INITIALIZER; char *packet; packet = ofp_packet_to_string(execute->packet->data, execute->packet->size); ds_put_format(&ds, "%s: execute ", dpif_name(dpif)); format_odp_actions(&ds, execute->actions, execute->actions_len); if (error) { ds_put_format(&ds, " failed (%s)", ovs_strerror(error)); } ds_put_format(&ds, " on packet %s", packet); vlog(THIS_MODULE, error ? VLL_WARN : VLL_DBG, "%s", ds_cstr(&ds)); ds_destroy(&ds); free(packet); } } openvswitch-2.0.1+git20140120/lib/dpif.h000066400000000000000000000607361226605124000173270ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * dpif, the DataPath InterFace. * * In Open vSwitch terminology, a "datapath" is a flow-based software switch. * A datapath has no intelligence of its own. Rather, it relies entirely on * its client to set up flows. The datapath layer is core to the Open vSwitch * software switch: one could say, without much exaggeration, that everything * in ovs-vswitchd above dpif exists only to make the correct decisions * interacting with dpif. * * Typically, the client of a datapath is the software switch module in * "ovs-vswitchd", but other clients can be written. The "ovs-dpctl" utility * is also a (simple) client. * * * Overview * ======== * * The terms written in quotes below are defined in later sections. 
* * When a datapath "port" receives a packet, it extracts the headers (the * "flow"). If the datapath's "flow table" contains a "flow entry" whose flow * is the same as the packet's, then it executes the "actions" in the flow * entry and increments the flow's statistics. If there is no matching flow * entry, the datapath instead appends the packet to an "upcall" queue. * * * Ports * ===== * * A datapath has a set of ports that are analogous to the ports on an Ethernet * switch. At the datapath level, each port has the following information * associated with it: * * - A name, a short string that must be unique within the host. This is * typically a name that would be familiar to the system administrator, * e.g. "eth0" or "vif1.1", but it is otherwise arbitrary. * * - A 32-bit port number that must be unique within the datapath but is * otherwise arbitrary. The port number is the most important identifier * for a port in the datapath interface. * * - A type, a short string that identifies the kind of port. On a Linux * host, typical types are "system" (for a network device such as eth0), * "internal" (for a simulated port used to connect to the TCP/IP stack), * and "gre" (for a GRE tunnel). * * - A Netlink PID (see "Upcall Queuing and Ordering" below). * * The dpif interface has functions for adding and deleting ports. When a * datapath implements these (e.g. as the Linux and netdev datapaths do), then * Open vSwitch's ovs-vswitchd daemon can directly control what ports are used * for switching. Some datapaths might not implement them, or implement them * with restrictions on the types of ports that can be added or removed * (e.g. on ESX), on systems where port membership can only be changed by some * external entity. * * Each datapath must have a port, sometimes called the "local port", whose * name is the same as the datapath itself, with port number 0. The local port * cannot be deleted. * * Ports are available as "struct netdev"s. 
To obtain a "struct netdev *" for * a port named 'name' with type 'port_type', in a datapath of type * 'datapath_type', call netdev_open(name, dpif_port_open_type(datapath_type, * port_type)). The netdev can be used to get and set important data related to * the port, such as: * * - MTU (netdev_get_mtu(), netdev_set_mtu()). * * - Ethernet address (netdev_get_etheraddr(), netdev_set_etheraddr()). * * - Statistics such as the number of packets and bytes transmitted and * received (netdev_get_stats()). * * - Carrier status (netdev_get_carrier()). * * - Speed (netdev_get_features()). * * - QoS queue configuration (netdev_get_queue(), netdev_set_queue() and * related functions.) * * - Arbitrary port-specific configuration parameters (netdev_get_config(), * netdev_set_config()). An example of such a parameter is the IP * endpoint for a GRE tunnel. * * * Flow Table * ========== * * The flow table is a hash table of "flow entries". Each flow entry contains: * * - A "flow", that is, a summary of the headers in an Ethernet packet. The * flow is the hash key and thus must be unique within the flow table. * Flows are fine-grained entities that include L2, L3, and L4 headers. A * single TCP connection consists of two flows, one in each direction. * * In Open vSwitch userspace, "struct flow" is the typical way to describe * a flow, but the datapath interface uses a different data format to * allow ABI forward- and backward-compatibility. datapath/README * describes the rationale and design. Refer to OVS_KEY_ATTR_* and * "struct ovs_key_*" in include/linux/openvswitch.h for details. * lib/odp-util.h defines several functions for working with these flows. * * (In case you are familiar with OpenFlow, datapath flows are analogous * to OpenFlow flow matches. The most important difference is that * OpenFlow allows fields to be wildcarded and prioritized, whereas a * datapath's flow table is a hash table so every flow must be * exact-match, thus without priorities.)
* * - A list of "actions" that tell the datapath what to do with packets * within a flow. Some examples of actions are OVS_ACTION_ATTR_OUTPUT, * which transmits the packet out a port, and OVS_ACTION_ATTR_SET, which * modifies packet headers. Refer to OVS_ACTION_ATTR_* and "struct * ovs_action_*" in include/linux/openvswitch.h for details. * lib/odp-util.h defines several functions for working with datapath * actions. * * The actions list may be empty. This indicates that nothing should be * done to matching packets, that is, they should be dropped. * * (In case you are familiar with OpenFlow, datapath actions are analogous * to OpenFlow actions.) * * - Statistics: the number of packets and bytes that the flow has * processed, the last time that the flow processed a packet, and the * union of all the TCP flags in packets processed by the flow. (The * latter is 0 if the flow is not a TCP flow.) * * The datapath's client manages the flow table, primarily in reaction to * "upcalls" (see below). * * * Upcalls * ======= * * A datapath sometimes needs to notify its client that a packet was received. * The datapath mechanism to do this is called an "upcall". * * Upcalls are used in two situations: * * - When a packet is received, but there is no matching flow entry in its * flow table (a flow table "miss"), this causes an upcall of type * DPIF_UC_MISS. These are called "miss" upcalls. * * - A datapath action of type OVS_ACTION_ATTR_USERSPACE causes an upcall of * type DPIF_UC_ACTION. These are called "action" upcalls. * * An upcall contains an entire packet. There is no attempt to, e.g., copy * only as much of the packet as normally needed to make a forwarding decision. * Such an optimization is doable, but experimental prototypes showed it to be * of little benefit because an upcall typically contains the first packet of a * flow, which is usually short (e.g. a TCP SYN). Also, the entire packet can * sometimes really be needed. 
* * After a client reads a given upcall, the datapath is finished with it, that * is, the datapath doesn't maintain any lingering state past that point. * * The latency from the time that a packet arrives at a port to the time that * it is received from dpif_recv() is critical in some benchmarks. For * example, if this latency is 1 ms, then a netperf TCP_CRR test, which opens * and closes TCP connections one at a time as quickly as it can, cannot * possibly achieve more than 500 transactions per second, since every * connection consists of two flows with 1-ms latency to set up each one. * * To receive upcalls, a client has to enable them with dpif_recv_set(). A * datapath should generally support multiple clients at once (e.g. so that one * may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" is * also running) but need not support multiple clients enabling upcalls at * once. * * * Upcall Queuing and Ordering * --------------------------- * * The datapath's client reads upcalls one at a time by calling dpif_recv(). * When more than one upcall is pending, the order in which the datapath * presents upcalls to its client is important. The datapath's client does not * directly control this order, so the datapath implementer must take care * during design. * * The minimal behavior, suitable for initial testing of a datapath * implementation, is that all upcalls are appended to a single queue, which is * delivered to the client in order. * * The datapath should ensure that a high rate of upcalls from one particular * port cannot cause upcalls from other sources to be dropped or unreasonably * delayed. Otherwise, one port conducting a port scan or otherwise initiating * high-rate traffic spanning many flows could suppress other traffic. * Ideally, the datapath should present upcalls from each port in a "round * robin" manner, to ensure fairness. 
* * The client has no control over "miss" upcalls and no insight into the * datapath's implementation, so the datapath is entirely responsible for * queuing and delivering them. On the other hand, the datapath has * considerable freedom of implementation. One good approach is to maintain a * separate queue for each port, to prevent any given port's upcalls from * interfering with other ports' upcalls. If this is impractical, then another * reasonable choice is to maintain some fixed number of queues and assign each * port to one of them. Ports assigned to the same queue can then interfere * with each other, but not with ports assigned to different queues. Other * approaches are also possible. * * The client has some control over "action" upcalls: it can specify a 32-bit * "Netlink PID" as part of the action. This terminology comes from the Linux * datapath implementation, which uses a protocol called Netlink in which a PID * designates a particular socket and the upcall data is delivered to the * socket's receive queue. Generically, though, a Netlink PID identifies a * queue for upcalls. The basic requirements on the datapath are: * * - The datapath must provide a Netlink PID associated with each port. The * client can retrieve the PID with dpif_port_get_pid(). * * - The datapath must provide a "special" Netlink PID not associated with * any port. dpif_port_get_pid() also provides this PID. (ovs-vswitchd * uses this PID to queue special packets that must not be lost even if a * port is otherwise busy, such as packets used for tunnel monitoring.) * * The minimal behavior of dpif_port_get_pid() and the treatment of the Netlink * PID in "action" upcalls is that dpif_port_get_pid() returns a constant value * and all upcalls are appended to a single queue. * * The ideal behavior is: * * - Each port has a PID that identifies the queue used for "miss" upcalls * on that port. 
(Thus, if each port has its own queue for "miss" * upcalls, then each port has a different Netlink PID.) * * - "miss" upcalls for a given port and "action" upcalls that specify that * port's Netlink PID add their upcalls to the same queue. The upcalls * are delivered to the datapath's client in the order that the packets * were received, regardless of whether the upcalls are "miss" or "action" * upcalls. * * - Upcalls that specify the "special" Netlink PID are queued separately. * * * Packet Format * ============= * * The datapath interface works with packets in a particular form. This is the * form taken by packets received via upcalls (i.e. by dpif_recv()). Packets * supplied to the datapath for processing (i.e. to dpif_execute()) also take * this form. * * A VLAN tag is represented by an 802.1Q header. If the layer below the * datapath interface uses another representation, then the datapath interface * must perform conversion. * * The datapath interface requires all packets to fit within the MTU. Some * operating systems internally process packets larger than MTU, with features * such as TSO and UFO. When such a packet passes through the datapath * interface, it must be broken into multiple MTU or smaller sized packets for * presentation as upcalls. (This does not happen often, because an upcall * typically contains the first packet of a flow, which is usually short.) * * Some operating system TCP/IP stacks maintain packets in an unchecksummed or * partially checksummed state until transmission. The datapath interface * requires all host-generated packets to be fully checksummed (e.g. IP and TCP * checksums must be correct). On such an OS, the datapath interface must fill * in these checksums. * * Packets passed through the datapath interface must be at least 14 bytes * long, that is, they must have a complete Ethernet header. They are not * required to be padded to the minimum Ethernet length. 
* * * Typical Usage * ============= * * Typically, the client of a datapath begins by configuring the datapath with * a set of ports. Afterward, the client runs in a loop polling for upcalls to * arrive. * * For each upcall received, the client examines the enclosed packet and * figures out what should be done with it. For example, if the client * implements a MAC-learning switch, then it searches the forwarding database * for the packet's destination MAC and VLAN and determines the set of ports to * which it should be sent. In any case, the client composes a set of datapath * actions to properly dispatch the packet and then directs the datapath to * execute those actions on the packet (e.g. with dpif_execute()). * * Most of the time, the actions that the client executed on the packet apply * to every packet with the same flow. For example, the flow includes both * destination MAC and VLAN ID (and much more), so this is true for the * MAC-learning switch example above. In such a case, the client can also * direct the datapath to treat any further packets in the flow in the same * way, using dpif_flow_put() to add a new flow entry. * * Other tasks the client might need to perform, in addition to reacting to * upcalls, include: * * - Periodically polling flow statistics, perhaps to supply to its own * clients. * * - Deleting flow entries from the datapath that haven't been used * recently, to save memory. * * - Updating flow entries whose actions should change. For example, if a * MAC learning switch learns that a MAC has moved, then it must update * the actions of flow entries that sent packets to the MAC at its old * location. * * - Adding and removing ports to achieve a new configuration. * * * Thread-safety * ============= * * Most of the dpif functions are fully thread-safe: they may be called from * any number of threads on the same or different dpif objects. 
The exceptions * are: * * - dpif_port_poll() and dpif_port_poll_wait() are conditionally * thread-safe: they may be called from different threads only on * different dpif objects. * * - Functions that operate on struct dpif_port_dump or struct * dpif_flow_dump are conditionally thread-safe with respect to those * objects. That is, one may dump ports or flows from any number of * threads at once, but each thread must use its own struct dpif_port_dump * or dpif_flow_dump. */ #ifndef DPIF_H #define DPIF_H 1 #include #include #include #include "openflow/openflow.h" #include "netdev.h" #include "util.h" #ifdef __cplusplus extern "C" { #endif struct dpif; struct ds; struct flow; struct nlattr; struct ofpbuf; struct sset; struct dpif_class; int dp_register_provider(const struct dpif_class *); int dp_unregister_provider(const char *type); void dp_blacklist_provider(const char *type); void dp_enumerate_types(struct sset *types); const char *dpif_normalize_type(const char *); int dp_enumerate_names(const char *type, struct sset *names); void dp_parse_name(const char *datapath_name, char **name, char **type); int dpif_open(const char *name, const char *type, struct dpif **); int dpif_create(const char *name, const char *type, struct dpif **); int dpif_create_and_open(const char *name, const char *type, struct dpif **); void dpif_close(struct dpif *); void dpif_run(struct dpif *); void dpif_wait(struct dpif *); const char *dpif_name(const struct dpif *); const char *dpif_base_name(const struct dpif *); const char *dpif_type(const struct dpif *); int dpif_delete(struct dpif *); /* Statistics for a dpif as a whole. */ struct dpif_dp_stats { uint64_t n_hit; /* Number of flow table matches. */ uint64_t n_missed; /* Number of flow table misses. */ uint64_t n_lost; /* Number of misses not sent to userspace. */ uint64_t n_flows; /* Number of flows present. */ }; int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *); /* Port operations. 
*/ const char *dpif_port_open_type(const char *datapath_type, const char *port_type); int dpif_port_add(struct dpif *, struct netdev *, odp_port_t *port_nop); int dpif_port_del(struct dpif *, odp_port_t port_no); /* A port within a datapath. * * 'name' and 'type' are suitable for passing to netdev_open(). */ struct dpif_port { char *name; /* Network device name, e.g. "eth0". */ char *type; /* Network device type, e.g. "system". */ odp_port_t port_no; /* Port number within datapath. */ }; void dpif_port_clone(struct dpif_port *, const struct dpif_port *); void dpif_port_destroy(struct dpif_port *); bool dpif_port_exists(const struct dpif *dpif, const char *devname); int dpif_port_query_by_number(const struct dpif *, odp_port_t port_no, struct dpif_port *); int dpif_port_query_by_name(const struct dpif *, const char *devname, struct dpif_port *); int dpif_port_get_name(struct dpif *, odp_port_t port_no, char *name, size_t name_size); uint32_t dpif_get_max_ports(const struct dpif *); uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no); struct dpif_port_dump { const struct dpif *dpif; int error; void *state; }; void dpif_port_dump_start(struct dpif_port_dump *, const struct dpif *); bool dpif_port_dump_next(struct dpif_port_dump *, struct dpif_port *); int dpif_port_dump_done(struct dpif_port_dump *); /* Iterates through each DPIF_PORT in DPIF, using DUMP as state. * * Arguments all have pointer type. * * If you break out of the loop, then you need to free the dump structure by * hand using dpif_port_dump_done(). */ #define DPIF_PORT_FOR_EACH(DPIF_PORT, DUMP, DPIF) \ for (dpif_port_dump_start(DUMP, DPIF); \ (dpif_port_dump_next(DUMP, DPIF_PORT) \ ? true \ : (dpif_port_dump_done(DUMP), false)); \ ) int dpif_port_poll(const struct dpif *, char **devnamep); void dpif_port_poll_wait(const struct dpif *); /* Flow table operations. 
*/ struct dpif_flow_stats { uint64_t n_packets; uint64_t n_bytes; long long int used; uint8_t tcp_flags; }; void dpif_flow_stats_extract(const struct flow *, const struct ofpbuf *packet, long long int used, struct dpif_flow_stats *); void dpif_flow_stats_format(const struct dpif_flow_stats *, struct ds *); enum dpif_flow_put_flags { DPIF_FP_CREATE = 1 << 0, /* Allow creating a new flow. */ DPIF_FP_MODIFY = 1 << 1, /* Allow modifying an existing flow. */ DPIF_FP_ZERO_STATS = 1 << 2 /* Zero the stats of an existing flow. */ }; int dpif_flow_flush(struct dpif *); int dpif_flow_put(struct dpif *, enum dpif_flow_put_flags, const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, const struct nlattr *actions, size_t actions_len, struct dpif_flow_stats *); int dpif_flow_del(struct dpif *, const struct nlattr *key, size_t key_len, struct dpif_flow_stats *); int dpif_flow_get(const struct dpif *, const struct nlattr *key, size_t key_len, struct ofpbuf **actionsp, struct dpif_flow_stats *); struct dpif_flow_dump { const struct dpif *dpif; int error; void *state; }; void dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *); bool dpif_flow_dump_next(struct dpif_flow_dump *, const struct nlattr **key, size_t *key_len, const struct nlattr **mask, size_t *mask_len, const struct nlattr **actions, size_t *actions_len, const struct dpif_flow_stats **); int dpif_flow_dump_done(struct dpif_flow_dump *); /* Packet operations. */ int dpif_execute(struct dpif *, const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *); /* Operation batching interface. * * Some datapaths are faster at performing N operations together than the same * N operations individually, hence an interface for batching. */ enum dpif_op_type { DPIF_OP_FLOW_PUT = 1, DPIF_OP_FLOW_DEL, DPIF_OP_EXECUTE, }; struct dpif_flow_put { /* Input. */ enum dpif_flow_put_flags flags; /* DPIF_FP_*. 
*/ const struct nlattr *key; /* Flow to put. */ size_t key_len; /* Length of 'key' in bytes. */ const struct nlattr *mask; /* Mask to put. */ size_t mask_len; /* Length of 'mask' in bytes. */ const struct nlattr *actions; /* Actions to perform on flow. */ size_t actions_len; /* Length of 'actions' in bytes. */ /* Output. */ struct dpif_flow_stats *stats; /* Optional flow statistics. */ }; struct dpif_flow_del { /* Input. */ const struct nlattr *key; /* Flow to delete. */ size_t key_len; /* Length of 'key' in bytes. */ /* Output. */ struct dpif_flow_stats *stats; /* Optional flow statistics. */ }; struct dpif_execute { const struct nlattr *key; /* Partial flow key (only for metadata). */ size_t key_len; /* Length of 'key' in bytes. */ const struct nlattr *actions; /* Actions to execute on packet. */ size_t actions_len; /* Length of 'actions' in bytes. */ const struct ofpbuf *packet; /* Packet to execute. */ }; struct dpif_op { enum dpif_op_type type; int error; union { struct dpif_flow_put flow_put; struct dpif_flow_del flow_del; struct dpif_execute execute; } u; }; void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops); /* Upcalls. */ enum dpif_upcall_type { DPIF_UC_MISS, /* Miss in flow table. */ DPIF_UC_ACTION, /* OVS_ACTION_ATTR_USERSPACE action. */ DPIF_N_UC_TYPES }; const char *dpif_upcall_type_to_string(enum dpif_upcall_type); /* A packet passed up from the datapath to userspace. * * If 'key', 'actions', or 'userdata' is nonnull, then it points into data * owned by 'packet', so their memory cannot be freed separately. (This is * hardly a great way to do things but it works out OK for the dpif providers * and clients that exist so far.) */ struct dpif_upcall { /* All types. */ enum dpif_upcall_type type; struct ofpbuf *packet; /* Packet data. */ struct nlattr *key; /* Flow key. */ size_t key_len; /* Length of 'key' in bytes. */ /* DPIF_UC_ACTION only. */ struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. 
*/ }; int dpif_recv_set(struct dpif *, bool enable); int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *); void dpif_recv_purge(struct dpif *); void dpif_recv_wait(struct dpif *); /* Miscellaneous. */ void dpif_get_netflow_ids(const struct dpif *, uint8_t *engine_type, uint8_t *engine_id); int dpif_queue_to_priority(const struct dpif *, uint32_t queue_id, uint32_t *priority); #ifdef __cplusplus } #endif #endif /* dpif.h */ openvswitch-2.0.1+git20140120/lib/dummy.c000066400000000000000000000025061226605124000175220ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "dummy.h" /* Enables support for "dummy" network devices and dpifs, which are useful for * testing. A client program might call this function if it is designed * specifically for testing or the user enables it on the command line. * * If 'override' is false, then "dummy" dpif and netdev classes will be * created. If 'override' is true, then in addition all existing dpif and * netdev classes will be deleted and replaced by dummy classes. * * There is no strong reason why dummy devices shouldn't always be enabled. 
*/ void dummy_enable(bool override) { /* Only the netdev and dpif registrations take 'override'; the timeval and vlandev hooks below are called without arguments. */ netdev_dummy_register(override); dpif_dummy_register(override); timeval_dummy_register(); vlandev_dummy_enable(); } openvswitch-2.0.1+git20140120/lib/dummy.h000066400000000000000000000017301226605124000175250ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef DUMMY_H #define DUMMY_H 1 #include /* For client programs to call directly to enable dummy support. */ void dummy_enable(bool override); /* Implementation details. */ void dpif_dummy_register(bool override); void netdev_dummy_register(bool override); void timeval_dummy_register(void); void vlandev_dummy_enable(void); #endif /* dummy.h */ openvswitch-2.0.1+git20140120/lib/dynamic-string.c000066400000000000000000000254771226605124000213330ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "dynamic-string.h" #include #include #include #include "timeval.h" #include "util.h" /* Initializes 'ds' as an empty string buffer. */ void ds_init(struct ds *ds) { ds->string = NULL; ds->length = 0; ds->allocated = 0; } /* Sets 'ds''s length to 0, effectively clearing any existing content. Does * not free any memory. */ void ds_clear(struct ds *ds) { ds->length = 0; } /* Reduces 'ds''s length to no more than 'new_length'. (If its length is * already 'new_length' or less, does nothing.) */ void ds_truncate(struct ds *ds, size_t new_length) { if (ds->length > new_length) { ds->length = new_length; ds->string[new_length] = '\0'; } } /* Ensures that at least 'min_length + 1' bytes (including space for a null * terminator) are allocated for ds->string, allocating or reallocating memory * as necessary. */ void ds_reserve(struct ds *ds, size_t min_length) { if (min_length > ds->allocated || !ds->string) { ds->allocated += MAX(min_length, ds->allocated); ds->allocated = MAX(8, ds->allocated); ds->string = xrealloc(ds->string, ds->allocated + 1); } } /* Appends space for 'n' bytes to the end of 'ds->string', increasing * 'ds->length' by the same amount, and returns the first appended byte. The * caller should fill in all 'n' bytes starting at the return value. */ char * ds_put_uninit(struct ds *ds, size_t n) { ds_reserve(ds, ds->length + n); ds->length += n; ds->string[ds->length] = '\0'; return &ds->string[ds->length - n]; } void ds_put_char__(struct ds *ds, char c) { *ds_put_uninit(ds, 1) = c; } /* Appends unicode code point 'uc' to 'ds' in UTF-8 encoding. 
*/ void ds_put_utf8(struct ds *ds, int uc) { if (uc <= 0x7f) { ds_put_char(ds, uc); } else if (uc <= 0x7ff) { ds_put_char(ds, 0xc0 | (uc >> 6)); ds_put_char(ds, 0x80 | (uc & 0x3f)); } else if (uc <= 0xffff) { ds_put_char(ds, 0xe0 | (uc >> 12)); ds_put_char(ds, 0x80 | ((uc >> 6) & 0x3f)); ds_put_char(ds, 0x80 | (uc & 0x3f)); } else if (uc <= 0x10ffff) { ds_put_char(ds, 0xf0 | (uc >> 18)); ds_put_char(ds, 0x80 | ((uc >> 12) & 0x3f)); ds_put_char(ds, 0x80 | ((uc >> 6) & 0x3f)); ds_put_char(ds, 0x80 | (uc & 0x3f)); } else { /* Invalid code point. Insert the Unicode general substitute * REPLACEMENT CHARACTER. */ ds_put_utf8(ds, 0xfffd); } } void ds_put_char_multiple(struct ds *ds, char c, size_t n) { memset(ds_put_uninit(ds, n), c, n); } void ds_put_buffer(struct ds *ds, const char *s, size_t n) { memcpy(ds_put_uninit(ds, n), s, n); } void ds_put_cstr(struct ds *ds, const char *s) { size_t s_len = strlen(s); memcpy(ds_put_uninit(ds, s_len), s, s_len); } void ds_put_and_free_cstr(struct ds *ds, char *s) { ds_put_cstr(ds, s); free(s); } void ds_put_format(struct ds *ds, const char *format, ...) { va_list args; va_start(args, format); ds_put_format_valist(ds, format, args); va_end(args); } void ds_put_format_valist(struct ds *ds, const char *format, va_list args_) { va_list args; size_t available; int needed; va_copy(args, args_); available = ds->string ? 
ds->allocated - ds->length + 1 : 0; needed = vsnprintf(&ds->string[ds->length], available, format, args); va_end(args); if (needed < available) { ds->length += needed; } else { ds_reserve(ds, ds->length + needed); va_copy(args, args_); available = ds->allocated - ds->length + 1; needed = vsnprintf(&ds->string[ds->length], available, format, args); va_end(args); ovs_assert(needed < available); ds->length += needed; } } void ds_put_printable(struct ds *ds, const char *s, size_t n) { ds_reserve(ds, ds->length + n); while (n-- > 0) { unsigned char c = *s++; if (c < 0x20 || c > 0x7e || c == '\\' || c == '"') { ds_put_format(ds, "\\%03o", (int) c); } else { ds_put_char(ds, c); } } } /* Writes the current time with optional millisecond resolution to 'string' * based on 'template'. * The current time is either localtime or UTC based on 'utc'. */ void ds_put_strftime_msec(struct ds *ds, const char *template, long long int when, bool utc) { struct tm_msec tm; if (utc) { gmtime_msec(when, &tm); } else { localtime_msec(when, &tm); } for (;;) { size_t avail = ds->string ? ds->allocated - ds->length + 1 : 0; size_t used = strftime_msec(&ds->string[ds->length], avail, template, &tm); if (used) { ds->length += used; return; } ds_reserve(ds, ds->length + (avail < 32 ? 64 : 2 * avail)); } } /* Returns a malloc()'d string for time 'when' based on 'template', in local * time or UTC based on 'utc'. */ char * xastrftime_msec(const char *template, long long int when, bool utc) { struct ds s; ds_init(&s); ds_put_strftime_msec(&s, template, when, utc); return s.string; } int ds_get_line(struct ds *ds, FILE *file) { ds_clear(ds); for (;;) { int c = getc(file); if (c == EOF) { return ds->length ? 0 : EOF; } else if (c == '\n') { return 0; } else { ds_put_char(ds, c); } } } /* Reads a line from 'file' into 'ds', clearing anything initially in 'ds'. * Deletes comments introduced by "#" and skips lines that contains only white * space (after deleting comments). 
* * If 'line_numberp' is nonnull, increments '*line_numberp' by the number of * lines read from 'file'. * * Returns 0 if successful, EOF if no non-blank line was found. */
int
ds_get_preprocessed_line(struct ds *ds, FILE *file, int *line_numberp)
{
    while (!ds_get_line(ds, file)) {
        char *line = ds_cstr(ds);
        char *comment;

        if (line_numberp) {
            ++*line_numberp;
        }

        /* Delete comments.  ds_truncate() is used instead of storing a null
         * byte by hand so that ds->length shrinks together with the visible
         * string; otherwise the stripped comment would still count as part of
         * 'ds' for any length-based use of it. */
        comment = strchr(line, '#');
        if (comment) {
            ds_truncate(ds, comment - line);
        }

        /* Return successfully unless the line is all spaces. */
        if (line[strspn(line, " \t\n")] != '\0') {
            return 0;
        }
    }
    return EOF;
}

/* Reads a line from 'file' into 'ds' and does some preprocessing on it: * * - If the line begins with #, prints it on stdout and reads the next line. * * - Otherwise, if the line contains an # somewhere else, strips it and * everything following it (as a comment). * * - If (after comment removal) the line contains only white space, prints * a blank line on stdout and reads the next line. * * - Otherwise, returns the line to the caller. * * This is useful in some of the OVS tests, where we want to check that parsing * and then re-formatting some kind of data does not change it, but we also * want to be able to put comments in the input. * * Returns 0 if successful, EOF if no non-blank line was found. 
*/ int ds_get_test_line(struct ds *ds, FILE *file) { for (;;) { char *s, *comment; int retval; retval = ds_get_line(ds, file); if (retval) { return retval; } s = ds_cstr(ds); if (*s == '#') { puts(s); continue; } comment = strchr(s, '#'); if (comment) { *comment = '\0'; } if (s[strspn(s, " \t\n")] == '\0') { putchar('\n'); continue; } return 0; } } char * ds_cstr(struct ds *ds) { if (!ds->string) { ds_reserve(ds, 0); } ds->string[ds->length] = '\0'; return ds->string; } const char * ds_cstr_ro(const struct ds *ds) { return ds_cstr(CONST_CAST(struct ds *, ds)); } /* Returns a null-terminated string representing the current contents of 'ds', * which the caller is expected to free with free(), then clears the contents * of 'ds'. */ char * ds_steal_cstr(struct ds *ds) { char *s = ds_cstr(ds); ds_init(ds); return s; } void ds_destroy(struct ds *ds) { free(ds->string); } /* Swaps the content of 'a' and 'b'. */ void ds_swap(struct ds *a, struct ds *b) { struct ds temp = *a; *a = *b; *b = temp; } /* Writes the 'size' bytes in 'buf' to 'string' as hex bytes arranged 16 per * line. Numeric offsets are also included, starting at 'ofs' for the first * byte in 'buf'. If 'ascii' is true then the corresponding ASCII characters * are also rendered alongside. */ void ds_put_hex_dump(struct ds *ds, const void *buf_, size_t size, uintptr_t ofs, bool ascii) { const uint8_t *buf = buf_; const size_t per_line = 16; /* Maximum bytes per line. */ while (size > 0) { size_t start, end, n; size_t i; /* Number of bytes on this line. */ start = ofs % per_line; end = per_line; if (end - start > size) end = start + size; n = end - start; /* Print line. */ ds_put_format(ds, "%08jx ", (uintmax_t) ROUND_DOWN(ofs, per_line)); for (i = 0; i < start; i++) { ds_put_format(ds, " "); } for (; i < end; i++) { ds_put_format(ds, "%02hhx%c", buf[i - start], i == per_line / 2 - 1? 
'-' : ' '); } if (ascii) { for (; i < per_line; i++) ds_put_format(ds, " "); ds_put_format(ds, "|"); for (i = 0; i < start; i++) ds_put_format(ds, " "); for (; i < end; i++) { int c = buf[i - start]; ds_put_char(ds, c >= 32 && c < 127 ? c : '.'); } for (; i < per_line; i++) ds_put_format(ds, " "); ds_put_format(ds, "|"); } else { ds_chomp(ds, ' '); } ds_put_format(ds, "\n"); ofs += n; buf += n; size -= n; } } int ds_last(const struct ds *ds) { return ds->length > 0 ? (unsigned char) ds->string[ds->length - 1] : EOF; } void ds_chomp(struct ds *ds, int c) { if (ds->length > 0 && ds->string[ds->length - 1] == (char) c) { ds->string[--ds->length] = '\0'; } } openvswitch-2.0.1+git20140120/lib/dynamic-string.h000066400000000000000000000062501226605124000213240ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef DYNAMIC_STRING_H #define DYNAMIC_STRING_H 1 #include #include #include #include #include #include #include "compiler.h" /* A "dynamic string", that is, a buffer that can be used to construct a * string across a series of operations that extend or modify it. * * The 'string' member does not always point to a null-terminated string. * Initially it is NULL, and even when it is nonnull, some operations do not * ensure that it is null-terminated. Use ds_cstr() to ensure that memory is * allocated for the string and that it is null-terminated. 
*/ struct ds { char *string; /* String data, or NULL; not always null-terminated (use ds_cstr()). */ size_t length; /* Bytes used, not including null terminator. */ size_t allocated; /* Bytes allocated, not including null terminator. */ }; #define DS_EMPTY_INITIALIZER { NULL, 0, 0 } void ds_init(struct ds *); void ds_clear(struct ds *); void ds_truncate(struct ds *, size_t new_length); void ds_reserve(struct ds *, size_t min_length); char *ds_put_uninit(struct ds *, size_t n); static inline void ds_put_char(struct ds *, char); void ds_put_utf8(struct ds *, int uc); void ds_put_char_multiple(struct ds *, char, size_t n); void ds_put_buffer(struct ds *, const char *, size_t n); void ds_put_cstr(struct ds *, const char *); void ds_put_and_free_cstr(struct ds *, char *); void ds_put_format(struct ds *, const char *, ...) PRINTF_FORMAT(2, 3); void ds_put_format_valist(struct ds *, const char *, va_list) PRINTF_FORMAT(2, 0); void ds_put_printable(struct ds *, const char *, size_t); void ds_put_hex_dump(struct ds *ds, const void *buf_, size_t size, uintptr_t ofs, bool ascii); int ds_get_line(struct ds *, FILE *); int ds_get_preprocessed_line(struct ds *, FILE *, int *line_number); int ds_get_test_line(struct ds *, FILE *); void ds_put_strftime_msec(struct ds *, const char *template, long long int when, bool utc); char *xastrftime_msec(const char *template, long long int when, bool utc); char *ds_cstr(struct ds *); const char *ds_cstr_ro(const struct ds *); char *ds_steal_cstr(struct ds *); void ds_destroy(struct ds *); void ds_swap(struct ds *, struct ds *); int ds_last(const struct ds *); void ds_chomp(struct ds *, int c); /* Inline functions. 
*/ void ds_put_char__(struct ds *, char); static inline void ds_put_char(struct ds *ds, char c) { if (ds->length < ds->allocated) { ds->string[ds->length++] = c; ds->string[ds->length] = '\0'; } else { ds_put_char__(ds, c); } } #endif /* dynamic-string.h */ openvswitch-2.0.1+git20140120/lib/entropy.c000066400000000000000000000033611226605124000200670ustar00rootroot00000000000000/* Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "entropy.h" #include #include #include #include "socket-util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(entropy); static const char urandom[] = "/dev/urandom"; /* Initializes 'buffer' with 'n' bytes of high-quality random numbers. Returns * 0 if successful, otherwise a positive errno value or EOF on error. */ int get_entropy(void *buffer, size_t n) { size_t bytes_read; int error; int fd; fd = open(urandom, O_RDONLY); if (fd < 0) { VLOG_ERR("%s: open failed (%s)", urandom, ovs_strerror(errno)); return errno ? errno : EINVAL; } error = read_fully(fd, buffer, n, &bytes_read); close(fd); if (error) { VLOG_ERR("%s: read error (%s)", urandom, ovs_retval_to_string(error)); } return error; } /* Initializes 'buffer' with 'n' bytes of high-quality random numbers. Exits * if an error occurs. 
*/ void get_entropy_or_die(void *buffer, size_t n) { int error = get_entropy(buffer, n); if (error) { VLOG_FATAL("%s: read error (%s)", urandom, ovs_retval_to_string(error)); } } openvswitch-2.0.1+git20140120/lib/entropy.h000066400000000000000000000013651226605124000200760ustar00rootroot00000000000000/* Copyright (c) 2010 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ENTROPY_H #define ENTROPY_H 1 #include int get_entropy(void *, size_t); void get_entropy_or_die(void *, size_t); #endif /* entropy.h */ openvswitch-2.0.1+git20140120/lib/fatal-signal.c000066400000000000000000000215601226605124000207320ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "fatal-signal.h" #include #include #include #include #include #include #include #include #include "ovs-thread.h" #include "poll-loop.h" #include "shash.h" #include "sset.h" #include "signals.h" #include "socket-util.h" #include "util.h" #include "vlog.h" #include "type-props.h" #ifndef SIG_ATOMIC_MAX #define SIG_ATOMIC_MAX TYPE_MAXIMUM(sig_atomic_t) #endif VLOG_DEFINE_THIS_MODULE(fatal_signal); /* Signals to catch. */ static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP, SIGALRM }; /* Hooks to call upon catching a signal */ struct hook { void (*hook_cb)(void *aux); void (*cancel_cb)(void *aux); void *aux; bool run_at_exit; }; #define MAX_HOOKS 32 static struct hook hooks[MAX_HOOKS]; static size_t n_hooks; static int signal_fds[2]; static volatile sig_atomic_t stored_sig_nr = SIG_ATOMIC_MAX; static struct ovs_mutex mutex; static void atexit_handler(void); static void call_hooks(int sig_nr); /* Initializes the fatal signal handling module. Calling this function is * optional, because calling any other function in the module will also * initialize it. However, in a multithreaded program, the module must be * initialized while the process is still single-threaded. */ void fatal_signal_init(void) { static bool inited = false; if (!inited) { size_t i; assert_single_threaded(); inited = true; ovs_mutex_init_recursive(&mutex); xpipe_nonblocking(signal_fds); for (i = 0; i < ARRAY_SIZE(fatal_signals); i++) { int sig_nr = fatal_signals[i]; struct sigaction old_sa; xsigaction(sig_nr, NULL, &old_sa); if (old_sa.sa_handler == SIG_DFL && signal(sig_nr, fatal_signal_handler) == SIG_ERR) { VLOG_FATAL("signal failed (%s)", ovs_strerror(errno)); } } atexit(atexit_handler); } } /* Registers 'hook_cb' to be called from inside poll_block() following a fatal * signal. 'hook_cb' does not need to be async-signal-safe. In a * multithreaded program 'hook_cb' might be called from any thread, with * threads other than the one running 'hook_cb' in unknown states. 
* * If 'run_at_exit' is true, 'hook_cb' is also called during normal process * termination, e.g. when exit() is called or when main() returns. * * If the current process forks, fatal_signal_fork() may be called to clear the * parent process's fatal signal hooks, so that 'hook_cb' is only called when * the child terminates, not when the parent does. When fatal_signal_fork() is * called, it calls the 'cancel_cb' function if it is nonnull, passing 'aux', * to notify that the hook has been canceled. This allows the hook to free * memory, etc. */ void fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux), void *aux, bool run_at_exit) { fatal_signal_init(); ovs_mutex_lock(&mutex); ovs_assert(n_hooks < MAX_HOOKS); hooks[n_hooks].hook_cb = hook_cb; hooks[n_hooks].cancel_cb = cancel_cb; hooks[n_hooks].aux = aux; hooks[n_hooks].run_at_exit = run_at_exit; n_hooks++; ovs_mutex_unlock(&mutex); } /* Handles fatal signal number 'sig_nr'. * * Ordinarily this is the actual signal handler. When other code needs to * handle one of our signals, however, it can register for that signal and, if * and when necessary, call this function to do fatal signal processing for it * and terminate the process. Currently only timeval.c does this, for SIGALRM. * (It is not important whether the other code sets up its signal handler * before or after this file, because this file will only set up a signal * handler in the case where the signal has its default handling.) */ void fatal_signal_handler(int sig_nr) { ignore(write(signal_fds[1], "", 1)); stored_sig_nr = sig_nr; } /* Check whether a fatal signal has occurred and, if so, call the fatal signal * hooks and exit. * * This function is called automatically by poll_block(), but specialized * programs that may not always call poll_block() on a regular basis should * also call it periodically. 
(Therefore, any function with "block" in its * name should call fatal_signal_run() each time it is called, either directly * or through poll_block(), because such functions can only used by specialized * programs that can afford to block outside their main loop around * poll_block().) */ void fatal_signal_run(void) { sig_atomic_t sig_nr; fatal_signal_init(); sig_nr = stored_sig_nr; if (sig_nr != SIG_ATOMIC_MAX) { char namebuf[SIGNAL_NAME_BUFSIZE]; ovs_mutex_lock(&mutex); VLOG_WARN("terminating with signal %d (%s)", (int)sig_nr, signal_name(sig_nr, namebuf, sizeof namebuf)); call_hooks(sig_nr); /* Re-raise the signal with the default handling so that the program * termination status reflects that we were killed by this signal */ signal(sig_nr, SIG_DFL); raise(sig_nr); ovs_mutex_unlock(&mutex); NOT_REACHED(); } } void fatal_signal_wait(void) { fatal_signal_init(); poll_fd_wait(signal_fds[0], POLLIN); } static void atexit_handler(void) { call_hooks(0); } static void call_hooks(int sig_nr) { static volatile sig_atomic_t recurse = 0; if (!recurse) { size_t i; recurse = 1; for (i = 0; i < n_hooks; i++) { struct hook *h = &hooks[i]; if (sig_nr || h->run_at_exit) { h->hook_cb(h->aux); } } } } /* Files to delete on exit. */ static struct sset files = SSET_INITIALIZER(&files); /* Has a hook function been registered with fatal_signal_add_hook() (and not * cleared by fatal_signal_fork())? */ static bool added_hook; static void unlink_files(void *aux); static void cancel_files(void *aux); static void do_unlink_files(void); /* Registers 'file' to be unlinked when the program terminates via exit() or a * fatal signal. */ void fatal_signal_add_file_to_unlink(const char *file) { fatal_signal_init(); ovs_mutex_lock(&mutex); if (!added_hook) { added_hook = true; fatal_signal_add_hook(unlink_files, cancel_files, NULL, true); } sset_add(&files, file); ovs_mutex_unlock(&mutex); } /* Unregisters 'file' from being unlinked when the program terminates via * exit() or a fatal signal. 
/* Unlinks 'file' immediately and also drops it from the set of files to be
 * unlinked at termination, whether or not the unlink succeeded.  A failed
 * unlink is logged as a warning.
 *
 * Returns 0 if successful, otherwise a positive errno value. */
int
fatal_signal_unlink_file_now(const char *file)
{
    int status = 0;

    fatal_signal_init();

    ovs_mutex_lock(&mutex);

    if (unlink(file)) {
        status = errno;
        if (status) {
            VLOG_WARN("could not unlink \"%s\" (%s)",
                      file, ovs_strerror(status));
        }
    }
    fatal_signal_remove_file_to_unlink(file);

    ovs_mutex_unlock(&mutex);

    return status;
}
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef FATAL_SIGNAL_H #define FATAL_SIGNAL_H 1 #include /* Basic interface. */ void fatal_signal_init(void); void fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux), void *aux, bool run_at_exit); void fatal_signal_fork(void); void fatal_signal_run(void); void fatal_signal_wait(void); /* Convenience functions for unlinking files upon termination. * * These functions also unlink the files upon normal process termination via * exit(). */ void fatal_signal_add_file_to_unlink(const char *); void fatal_signal_remove_file_to_unlink(const char *); int fatal_signal_unlink_file_now(const char *); /* Interface for other code that catches one of our signals and needs to pass * it through. */ void fatal_signal_handler(int sig_nr); #endif /* fatal-signal.h */ openvswitch-2.0.1+git20140120/lib/flow.c000066400000000000000000001266611226605124000173470ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include "flow.h" #include #include #include #include #include #include #include #include #include #include "byte-order.h" #include "coverage.h" #include "csum.h" #include "dynamic-string.h" #include "hash.h" #include "jhash.h" #include "match.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "packets.h" #include "unaligned.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(flow); COVERAGE_DEFINE(flow_extract); COVERAGE_DEFINE(miniflow_malloc); static struct arp_eth_header * pull_arp(struct ofpbuf *packet) { return ofpbuf_try_pull(packet, ARP_ETH_HEADER_LEN); } static struct ip_header * pull_ip(struct ofpbuf *packet) { if (packet->size >= IP_HEADER_LEN) { struct ip_header *ip = packet->data; int ip_len = IP_IHL(ip->ip_ihl_ver) * 4; if (ip_len >= IP_HEADER_LEN && packet->size >= ip_len) { return ofpbuf_pull(packet, ip_len); } } return NULL; } static struct tcp_header * pull_tcp(struct ofpbuf *packet) { if (packet->size >= TCP_HEADER_LEN) { struct tcp_header *tcp = packet->data; int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4; if (tcp_len >= TCP_HEADER_LEN && packet->size >= tcp_len) { return ofpbuf_pull(packet, tcp_len); } } return NULL; } static struct udp_header * pull_udp(struct ofpbuf *packet) { return ofpbuf_try_pull(packet, UDP_HEADER_LEN); } static struct sctp_header * pull_sctp(struct ofpbuf *packet) { return ofpbuf_try_pull(packet, SCTP_HEADER_LEN); } static struct icmp_header * pull_icmp(struct ofpbuf *packet) { return ofpbuf_try_pull(packet, ICMP_HEADER_LEN); } static struct icmp6_hdr * pull_icmpv6(struct ofpbuf *packet) { return ofpbuf_try_pull(packet, sizeof(struct icmp6_hdr)); } static void parse_mpls(struct ofpbuf *b, struct flow *flow) { struct mpls_hdr *mh; while ((mh = ofpbuf_try_pull(b, sizeof *mh))) { if (flow->mpls_depth++ == 0) { flow->mpls_lse = mh->mpls_lse; } if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) { break; } } } 
static void parse_vlan(struct ofpbuf *b, struct flow *flow) { struct qtag_prefix { ovs_be16 eth_type; /* ETH_TYPE_VLAN */ ovs_be16 tci; }; if (b->size >= sizeof(struct qtag_prefix) + sizeof(ovs_be16)) { struct qtag_prefix *qp = ofpbuf_pull(b, sizeof *qp); flow->vlan_tci = qp->tci | htons(VLAN_CFI); } } static ovs_be16 parse_ethertype(struct ofpbuf *b) { struct llc_snap_header *llc; ovs_be16 proto; proto = *(ovs_be16 *) ofpbuf_pull(b, sizeof proto); if (ntohs(proto) >= ETH_TYPE_MIN) { return proto; } if (b->size < sizeof *llc) { return htons(FLOW_DL_TYPE_NONE); } llc = b->data; if (llc->llc.llc_dsap != LLC_DSAP_SNAP || llc->llc.llc_ssap != LLC_SSAP_SNAP || llc->llc.llc_cntl != LLC_CNTL_SNAP || memcmp(llc->snap.snap_org, SNAP_ORG_ETHERNET, sizeof llc->snap.snap_org)) { return htons(FLOW_DL_TYPE_NONE); } ofpbuf_pull(b, sizeof *llc); if (ntohs(llc->snap.snap_type) >= ETH_TYPE_MIN) { return llc->snap.snap_type; } return htons(FLOW_DL_TYPE_NONE); } static int parse_ipv6(struct ofpbuf *packet, struct flow *flow) { const struct ovs_16aligned_ip6_hdr *nh; ovs_be32 tc_flow; int nexthdr; nh = ofpbuf_try_pull(packet, sizeof *nh); if (!nh) { return EINVAL; } nexthdr = nh->ip6_nxt; memcpy(&flow->ipv6_src, &nh->ip6_src, sizeof flow->ipv6_src); memcpy(&flow->ipv6_dst, &nh->ip6_dst, sizeof flow->ipv6_dst); tc_flow = get_16aligned_be32(&nh->ip6_flow); flow->nw_tos = ntohl(tc_flow) >> 20; flow->ipv6_label = tc_flow & htonl(IPV6_LABEL_MASK); flow->nw_ttl = nh->ip6_hlim; flow->nw_proto = IPPROTO_NONE; while (1) { if ((nexthdr != IPPROTO_HOPOPTS) && (nexthdr != IPPROTO_ROUTING) && (nexthdr != IPPROTO_DSTOPTS) && (nexthdr != IPPROTO_AH) && (nexthdr != IPPROTO_FRAGMENT)) { /* It's either a terminal header (e.g., TCP, UDP) or one we * don't understand. In either case, we're done with the * packet, so use it to fill in 'nw_proto'. */ break; } /* We only verify that at least 8 bytes of the next header are * available, but many of these headers are longer. 
Ensure that * accesses within the extension header are within those first 8 * bytes. All extension headers are required to be at least 8 * bytes. */ if (packet->size < 8) { return EINVAL; } if ((nexthdr == IPPROTO_HOPOPTS) || (nexthdr == IPPROTO_ROUTING) || (nexthdr == IPPROTO_DSTOPTS)) { /* These headers, while different, have the fields we care about * in the same location and with the same interpretation. */ const struct ip6_ext *ext_hdr = packet->data; nexthdr = ext_hdr->ip6e_nxt; if (!ofpbuf_try_pull(packet, (ext_hdr->ip6e_len + 1) * 8)) { return EINVAL; } } else if (nexthdr == IPPROTO_AH) { /* A standard AH definition isn't available, but the fields * we care about are in the same location as the generic * option header--only the header length is calculated * differently. */ const struct ip6_ext *ext_hdr = packet->data; nexthdr = ext_hdr->ip6e_nxt; if (!ofpbuf_try_pull(packet, (ext_hdr->ip6e_len + 2) * 4)) { return EINVAL; } } else if (nexthdr == IPPROTO_FRAGMENT) { const struct ovs_16aligned_ip6_frag *frag_hdr = packet->data; nexthdr = frag_hdr->ip6f_nxt; if (!ofpbuf_try_pull(packet, sizeof *frag_hdr)) { return EINVAL; } /* We only process the first fragment. 
*/ if (frag_hdr->ip6f_offlg != htons(0)) { flow->nw_frag = FLOW_NW_FRAG_ANY; if ((frag_hdr->ip6f_offlg & IP6F_OFF_MASK) != htons(0)) { flow->nw_frag |= FLOW_NW_FRAG_LATER; nexthdr = IPPROTO_FRAGMENT; break; } } } } flow->nw_proto = nexthdr; return 0; } static void parse_tcp(struct ofpbuf *packet, struct ofpbuf *b, struct flow *flow) { const struct tcp_header *tcp = pull_tcp(b); if (tcp) { flow->tp_src = tcp->tcp_src; flow->tp_dst = tcp->tcp_dst; packet->l7 = b->data; } } static void parse_udp(struct ofpbuf *packet, struct ofpbuf *b, struct flow *flow) { const struct udp_header *udp = pull_udp(b); if (udp) { flow->tp_src = udp->udp_src; flow->tp_dst = udp->udp_dst; packet->l7 = b->data; } } static void parse_sctp(struct ofpbuf *packet, struct ofpbuf *b, struct flow *flow) { const struct sctp_header *sctp = pull_sctp(b); if (sctp) { flow->tp_src = sctp->sctp_src; flow->tp_dst = sctp->sctp_dst; packet->l7 = b->data; } } static bool parse_icmpv6(struct ofpbuf *b, struct flow *flow) { const struct icmp6_hdr *icmp = pull_icmpv6(b); if (!icmp) { return false; } /* The ICMPv6 type and code fields use the 16-bit transport port * fields, so we need to store them in 16-bit network byte order. */ flow->tp_src = htons(icmp->icmp6_type); flow->tp_dst = htons(icmp->icmp6_code); if (icmp->icmp6_code == 0 && (icmp->icmp6_type == ND_NEIGHBOR_SOLICIT || icmp->icmp6_type == ND_NEIGHBOR_ADVERT)) { const struct in6_addr *nd_target; nd_target = ofpbuf_try_pull(b, sizeof *nd_target); if (!nd_target) { return false; } flow->nd_target = *nd_target; while (b->size >= 8) { /* The minimum size of an option is 8 bytes, which also is * the size of Ethernet link-layer options. */ const struct nd_opt_hdr *nd_opt = b->data; int opt_len = nd_opt->nd_opt_len * 8; if (!opt_len || opt_len > b->size) { goto invalid; } /* Store the link layer address if the appropriate option is * provided. It is considered an error if the same link * layer option is specified twice. 
*/ if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR && opt_len == 8) { if (eth_addr_is_zero(flow->arp_sha)) { memcpy(flow->arp_sha, nd_opt + 1, ETH_ADDR_LEN); } else { goto invalid; } } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LINKADDR && opt_len == 8) { if (eth_addr_is_zero(flow->arp_tha)) { memcpy(flow->arp_tha, nd_opt + 1, ETH_ADDR_LEN); } else { goto invalid; } } if (!ofpbuf_try_pull(b, opt_len)) { goto invalid; } } } return true; invalid: memset(&flow->nd_target, 0, sizeof(flow->nd_target)); memset(flow->arp_sha, 0, sizeof(flow->arp_sha)); memset(flow->arp_tha, 0, sizeof(flow->arp_tha)); return false; } /* Initializes 'flow' members from 'packet', 'skb_priority', 'tnl', and * 'in_port'. * * Initializes 'packet' header pointers as follows: * * - packet->l2 to the start of the Ethernet header. * * - packet->l2_5 to the start of the MPLS shim header. * * - packet->l3 to just past the Ethernet header, or just past the * vlan_header if one is present, to the first byte of the payload of the * Ethernet frame. * * - packet->l4 to just past the IPv4 header, if one is present and has a * correct length, and otherwise NULL. * * - packet->l7 to just past the TCP/UDP/SCTP/ICMP header, if one is * present and has a correct length, and otherwise NULL. */ void flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark, const struct flow_tnl *tnl, const union flow_in_port *in_port, struct flow *flow) { struct ofpbuf b = *packet; struct eth_header *eth; COVERAGE_INC(flow_extract); memset(flow, 0, sizeof *flow); if (tnl) { ovs_assert(tnl != &flow->tunnel); flow->tunnel = *tnl; } if (in_port) { flow->in_port = *in_port; } flow->skb_priority = skb_priority; flow->pkt_mark = pkt_mark; packet->l2 = b.data; packet->l2_5 = NULL; packet->l3 = NULL; packet->l4 = NULL; packet->l7 = NULL; if (b.size < sizeof *eth) { return; } /* Link layer. 
*/ eth = b.data; memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN); memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN); /* dl_type, vlan_tci. */ ofpbuf_pull(&b, ETH_ADDR_LEN * 2); if (eth->eth_type == htons(ETH_TYPE_VLAN)) { parse_vlan(&b, flow); } flow->dl_type = parse_ethertype(&b); /* Parse mpls, copy l3 ttl. */ if (eth_type_mpls(flow->dl_type)) { packet->l2_5 = b.data; parse_mpls(&b, flow); } /* Network layer. */ packet->l3 = b.data; if (flow->dl_type == htons(ETH_TYPE_IP)) { const struct ip_header *nh = pull_ip(&b); if (nh) { packet->l4 = b.data; flow->nw_src = get_16aligned_be32(&nh->ip_src); flow->nw_dst = get_16aligned_be32(&nh->ip_dst); flow->nw_proto = nh->ip_proto; flow->nw_tos = nh->ip_tos; if (IP_IS_FRAGMENT(nh->ip_frag_off)) { flow->nw_frag = FLOW_NW_FRAG_ANY; if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) { flow->nw_frag |= FLOW_NW_FRAG_LATER; } } flow->nw_ttl = nh->ip_ttl; if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) { if (flow->nw_proto == IPPROTO_TCP) { parse_tcp(packet, &b, flow); } else if (flow->nw_proto == IPPROTO_UDP) { parse_udp(packet, &b, flow); } else if (flow->nw_proto == IPPROTO_SCTP) { parse_sctp(packet, &b, flow); } else if (flow->nw_proto == IPPROTO_ICMP) { const struct icmp_header *icmp = pull_icmp(&b); if (icmp) { flow->tp_src = htons(icmp->icmp_type); flow->tp_dst = htons(icmp->icmp_code); packet->l7 = b.data; } } } } } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { if (parse_ipv6(&b, flow)) { return; } packet->l4 = b.data; if (flow->nw_proto == IPPROTO_TCP) { parse_tcp(packet, &b, flow); } else if (flow->nw_proto == IPPROTO_UDP) { parse_udp(packet, &b, flow); } else if (flow->nw_proto == IPPROTO_SCTP) { parse_sctp(packet, &b, flow); } else if (flow->nw_proto == IPPROTO_ICMPV6) { if (parse_icmpv6(&b, flow)) { packet->l7 = b.data; } } } else if (flow->dl_type == htons(ETH_TYPE_ARP) || flow->dl_type == htons(ETH_TYPE_RARP)) { const struct arp_eth_header *arp = pull_arp(&b); if (arp && arp->ar_hrd == htons(1) && arp->ar_pro == 
/* Appends a textual rendering of the bits set in 'flags' to 'ds'.
 *
 * Bits are visited from least to most significant.  Each bit for which
 * 'bit_to_string' returns a name is written as that name followed by 'del'.
 * Bits for which it returns NULL are collected and written once, after all
 * the names, as a single hexadecimal value followed by 'del'.  One trailing
 * 'del' is then removed.  Nothing is appended when 'flags' is 0. */
void
format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t),
             uint32_t flags, char del)
{
    uint32_t unnamed = 0;
    uint32_t remaining;

    if (flags == 0) {
        return;
    }

    for (remaining = flags; remaining != 0; ) {
        uint32_t lowest = rightmost_1bit(remaining);
        const char *label = bit_to_string(lowest);

        remaining &= ~lowest;
        if (!label) {
            unnamed |= lowest;
            continue;
        }
        ds_put_format(ds, "%s%c", label, del);
    }

    if (unnamed != 0) {
        ds_put_format(ds, "0x%"PRIx32"%c", unnamed, del);
    }
    ds_chomp(ds, del);
}
struct flow *flow)
{
    struct match match;

    /* Wrap 'flow' in a match set up by match_wc_init() and let the match
     * formatter produce the text. */
    match_wc_init(&match, flow);
    match_format(&match, ds, OFP_DEFAULT_PRIORITY);
}

/* Writes the string representation of 'flow' (see flow_to_string()) to
 * 'stream' and frees the temporary string. */
void
flow_print(FILE *stream, const struct flow *flow)
{
    char *s = flow_to_string(flow);
    fputs(s, stream);
    free(s);
}

/* flow_wildcards functions. */

/* Initializes 'wc' as a set of wildcards that matches every packet, that is,
 * with every mask bit 0. */
void
flow_wildcards_init_catchall(struct flow_wildcards *wc)
{
    memset(&wc->masks, 0, sizeof wc->masks);
}

/* Initializes 'wc' as an exact-match set of wildcards; that is, 'wc' does not
 * wildcard any bits or fields.
 *
 * Every mask bit is set to 1 except those of the 'zeros' member, which is
 * cleared again afterward. */
void
flow_wildcards_init_exact(struct flow_wildcards *wc)
{
    memset(&wc->masks, 0xff, sizeof wc->masks);
    memset(wc->masks.zeros, 0, sizeof wc->masks.zeros);
}

/* Clear the metadata and register wildcard masks.  They are not packet
 * header fields. */
void
flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
{
    memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
    memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
}

/* Returns true if 'wc' matches every packet, false if 'wc' fixes any bits or
 * fields.  (That is, true only if all FLOW_U32S words of the mask are 0.) */
bool
flow_wildcards_is_catchall(const struct flow_wildcards *wc)
{
    const uint32_t *wc_u32 = (const uint32_t *) &wc->masks;
    size_t i;

    for (i = 0; i < FLOW_U32S; i++) {
        if (wc_u32[i]) {
            return false;
        }
    }
    return true;
}

/* Sets 'dst' as the bitwise AND of wildcards in 'src1' and 'src2'.
 * That is, a bit or a field is wildcarded in 'dst' if it is wildcarded
 * in 'src1' or 'src2' or both. */
void
flow_wildcards_and(struct flow_wildcards *dst,
                   const struct flow_wildcards *src1,
                   const struct flow_wildcards *src2)
{
    uint32_t *dst_u32 = (uint32_t *) &dst->masks;
    const uint32_t *src1_u32 = (const uint32_t *) &src1->masks;
    const uint32_t *src2_u32 = (const uint32_t *) &src2->masks;
    size_t i;

    for (i = 0; i < FLOW_U32S; i++) {
        dst_u32[i] = src1_u32[i] & src2_u32[i];
    }
}

/* Sets 'dst' as the bitwise OR of wildcards in 'src1' and 'src2'.  That
 * is, a bit or a field is wildcarded in 'dst' only if it is wildcarded in
 * both 'src1' and 'src2'.  (A mask bit of 0 means "wildcarded", and the OR of
 * two mask bits is 0 only when both are 0.) */
void
flow_wildcards_or(struct flow_wildcards *dst,
                  const struct flow_wildcards *src1,
                  const struct flow_wildcards *src2)
{
    uint32_t *dst_u32 = (uint32_t *) &dst->masks;
    const uint32_t *src1_u32 = (const uint32_t *) &src1->masks;
    const uint32_t *src2_u32 = (const uint32_t *) &src2->masks;
    size_t i;

    for (i = 0; i < FLOW_U32S; i++) {
        dst_u32[i] = src1_u32[i] | src2_u32[i];
    }
}

/* Perform a bitwise OR of miniflow 'src' flow data with the equivalent
 * fields in 'dst', storing the result in 'dst'.
 *
 * 'dst' is addressed as an array of 32-bit words.  For each 1-bit in
 * 'src->map[i]', the bit position plus 'i * 32' is used as the word index in
 * 'dst', and the elements of 'src->values' are consumed in that same
 * order. */
static void
flow_union_with_miniflow(struct flow *dst, const struct miniflow *src)
{
    uint32_t *dst_u32 = (uint32_t *) dst;
    int ofs;
    int i;

    ofs = 0;
    for (i = 0; i < MINI_N_MAPS; i++) {
        uint32_t map;

        for (map = src->map[i]; map; map = zero_rightmost_1bit(map)) {
            dst_u32[raw_ctz(map) + i * 32] |= src->values[ofs++];
        }
    }
}

/* Fold minimask 'mask''s wildcard mask into 'wc's wildcard mask, by ORing
 * the mask words of 'mask' into 'wc->masks'. */
void
flow_wildcards_fold_minimask(struct flow_wildcards *wc,
                             const struct minimask *mask)
{
    flow_union_with_miniflow(&wc->masks, &mask->masks);
}

/* Returns a hash of the wildcards in 'wc', seeded with 'basis'. */
uint32_t
flow_wildcards_hash(const struct flow_wildcards *wc, uint32_t basis)
{
    return flow_hash(&wc->masks, basis);
}

/* Returns true if 'a' and 'b' represent the same wildcards, false if they are
 * different. */
bool
flow_wildcards_equal(const struct flow_wildcards *a,
                     const struct flow_wildcards *b)
{
    return flow_equal(&a->masks, &b->masks);
}

/* Returns true if at least one bit or field is wildcarded in 'a' but not in
 * 'b', false otherwise.
*/
bool
flow_wildcards_has_extra(const struct flow_wildcards *a,
                         const struct flow_wildcards *b)
{
    const uint32_t *a_u32 = (const uint32_t *) &a->masks;
    const uint32_t *b_u32 = (const uint32_t *) &b->masks;
    size_t i;

    for (i = 0; i < FLOW_U32S; i++) {
        /* True when some mask bit is 1 in 'b' but 0 (wildcarded) in 'a'. */
        if ((a_u32[i] & b_u32[i]) != b_u32[i]) {
            return true;
        }
    }
    return false;
}

/* Returns true if 'a' and 'b' are equal, except that 0-bits (wildcarded bits)
 * in 'wc' do not need to be equal in 'a' and 'b'. */
bool
flow_equal_except(const struct flow *a, const struct flow *b,
                  const struct flow_wildcards *wc)
{
    const uint32_t *a_u32 = (const uint32_t *) a;
    const uint32_t *b_u32 = (const uint32_t *) b;
    const uint32_t *wc_u32 = (const uint32_t *) &wc->masks;
    size_t i;

    for (i = 0; i < FLOW_U32S; i++) {
        /* XOR exposes the differing bits; only those under the mask count. */
        if ((a_u32[i] ^ b_u32[i]) & wc_u32[i]) {
            return false;
        }
    }
    return true;
}

/* Sets the wildcard mask for register 'idx' in 'wc' to 'mask'.
 * (A 0-bit indicates a wildcard bit.)
 *
 * 'idx' is used directly as an index into 'wc->masks.regs' without a range
 * check, so the caller must pass a valid register number. */
void
flow_wildcards_set_reg_mask(struct flow_wildcards *wc, int idx, uint32_t mask)
{
    wc->masks.regs[idx] = mask;
}

/* Hashes 'flow' based on its L2 through L4 protocol information.
 *
 * Source and destination values are XORed together before hashing, so
 * swapping source and destination in 'flow' yields the same hash. */
uint32_t
flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis)
{
    struct {
        union {
            ovs_be32 ipv4_addr;
            struct in6_addr ipv6_addr;
        };
        ovs_be16 eth_type;
        ovs_be16 vlan_tci;
        ovs_be16 tp_port;
        uint8_t eth_addr[ETH_ADDR_LEN];
        uint8_t ip_proto;
    } fields;

    int i;

    /* The whole structure is hashed below, so start from all-zero bytes. */
    memset(&fields, 0, sizeof fields);
    for (i = 0; i < ETH_ADDR_LEN; i++) {
        fields.eth_addr[i] = flow->dl_src[i] ^ flow->dl_dst[i];
    }
    fields.vlan_tci = flow->vlan_tci & htons(VLAN_VID_MASK);
    fields.eth_type = flow->dl_type;

    /* UDP source and destination port are not taken into account because they
     * will not necessarily be symmetric in a bidirectional flow.
*/
    if (fields.eth_type == htons(ETH_TYPE_IP)) {
        fields.ipv4_addr = flow->nw_src ^ flow->nw_dst;
        fields.ip_proto = flow->nw_proto;
        if (fields.ip_proto == IPPROTO_TCP || fields.ip_proto == IPPROTO_SCTP) {
            fields.tp_port = flow->tp_src ^ flow->tp_dst;
        }
    } else if (fields.eth_type == htons(ETH_TYPE_IPV6)) {
        const uint8_t *a = &flow->ipv6_src.s6_addr[0];
        const uint8_t *b = &flow->ipv6_dst.s6_addr[0];
        uint8_t *ipv6_addr = &fields.ipv6_addr.s6_addr[0];

        /* XOR the two 16-byte IPv6 addresses byte by byte. */
        for (i=0; i<16; i++) {
            ipv6_addr[i] = a[i] ^ b[i];
        }
        fields.ip_proto = flow->nw_proto;
        if (fields.ip_proto == IPPROTO_TCP || fields.ip_proto == IPPROTO_SCTP) {
            fields.tp_port = flow->tp_src ^ flow->tp_dst;
        }
    }
    return jhash_bytes(&fields, sizeof fields, basis);
}

/* Masks the fields in 'wc' that are used by the flow hash 'fields'.
 *
 * "Masks" here means setting the corresponding mask bits to 1, so the fields
 * are no longer wildcarded.  'flow' is consulted only to decide which network
 * layer fields apply.
 *
 * NOTE(review): for NX_HASH_FIELDS_SYMMETRIC_L4 this marks tp_src/tp_dst for
 * every IP flow and marks VLAN_CFI in addition to the VID, whereas
 * flow_hash_symmetric_l4() reads the ports only for TCP and SCTP and only the
 * VID bits of vlan_tci.  The result is a superset of what the hash reads. */
void
flow_mask_hash_fields(const struct flow *flow, struct flow_wildcards *wc,
                      enum nx_hash_fields fields)
{
    switch (fields) {
    case NX_HASH_FIELDS_ETH_SRC:
        memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
        break;

    case NX_HASH_FIELDS_SYMMETRIC_L4:
        memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
        memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
        if (flow->dl_type == htons(ETH_TYPE_IP)) {
            memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
            memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
            memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
            memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
        }
        if (is_ip_any(flow)) {
            memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
            memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
        }
        wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
        break;

    default:
        NOT_REACHED();
    }
}

/* Hashes the portions of 'flow' designated by 'fields'.
*/
uint32_t
flow_hash_fields(const struct flow *flow,
                 enum nx_hash_fields fields, uint16_t basis)
{
    switch (fields) {

    case NX_HASH_FIELDS_ETH_SRC:
        return jhash_bytes(flow->dl_src, sizeof flow->dl_src, basis);

    case NX_HASH_FIELDS_SYMMETRIC_L4:
        return flow_hash_symmetric_l4(flow, basis);
    }

    /* Any other value of 'fields' falls out of the switch to here. */
    NOT_REACHED();
}

/* Returns a string representation of 'fields', or an empty string if
 * 'fields' is not one of the known values. */
const char *
flow_hash_fields_to_str(enum nx_hash_fields fields)
{
    switch (fields) {
    case NX_HASH_FIELDS_ETH_SRC: return "eth_src";
    case NX_HASH_FIELDS_SYMMETRIC_L4: return "symmetric_l4";
    default: return "";
    }
}

/* Returns true if the value of 'fields' is supported. Otherwise false. */
bool
flow_hash_fields_valid(enum nx_hash_fields fields)
{
    return fields == NX_HASH_FIELDS_ETH_SRC
        || fields == NX_HASH_FIELDS_SYMMETRIC_L4;
}

/* Returns a hash value for the bits of 'flow' that are active based on
 * 'wc', given 'basis'.
 *
 * Every one of the FLOW_U32S words of 'flow' is ANDed with the matching mask
 * word and fed to the hash, so wildcarded bits contribute as zeros. */
uint32_t
flow_hash_in_wildcards(const struct flow *flow,
                       const struct flow_wildcards *wc, uint32_t basis)
{
    const uint32_t *wc_u32 = (const uint32_t *) &wc->masks;
    const uint32_t *flow_u32 = (const uint32_t *) flow;
    uint32_t hash;
    size_t i;

    hash = basis;
    for (i = 0; i < FLOW_U32S; i++) {
        hash = mhash_add(hash, flow_u32[i] & wc_u32[i]);
    }
    /* Second argument is the number of bytes hashed (4 per word). */
    return mhash_finish(hash, 4 * FLOW_U32S);
}

/* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
 * OpenFlow 1.0 "dl_vlan" value:
 *
 *      - If it is in the range 0...4095, 'flow->vlan_tci' is set to match
 *        that VLAN.  Any existing PCP match is unchanged (it becomes 0 if
 *        'flow' previously matched packets without a VLAN header).
 *
 *      - If it is OFP10_VLAN_NONE, 'flow->vlan_tci' is set to match a packet
 *        without a VLAN tag.
 *
 *      - Other values of 'vid' should not be used.
*/
void
flow_set_dl_vlan(struct flow *flow, ovs_be16 vid)
{
    if (vid == htons(OFP10_VLAN_NONE)) {
        flow->vlan_tci = htons(0);
    } else {
        /* Replace only the VID bits and mark the TCI as present (CFI). */
        vid &= htons(VLAN_VID_MASK);
        flow->vlan_tci &= ~htons(VLAN_VID_MASK);
        flow->vlan_tci |= htons(VLAN_CFI) | vid;
    }
}

/* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an
 * OpenFlow 1.2 "vlan_vid" value, that is, the low 13 bits of 'vlan_tci' (VID
 * plus CFI).  Bits of 'vid' outside those 13 bits are ignored, and the other
 * bits of 'flow->vlan_tci' are left as they were. */
void
flow_set_vlan_vid(struct flow *flow, ovs_be16 vid)
{
    ovs_be16 mask = htons(VLAN_VID_MASK | VLAN_CFI);
    flow->vlan_tci &= ~mask;
    flow->vlan_tci |= vid & mask;
}

/* Sets the VLAN PCP that 'flow' matches to 'pcp', which should be in the
 * range 0...7.  (Only the low 3 bits of 'pcp' are used.)
 *
 * This function has no effect on the VLAN ID that 'flow' matches.
 *
 * After calling this function, 'flow' will not match packets without a VLAN
 * header. */
void
flow_set_vlan_pcp(struct flow *flow, uint8_t pcp)
{
    pcp &= 0x07;
    flow->vlan_tci &= ~htons(VLAN_PCP_MASK);
    flow->vlan_tci |= htons((pcp << VLAN_PCP_SHIFT) | VLAN_CFI);
}

/* Sets the MPLS Label that 'flow' matches to 'label', which is interpreted
 * as an OpenFlow 1.1 "mpls_label" value. */
void
flow_set_mpls_label(struct flow *flow, ovs_be32 label)
{
    set_mpls_lse_label(&flow->mpls_lse, label);
}

/* Sets the MPLS TTL that 'flow' matches to 'ttl', which should be in the
 * range 0...255. */
void
flow_set_mpls_ttl(struct flow *flow, uint8_t ttl)
{
    set_mpls_lse_ttl(&flow->mpls_lse, ttl);
}

/* Sets the MPLS TC that 'flow' matches to 'tc', which should be in the
 * range 0...7. */
void
flow_set_mpls_tc(struct flow *flow, uint8_t tc)
{
    set_mpls_lse_tc(&flow->mpls_lse, tc);
}

/* Sets the MPLS BOS bit that 'flow' matches to 'bos', which should be 0 or
 * 1. */
void
flow_set_mpls_bos(struct flow *flow, uint8_t bos)
{
    set_mpls_lse_bos(&flow->mpls_lse, bos);
}

/* Puts into 'b' a packet that flow_extract() would parse as having the given
 * 'flow'.
 *
 * (This is useful only for testing, obviously, and the packet isn't really
 * valid.
It hasn't got some checksums filled in, for one, and lots of fields
 * are just zeroed.) */
void
flow_compose(struct ofpbuf *b, const struct flow *flow)
{
    eth_compose(b, flow->dl_dst, flow->dl_src, ntohs(flow->dl_type), 0);
    if (flow->dl_type == htons(FLOW_DL_TYPE_NONE)) {
        /* No Ethernet type: the type field is overwritten with the current
         * size of the buffer and nothing else is added. */
        struct eth_header *eth = b->l2;
        eth->eth_type = htons(b->size);
        return;
    }

    /* VLAN_CFI set in 'vlan_tci' marks the presence of an 802.1Q header. */
    if (flow->vlan_tci & htons(VLAN_CFI)) {
        eth_push_vlan(b, flow->vlan_tci);
    }

    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        struct ip_header *ip;

        b->l3 = ip = ofpbuf_put_zeros(b, sizeof *ip);
        ip->ip_ihl_ver = IP_IHL_VER(5, 4);
        ip->ip_tos = flow->nw_tos;
        ip->ip_ttl = flow->nw_ttl;
        ip->ip_proto = flow->nw_proto;
        put_16aligned_be32(&ip->ip_src, flow->nw_src);
        put_16aligned_be32(&ip->ip_dst, flow->nw_dst);

        if (flow->nw_frag & FLOW_NW_FRAG_ANY) {
            ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
            if (flow->nw_frag & FLOW_NW_FRAG_LATER) {
                /* A later fragment needs a nonzero offset; 100 is the value
                 * used here. */
                ip->ip_frag_off |= htons(100);
            }
        }
        /* An L4 header is added only when this is not a later fragment. */
        if (!(flow->nw_frag & FLOW_NW_FRAG_ANY)
            || !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
            if (flow->nw_proto == IPPROTO_TCP) {
                struct tcp_header *tcp;

                b->l4 = tcp = ofpbuf_put_zeros(b, sizeof *tcp);
                tcp->tcp_src = flow->tp_src;
                tcp->tcp_dst = flow->tp_dst;
                tcp->tcp_ctl = TCP_CTL(0, 5);
            } else if (flow->nw_proto == IPPROTO_UDP) {
                struct udp_header *udp;

                b->l4 = udp = ofpbuf_put_zeros(b, sizeof *udp);
                udp->udp_src = flow->tp_src;
                udp->udp_dst = flow->tp_dst;
            } else if (flow->nw_proto == IPPROTO_SCTP) {
                struct sctp_header *sctp;

                b->l4 = sctp = ofpbuf_put_zeros(b, sizeof *sctp);
                sctp->sctp_src = flow->tp_src;
                sctp->sctp_dst = flow->tp_dst;
            } else if (flow->nw_proto == IPPROTO_ICMP) {
                struct icmp_header *icmp;

                /* For ICMP the flow keeps the type in 'tp_src' and the code
                 * in 'tp_dst', both in network byte order. */
                b->l4 = icmp = ofpbuf_put_zeros(b, sizeof *icmp);
                icmp->icmp_type = ntohs(flow->tp_src);
                icmp->icmp_code = ntohs(flow->tp_dst);
                icmp->icmp_csum = csum(icmp, ICMP_HEADER_LEN);
            }
        }

        /* NOTE(review): 'ip' is reloaded from b->l3 here, presumably because
         * the ofpbuf_put_zeros() calls above may have moved the buffer --
         * confirm against ofpbuf. */
        ip = b->l3;
        /* Total length covers everything from the IP header to the end of
         * the buffer. */
        ip->ip_tot_len = htons((uint8_t *) b->data + b->size
                               - (uint8_t *) b->l3);
        ip->ip_csum = csum(ip, sizeof *ip);
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        /* XXX */
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
               flow->dl_type == htons(ETH_TYPE_RARP)) {
        struct arp_eth_header *arp;

        b->l3 = arp = ofpbuf_put_zeros(b, sizeof *arp);
        arp->ar_hrd = htons(1);
        arp->ar_pro = htons(ETH_TYPE_IP);
        arp->ar_hln = ETH_ADDR_LEN;
        arp->ar_pln = 4;
        arp->ar_op = htons(flow->nw_proto);

        /* Addresses are filled in only for request and reply opcodes;
         * otherwise they stay zero. */
        if (flow->nw_proto == ARP_OP_REQUEST ||
            flow->nw_proto == ARP_OP_REPLY) {
            put_16aligned_be32(&arp->ar_spa, flow->nw_src);
            put_16aligned_be32(&arp->ar_tpa, flow->nw_dst);
            memcpy(arp->ar_sha, flow->arp_sha, ETH_ADDR_LEN);
            memcpy(arp->ar_tha, flow->arp_tha, ETH_ADDR_LEN);
        }
    }

    if (eth_type_mpls(flow->dl_type)) {
        b->l2_5 = b->l3;
        push_mpls(b, flow->dl_type, flow->mpls_lse);
    }
}

/* Compressed flow. */

/* Returns the number of elements in 'flow->values', which is the total
 * number of 1-bits across all of 'flow->map'. */
static int
miniflow_n_values(const struct miniflow *flow)
{
    int n, i;

    n = 0;
    for (i = 0; i < MINI_N_MAPS; i++) {
        n += popcount(flow->map[i]);
    }
    return n;
}

/* Returns storage for 'n' values belonging to 'flow': the inline array of
 * 'flow' itself when 'n' is at most MINI_N_INLINE, otherwise a fresh
 * xmalloc()'d array.  Does not modify 'flow'; the caller stores the result
 * in 'flow->values'. */
static uint32_t *
miniflow_alloc_values(struct miniflow *flow, int n)
{
    if (n <= MINI_N_INLINE) {
        return flow->inline_values;
    } else {
        COVERAGE_INC(miniflow_malloc);
        return xmalloc(n * sizeof *flow->values);
    }
}

/* Initializes 'dst' as a copy of 'src'.  The caller must eventually free 'dst'
 * with miniflow_destroy(). */
void
miniflow_init(struct miniflow *dst, const struct flow *src)
{
    const uint32_t *src_u32 = (const uint32_t *) src;
    unsigned int ofs;
    unsigned int i;
    int n;

    /* Initialize dst->map, counting the number of nonzero elements. */
    n = 0;
    memset(dst->map, 0, sizeof dst->map);
    for (i = 0; i < FLOW_U32S; i++) {
        if (src_u32[i]) {
            dst->map[i / 32] |= 1u << (i % 32);
            n++;
        }
    }

    /* Initialize dst->values. */
    dst->values = miniflow_alloc_values(dst, n);
    ofs = 0;
    for (i = 0; i < MINI_N_MAPS; i++) {
        uint32_t map;

        for (map = dst->map[i]; map; map = zero_rightmost_1bit(map)) {
            dst->values[ofs++] = src_u32[raw_ctz(map) + i * 32];
        }
    }
}

/* Initializes 'dst' as a copy of 'src'.  The caller must eventually free 'dst'
 * with miniflow_destroy().
*/
void
miniflow_clone(struct miniflow *dst, const struct miniflow *src)
{
    int n = miniflow_n_values(src);
    memcpy(dst->map, src->map, sizeof dst->map);
    dst->values = miniflow_alloc_values(dst, n);
    memcpy(dst->values, src->values, n * sizeof *dst->values);
}

/* Initializes 'dst' with the data in 'src', destroying 'src'.
 * The caller must eventually free 'dst' with miniflow_destroy().
 *
 * Inline values are copied into 'dst''s own inline array; any other value
 * array is handed over by pointer, so 'src' must not be destroyed or used
 * afterward. */
void
miniflow_move(struct miniflow *dst, struct miniflow *src)
{
    if (src->values == src->inline_values) {
        dst->values = dst->inline_values;
        memcpy(dst->values, src->values,
               miniflow_n_values(src) * sizeof *dst->values);
    } else {
        dst->values = src->values;
    }
    memcpy(dst->map, src->map, sizeof dst->map);
}

/* Frees any memory owned by 'flow'.  Does not free the storage in which 'flow'
 * itself resides; the caller is responsible for that.
 *
 * 'flow->values' is passed to free() whenever it does not point at the inline
 * array. */
void
miniflow_destroy(struct miniflow *flow)
{
    if (flow->values != flow->inline_values) {
        free(flow->values);
    }
}

/* Initializes 'dst' as a copy of 'src': 'dst' is zeroed and then the values
 * stored in 'src' are ORed into place. */
void
miniflow_expand(const struct miniflow *src, struct flow *dst)
{
    memset(dst, 0, sizeof *dst);
    flow_union_with_miniflow(dst, src);
}

/* Returns a pointer to the 32-bit value that 'flow' holds for word offset
 * 'u32_ofs' of a "struct flow".
 *
 * If the map bit for 'u32_ofs' is clear, the result points to a static zero.
 * Otherwise the index into 'flow->values' is the number of 1-bits in the map
 * below bit 'u32_ofs'.  The code handles exactly two map words, which the
 * build assertion enforces. */
static const uint32_t *
miniflow_get__(const struct miniflow *flow, unsigned int u32_ofs)
{
    if (!(flow->map[u32_ofs / 32] & (1u << (u32_ofs % 32)))) {
        static const uint32_t zero = 0;
        return &zero;
    } else {
        const uint32_t *p = flow->values;

        BUILD_ASSERT(MINI_N_MAPS == 2);
        if (u32_ofs < 32) {
            p += popcount(flow->map[0] & ((1u << u32_ofs) - 1));
        } else {
            /* Skip every value of the first map word, then the lower ones of
             * the second. */
            p += popcount(flow->map[0]);
            p += popcount(flow->map[1] & ((1u << (u32_ofs - 32)) - 1));
        }
        return p;
    }
}

/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'flow'
 * were expanded into a "struct flow". */
uint32_t
miniflow_get(const struct miniflow *flow, unsigned int u32_ofs)
{
    return *miniflow_get__(flow, u32_ofs);
}

/* Returns the ovs_be16 that would be at byte offset 'u8_ofs' if 'flow' were
 * expanded into a "struct flow".
*/
static ovs_be16
miniflow_get_be16(const struct miniflow *flow, unsigned int u8_ofs)
{
    const uint32_t *u32p = miniflow_get__(flow, u8_ofs / 4);
    const ovs_be16 *be16p = (const ovs_be16 *) u32p;
    /* Index 0 selects the first 16-bit half of the containing 32-bit word,
     * index 1 the second.  Assumes 'u8_ofs' is a multiple of 2 -- TODO
     * confirm for any new caller. */
    return be16p[u8_ofs % 4 != 0];
}

/* Returns the VID within the vlan_tci member of the "struct flow" represented
 * by 'flow'. */
uint16_t
miniflow_get_vid(const struct miniflow *flow)
{
    ovs_be16 tci = miniflow_get_be16(flow, offsetof(struct flow, vlan_tci));
    return vlan_tci_to_vid(tci);
}

/* Returns true if 'a' and 'b' are the same flow, false otherwise.
 *
 * The maps are compared first; only when they are identical are the value
 * arrays (which then have the same length) compared. */
bool
miniflow_equal(const struct miniflow *a, const struct miniflow *b)
{
    int i;

    for (i = 0; i < MINI_N_MAPS; i++) {
        if (a->map[i] != b->map[i]) {
            return false;
        }
    }

    return !memcmp(a->values, b->values,
                   miniflow_n_values(a) * sizeof *a->values);
}

/* Returns true if 'a' and 'b' are equal at the places where there are 1-bits
 * in 'mask', false if they differ. */
bool
miniflow_equal_in_minimask(const struct miniflow *a, const struct miniflow *b,
                           const struct minimask *mask)
{
    const uint32_t *p;
    int i;

    /* 'p' walks the mask's value array in step with the 1-bits of its map. */
    p = mask->masks.values;
    for (i = 0; i < MINI_N_MAPS; i++) {
        uint32_t map;

        for (map = mask->masks.map[i]; map; map = zero_rightmost_1bit(map)) {
            int ofs = raw_ctz(map) + i * 32;

            if ((miniflow_get(a, ofs) ^ miniflow_get(b, ofs)) & *p) {
                return false;
            }
            p++;
        }
    }

    return true;
}

/* Returns true if 'a' and 'b' are equal at the places where there are 1-bits
 * in 'mask', false if they differ.
 *
 * Same comparison as miniflow_equal_in_minimask(), except that 'b' is a full
 * "struct flow" whose words are read directly. */
bool
miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b,
                                const struct minimask *mask)
{
    const uint32_t *b_u32 = (const uint32_t *) b;
    const uint32_t *p;
    int i;

    p = mask->masks.values;
    for (i = 0; i < MINI_N_MAPS; i++) {
        uint32_t map;

        for (map = mask->masks.map[i]; map; map = zero_rightmost_1bit(map)) {
            int ofs = raw_ctz(map) + i * 32;

            if ((miniflow_get(a, ofs) ^ b_u32[ofs]) & *p) {
                return false;
            }
            p++;
        }
    }

    return true;
}

/* Returns a hash value for 'flow', given 'basis'.
*/
uint32_t
miniflow_hash(const struct miniflow *flow, uint32_t basis)
{
    /* Both map words are passed explicitly below, so exactly two are
     * required. */
    BUILD_ASSERT_DECL(MINI_N_MAPS == 2);
    return hash_3words(flow->map[0], flow->map[1],
                       hash_words(flow->values, miniflow_n_values(flow),
                                  basis));
}

/* Returns a hash value for the bits of 'flow' where there are 1-bits in
 * 'mask', given 'basis'.
 *
 * The hash values returned by this function are the same as those returned by
 * flow_hash_in_minimask(), only the form of the arguments differs. */
uint32_t
miniflow_hash_in_minimask(const struct miniflow *flow,
                          const struct minimask *mask, uint32_t basis)
{
    const uint32_t *p = mask->masks.values;
    uint32_t hash;
    int i;

    hash = basis;
    for (i = 0; i < MINI_N_MAPS; i++) {
        uint32_t map;

        for (map = mask->masks.map[i]; map; map = zero_rightmost_1bit(map)) {
            int ofs = raw_ctz(map) + i * 32;

            hash = mhash_add(hash, miniflow_get(flow, ofs) & *p);
            p++;
        }
    }

    /* Second argument is the number of bytes hashed: 4 per mask value. */
    return mhash_finish(hash, (p - mask->masks.values) * 4);
}

/* Returns a hash value for the bits of 'flow' where there are 1-bits in
 * 'mask', given 'basis'.
 *
 * The hash values returned by this function are the same as those returned by
 * miniflow_hash_in_minimask(), only the form of the arguments differs. */
uint32_t
flow_hash_in_minimask(const struct flow *flow, const struct minimask *mask,
                      uint32_t basis)
{
    const uint32_t *flow_u32 = (const uint32_t *) flow;
    const uint32_t *p = mask->masks.values;
    uint32_t hash;
    int i;

    hash = basis;
    for (i = 0; i < MINI_N_MAPS; i++) {
        uint32_t map;

        for (map = mask->masks.map[i]; map; map = zero_rightmost_1bit(map)) {
            int ofs = raw_ctz(map) + i * 32;

            hash = mhash_add(hash, flow_u32[ofs] & *p);
            p++;
        }
    }

    /* Second argument is the number of bytes hashed: 4 per mask value. */
    return mhash_finish(hash, (p - mask->masks.values) * 4);
}

/* Initializes 'mask' as a compressed copy of the wildcard masks in 'wc'.  The
 * caller must eventually free 'mask' with minimask_destroy(). */
void
minimask_init(struct minimask *mask, const struct flow_wildcards *wc)
{
    miniflow_init(&mask->masks, &wc->masks);
}

/* Initializes 'dst' as a copy of 'src'.
The caller must eventually free 'dst'
 * with minimask_destroy(). */
void
minimask_clone(struct minimask *dst, const struct minimask *src)
{
    miniflow_clone(&dst->masks, &src->masks);
}

/* Initializes 'dst' with the data in 'src', destroying 'src'.
 * The caller must eventually free 'dst' with minimask_destroy(). */
void
minimask_move(struct minimask *dst, struct minimask *src)
{
    miniflow_move(&dst->masks, &src->masks);
}

/* Initializes 'dst_' as the bit-wise "and" of 'a_' and 'b_'.
 *
 * The caller must provide room for FLOW_U32S "uint32_t"s in 'storage', for use
 * by 'dst_'.  The caller must *not* free 'dst_' with minimask_destroy(). */
void
minimask_combine(struct minimask *dst_,
                 const struct minimask *a_, const struct minimask *b_,
                 uint32_t storage[FLOW_U32S])
{
    struct miniflow *dst = &dst_->masks;
    const struct miniflow *a = &a_->masks;
    const struct miniflow *b = &b_->masks;
    int i, n;

    n = 0;
    dst->values = storage;
    for (i = 0; i < MINI_N_MAPS; i++) {
        uint32_t map;

        dst->map[i] = 0;
        /* Only words present in both 'a' and 'b' can have a nonzero AND. */
        for (map = a->map[i] & b->map[i]; map;
             map = zero_rightmost_1bit(map)) {
            int ofs = raw_ctz(map) + i * 32;
            uint32_t mask = miniflow_get(a, ofs) & miniflow_get(b, ofs);

            /* Record the word only if the AND is nonzero, so that every
             * stored value has a 1-bit in the map and vice versa. */
            if (mask) {
                dst->map[i] |= rightmost_1bit(map);
                dst->values[n++] = mask;
            }
        }
    }
}

/* Frees any memory owned by 'mask'.  Does not free the storage in which 'mask'
 * itself resides; the caller is responsible for that. */
void
minimask_destroy(struct minimask *mask)
{
    miniflow_destroy(&mask->masks);
}

/* Initializes 'wc' as the expanded (uncompressed) form of 'mask'. */
void
minimask_expand(const struct minimask *mask, struct flow_wildcards *wc)
{
    miniflow_expand(&mask->masks, &wc->masks);
}

/* Returns the uint32_t that would be at byte offset '4 * u32_ofs' if 'mask'
 * were expanded into a "struct flow_wildcards". */
uint32_t
minimask_get(const struct minimask *mask, unsigned int u32_ofs)
{
    return miniflow_get(&mask->masks, u32_ofs);
}

/* Returns the VID mask within the vlan_tci member of the "struct
 * flow_wildcards" represented by 'mask'.
*/ uint16_t minimask_get_vid_mask(const struct minimask *mask) { return miniflow_get_vid(&mask->masks); } /* Returns true if 'a' and 'b' are the same flow mask, false otherwise. */ bool minimask_equal(const struct minimask *a, const struct minimask *b) { return miniflow_equal(&a->masks, &b->masks); } /* Returns a hash value for 'mask', given 'basis'. */ uint32_t minimask_hash(const struct minimask *mask, uint32_t basis) { return miniflow_hash(&mask->masks, basis); } /* Returns true if at least one bit is wildcarded in 'a_' but not in 'b_', * false otherwise. */ bool minimask_has_extra(const struct minimask *a_, const struct minimask *b_) { const struct miniflow *a = &a_->masks; const struct miniflow *b = &b_->masks; int i; for (i = 0; i < MINI_N_MAPS; i++) { uint32_t map; for (map = a->map[i] | b->map[i]; map; map = zero_rightmost_1bit(map)) { int ofs = raw_ctz(map) + i * 32; uint32_t a_u32 = miniflow_get(a, ofs); uint32_t b_u32 = miniflow_get(b, ofs); if ((a_u32 & b_u32) != b_u32) { return true; } } } return false; } /* Returns true if 'mask' matches every packet, false if 'mask' fixes any bits * or fields. */ bool minimask_is_catchall(const struct minimask *mask_) { const struct miniflow *mask = &mask_->masks; BUILD_ASSERT(MINI_N_MAPS == 2); return !(mask->map[0] | mask->map[1]); } openvswitch-2.0.1+git20140120/lib/flow.h000066400000000000000000000325241226605124000173460ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef FLOW_H #define FLOW_H 1 #include #include #include #include #include #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "hash.h" #include "util.h" struct dpif_flow_stats; struct ds; struct flow_wildcards; struct miniflow; struct minimask; struct ofpbuf; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ #define FLOW_WC_SEQ 20 #define FLOW_N_REGS 8 BUILD_ASSERT_DECL(FLOW_N_REGS <= NXM_NX_MAX_REGS); /* Used for struct flow's dl_type member for frames that have no Ethernet * type, that is, pure 802.2 frames. */ #define FLOW_DL_TYPE_NONE 0x5ff /* Fragment bits, used for IPv4 and IPv6, always zero for non-IP flows. */ #define FLOW_NW_FRAG_ANY (1 << 0) /* Set for any IP frag. */ #define FLOW_NW_FRAG_LATER (1 << 1) /* Set for IP frag with nonzero offset. */ #define FLOW_NW_FRAG_MASK (FLOW_NW_FRAG_ANY | FLOW_NW_FRAG_LATER) BUILD_ASSERT_DECL(FLOW_NW_FRAG_ANY == NX_IP_FRAG_ANY); BUILD_ASSERT_DECL(FLOW_NW_FRAG_LATER == NX_IP_FRAG_LATER); #define FLOW_TNL_F_DONT_FRAGMENT (1 << 0) #define FLOW_TNL_F_CSUM (1 << 1) #define FLOW_TNL_F_KEY (1 << 2) const char *flow_tun_flag_to_string(uint32_t flags); struct flow_tnl { ovs_be64 tun_id; ovs_be32 ip_src; ovs_be32 ip_dst; uint16_t flags; uint8_t ip_tos; uint8_t ip_ttl; }; /* Unfortunately, a "struct flow" sometimes has to handle OpenFlow port * numbers and other times datapath (dpif) port numbers. This union allows * access to both. */ union flow_in_port { ofp_port_t ofp_port; odp_port_t odp_port; }; /* * A flow in the network. * * The meaning of 'in_port' is context-dependent. In most cases, it is a * 16-bit OpenFlow 1.0 port number. In the software datapath interface (dpif) * layer and its implementations (e.g. 
dpif-linux, dpif-netdev), it is instead
 * a 32-bit datapath port number. */
struct flow {
    struct flow_tnl tunnel;     /* Encapsulating tunnel parameters. */
    ovs_be64 metadata;          /* OpenFlow Metadata. */
    struct in6_addr ipv6_src;   /* IPv6 source address. */
    struct in6_addr ipv6_dst;   /* IPv6 destination address. */
    struct in6_addr nd_target;  /* IPv6 neighbor discovery (ND) target. */
    uint32_t skb_priority;      /* Packet priority for QoS. */
    uint32_t regs[FLOW_N_REGS]; /* Registers. */
    ovs_be32 nw_src;            /* IPv4 source address. */
    ovs_be32 nw_dst;            /* IPv4 destination address. */
    ovs_be32 ipv6_label;        /* IPv6 flow label. */
    union flow_in_port in_port; /* Input port.*/
    uint32_t pkt_mark;          /* Packet mark. */
    ovs_be32 mpls_lse;          /* MPLS label stack entry. */
    uint16_t mpls_depth;        /* Depth of MPLS stack. */
    ovs_be16 vlan_tci;          /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
    ovs_be16 dl_type;           /* Ethernet frame type. */
    ovs_be16 tp_src;            /* TCP/UDP/SCTP source port (flow_compose()
                                 * uses it as the ICMP type for ICMP). */
    ovs_be16 tp_dst;            /* TCP/UDP/SCTP destination port
                                 * (flow_compose() uses it as the ICMP code
                                 * for ICMP). */
    uint8_t dl_src[6];          /* Ethernet source address. */
    uint8_t dl_dst[6];          /* Ethernet destination address. */
    uint8_t nw_proto;           /* IP protocol or low 8 bits of ARP opcode. */
    uint8_t nw_tos;             /* IP ToS (including DSCP and ECN). */
    uint8_t arp_sha[6];         /* ARP/ND source hardware address. */
    uint8_t arp_tha[6];         /* ARP/ND target hardware address. */
    uint8_t nw_ttl;             /* IP TTL/Hop Limit. */
    uint8_t nw_frag;            /* FLOW_NW_FRAG_* flags. */
    uint8_t zeros[6];           /* Presumably padding: its mask bytes are
                                 * kept zero by flow_wildcards_init_exact(). */
};
/* The flow is routinely accessed as an array of 32-bit words, so its size must
 * be a multiple of 4 bytes. */
BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0);

/* Number of 32-bit words in a struct flow. */
#define FLOW_U32S (sizeof(struct flow) / 4)

/* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
BUILD_ASSERT_DECL(sizeof(struct flow) == sizeof(struct flow_tnl) + 160 &&
                  FLOW_WC_SEQ == 20);

/* Represents the metadata fields of struct flow. */
struct flow_metadata {
    ovs_be64 tun_id;                 /* Encapsulating tunnel ID. */
    ovs_be32 tun_src;                /* Tunnel outer IPv4 src addr */
    ovs_be32 tun_dst;                /* Tunnel outer IPv4 dst addr */
    ovs_be64 metadata;               /* OpenFlow 1.1+ metadata field.
*/
    uint32_t regs[FLOW_N_REGS];      /* Registers. */
    uint32_t pkt_mark;               /* Packet mark. */
    ofp_port_t in_port;              /* OpenFlow port or zero. */
};

void flow_extract(struct ofpbuf *, uint32_t priority, uint32_t mark,
                  const struct flow_tnl *, const union flow_in_port *in_port,
                  struct flow *);

void flow_zero_wildcards(struct flow *, const struct flow_wildcards *);
void flow_get_metadata(const struct flow *, struct flow_metadata *);

char *flow_to_string(const struct flow *);
void format_flags(struct ds *ds, const char *(*bit_to_string)(uint32_t),
                  uint32_t flags, char del);

void flow_format(struct ds *, const struct flow *);
void flow_print(FILE *, const struct flow *);
static inline int flow_compare_3way(const struct flow *, const struct flow *);
static inline bool flow_equal(const struct flow *, const struct flow *);
static inline size_t flow_hash(const struct flow *, uint32_t basis);

void flow_set_dl_vlan(struct flow *, ovs_be16 vid);
void flow_set_vlan_vid(struct flow *, ovs_be16 vid);
void flow_set_vlan_pcp(struct flow *, uint8_t pcp);

void flow_set_mpls_label(struct flow *flow, ovs_be32 label);
void flow_set_mpls_ttl(struct flow *flow, uint8_t ttl);
void flow_set_mpls_tc(struct flow *flow, uint8_t tc);
/* NOTE(review): the definition in flow.c names the last parameter 'bos'. */
void flow_set_mpls_bos(struct flow *flow, uint8_t stack);

void flow_compose(struct ofpbuf *, const struct flow *);

/* Compares 'a' and 'b' byte by byte with memcmp() and returns its result:
 * zero if the two flows are identical, nonzero otherwise. */
static inline int
flow_compare_3way(const struct flow *a, const struct flow *b)
{
    return memcmp(a, b, sizeof *a);
}

/* Returns true if 'a' and 'b' are byte-for-byte identical. */
static inline bool
flow_equal(const struct flow *a, const struct flow *b)
{
    return !flow_compare_3way(a, b);
}

/* Returns a hash of all of the 32-bit words of 'flow', seeded with
 * 'basis'. */
static inline size_t
flow_hash(const struct flow *flow, uint32_t basis)
{
    return hash_words((const uint32_t *) flow, sizeof *flow / 4, basis);
}

/* The helpers below convert the distinct port-number types to plain integers
 * with a forced cast. */
static inline uint16_t
ofp_to_u16(ofp_port_t ofp_port)
{
    return (OVS_FORCE uint16_t) ofp_port;
}

static inline uint32_t
odp_to_u32(odp_port_t odp_port)
{
    return (OVS_FORCE uint32_t) odp_port;
}

static inline uint32_t
ofp11_to_u32(ofp11_port_t ofp11_port)
{
    return (OVS_FORCE uint32_t) ofp11_port;
}
static inline ofp_port_t u16_to_ofp(uint16_t port) { return OFP_PORT_C(port); } static inline odp_port_t u32_to_odp(uint32_t port) { return ODP_PORT_C(port); } static inline ofp11_port_t u32_to_ofp11(uint32_t port) { return OFP11_PORT_C(port); } static inline uint32_t hash_ofp_port(ofp_port_t ofp_port) { return hash_int(ofp_to_u16(ofp_port), 0); } static inline uint32_t hash_odp_port(odp_port_t odp_port) { return hash_int(odp_to_u32(odp_port), 0); } uint32_t flow_hash_in_minimask(const struct flow *, const struct minimask *, uint32_t basis); /* Wildcards for a flow. * * A 1-bit in each bit in 'masks' indicates that the corresponding bit of * the flow is significant (must match). A 0-bit indicates that the * corresponding bit of the flow is wildcarded (need not match). */ struct flow_wildcards { struct flow masks; }; void flow_wildcards_init_catchall(struct flow_wildcards *); void flow_wildcards_init_exact(struct flow_wildcards *); void flow_wildcards_clear_non_packet_fields(struct flow_wildcards *); bool flow_wildcards_is_catchall(const struct flow_wildcards *); void flow_wildcards_set_reg_mask(struct flow_wildcards *, int idx, uint32_t mask); void flow_wildcards_and(struct flow_wildcards *dst, const struct flow_wildcards *src1, const struct flow_wildcards *src2); void flow_wildcards_or(struct flow_wildcards *dst, const struct flow_wildcards *src1, const struct flow_wildcards *src2); bool flow_wildcards_has_extra(const struct flow_wildcards *, const struct flow_wildcards *); void flow_wildcards_fold_minimask(struct flow_wildcards *, const struct minimask *); uint32_t flow_wildcards_hash(const struct flow_wildcards *, uint32_t basis); bool flow_wildcards_equal(const struct flow_wildcards *, const struct flow_wildcards *); uint32_t flow_hash_symmetric_l4(const struct flow *flow, uint32_t basis); void flow_mask_hash_fields(const struct flow *, struct flow_wildcards *, enum nx_hash_fields); uint32_t flow_hash_fields(const struct flow *, enum nx_hash_fields, uint16_t 
basis); const char *flow_hash_fields_to_str(enum nx_hash_fields); bool flow_hash_fields_valid(enum nx_hash_fields); uint32_t flow_hash_in_wildcards(const struct flow *, const struct flow_wildcards *, uint32_t basis); bool flow_equal_except(const struct flow *a, const struct flow *b, const struct flow_wildcards *); /* Compressed flow. */ #define MINI_N_INLINE (sizeof(void *) == 4 ? 7 : 8) #define MINI_N_MAPS DIV_ROUND_UP(FLOW_U32S, 32) /* A sparse representation of a "struct flow". * * A "struct flow" is fairly large and tends to be mostly zeros. Sparse * representation has two advantages. First, it saves memory. Second, it * saves time when the goal is to iterate over only the nonzero parts of the * struct. * * The 'map' member holds one bit for each uint32_t in a "struct flow". Each * 0-bit indicates that the corresponding uint32_t is zero, each 1-bit that it * is nonzero. * * 'values' points to the start of an array that has one element for each 1-bit * in 'map'. The least-numbered 1-bit is in values[0], the next 1-bit is in * values[1], and so on. * * 'values' may point to a few different locations: * * - If 'map' has MINI_N_INLINE or fewer 1-bits, it may point to * 'inline_values'. One hopes that this is the common case. * * - If 'map' has more than MINI_N_INLINE 1-bits, it may point to memory * allocated with malloc(). * * - The caller could provide storage on the stack for situations where * that makes sense. So far that's only proved useful for * minimask_combine(), but the principle works elsewhere. * * The implementation maintains and depends on the invariant that every element * in 'values' is nonzero; that is, wherever a 1-bit appears in 'map', the * corresponding element of 'values' must be nonzero. 
*/ struct miniflow { uint32_t *values; uint32_t inline_values[MINI_N_INLINE]; uint32_t map[MINI_N_MAPS]; }; void miniflow_init(struct miniflow *, const struct flow *); void miniflow_clone(struct miniflow *, const struct miniflow *); void miniflow_move(struct miniflow *dst, struct miniflow *); void miniflow_destroy(struct miniflow *); void miniflow_expand(const struct miniflow *, struct flow *); uint32_t miniflow_get(const struct miniflow *, unsigned int u32_ofs); uint16_t miniflow_get_vid(const struct miniflow *); bool miniflow_equal(const struct miniflow *a, const struct miniflow *b); bool miniflow_equal_in_minimask(const struct miniflow *a, const struct miniflow *b, const struct minimask *); bool miniflow_equal_flow_in_minimask(const struct miniflow *a, const struct flow *b, const struct minimask *); uint32_t miniflow_hash(const struct miniflow *, uint32_t basis); uint32_t miniflow_hash_in_minimask(const struct miniflow *, const struct minimask *, uint32_t basis); /* Compressed flow wildcards. */ /* A sparse representation of a "struct flow_wildcards". * * See the large comment on struct miniflow for details. 
*/ struct minimask { struct miniflow masks; }; void minimask_init(struct minimask *, const struct flow_wildcards *); void minimask_clone(struct minimask *, const struct minimask *); void minimask_move(struct minimask *dst, struct minimask *src); void minimask_combine(struct minimask *dst, const struct minimask *a, const struct minimask *b, uint32_t storage[FLOW_U32S]); void minimask_destroy(struct minimask *); void minimask_expand(const struct minimask *, struct flow_wildcards *); uint32_t minimask_get(const struct minimask *, unsigned int u32_ofs); uint16_t minimask_get_vid_mask(const struct minimask *); bool minimask_equal(const struct minimask *a, const struct minimask *b); uint32_t minimask_hash(const struct minimask *, uint32_t basis); bool minimask_has_extra(const struct minimask *, const struct minimask *); bool minimask_is_catchall(const struct minimask *); #endif /* flow.h */ openvswitch-2.0.1+git20140120/lib/guarded-list.c000066400000000000000000000042261226605124000207540ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "guarded-list.h" void guarded_list_init(struct guarded_list *list) { ovs_mutex_init(&list->mutex); list_init(&list->list); list->n = 0; } void guarded_list_destroy(struct guarded_list *list) { ovs_mutex_destroy(&list->mutex); } bool guarded_list_is_empty(const struct guarded_list *list) { bool empty; ovs_mutex_lock(&list->mutex); empty = list->n == 0; ovs_mutex_unlock(&list->mutex); return empty; } /* If 'list' has fewer than 'max' elements, adds 'node' at the end of the list * and returns the number of elements now on the list. * * If 'list' already has at least 'max' elements, returns 0 without modifying * the list. */ size_t guarded_list_push_back(struct guarded_list *list, struct list *node, size_t max) { size_t retval = 0; ovs_mutex_lock(&list->mutex); if (list->n < max) { list_push_back(&list->list, node); retval = ++list->n; } ovs_mutex_unlock(&list->mutex); return retval; } struct list * guarded_list_pop_front(struct guarded_list *list) { struct list *node = NULL; ovs_mutex_lock(&list->mutex); if (list->n) { node = list_pop_front(&list->list); list->n--; } ovs_mutex_unlock(&list->mutex); return node; } size_t guarded_list_pop_all(struct guarded_list *list, struct list *elements) { size_t n; ovs_mutex_lock(&list->mutex); list_move(elements, &list->list); n = list->n; list_init(&list->list); list->n = 0; ovs_mutex_unlock(&list->mutex); return n; } openvswitch-2.0.1+git20140120/lib/guarded-list.h000066400000000000000000000023411226605124000207550ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef GUARDED_LIST_H #define GUARDED_LIST_H 1 #include #include "compiler.h" #include "list.h" #include "ovs-thread.h" struct guarded_list { struct ovs_mutex mutex; struct list list; size_t n; }; void guarded_list_init(struct guarded_list *); void guarded_list_destroy(struct guarded_list *); bool guarded_list_is_empty(const struct guarded_list *); size_t guarded_list_push_back(struct guarded_list *, struct list *, size_t max); struct list *guarded_list_pop_front(struct guarded_list *); size_t guarded_list_pop_all(struct guarded_list *, struct list *); #endif /* guarded-list.h */ openvswitch-2.0.1+git20140120/lib/hash.c000066400000000000000000000036461226605124000173200ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "hash.h" #include #include "unaligned.h" /* Returns the hash of 'a', 'b', and 'c'. 
*/ uint32_t hash_3words(uint32_t a, uint32_t b, uint32_t c) { return mhash_finish(mhash_add(mhash_add(mhash_add(a, 0), b), c), 12); } /* Returns the hash of the 'n' bytes at 'p', starting from 'basis'. */ uint32_t hash_bytes(const void *p_, size_t n, uint32_t basis) { const uint32_t *p = p_; size_t orig_n = n; uint32_t hash; hash = basis; while (n >= 4) { hash = mhash_add(hash, get_unaligned_u32(p)); n -= 4; p += 1; } if (n) { uint32_t tmp = 0; memcpy(&tmp, p, n); hash = mhash_add__(hash, tmp); } return mhash_finish(hash, orig_n); } /* Returns the hash of the 'n' 32-bit words at 'p', starting from 'basis'. * 'p' must be properly aligned. */ uint32_t hash_words(const uint32_t p[], size_t n_words, uint32_t basis) { uint32_t hash; size_t i; hash = basis; for (i = 0; i < n_words; i++) { hash = mhash_add(hash, p[i]); } return mhash_finish(hash, n_words * 4); } uint32_t hash_double(double x, uint32_t basis) { uint32_t value[2]; BUILD_ASSERT_DECL(sizeof x == sizeof value); memcpy(value, &x, sizeof value); return hash_3words(value[0], value[1], basis); } openvswitch-2.0.1+git20140120/lib/hash.h000066400000000000000000000072531226605124000173230ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef HASH_H #define HASH_H 1 #include #include #include #include #include "util.h" #ifdef __cplusplus extern "C" { #endif static inline uint32_t hash_rot(uint32_t x, int k) { return (x << k) | (x >> (32 - k)); } uint32_t hash_words(const uint32_t data[], size_t n_words, uint32_t basis); uint32_t hash_bytes(const void *, size_t n_bytes, uint32_t basis); static inline uint32_t hash_int(uint32_t x, uint32_t basis); static inline uint32_t hash_2words(uint32_t, uint32_t); uint32_t hash_3words(uint32_t, uint32_t, uint32_t); static inline uint32_t hash_boolean(bool x, uint32_t basis); uint32_t hash_double(double, uint32_t basis); static inline uint32_t hash_pointer(const void *, uint32_t basis); static inline uint32_t hash_string(const char *, uint32_t basis); /* Murmurhash by Austin Appleby, * from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp. * * The upstream license there says: * * // MurmurHash3 was written by Austin Appleby, and is placed in the public * // domain. The author hereby disclaims copyright to this source code. * * See hash_words() for sample usage. */ static inline uint32_t mhash_add__(uint32_t hash, uint32_t data) { data *= 0xcc9e2d51; data = hash_rot(data, 15); data *= 0x1b873593; return hash ^ data; } static inline uint32_t mhash_add(uint32_t hash, uint32_t data) { hash = mhash_add__(hash, data); hash = hash_rot(hash, 13); return hash * 5 + 0xe6546b64; } static inline uint32_t mhash_finish(uint32_t hash, size_t n_bytes) { hash ^= n_bytes; hash ^= hash >> 16; hash *= 0x85ebca6b; hash ^= hash >> 13; hash *= 0xc2b2ae35; hash ^= hash >> 16; return hash; } static inline uint32_t hash_string(const char *s, uint32_t basis) { return hash_bytes(s, strlen(s), basis); } static inline uint32_t hash_int(uint32_t x, uint32_t basis) { return hash_2words(x, basis); } /* An attempt at a useful 1-bit hash function. Has not been analyzed for * quality. 
*/ static inline uint32_t hash_boolean(bool x, uint32_t basis) { const uint32_t P0 = 0xc2b73583; /* This is hash_int(1, 0). */ const uint32_t P1 = 0xe90f1258; /* This is hash_int(2, 0). */ return (x ? P0 : P1) ^ hash_rot(basis, 1); } static inline uint32_t hash_pointer(const void *p, uint32_t basis) { /* Often pointers are hashed simply by casting to integer type, but that * has pitfalls since the lower bits of a pointer are often all 0 for * alignment reasons. It's hard to guess where the entropy really is, so * we give up here and just use a high-quality hash function. * * The double cast suppresses a warning on 64-bit systems about casting to * an integer to different size. That's OK in this case, since most of the * entropy in the pointer is almost certainly in the lower 32 bits. */ return hash_int((uint32_t) (uintptr_t) p, basis); } static inline uint32_t hash_2words(uint32_t x, uint32_t y) { return mhash_finish(mhash_add(mhash_add(x, 0), y), 4); } #ifdef __cplusplus } #endif #endif /* hash.h */ openvswitch-2.0.1+git20140120/lib/heap.c000066400000000000000000000124061226605124000173040ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "heap.h" #include #include "util.h" static void put_node(struct heap *, struct heap_node *, size_t i); static void swap_nodes(struct heap *, size_t i, size_t j); static bool float_up(struct heap *, size_t i); static void float_down(struct heap *, size_t i); static void float_up_or_down(struct heap *, size_t i); /* Initializes 'heap' as an empty heap. */ void heap_init(struct heap *heap) { heap->array = NULL; heap->n = 0; heap->allocated = 0; } /* Frees memory owned internally by 'heap'. The caller is responsible for * freeing 'heap' itself, if necessary. */ void heap_destroy(struct heap *heap) { if (heap) { free(heap->array); } } /* Removes all of the elements from 'heap', without freeing any allocated * memory. */ void heap_clear(struct heap *heap) { heap->n = 0; } /* Exchanges the contents of 'a' and 'b'. */ void heap_swap(struct heap *a, struct heap *b) { struct heap tmp = *a; *a = *b; *b = tmp; } /* Inserts 'node' into 'heap' with the specified 'priority'. * * This takes time O(lg n). */ void heap_insert(struct heap *heap, struct heap_node *node, uint32_t priority) { heap_raw_insert(heap, node, priority); float_up(heap, node->idx); } /* Removes 'node' from 'heap'. * * This takes time O(lg n). */ void heap_remove(struct heap *heap, struct heap_node *node) { size_t i = node->idx; heap_raw_remove(heap, node); if (i <= heap->n) { float_up_or_down(heap, i); } } /* Changes the priority of 'node' (which must be in 'heap') to 'priority'. * * This takes time O(lg n). */ void heap_change(struct heap *heap, struct heap_node *node, uint32_t priority) { heap_raw_change(node, priority); float_up_or_down(heap, node->idx); } /* Inserts 'node' into 'heap' with the specified 'priority', without * maintaining the heap invariant. * * After this call, heap_max() will no longer necessarily return the maximum * value in the heap, and HEAP_FOR_EACH will no longer necessarily iterate in * heap level order, until the next call to heap_rebuild(heap). 
*
 * This takes time O(1). */
void
heap_raw_insert(struct heap *heap, struct heap_node *node, uint32_t priority)
{
    if (heap->n >= heap->allocated) {
        /* Out of room: capacity becomes 1 for an empty heap and twice the
         * current element count otherwise.  The array is sized for
         * 'allocated + 1' entries because nodes are stored starting at
         * index 1 (see the '++heap->n' below). */
        heap->allocated = heap->n == 0 ? 1 : 2 * heap->n;
        heap->array = xrealloc(heap->array,
                               (heap->allocated + 1) * sizeof *heap->array);
    }

    /* Append at the new last index; put_node() also records that index in
     * 'node->idx'. */
    put_node(heap, node, ++heap->n);
    node->priority = priority;
}

/* Removes 'node' from 'heap', without maintaining the heap invariant.
 *
 * After this call, heap_max() will no longer necessarily return the maximum
 * value in the heap, and HEAP_FOR_EACH will no longer necessarily iterate in
 * heap level order, until the next call to heap_rebuild(heap).
 *
 * This takes time O(1). */
void
heap_raw_remove(struct heap *heap, struct heap_node *node)
{
    size_t i = node->idx;

    /* Plug the hole at 'i' with the last node, unless 'node' itself is the
     * last node, in which case shrinking the count is enough. */
    if (i < heap->n) {
        put_node(heap, heap->array[heap->n], i);
    }
    heap->n--;
}

/* Rebuilds 'heap' to restore the heap invariant following a series of one or
 * more calls to heap_raw_*() functions. (Otherwise this function need not be
 * called.)
 *
 * This takes time O(n) in the current size of the heap.
*/
void
heap_rebuild(struct heap *heap)
{
    size_t i;

    /* Sift down every node that can have a child, from index n / 2 back to
     * the root at index 1.  'i' is unsigned, so the loop ends when it reaches
     * 0. */
    for (i = heap->n / 2; i >= 1; i--) {
        float_down(heap, i);
    }
}

/* Stores 'node' at index 'i' of the array in 'heap' and records 'i' in
 * 'node->idx' so that the node knows its own position. */
static void
put_node(struct heap *heap, struct heap_node *node, size_t i)
{
    heap->array[i] = node;
    node->idx = i;
}

/* Exchanges the nodes at indexes 'i' and 'j' in 'heap', updating the 'idx'
 * member of both. */
static void
swap_nodes(struct heap *heap, size_t i, size_t j)
{
    struct heap_node *old_i = heap->array[i];
    struct heap_node *old_j = heap->array[j];

    put_node(heap, old_j, i);
    put_node(heap, old_i, j);
}

/* Moves the node at index 'i' toward the root for as long as its priority is
 * strictly greater than its parent's.  Returns true if the node moved at
 * least one level, false if it stayed where it was. */
static bool
float_up(struct heap *heap, size_t i)
{
    bool moved = false;
    size_t parent;

    /* Index 1 is the root, which has no parent. */
    for (; i > 1; i = parent) {
        parent = heap_parent__(i);
        if (heap->array[parent]->priority >= heap->array[i]->priority) {
            break;
        }
        swap_nodes(heap, parent, i);
        moved = true;
    }
    return moved;
}

/* Moves the node at index 'i' away from the root, swapping it with its
 * higher-priority child, until neither child has a strictly greater priority
 * or the node becomes a leaf. */
static void
float_down(struct heap *heap, size_t i)
{
    while (!heap_is_leaf__(heap, i)) {
        size_t left = heap_left__(i);
        size_t right = heap_right__(i);
        size_t max = i;

        /* A non-leaf always has a left child; the right child exists only if
         * its index is within the heap, hence the extra bound check. */
        if (heap->array[left]->priority > heap->array[max]->priority) {
            max = left;
        }
        if (right <= heap->n
            && heap->array[right]->priority > heap->array[max]->priority) {
            max = right;
        }
        if (max == i) {
            break;
        }

        swap_nodes(heap, max, i);
        i = max;
    }
}

/* Restores the position of the node at index 'i': tries float_up() first and
 * falls back to float_down() only if the node did not move up. */
static void
float_up_or_down(struct heap *heap, size_t i)
{
    if (!float_up(heap, i)) {
        float_down(heap, i);
    }
}

openvswitch-2.0.1+git20140120/lib/heap.h000066400000000000000000000124671226605124000173200ustar00rootroot00000000000000/*
 * Copyright (c) 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ #ifndef HEAP_H #define HEAP_H 1 #include #include #include /* A heap node, to be embedded inside the data structure in the heap. */ struct heap_node { size_t idx; uint32_t priority; }; /* A max-heap. */ struct heap { struct heap_node **array; /* Data in elements 1...n, element 0 unused. */ size_t n; /* Number of nodes currently in the heap. */ size_t allocated; /* Max 'n' before 'array' must be enlarged. */ }; /* Initialization. */ void heap_init(struct heap *); void heap_destroy(struct heap *); void heap_clear(struct heap *); void heap_swap(struct heap *a, struct heap *b); static inline size_t heap_count(const struct heap *); static inline bool heap_is_empty(const struct heap *); /* Insertion and deletion. */ void heap_insert(struct heap *, struct heap_node *, uint32_t priority); void heap_change(struct heap *, struct heap_node *, uint32_t priority); void heap_remove(struct heap *, struct heap_node *); static inline struct heap_node *heap_pop(struct heap *); /* Maximum. */ static inline struct heap_node *heap_max(const struct heap *); /* The "raw" functions below do not preserve the heap invariants. After you * call them, heap_max() will not necessarily return the right value until you * subsequently call heap_rebuild(). */ void heap_raw_insert(struct heap *, struct heap_node *, uint32_t priority); static inline void heap_raw_change(struct heap_node *, uint32_t priority); void heap_raw_remove(struct heap *, struct heap_node *); void heap_rebuild(struct heap *); /* Iterates through each NODE in HEAP, where NODE->MEMBER must be a "struct * heap_node". Iterates in heap level order, which in particular means that * the first node visited is the maximum value in the heap. * * If a heap_raw_*() function has been called without a later call to * heap_rebuild(), then the first node visited may not be the maximum * element. */ #define HEAP_FOR_EACH(NODE, MEMBER, HEAP) \ for (((HEAP)->n > 0 \ ? 
ASSIGN_CONTAINER(NODE, (HEAP)->array[1], MEMBER) \ : ((NODE) = NULL, (void) 0)); \ (NODE) != NULL; \ ((NODE)->MEMBER.idx < (HEAP)->n \ ? ASSIGN_CONTAINER(NODE, \ (HEAP)->array[(NODE)->MEMBER.idx + 1], \ MEMBER) \ : ((NODE) = NULL, (void) 0))) /* Returns the index of the node that is the parent of the node with the given * 'idx' within a heap. */ static inline size_t heap_parent__(size_t idx) { return idx / 2; } /* Returns the index of the node that is the left child of the node with the * given 'idx' within a heap. */ static inline size_t heap_left__(size_t idx) { return idx * 2; } /* Returns the index of the node that is the right child of the node with the * given 'idx' within a heap. */ static inline size_t heap_right__(size_t idx) { return idx * 2 + 1; } /* Returns true if 'idx' is the index of a leaf node in 'heap', false * otherwise. */ static inline bool heap_is_leaf__(const struct heap *heap, size_t idx) { return heap_left__(idx) > heap->n; } /* Returns the number of elements in 'heap'. */ static inline size_t heap_count(const struct heap *heap) { return heap->n; } /* Returns true if 'heap' is empty, false if it contains at least one * element. */ static inline bool heap_is_empty(const struct heap *heap) { return heap->n == 0; } /* Returns the largest element in 'heap'. * * The caller must ensure that 'heap' contains at least one element. * * The return value may be wrong (i.e. not the maximum element but some other * element) if a heap_raw_*() function has been called without a later call to * heap_rebuild(). */ static inline struct heap_node * heap_max(const struct heap *heap) { return heap->array[1]; } /* Removes an arbitrary node from 'heap', in O(1), maintaining the heap * invariant. Returns the node removed. * * The caller must ensure that 'heap' contains at least one element. */ static inline struct heap_node * heap_pop(struct heap *heap) { return heap->array[heap->n--]; } /* Changes the priority of 'node' (which must be in 'heap') to 'priority'. 
* * After this call, heap_max() will no longer necessarily return the maximum * value in the heap, and HEAP_FOR_EACH will no longer necessarily iterate in * heap level order, until the next call to heap_rebuild(heap). * * This takes time O(1). */ static inline void heap_raw_change(struct heap_node *node, uint32_t priority) { node->priority = priority; } #endif /* heap.h */ openvswitch-2.0.1+git20140120/lib/hindex.c000066400000000000000000000232751226605124000176540ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "hindex.h" #include "coverage.h" static bool hindex_node_is_body(const struct hindex_node *); static bool hindex_node_is_head(const struct hindex_node *); static void hindex_resize(struct hindex *, size_t new_mask); static size_t hindex_calc_mask(size_t capacity); COVERAGE_DEFINE(hindex_pathological); COVERAGE_DEFINE(hindex_expand); COVERAGE_DEFINE(hindex_shrink); COVERAGE_DEFINE(hindex_reserve); /* Initializes 'hindex' as an empty hash index. */ void hindex_init(struct hindex *hindex) { hindex->buckets = &hindex->one; hindex->one = NULL; hindex->mask = 0; hindex->n_unique = 0; } /* Frees memory reserved by 'hindex'. It is the client's responsibility to * free the nodes themselves, if necessary. 
*/
void
hindex_destroy(struct hindex *hindex)
{
    /* The bucket array is heap-allocated only when it is not the embedded
     * single bucket 'one'. */
    if (hindex && hindex->buckets != &hindex->one) {
        free(hindex->buckets);
    }
}

/* Removes all nodes from 'hindex', leaving it ready to accept more nodes. Does
 * not free memory allocated for 'hindex'.
 *
 * This function is appropriate when 'hindex' will soon have about as many
 * elements as it did before. If 'hindex' will likely have fewer elements than
 * before, use hindex_destroy() followed by hindex_init() to save memory and
 * iteration time. */
void
hindex_clear(struct hindex *hindex)
{
    if (hindex->n_unique > 0) {
        hindex->n_unique = 0;
        /* 'mask + 1' is the number of buckets. */
        memset(hindex->buckets, 0,
               (hindex->mask + 1) * sizeof *hindex->buckets);
    }
}

/* Exchanges hash indexes 'a' and 'b'. */
void
hindex_swap(struct hindex *a, struct hindex *b)
{
    struct hindex tmp = *a;
    *a = *b;
    *b = tmp;
    /* A struct copy carries over a 'buckets' pointer that may refer to the
     * other struct's embedded 'one' bucket; re-aim it. */
    hindex_moved(a);
    hindex_moved(b);
}

/* Adjusts 'hindex' to compensate for having moved position in memory (e.g. due
 * to realloc()). */
void
hindex_moved(struct hindex *hindex)
{
    /* A zero mask means the single embedded bucket is in use, so 'buckets'
     * must point into this copy of the struct. */
    if (!hindex->mask) {
        hindex->buckets = &hindex->one;
    }
}

/* Expands 'hindex', if necessary, to optimize the performance of searches. */
void
hindex_expand(struct hindex *hindex)
{
    size_t new_mask = hindex_calc_mask(hindex->n_unique);
    if (new_mask > hindex->mask) {
        COVERAGE_INC(hindex_expand);
        hindex_resize(hindex, new_mask);
    }
}

/* Shrinks 'hindex', if necessary, to optimize the performance of iteration. */
void
hindex_shrink(struct hindex *hindex)
{
    size_t new_mask = hindex_calc_mask(hindex->n_unique);
    if (new_mask < hindex->mask) {
        COVERAGE_INC(hindex_shrink);
        hindex_resize(hindex, new_mask);
    }
}

/* Expands 'hindex', if necessary, to optimize the performance of searches when
 * it has up to 'n' unique hashes. (But iteration will be slow in a hash index
 * whose allocated capacity is much higher than its current number of
 * nodes.)
*/
void
hindex_reserve(struct hindex *hindex, size_t n)
{
    size_t new_mask = hindex_calc_mask(n);
    if (new_mask > hindex->mask) {
        COVERAGE_INC(hindex_reserve);
        hindex_resize(hindex, new_mask);
    }
}

/* Inserts 'node', with the given 'hash', into 'hindex'. Never automatically
 * expands 'hindex' (use hindex_insert() instead if you want that). */
void
hindex_insert_fast(struct hindex *hindex,
                   struct hindex_node *node, size_t hash)
{
    struct hindex_node *head = hindex_node_with_hash(hindex, hash);
    if (head) {
        /* 'head' is an existing head with hash == 'hash'.
         * Insert 'node' as a body node just below 'head'. */
        node->s = head->s;
        node->d = head;
        if (node->s) {
            /* The former first body node now hangs below 'node'. */
            node->s->d = node;
        }
        head->s = node;
    } else {
        /* No existing node has hash 'hash'. Insert 'node' as a new head in
         * its bucket. */
        struct hindex_node **bucket = &hindex->buckets[hash & hindex->mask];
        node->s = NULL;
        node->d = *bucket;
        *bucket = node;
        hindex->n_unique++;
    }
    node->hash = hash;
}

/* Inserts 'node', with the given 'hash', into 'hindex', and expands 'hindex'
 * if necessary to optimize search performance. */
void
hindex_insert(struct hindex *hindex, struct hindex_node *node, size_t hash)
{
    hindex_insert_fast(hindex, node, hash);
    /* Cheap pre-check so that hindex_expand() is only called once the number
     * of unique hashes exceeds twice the current mask. */
    if (hindex->n_unique / 2 > hindex->mask) {
        hindex_expand(hindex);
    }
}

/* Removes 'node' from 'hindex'. Does not shrink the hash index; call
 * hindex_shrink() directly if desired. */
void
hindex_remove(struct hindex *hindex, struct hindex_node *node)
{
    if (!hindex_node_is_head(node)) {
        /* Body node: 'd' points at the previous node with the same hash, so
         * unlink from the vertical chain in constant time. */
        node->d->s = node->s;
        if (node->s) {
            node->s->d = node->d;
        }
    } else {
        struct hindex_node **head;

        /* Head node: walk the bucket's 'd' chain to find the link that points
         * at the head carrying this hash, i.e. at 'node'. */
        for (head = &hindex->buckets[node->hash & hindex->mask];
             (*head)->hash != node->hash;
             head = &(*head)->d)
        {
            continue;
        }

        if (node->s) {
            /* Promote the first body node to head; it inherits the link to
             * the next head. */
            *head = node->s;
            node->s->d = node->d;
        } else {
            /* 'node' was the only node with this hash. */
            *head = node->d;
            hindex->n_unique--;
        }
    }
}

/* Helper functions.
*/

/* Returns true if 'node', which must be inserted into an hindex, is a "body"
 * node, that is, it is not reachable from a bucket by following zero or more
 * 'd' pointers. Returns false otherwise. */
static bool
hindex_node_is_body(const struct hindex_node *node)
{
    /* In a body node 'd' points back at a node with the same hash; in a head
     * node 'd' is null or points at a head with a different hash. */
    return node->d && node->d->hash == node->hash;
}

/* Returns true if 'node', which must be inserted into an hindex, is a "head"
 * node, that is, if it is reachable from a bucket by following zero or more
 * 'd' pointers. Returns false if 'node' is a body node (and therefore one
 * must follow at least one 's' pointer to reach it). */
static bool
hindex_node_is_head(const struct hindex_node *node)
{
    return !hindex_node_is_body(node);
}

/* Reallocates 'hindex''s array of buckets to use bitwise mask 'new_mask'. */
static void
hindex_resize(struct hindex *hindex, size_t new_mask)
{
    struct hindex tmp;
    size_t i;

    ovs_assert(is_pow2(new_mask + 1));
    ovs_assert(new_mask != SIZE_MAX);

    /* Build the new table in 'tmp'.  With a zero 'new_mask' the embedded
     * single bucket set up by hindex_init() is used as is. */
    hindex_init(&tmp);
    if (new_mask) {
        tmp.buckets = xmalloc(sizeof *tmp.buckets * (new_mask + 1));
        tmp.mask = new_mask;
        for (i = 0; i <= tmp.mask; i++) {
            tmp.buckets[i] = NULL;
        }
    }

    /* Re-link every head node into its new bucket.  Only the 'd' chain of
     * heads is walked; 's' pointers are not modified by this loop. */
    for (i = 0; i <= hindex->mask; i++) {
        struct hindex_node *node, *next;
        int count;

        count = 0;
        for (node = hindex->buckets[i]; node; node = next) {
            struct hindex_node **head = &tmp.buckets[node->hash & tmp.mask];

            /* Save the successor before 'node->d' is overwritten. */
            next = node->d;
            node->d = *head;
            *head = node;
            count++;
        }
        /* More than 5 distinct hashes in one old bucket is counted as a
         * pathological distribution. */
        if (count > 5) {
            COVERAGE_INC(hindex_pathological);
        }
    }
    tmp.n_unique = hindex->n_unique;
    /* After the swap 'tmp' holds the old bucket array, which is released. */
    hindex_swap(hindex, &tmp);
    hindex_destroy(&tmp);
}

/* Returns the bitwise mask to use in struct hindex to support 'capacity'
 * hindex_nodes with unique hashes. */
static size_t
hindex_calc_mask(size_t capacity)
{
    size_t mask = capacity / 2;
    /* Smear the highest set bit into every lower position, giving a mask of
     * the form 2**k - 1. */
    mask |= mask >> 1;
    mask |= mask >> 2;
    mask |= mask >> 4;
    mask |= mask >> 8;
    mask |= mask >> 16;
#if SIZE_MAX > UINT32_MAX
    mask |= mask >> 32;
#endif

    /* If we need to dynamically allocate buckets we might as well allocate at
     * least 4 of them.
*/
    mask |= (mask & 1) << 1;

    return mask;
}

/* Returns the head node in 'hindex' with the given 'hash', or a null pointer
 * if no nodes have that hash value. */
struct hindex_node *
hindex_node_with_hash(const struct hindex *hindex, size_t hash)
{
    struct hindex_node *node = hindex->buckets[hash & hindex->mask];

    /* Follow the 'd' links through the bucket's nodes until the hash matches
     * or the chain ends. */
    while (node && node->hash != hash) {
        node = node->d;
    }
    return node;
}

/* Returns the head node in 'hindex' with the given 'hash'. 'hindex' must
 * contain a head node with the given hash. */
static struct hindex_node *
hindex_head_node(const struct hindex *hindex, size_t hash)
{
    struct hindex_node *node = hindex->buckets[hash & hindex->mask];

    /* No null check: the caller guarantees that a match exists. */
    while (node->hash != hash) {
        node = node->d;
    }
    return node;
}

/* Returns the first node of the first non-empty bucket in 'hindex' whose
 * index is 'start' or greater, or a null pointer if all of those buckets are
 * empty. */
static struct hindex_node *
hindex_next__(const struct hindex *hindex, size_t start)
{
    size_t i;
    for (i = start; i <= hindex->mask; i++) {
        struct hindex_node *node = hindex->buckets[i];
        if (node) {
            return node;
        }
    }
    return NULL;
}

/* Returns the first node in 'hindex', in arbitrary order, or a null pointer if
 * 'hindex' is empty. */
struct hindex_node *
hindex_first(const struct hindex *hindex)
{
    return hindex_next__(hindex, 0);
}

/* Returns the next node in 'hindex' following 'node', in arbitrary order, or a
 * null pointer if 'node' is the last node in 'hindex'.
 *
 * If the hash index has been reallocated since 'node' was visited, some nodes
 * may be skipped or visited twice. */
struct hindex_node *
hindex_next(const struct hindex *hindex, const struct hindex_node *node)
{
    struct hindex_node *head;

    /* If there's a node with the same hash, return it. */
    if (node->s) {
        return node->s;
    }

    /* If there's another node in the same bucket, return it. */
    /* ('node' may be a body node whose 'd' points backward, so the forward
     * link has to be taken from the head node for this hash.) */
    head = hindex_head_node(hindex, node->hash);
    if (head->d) {
        return head->d;
    }

    /* Return the first node in the next (or later) bucket.
*/ return hindex_next__(hindex, (node->hash & hindex->mask) + 1); } openvswitch-2.0.1+git20140120/lib/hindex.h000066400000000000000000000145641226605124000176620ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef HINDEX_H #define HINDEX_H 1 /* Hashed multimap. * * hindex is a hash table data structure that gracefully handles duplicates. * With a high-quality hash function, insertion, deletion, and search are O(1) * expected time, regardless of the number of duplicates for a given key. */ #include #include #include "util.h" /* A hash index node, to embed inside the data structure being indexed. * * Nodes are linked together like this (the boxes are labeled with hash * values): * * +--------+ d +--------+ d +--------+ d * bucket---> | 6 |---->| 20 |---->| 15 |---->null * +-|------+ +-|------+ +-|------+ * | ^ | | ^ * s| |d |s s| |d * V | V V | * +------|-+ null +------|-+ * | 6 | | 15 | * +-|------+ +-|------+ * | ^ | * s| |d s| * V | V * +------|-+ null * | 6 | * +-|------+ * | * s| * V * null * * The basic usage is: * * - To visit the unique hash values in the hindex, follow the 'd' * ("different") pointers starting from each bucket. The nodes visited * this way are called "head" nodes, because they are at the head of the * vertical chains. 
* * - To visit the nodes with hash value H, follow the 'd' pointers in the * appropriate bucket until you find one with hash H, then follow the 's' * ("same") pointers until you hit a null pointer. The non-head nodes * visited this way are called "body" nodes. * * - The 'd' pointers in body nodes point back to the previous body node * or, for the first body node, to the head node. (This makes it * possible to remove a body node without traversing all the way downward * from the head). */ struct hindex_node { /* Hash value. */ size_t hash; /* In a head node, the next head node (with a hash different from this * node), or NULL if this is the last node in this bucket. * * In a body node, the previous head or body node (with the same hash as * this node). Never null. */ struct hindex_node *d; /* In a head or a body node, the next body node with the same hash as this * node. NULL if this is the last node with this hash. */ struct hindex_node *s; }; /* A hash index. * * A bucket is selected by masking a node's hash with 'mask'. While 'mask' is * 0 the only bucket is the embedded 'one' member, so no separately allocated * bucket array is needed. */ struct hindex { struct hindex_node **buckets; /* Must point to 'one' iff 'mask' == 0. */ struct hindex_node *one; /* The sole bucket while 'mask' == 0. */ size_t mask; /* 0 or more lowest-order bits set, others cleared. */ size_t n_unique; /* Number of unique hashes (the number of head nodes). */ }; /* Initializer for an empty hash index. */ #define HINDEX_INITIALIZER(HINDEX) \ { (struct hindex_node **const) &(HINDEX)->one, NULL, 0, 0 } /* Initialization. */ void hindex_init(struct hindex *); void hindex_destroy(struct hindex *); void hindex_clear(struct hindex *); void hindex_swap(struct hindex *a, struct hindex *b); void hindex_moved(struct hindex *hindex); static inline bool hindex_is_empty(const struct hindex *); /* Adjusting capacity. */ void hindex_expand(struct hindex *); void hindex_shrink(struct hindex *); void hindex_reserve(struct hindex *, size_t capacity); /* Insertion and deletion.
*/ void hindex_insert_fast(struct hindex *, struct hindex_node *, size_t hash); void hindex_insert(struct hindex *, struct hindex_node *, size_t hash); void hindex_remove(struct hindex *, struct hindex_node *); /* Search. * * HINDEX_FOR_EACH_WITH_HASH iterates NODE over all of the nodes in HINDEX that * have hash value equal to HASH. MEMBER must be the name of the 'struct * hindex_node' member within NODE. * * Iteration starts at the head node returned by hindex_node_with_hash() and * then follows the 's' pointers, so it visits only nodes whose hash is HASH. * * The loop should not change NODE to point to a different node or insert or * delete nodes in HINDEX (unless it "break"s out of the loop to terminate * iteration). * * Evaluates HASH only once. */ #define HINDEX_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HINDEX) \ for (ASSIGN_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ ASSIGN_CONTAINER(NODE, (NODE)->MEMBER.s, MEMBER)) struct hindex_node *hindex_node_with_hash(const struct hindex *, size_t hash); /* Iteration. */ /* Iterates through every node in HINDEX. */ #define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ for (ASSIGN_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ ASSIGN_CONTAINER(NODE, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER)) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash index but its members remain accessible and intact). */ #define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \ for (ASSIGN_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ ? ASSIGN_CONTAINER(NEXT, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER), 1 \ : 0); \ (NODE) = (NEXT)) struct hindex_node *hindex_first(const struct hindex *); struct hindex_node *hindex_next(const struct hindex *, const struct hindex_node *); /* Returns true if 'hindex' currently contains no nodes, false otherwise.
*/ static inline bool hindex_is_empty(const struct hindex *hindex) { return hindex->n_unique == 0; } #endif /* hindex.h */ openvswitch-2.0.1+git20140120/lib/hmap.c000066400000000000000000000177071226605124000173250ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "hmap.h" #include #include #include "coverage.h" #include "random.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(hmap); COVERAGE_DEFINE(hmap_pathological); COVERAGE_DEFINE(hmap_expand); COVERAGE_DEFINE(hmap_shrink); COVERAGE_DEFINE(hmap_reserve); /* Initializes 'hmap' as an empty hash table. */ void hmap_init(struct hmap *hmap) { hmap->buckets = &hmap->one; hmap->one = NULL; hmap->mask = 0; hmap->n = 0; } /* Frees memory reserved by 'hmap'. It is the client's responsibility to free * the nodes themselves, if necessary. */ void hmap_destroy(struct hmap *hmap) { if (hmap && hmap->buckets != &hmap->one) { free(hmap->buckets); } } /* Removes all nodes from 'hmap', leaving it ready to accept more nodes. Does * not free memory allocated for 'hmap'. * * This function is appropriate when 'hmap' will soon have about as many * elements as it had before. If 'hmap' will likely have fewer elements than * before, use hmap_destroy() followed by hmap_init() to save memory and * iteration time. (Do not call hmap_clear() after hmap_destroy(): the bucket * array may already have been freed, and hmap_clear() writes to it.)
*/ void hmap_clear(struct hmap *hmap) { if (hmap->n > 0) { hmap->n = 0; memset(hmap->buckets, 0, (hmap->mask + 1) * sizeof *hmap->buckets); } } /* Exchanges hash maps 'a' and 'b'. */ void hmap_swap(struct hmap *a, struct hmap *b) { struct hmap tmp = *a; *a = *b; *b = tmp; /* A map that still uses its embedded bucket must point at its own 'one'. */ hmap_moved(a); hmap_moved(b); } /* Adjusts 'hmap' to compensate for having moved position in memory (e.g. due * to realloc()). */ void hmap_moved(struct hmap *hmap) { if (!hmap->mask) { hmap->buckets = &hmap->one; } } /* Rehashes every node of 'hmap' into a new set of buckets selected by bitwise * mask 'new_mask', which must be one less than a power of 2. A bucket array * is allocated with xmalloc() unless 'new_mask' is 0. * * 'where' is used only in the debug log message emitted when one of the old * buckets held more than 5 nodes. */ static void resize(struct hmap *hmap, size_t new_mask, const char *where) { struct hmap tmp; size_t i; ovs_assert(is_pow2(new_mask + 1)); hmap_init(&tmp); if (new_mask) { tmp.buckets = xmalloc(sizeof *tmp.buckets * (new_mask + 1)); tmp.mask = new_mask; for (i = 0; i <= tmp.mask; i++) { tmp.buckets[i] = NULL; } } for (i = 0; i <= hmap->mask; i++) { struct hmap_node *node, *next; int count = 0; for (node = hmap->buckets[i]; node; node = next) { next = node->next; hmap_insert_fast(&tmp, node, node->hash); count++; } if (count > 5) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); COVERAGE_INC(hmap_pathological); VLOG_DBG_RL(&rl, "%s: %d nodes in bucket (%zu nodes, %zu buckets)", where, count, hmap->n, hmap->mask + 1); } } hmap_swap(hmap, &tmp); hmap_destroy(&tmp); } /* Returns the bitwise mask to use in struct hmap to support 'capacity' nodes: * 'capacity' / 2 with every bit below its highest set bit filled in. The * result is one less than a power of 2 and is never exactly 1. */ static size_t calc_mask(size_t capacity) { size_t mask = capacity / 2; mask |= mask >> 1; mask |= mask >> 2; mask |= mask >> 4; mask |= mask >> 8; mask |= mask >> 16; #if SIZE_MAX > UINT32_MAX mask |= mask >> 32; #endif /* If we need to dynamically allocate buckets we might as well allocate at * least 4 of them. */ mask |= (mask & 1) << 1; return mask; } /* Expands 'hmap', if necessary, to optimize the performance of searches. * * ('where' is used in debug logging. Commonly one would use hmap_expand() to * automatically provide the caller's source file and line number for * 'where'.)
*/ void hmap_expand_at(struct hmap *hmap, const char *where) { size_t new_mask = calc_mask(hmap->n); if (new_mask > hmap->mask) { COVERAGE_INC(hmap_expand); resize(hmap, new_mask, where); } } /* Shrinks 'hmap', if necessary, to optimize the performance of iteration. * * ('where' is used in debug logging. Commonly one would use hmap_shrink() to * automatically provide the caller's source file and line number for * 'where'.) */ void hmap_shrink_at(struct hmap *hmap, const char *where) { size_t new_mask = calc_mask(hmap->n); if (new_mask < hmap->mask) { COVERAGE_INC(hmap_shrink); resize(hmap, new_mask, where); } } /* Expands 'hmap', if necessary, to optimize the performance of searches when * it has up to 'n' elements. (But iteration will be slow in a hash map whose * allocated capacity is much higher than its current number of nodes.) * * ('where' is used in debug logging. Commonly one would use hmap_reserve() to * automatically provide the caller's source file and line number for * 'where'.) */ void hmap_reserve_at(struct hmap *hmap, size_t n, const char *where) { size_t new_mask = calc_mask(n); if (new_mask > hmap->mask) { COVERAGE_INC(hmap_reserve); resize(hmap, new_mask, where); } } /* Adjusts 'hmap' to compensate for 'old_node' having moved position in memory * to 'node' (e.g. due to realloc()). * * 'node' must already contain the hash that 'old_node' had, because * 'node->hash' selects the bucket that is searched, and 'old_node' must be * linked into that bucket. */ void hmap_node_moved(struct hmap *hmap, struct hmap_node *old_node, struct hmap_node *node) { struct hmap_node **bucket = &hmap->buckets[node->hash & hmap->mask]; /* Find the link that still points at 'old_node'. */ while (*bucket != old_node) { bucket = &(*bucket)->next; } *bucket = node; } /* Chooses and returns a randomly selected node from 'hmap', which must not be * empty. * * I wouldn't depend on this algorithm to be fair, since I haven't analyzed it. * But it does at least ensure that any node in 'hmap' can be chosen. */ struct hmap_node * hmap_random_node(const struct hmap *hmap) { struct hmap_node *bucket, *node; size_t n, i; /* Choose a random non-empty bucket.
*/ /* (If every bucket were empty, this loop would never terminate.) */ for (i = random_uint32(); ; i++) { bucket = hmap->buckets[i & hmap->mask]; if (bucket) { break; } } /* Count nodes in bucket. */ n = 0; for (node = bucket; node; node = node->next) { n++; } /* Choose random node from bucket. */ i = random_range(n); for (node = bucket; i-- > 0; node = node->next) { continue; } return node; } /* Returns the next node in 'hmap' in hash order, or NULL if no nodes remain in * 'hmap'. Uses '*bucketp' and '*offsetp' to determine where to begin * iteration, and stores new values to pass on the next iteration into them * before returning. * * It's better to use plain HMAP_FOR_EACH and related functions, since they are * faster and better at dealing with hmaps that change during iteration. * * Before beginning iteration, store 0 into '*bucketp' and '*offsetp'. */ struct hmap_node * hmap_at_position(const struct hmap *hmap, uint32_t *bucketp, uint32_t *offsetp) { size_t offset; size_t b_idx; offset = *offsetp; for (b_idx = *bucketp; b_idx <= hmap->mask; b_idx++) { struct hmap_node *node; size_t n_idx; for (n_idx = 0, node = hmap->buckets[b_idx]; node != NULL; n_idx++, node = node->next) { if (n_idx == offset) { if (node->next) { *bucketp = node->hash & hmap->mask; *offsetp = offset + 1; } else { *bucketp = (node->hash & hmap->mask) + 1; *offsetp = 0; } return node; } } /* Every bucket after the starting one is scanned from its first node. */ offset = 0; } *bucketp = 0; *offsetp = 0; return NULL; } /* Returns true if 'node' is in 'hmap', false otherwise. */ bool hmap_contains(const struct hmap *hmap, const struct hmap_node *node) { struct hmap_node *p; for (p = hmap_first_in_bucket(hmap, node->hash); p; p = p->next) { if (p == node) { return true; } } return false; } openvswitch-2.0.1+git20140120/lib/hmap.h000066400000000000000000000310621226605124000173200ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef HMAP_H #define HMAP_H 1 #include #include #include "util.h" #ifdef __cplusplus extern "C" { #endif /* A hash map node, to be embedded inside the data structure being mapped. */ struct hmap_node { size_t hash; /* Hash value. */ struct hmap_node *next; /* Next in linked list. */ }; /* Returns the hash value embedded in 'node'. */ static inline size_t hmap_node_hash(const struct hmap_node *node) { return node->hash; } /* HMAP_NODE_NULL is a distinctive non-NULL value that is stored in a node's * 'next' member to mark the node as "null"; see hmap_node_nullify() and * hmap_node_is_null(). */ #define HMAP_NODE_NULL ((struct hmap_node *) 1) #define HMAP_NODE_NULL_INITIALIZER { 0, HMAP_NODE_NULL } /* Returns true if 'node' has been set to null by hmap_node_nullify() and has * not been un-nullified by being inserted into an hmap. */ static inline bool hmap_node_is_null(const struct hmap_node *node) { return node->next == HMAP_NODE_NULL; } /* Marks 'node' with a distinctive value that can be tested with * hmap_node_is_null(). */ static inline void hmap_node_nullify(struct hmap_node *node) { node->next = HMAP_NODE_NULL; } /* A hash map. */ struct hmap { struct hmap_node **buckets; /* Must point to 'one' iff 'mask' == 0. */ struct hmap_node *one; /* The sole bucket while 'mask' == 0. */ size_t mask; /* ANDed with a hash to select a bucket. */ size_t n; /* Number of nodes in the map. */ }; /* Initializer for an empty hash map. */ #define HMAP_INITIALIZER(HMAP) \ { (struct hmap_node **const) &(HMAP)->one, NULL, 0, 0 } /* Initialization. */ void hmap_init(struct hmap *); void hmap_destroy(struct hmap *); void hmap_clear(struct hmap *); void hmap_swap(struct hmap *a, struct hmap *b); void hmap_moved(struct hmap *hmap); static inline size_t hmap_count(const struct hmap *); static inline bool hmap_is_empty(const struct hmap *); /* Adjusting capacity.
*/ void hmap_expand_at(struct hmap *, const char *where); #define hmap_expand(HMAP) hmap_expand_at(HMAP, SOURCE_LOCATOR) void hmap_shrink_at(struct hmap *, const char *where); #define hmap_shrink(HMAP) hmap_shrink_at(HMAP, SOURCE_LOCATOR) void hmap_reserve_at(struct hmap *, size_t capacity, const char *where); #define hmap_reserve(HMAP, CAPACITY) \ hmap_reserve_at(HMAP, CAPACITY, SOURCE_LOCATOR) /* Insertion and deletion. */ static inline void hmap_insert_at(struct hmap *, struct hmap_node *, size_t hash, const char *where); #define hmap_insert(HMAP, NODE, HASH) \ hmap_insert_at(HMAP, NODE, HASH, SOURCE_LOCATOR) static inline void hmap_insert_fast(struct hmap *, struct hmap_node *, size_t hash); static inline void hmap_remove(struct hmap *, struct hmap_node *); void hmap_node_moved(struct hmap *, struct hmap_node *, struct hmap_node *); static inline void hmap_replace(struct hmap *, const struct hmap_node *old, struct hmap_node *new_node); struct hmap_node *hmap_random_node(const struct hmap *); /* Search. * * HMAP_FOR_EACH_WITH_HASH iterates NODE over all of the nodes in HMAP that * have hash value equal to HASH. HMAP_FOR_EACH_IN_BUCKET iterates NODE over * all of the nodes in HMAP that would fall in the same bucket as HASH. MEMBER * must be the name of the 'struct hmap_node' member within NODE. * * These macros may be used interchangeably to search for a particular value in * an hmap, see, e.g. shash_find() for an example. Usually, using * HMAP_FOR_EACH_WITH_HASH provides an optimization, because comparing a hash * value is usually cheaper than comparing an entire hash map key. But for * simple hash map keys, it makes sense to use HMAP_FOR_EACH_IN_BUCKET because * it avoids doing two comparisons when a single simple comparison suffices. * * The loop should not change NODE to point to a different node or insert or * delete nodes in HMAP (unless it "break"s out of the loop to terminate * iteration). * * HASH is only evaluated once.
*/ #define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ for (ASSIGN_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \ NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ ASSIGN_CONTAINER(NODE, hmap_next_with_hash(&(NODE)->MEMBER), \ MEMBER)) #define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ for (ASSIGN_CONTAINER(NODE, hmap_first_in_bucket(HMAP, HASH), MEMBER); \ NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ ASSIGN_CONTAINER(NODE, hmap_next_in_bucket(&(NODE)->MEMBER), MEMBER)) static inline struct hmap_node *hmap_first_with_hash(const struct hmap *, size_t hash); static inline struct hmap_node *hmap_next_with_hash(const struct hmap_node *); static inline struct hmap_node *hmap_first_in_bucket(const struct hmap *, size_t hash); static inline struct hmap_node *hmap_next_in_bucket(const struct hmap_node *); bool hmap_contains(const struct hmap *, const struct hmap_node *); /* Iteration. */ /* Iterates through every node in HMAP. */ #define HMAP_FOR_EACH(NODE, MEMBER, HMAP) \ for (ASSIGN_CONTAINER(NODE, hmap_first(HMAP), MEMBER); \ NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash map but its members remain accessible and intact). */ #define HMAP_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HMAP) \ for (ASSIGN_CONTAINER(NODE, hmap_first(HMAP), MEMBER); \ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ ? ASSIGN_CONTAINER(NEXT, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), 1 \ : 0); \ (NODE) = (NEXT)) /* Continues an iteration from just after NODE.
*/ #define HMAP_FOR_EACH_CONTINUE(NODE, MEMBER, HMAP) \ for (ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER); \ NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) static inline struct hmap_node *hmap_first(const struct hmap *); static inline struct hmap_node *hmap_next(const struct hmap *, const struct hmap_node *); struct hmap_node *hmap_at_position(const struct hmap *, uint32_t *bucket, uint32_t *offset); /* Returns the number of nodes currently in 'hmap'. */ static inline size_t hmap_count(const struct hmap *hmap) { return hmap->n; } /* Returns the maximum number of nodes that 'hmap' may hold before it should be * rehashed. */ static inline size_t hmap_capacity(const struct hmap *hmap) { return hmap->mask * 2 + 1; } /* Returns true if 'hmap' currently contains no nodes, * false otherwise. */ static inline bool hmap_is_empty(const struct hmap *hmap) { return hmap->n == 0; } /* Inserts 'node', with the given 'hash', into 'hmap'. 'hmap' is never * expanded automatically. * * 'node' is pushed onto the front of its bucket's chain. */ static inline void hmap_insert_fast(struct hmap *hmap, struct hmap_node *node, size_t hash) { struct hmap_node **bucket = &hmap->buckets[hash & hmap->mask]; node->hash = hash; node->next = *bucket; *bucket = node; hmap->n++; } /* Inserts 'node', with the given 'hash', into 'hmap', and expands 'hmap' if * necessary to optimize search performance. * * ('where' is used in debug logging. Commonly one would use hmap_insert() to * automatically provide the caller's source file and line number for * 'where'.) */ static inline void hmap_insert_at(struct hmap *hmap, struct hmap_node *node, size_t hash, const char *where) { hmap_insert_fast(hmap, node, hash); /* Consider expanding once there are at least two nodes per bucket on * average ('mask' + 1 is the number of buckets). */ if (hmap->n / 2 > hmap->mask) { hmap_expand_at(hmap, where); } } /* Removes 'node' from 'hmap'. Does not shrink the hash table; call * hmap_shrink() directly if desired.
*/ static inline void hmap_remove(struct hmap *hmap, struct hmap_node *node) { struct hmap_node **bucket = &hmap->buckets[node->hash & hmap->mask]; /* 'node' must be in 'hmap': this search does not stop at the end of the * chain. */ while (*bucket != node) { bucket = &(*bucket)->next; } *bucket = node->next; hmap->n--; } /* Puts 'new_node' in the position in 'hmap' currently occupied by 'old_node'. * The 'new_node' must hash to the same value as 'old_node'. The client is * responsible for ensuring that the replacement does not violate any * client-imposed invariants (e.g. uniqueness of keys within a map). * * Afterward, 'old_node' is not part of 'hmap', and the client is responsible * for freeing it (if this is desirable). */ static inline void hmap_replace(struct hmap *hmap, const struct hmap_node *old_node, struct hmap_node *new_node) { struct hmap_node **bucket = &hmap->buckets[old_node->hash & hmap->mask]; while (*bucket != old_node) { bucket = &(*bucket)->next; } *bucket = new_node; new_node->hash = old_node->hash; new_node->next = old_node->next; } /* Returns 'node' itself, or the first node after it in its bucket's chain, * whose hash equals 'hash', or a null pointer if there is none. 'node' may be * null. */ static inline struct hmap_node * hmap_next_with_hash__(const struct hmap_node *node, size_t hash) { while (node != NULL && node->hash != hash) { node = node->next; } return CONST_CAST(struct hmap_node *, node); } /* Returns the first node in 'hmap' with the given 'hash', or a null pointer if * no nodes have that hash value. */ static inline struct hmap_node * hmap_first_with_hash(const struct hmap *hmap, size_t hash) { return hmap_next_with_hash__(hmap->buckets[hash & hmap->mask], hash); } /* Returns the first node in 'hmap' in the bucket in which the given 'hash' * would land, or a null pointer if that bucket is empty. */ static inline struct hmap_node * hmap_first_in_bucket(const struct hmap *hmap, size_t hash) { return hmap->buckets[hash & hmap->mask]; } /* Returns the next node in the same bucket as 'node', or a null pointer if * there are no more nodes in that bucket.
* * If the hash map has been reallocated since 'node' was visited, some nodes * may be skipped; if new nodes with the same hash value have been added, they * will be skipped. (Removing 'node' from the hash map does not prevent * calling this function, since node->next is preserved, although freeing * 'node' of course does.) */ static inline struct hmap_node * hmap_next_in_bucket(const struct hmap_node *node) { return node->next; } /* Returns the next node in the same hash map as 'node' with the same hash * value, or a null pointer if no more nodes have that hash value. * * If the hash map has been reallocated since 'node' was visited, some nodes * may be skipped; if new nodes with the same hash value have been added, they * will be skipped. (Removing 'node' from the hash map does not prevent * calling this function, since node->next is preserved, although freeing * 'node' of course does.) */ static inline struct hmap_node * hmap_next_with_hash(const struct hmap_node *node) { return hmap_next_with_hash__(node->next, node->hash); } /* Returns the first node of the first non-empty bucket in 'hmap' whose index * is 'start' or greater, or a null pointer if every such bucket is empty. */ static inline struct hmap_node * hmap_next__(const struct hmap *hmap, size_t start) { size_t i; for (i = start; i <= hmap->mask; i++) { struct hmap_node *node = hmap->buckets[i]; if (node) { return node; } } return NULL; } /* Returns the first node in 'hmap', in arbitrary order, or a null pointer if * 'hmap' is empty. */ static inline struct hmap_node * hmap_first(const struct hmap *hmap) { return hmap_next__(hmap, 0); } /* Returns the next node in 'hmap' following 'node', in arbitrary order, or a * null pointer if 'node' is the last node in 'hmap'. * * If the hash map has been reallocated since 'node' was visited, some nodes * may be skipped or visited twice. (Removing 'node' from the hash map does * not prevent calling this function, since node->next is preserved, although * freeing 'node' of course does.) */ static inline struct hmap_node * hmap_next(const struct hmap *hmap, const struct hmap_node *node) { return (node->next ?
node->next : hmap_next__(hmap, (node->hash & hmap->mask) + 1)); } #ifdef __cplusplus } #endif #endif /* hmap.h */ openvswitch-2.0.1+git20140120/lib/hmapx.c000066400000000000000000000112011226605124000174740ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "hmapx.h" #include "hash.h" /* Returns the node in 'map' whose data pointer equals 'data', searching only * the bucket selected by 'hash', or a null pointer if there is none. */ static struct hmapx_node * hmapx_find__(const struct hmapx *map, const void *data, size_t hash) { struct hmapx_node *node; HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash, &map->map) { if (node->data == data) { return node; } } return NULL; } /* Allocates a new node for 'data', inserts it into 'map' with the given * 'hash' without checking for a duplicate, and returns the new node. */ static struct hmapx_node * hmapx_add__(struct hmapx *map, void *data, size_t hash) { struct hmapx_node *node = xmalloc(sizeof *node); node->data = data; hmap_insert(&map->map, &node->hmap_node, hash); return node; } /* Initializes 'map' as an empty set of pointers. */ void hmapx_init(struct hmapx *map) { hmap_init(&map->map); } /* Destroys 'map'. */ void hmapx_destroy(struct hmapx *map) { if (map) { hmapx_clear(map); hmap_destroy(&map->map); } } /* Initializes 'map' to contain the same pointers as 'orig'. */ void hmapx_clone(struct hmapx *map, const struct hmapx *orig) { struct hmapx_node *node; hmapx_init(map); HMAP_FOR_EACH (node, hmap_node, &orig->map) { hmapx_add__(map, node->data, node->hmap_node.hash); } } /* Exchanges the contents of 'a' and 'b'.
*/ void hmapx_swap(struct hmapx *a, struct hmapx *b) { hmap_swap(&a->map, &b->map); } /* Adjusts 'map' so that it is still valid after it has been moved around in * memory (e.g. due to realloc()). */ void hmapx_moved(struct hmapx *map) { hmap_moved(&map->map); } /* Returns true if 'map' contains no nodes, false if it contains at least one * node. */ bool hmapx_is_empty(const struct hmapx *map) { return hmap_is_empty(&map->map); } /* Returns the number of nodes in 'map'. */ size_t hmapx_count(const struct hmapx *map) { return hmap_count(&map->map); } /* Adds 'data' to 'map'. If 'data' is new, returns the new hmapx_node; * otherwise (if a 'data' already existed in 'map'), returns NULL. */ struct hmapx_node * hmapx_add(struct hmapx *map, void *data) { uint32_t hash = hash_pointer(data, 0); return (hmapx_find__(map, data, hash) ? NULL : hmapx_add__(map, data, hash)); } /* Adds 'data' to 'map'. Assert-fails if 'data' was already in 'map'. */ void hmapx_add_assert(struct hmapx *map, void *data) { bool added OVS_UNUSED = hmapx_add(map, data); ovs_assert(added); } /* Removes all of the nodes from 'map' and frees them. */ void hmapx_clear(struct hmapx *map) { struct hmapx_node *node, *next; HMAPX_FOR_EACH_SAFE (node, next, map) { hmapx_delete(map, node); } } /* Deletes 'node' from 'map' and frees 'node'. */ void hmapx_delete(struct hmapx *map, struct hmapx_node *node) { hmap_remove(&map->map, &node->hmap_node); free(node); } /* Searches for 'data' in 'map'. If found, deletes it and returns true. If * not found, returns false without modifying 'map'. */ bool hmapx_find_and_delete(struct hmapx *map, const void *data) { struct hmapx_node *node = hmapx_find(map, data); if (node) { hmapx_delete(map, node); } /* 'node' may have been freed above; only its value is tested here. */ return node != NULL; } /* Searches for 'data' in 'map' and deletes it. Assert-fails if 'data' is not * in 'map'.
*/ void hmapx_find_and_delete_assert(struct hmapx *map, const void *data) { bool deleted OVS_UNUSED = hmapx_find_and_delete(map, data); ovs_assert(deleted); } /* Searches for 'data' in 'map'. Returns its node, if found, otherwise a null * pointer. */ struct hmapx_node * hmapx_find(const struct hmapx *map, const void *data) { return hmapx_find__(map, data, hash_pointer(data, 0)); } /* Returns true if 'map' contains 'data', false otherwise. */ bool hmapx_contains(const struct hmapx *map, const void *data) { return hmapx_find(map, data) != NULL; } /* Returns true if 'a' and 'b' contain the same pointers, false otherwise. */ bool hmapx_equals(const struct hmapx *a, const struct hmapx *b) { struct hmapx_node *node; if (hmapx_count(a) != hmapx_count(b)) { return false; } /* The counts match, so it is enough to check that every pointer in 'a' is * also in 'b'. */ HMAP_FOR_EACH (node, hmap_node, &a->map) { if (!hmapx_find__(b, node->data, node->hmap_node.hash)) { return false; } } return true; } openvswitch-2.0.1+git20140120/lib/hmapx.h000066400000000000000000000042301226605124000175050ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef HMAPX_H #define HMAPX_H #include "hmap.h" /* One element of a struct hmapx. */ struct hmapx_node { struct hmap_node hmap_node; /* Links the node into the hmapx's 'map'. */ void *data; /* The pointer stored in the set. */ }; /* A set of "void *" pointers. */ struct hmapx { struct hmap map; }; #define HMAPX_INITIALIZER(HMAPX) { HMAP_INITIALIZER(&(HMAPX)->map) } /* Basics.
*/ void hmapx_init(struct hmapx *); void hmapx_destroy(struct hmapx *); void hmapx_clone(struct hmapx *, const struct hmapx *); void hmapx_swap(struct hmapx *, struct hmapx *); void hmapx_moved(struct hmapx *); /* Count. */ bool hmapx_is_empty(const struct hmapx *); size_t hmapx_count(const struct hmapx *); /* Insertion. */ struct hmapx_node *hmapx_add(struct hmapx *, void *); void hmapx_add_assert(struct hmapx *, void *); /* Deletion. */ void hmapx_clear(struct hmapx *); void hmapx_delete(struct hmapx *, struct hmapx_node *); bool hmapx_find_and_delete(struct hmapx *, const void *); void hmapx_find_and_delete_assert(struct hmapx *, const void *); /* Search. */ struct hmapx_node *hmapx_find(const struct hmapx *, const void *); bool hmapx_contains(const struct hmapx *, const void *); bool hmapx_equals(const struct hmapx *, const struct hmapx *); /* Iteration. */ /* Iterates through every hmapx_node in HMAPX. */ #define HMAPX_FOR_EACH(NODE, HMAPX) \ HMAP_FOR_EACH(NODE, hmap_node, &(HMAPX)->map) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash map but its members remain accessible and intact). */ #define HMAPX_FOR_EACH_SAFE(NODE, NEXT, HMAPX) \ HMAP_FOR_EACH_SAFE(NODE, NEXT, hmap_node, &(HMAPX)->map) #endif /* hmapx.h */ openvswitch-2.0.1+git20140120/lib/jhash.c000066400000000000000000000063631226605124000174710ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License.
*/ #include #include "jhash.h" #include #include "unaligned.h" /* This is the public domain lookup3 hash by Bob Jenkins from * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */ static inline uint32_t jhash_rot(uint32_t x, int k) { return (x << k) | (x >> (32 - k)); } static inline void jhash_mix(uint32_t *a, uint32_t *b, uint32_t *c) { *a -= *c; *a ^= jhash_rot(*c, 4); *c += *b; *b -= *a; *b ^= jhash_rot(*a, 6); *a += *c; *c -= *b; *c ^= jhash_rot(*b, 8); *b += *a; *a -= *c; *a ^= jhash_rot(*c, 16); *c += *b; *b -= *a; *b ^= jhash_rot(*a, 19); *a += *c; *c -= *b; *c ^= jhash_rot(*b, 4); *b += *a; } static inline void jhash_final(uint32_t *a, uint32_t *b, uint32_t *c) { *c ^= *b; *c -= jhash_rot(*b, 14); *a ^= *c; *a -= jhash_rot(*c, 11); *b ^= *a; *b -= jhash_rot(*a, 25); *c ^= *b; *c -= jhash_rot(*b, 16); *a ^= *c; *a -= jhash_rot(*c, 4); *b ^= *a; *b -= jhash_rot(*a, 14); *c ^= *b; *c -= jhash_rot(*b, 24); } /* Returns the Jenkins hash of the 'n' 32-bit words at 'p', starting from * 'basis'. 'p' must be properly aligned. * * Use hash_words() instead, unless you're computing a hash function whose * value is exposed "on the wire" so we don't want to change it. */ uint32_t jhash_words(const uint32_t *p, size_t n, uint32_t basis) { uint32_t a, b, c; a = b = c = 0xdeadbeef + (((uint32_t) n) << 2) + basis; while (n > 3) { a += p[0]; b += p[1]; c += p[2]; jhash_mix(&a, &b, &c); n -= 3; p += 3; } switch (n) { case 3: c += p[2]; /* fall through */ case 2: b += p[1]; /* fall through */ case 1: a += p[0]; jhash_final(&a, &b, &c); /* fall through */ case 0: break; } return c; } /* Returns the Jenkins hash of the 'n' bytes at 'p', starting from 'basis'. * * Use hash_bytes() instead, unless you're computing a hash function whose * value is exposed "on the wire" so we don't want to change it. 
*/ uint32_t jhash_bytes(const void *p_, size_t n, uint32_t basis) { const uint32_t *p = p_; uint32_t a, b, c; a = b = c = 0xdeadbeef + n + basis; while (n >= 12) { a += get_unaligned_u32(p); b += get_unaligned_u32(p + 1); c += get_unaligned_u32(p + 2); jhash_mix(&a, &b, &c); n -= 12; p += 3; } if (n) { uint32_t tmp[3]; tmp[0] = tmp[1] = tmp[2] = 0; memcpy(tmp, p, n); a += tmp[0]; b += tmp[1]; c += tmp[2]; jhash_final(&a, &b, &c); } return c; } openvswitch-2.0.1+git20140120/lib/jhash.h000066400000000000000000000024061226605124000174700ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef JHASH_H #define JHASH_H 1 #include #include #include #include #include "util.h" #ifdef __cplusplus extern "C" { #endif /* This is the public domain lookup3 hash by Bob Jenkins from * http://burtleburtle.net/bob/c/lookup3.c, modified for style. * * Use the functions in hash.h instead if you can. These are here just for * places where we've exposed a hash function "on the wire" and don't want it * to change. */ uint32_t jhash_words(const uint32_t *, size_t n_word, uint32_t basis); uint32_t jhash_bytes(const void *, size_t n_bytes, uint32_t basis); #ifdef __cplusplus } #endif #endif /* jhash.h */ openvswitch-2.0.1+git20140120/lib/json.c000066400000000000000000001203201226605124000173330ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "json.h" #include #include #include #include #include #include "dynamic-string.h" #include "hash.h" #include "shash.h" #include "unicode.h" #include "util.h" /* The type of a JSON token. */ enum json_token_type { T_EOF = 0, T_BEGIN_ARRAY = '[', T_END_ARRAY = ']', T_BEGIN_OBJECT = '{', T_END_OBJECT = '}', T_NAME_SEPARATOR = ':', T_VALUE_SEPARATOR = ',', T_FALSE = UCHAR_MAX + 1, T_NULL, T_TRUE, T_INTEGER, T_REAL, T_STRING }; /* A JSON token. * * RFC 4627 doesn't define a lexical structure for JSON but I believe this to * be compliant with the standard. */ struct json_token { enum json_token_type type; union { double real; long long int integer; const char *string; } u; }; enum json_lex_state { JSON_LEX_START, /* Not inside a token. */ JSON_LEX_NUMBER, /* Reading a number. */ JSON_LEX_KEYWORD, /* Reading a keyword. */ JSON_LEX_STRING, /* Reading a quoted string. */ JSON_LEX_ESCAPE /* In a quoted string just after a "\". */ }; enum json_parse_state { JSON_PARSE_START, /* Beginning of input. */ JSON_PARSE_END, /* End of input. */ /* Objects. */ JSON_PARSE_OBJECT_INIT, /* Expecting '}' or an object name. */ JSON_PARSE_OBJECT_NAME, /* Expecting an object name. */ JSON_PARSE_OBJECT_COLON, /* Expecting ':'. */ JSON_PARSE_OBJECT_VALUE, /* Expecting an object value. */ JSON_PARSE_OBJECT_NEXT, /* Expecting ',' or '}'. */ /* Arrays. */ JSON_PARSE_ARRAY_INIT, /* Expecting ']' or a value. 
*/ JSON_PARSE_ARRAY_VALUE, /* Expecting a value. */ JSON_PARSE_ARRAY_NEXT /* Expecting ',' or ']'. */ }; struct json_parser_node { struct json *json; }; /* A JSON parser. */ struct json_parser { int flags; /* Lexical analysis. */ enum json_lex_state lex_state; struct ds buffer; /* Buffer for accumulating token text. */ int line_number; int column_number; int byte_number; /* Parsing. */ enum json_parse_state parse_state; #define JSON_MAX_HEIGHT 1000 struct json_parser_node *stack; size_t height, allocated_height; char *member_name; /* Parse status. */ bool done; char *error; /* Error message, if any, null if none yet. */ }; static struct json *json_create(enum json_type type); static void json_parser_input(struct json_parser *, struct json_token *); static void json_error(struct json_parser *p, const char *format, ...) PRINTF_FORMAT(2, 3); const char * json_type_to_string(enum json_type type) { switch (type) { case JSON_NULL: return "null"; case JSON_FALSE: return "false"; case JSON_TRUE: return "true"; case JSON_OBJECT: return "object"; case JSON_ARRAY: return "array"; case JSON_INTEGER: case JSON_REAL: return "number"; case JSON_STRING: return "string"; case JSON_N_TYPES: default: return ""; } } /* Functions for manipulating struct json. */ struct json * json_null_create(void) { return json_create(JSON_NULL); } struct json * json_boolean_create(bool b) { return json_create(b ? 
JSON_TRUE : JSON_FALSE); } struct json * json_string_create_nocopy(char *s) { struct json *json = json_create(JSON_STRING); json->u.string = s; return json; } struct json * json_string_create(const char *s) { return json_string_create_nocopy(xstrdup(s)); } struct json * json_array_create_empty(void) { struct json *json = json_create(JSON_ARRAY); json->u.array.elems = NULL; json->u.array.n = 0; json->u.array.n_allocated = 0; return json; } void json_array_add(struct json *array_, struct json *element) { struct json_array *array = json_array(array_); if (array->n >= array->n_allocated) { array->elems = x2nrealloc(array->elems, &array->n_allocated, sizeof *array->elems); } array->elems[array->n++] = element; } void json_array_trim(struct json *array_) { struct json_array *array = json_array(array_); if (array->n < array->n_allocated){ array->n_allocated = array->n; array->elems = xrealloc(array->elems, array->n * sizeof *array->elems); } } struct json * json_array_create(struct json **elements, size_t n) { struct json *json = json_create(JSON_ARRAY); json->u.array.elems = elements; json->u.array.n = n; json->u.array.n_allocated = n; return json; } struct json * json_array_create_1(struct json *elem0) { struct json **elems = xmalloc(sizeof *elems); elems[0] = elem0; return json_array_create(elems, 1); } struct json * json_array_create_2(struct json *elem0, struct json *elem1) { struct json **elems = xmalloc(2 * sizeof *elems); elems[0] = elem0; elems[1] = elem1; return json_array_create(elems, 2); } struct json * json_array_create_3(struct json *elem0, struct json *elem1, struct json *elem2) { struct json **elems = xmalloc(3 * sizeof *elems); elems[0] = elem0; elems[1] = elem1; elems[2] = elem2; return json_array_create(elems, 3); } struct json * json_object_create(void) { struct json *json = json_create(JSON_OBJECT); json->u.object = xmalloc(sizeof *json->u.object); shash_init(json->u.object); return json; } struct json * json_integer_create(long long int integer) { 
struct json *json = json_create(JSON_INTEGER); json->u.integer = integer; return json; } struct json * json_real_create(double real) { struct json *json = json_create(JSON_REAL); json->u.real = real; return json; } void json_object_put(struct json *json, const char *name, struct json *value) { json_destroy(shash_replace(json->u.object, name, value)); } void json_object_put_string(struct json *json, const char *name, const char *value) { json_object_put(json, name, json_string_create(value)); } const char * json_string(const struct json *json) { ovs_assert(json->type == JSON_STRING); return json->u.string; } struct json_array * json_array(const struct json *json) { ovs_assert(json->type == JSON_ARRAY); return CONST_CAST(struct json_array *, &json->u.array); } struct shash * json_object(const struct json *json) { ovs_assert(json->type == JSON_OBJECT); return CONST_CAST(struct shash *, json->u.object); } bool json_boolean(const struct json *json) { ovs_assert(json->type == JSON_TRUE || json->type == JSON_FALSE); return json->type == JSON_TRUE; } double json_real(const struct json *json) { ovs_assert(json->type == JSON_REAL || json->type == JSON_INTEGER); return json->type == JSON_REAL ? json->u.real : json->u.integer; } int64_t json_integer(const struct json *json) { ovs_assert(json->type == JSON_INTEGER); return json->u.integer; } static void json_destroy_object(struct shash *object); static void json_destroy_array(struct json_array *array); /* Frees 'json' and everything it points to, recursively. 
*/ void json_destroy(struct json *json) { if (json) { switch (json->type) { case JSON_OBJECT: json_destroy_object(json->u.object); break; case JSON_ARRAY: json_destroy_array(&json->u.array); break; case JSON_STRING: free(json->u.string); break; case JSON_NULL: case JSON_FALSE: case JSON_TRUE: case JSON_INTEGER: case JSON_REAL: break; case JSON_N_TYPES: NOT_REACHED(); } free(json); } } static void json_destroy_object(struct shash *object) { struct shash_node *node, *next; SHASH_FOR_EACH_SAFE (node, next, object) { struct json *value = node->data; json_destroy(value); shash_delete(object, node); } shash_destroy(object); free(object); } static void json_destroy_array(struct json_array *array) { size_t i; for (i = 0; i < array->n; i++) { json_destroy(array->elems[i]); } free(array->elems); } static struct json *json_clone_object(const struct shash *object); static struct json *json_clone_array(const struct json_array *array); /* Returns a deep copy of 'json'. */ struct json * json_clone(const struct json *json) { switch (json->type) { case JSON_OBJECT: return json_clone_object(json->u.object); case JSON_ARRAY: return json_clone_array(&json->u.array); case JSON_STRING: return json_string_create(json->u.string); case JSON_NULL: case JSON_FALSE: case JSON_TRUE: return json_create(json->type); case JSON_INTEGER: return json_integer_create(json->u.integer); case JSON_REAL: return json_real_create(json->u.real); case JSON_N_TYPES: default: NOT_REACHED(); } } static struct json * json_clone_object(const struct shash *object) { struct shash_node *node; struct json *json; json = json_object_create(); SHASH_FOR_EACH (node, object) { struct json *value = node->data; json_object_put(json, node->name, json_clone(value)); } return json; } static struct json * json_clone_array(const struct json_array *array) { struct json **elems; size_t i; elems = xmalloc(array->n * sizeof *elems); for (i = 0; i < array->n; i++) { elems[i] = json_clone(array->elems[i]); } return 
json_array_create(elems, array->n); } static size_t json_hash_object(const struct shash *object, size_t basis) { const struct shash_node **nodes; size_t n, i; nodes = shash_sort(object); n = shash_count(object); for (i = 0; i < n; i++) { const struct shash_node *node = nodes[i]; basis = hash_string(node->name, basis); basis = json_hash(node->data, basis); } return basis; } static size_t json_hash_array(const struct json_array *array, size_t basis) { size_t i; basis = hash_int(array->n, basis); for (i = 0; i < array->n; i++) { basis = json_hash(array->elems[i], basis); } return basis; } size_t json_hash(const struct json *json, size_t basis) { switch (json->type) { case JSON_OBJECT: return json_hash_object(json->u.object, basis); case JSON_ARRAY: return json_hash_array(&json->u.array, basis); case JSON_STRING: return hash_string(json->u.string, basis); case JSON_NULL: case JSON_FALSE: case JSON_TRUE: return hash_int(json->type << 8, basis); case JSON_INTEGER: return hash_int(json->u.integer, basis); case JSON_REAL: return hash_double(json->u.real, basis); case JSON_N_TYPES: default: NOT_REACHED(); } } static bool json_equal_object(const struct shash *a, const struct shash *b) { struct shash_node *a_node; if (shash_count(a) != shash_count(b)) { return false; } SHASH_FOR_EACH (a_node, a) { struct shash_node *b_node = shash_find(b, a_node->name); if (!b_node || !json_equal(a_node->data, b_node->data)) { return false; } } return true; } static bool json_equal_array(const struct json_array *a, const struct json_array *b) { size_t i; if (a->n != b->n) { return false; } for (i = 0; i < a->n; i++) { if (!json_equal(a->elems[i], b->elems[i])) { return false; } } return true; } bool json_equal(const struct json *a, const struct json *b) { if (a->type != b->type) { return false; } switch (a->type) { case JSON_OBJECT: return json_equal_object(a->u.object, b->u.object); case JSON_ARRAY: return json_equal_array(&a->u.array, &b->u.array); case JSON_STRING: return 
!strcmp(a->u.string, b->u.string); case JSON_NULL: case JSON_FALSE: case JSON_TRUE: return true; case JSON_INTEGER: return a->u.integer == b->u.integer; case JSON_REAL: return a->u.real == b->u.real; case JSON_N_TYPES: default: NOT_REACHED(); } } /* Lexical analysis. */ static void json_lex_keyword(struct json_parser *p) { struct json_token token; const char *s; s = ds_cstr(&p->buffer); if (!strcmp(s, "false")) { token.type = T_FALSE; } else if (!strcmp(s, "true")) { token.type = T_TRUE; } else if (!strcmp(s, "null")) { token.type = T_NULL; } else { json_error(p, "invalid keyword '%s'", s); return; } json_parser_input(p, &token); } static void json_lex_number(struct json_parser *p) { const char *cp = ds_cstr(&p->buffer); unsigned long long int significand = 0; struct json_token token; bool imprecise = false; bool negative = false; int pow10 = 0; /* Leading minus sign. */ if (*cp == '-') { negative = true; cp++; } /* At least one integer digit, but 0 may not be used as a leading digit for * a longer number. */ significand = 0; if (*cp == '0') { cp++; if (isdigit((unsigned char) *cp)) { json_error(p, "leading zeros not allowed"); return; } } else if (isdigit((unsigned char) *cp)) { do { if (significand <= ULLONG_MAX / 10) { significand = significand * 10 + (*cp - '0'); } else { pow10++; if (*cp != '0') { imprecise = true; } } cp++; } while (isdigit((unsigned char) *cp)); } else { json_error(p, "'-' must be followed by digit"); return; } /* Optional fraction. */ if (*cp == '.') { cp++; if (!isdigit((unsigned char) *cp)) { json_error(p, "decimal point must be followed by digit"); return; } do { if (significand <= ULLONG_MAX / 10) { significand = significand * 10 + (*cp - '0'); pow10--; } else if (*cp != '0') { imprecise = true; } cp++; } while (isdigit((unsigned char) *cp)); } /* Optional exponent. 
*/ if (*cp == 'e' || *cp == 'E') { bool negative_exponent = false; int exponent; cp++; if (*cp == '+') { cp++; } else if (*cp == '-') { negative_exponent = true; cp++; } if (!isdigit((unsigned char) *cp)) { json_error(p, "exponent must contain at least one digit"); return; } exponent = 0; do { if (exponent >= INT_MAX / 10) { json_error(p, "exponent outside valid range"); return; } exponent = exponent * 10 + (*cp - '0'); cp++; } while (isdigit((unsigned char) *cp)); if (negative_exponent) { pow10 -= exponent; } else { pow10 += exponent; } } if (*cp != '\0') { json_error(p, "syntax error in number"); return; } /* Figure out number. * * We suppress negative zeros as a matter of policy. */ if (!significand) { token.type = T_INTEGER; token.u.integer = 0; json_parser_input(p, &token); return; } if (!imprecise) { while (pow10 > 0 && significand < ULLONG_MAX / 10) { significand *= 10; pow10--; } while (pow10 < 0 && significand % 10 == 0) { significand /= 10; pow10++; } if (pow10 == 0 && significand <= (negative ? (unsigned long long int) LLONG_MAX + 1 : LLONG_MAX)) { token.type = T_INTEGER; token.u.integer = negative ? -significand : significand; json_parser_input(p, &token); return; } } token.type = T_REAL; if (!str_to_double(ds_cstr(&p->buffer), &token.u.real)) { json_error(p, "number outside valid range"); return; } /* Suppress negative zero. 
*/ if (token.u.real == 0) { token.u.real = 0; } json_parser_input(p, &token); } static const char * json_lex_4hex(const char *cp, const char *end, int *valuep) { unsigned int value; if (cp + 4 > end) { return "quoted string ends within \\u escape"; } value = hexits_value(cp, 4, NULL); if (value == UINT_MAX) { return "malformed \\u escape"; } if (!value) { return "null bytes not supported in quoted strings"; } *valuep = value; return NULL; } static const char * json_lex_unicode(const char *cp, const char *end, struct ds *out) { const char *error; int c0, c1; error = json_lex_4hex(cp, end, &c0); if (error) { ds_clear(out); ds_put_cstr(out, error); return NULL; } cp += 4; if (!uc_is_leading_surrogate(c0)) { ds_put_utf8(out, c0); return cp; } if (cp + 2 > end || *cp++ != '\\' || *cp++ != 'u') { ds_clear(out); ds_put_cstr(out, "malformed escaped surrogate pair"); return NULL; } error = json_lex_4hex(cp, end, &c1); if (error) { ds_clear(out); ds_put_cstr(out, error); return NULL; } cp += 4; if (!uc_is_trailing_surrogate(c1)) { ds_clear(out); ds_put_cstr(out, "second half of escaped surrogate pair is not " "trailing surrogate"); return NULL; } ds_put_utf8(out, utf16_decode_surrogate_pair(c0, c1)); return cp; } bool json_string_unescape(const char *in, size_t in_len, char **outp) { const char *end = in + in_len; bool ok = false; struct ds out; ds_init(&out); ds_reserve(&out, in_len); if (in_len > 0 && in[in_len - 1] == '\\') { ds_put_cstr(&out, "quoted string may not end with backslash"); goto exit; } while (in < end) { if (*in == '"') { ds_clear(&out); ds_put_cstr(&out, "quoted string may not include unescaped \""); goto exit; } if (*in != '\\') { ds_put_char(&out, *in++); continue; } in++; switch (*in++) { case '"': case '\\': case '/': ds_put_char(&out, in[-1]); break; case 'b': ds_put_char(&out, '\b'); break; case 'f': ds_put_char(&out, '\f'); break; case 'n': ds_put_char(&out, '\n'); break; case 'r': ds_put_char(&out, '\r'); break; case 't': ds_put_char(&out, '\t'); 
break; case 'u': in = json_lex_unicode(in, end, &out); if (!in) { goto exit; } break; default: ds_clear(&out); ds_put_format(&out, "bad escape \\%c", in[-1]); goto exit; } } ok = true; exit: *outp = ds_cstr(&out); return ok; } static void json_parser_input_string(struct json_parser *p, const char *s) { struct json_token token; token.type = T_STRING; token.u.string = s; json_parser_input(p, &token); } static void json_lex_string(struct json_parser *p) { const char *raw = ds_cstr(&p->buffer); if (!strchr(raw, '\\')) { json_parser_input_string(p, raw); } else { char *cooked; if (json_string_unescape(raw, strlen(raw), &cooked)) { json_parser_input_string(p, cooked); } else { json_error(p, "%s", cooked); } free(cooked); } } static bool json_lex_input(struct json_parser *p, unsigned char c) { struct json_token token; switch (p->lex_state) { case JSON_LEX_START: switch (c) { case ' ': case '\t': case '\n': case '\r': /* Nothing to do. */ return true; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': p->lex_state = JSON_LEX_KEYWORD; break; case '[': case '{': case ']': case '}': case ':': case ',': token.type = c; json_parser_input(p, &token); return true; case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': p->lex_state = JSON_LEX_NUMBER; break; case '"': p->lex_state = JSON_LEX_STRING; return true; default: if (isprint(c)) { json_error(p, "invalid character '%c'", c); } else { json_error(p, "invalid character U+%04x", c); } return true; } break; case JSON_LEX_KEYWORD: if (!isalpha((unsigned char) c)) { json_lex_keyword(p); return false; } break; case JSON_LEX_NUMBER: if (!strchr(".0123456789eE-+", c)) { json_lex_number(p); return false; } break; case JSON_LEX_STRING: if (c == '\\') { p->lex_state = 
JSON_LEX_ESCAPE; } else if (c == '"') { json_lex_string(p); return true; } else if (c < 0x20) { json_error(p, "U+%04X must be escaped in quoted string", c); return true; } break; case JSON_LEX_ESCAPE: p->lex_state = JSON_LEX_STRING; break; default: abort(); } ds_put_char(&p->buffer, c); return true; } /* Parsing. */ /* Parses 'string' as a JSON object or array and returns a newly allocated * 'struct json'. The caller must free the returned structure with * json_destroy() when it is no longer needed. * * 'string' must be encoded in UTF-8. * * If 'string' is valid JSON, then the returned 'struct json' will be either an * object (JSON_OBJECT) or an array (JSON_ARRAY). * * If 'string' is not valid JSON, then the returned 'struct json' will be a * string (JSON_STRING) that describes the particular error encountered during * parsing. (This is an acceptable means of error reporting because at its top * level JSON must be either an object or an array; a bare string is not * valid.) */ struct json * json_from_string(const char *string) { struct json_parser *p = json_parser_create(JSPF_TRAILER); json_parser_feed(p, string, strlen(string)); return json_parser_finish(p); } /* Reads the file named 'file_name', parses its contents as a JSON object or * array, and returns a newly allocated 'struct json'. The caller must free * the returned structure with json_destroy() when it is no longer needed. * * The file must be encoded in UTF-8. * * See json_from_string() for return value semantics. */ struct json * json_from_file(const char *file_name) { struct json *json; FILE *stream; stream = fopen(file_name, "r"); if (!stream) { return json_string_create_nocopy( xasprintf("error opening \"%s\": %s", file_name, ovs_strerror(errno))); } json = json_from_stream(stream); fclose(stream); return json; } /* Parses the contents of 'stream' as a JSON object or array, and returns a * newly allocated 'struct json'. 
The caller must free the returned structure * with json_destroy() when it is no longer needed. * * The file must be encoded in UTF-8. * * See json_from_string() for return value semantics. */ struct json * json_from_stream(FILE *stream) { struct json_parser *p; struct json *json; p = json_parser_create(JSPF_TRAILER); for (;;) { char buffer[BUFSIZ]; size_t n; n = fread(buffer, 1, sizeof buffer, stream); if (!n || json_parser_feed(p, buffer, n) != n) { break; } } json = json_parser_finish(p); if (ferror(stream)) { json_destroy(json); json = json_string_create_nocopy( xasprintf("error reading JSON stream: %s", ovs_strerror(errno))); } return json; } struct json_parser * json_parser_create(int flags) { struct json_parser *p = xzalloc(sizeof *p); p->flags = flags; return p; } size_t json_parser_feed(struct json_parser *p, const char *input, size_t n) { size_t i; for (i = 0; !p->done && i < n; ) { if (json_lex_input(p, input[i])) { p->byte_number++; if (input[i] == '\n') { p->column_number = 0; p->line_number++; } else { p->column_number++; } i++; } } return i; } bool json_parser_is_done(const struct json_parser *p) { return p->done; } struct json * json_parser_finish(struct json_parser *p) { struct json *json; switch (p->lex_state) { case JSON_LEX_START: break; case JSON_LEX_STRING: case JSON_LEX_ESCAPE: json_error(p, "unexpected end of input in quoted string"); break; case JSON_LEX_NUMBER: case JSON_LEX_KEYWORD: json_lex_input(p, ' '); break; } if (p->parse_state == JSON_PARSE_START) { json_error(p, "empty input stream"); } else if (p->parse_state != JSON_PARSE_END) { json_error(p, "unexpected end of input"); } if (!p->error) { ovs_assert(p->height == 1); ovs_assert(p->stack[0].json != NULL); json = p->stack[--p->height].json; } else { json = json_string_create_nocopy(p->error); p->error = NULL; } json_parser_abort(p); return json; } void json_parser_abort(struct json_parser *p) { if (p) { ds_destroy(&p->buffer); if (p->height) { json_destroy(p->stack[0].json); } 
free(p->stack); free(p->member_name); free(p->error); free(p); } } static struct json_parser_node * json_parser_top(struct json_parser *p) { return &p->stack[p->height - 1]; } static void json_parser_put_value(struct json_parser *p, struct json *value) { struct json_parser_node *node = json_parser_top(p); if (node->json->type == JSON_OBJECT) { json_object_put(node->json, p->member_name, value); free(p->member_name); p->member_name = NULL; } else if (node->json->type == JSON_ARRAY) { json_array_add(node->json, value); } else { NOT_REACHED(); } } static void json_parser_push(struct json_parser *p, struct json *new_json, enum json_parse_state new_state) { if (p->height < JSON_MAX_HEIGHT) { struct json_parser_node *node; if (p->height >= p->allocated_height) { p->stack = x2nrealloc(p->stack, &p->allocated_height, sizeof *p->stack); } if (p->height > 0) { json_parser_put_value(p, new_json); } node = &p->stack[p->height++]; node->json = new_json; p->parse_state = new_state; } else { json_destroy(new_json); json_error(p, "input exceeds maximum nesting depth %d", JSON_MAX_HEIGHT); } } static void json_parser_push_object(struct json_parser *p) { json_parser_push(p, json_object_create(), JSON_PARSE_OBJECT_INIT); } static void json_parser_push_array(struct json_parser *p) { json_parser_push(p, json_array_create_empty(), JSON_PARSE_ARRAY_INIT); } static void json_parse_value(struct json_parser *p, struct json_token *token, enum json_parse_state next_state) { struct json *value; switch (token->type) { case T_FALSE: value = json_boolean_create(false); break; case T_NULL: value = json_null_create(); break; case T_TRUE: value = json_boolean_create(true); break; case '{': json_parser_push_object(p); return; case '[': json_parser_push_array(p); return; case T_INTEGER: value = json_integer_create(token->u.integer); break; case T_REAL: value = json_real_create(token->u.real); break; case T_STRING: value = json_string_create(token->u.string); break; case T_EOF: case '}': case ']': case 
':': case ',': default: json_error(p, "syntax error expecting value"); return; } json_parser_put_value(p, value); p->parse_state = next_state; } static void json_parser_pop(struct json_parser *p) { struct json_parser_node *node; /* Conserve memory. */ node = json_parser_top(p); if (node->json->type == JSON_ARRAY) { json_array_trim(node->json); } /* Pop off the top-of-stack. */ if (p->height == 1) { p->parse_state = JSON_PARSE_END; if (!(p->flags & JSPF_TRAILER)) { p->done = true; } } else { p->height--; node = json_parser_top(p); if (node->json->type == JSON_ARRAY) { p->parse_state = JSON_PARSE_ARRAY_NEXT; } else if (node->json->type == JSON_OBJECT) { p->parse_state = JSON_PARSE_OBJECT_NEXT; } else { NOT_REACHED(); } } } static void json_parser_input(struct json_parser *p, struct json_token *token) { switch (p->parse_state) { case JSON_PARSE_START: if (token->type == '{') { json_parser_push_object(p); } else if (token->type == '[') { json_parser_push_array(p); } else { json_error(p, "syntax error at beginning of input"); } break; case JSON_PARSE_END: json_error(p, "trailing garbage at end of input"); break; case JSON_PARSE_OBJECT_INIT: if (token->type == '}') { json_parser_pop(p); break; } /* Fall through. 
*/ case JSON_PARSE_OBJECT_NAME: if (token->type == T_STRING) { p->member_name = xstrdup(token->u.string); p->parse_state = JSON_PARSE_OBJECT_COLON; } else { json_error(p, "syntax error parsing object expecting string"); } break; case JSON_PARSE_OBJECT_COLON: if (token->type == ':') { p->parse_state = JSON_PARSE_OBJECT_VALUE; } else { json_error(p, "syntax error parsing object expecting ':'"); } break; case JSON_PARSE_OBJECT_VALUE: json_parse_value(p, token, JSON_PARSE_OBJECT_NEXT); break; case JSON_PARSE_OBJECT_NEXT: if (token->type == ',') { p->parse_state = JSON_PARSE_OBJECT_NAME; } else if (token->type == '}') { json_parser_pop(p); } else { json_error(p, "syntax error expecting '}' or ','"); } break; case JSON_PARSE_ARRAY_INIT: if (token->type == ']') { json_parser_pop(p); break; } /* Fall through. */ case JSON_PARSE_ARRAY_VALUE: json_parse_value(p, token, JSON_PARSE_ARRAY_NEXT); break; case JSON_PARSE_ARRAY_NEXT: if (token->type == ',') { p->parse_state = JSON_PARSE_ARRAY_VALUE; } else if (token->type == ']') { json_parser_pop(p); } else { json_error(p, "syntax error expecting ']' or ','"); } break; default: abort(); } p->lex_state = JSON_LEX_START; ds_clear(&p->buffer); } static struct json * json_create(enum json_type type) { struct json *json = xmalloc(sizeof *json); json->type = type; return json; } static void json_error(struct json_parser *p, const char *format, ...) 
{ if (!p->error) { struct ds msg; va_list args; ds_init(&msg); ds_put_format(&msg, "line %d, column %d, byte %d: ", p->line_number, p->column_number, p->byte_number); va_start(args, format); ds_put_format_valist(&msg, format, args); va_end(args); p->error = ds_steal_cstr(&msg); p->done = true; } } #define SPACES_PER_LEVEL 2 struct json_serializer { struct ds *ds; int depth; int flags; }; static void json_serialize(const struct json *, struct json_serializer *); static void json_serialize_object(const struct shash *object, struct json_serializer *); static void json_serialize_array(const struct json_array *, struct json_serializer *); static void json_serialize_string(const char *, struct ds *); /* Converts 'json' to a string in JSON format, encoded in UTF-8, and returns * that string. The caller is responsible for freeing the returned string, * with free(), when it is no longer needed. * * If 'flags' contains JSSF_PRETTY, the output is pretty-printed with each * nesting level introducing an additional indentation. Otherwise, the * returned string does not contain any new-line characters. * * If 'flags' contains JSSF_SORT, members of objects in the output are sorted * in bytewise lexicographic order for reproducibility. Otherwise, members of * objects are output in an indeterminate order. * * The returned string is valid JSON only if 'json' represents an array or an * object, since a bare literal does not satisfy the JSON grammar. */ char * json_to_string(const struct json *json, int flags) { struct ds ds; ds_init(&ds); json_to_ds(json, flags, &ds); return ds_steal_cstr(&ds); } /* Same as json_to_string(), but the output is appended to 'ds'. 
*/
void
json_to_ds(const struct json *json, int flags, struct ds *ds)
{
    struct json_serializer s;

    /* Serialization starts at nesting depth 0 and appends to the caller's
     * 'ds'. */
    s.ds = ds;
    s.depth = 0;
    s.flags = flags;
    json_serialize(json, &s);
}

/* Appends the JSON text for 'json' to 's->ds', dispatching on the value's
 * type.  Objects, arrays and strings are delegated to their own helpers;
 * the remaining types are formatted inline. */
static void
json_serialize(const struct json *json, struct json_serializer *s)
{
    struct ds *ds = s->ds;

    switch (json->type) {
    case JSON_NULL:
        ds_put_cstr(ds, "null");
        break;

    case JSON_FALSE:
        ds_put_cstr(ds, "false");
        break;

    case JSON_TRUE:
        ds_put_cstr(ds, "true");
        break;

    case JSON_OBJECT:
        json_serialize_object(json->u.object, s);
        break;

    case JSON_ARRAY:
        json_serialize_array(&json->u.array, s);
        break;

    case JSON_INTEGER:
        ds_put_format(ds, "%lld", json->u.integer);
        break;

    case JSON_REAL:
        /* Print with DBL_DIG significant digits. */
        ds_put_format(ds, "%.*g", DBL_DIG, json->u.real);
        break;

    case JSON_STRING:
        json_serialize_string(json->u.string, ds);
        break;

    case JSON_N_TYPES:
    default:
        NOT_REACHED();
    }
}

/* When pretty-printing (JSSF_PRETTY), appends a new-line followed by
 * SPACES_PER_LEVEL spaces for each level of the current depth.  Otherwise
 * does nothing. */
static void
indent_line(struct json_serializer *s)
{
    if (s->flags & JSSF_PRETTY) {
        ds_put_char(s->ds, '\n');
        ds_put_char_multiple(s->ds, ' ', SPACES_PER_LEVEL * s->depth);
    }
}

/* Appends one object member, 'node', to the output.  'i' is the member's
 * position in the output order; every member except the first (i == 0) is
 * preceded by a comma separator. */
static void
json_serialize_object_member(size_t i, const struct shash_node *node,
                             struct json_serializer *s)
{
    struct ds *ds = s->ds;

    if (i) {
        ds_put_char(ds, ',');
        indent_line(s);
    }

    json_serialize_string(node->name, ds);
    ds_put_char(ds, ':');
    /* Pretty-printed output puts a space between the colon and the value. */
    if (s->flags & JSSF_PRETTY) {
        ds_put_char(ds, ' ');
    }
    json_serialize(node->data, s);
}

/* Appends 'object' to the output as a JSON object.  With JSSF_SORT the
 * members are emitted in the order returned by shash_sort(); otherwise in
 * the hash table's iteration order. */
static void
json_serialize_object(const struct shash *object, struct json_serializer *s)
{
    struct ds *ds = s->ds;

    ds_put_char(ds, '{');

    s->depth++;
    indent_line(s);

    if (s->flags & JSSF_SORT) {
        const struct shash_node **nodes;
        size_t n, i;

        nodes = shash_sort(object);
        n = shash_count(object);
        for (i = 0; i < n; i++) {
            json_serialize_object_member(i, nodes[i], s);
        }
        /* The array of node pointers is freed here once it has been used. */
        free(nodes);
    } else {
        struct shash_node *node;
        size_t i;

        i = 0;
        SHASH_FOR_EACH (node, object) {
            json_serialize_object_member(i++, node, s);
        }
    }

    ds_put_char(ds, '}');
    s->depth--;
}

/* Appends 'array' to the output as a JSON array. */
static void
json_serialize_array(const struct json_array *array, struct json_serializer *s)
{
    struct ds *ds = s->ds;
size_t i;

    ds_put_char(ds, '[');
    s->depth++;

    /* An empty array is emitted as "[]" with nothing in between, even when
     * pretty-printing. */
    if (array->n > 0) {
        indent_line(s);

        for (i = 0; i < array->n; i++) {
            /* Every element except the first is preceded by a comma. */
            if (i) {
                ds_put_char(ds, ',');
                indent_line(s);
            }
            json_serialize(array->elems[i], s);
        }
    }

    s->depth--;
    ds_put_char(ds, ']');
}

/* Appends 'string' to 'ds' as a double-quoted JSON string.  Quote,
 * backslash, backspace, form feed, new-line, carriage return and tab get
 * two-character escapes; other bytes below 32 are written as \u00XX; all
 * remaining bytes are copied through unchanged. */
static void
json_serialize_string(const char *string, struct ds *ds)
{
    uint8_t c;

    ds_put_char(ds, '"');
    while ((c = *string++) != '\0') {
        switch (c) {
        case '"':
            ds_put_cstr(ds, "\\\"");
            break;

        case '\\':
            ds_put_cstr(ds, "\\\\");
            break;

        case '\b':
            ds_put_cstr(ds, "\\b");
            break;

        case '\f':
            ds_put_cstr(ds, "\\f");
            break;

        case '\n':
            ds_put_cstr(ds, "\\n");
            break;

        case '\r':
            ds_put_cstr(ds, "\\r");
            break;

        case '\t':
            ds_put_cstr(ds, "\\t");
            break;

        default:
            if (c >= 32) {
                ds_put_char(ds, c);
            } else {
                ds_put_format(ds, "\\u%04x", c);
            }
            break;
        }
    }
    ds_put_char(ds, '"');
}

/* Returns the number of bytes that json_serialize_string() would append for
 * 'string', including the two enclosing double quotes.  The cases here must
 * stay in step with the escapes emitted by json_serialize_string(). */
static size_t
json_string_serialized_length(const char *string)
{
    size_t length;
    uint8_t c;

    /* The enclosing quotes. */
    length = strlen("\"\"");

    while ((c = *string++) != '\0') {
        switch (c) {
        case '"':
        case '\\':
        case '\b':
        case '\f':
        case '\n':
        case '\r':
        case '\t':
            /* Two-character escape. */
            length += 2;
            break;

        default:
            if (c >= 32) {
                length++;
            } else {
                /* \uXXXX */
                length += 6;
            }
            break;
        }
    }

    return length;
}

/* Returns the number of bytes in the non-pretty serialization of 'object':
 * the braces, one colon per member, one comma between adjacent members, and
 * each member's serialized name and value. */
static size_t
json_object_serialized_length(const struct shash *object)
{
    size_t length = strlen("{}");

    if (!shash_is_empty(object)) {
        struct shash_node *node;

        /* Commas and colons. */
        length += 2 * shash_count(object) - 1;

        SHASH_FOR_EACH (node, object) {
            const struct json *value = node->data;

            length += json_string_serialized_length(node->name);
            length += json_serialized_length(value);
        }
    }

    return length;
}

/* Returns the number of bytes in the non-pretty serialization of 'array':
 * the brackets, one comma between adjacent elements, and each element's
 * serialized length. */
static size_t
json_array_serialized_length(const struct json_array *array)
{
    size_t length = strlen("[]");

    if (array->n) {
        size_t i;

        /* Commas.
*/
        length += array->n - 1;

        for (i = 0; i < array->n; i++) {
            length += json_serialized_length(array->elems[i]);
        }
    }

    return length;
}

/* Returns strlen(json_to_string(json, 0)), that is, the number of bytes in the
 * JSON output by json_to_string() for 'json' when JSSF_PRETTY is not
 * requested.  (JSSF_SORT does not affect the length of json_to_string()'s
 * output.)
 *
 * Integers and reals are measured with snprintf() using the same format
 * strings that json_serialize() uses to print them. */
size_t
json_serialized_length(const struct json *json)
{
    switch (json->type) {
    case JSON_NULL:
        return strlen("null");

    case JSON_FALSE:
        return strlen("false");

    case JSON_TRUE:
        return strlen("true");

    case JSON_OBJECT:
        return json_object_serialized_length(json->u.object);

    case JSON_ARRAY:
        return json_array_serialized_length(&json->u.array);

    case JSON_INTEGER:
        return snprintf(NULL, 0, "%lld", json->u.integer);

    case JSON_REAL:
        return snprintf(NULL, 0, "%.*g", DBL_DIG, json->u.real);

    case JSON_STRING:
        return json_string_serialized_length(json->u.string);

    case JSON_N_TYPES:
    default:
        NOT_REACHED();
    }
}
openvswitch-2.0.1+git20140120/lib/json.h000066400000000000000000000105311226605124000173420ustar00rootroot00000000000000/*
 * Copyright (c) 2009, 2010, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef JSON_H
#define JSON_H 1

/* This is an implementation of JavaScript Object Notation (JSON) as specified
 * by RFC 4627.  It is intended to fully comply with RFC 4627, with the
 * following known exceptions and clarifications:
 *
 *      - Null bytes (\u0000) are not allowed in strings.
*
 *      - Only UTF-8 encoding is supported (RFC 4627 allows for other Unicode
 *        encodings).
 *
 *      - Names within an object must be unique (RFC 4627 says that they
 *        "should" be unique).
 */

#include "shash.h"

#ifdef __cplusplus
extern "C" {
#endif

struct ds;

/* Type of a JSON value. */
enum json_type {
    JSON_NULL,                  /* null */
    JSON_FALSE,                 /* false */
    JSON_TRUE,                  /* true */
    JSON_OBJECT,                /* {"a": b, "c": d, ...} */
    JSON_ARRAY,                 /* [1, 2, 3, ...] */
    JSON_INTEGER,               /* 123. */
    JSON_REAL,                  /* 123.456. */
    JSON_STRING,                /* "..." */
    JSON_N_TYPES                /* Not a value type; presumably the count of
                                 * the types above. */
};

const char *json_type_to_string(enum json_type);

/* A JSON array. */
struct json_array {
    size_t n, n_allocated;      /* 'n' elements are in use; 'n_allocated' is
                                 * presumably the allocated capacity of
                                 * 'elems' -- TODO confirm in json.c. */
    struct json **elems;
};

/* A JSON value.  'type' selects which member of 'u', if any, is
 * meaningful. */
struct json {
    enum json_type type;
    union {
        struct shash *object;   /* Contains "struct json *"s. */
        struct json_array array;        /* JSON_ARRAY. */
        long long int integer;  /* JSON_INTEGER. */
        double real;            /* JSON_REAL. */
        char *string;           /* JSON_STRING. */
    } u;
};

/* Constructors. */
struct json *json_null_create(void);
struct json *json_boolean_create(bool);
struct json *json_string_create(const char *);
struct json *json_string_create_nocopy(char *);
struct json *json_integer_create(long long int);
struct json *json_real_create(double);

struct json *json_array_create_empty(void);
void json_array_add(struct json *, struct json *element);
void json_array_trim(struct json *);
struct json *json_array_create(struct json **, size_t n);
struct json *json_array_create_1(struct json *);
struct json *json_array_create_2(struct json *, struct json *);
struct json *json_array_create_3(struct json *, struct json *, struct json *);

struct json *json_object_create(void);
void json_object_put(struct json *, const char *name, struct json *value);
void json_object_put_string(struct json *,
                            const char *name, const char *value);

/* Accessors for the contents of a value of the corresponding type. */
const char *json_string(const struct json *);
struct json_array *json_array(const struct json *);
struct shash *json_object(const struct json *);
bool json_boolean(const struct json *);
double json_real(const struct json *);
int64_t json_integer(const struct json *);

struct json
*json_clone(const struct json *);
void json_destroy(struct json *);

size_t json_hash(const struct json *, size_t basis);
bool json_equal(const struct json *, const struct json *);

/* Parsing JSON. */
enum {
    JSPF_TRAILER = 1 << 0       /* Check for garbage following input. */
};

/* Incremental parser interface.  Callers feed input with
 * json_parser_feed() until json_parser_is_done() returns true, then collect
 * the result with json_parser_finish(); json_parser_abort() discards a
 * parser without producing a result. */
struct json_parser *json_parser_create(int flags);
size_t json_parser_feed(struct json_parser *, const char *, size_t);
bool json_parser_is_done(const struct json_parser *);
struct json *json_parser_finish(struct json_parser *);
void json_parser_abort(struct json_parser *);

struct json *json_from_string(const char *string);
struct json *json_from_file(const char *file_name);
struct json *json_from_stream(FILE *stream);

/* Serializing JSON. */

enum {
    JSSF_PRETTY = 1 << 0,       /* Multiple lines with indentation, if true. */
    JSSF_SORT = 1 << 1          /* Object members in sorted order, if true. */
};
/* json_to_string() returns a string that the caller must free();
 * json_to_ds() appends the same output to an existing 'struct ds'. */
char *json_to_string(const struct json *, int flags);
void json_to_ds(const struct json *, int flags, struct ds *);

/* Length of json_to_string()'s output when JSSF_PRETTY is not requested. */
size_t json_serialized_length(const struct json *);

/* JSON string formatting operations. */

bool json_string_unescape(const char *in, size_t in_len, char **outp);

#ifdef __cplusplus
}
#endif

#endif /* json.h */
openvswitch-2.0.1+git20140120/lib/jsonrpc.c000066400000000000000000000755401226605124000200550ustar00rootroot00000000000000/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

/* NOTE(review): the two bare "#include" directives below have no header
 * name; the names look to have been lost when this text was extracted.
 * Restore them from the upstream file before building. */
#include

#include "jsonrpc.h"

#include

#include "byteq.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "json.h"
#include "list.h"
#include "ofpbuf.h"
#include "ovs-thread.h"
#include "poll-loop.h"
#include "reconnect.h"
#include "stream.h"
#include "timeval.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(jsonrpc);

/* A JSON-RPC connection layered on a stream. */
struct jsonrpc {
    struct stream *stream;      /* Underlying stream, owned by this object. */
    char *name;                 /* Copy of the stream's name. */
    int status;                 /* 0 if no error yet, otherwise an errno
                                 * value or EOF. */

    /* Input. */
    struct byteq input;         /* Queue over 'input_buffer'. */
    uint8_t input_buffer[512];
    struct json_parser *parser; /* Parser for the message in progress, or
                                 * NULL if none has been created. */
    struct jsonrpc_msg *received;   /* Parsed message not yet handed to the
                                     * caller, or NULL. */

    /* Output. */
    struct list output;         /* Contains "struct ofpbuf"s. */
    size_t backlog;             /* Bytes queued in 'output'. */
};

/* Rate limit for error messages. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);

static void jsonrpc_received(struct jsonrpc *);
static void jsonrpc_cleanup(struct jsonrpc *);
static void jsonrpc_error(struct jsonrpc *, int error);

/* This is just the same as stream_open() except that it uses the default
 * JSONRPC ports if none is specified. */
int
jsonrpc_stream_open(const char *name, struct stream **streamp, uint8_t dscp)
{
    return stream_open_with_default_ports(name, JSONRPC_TCP_PORT,
                                          JSONRPC_SSL_PORT, streamp,
                                          dscp);
}

/* This is just the same as pstream_open() except that it uses the default
 * JSONRPC ports if none is specified. */
int
jsonrpc_pstream_open(const char *name, struct pstream **pstreamp, uint8_t dscp)
{
    return pstream_open_with_default_ports(name, JSONRPC_TCP_PORT,
                                           JSONRPC_SSL_PORT, pstreamp, dscp);
}

/* Returns a new JSON-RPC stream that uses 'stream' for input and output.  The
 * new jsonrpc object takes ownership of 'stream'. */
struct jsonrpc *
jsonrpc_open(struct stream *stream)
{
    struct jsonrpc *rpc;

    ovs_assert(stream != NULL);

    /* xzalloc() zeroes the structure, so 'status', 'parser', 'received' and
     * 'backlog' start out as 0/NULL. */
    rpc = xzalloc(sizeof *rpc);
    rpc->name = xstrdup(stream_get_name(stream));
    rpc->stream = stream;
    byteq_init(&rpc->input, rpc->input_buffer, sizeof rpc->input_buffer);
    list_init(&rpc->output);

    return rpc;
}

/* Destroys 'rpc', closing the stream on which it is based, and frees its
 * memory.
*/
void
jsonrpc_close(struct jsonrpc *rpc)
{
    /* A null 'rpc' is accepted and ignored. */
    if (rpc) {
        jsonrpc_cleanup(rpc);
        free(rpc->name);
        free(rpc);
    }
}

/* Performs periodic maintenance on 'rpc', such as flushing output buffers. */
void
jsonrpc_run(struct jsonrpc *rpc)
{
    if (rpc->status) {
        return;
    }

    stream_run(rpc->stream);
    /* Send queued buffers in order until the queue is empty or the stream
     * refuses more data. */
    while (!list_is_empty(&rpc->output)) {
        struct ofpbuf *buf = ofpbuf_from_list(rpc->output.next);
        int retval;

        retval = stream_send(rpc->stream, buf->data, buf->size);
        if (retval >= 0) {
            /* 'retval' bytes were accepted, possibly fewer than the whole
             * buffer.  Drop them from the front of 'buf' and free 'buf'
             * once it is fully sent. */
            rpc->backlog -= retval;
            ofpbuf_pull(buf, retval);
            if (!buf->size) {
                list_remove(&buf->list_node);
                ofpbuf_delete(buf);
            }
        } else {
            /* -EAGAIN just means "try again later"; any other negative
             * value is a connection error. */
            if (retval != -EAGAIN) {
                VLOG_WARN_RL(&rl, "%s: send error: %s",
                             rpc->name, ovs_strerror(-retval));
                jsonrpc_error(rpc, -retval);
            }
            break;
        }
    }
}

/* Arranges for the poll loop to wake up when 'rpc' needs to perform
 * maintenance activities. */
void
jsonrpc_wait(struct jsonrpc *rpc)
{
    if (!rpc->status) {
        stream_run_wait(rpc->stream);
        /* Waiting for the stream to accept data is only needed while
         * output is queued. */
        if (!list_is_empty(&rpc->output)) {
            stream_send_wait(rpc->stream);
        }
    }
}

/*
 * Returns the current status of 'rpc'.  The possible return values are:
 * - 0: no error yet
 * - >0: errno value
 * - EOF: end of file (remote end closed connection; not necessarily an error).
 *
 * When this function returns nonzero, 'rpc' is effectively out of commission.
 * 'rpc' will not receive any more messages and any further messages that one
 * attempts to send with 'rpc' will be discarded.  The caller can keep 'rpc'
 * around as long as it wants, but it's not going to provide any more useful
 * services.
 */
int
jsonrpc_get_status(const struct jsonrpc *rpc)
{
    return rpc->status;
}

/* Returns the number of bytes buffered by 'rpc' to be written to the
 * underlying stream.  Always returns 0 if 'rpc' has encountered an error or if
 * the remote end closed the connection. */
size_t
jsonrpc_get_backlog(const struct jsonrpc *rpc)
{
    return rpc->status ? 0 : rpc->backlog;
}

/* Returns the number of bytes that have been received on 'rpc''s underlying
 * stream.
(The value wraps around if it exceeds UINT_MAX.) */
unsigned int
jsonrpc_get_received_bytes(const struct jsonrpc *rpc)
{
    /* The input queue's 'head' counter is used as the byte count. */
    return rpc->input.head;
}

/* Returns 'rpc''s name, that is, the name returned by stream_get_name() for
 * the stream underlying 'rpc' when 'rpc' was created. */
const char *
jsonrpc_get_name(const struct jsonrpc *rpc)
{
    return rpc->name;
}

/* Logs 'msg' at debug level, prefixed with 'rpc''s name, 'title' and the
 * message type.  Only the members of 'msg' that are present (nonnull) are
 * included.  Does nothing, and builds no string, unless debug logging is
 * enabled for this module. */
static void
jsonrpc_log_msg(const struct jsonrpc *rpc, const char *title,
                const struct jsonrpc_msg *msg)
{
    if (VLOG_IS_DBG_ENABLED()) {
        struct ds s = DS_EMPTY_INITIALIZER;
        if (msg->method) {
            ds_put_format(&s, ", method=\"%s\"", msg->method);
        }
        if (msg->params) {
            ds_put_cstr(&s, ", params=");
            json_to_ds(msg->params, 0, &s);
        }
        if (msg->result) {
            ds_put_cstr(&s, ", result=");
            json_to_ds(msg->result, 0, &s);
        }
        if (msg->error) {
            ds_put_cstr(&s, ", error=");
            json_to_ds(msg->error, 0, &s);
        }
        if (msg->id) {
            ds_put_cstr(&s, ", id=");
            json_to_ds(msg->id, 0, &s);
        }
        VLOG_DBG("%s: %s %s%s", rpc->name, title,
                 jsonrpc_msg_type_to_string(msg->type), ds_cstr(&s));
        ds_destroy(&s);
    }
}

/* Schedules 'msg' to be sent on 'rpc' and returns 'rpc''s status (as with
 * jsonrpc_get_status()).
 *
 * If 'msg' cannot be sent immediately, it is appended to a buffer.  The caller
 * is responsible for ensuring that the amount of buffered data is somehow
 * limited.  (jsonrpc_get_backlog() returns the amount of data currently
 * buffered in 'rpc'.)
 *
 * Always takes ownership of 'msg', regardless of success.
*/
int
jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg)
{
    struct ofpbuf *buf;
    struct json *json;
    size_t length;
    char *s;

    /* A connection that has already failed discards the message. */
    if (rpc->status) {
        jsonrpc_msg_destroy(msg);
        return rpc->status;
    }

    jsonrpc_log_msg(rpc, "send", msg);

    /* jsonrpc_msg_to_json() consumes 'msg', so 'msg' must not be used after
     * this point. */
    json = jsonrpc_msg_to_json(msg);
    s = json_to_string(json, 0);
    length = strlen(s);
    json_destroy(json);

    /* Wrap the serialized text in an ofpbuf and queue it for output. */
    buf = xmalloc(sizeof *buf);
    ofpbuf_use(buf, s, length);
    buf->size = length;
    list_push_back(&rpc->output, &buf->list_node);
    rpc->backlog += length;

    /* If nothing else was queued ahead of this message, try to send it right
     * away. */
    if (rpc->backlog == length) {
        jsonrpc_run(rpc);
    }
    return rpc->status;
}

/* Attempts to receive a message from 'rpc'.
 *
 * If successful, stores the received message in '*msgp' and returns 0.  The
 * caller takes ownership of '*msgp' and must eventually destroy it with
 * jsonrpc_msg_destroy().
 *
 * Otherwise, stores NULL in '*msgp' and returns one of the following:
 *
 *   - EAGAIN: No message has been received.
 *
 *   - EOF: The remote end closed the connection gracefully.
 *
 *   - Otherwise an errno value that represents a JSON-RPC protocol violation
 *     or another error fatal to the connection.  'rpc' will not send or
 *     receive any more messages.
*/
int
jsonrpc_recv(struct jsonrpc *rpc, struct jsonrpc_msg **msgp)
{
    int i;

    *msgp = NULL;
    if (rpc->status) {
        return rpc->status;
    }

    /* Each iteration either returns a message, reads more bytes from the
     * stream, or feeds buffered bytes to the parser.  The loop gives up with
     * EAGAIN after 50 iterations (presumably to bound the work done in a
     * single call). */
    for (i = 0; i < 50; i++) {
        if (rpc->received) {
            /* A complete message is waiting; hand it to the caller. */
            *msgp = rpc->received;
            rpc->received = NULL;
            return 0;
        } else if (byteq_is_empty(&rpc->input)) {
            size_t chunk;
            int retval;

            /* No buffered input: read as much as fits into the queue. */
            chunk = byteq_headroom(&rpc->input);
            retval = stream_recv(rpc->stream, byteq_head(&rpc->input), chunk);
            if (retval < 0) {
                if (retval == -EAGAIN) {
                    return EAGAIN;
                } else {
                    VLOG_WARN_RL(&rl, "%s: receive error: %s",
                                 rpc->name, ovs_strerror(-retval));
                    jsonrpc_error(rpc, -retval);
                    return rpc->status;
                }
            } else if (retval == 0) {
                /* A zero-byte read means the peer closed the connection. */
                jsonrpc_error(rpc, EOF);
                return EOF;
            }
            byteq_advance_head(&rpc->input, retval);
        } else {
            size_t n, used;

            /* Buffered input is available: feed it to the parser, creating
             * the parser on first use. */
            if (!rpc->parser) {
                rpc->parser = json_parser_create(0);
            }
            n = byteq_tailroom(&rpc->input);
            used = json_parser_feed(rpc->parser,
                                    (char *) byteq_tail(&rpc->input), n);
            byteq_advance_tail(&rpc->input, used);
            if (json_parser_is_done(rpc->parser)) {
                jsonrpc_received(rpc);
                if (rpc->status) {
                    const struct byteq *q = &rpc->input;
                    /* NOTE(review): this check presumably holds only while
                     * the queue has not wrapped, so that 'buffer' still
                     * starts at the first byte received -- confirm against
                     * byteq. */
                    if (q->head <= q->size) {
                        stream_report_content(q->buffer, q->head,
                                              STREAM_JSONRPC,
                                              THIS_MODULE, rpc->name);
                    }
                    return rpc->status;
                }
            }
        }
    }

    return EAGAIN;
}

/* Causes the poll loop to wake up when jsonrpc_recv() may return a value other
 * than EAGAIN. */
void
jsonrpc_recv_wait(struct jsonrpc *rpc)
{
    /* If there is already an error, a parsed message, or unparsed input,
     * jsonrpc_recv() can make progress without waiting on the stream. */
    if (rpc->status || rpc->received || !byteq_is_empty(&rpc->input)) {
        poll_immediate_wake_at(rpc->name);
    } else {
        stream_recv_wait(rpc->stream);
    }
}

/* Sends 'msg' on 'rpc' and waits for it to be successfully queued to the
 * underlying stream.  Returns 0 if 'msg' was sent successfully, otherwise a
 * status value (see jsonrpc_get_status()).
 *
 * Always takes ownership of 'msg', regardless of success.
*/
int
jsonrpc_send_block(struct jsonrpc *rpc, struct jsonrpc_msg *msg)
{
    int error;

    fatal_signal_run();

    error = jsonrpc_send(rpc, msg);
    if (error) {
        return error;
    }

    /* Keep flushing until the output queue drains or the connection
     * fails, blocking in the poll loop between attempts. */
    for (;;) {
        jsonrpc_run(rpc);
        if (list_is_empty(&rpc->output) || rpc->status) {
            return rpc->status;
        }
        jsonrpc_wait(rpc);
        poll_block();
    }
}

/* Waits for a message to be received on 'rpc'.  Same semantics as
 * jsonrpc_recv() except that EAGAIN will never be returned. */
int
jsonrpc_recv_block(struct jsonrpc *rpc, struct jsonrpc_msg **msgp)
{
    for (;;) {
        int error = jsonrpc_recv(rpc, msgp);
        if (error != EAGAIN) {
            fatal_signal_run();
            return error;
        }

        /* Nothing available yet: flush pending output, then block until
         * the connection needs maintenance or input arrives. */
        jsonrpc_run(rpc);
        jsonrpc_wait(rpc);
        jsonrpc_recv_wait(rpc);
        poll_block();
    }
}

/* Sends 'request' to 'rpc' then waits for a reply.  The return value is 0 if
 * successful, in which case '*replyp' is set to the reply, which the caller
 * must eventually free with jsonrpc_msg_destroy().  Otherwise returns a status
 * value (see jsonrpc_get_status()).
 *
 * Discards any message received on 'rpc' that is not a reply to 'request'
 * (based on message id).
 *
 * Always takes ownership of 'request', regardless of success. */
int
jsonrpc_transact_block(struct jsonrpc *rpc, struct jsonrpc_msg *request,
                       struct jsonrpc_msg **replyp)
{
    struct jsonrpc_msg *reply = NULL;
    struct json *id;
    int error;

    /* Copy the id before sending, because jsonrpc_send_block() takes
     * ownership of 'request'. */
    id = json_clone(request->id);
    error = jsonrpc_send_block(rpc, request);
    if (!error) {
        for (;;) {
            error = jsonrpc_recv_block(rpc, &reply);
            if (error) {
                break;
            }
            /* Stop at the first reply or error reply whose id matches. */
            if ((reply->type == JSONRPC_REPLY || reply->type == JSONRPC_ERROR)
                && json_equal(id, reply->id)) {
                break;
            }
            jsonrpc_msg_destroy(reply);
        }
    }
    *replyp = error ?
NULL : reply;
    json_destroy(id);
    return error;
}

/* Called when 'rpc''s parser has finished a JSON value.  Converts the value
 * into a jsonrpc_msg and stores it in 'rpc->received', or puts 'rpc' into
 * the EPROTO error state if the input was not valid JSON or not a valid
 * JSON-RPC message. */
static void
jsonrpc_received(struct jsonrpc *rpc)
{
    struct jsonrpc_msg *msg;
    struct json *json;
    char *error;

    /* The parser is not used again after json_parser_finish(); clear the
     * pointer so that jsonrpc_recv() creates a new one for the next
     * message. */
    json = json_parser_finish(rpc->parser);
    rpc->parser = NULL;
    /* A string result is treated here as a parse error whose text is the
     * string's content. */
    if (json->type == JSON_STRING) {
        VLOG_WARN_RL(&rl, "%s: error parsing stream: %s",
                     rpc->name, json_string(json));
        jsonrpc_error(rpc, EPROTO);
        json_destroy(json);
        return;
    }

    /* jsonrpc_msg_from_json() destroys 'json' whether or not it succeeds. */
    error = jsonrpc_msg_from_json(json, &msg);
    if (error) {
        VLOG_WARN_RL(&rl, "%s: received bad JSON-RPC message: %s",
                     rpc->name, error);
        free(error);
        jsonrpc_error(rpc, EPROTO);
        return;
    }

    jsonrpc_log_msg(rpc, "received", msg);
    rpc->received = msg;
}

/* Puts 'rpc' into error state 'error', which must be nonzero, and releases
 * its resources.  Only the first error is recorded; later calls have no
 * effect. */
static void
jsonrpc_error(struct jsonrpc *rpc, int error)
{
    ovs_assert(error);
    if (!rpc->status) {
        rpc->status = error;
        jsonrpc_cleanup(rpc);
    }
}

/* Closes 'rpc''s stream and discards its parser, any received but
 * undelivered message, and all queued output.  Every released pointer is
 * set to NULL and the backlog is reset to 0. */
static void
jsonrpc_cleanup(struct jsonrpc *rpc)
{
    stream_close(rpc->stream);
    rpc->stream = NULL;

    json_parser_abort(rpc->parser);
    rpc->parser = NULL;

    jsonrpc_msg_destroy(rpc->received);
    rpc->received = NULL;

    ofpbuf_list_delete(&rpc->output);
    rpc->backlog = 0;
}

/* Allocates and returns a message of the given 'type'.  'method' is copied
 * if nonnull; the 'params', 'result', 'error' and 'id' pointers are stored
 * as given, so the new message takes ownership of them. */
static struct jsonrpc_msg *
jsonrpc_create(enum jsonrpc_msg_type type, const char *method,
                struct json *params, struct json *result, struct json *error,
                struct json *id)
{
    struct jsonrpc_msg *msg = xmalloc(sizeof *msg);
    msg->type = type;
    msg->method = method ?
xstrdup(method) : NULL; msg->params = params; msg->result = result; msg->error = error; msg->id = id; return msg; } static struct json * jsonrpc_create_id(void) { static atomic_uint next_id = ATOMIC_VAR_INIT(0); unsigned int id; atomic_add(&next_id, 1, &id); return json_integer_create(id); } struct jsonrpc_msg * jsonrpc_create_request(const char *method, struct json *params, struct json **idp) { struct json *id = jsonrpc_create_id(); if (idp) { *idp = json_clone(id); } return jsonrpc_create(JSONRPC_REQUEST, method, params, NULL, NULL, id); } struct jsonrpc_msg * jsonrpc_create_notify(const char *method, struct json *params) { return jsonrpc_create(JSONRPC_NOTIFY, method, params, NULL, NULL, NULL); } struct jsonrpc_msg * jsonrpc_create_reply(struct json *result, const struct json *id) { return jsonrpc_create(JSONRPC_REPLY, NULL, NULL, result, NULL, json_clone(id)); } struct jsonrpc_msg * jsonrpc_create_error(struct json *error, const struct json *id) { return jsonrpc_create(JSONRPC_REPLY, NULL, NULL, NULL, error, json_clone(id)); } const char * jsonrpc_msg_type_to_string(enum jsonrpc_msg_type type) { switch (type) { case JSONRPC_REQUEST: return "request"; case JSONRPC_NOTIFY: return "notification"; case JSONRPC_REPLY: return "reply"; case JSONRPC_ERROR: return "error"; } return "(null)"; } char * jsonrpc_msg_is_valid(const struct jsonrpc_msg *m) { const char *type_name; unsigned int pattern; if (m->params && m->params->type != JSON_ARRAY) { return xstrdup("\"params\" must be JSON array"); } switch (m->type) { case JSONRPC_REQUEST: pattern = 0x11001; break; case JSONRPC_NOTIFY: pattern = 0x11000; break; case JSONRPC_REPLY: pattern = 0x00101; break; case JSONRPC_ERROR: pattern = 0x00011; break; default: return xasprintf("invalid JSON-RPC message type %d", m->type); } type_name = jsonrpc_msg_type_to_string(m->type); if ((m->method != NULL) != ((pattern & 0x10000) != 0)) { return xasprintf("%s must%s have \"method\"", type_name, (pattern & 0x10000) ? 
"" : " not"); } if ((m->params != NULL) != ((pattern & 0x1000) != 0)) { return xasprintf("%s must%s have \"params\"", type_name, (pattern & 0x1000) ? "" : " not"); } if ((m->result != NULL) != ((pattern & 0x100) != 0)) { return xasprintf("%s must%s have \"result\"", type_name, (pattern & 0x100) ? "" : " not"); } if ((m->error != NULL) != ((pattern & 0x10) != 0)) { return xasprintf("%s must%s have \"error\"", type_name, (pattern & 0x10) ? "" : " not"); } if ((m->id != NULL) != ((pattern & 0x1) != 0)) { return xasprintf("%s must%s have \"id\"", type_name, (pattern & 0x1) ? "" : " not"); } return NULL; } void jsonrpc_msg_destroy(struct jsonrpc_msg *m) { if (m) { free(m->method); json_destroy(m->params); json_destroy(m->result); json_destroy(m->error); json_destroy(m->id); free(m); } } static struct json * null_from_json_null(struct json *json) { if (json && json->type == JSON_NULL) { json_destroy(json); return NULL; } return json; } char * jsonrpc_msg_from_json(struct json *json, struct jsonrpc_msg **msgp) { struct json *method = NULL; struct jsonrpc_msg *msg = NULL; struct shash *object; char *error; if (json->type != JSON_OBJECT) { error = xstrdup("message is not a JSON object"); goto exit; } object = json_object(json); method = shash_find_and_delete(object, "method"); if (method && method->type != JSON_STRING) { error = xstrdup("method is not a JSON string"); goto exit; } msg = xzalloc(sizeof *msg); msg->method = method ? xstrdup(method->u.string) : NULL; msg->params = null_from_json_null(shash_find_and_delete(object, "params")); msg->result = null_from_json_null(shash_find_and_delete(object, "result")); msg->error = null_from_json_null(shash_find_and_delete(object, "error")); msg->id = null_from_json_null(shash_find_and_delete(object, "id")); msg->type = (msg->result ? JSONRPC_REPLY : msg->error ? JSONRPC_ERROR : msg->id ? 
JSONRPC_REQUEST
                 : JSONRPC_NOTIFY);
    if (!shash_is_empty(object)) {
        error = xasprintf("message has unexpected member \"%s\"",
                          shash_first(object)->name);
        goto exit;
    }
    error = jsonrpc_msg_is_valid(msg);
    if (error) {
        goto exit;
    }

exit:
    /* Common exit path.  'method' and 'json' are always released here; on
     * error the partially built message is discarded too. */
    json_destroy(method);
    json_destroy(json);
    if (error) {
        jsonrpc_msg_destroy(msg);
        msg = NULL;
    }
    *msgp = msg;
    return error;
}

/* Converts 'm' into a JSON object and returns it.  Consumes 'm': its member
 * values are moved into the returned object rather than copied, and 'm'
 * itself is freed, so the caller must not use 'm' afterward.
 *
 * A member that is absent in 'm' is emitted as an explicit null where the
 * code below does so: "result" for an error reply, "error" for a successful
 * reply, and "id" for a notification. */
struct json *
jsonrpc_msg_to_json(struct jsonrpc_msg *m)
{
    struct json *json = json_object_create();

    if (m->method) {
        /* The method string is handed to the JSON value without copying. */
        json_object_put(json, "method", json_string_create_nocopy(m->method));
    }

    if (m->params) {
        json_object_put(json, "params", m->params);
    }

    if (m->result) {
        json_object_put(json, "result", m->result);
    } else if (m->type == JSONRPC_ERROR) {
        json_object_put(json, "result", json_null_create());
    }

    if (m->error) {
        json_object_put(json, "error", m->error);
    } else if (m->type == JSONRPC_REPLY) {
        json_object_put(json, "error", json_null_create());
    }

    if (m->id) {
        json_object_put(json, "id", m->id);
    } else if (m->type == JSONRPC_NOTIFY) {
        json_object_put(json, "id", json_null_create());
    }

    /* Only the container is freed; its members now belong to 'json'. */
    free(m);

    return json;
}

/* A JSON-RPC session with reconnection. */
struct jsonrpc_session {
    struct reconnect *reconnect;    /* Reconnection state machine. */
    struct jsonrpc *rpc;            /* Established connection, or NULL. */
    struct stream *stream;          /* Connection attempt in progress, or
                                     * NULL. */
    struct pstream *pstream;        /* Listener for passive sessions, or
                                     * NULL. */
    int last_error;                 /* Most recent error recorded by the
                                     * session, or 0. */
    unsigned int seqno;             /* Incremented on each connect and
                                     * disconnect. */
    uint8_t dscp;                   /* DSCP value passed when opening
                                     * streams. */
};

/* Creates and returns a jsonrpc_session to 'name', which should be a string
 * acceptable to stream_open() or pstream_open().
 *
 * If 'name' is an active connection method, e.g. "tcp:127.1.2.3", the new
 * jsonrpc_session connects to 'name'.  If 'retry' is true, then the new
 * session connects and reconnects to 'name', with backoff.  If 'retry' is
 * false, the new session will only try to connect once and after a connection
 * failure or a disconnection jsonrpc_session_is_alive() will return false for
 * the new session.
 *
 * If 'name' is a passive connection method, e.g. "ptcp:", the new
 * jsonrpc_session listens for connections to 'name'.  It maintains at most one
 * connection at any given time.
Any new connection causes the previous one
 * (if any) to be dropped. */
struct jsonrpc_session *
jsonrpc_session_open(const char *name, bool retry)
{
    struct jsonrpc_session *s;

    /* xmalloc() does not zero the structure, so every member is set
     * explicitly below. */
    s = xmalloc(sizeof *s);
    s->reconnect = reconnect_create(time_msec());
    reconnect_set_name(s->reconnect, name);
    reconnect_enable(s->reconnect, time_msec());
    s->rpc = NULL;
    s->stream = NULL;
    s->pstream = NULL;
    s->seqno = 0;
    s->dscp = 0;
    s->last_error = 0;

    /* pstream_verify_name() returning 0 is taken to mean that 'name' is a
     * passive connection method. */
    if (!pstream_verify_name(name)) {
        reconnect_set_passive(s->reconnect, true, time_msec());
    } else if (!retry) {
        /* Active connection without retry: allow a single attempt. */
        reconnect_set_max_tries(s->reconnect, 1);
        reconnect_set_backoff(s->reconnect, INT_MAX, INT_MAX);
    }

    /* A probe interval of 0 is set for connection types that do not need
     * probes (presumably this disables probing -- see reconnect.h). */
    if (!stream_or_pstream_needs_probes(name)) {
        reconnect_set_probe_interval(s->reconnect, 0);
    }

    return s;
}

/* Creates and returns a jsonrpc_session that is initially connected to
 * 'jsonrpc'.  If the connection is dropped, it will not be reconnected.
 *
 * On the assumption that such connections are likely to be short-lived
 * (e.g. from ovs-vsctl), informational logging for them is suppressed.
*/ struct jsonrpc_session * jsonrpc_session_open_unreliably(struct jsonrpc *jsonrpc, uint8_t dscp) { struct jsonrpc_session *s; s = xmalloc(sizeof *s); s->reconnect = reconnect_create(time_msec()); reconnect_set_quiet(s->reconnect, true); reconnect_set_name(s->reconnect, jsonrpc_get_name(jsonrpc)); reconnect_set_max_tries(s->reconnect, 0); reconnect_connected(s->reconnect, time_msec()); s->dscp = dscp; s->rpc = jsonrpc; s->stream = NULL; s->pstream = NULL; s->seqno = 0; return s; } void jsonrpc_session_close(struct jsonrpc_session *s) { if (s) { jsonrpc_close(s->rpc); reconnect_destroy(s->reconnect); stream_close(s->stream); pstream_close(s->pstream); free(s); } } static void jsonrpc_session_disconnect(struct jsonrpc_session *s) { if (s->rpc) { jsonrpc_error(s->rpc, EOF); jsonrpc_close(s->rpc); s->rpc = NULL; s->seqno++; } else if (s->stream) { stream_close(s->stream); s->stream = NULL; s->seqno++; } } static void jsonrpc_session_connect(struct jsonrpc_session *s) { const char *name = reconnect_get_name(s->reconnect); int error; jsonrpc_session_disconnect(s); if (!reconnect_is_passive(s->reconnect)) { error = jsonrpc_stream_open(name, &s->stream, s->dscp); if (!error) { reconnect_connecting(s->reconnect, time_msec()); } else { s->last_error = error; } } else { error = s->pstream ? 
0 : jsonrpc_pstream_open(name, &s->pstream, s->dscp);
        if (!error) {
            reconnect_listening(s->reconnect, time_msec());
        }
    }

    if (error) {
        reconnect_connect_failed(s->reconnect, time_msec(), error);
    }
    s->seqno++;
}

/* Performs periodic maintenance on 's': accepts a new connection on a
 * passive session, runs the established connection or completes a
 * connection attempt in progress, and then carries out whatever action the
 * reconnect state machine requests. */
void
jsonrpc_session_run(struct jsonrpc_session *s)
{
    if (s->pstream) {
        struct stream *stream;
        int error;

        error = pstream_accept(s->pstream, &stream);
        if (!error) {
            /* Only one connection is kept at a time, so a newly accepted
             * connection replaces any existing one. */
            if (s->rpc || s->stream) {
                VLOG_INFO_RL(&rl,
                             "%s: new connection replacing active connection",
                             reconnect_get_name(s->reconnect));
                jsonrpc_session_disconnect(s);
            }
            reconnect_connected(s->reconnect, time_msec());
            s->rpc = jsonrpc_open(stream);
        } else if (error != EAGAIN) {
            /* A real listening error (anything but EAGAIN) closes the
             * listener. */
            reconnect_listen_error(s->reconnect, time_msec(), error);
            pstream_close(s->pstream);
            s->pstream = NULL;
        }
    }

    if (s->rpc) {
        size_t backlog;
        int error;

        backlog = jsonrpc_get_backlog(s->rpc);
        jsonrpc_run(s->rpc);
        if (jsonrpc_get_backlog(s->rpc) < backlog) {
            /* Data previously caught in a queue was successfully sent (or
             * there's an error, which we'll catch below.)
             *
             * We don't count data that is successfully sent immediately as
             * activity, because there's a lot of queuing downstream from us,
             * which means that we can push a lot of data into a connection
             * that has stalled and won't ever recover.
*/
            reconnect_activity(s->reconnect, time_msec());
        }

        error = jsonrpc_get_status(s->rpc);
        if (error) {
            reconnect_disconnected(s->reconnect, time_msec(), error);
            jsonrpc_session_disconnect(s);
            s->last_error = error;
        }
    } else if (s->stream) {
        int error;

        /* A connection attempt is in progress: see whether it has
         * completed. */
        stream_run(s->stream);
        error = stream_connect(s->stream);
        if (!error) {
            /* Connected.  The new jsonrpc object takes ownership of the
             * stream. */
            reconnect_connected(s->reconnect, time_msec());
            s->rpc = jsonrpc_open(s->stream);
            s->stream = NULL;
        } else if (error != EAGAIN) {
            reconnect_connect_failed(s->reconnect, time_msec(), error);
            stream_close(s->stream);
            s->stream = NULL;
        }
    }

    /* Carry out whatever the reconnect state machine asks for. */
    switch (reconnect_run(s->reconnect, time_msec())) {
    case RECONNECT_CONNECT:
        jsonrpc_session_connect(s);
        break;

    case RECONNECT_DISCONNECT:
        reconnect_disconnected(s->reconnect, time_msec(), 0);
        jsonrpc_session_disconnect(s);
        break;

    case RECONNECT_PROBE:
        if (s->rpc) {
            struct json *params;
            struct jsonrpc_msg *request;

            /* Send an "echo" request as a probe.  Its generated id is
             * replaced with the string "echo", which is how
             * jsonrpc_session_recv() recognizes and suppresses the reply. */
            params = json_array_create_empty();
            request = jsonrpc_create_request("echo", params, NULL);
            json_destroy(request->id);
            request->id = json_string_create("echo");
            jsonrpc_send(s->rpc, request);
        }
        break;
    }
}

/* Arranges for the poll loop to wake up when jsonrpc_session_run() has work
 * to do for 's'. */
void
jsonrpc_session_wait(struct jsonrpc_session *s)
{
    if (s->rpc) {
        jsonrpc_wait(s->rpc);
    } else if (s->stream) {
        stream_run_wait(s->stream);
        stream_connect_wait(s->stream);
    }
    if (s->pstream) {
        pstream_wait(s->pstream);
    }
    reconnect_wait(s->reconnect, time_msec());
}

/* Returns the number of bytes queued for output on 's''s connection, or 0 if
 * 's' is not connected. */
size_t
jsonrpc_session_get_backlog(const struct jsonrpc_session *s)
{
    return s->rpc ? jsonrpc_get_backlog(s->rpc) : 0;
}

/* Always returns a pointer to a valid C string, assuming 's' was initialized
 * correctly. */
const char *
jsonrpc_session_get_name(const struct jsonrpc_session *s)
{
    return reconnect_get_name(s->reconnect);
}

/* Always takes ownership of 'msg', regardless of success.
*/
int
jsonrpc_session_send(struct jsonrpc_session *s, struct jsonrpc_msg *msg)
{
    if (s->rpc) {
        return jsonrpc_send(s->rpc, msg);
    } else {
        /* Not connected: discard the message. */
        jsonrpc_msg_destroy(msg);
        return ENOTCONN;
    }
}

/* Attempts to receive a message on 's'.  Returns the message, which the
 * caller then owns, or NULL if none is available.  Echo requests are
 * answered here and replies to this session's own echo probes are dropped,
 * so the caller never sees either. */
struct jsonrpc_msg *
jsonrpc_session_recv(struct jsonrpc_session *s)
{
    if (s->rpc) {
        unsigned int received_bytes;
        struct jsonrpc_msg *msg;

        /* Compare the byte counter before and after to detect whether any
         * data arrived, even if it did not complete a message. */
        received_bytes = jsonrpc_get_received_bytes(s->rpc);
        jsonrpc_recv(s->rpc, &msg);
        if (received_bytes != jsonrpc_get_received_bytes(s->rpc)) {
            /* Data was successfully received.
             *
             * Previously we only counted receiving a full message as activity,
             * but with large messages or a slow connection that policy could
             * time out the session mid-message. */
            reconnect_activity(s->reconnect, time_msec());
        }

        if (msg) {
            if (msg->type == JSONRPC_REQUEST && !strcmp(msg->method, "echo")) {
                /* Echo request.  Send reply. */
                struct jsonrpc_msg *reply;

                reply = jsonrpc_create_reply(json_clone(msg->params), msg->id);
                jsonrpc_session_send(s, reply);
            } else if (msg->type == JSONRPC_REPLY
                       && msg->id && msg->id->type == JSON_STRING
                       && !strcmp(msg->id->u.string, "echo")) {
                /* It's a reply to our echo request.  Suppress it. */
            } else {
                return msg;
            }
            /* Reached only for the two echo cases above. */
            jsonrpc_msg_destroy(msg);
        }
    }
    return NULL;
}

/* Arranges for the poll loop to wake up when jsonrpc_session_recv() may have
 * something to return.  Does nothing while 's' is not connected. */
void
jsonrpc_session_recv_wait(struct jsonrpc_session *s)
{
    if (s->rpc) {
        jsonrpc_recv_wait(s->rpc);
    }
}

/* Returns true if 's' is connected, is in the middle of connecting, or still
 * has connection attempts remaining. */
bool
jsonrpc_session_is_alive(const struct jsonrpc_session *s)
{
    return s->rpc || s->stream || reconnect_get_max_tries(s->reconnect);
}

/* Returns true if 's' currently has an established connection. */
bool
jsonrpc_session_is_connected(const struct jsonrpc_session *s)
{
    return s->rpc != NULL;
}

/* Returns 's''s sequence number, which is incremented each time the session
 * connects or disconnects. */
unsigned int
jsonrpc_session_get_seqno(const struct jsonrpc_session *s)
{
    return s->seqno;
}

/* Returns the status of 's''s connection (see jsonrpc_get_status()), or 0 if
 * 's' is null or not connected. */
int
jsonrpc_session_get_status(const struct jsonrpc_session *s)
{
    return s && s->rpc ?
jsonrpc_get_status(s->rpc) : 0; } int jsonrpc_session_get_last_error(const struct jsonrpc_session *s) { return s->last_error; } void jsonrpc_session_get_reconnect_stats(const struct jsonrpc_session *s, struct reconnect_stats *stats) { reconnect_get_stats(s->reconnect, time_msec(), stats); } void jsonrpc_session_force_reconnect(struct jsonrpc_session *s) { reconnect_force_reconnect(s->reconnect, time_msec()); } void jsonrpc_session_set_max_backoff(struct jsonrpc_session *s, int max_backoff) { reconnect_set_backoff(s->reconnect, 0, max_backoff); } void jsonrpc_session_set_probe_interval(struct jsonrpc_session *s, int probe_interval) { reconnect_set_probe_interval(s->reconnect, probe_interval); } void jsonrpc_session_set_dscp(struct jsonrpc_session *s, uint8_t dscp) { if (s->dscp != dscp) { if (s->pstream) { int error; error = pstream_set_dscp(s->pstream, dscp); if (error) { VLOG_ERR("%s: failed set_dscp %s", reconnect_get_name(s->reconnect), ovs_strerror(error)); } /* * XXX race window between setting dscp to listening socket * and accepting socket. accepted socket may have old dscp value. * Ignore this race window for now. */ } s->dscp = dscp; jsonrpc_session_force_reconnect(s); } } openvswitch-2.0.1+git20140120/lib/jsonrpc.h000066400000000000000000000122131226605124000200460ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

#ifndef JSONRPC_H
#define JSONRPC_H 1

/* This is an implementation of the JSON-RPC 1.0 specification defined at
 * http://json-rpc.org/wiki/specification. */

#include
#include
#include "openvswitch/types.h"

struct json;
struct jsonrpc_msg;
struct pstream;
struct reconnect_stats;
struct stream;

/* API for a JSON-RPC stream. */

/* Default port numbers.
 *
 * There is nothing standard about these port numbers.  They are simply what
 * we have chosen.  Note that the TCP and SSL defaults are the same value. */
#define JSONRPC_TCP_PORT 6632
#define JSONRPC_SSL_PORT 6632

int jsonrpc_stream_open(const char *name, struct stream **, uint8_t dscp);
int jsonrpc_pstream_open(const char *name, struct pstream **, uint8_t dscp);

struct jsonrpc *jsonrpc_open(struct stream *);
void jsonrpc_close(struct jsonrpc *);

void jsonrpc_run(struct jsonrpc *);
void jsonrpc_wait(struct jsonrpc *);

int jsonrpc_get_status(const struct jsonrpc *);
size_t jsonrpc_get_backlog(const struct jsonrpc *);
unsigned int jsonrpc_get_received_bytes(const struct jsonrpc *);
const char *jsonrpc_get_name(const struct jsonrpc *);

int jsonrpc_send(struct jsonrpc *, struct jsonrpc_msg *);
int jsonrpc_recv(struct jsonrpc *, struct jsonrpc_msg **);
void jsonrpc_recv_wait(struct jsonrpc *);

int jsonrpc_send_block(struct jsonrpc *, struct jsonrpc_msg *);
int jsonrpc_recv_block(struct jsonrpc *, struct jsonrpc_msg **);
int jsonrpc_transact_block(struct jsonrpc *, struct jsonrpc_msg *,
                           struct jsonrpc_msg **);

/* Messages. */
enum jsonrpc_msg_type {
    JSONRPC_REQUEST,           /* Request. */
    JSONRPC_NOTIFY,            /* Notification. */
    JSONRPC_REPLY,             /* Successful reply. */
    JSONRPC_ERROR              /* Error reply. */
};

/* A single JSON-RPC message.  Which members are meaningful depends on
 * 'type', as noted on each member. */
struct jsonrpc_msg {
    enum jsonrpc_msg_type type;
    char *method;               /* Request or notification only. */
    struct json *params;        /* Request or notification only. */
    struct json *result;        /* Successful reply only. */
    struct json *error;         /* Error reply only. */
    struct json *id;            /* Request or reply only. */
};

/* Constructors, one per message type. */
struct jsonrpc_msg *jsonrpc_create_request(const char *method,
                                           struct json *params,
                                           struct json **idp);
struct jsonrpc_msg *jsonrpc_create_notify(const char *method,
                                          struct json *params);
struct jsonrpc_msg *jsonrpc_create_reply(struct json *result,
                                         const struct json *id);
struct jsonrpc_msg *jsonrpc_create_error(struct json *error,
                                         const struct json *id);

const char *jsonrpc_msg_type_to_string(enum jsonrpc_msg_type);
char *jsonrpc_msg_is_valid(const struct jsonrpc_msg *);
void jsonrpc_msg_destroy(struct jsonrpc_msg *);

/* Conversion between messages and their JSON representation. */
char *jsonrpc_msg_from_json(struct json *, struct jsonrpc_msg **);
struct json *jsonrpc_msg_to_json(struct jsonrpc_msg *);

/* A JSON-RPC session with reconnection. */
struct jsonrpc_session *jsonrpc_session_open(const char *name, bool retry);
struct jsonrpc_session *jsonrpc_session_open_unreliably(struct jsonrpc *,
                                                        uint8_t);
void jsonrpc_session_close(struct jsonrpc_session *);

void jsonrpc_session_run(struct jsonrpc_session *);
void jsonrpc_session_wait(struct jsonrpc_session *);

size_t jsonrpc_session_get_backlog(const struct jsonrpc_session *);
const char *jsonrpc_session_get_name(const struct jsonrpc_session *);

int jsonrpc_session_send(struct jsonrpc_session *, struct jsonrpc_msg *);
struct jsonrpc_msg *jsonrpc_session_recv(struct jsonrpc_session *);
void jsonrpc_session_recv_wait(struct jsonrpc_session *);

bool jsonrpc_session_is_alive(const struct jsonrpc_session *);
bool jsonrpc_session_is_connected(const struct jsonrpc_session *);
unsigned int jsonrpc_session_get_seqno(const struct jsonrpc_session *);
int jsonrpc_session_get_status(const struct jsonrpc_session *);
int jsonrpc_session_get_last_error(const struct jsonrpc_session *);
void jsonrpc_session_get_reconnect_stats(const struct jsonrpc_session *,
                                         struct reconnect_stats *);

void jsonrpc_session_force_reconnect(struct jsonrpc_session *);

void jsonrpc_session_set_max_backoff(struct jsonrpc_session *,
                                     int max_backofF);
void jsonrpc_session_set_probe_interval(struct jsonrpc_session *,
                                        int probe_interval);
void jsonrpc_session_set_dscp(struct jsonrpc_session *,
                              uint8_t dscp);

#endif /* jsonrpc.h */
openvswitch-2.0.1+git20140120/lib/lacp.c000066400000000000000000000670001226605124000173060ustar00rootroot00000000000000/* Copyright (c) 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include
#include "lacp.h"

#include

#include "dynamic-string.h"
#include "hash.h"
#include "hmap.h"
#include "ofpbuf.h"
#include "packets.h"
#include "poll-loop.h"
#include "shash.h"
#include "timer.h"
#include "timeval.h"
#include "unixctl.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(lacp);

/* Masks for lacp_info state member. */
#define LACP_STATE_ACT  0x01 /* Activity. Active or passive? */
#define LACP_STATE_TIME 0x02 /* Timeout. Short or long timeout? */
#define LACP_STATE_AGG  0x04 /* Aggregation. Is the link bondable? */
#define LACP_STATE_SYNC 0x08 /* Synchronization. Is the link up to date? */
#define LACP_STATE_COL  0x10 /* Collecting. Is the link receiving frames? */
#define LACP_STATE_DIST 0x20 /* Distributing. Is the link sending frames? */
#define LACP_STATE_DEF  0x40 /* Defaulted. Using default partner info? */
#define LACP_STATE_EXP  0x80 /* Expired. Using expired partner info? */

/* Timer durations.  These values are passed to timer_set_duration(). */
#define LACP_FAST_TIME_TX 1000  /* Fast transmission rate. */
#define LACP_SLOW_TIME_TX 30000 /* Slow transmission rate. */
#define LACP_RX_MULTIPLIER 3    /* Multiply by TX rate to get RX rate.
*/

/* On-the-wire size of struct lacp_info; checked by the build assertion
 * below. */
#define LACP_INFO_LEN 15
OVS_PACKED(
struct lacp_info {
    ovs_be16 sys_priority;            /* System priority. */
    uint8_t sys_id[ETH_ADDR_LEN];     /* System ID. */
    ovs_be16 key;                     /* Operational key. */
    ovs_be16 port_priority;           /* Port priority. */
    ovs_be16 port_id;                 /* Port ID. */
    uint8_t state;                    /* State mask.  See LACP_STATE macros. */
});
BUILD_ASSERT_DECL(LACP_INFO_LEN == sizeof(struct lacp_info));

/* On-the-wire size of struct lacp_pdu; checked by the build assertion
 * below. */
#define LACP_PDU_LEN 110
OVS_PACKED(
struct lacp_pdu {
    uint8_t subtype;          /* Always 1. */
    uint8_t version;          /* Always 1. */

    uint8_t actor_type;       /* Always 1. */
    uint8_t actor_len;        /* Always 20. */
    struct lacp_info actor;   /* LACP actor information. */
    uint8_t z1[3];            /* Reserved.  Always 0. */

    uint8_t partner_type;     /* Always 2. */
    uint8_t partner_len;      /* Always 20. */
    struct lacp_info partner; /* LACP partner information. */
    uint8_t z2[3];            /* Reserved.  Always 0. */

    uint8_t collector_type;   /* Always 3. */
    uint8_t collector_len;    /* Always 16. */
    ovs_be16 collector_delay; /* Maximum collector delay. Set to UINT16_MAX.
                               * NOTE(review): compose_lacp_pdu() in this file
                               * writes htons(0) here, not UINT16_MAX --
                               * confirm which one is intended. */
    uint8_t z3[64];           /* Combination of several fields.  Always 0. */
});
BUILD_ASSERT_DECL(LACP_PDU_LEN == sizeof(struct lacp_pdu));

/* Implementation. */

enum slave_status {
    LACP_CURRENT,   /* Current State.  Partner up to date. */
    LACP_EXPIRED,   /* Expired State.  Partner out of date. */
    LACP_DEFAULTED, /* Defaulted State.  No partner. */
};

struct lacp {
    struct list node;             /* Node in all_lacps list. */
    char *name;                   /* Name of this lacp object.  NULL until
                                   * lacp_configure() sets it. */
    uint8_t sys_id[ETH_ADDR_LEN]; /* System ID. */
    uint16_t sys_priority;        /* System Priority. */
    bool active;                  /* Active or Passive. */

    struct hmap slaves;      /* Slaves this LACP object controls. */
    struct slave *key_slave; /* Slave whose ID will be the aggregation key. */

    bool fast;               /* True if using fast probe interval. */
    bool negotiated;         /* True if LACP negotiations were successful. */
    bool update;             /* True if lacp_update() needs to be called. */

    atomic_int ref_cnt;      /* Reference count; managed by lacp_ref() and
                              * lacp_unref(). */
};

struct slave {
    void *aux;                    /* Handle used to identify this slave. */
    struct hmap_node node;        /* Node in master's slaves map. */

    struct lacp *lacp;            /* LACP object containing this slave. */
    uint16_t port_id;             /* Port ID. */
    uint16_t port_priority;       /* Port Priority. */
    uint16_t key;                 /* Aggregation Key. 0 if default. */
    char *name;                   /* Name of this slave. */

    enum slave_status status;     /* Slave status. */
    bool attached;                /* Attached. Traffic may flow. */
    struct lacp_info partner;     /* Partner information. */
    struct lacp_info ntt_actor;   /* Used to decide if we Need To Transmit. */
    struct timer tx;              /* Next message transmission timer. */
    struct timer rx;              /* Expected message receive timer. */
};

/* Protects the list of all LACP objects and, per the annotations below, the
 * helpers that operate on them.  It is initialized (as a recursive mutex) on
 * the first call to lacp_create(), not statically. */
static struct ovs_mutex mutex;
static struct list all_lacps__ = LIST_INITIALIZER(&all_lacps__);
static struct list *const all_lacps OVS_GUARDED_BY(mutex) = &all_lacps__;

/* Internal helpers.  All of these must be called with 'mutex' held. */
static void lacp_update_attached(struct lacp *) OVS_REQUIRES(mutex);

static void slave_destroy(struct slave *) OVS_REQUIRES(mutex);
static void slave_set_defaulted(struct slave *) OVS_REQUIRES(mutex);
static void slave_set_expired(struct slave *) OVS_REQUIRES(mutex);
static void slave_get_actor(struct slave *, struct lacp_info *actor)
    OVS_REQUIRES(mutex);
static void slave_get_priority(struct slave *, struct lacp_info *priority)
    OVS_REQUIRES(mutex);
static bool slave_may_tx(const struct slave *)
    OVS_REQUIRES(mutex);
static struct slave *slave_lookup(const struct lacp *, const void *slave)
    OVS_REQUIRES(mutex);
static bool info_tx_equal(struct lacp_info *, struct lacp_info *)
    OVS_REQUIRES(mutex);

/* Handler for the "lacp/show" unixctl command. */
static unixctl_cb_func lacp_unixctl_show;

/* Populates 'pdu' with a LACP PDU comprised of 'actor' and 'partner'.
*/ static void compose_lacp_pdu(const struct lacp_info *actor, const struct lacp_info *partner, struct lacp_pdu *pdu) { memset(pdu, 0, sizeof *pdu); pdu->subtype = 1; pdu->version = 1; pdu->actor_type = 1; pdu->actor_len = 20; pdu->actor = *actor; pdu->partner_type = 2; pdu->partner_len = 20; pdu->partner = *partner; pdu->collector_type = 3; pdu->collector_len = 16; pdu->collector_delay = htons(0); } /* Parses 'b' which represents a packet containing a LACP PDU. This function * returns NULL if 'b' is malformed, or does not represent a LACP PDU format * supported by OVS. Otherwise, it returns a pointer to the lacp_pdu contained * within 'b'. */ static const struct lacp_pdu * parse_lacp_packet(const struct ofpbuf *b) { const struct lacp_pdu *pdu; pdu = ofpbuf_at(b, (uint8_t *)b->l3 - (uint8_t *)b->data, LACP_PDU_LEN); if (pdu && pdu->subtype == 1 && pdu->actor_type == 1 && pdu->actor_len == 20 && pdu->partner_type == 2 && pdu->partner_len == 20) { return pdu; } else { return NULL; } } /* LACP Protocol Implementation. */ /* Initializes the lacp module. */ void lacp_init(void) { unixctl_command_register("lacp/show", "[port]", 0, 1, lacp_unixctl_show, NULL); } /* Creates a LACP object. */ struct lacp * lacp_create(void) OVS_EXCLUDED(mutex) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; struct lacp *lacp; if (ovsthread_once_start(&once)) { ovs_mutex_init_recursive(&mutex); ovsthread_once_done(&once); } lacp = xzalloc(sizeof *lacp); hmap_init(&lacp->slaves); atomic_init(&lacp->ref_cnt, 1); ovs_mutex_lock(&mutex); list_push_back(all_lacps, &lacp->node); ovs_mutex_unlock(&mutex); return lacp; } struct lacp * lacp_ref(const struct lacp *lacp_) { struct lacp *lacp = CONST_CAST(struct lacp *, lacp_); if (lacp) { int orig; atomic_add(&lacp->ref_cnt, 1, &orig); ovs_assert(orig > 0); } return lacp; } /* Destroys 'lacp' and its slaves. Does nothing if 'lacp' is NULL. 
*/ void lacp_unref(struct lacp *lacp) OVS_EXCLUDED(mutex) { int orig; if (!lacp) { return; } atomic_sub(&lacp->ref_cnt, 1, &orig); ovs_assert(orig > 0); if (orig == 1) { struct slave *slave, *next; ovs_mutex_lock(&mutex); HMAP_FOR_EACH_SAFE (slave, next, node, &lacp->slaves) { slave_destroy(slave); } hmap_destroy(&lacp->slaves); list_remove(&lacp->node); free(lacp->name); free(lacp); ovs_mutex_unlock(&mutex); } } /* Configures 'lacp' with settings from 's'. */ void lacp_configure(struct lacp *lacp, const struct lacp_settings *s) OVS_EXCLUDED(mutex) { ovs_assert(!eth_addr_is_zero(s->id)); ovs_mutex_lock(&mutex); if (!lacp->name || strcmp(s->name, lacp->name)) { free(lacp->name); lacp->name = xstrdup(s->name); } if (!eth_addr_equals(lacp->sys_id, s->id) || lacp->sys_priority != s->priority) { memcpy(lacp->sys_id, s->id, ETH_ADDR_LEN); lacp->sys_priority = s->priority; lacp->update = true; } lacp->active = s->active; lacp->fast = s->fast; ovs_mutex_unlock(&mutex); } /* Returns true if 'lacp' is configured in active mode, false if 'lacp' is * configured for passive mode. */ bool lacp_is_active(const struct lacp *lacp) OVS_EXCLUDED(mutex) { bool ret; ovs_mutex_lock(&mutex); ret = lacp->active; ovs_mutex_unlock(&mutex); return ret; } /* Processes 'packet' which was received on 'slave_'. This function should be * called on all packets received on 'slave_' with Ethernet Type ETH_TYPE_LACP. */ void lacp_process_packet(struct lacp *lacp, const void *slave_, const struct ofpbuf *packet) OVS_EXCLUDED(mutex) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); const struct lacp_pdu *pdu; long long int tx_rate; struct slave *slave; ovs_mutex_lock(&mutex); slave = slave_lookup(lacp, slave_); if (!slave) { goto out; } pdu = parse_lacp_packet(packet); if (!pdu) { VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.", lacp->name); goto out; } slave->status = LACP_CURRENT; tx_rate = lacp->fast ? 
LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX; timer_set_duration(&slave->rx, LACP_RX_MULTIPLIER * tx_rate); slave->ntt_actor = pdu->partner; /* Update our information about our partner if it's out of date. This may * cause priorities to change so re-calculate attached status of all * slaves. */ if (memcmp(&slave->partner, &pdu->actor, sizeof pdu->actor)) { lacp->update = true; slave->partner = pdu->actor; } out: ovs_mutex_unlock(&mutex); } /* Returns the lacp_status of the given 'lacp' object (which may be NULL). */ enum lacp_status lacp_status(const struct lacp *lacp) OVS_EXCLUDED(mutex) { enum lacp_status ret; ovs_mutex_lock(&mutex); if (!lacp) { ret = LACP_DISABLED; } else if (lacp->negotiated) { ret = LACP_NEGOTIATED; } else { ret = LACP_CONFIGURED; } ovs_mutex_unlock(&mutex); return ret; } /* Registers 'slave_' as subordinate to 'lacp'. This should be called at least * once per slave in a LACP managed bond. Should also be called whenever a * slave's settings change. */ void lacp_slave_register(struct lacp *lacp, void *slave_, const struct lacp_slave_settings *s) OVS_EXCLUDED(mutex) { struct slave *slave; ovs_mutex_lock(&mutex); slave = slave_lookup(lacp, slave_); if (!slave) { slave = xzalloc(sizeof *slave); slave->lacp = lacp; slave->aux = slave_; hmap_insert(&lacp->slaves, &slave->node, hash_pointer(slave_, 0)); slave_set_defaulted(slave); if (!lacp->key_slave) { lacp->key_slave = slave; } } if (!slave->name || strcmp(s->name, slave->name)) { free(slave->name); slave->name = xstrdup(s->name); } if (slave->port_id != s->id || slave->port_priority != s->priority || slave->key != s->key) { slave->port_id = s->id; slave->port_priority = s->priority; slave->key = s->key; lacp->update = true; if (lacp->active || lacp->negotiated) { slave_set_expired(slave); } } ovs_mutex_unlock(&mutex); } /* Unregisters 'slave_' with 'lacp'. 
*/ void lacp_slave_unregister(struct lacp *lacp, const void *slave_) OVS_EXCLUDED(mutex) { struct slave *slave; ovs_mutex_lock(&mutex); slave = slave_lookup(lacp, slave_); if (slave) { slave_destroy(slave); lacp->update = true; } ovs_mutex_unlock(&mutex); } /* This function should be called whenever the carrier status of 'slave_' has * changed. If 'lacp' is null, this function has no effect.*/ void lacp_slave_carrier_changed(const struct lacp *lacp, const void *slave_) OVS_EXCLUDED(mutex) { struct slave *slave; if (!lacp) { return; } ovs_mutex_lock(&mutex); slave = slave_lookup(lacp, slave_); if (!slave) { goto out; } if (slave->status == LACP_CURRENT || slave->lacp->active) { slave_set_expired(slave); } out: ovs_mutex_unlock(&mutex); } static bool slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex) { /* The slave may be enabled if it's attached to an aggregator and its * partner is synchronized.*/ return slave->attached && (slave->partner.state & LACP_STATE_SYNC); } /* This function should be called before enabling 'slave_' to send or receive * traffic. If it returns false, 'slave_' should not enabled. As a * convenience, returns true if 'lacp' is NULL. */ bool lacp_slave_may_enable(const struct lacp *lacp, const void *slave_) OVS_EXCLUDED(mutex) { if (lacp) { struct slave *slave; bool ret; ovs_mutex_lock(&mutex); slave = slave_lookup(lacp, slave_); ret = slave ? slave_may_enable__(slave) : false; ovs_mutex_unlock(&mutex); return ret; } else { return true; } } /* Returns true if partner information on 'slave_' is up to date. 'slave_' * not being current, generally indicates a connectivity problem, or a * misconfigured (or broken) partner. */ bool lacp_slave_is_current(const struct lacp *lacp, const void *slave_) OVS_EXCLUDED(mutex) { struct slave *slave; bool ret; ovs_mutex_lock(&mutex); slave = slave_lookup(lacp, slave_); ret = slave ? 
slave->status != LACP_DEFAULTED : false; ovs_mutex_unlock(&mutex); return ret; } /* This function should be called periodically to update 'lacp'. */ void lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex) { struct slave *slave; ovs_mutex_lock(&mutex); HMAP_FOR_EACH (slave, node, &lacp->slaves) { if (timer_expired(&slave->rx)) { if (slave->status == LACP_CURRENT) { slave_set_expired(slave); } else if (slave->status == LACP_EXPIRED) { slave_set_defaulted(slave); } } } if (lacp->update) { lacp_update_attached(lacp); } HMAP_FOR_EACH (slave, node, &lacp->slaves) { struct lacp_info actor; if (!slave_may_tx(slave)) { continue; } slave_get_actor(slave, &actor); if (timer_expired(&slave->tx) || !info_tx_equal(&actor, &slave->ntt_actor)) { long long int duration; struct lacp_pdu pdu; slave->ntt_actor = actor; compose_lacp_pdu(&actor, &slave->partner, &pdu); send_pdu(slave->aux, &pdu, sizeof pdu); duration = (slave->partner.state & LACP_STATE_TIME ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX); timer_set_duration(&slave->tx, duration); } } ovs_mutex_unlock(&mutex); } /* Causes poll_block() to wake up when lacp_run() needs to be called again. */ void lacp_wait(struct lacp *lacp) OVS_EXCLUDED(mutex) { struct slave *slave; ovs_mutex_lock(&mutex); HMAP_FOR_EACH (slave, node, &lacp->slaves) { if (slave_may_tx(slave)) { timer_wait(&slave->tx); } if (slave->status != LACP_DEFAULTED) { timer_wait(&slave->rx); } } ovs_mutex_unlock(&mutex); } /* Static Helpers. */ /* Updates the attached status of all slaves controlled by 'lacp' and sets its * negotiated parameter to true if any slaves are attachable. 
*/ static void lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) { struct slave *lead, *slave; struct lacp_info lead_pri; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10); lacp->update = false; lead = NULL; HMAP_FOR_EACH (slave, node, &lacp->slaves) { struct lacp_info pri; slave->attached = false; /* XXX: In the future allow users to configure the expected system ID. * For now just special case loopback. */ if (eth_addr_equals(slave->partner.sys_id, slave->lacp->sys_id)) { VLOG_WARN_RL(&rl, "slave %s: Loopback detected. Slave is " "connected to its own bond", slave->name); continue; } if (slave->status == LACP_DEFAULTED) { continue; } slave->attached = true; slave_get_priority(slave, &pri); if (!lead || memcmp(&pri, &lead_pri, sizeof pri) < 0) { lead = slave; lead_pri = pri; } } lacp->negotiated = lead != NULL; if (lead) { HMAP_FOR_EACH (slave, node, &lacp->slaves) { if (lead->partner.key != slave->partner.key || !eth_addr_equals(lead->partner.sys_id, slave->partner.sys_id)) { slave->attached = false; } } } } static void slave_destroy(struct slave *slave) OVS_REQUIRES(mutex) { if (slave) { struct lacp *lacp = slave->lacp; lacp->update = true; hmap_remove(&lacp->slaves, &slave->node); if (lacp->key_slave == slave) { struct hmap_node *slave_node = hmap_first(&lacp->slaves); if (slave_node) { lacp->key_slave = CONTAINER_OF(slave_node, struct slave, node); } else { lacp->key_slave = NULL; } } free(slave->name); free(slave); } } static void slave_set_defaulted(struct slave *slave) OVS_REQUIRES(mutex) { memset(&slave->partner, 0, sizeof slave->partner); slave->lacp->update = true; slave->status = LACP_DEFAULTED; } static void slave_set_expired(struct slave *slave) OVS_REQUIRES(mutex) { slave->status = LACP_EXPIRED; slave->partner.state |= LACP_STATE_TIME; slave->partner.state &= ~LACP_STATE_SYNC; timer_set_duration(&slave->rx, LACP_RX_MULTIPLIER * LACP_FAST_TIME_TX); } static void slave_get_actor(struct slave *slave, struct lacp_info *actor) 
OVS_REQUIRES(mutex) { struct lacp *lacp = slave->lacp; uint16_t key; uint8_t state = 0; if (lacp->active) { state |= LACP_STATE_ACT; } if (lacp->fast) { state |= LACP_STATE_TIME; } if (slave->attached) { state |= LACP_STATE_SYNC; } if (slave->status == LACP_DEFAULTED) { state |= LACP_STATE_DEF; } if (slave->status == LACP_EXPIRED) { state |= LACP_STATE_EXP; } if (hmap_count(&lacp->slaves) > 1) { state |= LACP_STATE_AGG; } if (slave->attached || !lacp->negotiated) { state |= LACP_STATE_COL | LACP_STATE_DIST; } key = lacp->key_slave->key; if (!key) { key = lacp->key_slave->port_id; } actor->state = state; actor->key = htons(key); actor->port_priority = htons(slave->port_priority); actor->port_id = htons(slave->port_id); actor->sys_priority = htons(lacp->sys_priority); memcpy(&actor->sys_id, lacp->sys_id, ETH_ADDR_LEN); } /* Given 'slave', populates 'priority' with data representing its LACP link * priority. If two priority objects populated by this function are compared * using memcmp, the higher priority link will be less than the lower priority * link. */ static void slave_get_priority(struct slave *slave, struct lacp_info *priority) OVS_REQUIRES(mutex) { uint16_t partner_priority, actor_priority; /* Choose the lacp_info of the higher priority system by comparing their * system priorities and mac addresses. */ actor_priority = slave->lacp->sys_priority; partner_priority = ntohs(slave->partner.sys_priority); if (actor_priority < partner_priority) { slave_get_actor(slave, priority); } else if (partner_priority < actor_priority) { *priority = slave->partner; } else if (eth_addr_compare_3way(slave->lacp->sys_id, slave->partner.sys_id) < 0) { slave_get_actor(slave, priority); } else { *priority = slave->partner; } /* Key and state are not used in priority comparisons. 
*/ priority->key = 0; priority->state = 0; } static bool slave_may_tx(const struct slave *slave) OVS_REQUIRES(mutex) { return slave->lacp->active || slave->status != LACP_DEFAULTED; } static struct slave * slave_lookup(const struct lacp *lacp, const void *slave_) OVS_REQUIRES(mutex) { struct slave *slave; HMAP_FOR_EACH_IN_BUCKET (slave, node, hash_pointer(slave_, 0), &lacp->slaves) { if (slave->aux == slave_) { return slave; } } return NULL; } /* Two lacp_info structures are tx_equal if and only if they do not differ in * ways which would require a lacp_pdu transmission. */ static bool info_tx_equal(struct lacp_info *a, struct lacp_info *b) { /* LACP specification dictates that we transmit whenever the actor and * remote_actor differ in the following fields: Port, Port Priority, * System, System Priority, Aggregation Key, Activity State, Timeout State, * Sync State, and Aggregation State. The state flags are most likely to * change so are checked first. */ return !((a->state ^ b->state) & (LACP_STATE_ACT | LACP_STATE_TIME | LACP_STATE_SYNC | LACP_STATE_AGG)) && a->port_id == b->port_id && a->port_priority == b->port_priority && a->key == b->key && a->sys_priority == b->sys_priority && eth_addr_equals(a->sys_id, b->sys_id); } static struct lacp * lacp_find(const char *name) OVS_REQUIRES(mutex) { struct lacp *lacp; LIST_FOR_EACH (lacp, node, all_lacps) { if (!strcmp(lacp->name, name)) { return lacp; } } return NULL; } static void ds_put_lacp_state(struct ds *ds, uint8_t state) { if (state & LACP_STATE_ACT) { ds_put_cstr(ds, " activity"); } if (state & LACP_STATE_TIME) { ds_put_cstr(ds, " timeout"); } if (state & LACP_STATE_AGG) { ds_put_cstr(ds, " aggregation"); } if (state & LACP_STATE_SYNC) { ds_put_cstr(ds, " synchronized"); } if (state & LACP_STATE_COL) { ds_put_cstr(ds, " collecting"); } if (state & LACP_STATE_DIST) { ds_put_cstr(ds, " distributing"); } if (state & LACP_STATE_DEF) { ds_put_cstr(ds, " defaulted"); } if (state & LACP_STATE_EXP) { ds_put_cstr(ds, " 
expired"); } } static void lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex) { struct shash slave_shash = SHASH_INITIALIZER(&slave_shash); const struct shash_node **sorted_slaves = NULL; struct slave *slave; int i; ds_put_format(ds, "---- %s ----\n", lacp->name); ds_put_format(ds, "\tstatus: %s", lacp->active ? "active" : "passive"); if (lacp->negotiated) { ds_put_cstr(ds, " negotiated"); } ds_put_cstr(ds, "\n"); ds_put_format(ds, "\tsys_id: " ETH_ADDR_FMT "\n", ETH_ADDR_ARGS(lacp->sys_id)); ds_put_format(ds, "\tsys_priority: %u\n", lacp->sys_priority); ds_put_cstr(ds, "\taggregation key: "); if (lacp->key_slave) { ds_put_format(ds, "%u", lacp->key_slave->key ? lacp->key_slave->key : lacp->key_slave->port_id); } else { ds_put_cstr(ds, "none"); } ds_put_cstr(ds, "\n"); ds_put_cstr(ds, "\tlacp_time: "); if (lacp->fast) { ds_put_cstr(ds, "fast\n"); } else { ds_put_cstr(ds, "slow\n"); } HMAP_FOR_EACH (slave, node, &lacp->slaves) { shash_add(&slave_shash, slave->name, slave); } sorted_slaves = shash_sort(&slave_shash); for (i = 0; i < shash_count(&slave_shash); i++) { char *status; struct lacp_info actor; slave = sorted_slaves[i]->data; slave_get_actor(slave, &actor); switch (slave->status) { case LACP_CURRENT: status = "current"; break; case LACP_EXPIRED: status = "expired"; break; case LACP_DEFAULTED: status = "defaulted"; break; default: NOT_REACHED(); } ds_put_format(ds, "\nslave: %s: %s %s\n", slave->name, status, slave->attached ? "attached" : "detached"); ds_put_format(ds, "\tport_id: %u\n", slave->port_id); ds_put_format(ds, "\tport_priority: %u\n", slave->port_priority); ds_put_format(ds, "\tmay_enable: %s\n", (slave_may_enable__(slave) ? 
"true" : "false")); ds_put_format(ds, "\n\tactor sys_id: " ETH_ADDR_FMT "\n", ETH_ADDR_ARGS(actor.sys_id)); ds_put_format(ds, "\tactor sys_priority: %u\n", ntohs(actor.sys_priority)); ds_put_format(ds, "\tactor port_id: %u\n", ntohs(actor.port_id)); ds_put_format(ds, "\tactor port_priority: %u\n", ntohs(actor.port_priority)); ds_put_format(ds, "\tactor key: %u\n", ntohs(actor.key)); ds_put_cstr(ds, "\tactor state:"); ds_put_lacp_state(ds, actor.state); ds_put_cstr(ds, "\n\n"); ds_put_format(ds, "\tpartner sys_id: " ETH_ADDR_FMT "\n", ETH_ADDR_ARGS(slave->partner.sys_id)); ds_put_format(ds, "\tpartner sys_priority: %u\n", ntohs(slave->partner.sys_priority)); ds_put_format(ds, "\tpartner port_id: %u\n", ntohs(slave->partner.port_id)); ds_put_format(ds, "\tpartner port_priority: %u\n", ntohs(slave->partner.port_priority)); ds_put_format(ds, "\tpartner key: %u\n", ntohs(slave->partner.key)); ds_put_cstr(ds, "\tpartner state:"); ds_put_lacp_state(ds, slave->partner.state); ds_put_cstr(ds, "\n"); } shash_destroy(&slave_shash); free(sorted_slaves); } static void lacp_unixctl_show(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) OVS_EXCLUDED(mutex) { struct ds ds = DS_EMPTY_INITIALIZER; struct lacp *lacp; ovs_mutex_lock(&mutex); if (argc > 1) { lacp = lacp_find(argv[1]); if (!lacp) { unixctl_command_reply_error(conn, "no such lacp object"); goto out; } lacp_print_details(&ds, lacp); } else { LIST_FOR_EACH (lacp, node, all_lacps) { lacp_print_details(&ds, lacp); } } unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); out: ovs_mutex_unlock(&mutex); } openvswitch-2.0.1+git20140120/lib/lacp.h000066400000000000000000000051301226605124000173070ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LACP_H
#define LACP_H 1

#include
#include
#include "packets.h"

/* LACP Protocol Implementation. */

enum lacp_status {
    LACP_NEGOTIATED,                  /* Successful LACP negotiations. */
    LACP_CONFIGURED,                  /* LACP is enabled but not negotiated. */
    LACP_DISABLED                     /* LACP is not enabled. */
};

/* Settings for a LACP object as a whole; see lacp_configure(). */
struct lacp_settings {
    char *name;                       /* Name (for debugging). */
    uint8_t id[ETH_ADDR_LEN];         /* System ID. Must be nonzero. */
    uint16_t priority;                /* System priority. */
    bool active;                      /* Active or passive mode? */
    bool fast;                        /* Fast or slow probe interval. */
};

void lacp_init(void);
struct lacp *lacp_create(void);
void lacp_unref(struct lacp *);
struct lacp *lacp_ref(const struct lacp *);
void lacp_configure(struct lacp *, const struct lacp_settings *);
bool lacp_is_active(const struct lacp *);

void lacp_process_packet(struct lacp *, const void *slave,
                         const struct ofpbuf *packet);
enum lacp_status lacp_status(const struct lacp *);

/* Per-slave settings; see lacp_slave_register(). */
struct lacp_slave_settings {
    char *name;                       /* Name (for debugging). */
    uint16_t id;                      /* Port ID. */
    uint16_t priority;                /* Port priority. */
    uint16_t key;                     /* Aggregation key. */
};

void lacp_slave_register(struct lacp *, void *slave_,
                         const struct lacp_slave_settings *);
void lacp_slave_unregister(struct lacp *, const void *slave);
void lacp_slave_carrier_changed(const struct lacp *, const void *slave);
bool lacp_slave_may_enable(const struct lacp *, const void *slave);
bool lacp_slave_is_current(const struct lacp *, const void *slave_);

/* Callback function for lacp_run() for sending a LACP PDU.
*/
typedef void lacp_send_pdu(void *slave, const void *pdu, size_t pdu_size);

void lacp_run(struct lacp *, lacp_send_pdu *);
void lacp_wait(struct lacp *);

#endif /* lacp.h */
openvswitch-2.0.1+git20140120/lib/latch.c000066400000000000000000000041431226605124000174610ustar00rootroot00000000000000/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include

#include "latch.h"
#include
#include
#include
#include "poll-loop.h"
#include "socket-util.h"

/* Initializes 'latch' as initially unset.
 *
 * The latch is backed by the descriptor pair that xpipe_nonblocking() stores
 * in 'latch->fds'. */
void
latch_init(struct latch *latch)
{
    xpipe_nonblocking(latch->fds);
}

/* Destroys 'latch' by closing both of its file descriptors. */
void
latch_destroy(struct latch *latch)
{
    close(latch->fds[0]);
    close(latch->fds[1]);
}

/* Resets 'latch' to the unset state.  Returns true if 'latch' was previously
 * set, false otherwise.
 *
 * Implemented as a single read() of up to _POSIX_PIPE_BUF bytes from
 * 'fds[0]'; the latch was set if any bytes were read. */
bool
latch_poll(struct latch *latch)
{
    char buffer[_POSIX_PIPE_BUF];

    return read(latch->fds[0], buffer, sizeof buffer) > 0;
}

/* Sets 'latch'.
 *
 * Calls are not additive: a single latch_poll() clears out any number of
 * latch_set().
 *
 * Writes one byte to 'fds[1]'; the result of write() is deliberately
 * ignored. */
void
latch_set(struct latch *latch)
{
    ignore(write(latch->fds[1], "", 1));
}

/* Returns true if 'latch' is set, false otherwise.  Does not reset 'latch'
 * to the unset state.
*/ bool latch_is_set(const struct latch *latch) { struct pollfd pfd; int retval; pfd.fd = latch->fds[0]; pfd.events = POLLIN; do { retval = poll(&pfd, 1, 0); } while (retval < 0 && errno == EINTR); return pfd.revents & POLLIN; } /* Causes the next poll_block() to wake up when 'latch' is set. * * ('where' is used in debug logging. Commonly one would use latch_wait() to * automatically provide the caller's source file and line number for * 'where'.) */ void latch_wait_at(const struct latch *latch, const char *where) { poll_fd_wait_at(latch->fds[0], POLLIN, where); } openvswitch-2.0.1+git20140120/lib/latch.h000066400000000000000000000023111226605124000174610ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LATCH_H #define LATCH_H 1 /* A thread-safe, signal-safe, pollable doorbell. 
*
 * This is a thin wrapper around a pipe that allows threads to notify each
 * other that an event has occurred in a signal-safe way. */

#include
#include "util.h"

struct latch {
    int fds[2];                 /* Pipe: latch.c reads and polls fds[0] and
                                 * writes to fds[1]. */
};

void latch_init(struct latch *);
void latch_destroy(struct latch *);

bool latch_poll(struct latch *);
void latch_set(struct latch *);

bool latch_is_set(const struct latch *);
void latch_wait_at(const struct latch *, const char *where);

/* Calls latch_wait_at() with SOURCE_LOCATOR as 'where'. */
#define latch_wait(latch) latch_wait_at(latch, SOURCE_LOCATOR)

#endif /* latch.h */
openvswitch-2.0.1+git20140120/lib/learn.c000066400000000000000000000540621226605124000174740ustar00rootroot00000000000000/*
 * Copyright (c) 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ #include #include "learn.h" #include "byte-order.h" #include "dynamic-string.h" #include "match.h" #include "meta-flow.h" #include "nx-match.h" #include "ofp-actions.h" #include "ofp-errors.h" #include "ofp-util.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "unaligned.h" static ovs_be16 get_be16(const void **pp) { const ovs_be16 *p = *pp; ovs_be16 value = *p; *pp = p + 1; return value; } static ovs_be32 get_be32(const void **pp) { const ovs_be32 *p = *pp; ovs_be32 value = get_unaligned_be32(p); *pp = p + 1; return value; } static void get_subfield(int n_bits, const void **p, struct mf_subfield *sf) { sf->field = mf_from_nxm_header(ntohl(get_be32(p))); sf->ofs = ntohs(get_be16(p)); sf->n_bits = n_bits; } static unsigned int learn_min_len(uint16_t header) { int n_bits = header & NX_LEARN_N_BITS_MASK; int src_type = header & NX_LEARN_SRC_MASK; int dst_type = header & NX_LEARN_DST_MASK; unsigned int min_len; min_len = 0; if (src_type == NX_LEARN_SRC_FIELD) { min_len += sizeof(ovs_be32); /* src_field */ min_len += sizeof(ovs_be16); /* src_ofs */ } else { min_len += DIV_ROUND_UP(n_bits, 16); } if (dst_type == NX_LEARN_DST_MATCH || dst_type == NX_LEARN_DST_LOAD) { min_len += sizeof(ovs_be32); /* dst_field */ min_len += sizeof(ovs_be16); /* dst_ofs */ } return min_len; } /* Converts 'nal' into a "struct ofpact_learn" and appends that struct to * 'ofpacts'. Returns 0 if successful, otherwise an OFPERR_*. 
*/
enum ofperr
learn_from_openflow(const struct nx_action_learn *nal, struct ofpbuf *ofpacts)
{
    struct ofpact_learn *learn;
    const void *p, *end;

    /* The padding field must be zero. */
    if (nal->pad) {
        return OFPERR_OFPBAC_BAD_ARGUMENT;
    }

    learn = ofpact_put_LEARN(ofpacts);

    /* Copy the fixed-size part of the action, converting to host byte
     * order. */
    learn->idle_timeout = ntohs(nal->idle_timeout);
    learn->hard_timeout = ntohs(nal->hard_timeout);
    learn->priority = ntohs(nal->priority);
    learn->cookie = ntohll(nal->cookie);
    learn->table_id = nal->table_id;
    learn->fin_idle_timeout = ntohs(nal->fin_idle_timeout);
    learn->fin_hard_timeout = ntohs(nal->fin_hard_timeout);

    /* We only support "send-flow-removed" for now. */
    switch (ntohs(nal->flags)) {
    case 0:
        learn->flags = 0;
        break;
    case OFPFF_SEND_FLOW_REM:
        learn->flags = OFPUTIL_FF_SEND_FLOW_REM;
        break;
    default:
        return OFPERR_OFPBAC_BAD_ARGUMENT;
    }

    /* Table 0xff is rejected as a destination table. */
    if (learn->table_id == 0xff) {
        return OFPERR_OFPBAC_BAD_ARGUMENT;
    }

    /* The variable-length specs follow the fixed header and run up to
     * 'nal->len' bytes from the start of the action. */
    end = (char *) nal + ntohs(nal->len);
    for (p = nal + 1; p != end; ) {
        struct ofpact_learn_spec *spec;
        uint16_t header = ntohs(get_be16(&p));

        /* A zero header ends the list of specs. */
        if (!header) {
            break;
        }

        spec = ofpbuf_put_zeros(ofpacts, sizeof *spec);
        /* Re-fetch 'learn' after appending to 'ofpacts' (presumably because
         * the append can relocate the buffer -- confirm against ofpbuf). */
        learn = ofpacts->l2;
        learn->n_specs++;

        spec->src_type = header & NX_LEARN_SRC_MASK;
        spec->dst_type = header & NX_LEARN_DST_MASK;
        spec->n_bits = header & NX_LEARN_N_BITS_MASK;

        /* Check for valid src and dst type combination. */
        if (spec->dst_type == NX_LEARN_DST_MATCH ||
            spec->dst_type == NX_LEARN_DST_LOAD ||
            (spec->dst_type == NX_LEARN_DST_OUTPUT &&
             spec->src_type == NX_LEARN_SRC_FIELD)) {
            /* OK. */
        } else {
            return OFPERR_OFPBAC_BAD_ARGUMENT;
        }

        /* Check that the arguments don't overrun the end of the action. */
        if ((char *) end - (char *) p < learn_min_len(header)) {
            return OFPERR_OFPBAC_BAD_LEN;
        }

        /* Get the source. */
        if (spec->src_type == NX_LEARN_SRC_FIELD) {
            get_subfield(spec->n_bits, &p, &spec->src);
        } else {
            /* The immediate occupies a whole number of 16-bit words. */
            int p_bytes = 2 * DIV_ROUND_UP(spec->n_bits, 16);

            bitwise_copy(p, p_bytes, 0,
                         &spec->src_imm, sizeof spec->src_imm, 0,
                         spec->n_bits);
            p = (const uint8_t *) p + p_bytes;
        }

        /* Get the destination. */
        if (spec->dst_type == NX_LEARN_DST_MATCH ||
            spec->dst_type == NX_LEARN_DST_LOAD) {
            get_subfield(spec->n_bits, &p, &spec->dst);
        }
    }
    ofpact_update_len(ofpacts, &learn->ofpact);

    /* Anything left after the last spec must be zero padding. */
    if (!is_all_zeros(p, (char *) end - (char *) p)) {
        return OFPERR_OFPBAC_BAD_ARGUMENT;
    }

    return 0;
}

/* Checks that 'learn' is a valid action on 'flow'.  Returns 0 if it is valid,
 * otherwise an OFPERR_*. */
enum ofperr
learn_check(const struct ofpact_learn *learn, const struct flow *flow)
{
    const struct ofpact_learn_spec *spec;
    struct match match;

    /* 'match' accumulates the match written by the DST_MATCH specs seen so
     * far; later destinations are checked against it. */
    match_init_catchall(&match);
    for (spec = learn->specs; spec < &learn->specs[learn->n_specs]; spec++) {
        enum ofperr error;

        /* Check the source. */
        if (spec->src_type == NX_LEARN_SRC_FIELD) {
            error = mf_check_src(&spec->src, flow);
            if (error) {
                return error;
            }
        }

        /* Check the destination. */
        switch (spec->dst_type) {
        case NX_LEARN_DST_MATCH:
            error = mf_check_src(&spec->dst, &match.flow);
            if (error) {
                return error;
            }

            mf_write_subfield(&spec->dst, &spec->src_imm, &match);
            break;

        case NX_LEARN_DST_LOAD:
            error = mf_check_dst(&spec->dst, &match.flow);
            if (error) {
                return error;
            }
            break;

        case NX_LEARN_DST_OUTPUT:
            /* Nothing to do. */
            break;
        }
    }
    return 0;
}

/* Appends 'x', already in network byte order, to 'b'. */
static void
put_be16(struct ofpbuf *b, ovs_be16 x)
{
    ofpbuf_put(b, &x, sizeof x);
}

/* Appends 'x', already in network byte order, to 'b'. */
static void
put_be32(struct ofpbuf *b, ovs_be32 x)
{
    ofpbuf_put(b, &x, sizeof x);
}

/* Appends host-order 'x' to 'b' in network byte order. */
static void
put_u16(struct ofpbuf *b, uint16_t x)
{
    put_be16(b, htons(x));
}

/* Appends host-order 'x' to 'b' in network byte order. */
static void
put_u32(struct ofpbuf *b, uint32_t x)
{
    put_be32(b, htonl(x));
}

/* Converts 'learn' into a "struct nx_action_learn" and appends that action to
 * 'openflow'.
*/
void
learn_to_nxast(const struct ofpact_learn *learn, struct ofpbuf *openflow)
{
    /* Offset of the action within 'openflow'.  The action is located again
     * by offset once all specs are appended, instead of reusing a pointer
     * obtained before the appends. */
    const size_t action_ofs = openflow->size;
    struct nx_action_learn *nal = ofputil_put_NXAST_LEARN(openflow);
    size_t unaligned;
    size_t i;

    /* Fixed-size header, converted to network byte order. */
    nal->table_id = learn->table_id;
    nal->flags = htons(learn->flags);
    nal->cookie = htonll(learn->cookie);
    nal->priority = htons(learn->priority);
    nal->idle_timeout = htons(learn->idle_timeout);
    nal->hard_timeout = htons(learn->hard_timeout);
    nal->fin_idle_timeout = htons(learn->fin_idle_timeout);
    nal->fin_hard_timeout = htons(learn->fin_hard_timeout);

    /* One variable-length spec per entry in 'learn->specs'. */
    for (i = 0; i < learn->n_specs; i++) {
        const struct ofpact_learn_spec *spec = &learn->specs[i];

        /* Spec header: bit count combined with the source and destination
         * types. */
        put_u16(openflow, spec->n_bits | spec->dst_type | spec->src_type);

        /* Source: either a subfield reference or an immediate padded to a
         * whole number of 16-bit words. */
        if (spec->src_type != NX_LEARN_SRC_FIELD) {
            size_t imm_len = 2 * DIV_ROUND_UP(spec->n_bits, 16);
            uint8_t *imm = ofpbuf_put_zeros(openflow, imm_len);

            bitwise_copy(&spec->src_imm, sizeof spec->src_imm, 0,
                         imm, imm_len, 0,
                         spec->n_bits);
        } else {
            put_u32(openflow, spec->src.field->nxm_header);
            put_u16(openflow, spec->src.ofs);
        }

        /* Destination: only match and load specs carry a subfield. */
        switch (spec->dst_type) {
        case NX_LEARN_DST_MATCH:
        case NX_LEARN_DST_LOAD:
            put_u32(openflow, spec->dst.field->nxm_header);
            put_u16(openflow, spec->dst.ofs);
            break;

        default:
            break;
        }
    }

    /* Zero-pad the action to a multiple of 8 bytes. */
    unaligned = (openflow->size - action_ofs) % 8;
    if (unaligned) {
        ofpbuf_put_zeros(openflow, 8 - unaligned);
    }

    /* Record the final length in the action header. */
    nal = ofpbuf_at_assert(openflow, action_ofs, sizeof *nal);
    nal->len = htons(openflow->size - action_ofs);
}

/* Composes 'fm' so that executing it will implement 'learn' given that the
 * packet being processed has 'flow' as its flow.
 *
 * Uses 'ofpacts' to store the flow mod's actions.  The caller must initialize
 * 'ofpacts' and retains ownership of it.  'fm->ofpacts' will point into the
 * 'ofpacts' buffer.
 *
 * The caller has to actually execute 'fm'.
*/ void learn_execute(const struct ofpact_learn *learn, const struct flow *flow, struct ofputil_flow_mod *fm, struct ofpbuf *ofpacts) { const struct ofpact_learn_spec *spec; match_init_catchall(&fm->match); fm->priority = learn->priority; fm->cookie = htonll(0); fm->cookie_mask = htonll(0); fm->new_cookie = htonll(learn->cookie); fm->modify_cookie = fm->new_cookie != htonll(UINT64_MAX); fm->table_id = learn->table_id; fm->command = OFPFC_MODIFY_STRICT; fm->idle_timeout = learn->idle_timeout; fm->hard_timeout = learn->hard_timeout; fm->buffer_id = UINT32_MAX; fm->out_port = OFPP_NONE; fm->flags = learn->flags; fm->ofpacts = NULL; fm->ofpacts_len = 0; if (learn->fin_idle_timeout || learn->fin_hard_timeout) { struct ofpact_fin_timeout *oft; oft = ofpact_put_FIN_TIMEOUT(ofpacts); oft->fin_idle_timeout = learn->fin_idle_timeout; oft->fin_hard_timeout = learn->fin_hard_timeout; } for (spec = learn->specs; spec < &learn->specs[learn->n_specs]; spec++) { union mf_subvalue value; int chunk, ofs; if (spec->src_type == NX_LEARN_SRC_FIELD) { mf_read_subfield(&spec->src, flow, &value); } else { value = spec->src_imm; } switch (spec->dst_type) { case NX_LEARN_DST_MATCH: mf_write_subfield(&spec->dst, &value, &fm->match); break; case NX_LEARN_DST_LOAD: for (ofs = 0; ofs < spec->n_bits; ofs += chunk) { struct ofpact_reg_load *load; chunk = MIN(spec->n_bits - ofs, 64); load = ofpact_put_REG_LOAD(ofpacts); load->dst.field = spec->dst.field; load->dst.ofs = spec->dst.ofs + ofs; load->dst.n_bits = chunk; bitwise_copy(&value, sizeof value, ofs, &load->subvalue, sizeof load->subvalue, 0, chunk); } break; case NX_LEARN_DST_OUTPUT: if (spec->n_bits <= 16 || is_all_zeros(value.u8, sizeof value - 2)) { ofp_port_t port = u16_to_ofp(ntohs(value.be16[7])); if (ofp_to_u16(port) < ofp_to_u16(OFPP_MAX) || port == OFPP_IN_PORT || port == OFPP_FLOOD || port == OFPP_LOCAL || port == OFPP_ALL) { ofpact_put_OUTPUT(ofpacts)->port = port; } } break; } } ofpact_pad(ofpacts); fm->ofpacts = ofpacts->data; 
fm->ofpacts_len = ofpacts->size; } /* Perform a bitwise-OR on 'wc''s fields that are relevant as sources in * the learn action 'learn'. */ void learn_mask(const struct ofpact_learn *learn, struct flow_wildcards *wc) { const struct ofpact_learn_spec *spec; union mf_subvalue value; memset(&value, 0xff, sizeof value); for (spec = learn->specs; spec < &learn->specs[learn->n_specs]; spec++) { if (spec->src_type == NX_LEARN_SRC_FIELD) { mf_write_subfield_flow(&spec->src, &value, &wc->masks); } } } /* Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT learn_parse_load_immediate(const char *s, struct ofpact_learn_spec *spec) { const char *full_s = s; const char *arrow = strstr(s, "->"); struct mf_subfield dst; union mf_subvalue imm; char *error; memset(&imm, 0, sizeof imm); if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X') && arrow) { const char *in = arrow - 1; uint8_t *out = imm.u8 + sizeof imm.u8 - 1; int n = arrow - (s + 2); int i; for (i = 0; i < n; i++) { int hexit = hexit_value(in[-i]); if (hexit < 0) { return xasprintf("%s: bad hex digit in value", full_s); } out[-(i / 2)] |= i % 2 ? hexit << 4 : hexit; } s = arrow; } else { imm.be64[1] = htonll(strtoull(s, (char **) &s, 0)); } if (strncmp(s, "->", 2)) { return xasprintf("%s: missing `->' following value", full_s); } s += 2; error = mf_parse_subfield(&dst, s); if (error) { return error; } if (!bitwise_is_all_zeros(&imm, sizeof imm, dst.n_bits, (8 * sizeof imm) - dst.n_bits)) { return xasprintf("%s: value does not fit into %u bits", full_s, dst.n_bits); } spec->n_bits = dst.n_bits; spec->src_type = NX_LEARN_SRC_IMMEDIATE; spec->src_imm = imm; spec->dst_type = NX_LEARN_DST_LOAD; spec->dst = dst; return NULL; } /* Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/
static char * WARN_UNUSED_RESULT
learn_parse_spec(const char *orig, char *name, char *value,
                 struct ofpact_learn_spec *spec)
{
    /* Look the field up once; non-null means 'name' is a whole field name. */
    const struct mf_field *dst = mf_from_name(name);

    if (dst) {
        /* "field=value": match the whole field against an immediate. */
        union mf_value imm;
        char *error;

        error = mf_parse_value(dst, value, &imm);
        if (error) {
            return error;
        }

        spec->n_bits = dst->n_bits;
        spec->src_type = NX_LEARN_SRC_IMMEDIATE;
        /* Place the value in the trailing 'dst->n_bytes' bytes of
         * 'src_imm'. */
        memset(&spec->src_imm, 0, sizeof spec->src_imm);
        memcpy(&spec->src_imm.u8[sizeof spec->src_imm - dst->n_bytes],
               &imm, dst->n_bytes);
        spec->dst_type = NX_LEARN_DST_MATCH;
        spec->dst.field = dst;
        spec->dst.ofs = 0;
        spec->dst.n_bits = dst->n_bits;
    } else if (strchr(name, '[')) {
        /* "dst[...]=src[...]" or just "dst[...]": match on a subfield. */

        /* Parse destination and check prerequisites. */
        char *error;

        error = mf_parse_subfield(&spec->dst, name);
        if (error) {
            return error;
        }

        /* Parse source and check prerequisites. */
        if (value[0] != '\0') {
            error = mf_parse_subfield(&spec->src, value);
            if (error) {
                return error;
            }
            if (spec->src.n_bits != spec->dst.n_bits) {
                /* 'name' is the destination and 'value' the source, so each
                 * is reported with its own width. */
                return xasprintf("%s: bit widths of %s (%u) and %s (%u) "
                                 "differ", orig, name, spec->dst.n_bits,
                                 value, spec->src.n_bits);
            }
        } else {
            /* No source given: the destination subfield is also the
             * source. */
            spec->src = spec->dst;
        }

        spec->n_bits = spec->src.n_bits;
        spec->src_type = NX_LEARN_SRC_FIELD;
        spec->dst_type = NX_LEARN_DST_MATCH;
    } else if (!strcmp(name, "load")) {
        /* A '-' found before any '[' means the value begins with an
         * immediate ("imm->dst"), not a subfield ("src[...]->dst[...]"). */
        if (value[strcspn(value, "[-")] == '-') {
            char *error = learn_parse_load_immediate(value, spec);
            if (error) {
                return error;
            }
        } else {
            struct ofpact_reg_move move;
            char *error;

            error = nxm_parse_reg_move(&move, value);
            if (error) {
                return error;
            }

            spec->n_bits = move.src.n_bits;
            spec->src_type = NX_LEARN_SRC_FIELD;
            spec->src = move.src;
            spec->dst_type = NX_LEARN_DST_LOAD;
            spec->dst = move.dst;
        }
    } else if (!strcmp(name, "output")) {
        char *error = mf_parse_subfield(&spec->src, value);
        if (error) {
            return error;
        }

        spec->n_bits = spec->src.n_bits;
        spec->src_type = NX_LEARN_SRC_FIELD;
        spec->dst_type = NX_LEARN_DST_OUTPUT;
    } else {
        return xasprintf("%s: unknown keyword %s", orig, name);
    }

    return NULL;
}

/* Returns
NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT learn_parse__(char *orig, char *arg, struct ofpbuf *ofpacts) { struct ofpact_learn *learn; struct match match; char *name, *value; learn = ofpact_put_LEARN(ofpacts); learn->idle_timeout = OFP_FLOW_PERMANENT; learn->hard_timeout = OFP_FLOW_PERMANENT; learn->priority = OFP_DEFAULT_PRIORITY; learn->table_id = 1; match_init_catchall(&match); while (ofputil_parse_key_value(&arg, &name, &value)) { if (!strcmp(name, "table")) { learn->table_id = atoi(value); if (learn->table_id == 255) { return xasprintf("%s: table id 255 not valid for `learn' " "action", orig); } } else if (!strcmp(name, "priority")) { learn->priority = atoi(value); } else if (!strcmp(name, "idle_timeout")) { learn->idle_timeout = atoi(value); } else if (!strcmp(name, "hard_timeout")) { learn->hard_timeout = atoi(value); } else if (!strcmp(name, "fin_idle_timeout")) { learn->fin_idle_timeout = atoi(value); } else if (!strcmp(name, "fin_hard_timeout")) { learn->fin_hard_timeout = atoi(value); } else if (!strcmp(name, "cookie")) { learn->cookie = strtoull(value, NULL, 0); } else { struct ofpact_learn_spec *spec; char *error; spec = ofpbuf_put_zeros(ofpacts, sizeof *spec); learn = ofpacts->l2; learn->n_specs++; error = learn_parse_spec(orig, name, value, spec); if (error) { return error; } /* Update 'match' to allow for satisfying destination * prerequisites. */ if (spec->src_type == NX_LEARN_SRC_IMMEDIATE && spec->dst_type == NX_LEARN_DST_MATCH) { mf_write_subfield(&spec->dst, &spec->src_imm, &match); } } } ofpact_update_len(ofpacts, &learn->ofpact); return NULL; } /* Parses 'arg' as a set of arguments to the "learn" action and appends a * matching OFPACT_LEARN action to 'ofpacts'. ovs-ofctl(8) describes the * format parsed. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. 
The caller is responsible for freeing the returned string. * * If 'flow' is nonnull, then it should be the flow from a struct match that is * the matching rule for the learning action. This helps to better validate * the action's arguments. * * Modifies 'arg'. */ char * WARN_UNUSED_RESULT learn_parse(char *arg, struct ofpbuf *ofpacts) { char *orig = xstrdup(arg); char *error = learn_parse__(orig, arg, ofpacts); free(orig); return error; } /* Appends a description of 'learn' to 's', in the format that ovs-ofctl(8) * describes. */ void learn_format(const struct ofpact_learn *learn, struct ds *s) { const struct ofpact_learn_spec *spec; struct match match; match_init_catchall(&match); ds_put_format(s, "learn(table=%"PRIu8, learn->table_id); if (learn->idle_timeout != OFP_FLOW_PERMANENT) { ds_put_format(s, ",idle_timeout=%"PRIu16, learn->idle_timeout); } if (learn->hard_timeout != OFP_FLOW_PERMANENT) { ds_put_format(s, ",hard_timeout=%"PRIu16, learn->hard_timeout); } if (learn->fin_idle_timeout) { ds_put_format(s, ",fin_idle_timeout=%"PRIu16, learn->fin_idle_timeout); } if (learn->fin_hard_timeout) { ds_put_format(s, ",fin_hard_timeout=%"PRIu16, learn->fin_hard_timeout); } if (learn->priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, ",priority=%"PRIu16, learn->priority); } if (learn->flags & OFPFF_SEND_FLOW_REM) { ds_put_cstr(s, ",OFPFF_SEND_FLOW_REM"); } if (learn->cookie != 0) { ds_put_format(s, ",cookie=%#"PRIx64, learn->cookie); } for (spec = learn->specs; spec < &learn->specs[learn->n_specs]; spec++) { ds_put_char(s, ','); switch (spec->src_type | spec->dst_type) { case NX_LEARN_SRC_IMMEDIATE | NX_LEARN_DST_MATCH: if (spec->dst.ofs == 0 && spec->dst.n_bits == spec->dst.field->n_bits) { union mf_value value; memset(&value, 0, sizeof value); bitwise_copy(&spec->src_imm, sizeof spec->src_imm, 0, &value, spec->dst.field->n_bytes, 0, spec->dst.field->n_bits); ds_put_format(s, "%s=", spec->dst.field->name); mf_format(spec->dst.field, &value, NULL, s); } else { 
mf_format_subfield(&spec->dst, s); ds_put_char(s, '='); mf_format_subvalue(&spec->src_imm, s); } break; case NX_LEARN_SRC_FIELD | NX_LEARN_DST_MATCH: mf_format_subfield(&spec->dst, s); if (spec->src.field != spec->dst.field || spec->src.ofs != spec->dst.ofs) { ds_put_char(s, '='); mf_format_subfield(&spec->src, s); } break; case NX_LEARN_SRC_IMMEDIATE | NX_LEARN_DST_LOAD: ds_put_format(s, "load:"); mf_format_subvalue(&spec->src_imm, s); ds_put_cstr(s, "->"); mf_format_subfield(&spec->dst, s); break; case NX_LEARN_SRC_FIELD | NX_LEARN_DST_LOAD: ds_put_cstr(s, "load:"); mf_format_subfield(&spec->src, s); ds_put_cstr(s, "->"); mf_format_subfield(&spec->dst, s); break; case NX_LEARN_SRC_FIELD | NX_LEARN_DST_OUTPUT: ds_put_cstr(s, "output:"); mf_format_subfield(&spec->src, s); break; } } ds_put_char(s, ')'); } openvswitch-2.0.1+git20140120/lib/learn.h000066400000000000000000000030471226605124000174760ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LEARN_H #define LEARN_H 1 #include "compiler.h" #include "ofp-errors.h" struct ds; struct flow; struct flow_wildcards; struct ofpbuf; struct ofpact_learn; struct ofputil_flow_mod; struct nx_action_learn; /* NXAST_LEARN helper functions. * * See include/openflow/nicira-ext.h for NXAST_LEARN specification. 
*/

enum ofperr learn_from_openflow(const struct nx_action_learn *,
                                struct ofpbuf *ofpacts);
enum ofperr learn_check(const struct ofpact_learn *, const struct flow *);
void learn_to_nxast(const struct ofpact_learn *, struct ofpbuf *openflow);

void learn_execute(const struct ofpact_learn *, const struct flow *,
                   struct ofputil_flow_mod *, struct ofpbuf *ofpacts);
void learn_mask(const struct ofpact_learn *, struct flow_wildcards *);

char *learn_parse(char *, struct ofpbuf *ofpacts) WARN_UNUSED_RESULT;
void learn_format(const struct ofpact_learn *, struct ds *);

#endif /* learn.h */
openvswitch-2.0.1+git20140120/lib/learning-switch.c000066400000000000000000000472451226605124000214760ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include
#include "learning-switch.h"

#include
#include
#include
#include
#include

#include "byte-order.h"
#include "classifier.h"
#include "flow.h"
#include "hmap.h"
#include "mac-learning.h"
#include "ofpbuf.h"
#include "ofp-actions.h"
#include "ofp-errors.h"
#include "ofp-msgs.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "ofp-util.h"
#include "openflow/openflow.h"
#include "poll-loop.h"
#include "rconn.h"
#include "shash.h"
#include "simap.h"
#include "timeval.h"
#include "vconn.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(learning_switch);

/* Queue assignment for one port.  Entries are created by name in
 * lswitch_create() and linked by port number once the switch reports its
 * ports (see process_switch_features()). */
struct lswitch_port {
    struct hmap_node hmap_node; /* Hash node for port number. */
    ofp_port_t port_no;         /* OpenFlow port number. */
    uint32_t queue_id;          /* OpenFlow queue number. */
};

/* Connection state of a learning switch. */
enum lswitch_state {
    S_CONNECTING,               /* Waiting for connection to complete. */
    S_FEATURES_REPLY,           /* Waiting for features reply. */
    S_SWITCHING,                /* Switching flows. */
};

struct lswitch {
    struct rconn *rconn;
    enum lswitch_state state;

    /* If nonnegative, the switch sets up flows that expire after the given
     * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
     * Otherwise, the switch processes every packet. */
    int max_idle;

    enum ofputil_protocol protocol;
    unsigned long long int datapath_id;
    struct mac_learning *ml;    /* NULL to act as hub instead of switch. */
    struct flow_wildcards wc;   /* Wildcards to apply to flows. */
    bool action_normal;         /* Use OFPP_NORMAL? */

    /* Queue distribution. */
    uint32_t default_queue;     /* Default OpenFlow queue, or UINT32_MAX. */
    struct hmap queue_numbers;  /* Map from port number to lswitch_port. */
    struct shash queue_names;   /* Map from port name to lswitch_port. */

    /* Number of outgoing queued packets on the rconn. */
    struct rconn_packet_counter *queued;

    /* If true, do not reply to any messages from the switch (for debugging
     * fail-open mode). */
    bool mute;

    /* Optional "flow mod" requests to send to the switch at connection time,
     * to set up the flow table. */
    const struct ofputil_flow_mod *default_flows;
    size_t n_default_flows;
    enum ofputil_protocol usable_protocols;
};

/* The log messages here could actually be useful in debugging, so keep the
 * rate limit relatively high.
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); static void queue_tx(struct lswitch *, struct ofpbuf *); static void send_features_request(struct lswitch *); static void lswitch_process_packet(struct lswitch *, const struct ofpbuf *); static enum ofperr process_switch_features(struct lswitch *, struct ofp_header *); static void process_packet_in(struct lswitch *, const struct ofp_header *); static void process_echo_request(struct lswitch *, const struct ofp_header *); /* Creates and returns a new learning switch whose configuration is given by * 'cfg'. * * 'rconn' is used to send out an OpenFlow features request. */ struct lswitch * lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg) { struct lswitch *sw; uint32_t ofpfw; sw = xzalloc(sizeof *sw); sw->rconn = rconn; sw->state = S_CONNECTING; sw->max_idle = cfg->max_idle; sw->datapath_id = 0; sw->ml = (cfg->mode == LSW_LEARN ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME) : NULL); sw->action_normal = cfg->mode == LSW_NORMAL; switch (cfg->wildcards) { case 0: ofpfw = 0; break; case UINT32_MAX: /* Try to wildcard as many fields as possible, but we cannot * wildcard all fields. We need in_port to detect moves. We need * Ethernet source and dest and VLAN VID to do L2 learning. 
*/ ofpfw = (OFPFW10_DL_TYPE | OFPFW10_DL_VLAN_PCP | OFPFW10_NW_SRC_ALL | OFPFW10_NW_DST_ALL | OFPFW10_NW_TOS | OFPFW10_NW_PROTO | OFPFW10_TP_SRC | OFPFW10_TP_DST); break; default: ofpfw = cfg->wildcards; break; } ofputil_wildcard_from_ofpfw10(ofpfw, &sw->wc); sw->default_queue = cfg->default_queue; hmap_init(&sw->queue_numbers); shash_init(&sw->queue_names); if (cfg->port_queues) { struct simap_node *node; SIMAP_FOR_EACH (node, cfg->port_queues) { struct lswitch_port *port = xmalloc(sizeof *port); hmap_node_nullify(&port->hmap_node); port->queue_id = node->data; shash_add(&sw->queue_names, node->name, port); } } sw->default_flows = cfg->default_flows; sw->n_default_flows = cfg->n_default_flows; sw->usable_protocols = cfg->usable_protocols; sw->queued = rconn_packet_counter_create(); return sw; } static void lswitch_handshake(struct lswitch *sw) { enum ofputil_protocol protocol; send_features_request(sw); protocol = ofputil_protocol_from_ofp_version(rconn_get_version(sw->rconn)); if (sw->default_flows) { struct ofpbuf *msg = NULL; int error = 0; size_t i; /* If the initial protocol isn't good enough for default_flows, then * pick one that will work and encode messages to set up that * protocol. * * This could be improved by actually negotiating a mutually acceptable * flow format with the switch, but that would require an asynchronous * state machine. This version ought to work fine in practice. 
*/ if (!(protocol & sw->usable_protocols)) { enum ofputil_protocol want = rightmost_1bit(sw->usable_protocols); while (!error) { msg = ofputil_encode_set_protocol(protocol, want, &protocol); if (!msg) { break; } error = rconn_send(sw->rconn, msg, NULL); } } if (protocol & sw->usable_protocols) { for (i = 0; !error && i < sw->n_default_flows; i++) { msg = ofputil_encode_flow_mod(&sw->default_flows[i], protocol); error = rconn_send(sw->rconn, msg, NULL); } if (error) { VLOG_INFO_RL(&rl, "%s: failed to queue default flows (%s)", rconn_get_name(sw->rconn), ovs_strerror(error)); } } else { VLOG_INFO_RL(&rl, "%s: failed to set usable protocol", rconn_get_name(sw->rconn)); } } sw->protocol = protocol; } bool lswitch_is_alive(const struct lswitch *sw) { return rconn_is_alive(sw->rconn); } /* Destroys 'sw'. */ void lswitch_destroy(struct lswitch *sw) { if (sw) { struct lswitch_port *node, *next; rconn_destroy(sw->rconn); HMAP_FOR_EACH_SAFE (node, next, hmap_node, &sw->queue_numbers) { hmap_remove(&sw->queue_numbers, &node->hmap_node); free(node); } shash_destroy(&sw->queue_names); mac_learning_unref(sw->ml); rconn_packet_counter_destroy(sw->queued); free(sw); } } /* Takes care of necessary 'sw' activity, except for receiving packets (which * the caller must do). 
*/
void
lswitch_run(struct lswitch *sw)
{
    int i;

    if (sw->ml) {
        ovs_rwlock_wrlock(&sw->ml->rwlock);
        mac_learning_run(sw->ml);
        ovs_rwlock_unlock(&sw->ml->rwlock);
    }

    rconn_run(sw->rconn);

    if (sw->state == S_CONNECTING) {
        /* A version of -1 means that none is available yet; once one is,
         * start the handshake. */
        if (rconn_get_version(sw->rconn) != -1) {
            lswitch_handshake(sw);
            sw->state = S_FEATURES_REPLY;
        }
        return;
    }

    /* Process at most 50 received messages per call. */
    for (i = 0; i < 50; i++) {
        struct ofpbuf *msg;

        msg = rconn_recv(sw->rconn);
        if (!msg) {
            break;
        }

        /* When muted, messages are still received but dropped unprocessed. */
        if (!sw->mute) {
            lswitch_process_packet(sw, msg);
        }
        ofpbuf_delete(msg);
    }
}

/* Registers the events that 'sw' is waiting for: MAC learning activity (if
 * MAC learning is in use) and activity on the rconn. */
void
lswitch_wait(struct lswitch *sw)
{
    if (sw->ml) {
        ovs_rwlock_rdlock(&sw->ml->rwlock);
        mac_learning_wait(sw->ml);
        ovs_rwlock_unlock(&sw->ml->rwlock);
    }
    rconn_run_wait(sw->rconn);
    rconn_recv_wait(sw->rconn);
}

/* Processes 'msg', which should be an OpenFlow received on 'rconn', according
 * to the learning switch state in 'sw'.  The most likely result of processing
 * is that flow-setup and packet-out OpenFlow messages will be sent out on
 * 'rconn'. */
static void
lswitch_process_packet(struct lswitch *sw, const struct ofpbuf *msg)
{
    enum ofptype type;
    struct ofpbuf b;

    /* Decode the message type from a copy of the ofpbuf struct, leaving
     * 'msg' itself untouched.  Messages whose type cannot be determined are
     * dropped. */
    b = *msg;
    if (ofptype_pull(&type, &b)) {
        return;
    }

    /* While waiting for the features reply, only echo requests and the
     * features reply itself are handled. */
    if (sw->state == S_FEATURES_REPLY
        && type != OFPTYPE_ECHO_REQUEST
        && type != OFPTYPE_FEATURES_REPLY) {
        return;
    }

    switch (type) {
    case OFPTYPE_ECHO_REQUEST:
        process_echo_request(sw, msg->data);
        break;

    case OFPTYPE_FEATURES_REPLY:
        if (sw->state == S_FEATURES_REPLY) {
            /* A valid reply starts switching; an invalid one drops the
             * connection. */
            if (!process_switch_features(sw, msg->data)) {
                sw->state = S_SWITCHING;
            } else {
                rconn_disconnect(sw->rconn);
            }
        }
        break;

    case OFPTYPE_PACKET_IN:
        process_packet_in(sw, msg->data);
        break;

    case OFPTYPE_FLOW_REMOVED:
        /* Nothing to do. */
        break;

    /* Every remaining message type is ignored, with a debug log entry when
     * debug logging is enabled. */
    case OFPTYPE_HELLO:
    case OFPTYPE_ERROR:
    case OFPTYPE_ECHO_REPLY:
    case OFPTYPE_FEATURES_REQUEST:
    case OFPTYPE_GET_CONFIG_REQUEST:
    case OFPTYPE_GET_CONFIG_REPLY:
    case OFPTYPE_SET_CONFIG:
    case OFPTYPE_PORT_STATUS:
    case OFPTYPE_PACKET_OUT:
    case OFPTYPE_FLOW_MOD:
    case OFPTYPE_PORT_MOD:
    case OFPTYPE_BARRIER_REQUEST:
    case OFPTYPE_BARRIER_REPLY:
    case OFPTYPE_QUEUE_GET_CONFIG_REQUEST:
    case OFPTYPE_QUEUE_GET_CONFIG_REPLY:
    case OFPTYPE_DESC_STATS_REQUEST:
    case OFPTYPE_DESC_STATS_REPLY:
    case OFPTYPE_FLOW_STATS_REQUEST:
    case OFPTYPE_FLOW_STATS_REPLY:
    case OFPTYPE_AGGREGATE_STATS_REQUEST:
    case OFPTYPE_AGGREGATE_STATS_REPLY:
    case OFPTYPE_TABLE_STATS_REQUEST:
    case OFPTYPE_TABLE_STATS_REPLY:
    case OFPTYPE_PORT_STATS_REQUEST:
    case OFPTYPE_PORT_STATS_REPLY:
    case OFPTYPE_QUEUE_STATS_REQUEST:
    case OFPTYPE_QUEUE_STATS_REPLY:
    case OFPTYPE_PORT_DESC_STATS_REQUEST:
    case OFPTYPE_PORT_DESC_STATS_REPLY:
    case OFPTYPE_ROLE_REQUEST:
    case OFPTYPE_ROLE_REPLY:
    case OFPTYPE_SET_FLOW_FORMAT:
    case OFPTYPE_FLOW_MOD_TABLE_ID:
    case OFPTYPE_SET_PACKET_IN_FORMAT:
    case OFPTYPE_FLOW_AGE:
    case OFPTYPE_SET_CONTROLLER_ID:
    case OFPTYPE_FLOW_MONITOR_STATS_REQUEST:
    case OFPTYPE_FLOW_MONITOR_STATS_REPLY:
    case OFPTYPE_FLOW_MONITOR_CANCEL:
    case OFPTYPE_FLOW_MONITOR_PAUSED:
    case OFPTYPE_FLOW_MONITOR_RESUMED:
    case OFPTYPE_GET_ASYNC_REQUEST:
    case OFPTYPE_GET_ASYNC_REPLY:
    case OFPTYPE_SET_ASYNC_CONFIG:
    case OFPTYPE_METER_MOD:
    case OFPTYPE_GROUP_STATS_REQUEST:
    case OFPTYPE_GROUP_STATS_REPLY:
    case OFPTYPE_GROUP_DESC_STATS_REQUEST:
    case OFPTYPE_GROUP_DESC_STATS_REPLY:
    case OFPTYPE_GROUP_FEATURES_STATS_REQUEST:
    case OFPTYPE_GROUP_FEATURES_STATS_REPLY:
    case OFPTYPE_METER_STATS_REQUEST:
    case OFPTYPE_METER_STATS_REPLY:
    case OFPTYPE_METER_CONFIG_STATS_REQUEST:
    case OFPTYPE_METER_CONFIG_STATS_REPLY:
    case OFPTYPE_METER_FEATURES_STATS_REQUEST:
    case OFPTYPE_METER_FEATURES_STATS_REPLY:
    case OFPTYPE_TABLE_FEATURES_STATS_REQUEST:
    case OFPTYPE_TABLE_FEATURES_STATS_REPLY:
    default:
        if (VLOG_IS_DBG_ENABLED()) {
            char *s = ofp_to_string(msg->data, msg->size, 2);
            VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s",
                        sw->datapath_id, s);
            free(s);
        }
    }
}

/* Queues an OFPT_FEATURES_REQUEST followed by an OFPT_SET_CONFIG on 'sw''s
 * connection, using the OpenFlow version reported by the rconn. */
static void
send_features_request(struct lswitch *sw)
{
    struct ofpbuf *b;
    struct ofp_switch_config *osc;
    int ofp_version = rconn_get_version(sw->rconn);

    ovs_assert(ofp_version > 0 && ofp_version < 0xff);

    /* Send OFPT_FEATURES_REQUEST. */
    b = ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST, ofp_version, 0);
    queue_tx(sw, b);

    /* Send OFPT_SET_CONFIG. */
    b = ofpraw_alloc(OFPRAW_OFPT_SET_CONFIG, ofp_version, sizeof *osc);
    osc = ofpbuf_put_zeros(b, sizeof *osc);
    osc->miss_send_len = htons(OFP_DEFAULT_MISS_SEND_LEN);
    queue_tx(sw, b);
}

/* Sends 'b' on 'sw''s connection, with a limit of 10 passed to
 * rconn_send_with_limit() and accounted against 'sw->queued'.  Failures
 * other than ENOTCONN are logged; ENOTCONN is ignored silently. */
static void
queue_tx(struct lswitch *sw, struct ofpbuf *b)
{
    int retval = rconn_send_with_limit(sw->rconn, b, sw->queued, 10);
    if (retval && retval != ENOTCONN) {
        if (retval == EAGAIN) {
            VLOG_INFO_RL(&rl, "%016llx: %s: tx queue overflow",
                         sw->datapath_id, rconn_get_name(sw->rconn));
        } else {
            VLOG_WARN_RL(&rl, "%016llx: %s: send: %s",
                         sw->datapath_id, rconn_get_name(sw->rconn),
                         ovs_strerror(retval));
        }
    }
}

/* Handles the features reply 'oh': records the datapath ID and, for each
 * reported port that has a queue configured by name, records its port number
 * and indexes it in 'sw->queue_numbers'.
 *
 * Returns 0 if successful, otherwise the OFPERR_* from decoding 'oh'. */
static enum ofperr
process_switch_features(struct lswitch *sw, struct ofp_header *oh)
{
    struct ofputil_switch_features features;
    struct ofputil_phy_port port;
    enum ofperr error;
    struct ofpbuf b;

    error = ofputil_decode_switch_features(oh, &features, &b);
    if (error) {
        VLOG_ERR("received invalid switch feature reply (%s)",
                 ofperr_to_string(error));
        return error;
    }

    sw->datapath_id = features.datapath_id;

    while (!ofputil_pull_phy_port(oh->version, &b, &port)) {
        struct lswitch_port *lp = shash_find_data(&sw->queue_names, port.name);
        /* Insert each port at most once: a null hash node means that the port
         * is not yet in 'queue_numbers'. */
        if (lp && hmap_node_is_null(&lp->hmap_node)) {
            lp->port_no = port.port_no;
            hmap_insert(&sw->queue_numbers, &lp->hmap_node,
                        hash_ofp_port(lp->port_no));
        }
    }
    return 0;
}

/* Learns the source MAC of 'flow' (if MAC learning is in use) and returns the
 * port that a packet with 'flow' should be sent to: OFPP_NONE to drop it,
 * OFPP_FLOOD if the destination is unknown, OFPP_NORMAL if 'sw' uses the
 * "normal" action and the destination is known, otherwise the learned
 * port. */
static ofp_port_t
lswitch_choose_destination(struct lswitch *sw, const struct flow *flow)
{
    ofp_port_t out_port;

    /* Learn the source MAC. */
    if (sw->ml) {
        ovs_rwlock_wrlock(&sw->ml->rwlock);
        if (mac_learning_may_learn(sw->ml, flow->dl_src, 0)) {
            struct mac_entry *mac = mac_learning_insert(sw->ml, flow->dl_src,
                                                        0);
            if (mac->port.ofp_port != flow->in_port.ofp_port) {
                VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on "
                            "port %"PRIu16, sw->datapath_id,
                            ETH_ADDR_ARGS(flow->dl_src),
                            flow->in_port.ofp_port);

                mac->port.ofp_port = flow->in_port.ofp_port;
                mac_learning_changed(sw->ml);
            }
        }
        ovs_rwlock_unlock(&sw->ml->rwlock);
    }

    /* Drop frames for reserved multicast addresses. */
    if (eth_addr_is_reserved(flow->dl_dst)) {
        return OFPP_NONE;
    }

    /* Flood unless the destination MAC has been learned. */
    out_port = OFPP_FLOOD;
    if (sw->ml) {
        struct mac_entry *mac;

        ovs_rwlock_rdlock(&sw->ml->rwlock);
        mac = mac_learning_lookup(sw->ml, flow->dl_dst, 0);
        if (mac) {
            out_port = mac->port.ofp_port;
            if (out_port == flow->in_port.ofp_port) {
                /* Don't send a packet back out its input port. */
                ovs_rwlock_unlock(&sw->ml->rwlock);
                return OFPP_NONE;
            }
        }
        ovs_rwlock_unlock(&sw->ml->rwlock);
    }

    /* Check if we need to use "NORMAL" action. */
    if (sw->action_normal && out_port != OFPP_FLOOD) {
        return OFPP_NORMAL;
    }

    return out_port;
}

/* Returns the queue configured for 'in_port', or 'sw->default_queue' if
 * 'in_port' has no entry in 'sw->queue_numbers'. */
static uint32_t
get_queue_id(const struct lswitch *sw, ofp_port_t in_port)
{
    const struct lswitch_port *port;

    HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_ofp_port(in_port),
                             &sw->queue_numbers) {
        if (port->port_no == in_port) {
            return port->queue_id;
        }
    }

    return sw->default_queue;
}

static void
process_packet_in(struct lswitch *sw, const struct ofp_header *oh)
{
    struct ofputil_packet_in pi;
    uint32_t queue_id;
    ofp_port_t out_port;

    uint64_t ofpacts_stub[64 / 8];
    struct ofpbuf ofpacts;

    struct ofputil_packet_out po;
    enum ofperr error;

    struct ofpbuf pkt;
    struct flow flow;
    union flow_in_port in_port_;

    error = ofputil_decode_packet_in(&pi, oh);
    if (error) {
        VLOG_WARN_RL(&rl, "failed to decode packet-in: %s",
                     ofperr_to_string(error));
        return;
    }

    /* Ignore packets sent via output to OFPP_CONTROLLER.  This library never
     * uses such an action.
You never know what experiments might be going on, * though, and it seems best not to interfere with them. */ if (pi.reason != OFPR_NO_MATCH) { return; } /* Extract flow data from 'opi' into 'flow'. */ ofpbuf_use_const(&pkt, pi.packet, pi.packet_len); in_port_.ofp_port = pi.fmd.in_port; flow_extract(&pkt, 0, 0, NULL, &in_port_, &flow); flow.tunnel.tun_id = pi.fmd.tun_id; /* Choose output port. */ out_port = lswitch_choose_destination(sw, &flow); /* Make actions. */ queue_id = get_queue_id(sw, pi.fmd.in_port); ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); if (out_port == OFPP_NONE) { /* No actions. */ } else if (queue_id == UINT32_MAX || ofp_to_u16(out_port) >= ofp_to_u16(OFPP_MAX)) { ofpact_put_OUTPUT(&ofpacts)->port = out_port; } else { struct ofpact_enqueue *enqueue = ofpact_put_ENQUEUE(&ofpacts); enqueue->port = out_port; enqueue->queue = queue_id; } ofpact_pad(&ofpacts); /* Prepare packet_out in case we need one. */ po.buffer_id = pi.buffer_id; if (po.buffer_id == UINT32_MAX) { po.packet = pkt.data; po.packet_len = pkt.size; } else { po.packet = NULL; po.packet_len = 0; } po.in_port = pi.fmd.in_port; po.ofpacts = ofpacts.data; po.ofpacts_len = ofpacts.size; /* Send the packet, and possibly the whole flow, to the output port. */ if (sw->max_idle >= 0 && (!sw->ml || out_port != OFPP_FLOOD)) { struct ofputil_flow_mod fm; struct ofpbuf *buffer; /* The output port is known, or we always flood everything, so add a * new flow. */ memset(&fm, 0, sizeof fm); match_init(&fm.match, &flow, &sw->wc); ofputil_normalize_match_quiet(&fm.match); fm.priority = 0; fm.table_id = 0xff; fm.command = OFPFC_ADD; fm.idle_timeout = sw->max_idle; fm.buffer_id = pi.buffer_id; fm.out_port = OFPP_NONE; fm.ofpacts = ofpacts.data; fm.ofpacts_len = ofpacts.size; buffer = ofputil_encode_flow_mod(&fm, sw->protocol); queue_tx(sw, buffer); /* If the switch didn't buffer the packet, we need to send a copy. 
*/ if (pi.buffer_id == UINT32_MAX && out_port != OFPP_NONE) { queue_tx(sw, ofputil_encode_packet_out(&po, sw->protocol)); } } else { /* We don't know that MAC, or we don't set up flows. Send along the * packet without setting up a flow. */ if (pi.buffer_id != UINT32_MAX || out_port != OFPP_NONE) { queue_tx(sw, ofputil_encode_packet_out(&po, sw->protocol)); } } } static void process_echo_request(struct lswitch *sw, const struct ofp_header *rq) { queue_tx(sw, make_echo_reply(rq)); } openvswitch-2.0.1+git20140120/lib/learning-switch.h000066400000000000000000000047061226605124000214760ustar00rootroot00000000000000/* * Copyright (c) 2008, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LEARNING_SWITCH_H #define LEARNING_SWITCH_H 1 #include #include #include #include "ofp-util.h" struct ofpbuf; struct rconn; enum lswitch_mode { LSW_NORMAL, /* Always use OFPP_NORMAL. */ LSW_FLOOD, /* Always use OFPP_FLOOD. */ LSW_LEARN /* Learn MACs at controller. */ }; struct lswitch_config { enum lswitch_mode mode; /* 0 to use exact-match flow entries, * a OFPFW10_* bitmask to enable specific wildcards, * or UINT32_MAX to use the default wildcards (wildcarding as many fields * as possible. * * Ignored when max_idle < 0 (in which case no flows are set up). */ uint32_t wildcards; /* <0: Process every packet at the controller. * >=0: Expire flows after they are unused for 'max_idle' seconds. * OFP_FLOW_PERMANENT: Set up permanent flows. 
*/ int max_idle; /* Optional "flow mod" requests to send to the switch at connection time, * to set up the flow table. */ const struct ofputil_flow_mod *default_flows; size_t n_default_flows; enum ofputil_protocol usable_protocols; /* The OpenFlow queue to use by default. Use UINT32_MAX to avoid * specifying a particular queue. */ uint32_t default_queue; /* Maps from a port name to a queue_id. */ const struct simap *port_queues; /* If true, do not reply to any messages from the switch (for debugging * fail-open mode). */ bool mute; }; struct lswitch *lswitch_create(struct rconn *, const struct lswitch_config *); bool lswitch_is_alive(const struct lswitch *); void lswitch_set_queue(struct lswitch *sw, uint32_t queue); void lswitch_run(struct lswitch *); void lswitch_wait(struct lswitch *); void lswitch_destroy(struct lswitch *); void lswitch_mute(struct lswitch *); #endif /* learning-switch.h */ openvswitch-2.0.1+git20140120/lib/list.c000066400000000000000000000123641226605124000173450ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "list.h" /* Initializes 'list' as an empty list. */ void list_init(struct list *list) { list->next = list->prev = list; } /* Initializes 'list' with pointers that will (probably) cause segfaults if * dereferenced and, better yet, show up clearly in a debugger. 
*/ void list_poison(struct list *list) { memset(list, 0xcc, sizeof *list); } /* Inserts 'elem' just before 'before'. */ void list_insert(struct list *before, struct list *elem) { elem->prev = before->prev; elem->next = before; before->prev->next = elem; before->prev = elem; } /* Removes elements 'first' though 'last' (exclusive) from their current list, then inserts them just before 'before'. */ void list_splice(struct list *before, struct list *first, struct list *last) { if (first == last) { return; } last = last->prev; /* Cleanly remove 'first'...'last' from its current list. */ first->prev->next = last->next; last->next->prev = first->prev; /* Splice 'first'...'last' into new list. */ first->prev = before->prev; last->next = before; before->prev->next = first; before->prev = last; } /* Inserts 'elem' at the beginning of 'list', so that it becomes the front in 'list'. */ void list_push_front(struct list *list, struct list *elem) { list_insert(list->next, elem); } /* Inserts 'elem' at the end of 'list', so that it becomes the back in * 'list'. */ void list_push_back(struct list *list, struct list *elem) { list_insert(list, elem); } /* Puts 'elem' in the position currently occupied by 'position'. * Afterward, 'position' is not part of a list. */ void list_replace(struct list *element, const struct list *position) { element->next = position->next; element->next->prev = element; element->prev = position->prev; element->prev->next = element; } /* Adjusts pointers around 'list' to compensate for 'list' having been moved * around in memory (e.g. as a consequence of realloc()). * * This always works if 'list' is a member of a list, or if 'list' is the head * of a non-empty list. It fails badly, however, if 'list' is the head of an * empty list; just use list_init() in that case. */ void list_moved(struct list *list) { list->prev->next = list->next->prev = list; } /* Initializes 'dst' with the contents of 'src', compensating for moving it * around in memory. 
The effect is that, if 'src' was the head of a list, now * 'dst' is the head of a list containing the same elements. */ void list_move(struct list *dst, struct list *src) { if (!list_is_empty(src)) { *dst = *src; list_moved(dst); } else { list_init(dst); } } /* Removes 'elem' from its list and returns the element that followed it. Undefined behavior if 'elem' is not in a list. */ struct list * list_remove(struct list *elem) { elem->prev->next = elem->next; elem->next->prev = elem->prev; return elem->next; } /* Removes the front element from 'list' and returns it. Undefined behavior if 'list' is empty before removal. */ struct list * list_pop_front(struct list *list) { struct list *front = list->next; list_remove(front); return front; } /* Removes the back element from 'list' and returns it. Undefined behavior if 'list' is empty before removal. */ struct list * list_pop_back(struct list *list) { struct list *back = list->prev; list_remove(back); return back; } /* Returns the front element in 'list_'. Undefined behavior if 'list_' is empty. */ struct list * list_front(const struct list *list_) { struct list *list = CONST_CAST(struct list *, list_); ovs_assert(!list_is_empty(list)); return list->next; } /* Returns the back element in 'list_'. Undefined behavior if 'list_' is empty. */ struct list * list_back(const struct list *list_) { struct list *list = CONST_CAST(struct list *, list_); ovs_assert(!list_is_empty(list)); return list->prev; } /* Returns the number of elements in 'list'. Runs in O(n) in the number of elements. */ size_t list_size(const struct list *list) { const struct list *e; size_t cnt = 0; for (e = list->next; e != list; e = e->next) { cnt++; } return cnt; } /* Returns true if 'list' is empty, false otherwise. */ bool list_is_empty(const struct list *list) { return list->next == list; } /* Returns true if 'list' has exactly 1 element, false otherwise. 
*/
bool
list_is_singleton(const struct list *list)
{
    /* "Short" means 0 or 1 elements, so excluding the empty case leaves
     * exactly 1. */
    return list_is_short(list) && !list_is_empty(list);
}

/* Returns true if 'list' has 0 or 1 elements, false otherwise. */
bool
list_is_short(const struct list *list)
{
    /* With 0 elements both links point at the head itself; with 1 element
     * both point at that element. */
    return list->next == list->prev;
}
openvswitch-2.0.1+git20140120/lib/list.h000066400000000000000000000065041226605124000173510ustar00rootroot00000000000000
/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef LIST_H
#define LIST_H 1

/* Doubly linked list. */

/* NOTE(review): the next two '#include' directives have no header name in
 * this copy (apparently lost in extraction) -- restore before building. */
#include
#include
#include "util.h"

/* Doubly linked list head or element. */
struct list {
    struct list *prev;     /* Previous list element. */
    struct list *next;     /* Next list element. */
};

/* Static initializer for an empty list whose head is at address LIST. */
#define LIST_INITIALIZER(LIST) { LIST, LIST }

void list_init(struct list *);
void list_poison(struct list *);

/* List insertion. */
void list_insert(struct list *, struct list *);
void list_splice(struct list *before, struct list *first, struct list *last);
void list_push_front(struct list *, struct list *);
void list_push_back(struct list *, struct list *);
void list_replace(struct list *, const struct list *);
void list_moved(struct list *);
void list_move(struct list *dst, struct list *src);

/* List removal. */
struct list *list_remove(struct list *);
struct list *list_pop_front(struct list *);
struct list *list_pop_back(struct list *);

/* List elements.
*/
struct list *list_front(const struct list *);
struct list *list_back(const struct list *);

/* List properties. */
size_t list_size(const struct list *);
bool list_is_empty(const struct list *);
bool list_is_singleton(const struct list *);
bool list_is_short(const struct list *);

/* Iterates ITER, front to back, over the structures that embed a list node
 * as MEMBER in LIST.  The next node is read from ITER after each pass of the
 * body. */
#define LIST_FOR_EACH(ITER, MEMBER, LIST)                               \
    for (ASSIGN_CONTAINER(ITER, (LIST)->next, MEMBER);                  \
         &(ITER)->MEMBER != (LIST);                                     \
         ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER))
/* Like LIST_FOR_EACH, but starts at the element after the current ITER. */
#define LIST_FOR_EACH_CONTINUE(ITER, MEMBER, LIST)                      \
    for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER);           \
         &(ITER)->MEMBER != (LIST);                                     \
         ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER))
/* Like LIST_FOR_EACH, but walks from back to front via 'prev'. */
#define LIST_FOR_EACH_REVERSE(ITER, MEMBER, LIST)                       \
    for (ASSIGN_CONTAINER(ITER, (LIST)->prev, MEMBER);                  \
         &(ITER)->MEMBER != (LIST);                                     \
         ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER))
/* Like LIST_FOR_EACH_REVERSE, but starts at the element before the current
 * ITER. */
#define LIST_FOR_EACH_REVERSE_CONTINUE(ITER, MEMBER, LIST)              \
    for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER);           \
         &(ITER)->MEMBER != (LIST);                                     \
         ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER))
/* Like LIST_FOR_EACH, but stores the following element in NEXT before the
 * body runs and advances from NEXT rather than from ITER. */
#define LIST_FOR_EACH_SAFE(ITER, NEXT, MEMBER, LIST)               \
    for (ASSIGN_CONTAINER(ITER, (LIST)->next, MEMBER);             \
         (&(ITER)->MEMBER != (LIST)                                \
          ? ASSIGN_CONTAINER(NEXT, (ITER)->MEMBER.next, MEMBER), 1 \
          : 0);                                                    \
         (ITER) = (NEXT))

#endif /* list.h */
openvswitch-2.0.1+git20140120/lib/lockfile.c000066400000000000000000000177561226605124000201740ustar00rootroot00000000000000
 /* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): the bare '#include' directives below have no header name in
 * this copy (apparently lost in extraction) -- restore before building. */
#include

#include "lockfile.h"

#include
#include
#include
#include
#include
#include

#include "coverage.h"
#include "hash.h"
#include "hmap.h"
#include "ovs-thread.h"
#include "timeval.h"
#include "util.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(lockfile);

COVERAGE_DEFINE(lockfile_lock);
COVERAGE_DEFINE(lockfile_timeout);
COVERAGE_DEFINE(lockfile_error);
COVERAGE_DEFINE(lockfile_unlock);

/* One held lock file.  'device' and 'inode' identify the file and form the
 * key in the lock table; 'fd' is the open descriptor, or -1 once the entry
 * has been removed from the table. */
struct lockfile {
    struct hmap_node hmap_node;
    char *name;
    dev_t device;
    ino_t inode;
    int fd;
};

/* Lock table.
 *
 * We have to do this stupid dance because POSIX says that closing *any* file
 * descriptor for a file on which a process holds a lock drops *all* locks on
 * that file.  That means that we can't afford to open a lockfile more than
 * once. */
static struct ovs_mutex lock_table_mutex = OVS_MUTEX_INITIALIZER;
static struct hmap lock_table__ = HMAP_INITIALIZER(&lock_table__);
static struct hmap *const lock_table OVS_GUARDED_BY(lock_table_mutex)
    = &lock_table__;

static void lockfile_unhash(struct lockfile *);
static int lockfile_try_lock(const char *name, pid_t *pidp,
                             struct lockfile **lockfilep);

/* Returns the name of the lockfile that would be created for locking a file
 * named 'filename_'.  The caller is responsible for freeing the returned name,
 * with free(), when it is no longer needed. */
char *
lockfile_name(const char *filename_)
{
    char *filename;
    const char *slash;
    char *lockname;

    /* If 'filename_' is a symlink, base the name of the lockfile on the
     * symlink's target rather than the name of the symlink.  That way, if a
     * file is symlinked, but there is no symlink for its lockfile, then there
     * is only a single lockfile for both the source and the target of the
     * symlink, not one for each. */
    filename = follow_symlinks(filename_);
    slash = strrchr(filename, '/');
    /* The lock file is "<dir>/.<base>.~lock~", or ".<name>.~lock~" when the
     * name has no directory part. */
    lockname = (slash
                ? xasprintf("%.*s/.%s.~lock~",
                            (int) (slash - filename), filename, slash + 1)
                : xasprintf(".%s.~lock~", filename));
    free(filename);

    return lockname;
}

/* Takes an exclusive lock on the lock file associated with 'file' (see
 * lockfile_name()), to keep other processes from modifying 'file'.
 *
 * Returns 0 on success, otherwise a positive errno value.  On success,
 * '*lockfilep' is set to point to a new "struct lockfile *" that may be
 * unlocked with lockfile_unlock().  On failure, '*lockfilep' is set to
 * NULL.  Will not block if the lock cannot be immediately acquired. */
int
lockfile_lock(const char *file, struct lockfile **lockfilep)
{
    /* Only exclusive ("write") locks are supported.  This is not a problem
     * because the Open vSwitch code that currently uses lock files does so in
     * stylized ways such that any number of readers may access a file while it
     * is being written. */
    char *lock_name;
    pid_t pid;
    int error;

    COVERAGE_INC(lockfile_lock);

    lock_name = lockfile_name(file);

    ovs_mutex_lock(&lock_table_mutex);
    error = lockfile_try_lock(lock_name, &pid, lockfilep);
    ovs_mutex_unlock(&lock_table_mutex);

    if (error) {
        COVERAGE_INC(lockfile_error);
        /* Report EACCES to the caller as EAGAIN. */
        if (error == EACCES) {
            error = EAGAIN;
        }
        /* 'pid' is nonzero only when lockfile_try_lock() identified the
         * process that holds the lock. */
        if (pid) {
            VLOG_WARN("%s: cannot lock file because it is already locked by "
                      "pid %ld", lock_name, (long int) pid);
        } else {
            VLOG_WARN("%s: failed to lock file: %s",
                      lock_name, ovs_strerror(error));
        }
    }

    free(lock_name);
    return error;
}

/* Unlocks 'lockfile', which must have been created by a call to
 * lockfile_lock(), and frees 'lockfile'. */
void
lockfile_unlock(struct lockfile *lockfile)
{
    if (lockfile) {
        ovs_mutex_lock(&lock_table_mutex);
        lockfile_unhash(lockfile);
        ovs_mutex_unlock(&lock_table_mutex);

        COVERAGE_INC(lockfile_unlock);
        free(lockfile->name);
        free(lockfile);
    }
}

/* Marks all the currently locked lockfiles as no longer locked.  It makes
 * sense to call this function after fork(), because a child created by fork()
 * does not hold its parents' locks.
*/ void lockfile_postfork(void) { struct lockfile *lockfile; ovs_mutex_lock(&lock_table_mutex); HMAP_FOR_EACH (lockfile, hmap_node, lock_table) { if (lockfile->fd >= 0) { VLOG_WARN("%s: child does not inherit lock", lockfile->name); lockfile_unhash(lockfile); } } ovs_mutex_unlock(&lock_table_mutex); } static uint32_t lockfile_hash(dev_t device, ino_t inode) { return hash_bytes(&device, sizeof device, hash_bytes(&inode, sizeof inode, 0)); } static struct lockfile * lockfile_find(dev_t device, ino_t inode) OVS_REQUIRES(&lock_table_mutex) { struct lockfile *lockfile; HMAP_FOR_EACH_WITH_HASH (lockfile, hmap_node, lockfile_hash(device, inode), lock_table) { if (lockfile->device == device && lockfile->inode == inode) { return lockfile; } } return NULL; } static void lockfile_unhash(struct lockfile *lockfile) OVS_REQUIRES(&lock_table_mutex) { if (lockfile->fd >= 0) { close(lockfile->fd); lockfile->fd = -1; hmap_remove(lock_table, &lockfile->hmap_node); } } static struct lockfile * lockfile_register(const char *name, dev_t device, ino_t inode, int fd) OVS_REQUIRES(&lock_table_mutex) { struct lockfile *lockfile; lockfile = lockfile_find(device, inode); if (lockfile) { VLOG_ERR("%s: lock file disappeared and reappeared!", name); lockfile_unhash(lockfile); } lockfile = xmalloc(sizeof *lockfile); lockfile->name = xstrdup(name); lockfile->device = device; lockfile->inode = inode; lockfile->fd = fd; hmap_insert(lock_table, &lockfile->hmap_node, lockfile_hash(device, inode)); return lockfile; } static int lockfile_try_lock(const char *name, pid_t *pidp, struct lockfile **lockfilep) OVS_REQUIRES(&lock_table_mutex) { struct flock l; struct stat s; int error; int fd; *lockfilep = NULL; *pidp = 0; /* Check whether we've already got a lock on that file. */ if (!stat(name, &s)) { if (lockfile_find(s.st_dev, s.st_ino)) { return EDEADLK; } } else if (errno != ENOENT) { VLOG_WARN("%s: failed to stat lock file: %s", name, ovs_strerror(errno)); return errno; } /* Open the lock file. 
*/ fd = open(name, O_RDWR | O_CREAT, 0600); if (fd < 0) { VLOG_WARN("%s: failed to open lock file: %s", name, ovs_strerror(errno)); return errno; } /* Get the inode and device number for the lock table. */ if (fstat(fd, &s)) { VLOG_ERR("%s: failed to fstat lock file: %s", name, ovs_strerror(errno)); close(fd); return errno; } /* Try to lock the file. */ memset(&l, 0, sizeof l); l.l_type = F_WRLCK; l.l_whence = SEEK_SET; l.l_start = 0; l.l_len = 0; error = fcntl(fd, F_SETLK, &l) == -1 ? errno : 0; if (!error) { *lockfilep = lockfile_register(name, s.st_dev, s.st_ino, fd); } else { if (!fcntl(fd, F_GETLK, &l) && l.l_type != F_UNLCK) { *pidp = l.l_pid; } close(fd); } return error; } openvswitch-2.0.1+git20140120/lib/lockfile.h000066400000000000000000000015341226605124000201640ustar00rootroot00000000000000/* Copyright (c) 2008, 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef LOCKFILE_H #define LOCKFILE_H 1 struct lockfile; char *lockfile_name(const char *file); int lockfile_lock(const char *file, struct lockfile **); void lockfile_unlock(struct lockfile *); void lockfile_postfork(void); #endif /* lib/lockfile.h */ openvswitch-2.0.1+git20140120/lib/mac-learning.c000066400000000000000000000246011226605124000207240ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): the bare '#include' directives below have no header name in
 * this copy (apparently lost in extraction) -- restore before building. */
#include
#include "mac-learning.h"

#include
#include

#include "bitmap.h"
#include "coverage.h"
#include "hash.h"
#include "list.h"
#include "poll-loop.h"
#include "timeval.h"
#include "unaligned.h"
#include "util.h"
#include "vlan-bitmap.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(mac_learning);

COVERAGE_DEFINE(mac_learning_learned);
COVERAGE_DEFINE(mac_learning_expired);

/* Returns the number of seconds since 'e' (within 'ml') was last learned. */
int
mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e)
{
    /* 'expires' is set to the learning time plus 'idle_time', so the age is
     * 'idle_time' minus the time left until expiration. */
    time_t remaining = e->expires - time_now();
    return ml->idle_time - remaining;
}

/* Returns the hash of 'mac' and 'vlan' for 'ml''s table, seeded with
 * 'ml->secret'. */
static uint32_t
mac_table_hash(const struct mac_learning *ml, const uint8_t mac[ETH_ADDR_LEN],
               uint16_t vlan)
{
    /* 'mac1' holds the first 4 bytes of the address and 'mac2' the 2 bytes
     * at offset 4; 'vlan' is packed into the upper 16 bits of the second
     * hash word. */
    unsigned int mac1 = get_unaligned_u32(ALIGNED_CAST(uint32_t *, mac));
    unsigned int mac2 = get_unaligned_u16(ALIGNED_CAST(uint16_t *, mac + 4));
    return hash_3words(mac1, mac2 | (vlan << 16), ml->secret);
}

/* Returns the mac_entry that contains 'list' as its 'lru_node'. */
static struct mac_entry *
mac_entry_from_lru_node(struct list *list)
{
    return CONTAINER_OF(list, struct mac_entry, lru_node);
}

/* Returns the entry in 'ml' for 'mac' on 'vlan', or NULL if there is
 * none. */
static struct mac_entry *
mac_entry_lookup(const struct mac_learning *ml,
                 const uint8_t mac[ETH_ADDR_LEN], uint16_t vlan)
{
    struct mac_entry *e;

    HMAP_FOR_EACH_WITH_HASH (e, hmap_node, mac_table_hash(ml, mac, vlan),
                             &ml->table) {
        if (e->vlan == vlan && eth_addr_equals(e->mac, mac)) {
            return e;
        }
    }
    return NULL;
}

/* If the LRU list is not empty, stores the least-recently-used entry in '*e'
 * and returns true.  Otherwise, if the LRU list is empty, stores NULL in '*e'
 * and return false.
*/ static bool get_lru(struct mac_learning *ml, struct mac_entry **e) OVS_REQ_RDLOCK(ml->rwlock) { if (!list_is_empty(&ml->lrus)) { *e = mac_entry_from_lru_node(ml->lrus.next); return true; } else { *e = NULL; return false; } } static unsigned int normalize_idle_time(unsigned int idle_time) { return (idle_time < 15 ? 15 : idle_time > 3600 ? 3600 : idle_time); } /* Creates and returns a new MAC learning table with an initial MAC aging * timeout of 'idle_time' seconds and an initial maximum of MAC_DEFAULT_MAX * entries. */ struct mac_learning * mac_learning_create(unsigned int idle_time) { struct mac_learning *ml; ml = xmalloc(sizeof *ml); list_init(&ml->lrus); hmap_init(&ml->table); ml->secret = random_uint32(); ml->flood_vlans = NULL; ml->idle_time = normalize_idle_time(idle_time); ml->max_entries = MAC_DEFAULT_MAX; ml->need_revalidate = false; atomic_init(&ml->ref_cnt, 1); ovs_rwlock_init(&ml->rwlock); return ml; } struct mac_learning * mac_learning_ref(const struct mac_learning *ml_) { struct mac_learning *ml = CONST_CAST(struct mac_learning *, ml_); if (ml) { int orig; atomic_add(&ml->ref_cnt, 1, &orig); ovs_assert(orig > 0); } return ml; } /* Unreferences (and possibly destroys) MAC learning table 'ml'. */ void mac_learning_unref(struct mac_learning *ml) { int orig; if (!ml) { return; } atomic_sub(&ml->ref_cnt, 1, &orig); ovs_assert(orig > 0); if (orig == 1) { struct mac_entry *e, *next; HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) { hmap_remove(&ml->table, &e->hmap_node); free(e); } hmap_destroy(&ml->table); bitmap_free(ml->flood_vlans); ovs_rwlock_destroy(&ml->rwlock); free(ml); } } /* Provides a bitmap of VLANs which have learning disabled, that is, VLANs on * which all packets are flooded. Returns true if the set has changed from the * previous value. 
*/ bool mac_learning_set_flood_vlans(struct mac_learning *ml, const unsigned long *bitmap) { if (vlan_bitmap_equal(ml->flood_vlans, bitmap)) { return false; } else { bitmap_free(ml->flood_vlans); ml->flood_vlans = vlan_bitmap_clone(bitmap); return true; } } /* Changes the MAC aging timeout of 'ml' to 'idle_time' seconds. */ void mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time) { idle_time = normalize_idle_time(idle_time); if (idle_time != ml->idle_time) { struct mac_entry *e; int delta; delta = (int) idle_time - (int) ml->idle_time; LIST_FOR_EACH (e, lru_node, &ml->lrus) { e->expires += delta; } ml->idle_time = idle_time; } } /* Sets the maximum number of entries in 'ml' to 'max_entries', adjusting it * to be within a reasonable range. */ void mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries) { ml->max_entries = (max_entries < 10 ? 10 : max_entries > 1000 * 1000 ? 1000 * 1000 : max_entries); } static bool is_learning_vlan(const struct mac_learning *ml, uint16_t vlan) { return !ml->flood_vlans || !bitmap_is_set(ml->flood_vlans, vlan); } /* Returns true if 'src_mac' may be learned on 'vlan' for 'ml'. * Returns false if 'ml' is NULL, if src_mac is not valid for learning, or if * 'vlan' is configured on 'ml' to flood all packets. */ bool mac_learning_may_learn(const struct mac_learning *ml, const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan) { return ml && is_learning_vlan(ml, vlan) && !eth_addr_is_multicast(src_mac); } /* Searches 'ml' for and returns a MAC learning entry for 'src_mac' in 'vlan', * inserting a new entry if necessary. The caller must have already verified, * by calling mac_learning_may_learn(), that 'src_mac' and 'vlan' are * learnable. * * If the returned MAC entry is new (as may be determined by calling * mac_entry_is_new()), then the caller must pass the new entry to * mac_learning_changed(). The caller must also initialize the new entry's * 'port' member. 
Otherwise calling those functions is at the caller's * discretion. */ struct mac_entry * mac_learning_insert(struct mac_learning *ml, const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan) { struct mac_entry *e; e = mac_entry_lookup(ml, src_mac, vlan); if (!e) { uint32_t hash = mac_table_hash(ml, src_mac, vlan); if (hmap_count(&ml->table) >= ml->max_entries) { get_lru(ml, &e); mac_learning_expire(ml, e); } e = xmalloc(sizeof *e); hmap_insert(&ml->table, &e->hmap_node, hash); memcpy(e->mac, src_mac, ETH_ADDR_LEN); e->vlan = vlan; e->grat_arp_lock = TIME_MIN; e->port.p = NULL; } else { list_remove(&e->lru_node); } /* Mark 'e' as recently used. */ list_push_back(&ml->lrus, &e->lru_node); e->expires = time_now() + ml->idle_time; return e; } /* Changes 'e''s tag to a new, randomly selected one. Causes * mac_learning_run() to flag for revalidation the tag that would have been * previously used for this entry's MAC and VLAN (either before 'e' was * inserted, if it is new, or otherwise before its port was updated.) * * The client should call this function after obtaining a MAC learning entry * from mac_learning_insert(), if the entry is either new or if its learned * port has changed. */ void mac_learning_changed(struct mac_learning *ml) { COVERAGE_INC(mac_learning_learned); ml->need_revalidate = true; } /* Looks up MAC 'dst' for VLAN 'vlan' in 'ml' and returns the associated MAC * learning entry, if any. If 'tag' is nonnull, then the tag that associates * 'dst' and 'vlan' with its currently learned port will be OR'd into * '*tag'. */ struct mac_entry * mac_learning_lookup(const struct mac_learning *ml, const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan) { if (eth_addr_is_multicast(dst)) { /* No tag because the treatment of multicast destinations never * changes. */ return NULL; } else if (!is_learning_vlan(ml, vlan)) { /* We don't tag this property. The set of learning VLANs changes so * rarely that we revalidate every flow when it changes. 
*/ return NULL; } else { struct mac_entry *e = mac_entry_lookup(ml, dst, vlan); ovs_assert(e == NULL || e->port.p != NULL) return e; } } /* Expires 'e' from the 'ml' hash table. */ void mac_learning_expire(struct mac_learning *ml, struct mac_entry *e) { hmap_remove(&ml->table, &e->hmap_node); list_remove(&e->lru_node); free(e); } /* Expires all the mac-learning entries in 'ml'. If not NULL, the tags in 'ml' * are added to 'tags'. Otherwise the tags in 'ml' are discarded. The client * is responsible for revalidating any flows that depend on 'ml', if * necessary. */ void mac_learning_flush(struct mac_learning *ml) { struct mac_entry *e; while (get_lru(ml, &e)){ ml->need_revalidate = true; mac_learning_expire(ml, e); } hmap_shrink(&ml->table); } /* Does periodic work required by 'ml'. Returns true if something changed that * may require flow revalidation. */ bool mac_learning_run(struct mac_learning *ml) { bool need_revalidate; struct mac_entry *e; while (get_lru(ml, &e) && (hmap_count(&ml->table) > ml->max_entries || time_now() >= e->expires)) { COVERAGE_INC(mac_learning_expired); ml->need_revalidate = true; mac_learning_expire(ml, e); } need_revalidate = ml->need_revalidate; ml->need_revalidate = false; return need_revalidate; } void mac_learning_wait(struct mac_learning *ml) { if (hmap_count(&ml->table) > ml->max_entries || ml->need_revalidate) { poll_immediate_wake(); } else if (!list_is_empty(&ml->lrus)) { struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next); poll_timer_wait_until(e->expires * 1000LL); } } openvswitch-2.0.1+git20140120/lib/mac-learning.h000066400000000000000000000115221226605124000207270ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef MAC_LEARNING_H #define MAC_LEARNING_H 1 #include #include "hmap.h" #include "list.h" #include "ovs-atomic.h" #include "ovs-thread.h" #include "packets.h" #include "timeval.h" struct mac_learning; /* Default maximum size of a MAC learning table, in entries. */ #define MAC_DEFAULT_MAX 2048 /* Time, in seconds, before expiring a mac_entry due to inactivity. */ #define MAC_ENTRY_DEFAULT_IDLE_TIME 300 /* Time, in seconds, to lock an entry updated by a gratuitous ARP to avoid * relearning based on a reflection from a bond slave. */ #define MAC_GRAT_ARP_LOCK_TIME 5 /* A MAC learning table entry. * Guarded by owning 'mac_learning''s rwlock */ struct mac_entry { struct hmap_node hmap_node; /* Node in a mac_learning hmap. */ time_t expires; /* Expiration time. */ time_t grat_arp_lock; /* Gratuitous ARP lock expiration time. */ uint8_t mac[ETH_ADDR_LEN]; /* Known MAC address. */ uint16_t vlan; /* VLAN tag. */ /* The following are marked guarded to prevent users from iterating over or * accessing a mac_entry without hodling the parent mac_learning rwlock. */ struct list lru_node OVS_GUARDED; /* Element in 'lrus' list. */ /* Learned port. */ union { void *p; ofp_port_t ofp_port; } port OVS_GUARDED; }; /* Sets a gratuitous ARP lock on 'mac' that will expire in * MAC_GRAT_ARP_LOCK_TIME seconds. */ static inline void mac_entry_set_grat_arp_lock(struct mac_entry *mac) { mac->grat_arp_lock = time_now() + MAC_GRAT_ARP_LOCK_TIME; } /* Returns true if a gratuitous ARP lock is in effect on 'mac', false if none * has ever been asserted or if it has expired. 
*/ static inline bool mac_entry_is_grat_arp_locked(const struct mac_entry *mac) { return time_now() < mac->grat_arp_lock; } /* MAC learning table. */ struct mac_learning { struct hmap table; /* Learning table. */ struct list lrus OVS_GUARDED; /* In-use entries, least recently used at the front, most recently used at the back. */ uint32_t secret; /* Secret for randomizing hash table. */ unsigned long *flood_vlans; /* Bitmap of learning disabled VLANs. */ unsigned int idle_time; /* Max age before deleting an entry. */ size_t max_entries; /* Max number of learned MACs. */ atomic_int ref_cnt; struct ovs_rwlock rwlock; bool need_revalidate; }; int mac_entry_age(const struct mac_learning *ml, const struct mac_entry *e) OVS_REQ_RDLOCK(ml->rwlock); /* Basics. */ struct mac_learning *mac_learning_create(unsigned int idle_time); struct mac_learning *mac_learning_ref(const struct mac_learning *); void mac_learning_unref(struct mac_learning *); bool mac_learning_run(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock); void mac_learning_wait(struct mac_learning *ml) OVS_REQ_RDLOCK(ml->rwlock); /* Configuration. */ bool mac_learning_set_flood_vlans(struct mac_learning *ml, const unsigned long *bitmap) OVS_REQ_WRLOCK(ml->rwlock); void mac_learning_set_idle_time(struct mac_learning *ml, unsigned int idle_time) OVS_REQ_WRLOCK(ml->rwlock); void mac_learning_set_max_entries(struct mac_learning *ml, size_t max_entries) OVS_REQ_WRLOCK(ml->rwlock); /* Learning. */ bool mac_learning_may_learn(const struct mac_learning *ml, const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan) OVS_REQ_RDLOCK(ml->rwlock); struct mac_entry *mac_learning_insert(struct mac_learning *ml, const uint8_t src[ETH_ADDR_LEN], uint16_t vlan) OVS_REQ_WRLOCK(ml->rwlock); void mac_learning_changed(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock); /* Lookup. */ struct mac_entry *mac_learning_lookup(const struct mac_learning *ml, const uint8_t dst[ETH_ADDR_LEN], uint16_t vlan) OVS_REQ_RDLOCK(ml->rwlock); /* Flushing. 
*/ void mac_learning_expire(struct mac_learning *ml, struct mac_entry *e) OVS_REQ_WRLOCK(ml->rwlock); void mac_learning_flush(struct mac_learning *ml) OVS_REQ_WRLOCK(ml->rwlock); #endif /* mac-learning.h */ openvswitch-2.0.1+git20140120/lib/match.c000066400000000000000000001067601226605124000174720ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "match.h" #include #include "byte-order.h" #include "dynamic-string.h" #include "ofp-util.h" #include "packets.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(match); /* Converts the flow in 'flow' into a match in 'match', with the given * 'wildcards'. */ void match_init(struct match *match, const struct flow *flow, const struct flow_wildcards *wc) { match->flow = *flow; match->wc = *wc; match_zero_wildcarded_fields(match); } /* Converts a flow into a match. It sets the wildcard masks based on * the packet contents. It will not set the mask for fields that do not * make sense for the packet type. 
*/
void
match_wc_init(struct match *match, const struct flow *flow)
{
    struct flow_wildcards *wc;
    int i;

    match->flow = *flow;
    wc = &match->wc;
    /* Start from an all-wildcarded mask, then un-wildcard field by field. */
    memset(&wc->masks, 0x0, sizeof wc->masks);

    /* The Ethernet type is always matched exactly. */
    memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);

    /* These fields are matched only when they are nonzero in 'flow'. */
    if (flow->nw_proto) {
        memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
    }

    if (flow->skb_priority) {
        memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority);
    }

    if (flow->pkt_mark) {
        memset(&wc->masks.pkt_mark, 0xff, sizeof wc->masks.pkt_mark);
    }

    for (i = 0; i < FLOW_N_REGS; i++) {
        if (flow->regs[i]) {
            memset(&wc->masks.regs[i], 0xff, sizeof wc->masks.regs[i]);
        }
    }

    /* Tunnel metadata: with a tunnel destination present, match the whole
     * tunnel header (the key only if FLOW_TNL_F_KEY is set); otherwise match
     * just a nonzero tunnel ID. */
    if (flow->tunnel.ip_dst) {
        if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
            memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id);
        }
        memset(&wc->masks.tunnel.ip_src, 0xff, sizeof wc->masks.tunnel.ip_src);
        memset(&wc->masks.tunnel.ip_dst, 0xff, sizeof wc->masks.tunnel.ip_dst);
        memset(&wc->masks.tunnel.flags, 0xff, sizeof wc->masks.tunnel.flags);
        memset(&wc->masks.tunnel.ip_tos, 0xff, sizeof wc->masks.tunnel.ip_tos);
        memset(&wc->masks.tunnel.ip_ttl, 0xff, sizeof wc->masks.tunnel.ip_ttl);
    } else if (flow->tunnel.tun_id) {
        memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id);
    }

    /* Metadata, input port and the L2 header are always matched exactly. */
    memset(&wc->masks.metadata, 0xff, sizeof wc->masks.metadata);
    memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port);
    memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
    memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);

    /* L3 addresses (or the MPLS label stack entry), chosen by Ethernet
     * type. */
    if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src);
        memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
        memset(&wc->masks.ipv6_label, 0xff, sizeof wc->masks.ipv6_label);
    } else if (flow->dl_type == htons(ETH_TYPE_IP) ||
               (flow->dl_type == htons(ETH_TYPE_ARP)) ||
               (flow->dl_type == htons(ETH_TYPE_RARP))) {
        memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
        memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
    } else if (eth_type_mpls(flow->dl_type)) {
        memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse);
    }

    /* ARP and RARP additionally match the hardware addresses. */
    if (flow->dl_type == htons(ETH_TYPE_ARP) ||
        flow->dl_type == htons(ETH_TYPE_RARP)) {
        memset(&wc->masks.arp_sha, 0xff, sizeof wc->masks.arp_sha);
        memset(&wc->masks.arp_tha, 0xff, sizeof wc->masks.arp_tha);
    }

    if (is_ip_any(flow)) {
        memset(&wc->masks.nw_tos, 0xff, sizeof wc->masks.nw_tos);
        memset(&wc->masks.nw_ttl, 0xff, sizeof wc->masks.nw_ttl);

        if (flow->nw_frag) {
            memset(&wc->masks.nw_frag, 0xff, sizeof wc->masks.nw_frag);
        }

        /* 'tp_src'/'tp_dst' are matched for ICMP and ICMPv6 regardless of
         * value, and for any other protocol when either is nonzero. */
        if (flow->nw_proto == IPPROTO_ICMP ||
            flow->nw_proto == IPPROTO_ICMPV6 ||
            (flow->tp_src || flow->tp_dst)) {
            memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
            memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
        }

        /* For ICMPv6 the 'arp_sha'/'arp_tha' fields are matched as well. */
        if (flow->nw_proto == IPPROTO_ICMPV6) {
            memset(&wc->masks.arp_sha, 0xff, sizeof wc->masks.arp_sha);
            memset(&wc->masks.arp_tha, 0xff, sizeof wc->masks.arp_tha);
        }
    }

    return;
}

/* Converts the flow in 'flow' into an exact-match match in 'match'.  The
 * copied flow's 'skb_priority' is reset to 0. */
void
match_init_exact(struct match *match, const struct flow *flow)
{
    match->flow = *flow;
    match->flow.skb_priority = 0;
    flow_wildcards_init_exact(&match->wc);
}

/* Initializes 'match' as a "catch-all" match that matches every packet. */
void
match_init_catchall(struct match *match)
{
    memset(&match->flow, 0, sizeof match->flow);
    flow_wildcards_init_catchall(&match->wc);
}

/* For each bit or field wildcarded in 'match', sets the corresponding bit or
 * field in 'flow' to all-0-bits.  It is important to maintain this invariant
 * in a match that might be inserted into a classifier.
 *
 * It is never necessary to call this function directly for a match that is
 * initialized or modified only by match_*() functions.  It is useful to
 * restore the invariant in a match whose 'wc' member is modified by hand.
*/ void match_zero_wildcarded_fields(struct match *match) { flow_zero_wildcards(&match->flow, &match->wc); } void match_set_reg(struct match *match, unsigned int reg_idx, uint32_t value) { match_set_reg_masked(match, reg_idx, value, UINT32_MAX); } void match_set_reg_masked(struct match *match, unsigned int reg_idx, uint32_t value, uint32_t mask) { ovs_assert(reg_idx < FLOW_N_REGS); flow_wildcards_set_reg_mask(&match->wc, reg_idx, mask); match->flow.regs[reg_idx] = value & mask; } void match_set_metadata(struct match *match, ovs_be64 metadata) { match_set_metadata_masked(match, metadata, htonll(UINT64_MAX)); } void match_set_metadata_masked(struct match *match, ovs_be64 metadata, ovs_be64 mask) { match->wc.masks.metadata = mask; match->flow.metadata = metadata & mask; } void match_set_tun_id(struct match *match, ovs_be64 tun_id) { match_set_tun_id_masked(match, tun_id, htonll(UINT64_MAX)); } void match_set_tun_id_masked(struct match *match, ovs_be64 tun_id, ovs_be64 mask) { match->wc.masks.tunnel.tun_id = mask; match->flow.tunnel.tun_id = tun_id & mask; } void match_set_tun_src(struct match *match, ovs_be32 src) { match_set_tun_src_masked(match, src, htonl(UINT32_MAX)); } void match_set_tun_src_masked(struct match *match, ovs_be32 src, ovs_be32 mask) { match->wc.masks.tunnel.ip_src = mask; match->flow.tunnel.ip_src = src & mask; } void match_set_tun_dst(struct match *match, ovs_be32 dst) { match_set_tun_dst_masked(match, dst, htonl(UINT32_MAX)); } void match_set_tun_dst_masked(struct match *match, ovs_be32 dst, ovs_be32 mask) { match->wc.masks.tunnel.ip_dst = mask; match->flow.tunnel.ip_dst = dst & mask; } void match_set_tun_ttl(struct match *match, uint8_t ttl) { match_set_tun_ttl_masked(match, ttl, UINT8_MAX); } void match_set_tun_ttl_masked(struct match *match, uint8_t ttl, uint8_t mask) { match->wc.masks.tunnel.ip_ttl = mask; match->flow.tunnel.ip_ttl = ttl & mask; } void match_set_tun_tos(struct match *match, uint8_t tos) { match_set_tun_tos_masked(match, tos, 
UINT8_MAX); } void match_set_tun_tos_masked(struct match *match, uint8_t tos, uint8_t mask) { match->wc.masks.tunnel.ip_tos = mask; match->flow.tunnel.ip_tos = tos & mask; } void match_set_tun_flags(struct match *match, uint16_t flags) { match_set_tun_flags_masked(match, flags, UINT16_MAX); } void match_set_tun_flags_masked(struct match *match, uint16_t flags, uint16_t mask) { match->wc.masks.tunnel.flags = mask; match->flow.tunnel.flags = flags & mask; } void match_set_in_port(struct match *match, ofp_port_t ofp_port) { match->wc.masks.in_port.ofp_port = u16_to_ofp(UINT16_MAX); match->flow.in_port.ofp_port = ofp_port; } void match_set_skb_priority(struct match *match, uint32_t skb_priority) { match->wc.masks.skb_priority = UINT32_MAX; match->flow.skb_priority = skb_priority; } void match_set_pkt_mark(struct match *match, uint32_t pkt_mark) { match_set_pkt_mark_masked(match, pkt_mark, UINT32_MAX); } void match_set_pkt_mark_masked(struct match *match, uint32_t pkt_mark, uint32_t mask) { match->flow.pkt_mark = pkt_mark & mask; match->wc.masks.pkt_mark = mask; } void match_set_dl_type(struct match *match, ovs_be16 dl_type) { match->wc.masks.dl_type = htons(UINT16_MAX); match->flow.dl_type = dl_type; } /* Modifies 'value_src' so that the Ethernet address must match 'value_dst' * exactly. 'mask_dst' is set to all 1s. */ static void set_eth(const uint8_t value_src[ETH_ADDR_LEN], uint8_t value_dst[ETH_ADDR_LEN], uint8_t mask_dst[ETH_ADDR_LEN]) { memcpy(value_dst, value_src, ETH_ADDR_LEN); memset(mask_dst, 0xff, ETH_ADDR_LEN); } /* Modifies 'value_src' so that the Ethernet address must match 'value_src' * after each byte is ANDed with the appropriate byte in 'mask_src'. 
* 'mask_dst' is set to 'mask_src' */ static void set_eth_masked(const uint8_t value_src[ETH_ADDR_LEN], const uint8_t mask_src[ETH_ADDR_LEN], uint8_t value_dst[ETH_ADDR_LEN], uint8_t mask_dst[ETH_ADDR_LEN]) { size_t i; for (i = 0; i < ETH_ADDR_LEN; i++) { value_dst[i] = value_src[i] & mask_src[i]; mask_dst[i] = mask_src[i]; } } /* Modifies 'rule' so that the source Ethernet address must match 'dl_src' * exactly. */ void match_set_dl_src(struct match *match, const uint8_t dl_src[ETH_ADDR_LEN]) { set_eth(dl_src, match->flow.dl_src, match->wc.masks.dl_src); } /* Modifies 'rule' so that the source Ethernet address must match 'dl_src' * after each byte is ANDed with the appropriate byte in 'mask'. */ void match_set_dl_src_masked(struct match *match, const uint8_t dl_src[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN]) { set_eth_masked(dl_src, mask, match->flow.dl_src, match->wc.masks.dl_src); } /* Modifies 'match' so that the Ethernet address must match 'dl_dst' * exactly. */ void match_set_dl_dst(struct match *match, const uint8_t dl_dst[ETH_ADDR_LEN]) { set_eth(dl_dst, match->flow.dl_dst, match->wc.masks.dl_dst); } /* Modifies 'match' so that the Ethernet address must match 'dl_dst' after each * byte is ANDed with the appropriate byte in 'mask'. * * This function will assert-fail if 'mask' is invalid. Only 'mask' values * accepted by flow_wildcards_is_dl_dst_mask_valid() are allowed. */ void match_set_dl_dst_masked(struct match *match, const uint8_t dl_dst[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN]) { set_eth_masked(dl_dst, mask, match->flow.dl_dst, match->wc.masks.dl_dst); } void match_set_dl_tci(struct match *match, ovs_be16 tci) { match_set_dl_tci_masked(match, tci, htons(0xffff)); } void match_set_dl_tci_masked(struct match *match, ovs_be16 tci, ovs_be16 mask) { match->flow.vlan_tci = tci & mask; match->wc.masks.vlan_tci = mask; } /* Modifies 'match' so that the VLAN VID is wildcarded. 
If the PCP is already * wildcarded, then 'match' will match a packet regardless of whether it has an * 802.1Q header or not. */ void match_set_any_vid(struct match *match) { if (match->wc.masks.vlan_tci & htons(VLAN_PCP_MASK)) { match->wc.masks.vlan_tci &= ~htons(VLAN_VID_MASK); match->flow.vlan_tci &= ~htons(VLAN_VID_MASK); } else { match_set_dl_tci_masked(match, htons(0), htons(0)); } } /* Modifies 'match' depending on 'dl_vlan': * * - If 'dl_vlan' is htons(OFP_VLAN_NONE), makes 'match' match only packets * without an 802.1Q header. * * - Otherwise, makes 'match' match only packets with an 802.1Q header whose * VID equals the low 12 bits of 'dl_vlan'. */ void match_set_dl_vlan(struct match *match, ovs_be16 dl_vlan) { flow_set_dl_vlan(&match->flow, dl_vlan); if (dl_vlan == htons(OFP10_VLAN_NONE)) { match->wc.masks.vlan_tci = htons(UINT16_MAX); } else { match->wc.masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI); } } /* Sets the VLAN VID that 'match' matches to 'vid', which is interpreted as an * OpenFlow 1.2 "vlan_vid" value, that is, the low 13 bits of 'vlan_tci' (VID * plus CFI). */ void match_set_vlan_vid(struct match *match, ovs_be16 vid) { match_set_vlan_vid_masked(match, vid, htons(VLAN_VID_MASK | VLAN_CFI)); } /* Sets the VLAN VID that 'flow' matches to 'vid', which is interpreted as an * OpenFlow 1.2 "vlan_vid" value, that is, the low 13 bits of 'vlan_tci' (VID * plus CFI), with the corresponding 'mask'. */ void match_set_vlan_vid_masked(struct match *match, ovs_be16 vid, ovs_be16 mask) { ovs_be16 pcp_mask = htons(VLAN_PCP_MASK); ovs_be16 vid_mask = htons(VLAN_VID_MASK | VLAN_CFI); mask &= vid_mask; flow_set_vlan_vid(&match->flow, vid & mask); match->wc.masks.vlan_tci = mask | (match->wc.masks.vlan_tci & pcp_mask); } /* Modifies 'match' so that the VLAN PCP is wildcarded. If the VID is already * wildcarded, then 'match' will match a packet regardless of whether it has an * 802.1Q header or not. 
*/ void match_set_any_pcp(struct match *match) { if (match->wc.masks.vlan_tci & htons(VLAN_VID_MASK)) { match->wc.masks.vlan_tci &= ~htons(VLAN_PCP_MASK); match->flow.vlan_tci &= ~htons(VLAN_PCP_MASK); } else { match_set_dl_tci_masked(match, htons(0), htons(0)); } } /* Modifies 'match' so that it matches only packets with an 802.1Q header whose * PCP equals the low 3 bits of 'dl_vlan_pcp'. */ void match_set_dl_vlan_pcp(struct match *match, uint8_t dl_vlan_pcp) { flow_set_vlan_pcp(&match->flow, dl_vlan_pcp); match->wc.masks.vlan_tci |= htons(VLAN_CFI | VLAN_PCP_MASK); } /* Modifies 'match' so that the MPLS label is wildcarded. */ void match_set_any_mpls_label(struct match *match) { match->wc.masks.mpls_lse &= ~htonl(MPLS_LABEL_MASK); flow_set_mpls_label(&match->flow, htonl(0)); } /* Modifies 'match' so that it matches only packets with an MPLS header whose * label equals the low 20 bits of 'mpls_label'. */ void match_set_mpls_label(struct match *match, ovs_be32 mpls_label) { match->wc.masks.mpls_lse |= htonl(MPLS_LABEL_MASK); flow_set_mpls_label(&match->flow, mpls_label); } /* Modifies 'match' so that the MPLS TC is wildcarded. */ void match_set_any_mpls_tc(struct match *match) { match->wc.masks.mpls_lse &= ~htonl(MPLS_TC_MASK); flow_set_mpls_tc(&match->flow, 0); } /* Modifies 'match' so that it matches only packets with an MPLS header whose * Traffic Class equals the low 3 bits of 'mpls_tc'. */ void match_set_mpls_tc(struct match *match, uint8_t mpls_tc) { match->wc.masks.mpls_lse |= htonl(MPLS_TC_MASK); flow_set_mpls_tc(&match->flow, mpls_tc); } /* Modifies 'match' so that the MPLS stack flag is wildcarded. 
*/ void match_set_any_mpls_bos(struct match *match) { match->wc.masks.mpls_lse &= ~htonl(MPLS_BOS_MASK); flow_set_mpls_bos(&match->flow, 0); } /* Modifies 'match' so that it matches only packets with an MPLS header whose * Stack Flag equals the lower bit of 'mpls_bos' */ void match_set_mpls_bos(struct match *match, uint8_t mpls_bos) { match->wc.masks.mpls_lse |= htonl(MPLS_BOS_MASK); flow_set_mpls_bos(&match->flow, mpls_bos); } void match_set_tp_src(struct match *match, ovs_be16 tp_src) { match_set_tp_src_masked(match, tp_src, htons(UINT16_MAX)); } void match_set_tp_src_masked(struct match *match, ovs_be16 port, ovs_be16 mask) { match->flow.tp_src = port & mask; match->wc.masks.tp_src = mask; } void match_set_tp_dst(struct match *match, ovs_be16 tp_dst) { match_set_tp_dst_masked(match, tp_dst, htons(UINT16_MAX)); } void match_set_tp_dst_masked(struct match *match, ovs_be16 port, ovs_be16 mask) { match->flow.tp_dst = port & mask; match->wc.masks.tp_dst = mask; } void match_set_nw_proto(struct match *match, uint8_t nw_proto) { match->flow.nw_proto = nw_proto; match->wc.masks.nw_proto = UINT8_MAX; } void match_set_nw_src(struct match *match, ovs_be32 nw_src) { match->flow.nw_src = nw_src; match->wc.masks.nw_src = htonl(UINT32_MAX); } void match_set_nw_src_masked(struct match *match, ovs_be32 nw_src, ovs_be32 mask) { match->flow.nw_src = nw_src & mask; match->wc.masks.nw_src = mask; } void match_set_nw_dst(struct match *match, ovs_be32 nw_dst) { match->flow.nw_dst = nw_dst; match->wc.masks.nw_dst = htonl(UINT32_MAX); } void match_set_nw_dst_masked(struct match *match, ovs_be32 ip, ovs_be32 mask) { match->flow.nw_dst = ip & mask; match->wc.masks.nw_dst = mask; } void match_set_nw_dscp(struct match *match, uint8_t nw_dscp) { match->wc.masks.nw_tos |= IP_DSCP_MASK; match->flow.nw_tos &= ~IP_DSCP_MASK; match->flow.nw_tos |= nw_dscp & IP_DSCP_MASK; } void match_set_nw_ecn(struct match *match, uint8_t nw_ecn) { match->wc.masks.nw_tos |= IP_ECN_MASK; match->flow.nw_tos &= 
~IP_ECN_MASK; match->flow.nw_tos |= nw_ecn & IP_ECN_MASK; } void match_set_nw_ttl(struct match *match, uint8_t nw_ttl) { match->wc.masks.nw_ttl = UINT8_MAX; match->flow.nw_ttl = nw_ttl; } void match_set_nw_frag(struct match *match, uint8_t nw_frag) { match->wc.masks.nw_frag |= FLOW_NW_FRAG_MASK; match->flow.nw_frag = nw_frag; } void match_set_nw_frag_masked(struct match *match, uint8_t nw_frag, uint8_t mask) { match->flow.nw_frag = nw_frag & mask; match->wc.masks.nw_frag = mask; } void match_set_icmp_type(struct match *match, uint8_t icmp_type) { match_set_tp_src(match, htons(icmp_type)); } void match_set_icmp_code(struct match *match, uint8_t icmp_code) { match_set_tp_dst(match, htons(icmp_code)); } void match_set_arp_sha(struct match *match, const uint8_t sha[ETH_ADDR_LEN]) { memcpy(match->flow.arp_sha, sha, ETH_ADDR_LEN); memset(match->wc.masks.arp_sha, UINT8_MAX, ETH_ADDR_LEN); } void match_set_arp_sha_masked(struct match *match, const uint8_t arp_sha[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN]) { set_eth_masked(arp_sha, mask, match->flow.arp_sha, match->wc.masks.arp_sha); } void match_set_arp_tha(struct match *match, const uint8_t tha[ETH_ADDR_LEN]) { memcpy(match->flow.arp_tha, tha, ETH_ADDR_LEN); memset(match->wc.masks.arp_tha, UINT8_MAX, ETH_ADDR_LEN); } void match_set_arp_tha_masked(struct match *match, const uint8_t arp_tha[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN]) { set_eth_masked(arp_tha, mask, match->flow.arp_tha, match->wc.masks.arp_tha); } void match_set_ipv6_src(struct match *match, const struct in6_addr *src) { match->flow.ipv6_src = *src; match->wc.masks.ipv6_src = in6addr_exact; } void match_set_ipv6_src_masked(struct match *match, const struct in6_addr *src, const struct in6_addr *mask) { match->flow.ipv6_src = ipv6_addr_bitand(src, mask); match->wc.masks.ipv6_src = *mask; } void match_set_ipv6_dst(struct match *match, const struct in6_addr *dst) { match->flow.ipv6_dst = *dst; match->wc.masks.ipv6_dst = in6addr_exact; } void 
match_set_ipv6_dst_masked(struct match *match, const struct in6_addr *dst, const struct in6_addr *mask) { match->flow.ipv6_dst = ipv6_addr_bitand(dst, mask); match->wc.masks.ipv6_dst = *mask; } void match_set_ipv6_label(struct match *match, ovs_be32 ipv6_label) { match->wc.masks.ipv6_label = htonl(UINT32_MAX); match->flow.ipv6_label = ipv6_label; } void match_set_ipv6_label_masked(struct match *match, ovs_be32 ipv6_label, ovs_be32 mask) { match->flow.ipv6_label = ipv6_label & mask; match->wc.masks.ipv6_label = mask; } void match_set_nd_target(struct match *match, const struct in6_addr *target) { match->flow.nd_target = *target; match->wc.masks.nd_target = in6addr_exact; } void match_set_nd_target_masked(struct match *match, const struct in6_addr *target, const struct in6_addr *mask) { match->flow.nd_target = ipv6_addr_bitand(target, mask); match->wc.masks.nd_target = *mask; } /* Returns true if 'a' and 'b' wildcard the same fields and have the same * values for fixed fields, otherwise false. */ bool match_equal(const struct match *a, const struct match *b) { return (flow_wildcards_equal(&a->wc, &b->wc) && flow_equal(&a->flow, &b->flow)); } /* Returns a hash value for the flow and wildcards in 'match', starting from * 'basis'. 
*/ uint32_t match_hash(const struct match *match, uint32_t basis) { return flow_wildcards_hash(&match->wc, flow_hash(&match->flow, basis)); } static void format_eth_masked(struct ds *s, const char *name, const uint8_t eth[6], const uint8_t mask[6]) { if (!eth_addr_is_zero(mask)) { ds_put_format(s, "%s=", name); eth_format_masked(eth, mask, s); ds_put_char(s, ','); } } static void format_ip_netmask(struct ds *s, const char *name, ovs_be32 ip, ovs_be32 netmask) { if (netmask) { ds_put_format(s, "%s=", name); ip_format_masked(ip, netmask, s); ds_put_char(s, ','); } } static void format_ipv6_netmask(struct ds *s, const char *name, const struct in6_addr *addr, const struct in6_addr *netmask) { if (!ipv6_mask_is_any(netmask)) { ds_put_format(s, "%s=", name); print_ipv6_masked(s, addr, netmask); ds_put_char(s, ','); } } static void format_be16_masked(struct ds *s, const char *name, ovs_be16 value, ovs_be16 mask) { if (mask != htons(0)) { ds_put_format(s, "%s=", name); if (mask == htons(UINT16_MAX)) { ds_put_format(s, "%"PRIu16, ntohs(value)); } else { ds_put_format(s, "0x%"PRIx16"/0x%"PRIx16, ntohs(value), ntohs(mask)); } ds_put_char(s, ','); } } static void format_flow_tunnel(struct ds *s, const struct match *match) { const struct flow_wildcards *wc = &match->wc; const struct flow_tnl *tnl = &match->flow.tunnel; switch (wc->masks.tunnel.tun_id) { case 0: break; case CONSTANT_HTONLL(UINT64_MAX): ds_put_format(s, "tun_id=%#"PRIx64",", ntohll(tnl->tun_id)); break; default: ds_put_format(s, "tun_id=%#"PRIx64"/%#"PRIx64",", ntohll(tnl->tun_id), ntohll(wc->masks.tunnel.tun_id)); break; } format_ip_netmask(s, "tun_src", tnl->ip_src, wc->masks.tunnel.ip_src); format_ip_netmask(s, "tun_dst", tnl->ip_dst, wc->masks.tunnel.ip_dst); if (wc->masks.tunnel.ip_tos) { ds_put_format(s, "tun_tos=%"PRIx8",", tnl->ip_tos); } if (wc->masks.tunnel.ip_ttl) { ds_put_format(s, "tun_ttl=%"PRIu8",", tnl->ip_ttl); } if (wc->masks.tunnel.flags) { format_flags(s, flow_tun_flag_to_string, tnl->flags, 
'|'); ds_put_char(s, ','); } } /* Appends a string representation of 'match' to 's'. If 'priority' is * different from OFP_DEFAULT_PRIORITY, includes it in 's'. */ void match_format(const struct match *match, struct ds *s, unsigned int priority) { const struct flow_wildcards *wc = &match->wc; size_t start_len = s->length; const struct flow *f = &match->flow; bool skip_type = false; bool skip_proto = false; int i; BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20); if (priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "priority=%u,", priority); } switch (wc->masks.pkt_mark) { case 0: break; case UINT32_MAX: ds_put_format(s, "pkt_mark=%#"PRIx32",", f->pkt_mark); break; default: ds_put_format(s, "pkt_mark=%#"PRIx32"/%#"PRIx32",", f->pkt_mark, wc->masks.pkt_mark); break; } if (wc->masks.skb_priority) { ds_put_format(s, "skb_priority=%#"PRIx32",", f->skb_priority); } if (wc->masks.dl_type) { skip_type = true; if (f->dl_type == htons(ETH_TYPE_IP)) { if (wc->masks.nw_proto) { skip_proto = true; if (f->nw_proto == IPPROTO_ICMP) { ds_put_cstr(s, "icmp,"); } else if (f->nw_proto == IPPROTO_TCP) { ds_put_cstr(s, "tcp,"); } else if (f->nw_proto == IPPROTO_UDP) { ds_put_cstr(s, "udp,"); } else if (f->nw_proto == IPPROTO_SCTP) { ds_put_cstr(s, "sctp,"); } else { ds_put_cstr(s, "ip,"); skip_proto = false; } } else { ds_put_cstr(s, "ip,"); } } else if (f->dl_type == htons(ETH_TYPE_IPV6)) { if (wc->masks.nw_proto) { skip_proto = true; if (f->nw_proto == IPPROTO_ICMPV6) { ds_put_cstr(s, "icmp6,"); } else if (f->nw_proto == IPPROTO_TCP) { ds_put_cstr(s, "tcp6,"); } else if (f->nw_proto == IPPROTO_UDP) { ds_put_cstr(s, "udp6,"); } else if (f->nw_proto == IPPROTO_SCTP) { ds_put_cstr(s, "sctp6,"); } else { ds_put_cstr(s, "ipv6,"); skip_proto = false; } } else { ds_put_cstr(s, "ipv6,"); } } else if (f->dl_type == htons(ETH_TYPE_ARP)) { ds_put_cstr(s, "arp,"); } else if (f->dl_type == htons(ETH_TYPE_RARP)) { ds_put_cstr(s, "rarp,"); } else if (f->dl_type == htons(ETH_TYPE_MPLS)) { ds_put_cstr(s, 
"mpls,"); } else if (f->dl_type == htons(ETH_TYPE_MPLS_MCAST)) { ds_put_cstr(s, "mplsm,"); } else { skip_type = false; } } for (i = 0; i < FLOW_N_REGS; i++) { switch (wc->masks.regs[i]) { case 0: break; case UINT32_MAX: ds_put_format(s, "reg%d=0x%"PRIx32",", i, f->regs[i]); break; default: ds_put_format(s, "reg%d=0x%"PRIx32"/0x%"PRIx32",", i, f->regs[i], wc->masks.regs[i]); break; } } format_flow_tunnel(s, match); switch (wc->masks.metadata) { case 0: break; case CONSTANT_HTONLL(UINT64_MAX): ds_put_format(s, "metadata=%#"PRIx64",", ntohll(f->metadata)); break; default: ds_put_format(s, "metadata=%#"PRIx64"/%#"PRIx64",", ntohll(f->metadata), ntohll(wc->masks.metadata)); break; } if (wc->masks.in_port.ofp_port) { ds_put_cstr(s, "in_port="); ofputil_format_port(f->in_port.ofp_port, s); ds_put_char(s, ','); } if (wc->masks.vlan_tci) { ovs_be16 vid_mask = wc->masks.vlan_tci & htons(VLAN_VID_MASK); ovs_be16 pcp_mask = wc->masks.vlan_tci & htons(VLAN_PCP_MASK); ovs_be16 cfi = wc->masks.vlan_tci & htons(VLAN_CFI); if (cfi && f->vlan_tci & htons(VLAN_CFI) && (!vid_mask || vid_mask == htons(VLAN_VID_MASK)) && (!pcp_mask || pcp_mask == htons(VLAN_PCP_MASK)) && (vid_mask || pcp_mask)) { if (vid_mask) { ds_put_format(s, "dl_vlan=%"PRIu16",", vlan_tci_to_vid(f->vlan_tci)); } if (pcp_mask) { ds_put_format(s, "dl_vlan_pcp=%d,", vlan_tci_to_pcp(f->vlan_tci)); } } else if (wc->masks.vlan_tci == htons(0xffff)) { ds_put_format(s, "vlan_tci=0x%04"PRIx16",", ntohs(f->vlan_tci)); } else { ds_put_format(s, "vlan_tci=0x%04"PRIx16"/0x%04"PRIx16",", ntohs(f->vlan_tci), ntohs(wc->masks.vlan_tci)); } } format_eth_masked(s, "dl_src", f->dl_src, wc->masks.dl_src); format_eth_masked(s, "dl_dst", f->dl_dst, wc->masks.dl_dst); if (!skip_type && wc->masks.dl_type) { ds_put_format(s, "dl_type=0x%04"PRIx16",", ntohs(f->dl_type)); } if (f->dl_type == htons(ETH_TYPE_IPV6)) { format_ipv6_netmask(s, "ipv6_src", &f->ipv6_src, &wc->masks.ipv6_src); format_ipv6_netmask(s, "ipv6_dst", &f->ipv6_dst, 
&wc->masks.ipv6_dst); if (wc->masks.ipv6_label) { if (wc->masks.ipv6_label == htonl(UINT32_MAX)) { ds_put_format(s, "ipv6_label=0x%05"PRIx32",", ntohl(f->ipv6_label)); } else { ds_put_format(s, "ipv6_label=0x%05"PRIx32"/0x%05"PRIx32",", ntohl(f->ipv6_label), ntohl(wc->masks.ipv6_label)); } } } else if (f->dl_type == htons(ETH_TYPE_ARP) || f->dl_type == htons(ETH_TYPE_RARP)) { format_ip_netmask(s, "arp_spa", f->nw_src, wc->masks.nw_src); format_ip_netmask(s, "arp_tpa", f->nw_dst, wc->masks.nw_dst); } else { format_ip_netmask(s, "nw_src", f->nw_src, wc->masks.nw_src); format_ip_netmask(s, "nw_dst", f->nw_dst, wc->masks.nw_dst); } if (!skip_proto && wc->masks.nw_proto) { if (f->dl_type == htons(ETH_TYPE_ARP) || f->dl_type == htons(ETH_TYPE_RARP)) { ds_put_format(s, "arp_op=%"PRIu8",", f->nw_proto); } else { ds_put_format(s, "nw_proto=%"PRIu8",", f->nw_proto); } } if (f->dl_type == htons(ETH_TYPE_ARP) || f->dl_type == htons(ETH_TYPE_RARP)) { format_eth_masked(s, "arp_sha", f->arp_sha, wc->masks.arp_sha); format_eth_masked(s, "arp_tha", f->arp_tha, wc->masks.arp_tha); } if (wc->masks.nw_tos & IP_DSCP_MASK) { ds_put_format(s, "nw_tos=%"PRIu8",", f->nw_tos & IP_DSCP_MASK); } if (wc->masks.nw_tos & IP_ECN_MASK) { ds_put_format(s, "nw_ecn=%"PRIu8",", f->nw_tos & IP_ECN_MASK); } if (wc->masks.nw_ttl) { ds_put_format(s, "nw_ttl=%"PRIu8",", f->nw_ttl); } if (wc->masks.mpls_lse & htonl(MPLS_LABEL_MASK)) { ds_put_format(s, "mpls_label=%"PRIu32",", mpls_lse_to_label(f->mpls_lse)); } if (wc->masks.mpls_lse & htonl(MPLS_TC_MASK)) { ds_put_format(s, "mpls_tc=%"PRIu8",", mpls_lse_to_tc(f->mpls_lse)); } if (wc->masks.mpls_lse & htonl(MPLS_TTL_MASK)) { ds_put_format(s, "mpls_ttl=%"PRIu8",", mpls_lse_to_ttl(f->mpls_lse)); } if (wc->masks.mpls_lse & htonl(MPLS_BOS_MASK)) { ds_put_format(s, "mpls_bos=%"PRIu8",", mpls_lse_to_bos(f->mpls_lse)); } switch (wc->masks.nw_frag) { case FLOW_NW_FRAG_ANY | FLOW_NW_FRAG_LATER: ds_put_format(s, "nw_frag=%s,", f->nw_frag & FLOW_NW_FRAG_ANY ? 
(f->nw_frag & FLOW_NW_FRAG_LATER ? "later" : "first") : (f->nw_frag & FLOW_NW_FRAG_LATER ? "" : "no")); break; case FLOW_NW_FRAG_ANY: ds_put_format(s, "nw_frag=%s,", f->nw_frag & FLOW_NW_FRAG_ANY ? "yes" : "no"); break; case FLOW_NW_FRAG_LATER: ds_put_format(s, "nw_frag=%s,", f->nw_frag & FLOW_NW_FRAG_LATER ? "later" : "not_later"); break; } if (f->dl_type == htons(ETH_TYPE_IP) && f->nw_proto == IPPROTO_ICMP) { format_be16_masked(s, "icmp_type", f->tp_src, wc->masks.tp_src); format_be16_masked(s, "icmp_code", f->tp_dst, wc->masks.tp_dst); } else if (f->dl_type == htons(ETH_TYPE_IPV6) && f->nw_proto == IPPROTO_ICMPV6) { format_be16_masked(s, "icmp_type", f->tp_src, wc->masks.tp_src); format_be16_masked(s, "icmp_code", f->tp_dst, wc->masks.tp_dst); format_ipv6_netmask(s, "nd_target", &f->nd_target, &wc->masks.nd_target); format_eth_masked(s, "nd_sll", f->arp_sha, wc->masks.arp_sha); format_eth_masked(s, "nd_tll", f->arp_tha, wc->masks.arp_tha); } else { format_be16_masked(s, "tp_src", f->tp_src, wc->masks.tp_src); format_be16_masked(s, "tp_dst", f->tp_dst, wc->masks.tp_dst); } if (s->length > start_len && ds_last(s) == ',') { s->length--; } } /* Converts 'match' to a string and returns the string. If 'priority' is * different from OFP_DEFAULT_PRIORITY, includes it in the string. The caller * must free the string (with free()). */ char * match_to_string(const struct match *match, unsigned int priority) { struct ds s = DS_EMPTY_INITIALIZER; match_format(match, &s, priority); return ds_steal_cstr(&s); } void match_print(const struct match *match) { char *s = match_to_string(match, OFP_DEFAULT_PRIORITY); puts(s); free(s); } /* Initializes 'dst' as a copy of 'src'. The caller must eventually free 'dst' * with minimatch_destroy(). */ void minimatch_init(struct minimatch *dst, const struct match *src) { miniflow_init(&dst->flow, &src->flow); minimask_init(&dst->mask, &src->wc); } /* Initializes 'dst' as a copy of 'src'. 
The caller must eventually free 'dst' * with minimatch_destroy(). */ void minimatch_clone(struct minimatch *dst, const struct minimatch *src) { miniflow_clone(&dst->flow, &src->flow); minimask_clone(&dst->mask, &src->mask); } /* Initializes 'dst' with the data in 'src', destroying 'src'. The caller must * eventually free 'dst' with minimatch_destroy(). */ void minimatch_move(struct minimatch *dst, struct minimatch *src) { miniflow_move(&dst->flow, &src->flow); minimask_move(&dst->mask, &src->mask); } /* Frees any memory owned by 'match'. Does not free the storage in which * 'match' itself resides; the caller is responsible for that. */ void minimatch_destroy(struct minimatch *match) { miniflow_destroy(&match->flow); minimask_destroy(&match->mask); } /* Initializes 'dst' as a copy of 'src'. */ void minimatch_expand(const struct minimatch *src, struct match *dst) { miniflow_expand(&src->flow, &dst->flow); minimask_expand(&src->mask, &dst->wc); } /* Returns true if 'a' and 'b' match the same packets, false otherwise. */ bool minimatch_equal(const struct minimatch *a, const struct minimatch *b) { return (miniflow_equal(&a->flow, &b->flow) && minimask_equal(&a->mask, &b->mask)); } /* Returns a hash value for 'match', given 'basis'. */ uint32_t minimatch_hash(const struct minimatch *match, uint32_t basis) { return miniflow_hash(&match->flow, minimask_hash(&match->mask, basis)); } /* Appends a string representation of 'match' to 's'. If 'priority' is * different from OFP_DEFAULT_PRIORITY, includes it in 's'. */ void minimatch_format(const struct minimatch *match, struct ds *s, unsigned int priority) { struct match megamatch; minimatch_expand(match, &megamatch); match_format(&megamatch, s, priority); } /* Converts 'match' to a string and returns the string. If 'priority' is * different from OFP_DEFAULT_PRIORITY, includes it in the string. The caller * must free the string (with free()). 
*/ char * minimatch_to_string(const struct minimatch *match, unsigned int priority) { struct match megamatch; minimatch_expand(match, &megamatch); return match_to_string(&megamatch, priority); } openvswitch-2.0.1+git20140120/lib/match.h000066400000000000000000000170641226605124000174750ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef MATCH_H #define MATCH_H 1 #include "flow.h" struct ds; /* A flow classification match. * * Use one of the match_*() functions to initialize a "struct match". * * The match_*() functions below maintain the following important invariant. * If a bit or a field is wildcarded in 'wc', then the corresponding bit or * field in 'flow' is set to all-0-bits. (The match_zero_wildcarded_fields() * function can be used to restore this invariant after adding wildcards.) 
*/ struct match { struct flow flow; struct flow_wildcards wc; }; void match_init(struct match *, const struct flow *, const struct flow_wildcards *); void match_wc_init(struct match *match, const struct flow *flow); void match_init_catchall(struct match *); void match_init_exact(struct match *, const struct flow *); void match_zero_wildcarded_fields(struct match *); void match_set_reg(struct match *, unsigned int reg_idx, uint32_t value); void match_set_reg_masked(struct match *, unsigned int reg_idx, uint32_t value, uint32_t mask); void match_set_metadata(struct match *, ovs_be64 metadata); void match_set_metadata_masked(struct match *, ovs_be64 metadata, ovs_be64 mask); void match_set_tun_id(struct match *, ovs_be64 tun_id); void match_set_tun_id_masked(struct match *, ovs_be64 tun_id, ovs_be64 mask); void match_set_tun_src(struct match *match, ovs_be32 src); void match_set_tun_src_masked(struct match *match, ovs_be32 src, ovs_be32 mask); void match_set_tun_dst(struct match *match, ovs_be32 dst); void match_set_tun_dst_masked(struct match *match, ovs_be32 dst, ovs_be32 mask); void match_set_tun_ttl(struct match *match, uint8_t ttl); void match_set_tun_ttl_masked(struct match *match, uint8_t ttl, uint8_t mask); void match_set_tun_tos(struct match *match, uint8_t tos); void match_set_tun_tos_masked(struct match *match, uint8_t tos, uint8_t mask); void match_set_tun_flags(struct match *match, uint16_t flags); void match_set_tun_flags_masked(struct match *match, uint16_t flags, uint16_t mask); void match_set_in_port(struct match *, ofp_port_t ofp_port); void match_set_pkt_mark(struct match *, uint32_t pkt_mark); void match_set_pkt_mark_masked(struct match *, uint32_t pkt_mark, uint32_t mask); void match_set_skb_priority(struct match *, uint32_t skb_priority); void match_set_dl_type(struct match *, ovs_be16); void match_set_dl_src(struct match *, const uint8_t[6]); void match_set_dl_src_masked(struct match *, const uint8_t dl_src[6], const uint8_t mask[6]); void 
match_set_dl_dst(struct match *, const uint8_t[6]); void match_set_dl_dst_masked(struct match *, const uint8_t dl_dst[6], const uint8_t mask[6]); void match_set_dl_tci(struct match *, ovs_be16 tci); void match_set_dl_tci_masked(struct match *, ovs_be16 tci, ovs_be16 mask); void match_set_any_vid(struct match *); void match_set_dl_vlan(struct match *, ovs_be16); void match_set_vlan_vid(struct match *, ovs_be16); void match_set_vlan_vid_masked(struct match *, ovs_be16 vid, ovs_be16 mask); void match_set_any_pcp(struct match *); void match_set_dl_vlan_pcp(struct match *, uint8_t); void match_set_any_mpls_label(struct match *); void match_set_mpls_label(struct match *, ovs_be32); void match_set_any_mpls_tc(struct match *); void match_set_mpls_tc(struct match *, uint8_t); void match_set_any_mpls_bos(struct match *); void match_set_mpls_bos(struct match *, uint8_t); void match_set_tp_src(struct match *, ovs_be16); void match_set_tp_src_masked(struct match *, ovs_be16 port, ovs_be16 mask); void match_set_tp_dst(struct match *, ovs_be16); void match_set_tp_dst_masked(struct match *, ovs_be16 port, ovs_be16 mask); void match_set_nw_proto(struct match *, uint8_t); void match_set_nw_src(struct match *, ovs_be32); void match_set_nw_src_masked(struct match *, ovs_be32 ip, ovs_be32 mask); void match_set_nw_dst(struct match *, ovs_be32); void match_set_nw_dst_masked(struct match *, ovs_be32 ip, ovs_be32 mask); void match_set_nw_dscp(struct match *, uint8_t); void match_set_nw_ecn(struct match *, uint8_t); void match_set_nw_ttl(struct match *, uint8_t); void match_set_nw_frag(struct match *, uint8_t nw_frag); void match_set_nw_frag_masked(struct match *, uint8_t nw_frag, uint8_t mask); void match_set_icmp_type(struct match *, uint8_t); void match_set_icmp_code(struct match *, uint8_t); void match_set_arp_sha(struct match *, const uint8_t[6]); void match_set_arp_sha_masked(struct match *, const uint8_t arp_sha[6], const uint8_t mask[6]); void match_set_arp_tha(struct match *, const 
uint8_t[6]); void match_set_arp_tha_masked(struct match *, const uint8_t arp_tha[6], const uint8_t mask[6]); void match_set_ipv6_src(struct match *, const struct in6_addr *); void match_set_ipv6_src_masked(struct match *, const struct in6_addr *, const struct in6_addr *); void match_set_ipv6_dst(struct match *, const struct in6_addr *); void match_set_ipv6_dst_masked(struct match *, const struct in6_addr *, const struct in6_addr *); void match_set_ipv6_label(struct match *, ovs_be32); void match_set_ipv6_label_masked(struct match *, ovs_be32, ovs_be32); void match_set_nd_target(struct match *, const struct in6_addr *); void match_set_nd_target_masked(struct match *, const struct in6_addr *, const struct in6_addr *); bool match_equal(const struct match *, const struct match *); uint32_t match_hash(const struct match *, uint32_t basis); void match_format(const struct match *, struct ds *, unsigned int priority); char *match_to_string(const struct match *, unsigned int priority); void match_print(const struct match *); /* Compressed match. */ /* A sparse representation of a "struct match". * * This has the same invariant as "struct match", that is, a 1-bit in the * 'flow' must correspond to a 1-bit in 'mask'. * * The invariants for the underlying miniflow and minimask are also maintained, * which means that 'flow' and 'mask' can have different 'map's. In * particular, if the match checks that a given 32-bit field has value 0, then * 'map' will have a 1-bit in 'mask' but a 0-bit in 'flow' for that field. 
*/ struct minimatch { struct miniflow flow; struct minimask mask; }; void minimatch_init(struct minimatch *, const struct match *); void minimatch_clone(struct minimatch *, const struct minimatch *); void minimatch_move(struct minimatch *dst, struct minimatch *src); void minimatch_destroy(struct minimatch *); void minimatch_expand(const struct minimatch *, struct match *); bool minimatch_equal(const struct minimatch *a, const struct minimatch *b); uint32_t minimatch_hash(const struct minimatch *, uint32_t basis); void minimatch_format(const struct minimatch *, struct ds *, unsigned int priority); char *minimatch_to_string(const struct minimatch *, unsigned int priority); #endif /* match.h */ openvswitch-2.0.1+git20140120/lib/memory-unixctl.man000066400000000000000000000004011226605124000217040ustar00rootroot00000000000000.SS "MEMORY COMMANDS" These commands report memory usage. . .IP "\fBmemory/show\fR" Displays some basic statistics about \fB\*(PN\fR's memory usage. \fB\*(PN\fR also logs this information soon after startup and periodically as its memory consumption grows. openvswitch-2.0.1+git20140120/lib/memory.c000066400000000000000000000115241226605124000176770ustar00rootroot00000000000000/* * Copyright (c) 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

/* NOTE(review): the bare "#include" directives below carry no header name in
 * this copy of the file; the names appear to have been lost and need to be
 * restored from a pristine copy before this can compile. */
#include
#include "memory.h"
#include
#include
#include "dynamic-string.h"
#include "poll-loop.h"
#include "simap.h"
#include "timeval.h"
#include "unixctl.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(memory);

/* The number of milliseconds before the first report of daemon memory usage,
 * and the number of milliseconds between checks for daemon memory growth. */
#define MEMORY_CHECK_INTERVAL (10 * 1000)

/* When we should next check memory usage and possibly trigger a report. */
static long long int next_check;

/* The last time at which we reported memory usage, and the usage we reported
 * at that time. */
static long long int last_report;
static unsigned long int last_reported_maxrss;

/* Are we expecting a call to memory_report()? */
static bool want_report;

/* Unixctl connections waiting for responses. */
static struct unixctl_conn **conns;
static size_t n_conns;

static void memory_init(void);

/* Runs the memory monitor.
 *
 * At most once per MEMORY_CHECK_INTERVAL, reads the process's peak resident
 * set size with getrusage().  A report is requested (by setting
 * 'want_report') the first time through, and afterward whenever the peak has
 * grown to at least 1.5 times the last reported value.
 *
 * The client should call memory_should_report() afterward.
 *
 * This function, and the remainder of this module's interface, should be
 * called from only a single thread. */
void
memory_run(void)
{
    struct rusage usage;
    long long int now;

    memory_init();

    /* Time for a check? */
    now = time_msec();
    if (now < next_check) {
        return;
    }
    next_check = now + MEMORY_CHECK_INTERVAL;

    /* Time for a report? */
    getrusage(RUSAGE_SELF, &usage);
    if (!last_reported_maxrss) {
        /* Nothing has been reported yet: this is the initial report. */
        VLOG_INFO("%lu kB peak resident set size after %.1f seconds",
                  (unsigned long int) usage.ru_maxrss,
                  (now - time_boot_msec()) / 1000.0);
    } else if (usage.ru_maxrss >= last_reported_maxrss * 1.5) {
        /* Peak usage grew by at least 50% since the last report. */
        VLOG_INFO("peak resident set size grew %.0f%% in last %.1f seconds, "
                  "from %lu kB to %lu kB",
                  ((double) usage.ru_maxrss / last_reported_maxrss - 1) * 100,
                  (now - last_report) / 1000.0,
                  last_reported_maxrss, (unsigned long int) usage.ru_maxrss);
    } else {
        /* No significant growth: nothing to report this time. */
        return;
    }

    /* Request a report. */
    want_report = true;
    last_report = now;
    last_reported_maxrss = usage.ru_maxrss;
}

/* Causes the poll loop to wake up if the memory monitor needs to run.
 *
 * Specifically, requests an immediate wakeup whenever
 * memory_should_report() returns true. */
void
memory_wait(void)
{
    if (memory_should_report()) {
        poll_immediate_wake();
    }
}

/* Returns true if the caller should log some information about memory usage
 * (with memory_report()), false otherwise.
 *
 * A report is wanted either because memory_run() requested one or because at
 * least one unixctl connection is waiting for a reply. */
bool
memory_should_report(void)
{
    return want_report || n_conns > 0;
}

/* Appends the contents of 'usage' to 's' as space-separated "name:count"
 * pairs, in the order returned by simap_sort().  No trailing space is left
 * at the end of 's'. */
static void
compose_report(const struct simap *usage, struct ds *s)
{
    const struct simap_node **nodes = simap_sort(usage);
    size_t n = simap_count(usage);
    size_t i;

    for (i = 0; i < n; i++) {
        const struct simap_node *node = nodes[i];

        ds_put_format(s, "%s:%u ", node->name, node->data);
    }
    ds_chomp(s, ' ');           /* Drop the space after the last pair. */
    free(nodes);                /* The array from simap_sort() is ours. */
}

/* Logs the contents of 'usage', as a collection of name-count pairs.
 *
 * 'usage' should capture large-scale statistics that one might reasonably
 * expect to correlate with memory usage. For example, each OpenFlow flow
 * requires some memory, so ovs-vswitchd includes the total number of flows in
 * 'usage'.
*/ void memory_report(const struct simap *usage) { struct ds s; size_t i; ds_init(&s); compose_report(usage, &s); if (want_report) { if (s.length) { VLOG_INFO("%s", ds_cstr(&s)); } want_report = false; } if (n_conns) { for (i = 0; i < n_conns; i++) { unixctl_command_reply(conns[i], ds_cstr(&s)); } free(conns); conns = NULL; n_conns = 0; } ds_destroy(&s); } static void memory_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { conns = xrealloc(conns, (n_conns + 1) * sizeof *conns); conns[n_conns++] = conn; } static void memory_init(void) { static bool inited = false; if (!inited) { inited = true; unixctl_command_register("memory/show", "", 0, 0, memory_unixctl_show, NULL); next_check = time_boot_msec() + MEMORY_CHECK_INTERVAL; } } openvswitch-2.0.1+git20140120/lib/memory.h000066400000000000000000000031741226605124000177060ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef MEMORY_H #define MEMORY_H 1 /* Memory usage monitor. * * This is intended to be called as part of a daemon's main loop. After some * time to allow the daemon to allocate an initial memory usage, it logs some * memory usage information (most of which must actually be provided by the * client). At intervals, if the daemon's memory usage has grown * significantly, it again logs information. * * The monitor also has a unixctl interface. 
* * Intended usage in the program's main loop is like this: * * for (;;) { * memory_run(); * if (memory_should_report()) { * struct simap usage; * * simap_init(&usage); * ...fill in 'usage' with meaningful statistics... * memory_report(&usage); * simap_destroy(&usage); * } * * ... * * memory_wait(); * poll_block(); * } */ #include struct simap; void memory_run(void); void memory_wait(void); bool memory_should_report(void); void memory_report(const struct simap *usage); #endif /* memory.h */ openvswitch-2.0.1+git20140120/lib/meta-flow.c000066400000000000000000002245211226605124000202650ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "meta-flow.h" #include #include #include #include #include "classifier.h" #include "dynamic-string.h" #include "ofp-errors.h" #include "ofp-util.h" #include "ovs-thread.h" #include "packets.h" #include "random.h" #include "shash.h" #include "socket-util.h" #include "unaligned.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(meta_flow); #define MF_FIELD_SIZES(MEMBER) \ sizeof ((union mf_value *)0)->MEMBER, \ 8 * sizeof ((union mf_value *)0)->MEMBER static const struct mf_field mf_fields[MFF_N_IDS] = { /* ## -------- ## */ /* ## metadata ## */ /* ## -------- ## */ { MFF_TUN_ID, "tun_id", NULL, MF_FIELD_SIZES(be64), MFM_FULLY, MFS_HEXADECIMAL, MFP_NONE, true, NXM_NX_TUN_ID, "NXM_NX_TUN_ID", OXM_OF_TUNNEL_ID, "OXM_OF_TUNNEL_ID", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_TUN_SRC, "tun_src", NULL, MF_FIELD_SIZES(be32), MFM_FULLY, MFS_IPV4, MFP_NONE, true, NXM_NX_TUN_IPV4_SRC, "NXM_NX_TUN_IPV4_SRC", NXM_NX_TUN_IPV4_SRC, "NXM_NX_TUN_IPV4_SRC", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_TUN_DST, "tun_dst", NULL, MF_FIELD_SIZES(be32), MFM_FULLY, MFS_IPV4, MFP_NONE, true, NXM_NX_TUN_IPV4_DST, "NXM_NX_TUN_IPV4_DST", NXM_NX_TUN_IPV4_DST, "NXM_NX_TUN_IPV4_DST", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_TUN_FLAGS, "tun_flags", NULL, MF_FIELD_SIZES(be16), MFM_NONE, MFS_TNL_FLAGS, MFP_NONE, false, 0, NULL, 0, NULL, OFPUTIL_P_NONE, OFPUTIL_P_NONE, }, { MFF_TUN_TTL, "tun_ttl", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_NONE, false, 0, NULL, 0, NULL, OFPUTIL_P_NONE, OFPUTIL_P_NONE, }, { MFF_TUN_TOS, "tun_tos", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_NONE, false, 0, NULL, 0, NULL, OFPUTIL_P_NONE, OFPUTIL_P_NONE, }, { MFF_METADATA, "metadata", NULL, MF_FIELD_SIZES(be64), MFM_FULLY, MFS_HEXADECIMAL, MFP_NONE, true, OXM_OF_METADATA, "OXM_OF_METADATA", OXM_OF_METADATA, "OXM_OF_METADATA", OFPUTIL_P_NXM_OF11_UP, OFPUTIL_P_NXM_OF11_UP, }, { MFF_IN_PORT, "in_port", NULL, MF_FIELD_SIZES(be16), 
MFM_NONE, MFS_OFP_PORT, MFP_NONE, true, NXM_OF_IN_PORT, "NXM_OF_IN_PORT", NXM_OF_IN_PORT, "NXM_OF_IN_PORT", OFPUTIL_P_ANY, /* OF11+ via mapping to 32 bits. */ OFPUTIL_P_NONE, }, { MFF_IN_PORT_OXM, "in_port_oxm", NULL, MF_FIELD_SIZES(be32), MFM_NONE, MFS_OFP_PORT_OXM, MFP_NONE, true, OXM_OF_IN_PORT, "OXM_OF_IN_PORT", OXM_OF_IN_PORT, "OXM_OF_IN_PORT", OFPUTIL_P_OF11_UP, OFPUTIL_P_NONE, }, { MFF_SKB_PRIORITY, "skb_priority", NULL, MF_FIELD_SIZES(be32), MFM_NONE, MFS_HEXADECIMAL, MFP_NONE, false, 0, NULL, 0, NULL, OFPUTIL_P_NONE, OFPUTIL_P_NONE, }, { MFF_PKT_MARK, "pkt_mark", NULL, MF_FIELD_SIZES(be32), MFM_FULLY, MFS_HEXADECIMAL, MFP_NONE, true, NXM_NX_PKT_MARK, "NXM_NX_PKT_MARK", NXM_NX_PKT_MARK, "NXM_NX_PKT_MARK", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, #define REGISTER(IDX) \ { \ MFF_REG##IDX, "reg" #IDX, NULL, \ MF_FIELD_SIZES(be32), \ MFM_FULLY, \ MFS_HEXADECIMAL, \ MFP_NONE, \ true, \ NXM_NX_REG(IDX), "NXM_NX_REG" #IDX, \ NXM_NX_REG(IDX), "NXM_NX_REG" #IDX, \ OFPUTIL_P_NXM_OXM_ANY, \ OFPUTIL_P_NXM_OXM_ANY, \ } #if FLOW_N_REGS > 0 REGISTER(0), #endif #if FLOW_N_REGS > 1 REGISTER(1), #endif #if FLOW_N_REGS > 2 REGISTER(2), #endif #if FLOW_N_REGS > 3 REGISTER(3), #endif #if FLOW_N_REGS > 4 REGISTER(4), #endif #if FLOW_N_REGS > 5 REGISTER(5), #endif #if FLOW_N_REGS > 6 REGISTER(6), #endif #if FLOW_N_REGS > 7 REGISTER(7), #endif #if FLOW_N_REGS > 8 #error #endif /* ## -- ## */ /* ## L2 ## */ /* ## -- ## */ { MFF_ETH_SRC, "eth_src", "dl_src", MF_FIELD_SIZES(mac), MFM_FULLY, MFS_ETHERNET, MFP_NONE, true, NXM_OF_ETH_SRC, "NXM_OF_ETH_SRC", OXM_OF_ETH_SRC, "OXM_OF_ETH_SRC", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OF11_UP, /* Bitwise masking only with NXM and OF11+! */ }, { MFF_ETH_DST, "eth_dst", "dl_dst", MF_FIELD_SIZES(mac), MFM_FULLY, MFS_ETHERNET, MFP_NONE, true, NXM_OF_ETH_DST, "NXM_OF_ETH_DST", OXM_OF_ETH_DST, "OXM_OF_ETH_DST", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OF11_UP, /* Bitwise masking only with NXM and OF11+! 
*/ }, { MFF_ETH_TYPE, "eth_type", "dl_type", MF_FIELD_SIZES(be16), MFM_NONE, MFS_HEXADECIMAL, MFP_NONE, false, NXM_OF_ETH_TYPE, "NXM_OF_ETH_TYPE", OXM_OF_ETH_TYPE, "OXM_OF_ETH_TYPE", OFPUTIL_P_ANY, OFPUTIL_P_NONE, }, { MFF_VLAN_TCI, "vlan_tci", NULL, MF_FIELD_SIZES(be16), MFM_FULLY, MFS_HEXADECIMAL, MFP_NONE, true, NXM_OF_VLAN_TCI, "NXM_OF_VLAN_TCI", NXM_OF_VLAN_TCI, "NXM_OF_VLAN_TCI", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_DL_VLAN, "dl_vlan", NULL, sizeof(ovs_be16), 12, MFM_NONE, MFS_DECIMAL, MFP_NONE, true, 0, NULL, 0, NULL, OFPUTIL_P_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_VLAN_VID, "vlan_vid", NULL, sizeof(ovs_be16), 12, MFM_FULLY, MFS_DECIMAL, MFP_NONE, true, OXM_OF_VLAN_VID, "OXM_OF_VLAN_VID", OXM_OF_VLAN_VID, "OXM_OF_VLAN_VID", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_DL_VLAN_PCP, "dl_vlan_pcp", NULL, 1, 3, MFM_NONE, MFS_DECIMAL, MFP_NONE, true, 0, NULL, 0, NULL, OFPUTIL_P_ANY, /* Will be mapped to NXM and OXM. */ OFPUTIL_P_NONE, }, { MFF_VLAN_PCP, "vlan_pcp", NULL, 1, 3, MFM_NONE, MFS_DECIMAL, MFP_VLAN_VID, true, OXM_OF_VLAN_PCP, "OXM_OF_VLAN_PCP", OXM_OF_VLAN_PCP, "OXM_OF_VLAN_PCP", OFPUTIL_P_ANY, /* Will be mapped to OF10 and NXM. 
*/ OFPUTIL_P_NONE, }, /* ## ---- ## */ /* ## L2.5 ## */ /* ## ---- ## */ { MFF_MPLS_LABEL, "mpls_label", NULL, 4, 20, MFM_NONE, MFS_DECIMAL, MFP_MPLS, true, OXM_OF_MPLS_LABEL, "OXM_OF_MPLS_LABEL", OXM_OF_MPLS_LABEL, "OXM_OF_MPLS_LABEL", OFPUTIL_P_NXM_OF11_UP, OFPUTIL_P_NONE, }, { MFF_MPLS_TC, "mpls_tc", NULL, 1, 3, MFM_NONE, MFS_DECIMAL, MFP_MPLS, true, OXM_OF_MPLS_TC, "OXM_OF_MPLS_TC", OXM_OF_MPLS_TC, "OXM_OF_MPLS_TC", OFPUTIL_P_NXM_OF11_UP, OFPUTIL_P_NONE, }, { MFF_MPLS_BOS, "mpls_bos", NULL, 1, 1, MFM_NONE, MFS_DECIMAL, MFP_MPLS, false, OXM_OF_MPLS_BOS, "OXM_OF_MPLS_BOS", OXM_OF_MPLS_BOS, "OXM_OF_MPLS_BOS", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NONE, }, /* ## -- ## */ /* ## L3 ## */ /* ## -- ## */ { MFF_IPV4_SRC, "ip_src", "nw_src", MF_FIELD_SIZES(be32), MFM_FULLY, MFS_IPV4, MFP_IPV4, true, NXM_OF_IP_SRC, "NXM_OF_IP_SRC", OXM_OF_IPV4_SRC, "OXM_OF_IPV4_SRC", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OF11_UP, }, { MFF_IPV4_DST, "ip_dst", "nw_dst", MF_FIELD_SIZES(be32), MFM_FULLY, MFS_IPV4, MFP_IPV4, true, NXM_OF_IP_DST, "NXM_OF_IP_DST", OXM_OF_IPV4_DST, "OXM_OF_IPV4_DST", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OF11_UP, }, { MFF_IPV6_SRC, "ipv6_src", NULL, MF_FIELD_SIZES(ipv6), MFM_FULLY, MFS_IPV6, MFP_IPV6, true, NXM_NX_IPV6_SRC, "NXM_NX_IPV6_SRC", OXM_OF_IPV6_SRC, "OXM_OF_IPV6_SRC", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_IPV6_DST, "ipv6_dst", NULL, MF_FIELD_SIZES(ipv6), MFM_FULLY, MFS_IPV6, MFP_IPV6, true, NXM_NX_IPV6_DST, "NXM_NX_IPV6_DST", OXM_OF_IPV6_DST, "OXM_OF_IPV6_DST", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_IPV6_LABEL, "ipv6_label", NULL, 4, 20, MFM_FULLY, MFS_HEXADECIMAL, MFP_IPV6, false, NXM_NX_IPV6_LABEL, "NXM_NX_IPV6_LABEL", OXM_OF_IPV6_FLABEL, "OXM_OF_IPV6_FLABEL", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_IP_PROTO, "nw_proto", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_IP_ANY, false, NXM_OF_IP_PROTO, "NXM_OF_IP_PROTO", OXM_OF_IP_PROTO, "OXM_OF_IP_PROTO", OFPUTIL_P_ANY, OFPUTIL_P_NONE, }, { MFF_IP_DSCP, "nw_tos", NULL, 
MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_IP_ANY, true, NXM_OF_IP_TOS, "NXM_OF_IP_TOS", NXM_OF_IP_TOS, "NXM_OF_IP_TOS", OFPUTIL_P_ANY, /* Will be shifted for OXM. */ OFPUTIL_P_NONE, }, { MFF_IP_DSCP_SHIFTED, "nw_tos_shifted", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_IP_ANY, true, OXM_OF_IP_DSCP, "OXM_OF_IP_DSCP", OXM_OF_IP_DSCP, "OXM_OF_IP_DSCP", OFPUTIL_P_ANY, /* Will be shifted for non-OXM. */ OFPUTIL_P_NONE, }, { MFF_IP_ECN, "nw_ecn", NULL, 1, 2, MFM_NONE, MFS_DECIMAL, MFP_IP_ANY, true, NXM_NX_IP_ECN, "NXM_NX_IP_ECN", OXM_OF_IP_ECN, "OXM_OF_IP_ECN", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NONE, }, { MFF_IP_TTL, "nw_ttl", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_IP_ANY, true, NXM_NX_IP_TTL, "NXM_NX_IP_TTL", NXM_NX_IP_TTL, "NXM_NX_IP_TTL", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NONE, }, { MFF_IP_FRAG, "ip_frag", NULL, 1, 2, MFM_FULLY, MFS_FRAG, MFP_IP_ANY, false, NXM_NX_IP_FRAG, "NXM_NX_IP_FRAG", NXM_NX_IP_FRAG, "NXM_NX_IP_FRAG", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_ARP_OP, "arp_op", NULL, MF_FIELD_SIZES(be16), MFM_NONE, MFS_DECIMAL, MFP_ARP, false, NXM_OF_ARP_OP, "NXM_OF_ARP_OP", OXM_OF_ARP_OP, "OXM_OF_ARP_OP", OFPUTIL_P_ANY, OFPUTIL_P_NONE, }, { MFF_ARP_SPA, "arp_spa", NULL, MF_FIELD_SIZES(be32), MFM_FULLY, MFS_IPV4, MFP_ARP, false, NXM_OF_ARP_SPA, "NXM_OF_ARP_SPA", OXM_OF_ARP_SPA, "OXM_OF_ARP_SPA", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OF11_UP, }, { MFF_ARP_TPA, "arp_tpa", NULL, MF_FIELD_SIZES(be32), MFM_FULLY, MFS_IPV4, MFP_ARP, false, NXM_OF_ARP_TPA, "NXM_OF_ARP_TPA", OXM_OF_ARP_TPA, "OXM_OF_ARP_TPA", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OF11_UP, }, { MFF_ARP_SHA, "arp_sha", NULL, MF_FIELD_SIZES(mac), MFM_FULLY, MFS_ETHERNET, MFP_ARP, false, NXM_NX_ARP_SHA, "NXM_NX_ARP_SHA", OXM_OF_ARP_SHA, "OXM_OF_ARP_SHA", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_ARP_THA, "arp_tha", NULL, MF_FIELD_SIZES(mac), MFM_FULLY, MFS_ETHERNET, MFP_ARP, false, NXM_NX_ARP_THA, "NXM_NX_ARP_THA", OXM_OF_ARP_THA, "OXM_OF_ARP_THA", OFPUTIL_P_NXM_OXM_ANY, 
OFPUTIL_P_NXM_OXM_ANY, }, /* ## -- ## */ /* ## L4 ## */ /* ## -- ## */ { MFF_TCP_SRC, "tcp_src", "tp_src", MF_FIELD_SIZES(be16), MFM_FULLY, MFS_DECIMAL, MFP_TCP, true, NXM_OF_TCP_SRC, "NXM_OF_TCP_SRC", OXM_OF_TCP_SRC, "OXM_OF_TCP_SRC", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_TCP_DST, "tcp_dst", "tp_dst", MF_FIELD_SIZES(be16), MFM_FULLY, MFS_DECIMAL, MFP_TCP, true, NXM_OF_TCP_DST, "NXM_OF_TCP_DST", OXM_OF_TCP_DST, "OXM_OF_TCP_DST", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_UDP_SRC, "udp_src", NULL, MF_FIELD_SIZES(be16), MFM_FULLY, MFS_DECIMAL, MFP_UDP, true, NXM_OF_UDP_SRC, "NXM_OF_UDP_SRC", OXM_OF_UDP_SRC, "OXM_OF_UDP_SRC", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_UDP_DST, "udp_dst", NULL, MF_FIELD_SIZES(be16), MFM_FULLY, MFS_DECIMAL, MFP_UDP, true, NXM_OF_UDP_DST, "NXM_OF_UDP_DST", OXM_OF_UDP_DST, "OXM_OF_UDP_DST", OFPUTIL_P_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_SCTP_SRC, "sctp_src", NULL, MF_FIELD_SIZES(be16), MFM_FULLY, MFS_DECIMAL, MFP_SCTP, true, OXM_OF_SCTP_SRC, "OXM_OF_SCTP_SRC", OXM_OF_SCTP_SRC, "OXM_OF_SCTP_SRC", OFPUTIL_P_NXM_OF11_UP, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_SCTP_DST, "sctp_dst", NULL, MF_FIELD_SIZES(be16), MFM_FULLY, MFS_DECIMAL, MFP_SCTP, true, OXM_OF_SCTP_DST, "OXM_OF_SCTP_DST", OXM_OF_SCTP_DST, "OXM_OF_SCTP_DST", OFPUTIL_P_NXM_OF11_UP, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_ICMPV4_TYPE, "icmp_type", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_ICMPV4, false, NXM_OF_ICMP_TYPE, "NXM_OF_ICMP_TYPE", OXM_OF_ICMPV4_TYPE, "OXM_OF_ICMPV4_TYPE", OFPUTIL_P_ANY, OFPUTIL_P_NONE, }, { MFF_ICMPV4_CODE, "icmp_code", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_ICMPV4, false, NXM_OF_ICMP_CODE, "NXM_OF_ICMP_CODE", OXM_OF_ICMPV4_CODE, "OXM_OF_ICMPV4_CODE", OFPUTIL_P_ANY, OFPUTIL_P_NONE, }, { MFF_ICMPV6_TYPE, "icmpv6_type", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_ICMPV6, false, NXM_NX_ICMPV6_TYPE, "NXM_NX_ICMPV6_TYPE", OXM_OF_ICMPV6_TYPE, "OXM_OF_ICMPV6_TYPE", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NONE, }, { MFF_ICMPV6_CODE, 
"icmpv6_code", NULL, MF_FIELD_SIZES(u8), MFM_NONE, MFS_DECIMAL, MFP_ICMPV6, false, NXM_NX_ICMPV6_CODE, "NXM_NX_ICMPV6_CODE", OXM_OF_ICMPV6_CODE, "OXM_OF_ICMPV6_CODE", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NONE, }, /* ## ---- ## */ /* ## L"5" ## */ /* ## ---- ## */ { MFF_ND_TARGET, "nd_target", NULL, MF_FIELD_SIZES(ipv6), MFM_FULLY, MFS_IPV6, MFP_ND, false, NXM_NX_ND_TARGET, "NXM_NX_ND_TARGET", OXM_OF_IPV6_ND_TARGET, "OXM_OF_IPV6_ND_TARGET", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_ND_SLL, "nd_sll", NULL, MF_FIELD_SIZES(mac), MFM_FULLY, MFS_ETHERNET, MFP_ND_SOLICIT, false, NXM_NX_ND_SLL, "NXM_NX_ND_SLL", OXM_OF_IPV6_ND_SLL, "OXM_OF_IPV6_ND_SLL", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, }, { MFF_ND_TLL, "nd_tll", NULL, MF_FIELD_SIZES(mac), MFM_FULLY, MFS_ETHERNET, MFP_ND_ADVERT, false, NXM_NX_ND_TLL, "NXM_NX_ND_TLL", OXM_OF_IPV6_ND_TLL, "OXM_OF_IPV6_ND_TLL", OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, } }; /* Maps an NXM or OXM header value to an mf_field. */ struct nxm_field { struct hmap_node hmap_node; /* In 'all_fields' hmap. */ uint32_t header; /* NXM or OXM header value. */ const struct mf_field *mf; }; /* Contains 'struct nxm_field's. */ static struct hmap all_fields; /* Maps from an mf_field's 'name' or 'extra_name' to the mf_field. */ static struct shash mf_by_name; /* Rate limit for parse errors. These always indicate a bug in an OpenFlow * controller and so there's not much point in showing a lot of them. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); const struct mf_field *mf_from_nxm_header__(uint32_t header); static void nxm_init(void); /* Returns the field with the given 'id'. */ const struct mf_field * mf_from_id(enum mf_field_id id) { ovs_assert((unsigned int) id < MFF_N_IDS); return &mf_fields[id]; } /* Returns the field with the given 'name', or a null pointer if no field has * that name. 
*/ const struct mf_field * mf_from_name(const char *name) { nxm_init(); return shash_find_data(&mf_by_name, name); } static void add_nxm_field(uint32_t header, const struct mf_field *mf) { struct nxm_field *f; f = xmalloc(sizeof *f); hmap_insert(&all_fields, &f->hmap_node, hash_int(header, 0)); f->header = header; f->mf = mf; } static void nxm_init_add_field(const struct mf_field *mf, uint32_t header) { if (header) { ovs_assert(!mf_from_nxm_header__(header)); add_nxm_field(header, mf); if (mf->maskable != MFM_NONE) { add_nxm_field(NXM_MAKE_WILD_HEADER(header), mf); } } } static void nxm_do_init(void) { int i; hmap_init(&all_fields); shash_init(&mf_by_name); for (i = 0; i < MFF_N_IDS; i++) { const struct mf_field *mf = &mf_fields[i]; ovs_assert(mf->id == i); /* Fields must be in the enum order. */ nxm_init_add_field(mf, mf->nxm_header); if (mf->oxm_header != mf->nxm_header) { nxm_init_add_field(mf, mf->oxm_header); } shash_add_once(&mf_by_name, mf->name, mf); if (mf->extra_name) { shash_add_once(&mf_by_name, mf->extra_name, mf); } } } static void nxm_init(void) { static pthread_once_t once = PTHREAD_ONCE_INIT; pthread_once(&once, nxm_do_init); } const struct mf_field * mf_from_nxm_header(uint32_t header) { nxm_init(); return mf_from_nxm_header__(header); } const struct mf_field * mf_from_nxm_header__(uint32_t header) { const struct nxm_field *f; HMAP_FOR_EACH_IN_BUCKET (f, hmap_node, hash_int(header, 0), &all_fields) { if (f->header == header) { return f->mf; } } return NULL; } /* Returns true if 'wc' wildcards all the bits in field 'mf', false if 'wc' * specifies at least one bit in the field. * * The caller is responsible for ensuring that 'wc' corresponds to a flow that * meets 'mf''s prerequisites. 
*/ bool mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc) { switch (mf->id) { case MFF_TUN_SRC: return !wc->masks.tunnel.ip_src; case MFF_TUN_DST: return !wc->masks.tunnel.ip_dst; case MFF_TUN_ID: case MFF_TUN_TOS: case MFF_TUN_TTL: case MFF_TUN_FLAGS: return !wc->masks.tunnel.tun_id; case MFF_METADATA: return !wc->masks.metadata; case MFF_IN_PORT: case MFF_IN_PORT_OXM: return !wc->masks.in_port.ofp_port; case MFF_SKB_PRIORITY: return !wc->masks.skb_priority; case MFF_PKT_MARK: return !wc->masks.pkt_mark; CASE_MFF_REGS: return !wc->masks.regs[mf->id - MFF_REG0]; case MFF_ETH_SRC: return eth_addr_is_zero(wc->masks.dl_src); case MFF_ETH_DST: return eth_addr_is_zero(wc->masks.dl_dst); case MFF_ETH_TYPE: return !wc->masks.dl_type; case MFF_ARP_SHA: case MFF_ND_SLL: return eth_addr_is_zero(wc->masks.arp_sha); case MFF_ARP_THA: case MFF_ND_TLL: return eth_addr_is_zero(wc->masks.arp_tha); case MFF_VLAN_TCI: return !wc->masks.vlan_tci; case MFF_DL_VLAN: return !(wc->masks.vlan_tci & htons(VLAN_VID_MASK)); case MFF_VLAN_VID: return !(wc->masks.vlan_tci & htons(VLAN_VID_MASK | VLAN_CFI)); case MFF_DL_VLAN_PCP: case MFF_VLAN_PCP: return !(wc->masks.vlan_tci & htons(VLAN_PCP_MASK)); case MFF_MPLS_LABEL: return !(wc->masks.mpls_lse & htonl(MPLS_LABEL_MASK)); case MFF_MPLS_TC: return !(wc->masks.mpls_lse & htonl(MPLS_TC_MASK)); case MFF_MPLS_BOS: return !(wc->masks.mpls_lse & htonl(MPLS_BOS_MASK)); case MFF_IPV4_SRC: return !wc->masks.nw_src; case MFF_IPV4_DST: return !wc->masks.nw_dst; case MFF_IPV6_SRC: return ipv6_mask_is_any(&wc->masks.ipv6_src); case MFF_IPV6_DST: return ipv6_mask_is_any(&wc->masks.ipv6_dst); case MFF_IPV6_LABEL: return !wc->masks.ipv6_label; case MFF_IP_PROTO: return !wc->masks.nw_proto; case MFF_IP_DSCP: case MFF_IP_DSCP_SHIFTED: return !(wc->masks.nw_tos & IP_DSCP_MASK); case MFF_IP_ECN: return !(wc->masks.nw_tos & IP_ECN_MASK); case MFF_IP_TTL: return !wc->masks.nw_ttl; case MFF_ND_TARGET: return 
ipv6_mask_is_any(&wc->masks.nd_target); case MFF_IP_FRAG: return !(wc->masks.nw_frag & FLOW_NW_FRAG_MASK); case MFF_ARP_OP: return !wc->masks.nw_proto; case MFF_ARP_SPA: return !wc->masks.nw_src; case MFF_ARP_TPA: return !wc->masks.nw_dst; case MFF_TCP_SRC: case MFF_UDP_SRC: case MFF_SCTP_SRC: case MFF_ICMPV4_TYPE: case MFF_ICMPV6_TYPE: return !wc->masks.tp_src; case MFF_TCP_DST: case MFF_UDP_DST: case MFF_SCTP_DST: case MFF_ICMPV4_CODE: case MFF_ICMPV6_CODE: return !wc->masks.tp_dst; case MFF_N_IDS: default: NOT_REACHED(); } } /* Initializes 'mask' with the wildcard bit pattern for field 'mf' within 'wc'. * Each bit in 'mask' will be set to 1 if the bit is significant for matching * purposes, or to 0 if it is wildcarded. * * The caller is responsible for ensuring that 'wc' corresponds to a flow that * meets 'mf''s prerequisites. */ void mf_get_mask(const struct mf_field *mf, const struct flow_wildcards *wc, union mf_value *mask) { mf_get_value(mf, &wc->masks, mask); } /* Tests whether 'mask' is a valid wildcard bit pattern for 'mf'. Returns true * if the mask is valid, false otherwise. */ bool mf_is_mask_valid(const struct mf_field *mf, const union mf_value *mask) { switch (mf->maskable) { case MFM_NONE: return (is_all_zeros((const uint8_t *) mask, mf->n_bytes) || is_all_ones((const uint8_t *) mask, mf->n_bytes)); case MFM_FULLY: return true; } NOT_REACHED(); } static bool is_icmpv4(const struct flow *flow) { return (flow->dl_type == htons(ETH_TYPE_IP) && flow->nw_proto == IPPROTO_ICMP); } static bool is_icmpv6(const struct flow *flow) { return (flow->dl_type == htons(ETH_TYPE_IPV6) && flow->nw_proto == IPPROTO_ICMPV6); } /* Returns true if 'flow' meets the prerequisites for 'mf', false otherwise. 
*/
bool
mf_are_prereqs_ok(const struct mf_field *mf, const struct flow *flow)
{
    const ovs_be16 eth_type = flow->dl_type;

    switch (mf->prereqs) {
    case MFP_NONE:
        return true;

    /* Pure Ethertype prerequisites. */
    case MFP_ARP:
        if (eth_type == htons(ETH_TYPE_ARP)) {
            return true;
        }
        return eth_type == htons(ETH_TYPE_RARP);
    case MFP_IPV4:
        return eth_type == htons(ETH_TYPE_IP);
    case MFP_IPV6:
        return eth_type == htons(ETH_TYPE_IPV6);
    case MFP_MPLS:
        return eth_type_mpls(eth_type);

    case MFP_VLAN_VID:
        return (flow->vlan_tci & htons(VLAN_CFI)) != 0;

    case MFP_IP_ANY:
        return is_ip_any(flow);

    /* L4 protocols carried over either IP version. */
    case MFP_TCP:
        if (!is_ip_any(flow)) {
            return false;
        }
        return flow->nw_proto == IPPROTO_TCP;
    case MFP_UDP:
        if (!is_ip_any(flow)) {
            return false;
        }
        return flow->nw_proto == IPPROTO_UDP;
    case MFP_SCTP:
        if (!is_ip_any(flow)) {
            return false;
        }
        return flow->nw_proto == IPPROTO_SCTP;

    case MFP_ICMPV4:
        return is_icmpv4(flow);
    case MFP_ICMPV6:
        return is_icmpv6(flow);

    /* Neighbor discovery: ICMPv6 with code 0 ('tp_dst') and a neighbor
     * solicit and/or advert type ('tp_src'). */
    case MFP_ND:
        if (!is_icmpv6(flow) || flow->tp_dst != htons(0)) {
            return false;
        }
        if (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT)) {
            return true;
        }
        return flow->tp_src == htons(ND_NEIGHBOR_ADVERT);
    case MFP_ND_SOLICIT:
        if (!is_icmpv6(flow) || flow->tp_dst != htons(0)) {
            return false;
        }
        return flow->tp_src == htons(ND_NEIGHBOR_SOLICIT);
    case MFP_ND_ADVERT:
        if (!is_icmpv6(flow) || flow->tp_dst != htons(0)) {
            return false;
        }
        return flow->tp_src == htons(ND_NEIGHBOR_ADVERT);
    }

    NOT_REACHED();
}

/* Returns true if 'value' may be a valid value *as part of a masked match*,
 * false otherwise.
 *
 * A value is not rejected just because it is not valid for the field in
 * question, but only if it doesn't make sense to test the bits in question at
 * all.  For example, the MFF_VLAN_TCI field will never have a nonzero value
 * without the VLAN_CFI bit being set, but we can't reject those values because
 * it is still legitimate to test just for those bits (see the documentation
 * for NXM_OF_VLAN_TCI in nicira-ext.h).  On the other hand, there is never a
 * reason to set the low bit of MFF_IP_DSCP to 1, so we reject that.
*/
bool
mf_is_value_valid(const struct mf_field *mf, const union mf_value *value)
{
    switch (mf->id) {
    /* Every bit pattern is acceptable for these fields. */
    case MFF_TUN_ID:
    case MFF_TUN_SRC:
    case MFF_TUN_DST:
    case MFF_TUN_TOS:
    case MFF_TUN_TTL:
    case MFF_TUN_FLAGS:
    case MFF_METADATA:
    case MFF_IN_PORT:
    case MFF_SKB_PRIORITY:
    case MFF_PKT_MARK:
    CASE_MFF_REGS:
    case MFF_ETH_SRC:
    case MFF_ETH_DST:
    case MFF_ETH_TYPE:
    case MFF_VLAN_TCI:
    case MFF_IPV4_SRC:
    case MFF_IPV4_DST:
    case MFF_IPV6_SRC:
    case MFF_IPV6_DST:
    case MFF_IP_PROTO:
    case MFF_IP_TTL:
    case MFF_ARP_SPA:
    case MFF_ARP_TPA:
    case MFF_ARP_SHA:
    case MFF_ARP_THA:
    case MFF_TCP_SRC:
    case MFF_TCP_DST:
    case MFF_UDP_SRC:
    case MFF_UDP_DST:
    case MFF_SCTP_SRC:
    case MFF_SCTP_DST:
    case MFF_ICMPV4_TYPE:
    case MFF_ICMPV4_CODE:
    case MFF_ICMPV6_TYPE:
    case MFF_ICMPV6_CODE:
    case MFF_ND_TARGET:
    case MFF_ND_SLL:
    case MFF_ND_TLL:
        return true;

    /* Valid when ofputil_port_from_ofp11() returns zero for the value
     * (presumably its "no error" result -- confirm against ofp-util). */
    case MFF_IN_PORT_OXM: {
        ofp_port_t port;
        return !ofputil_port_from_ofp11(value->be32, &port);
    }

    /* The remaining fields occupy only part of their container, so a value
     * is rejected if it has any bit set outside the field proper. */
    case MFF_IP_DSCP:
        return !(value->u8 & ~IP_DSCP_MASK);
    case MFF_IP_DSCP_SHIFTED:
        return !(value->u8 & (~IP_DSCP_MASK >> 2));
    case MFF_IP_ECN:
        return !(value->u8 & ~IP_ECN_MASK);
    case MFF_IP_FRAG:
        return !(value->u8 & ~FLOW_NW_FRAG_MASK);

    /* Only the low 8 bits of the 16-bit ARP opcode may be set. */
    case MFF_ARP_OP:
        return !(value->be16 & htons(0xff00));

    case MFF_DL_VLAN:
        return !(value->be16 & htons(VLAN_CFI | VLAN_PCP_MASK));
    case MFF_VLAN_VID:
        return !(value->be16 & htons(VLAN_PCP_MASK));

    case MFF_DL_VLAN_PCP:
    case MFF_VLAN_PCP:
        return !(value->u8 & ~(VLAN_PCP_MASK >> VLAN_PCP_SHIFT));

    case MFF_IPV6_LABEL:
        return !(value->be32 & ~htonl(IPV6_LABEL_MASK));

    case MFF_MPLS_LABEL:
        return !(value->be32 & ~htonl(MPLS_LABEL_MASK >> MPLS_LABEL_SHIFT));

    case MFF_MPLS_TC:
        return !(value->u8 & ~(MPLS_TC_MASK >> MPLS_TC_SHIFT));

    case MFF_MPLS_BOS:
        return !(value->u8 & ~(MPLS_BOS_MASK >> MPLS_BOS_SHIFT));

    case MFF_N_IDS:
    default:
        NOT_REACHED();
    }
}

/* Copies the value of field 'mf' from 'flow' into 'value'.  The caller is
 * responsible for ensuring that 'flow' meets 'mf''s prerequisites.
*/
void
mf_get_value(const struct mf_field *mf, const struct flow *flow,
             union mf_value *value)
{
    switch (mf->id) {
    case MFF_TUN_ID:
        value->be64 = flow->tunnel.tun_id;
        break;
    case MFF_TUN_SRC:
        value->be32 = flow->tunnel.ip_src;
        break;
    case MFF_TUN_DST:
        value->be32 = flow->tunnel.ip_dst;
        break;
    case MFF_TUN_FLAGS:
        /* Passed through htons() on the way out; mf_set_flow_value() applies
         * ntohs() in the other direction. */
        value->be16 = htons(flow->tunnel.flags);
        break;
    case MFF_TUN_TTL:
        value->u8 = flow->tunnel.ip_ttl;
        break;
    case MFF_TUN_TOS:
        value->u8 = flow->tunnel.ip_tos;
        break;

    case MFF_METADATA:
        value->be64 = flow->metadata;
        break;

    /* The same flow member is exposed as a 16-bit port number and as an
     * OpenFlow 1.1 32-bit port number. */
    case MFF_IN_PORT:
        value->be16 = htons(ofp_to_u16(flow->in_port.ofp_port));
        break;
    case MFF_IN_PORT_OXM:
        value->be32 = ofputil_port_to_ofp11(flow->in_port.ofp_port);
        break;

    case MFF_SKB_PRIORITY:
        value->be32 = htonl(flow->skb_priority);
        break;

    case MFF_PKT_MARK:
        value->be32 = htonl(flow->pkt_mark);
        break;

    CASE_MFF_REGS:
        value->be32 = htonl(flow->regs[mf->id - MFF_REG0]);
        break;

    case MFF_ETH_SRC:
        memcpy(value->mac, flow->dl_src, ETH_ADDR_LEN);
        break;
    case MFF_ETH_DST:
        memcpy(value->mac, flow->dl_dst, ETH_ADDR_LEN);
        break;
    case MFF_ETH_TYPE:
        value->be16 = flow->dl_type;
        break;

    /* The VLAN fields are all views of 'vlan_tci'. */
    case MFF_VLAN_TCI:
        value->be16 = flow->vlan_tci;
        break;
    case MFF_DL_VLAN:
        value->be16 = flow->vlan_tci & htons(VLAN_VID_MASK);
        break;
    case MFF_VLAN_VID:
        value->be16 = flow->vlan_tci & htons(VLAN_VID_MASK | VLAN_CFI);
        break;
    case MFF_DL_VLAN_PCP:
    case MFF_VLAN_PCP:
        value->u8 = vlan_tci_to_pcp(flow->vlan_tci);
        break;

    /* The MPLS fields are all views of 'mpls_lse'. */
    case MFF_MPLS_LABEL:
        value->be32 = htonl(mpls_lse_to_label(flow->mpls_lse));
        break;
    case MFF_MPLS_TC:
        value->u8 = mpls_lse_to_tc(flow->mpls_lse);
        break;
    case MFF_MPLS_BOS:
        value->u8 = mpls_lse_to_bos(flow->mpls_lse);
        break;

    case MFF_IPV4_SRC:
        value->be32 = flow->nw_src;
        break;
    case MFF_IPV4_DST:
        value->be32 = flow->nw_dst;
        break;

    case MFF_IPV6_SRC:
        value->ipv6 = flow->ipv6_src;
        break;
    case MFF_IPV6_DST:
        value->ipv6 = flow->ipv6_dst;
        break;

    case MFF_IPV6_LABEL:
        value->be32 = flow->ipv6_label;
        break;

    case MFF_IP_PROTO:
        value->u8 = flow->nw_proto;
        break;

    /* DSCP and ECN share 'nw_tos'. */
    case MFF_IP_DSCP:
        value->u8 = flow->nw_tos & IP_DSCP_MASK;
        break;
    case MFF_IP_DSCP_SHIFTED:
        value->u8 = flow->nw_tos >> 2;
        break;
    case MFF_IP_ECN:
        value->u8 = flow->nw_tos & IP_ECN_MASK;
        break;

    case MFF_IP_TTL:
        value->u8 = flow->nw_ttl;
        break;

    case MFF_IP_FRAG:
        value->u8 = flow->nw_frag;
        break;

    /* The ARP fields reuse 'nw_proto', 'nw_src' and 'nw_dst'. */
    case MFF_ARP_OP:
        value->be16 = htons(flow->nw_proto);
        break;
    case MFF_ARP_SPA:
        value->be32 = flow->nw_src;
        break;
    case MFF_ARP_TPA:
        value->be32 = flow->nw_dst;
        break;

    /* The ND link-layer address fields share storage with the ARP ones. */
    case MFF_ARP_SHA:
    case MFF_ND_SLL:
        memcpy(value->mac, flow->arp_sha, ETH_ADDR_LEN);
        break;
    case MFF_ARP_THA:
    case MFF_ND_TLL:
        memcpy(value->mac, flow->arp_tha, ETH_ADDR_LEN);
        break;

    case MFF_TCP_SRC:
    case MFF_UDP_SRC:
    case MFF_SCTP_SRC:
        value->be16 = flow->tp_src;
        break;
    case MFF_TCP_DST:
    case MFF_UDP_DST:
    case MFF_SCTP_DST:
        value->be16 = flow->tp_dst;
        break;

    /* ICMP type and code are read from 'tp_src' and 'tp_dst' and narrowed
     * to 8 bits. */
    case MFF_ICMPV4_TYPE:
    case MFF_ICMPV6_TYPE:
        value->u8 = ntohs(flow->tp_src);
        break;
    case MFF_ICMPV4_CODE:
    case MFF_ICMPV6_CODE:
        value->u8 = ntohs(flow->tp_dst);
        break;

    case MFF_ND_TARGET:
        value->ipv6 = flow->nd_target;
        break;

    case MFF_N_IDS:
    default:
        NOT_REACHED();
    }
}

/* Makes 'match' match field 'mf' exactly, with the value matched taken from
 * 'value'.  The caller is responsible for ensuring that 'match' meets 'mf''s
 * prerequisites.
*/
void
mf_set_value(const struct mf_field *mf,
             const union mf_value *value, struct match *match)
{
    switch (mf->id) {
    case MFF_TUN_ID:
        match_set_tun_id(match, value->be64);
        break;
    case MFF_TUN_SRC:
        match_set_tun_src(match, value->be32);
        break;
    case MFF_TUN_DST:
        match_set_tun_dst(match, value->be32);
        break;
    case MFF_TUN_FLAGS:
        match_set_tun_flags(match, ntohs(value->be16));
        break;
    case MFF_TUN_TOS:
        match_set_tun_tos(match, value->u8);
        break;
    case MFF_TUN_TTL:
        match_set_tun_ttl(match, value->u8);
        break;

    case MFF_METADATA:
        match_set_metadata(match, value->be64);
        break;

    case MFF_IN_PORT:
        match_set_in_port(match, u16_to_ofp(ntohs(value->be16)));
        break;

    case MFF_IN_PORT_OXM: {
        ofp_port_t port;
        /* NOTE(review): the conversion result is not checked here;
         * mf_is_value_valid() is where a bad port number is rejected, so
         * this presumably relies on the caller validating first -- confirm. */
        ofputil_port_from_ofp11(value->be32, &port);
        match_set_in_port(match, port);
        break;
    }

    case MFF_SKB_PRIORITY:
        match_set_skb_priority(match, ntohl(value->be32));
        break;

    case MFF_PKT_MARK:
        match_set_pkt_mark(match, ntohl(value->be32));
        break;

    CASE_MFF_REGS:
        match_set_reg(match, mf->id - MFF_REG0, ntohl(value->be32));
        break;

    case MFF_ETH_SRC:
        match_set_dl_src(match, value->mac);
        break;
    case MFF_ETH_DST:
        match_set_dl_dst(match, value->mac);
        break;
    case MFF_ETH_TYPE:
        match_set_dl_type(match, value->be16);
        break;

    case MFF_VLAN_TCI:
        match_set_dl_tci(match, value->be16);
        break;
    case MFF_DL_VLAN:
        match_set_dl_vlan(match, value->be16);
        break;
    case MFF_VLAN_VID:
        match_set_vlan_vid(match, value->be16);
        break;
    case MFF_DL_VLAN_PCP:
    case MFF_VLAN_PCP:
        match_set_dl_vlan_pcp(match, value->u8);
        break;

    case MFF_MPLS_LABEL:
        match_set_mpls_label(match, value->be32);
        break;
    case MFF_MPLS_TC:
        match_set_mpls_tc(match, value->u8);
        break;
    case MFF_MPLS_BOS:
        match_set_mpls_bos(match, value->u8);
        break;

    case MFF_IPV4_SRC:
        match_set_nw_src(match, value->be32);
        break;
    case MFF_IPV4_DST:
        match_set_nw_dst(match, value->be32);
        break;

    case MFF_IPV6_SRC:
        match_set_ipv6_src(match, &value->ipv6);
        break;
    case MFF_IPV6_DST:
        match_set_ipv6_dst(match, &value->ipv6);
        break;

    case MFF_IPV6_LABEL:
        match_set_ipv6_label(match, value->be32);
        break;

    case MFF_IP_PROTO:
        match_set_nw_proto(match, value->u8);
        break;

    case MFF_IP_DSCP:
        match_set_nw_dscp(match, value->u8);
        break;
    case MFF_IP_DSCP_SHIFTED:
        /* Undo the right shift by 2 that mf_get_value() applies. */
        match_set_nw_dscp(match, value->u8 << 2);
        break;
    case MFF_IP_ECN:
        match_set_nw_ecn(match, value->u8);
        break;

    case MFF_IP_TTL:
        match_set_nw_ttl(match, value->u8);
        break;

    case MFF_IP_FRAG:
        match_set_nw_frag(match, value->u8);
        break;

    /* The ARP fields reuse the 'nw_proto', 'nw_src' and 'nw_dst' setters. */
    case MFF_ARP_OP:
        match_set_nw_proto(match, ntohs(value->be16));
        break;
    case MFF_ARP_SPA:
        match_set_nw_src(match, value->be32);
        break;
    case MFF_ARP_TPA:
        match_set_nw_dst(match, value->be32);
        break;

    case MFF_ARP_SHA:
    case MFF_ND_SLL:
        match_set_arp_sha(match, value->mac);
        break;
    case MFF_ARP_THA:
    case MFF_ND_TLL:
        match_set_arp_tha(match, value->mac);
        break;

    case MFF_TCP_SRC:
    case MFF_UDP_SRC:
    case MFF_SCTP_SRC:
        match_set_tp_src(match, value->be16);
        break;
    case MFF_TCP_DST:
    case MFF_UDP_DST:
    case MFF_SCTP_DST:
        match_set_tp_dst(match, value->be16);
        break;

    case MFF_ICMPV4_TYPE:
    case MFF_ICMPV6_TYPE:
        match_set_icmp_type(match, value->u8);
        break;
    case MFF_ICMPV4_CODE:
    case MFF_ICMPV6_CODE:
        match_set_icmp_code(match, value->u8);
        break;

    case MFF_ND_TARGET:
        match_set_nd_target(match, &value->ipv6);
        break;

    case MFF_N_IDS:
    default:
        NOT_REACHED();
    }
}

/* Sets 'flow' member field described by 'mf' to 'value'.
The caller is * responsible for ensuring that 'flow' meets 'mf''s prerequisites.*/ void mf_set_flow_value(const struct mf_field *mf, const union mf_value *value, struct flow *flow) { switch (mf->id) { case MFF_TUN_ID: flow->tunnel.tun_id = value->be64; break; case MFF_TUN_SRC: flow->tunnel.ip_src = value->be32; break; case MFF_TUN_DST: flow->tunnel.ip_dst = value->be32; break; case MFF_TUN_FLAGS: flow->tunnel.flags = ntohs(value->be16); break; case MFF_TUN_TOS: flow->tunnel.ip_tos = value->u8; break; case MFF_TUN_TTL: flow->tunnel.ip_ttl = value->u8; break; case MFF_METADATA: flow->metadata = value->be64; break; case MFF_IN_PORT: flow->in_port.ofp_port = u16_to_ofp(ntohs(value->be16)); break; case MFF_IN_PORT_OXM: { ofp_port_t port; ofputil_port_from_ofp11(value->be32, &port); flow->in_port.ofp_port = port; break; } case MFF_SKB_PRIORITY: flow->skb_priority = ntohl(value->be32); break; case MFF_PKT_MARK: flow->pkt_mark = ntohl(value->be32); break; CASE_MFF_REGS: flow->regs[mf->id - MFF_REG0] = ntohl(value->be32); break; case MFF_ETH_SRC: memcpy(flow->dl_src, value->mac, ETH_ADDR_LEN); break; case MFF_ETH_DST: memcpy(flow->dl_dst, value->mac, ETH_ADDR_LEN); break; case MFF_ETH_TYPE: flow->dl_type = value->be16; break; case MFF_VLAN_TCI: flow->vlan_tci = value->be16; break; case MFF_DL_VLAN: flow_set_dl_vlan(flow, value->be16); break; case MFF_VLAN_VID: flow_set_vlan_vid(flow, value->be16); break; case MFF_DL_VLAN_PCP: case MFF_VLAN_PCP: flow_set_vlan_pcp(flow, value->u8); break; case MFF_MPLS_LABEL: flow_set_mpls_label(flow, value->be32); break; case MFF_MPLS_TC: flow_set_mpls_tc(flow, value->u8); break; case MFF_MPLS_BOS: flow_set_mpls_bos(flow, value->u8); break; case MFF_IPV4_SRC: flow->nw_src = value->be32; break; case MFF_IPV4_DST: flow->nw_dst = value->be32; break; case MFF_IPV6_SRC: flow->ipv6_src = value->ipv6; break; case MFF_IPV6_DST: flow->ipv6_dst = value->ipv6; break; case MFF_IPV6_LABEL: flow->ipv6_label = value->be32 & ~htonl(IPV6_LABEL_MASK); break; 
case MFF_IP_PROTO: flow->nw_proto = value->u8; break; case MFF_IP_DSCP: flow->nw_tos &= ~IP_DSCP_MASK; flow->nw_tos |= value->u8 & IP_DSCP_MASK; break; case MFF_IP_DSCP_SHIFTED: flow->nw_tos &= ~IP_DSCP_MASK; flow->nw_tos |= value->u8 << 2; break; case MFF_IP_ECN: flow->nw_tos &= ~IP_ECN_MASK; flow->nw_tos |= value->u8 & IP_ECN_MASK; break; case MFF_IP_TTL: flow->nw_ttl = value->u8; break; case MFF_IP_FRAG: flow->nw_frag &= value->u8; break; case MFF_ARP_OP: flow->nw_proto = ntohs(value->be16); break; case MFF_ARP_SPA: flow->nw_src = value->be32; break; case MFF_ARP_TPA: flow->nw_dst = value->be32; break; case MFF_ARP_SHA: case MFF_ND_SLL: memcpy(flow->arp_sha, value->mac, ETH_ADDR_LEN); break; case MFF_ARP_THA: case MFF_ND_TLL: memcpy(flow->arp_tha, value->mac, ETH_ADDR_LEN); break; case MFF_TCP_SRC: case MFF_UDP_SRC: case MFF_SCTP_SRC: flow->tp_src = value->be16; break; case MFF_TCP_DST: case MFF_UDP_DST: case MFF_SCTP_DST: flow->tp_dst = value->be16; break; case MFF_ICMPV4_TYPE: case MFF_ICMPV6_TYPE: flow->tp_src = htons(value->u8); break; case MFF_ICMPV4_CODE: case MFF_ICMPV6_CODE: flow->tp_dst = htons(value->u8); break; case MFF_ND_TARGET: flow->nd_target = value->ipv6; break; case MFF_N_IDS: default: NOT_REACHED(); } } /* Returns true if 'mf' has a zero value in 'flow', false if it is nonzero. * * The caller is responsible for ensuring that 'flow' meets 'mf''s * prerequisites. */ bool mf_is_zero(const struct mf_field *mf, const struct flow *flow) { union mf_value value; mf_get_value(mf, flow, &value); return is_all_zeros((const uint8_t *) &value, mf->n_bytes); } /* Makes 'match' wildcard field 'mf'. * * The caller is responsible for ensuring that 'match' meets 'mf''s * prerequisites. 
*/
/* In every case both the mask bits and the corresponding flow bits are
 * cleared.  A 0 mask bit means "don't care", which is the condition that
 * mf_is_all_wild() tests for. */
void
mf_set_wild(const struct mf_field *mf, struct match *match)
{
    switch (mf->id) {
    case MFF_TUN_ID:
        match_set_tun_id_masked(match, htonll(0), htonll(0));
        break;
    case MFF_TUN_SRC:
        match_set_tun_src_masked(match, htonl(0), htonl(0));
        break;
    case MFF_TUN_DST:
        match_set_tun_dst_masked(match, htonl(0), htonl(0));
        break;
    case MFF_TUN_FLAGS:
        match_set_tun_flags_masked(match, 0, 0);
        break;
    case MFF_TUN_TOS:
        match_set_tun_tos_masked(match, 0, 0);
        break;
    case MFF_TUN_TTL:
        match_set_tun_ttl_masked(match, 0, 0);
        break;

    case MFF_METADATA:
        match_set_metadata_masked(match, htonll(0), htonll(0));
        break;

    case MFF_IN_PORT:
    case MFF_IN_PORT_OXM:
        match->flow.in_port.ofp_port = 0;
        match->wc.masks.in_port.ofp_port = 0;
        break;

    case MFF_SKB_PRIORITY:
        match->flow.skb_priority = 0;
        match->wc.masks.skb_priority = 0;
        break;

    case MFF_PKT_MARK:
        match->flow.pkt_mark = 0;
        match->wc.masks.pkt_mark = 0;
        break;

    CASE_MFF_REGS:
        match_set_reg_masked(match, mf->id - MFF_REG0, 0, 0);
        break;

    case MFF_ETH_SRC:
        memset(match->flow.dl_src, 0, ETH_ADDR_LEN);
        memset(match->wc.masks.dl_src, 0, ETH_ADDR_LEN);
        break;

    case MFF_ETH_DST:
        memset(match->flow.dl_dst, 0, ETH_ADDR_LEN);
        memset(match->wc.masks.dl_dst, 0, ETH_ADDR_LEN);
        break;

    case MFF_ETH_TYPE:
        match->flow.dl_type = htons(0);
        match->wc.masks.dl_type = htons(0);
        break;

    case MFF_VLAN_TCI:
        match_set_dl_tci_masked(match, htons(0), htons(0));
        break;

    case MFF_DL_VLAN:
    case MFF_VLAN_VID:
        match_set_any_vid(match);
        break;

    case MFF_DL_VLAN_PCP:
    case MFF_VLAN_PCP:
        match_set_any_pcp(match);
        break;

    case MFF_MPLS_LABEL:
        match_set_any_mpls_label(match);
        break;

    case MFF_MPLS_TC:
        match_set_any_mpls_tc(match);
        break;

    case MFF_MPLS_BOS:
        match_set_any_mpls_bos(match);
        break;

    case MFF_IPV4_SRC:
    case MFF_ARP_SPA:
        match_set_nw_src_masked(match, htonl(0), htonl(0));
        break;

    case MFF_IPV4_DST:
    case MFF_ARP_TPA:
        match_set_nw_dst_masked(match, htonl(0), htonl(0));
        break;

    case MFF_IPV6_SRC:
        memset(&match->wc.masks.ipv6_src, 0, sizeof match->wc.masks.ipv6_src);
        memset(&match->flow.ipv6_src, 0, sizeof match->flow.ipv6_src);
        break;

    case MFF_IPV6_DST:
        memset(&match->wc.masks.ipv6_dst, 0, sizeof match->wc.masks.ipv6_dst);
        memset(&match->flow.ipv6_dst, 0, sizeof match->flow.ipv6_dst);
        break;

    case MFF_IPV6_LABEL:
        match->wc.masks.ipv6_label = htonl(0);
        match->flow.ipv6_label = htonl(0);
        break;

    case MFF_IP_PROTO:
        match->wc.masks.nw_proto = 0;
        match->flow.nw_proto = 0;
        break;

    /* DSCP and ECN share 'nw_tos', so only the field's own bits change. */
    case MFF_IP_DSCP:
    case MFF_IP_DSCP_SHIFTED:
        match->wc.masks.nw_tos &= ~IP_DSCP_MASK;
        match->flow.nw_tos &= ~IP_DSCP_MASK;
        break;

    case MFF_IP_ECN:
        match->wc.masks.nw_tos &= ~IP_ECN_MASK;
        match->flow.nw_tos &= ~IP_ECN_MASK;
        break;

    case MFF_IP_TTL:
        match->wc.masks.nw_ttl = 0;
        match->flow.nw_ttl = 0;
        break;

    case MFF_IP_FRAG:
        /* Clear, don't set, the mask bits: setting them would turn the
         * field into an exact match on "not a fragment". */
        match->wc.masks.nw_frag &= ~FLOW_NW_FRAG_MASK;
        match->flow.nw_frag &= ~FLOW_NW_FRAG_MASK;
        break;

    case MFF_ARP_OP:
        match->wc.masks.nw_proto = 0;
        match->flow.nw_proto = 0;
        break;

    case MFF_ARP_SHA:
    case MFF_ND_SLL:
        memset(match->flow.arp_sha, 0, ETH_ADDR_LEN);
        memset(match->wc.masks.arp_sha, 0, ETH_ADDR_LEN);
        break;

    case MFF_ARP_THA:
    case MFF_ND_TLL:
        memset(match->flow.arp_tha, 0, ETH_ADDR_LEN);
        memset(match->wc.masks.arp_tha, 0, ETH_ADDR_LEN);
        break;

    case MFF_TCP_SRC:
    case MFF_UDP_SRC:
    case MFF_SCTP_SRC:
    case MFF_ICMPV4_TYPE:
    case MFF_ICMPV6_TYPE:
        match->wc.masks.tp_src = htons(0);
        match->flow.tp_src = htons(0);
        break;

    case MFF_TCP_DST:
    case MFF_UDP_DST:
    case MFF_SCTP_DST:
    case MFF_ICMPV4_CODE:
    case MFF_ICMPV6_CODE:
        match->wc.masks.tp_dst = htons(0);
        match->flow.tp_dst = htons(0);
        break;

    case MFF_ND_TARGET:
        memset(&match->wc.masks.nd_target, 0,
               sizeof match->wc.masks.nd_target);
        memset(&match->flow.nd_target, 0, sizeof match->flow.nd_target);
        break;

    case MFF_N_IDS:
    default:
        NOT_REACHED();
    }
}

/* Makes 'match' match field 'mf' with the specified 'value' and 'mask'.
* 'value' specifies a value to match and 'mask' specifies a wildcard pattern,
 * with a 1-bit indicating that the corresponding value bit must match and a
 * 0-bit indicating a don't-care.
 *
 * If 'mask' is NULL or points to all-1-bits, then this call is equivalent to
 * mf_set_value(mf, value, match).  If 'mask' points to all-0-bits, then this
 * call is equivalent to mf_set_wild(mf, match).
 *
 * 'mask' must be a valid mask for 'mf' (see mf_is_mask_valid()).  The caller
 * is responsible for ensuring that 'match' meets 'mf''s prerequisites.
 *
 * Returns 'mf->usable_protocols' for an exact match, OFPUTIL_P_ANY for a
 * fully wildcarded one, and 'mf->usable_protocols_bitwise' for a partial
 * mask -- except that the IPv4 and ARP address fields return
 * 'mf->usable_protocols' when the mask passes ip_is_cidr(). */
enum ofputil_protocol
mf_set(const struct mf_field *mf,
       const union mf_value *value, const union mf_value *mask,
       struct match *match)
{
    /* The two trivial masks are delegated to the simpler setters. */
    if (!mask || is_all_ones((const uint8_t *) mask, mf->n_bytes)) {
        mf_set_value(mf, value, match);
        return mf->usable_protocols;
    } else if (is_all_zeros((const uint8_t *) mask, mf->n_bytes)) {
        mf_set_wild(mf, match);
        return OFPUTIL_P_ANY;
    }

    switch (mf->id) {
    /* These fields are not handled with a partial mask: reaching this point
     * with one of them is treated as a caller error. */
    case MFF_IN_PORT:
    case MFF_IN_PORT_OXM:
    case MFF_SKB_PRIORITY:
    case MFF_ETH_TYPE:
    case MFF_DL_VLAN:
    case MFF_DL_VLAN_PCP:
    case MFF_VLAN_PCP:
    case MFF_MPLS_LABEL:
    case MFF_MPLS_TC:
    case MFF_MPLS_BOS:
    case MFF_IP_PROTO:
    case MFF_IP_TTL:
    case MFF_IP_DSCP:
    case MFF_IP_DSCP_SHIFTED:
    case MFF_IP_ECN:
    case MFF_ARP_OP:
    case MFF_ICMPV4_TYPE:
    case MFF_ICMPV4_CODE:
    case MFF_ICMPV6_TYPE:
    case MFF_ICMPV6_CODE:
        NOT_REACHED();

    case MFF_TUN_ID:
        match_set_tun_id_masked(match, value->be64, mask->be64);
        break;
    case MFF_TUN_SRC:
        match_set_tun_src_masked(match, value->be32, mask->be32);
        break;
    case MFF_TUN_DST:
        match_set_tun_dst_masked(match, value->be32, mask->be32);
        break;
    case MFF_TUN_FLAGS:
        match_set_tun_flags_masked(match, ntohs(value->be16),
                                   ntohs(mask->be16));
        break;
    case MFF_TUN_TTL:
        match_set_tun_ttl_masked(match, value->u8, mask->u8);
        break;
    case MFF_TUN_TOS:
        match_set_tun_tos_masked(match, value->u8, mask->u8);
        break;

    case MFF_METADATA:
        match_set_metadata_masked(match, value->be64, mask->be64);
        break;

    CASE_MFF_REGS:
        match_set_reg_masked(match, mf->id - MFF_REG0,
                             ntohl(value->be32), ntohl(mask->be32));
        break;

    case MFF_PKT_MARK:
        match_set_pkt_mark_masked(match, ntohl(value->be32),
                                  ntohl(mask->be32));
        break;

    case MFF_ETH_DST:
        match_set_dl_dst_masked(match, value->mac, mask->mac);
        break;

    case MFF_ETH_SRC:
        match_set_dl_src_masked(match, value->mac, mask->mac);
        break;

    case MFF_ARP_SHA:
    case MFF_ND_SLL:
        match_set_arp_sha_masked(match, value->mac, mask->mac);
        break;

    case MFF_ARP_THA:
    case MFF_ND_TLL:
        match_set_arp_tha_masked(match, value->mac, mask->mac);
        break;

    case MFF_VLAN_TCI:
        match_set_dl_tci_masked(match, value->be16, mask->be16);
        break;

    case MFF_VLAN_VID:
        match_set_vlan_vid_masked(match, value->be16, mask->be16);
        break;

    /* The IPv4 address fields jump to 'cidr_check', which picks the return
     * value from the shape of the mask. */
    case MFF_IPV4_SRC:
        match_set_nw_src_masked(match, value->be32, mask->be32);
        goto cidr_check;

    case MFF_IPV4_DST:
        match_set_nw_dst_masked(match, value->be32, mask->be32);
        goto cidr_check;

    case MFF_IPV6_SRC:
        match_set_ipv6_src_masked(match, &value->ipv6, &mask->ipv6);
        break;

    case MFF_IPV6_DST:
        match_set_ipv6_dst_masked(match, &value->ipv6, &mask->ipv6);
        break;

    case MFF_IPV6_LABEL:
        /* A mask that covers every label bit is handled as an exact match,
         * whatever it says about the bits outside the label. */
        if ((mask->be32 & htonl(IPV6_LABEL_MASK)) == htonl(IPV6_LABEL_MASK)) {
            mf_set_value(mf, value, match);
        } else {
            match_set_ipv6_label_masked(match, value->be32, mask->be32);
        }
        break;

    case MFF_ND_TARGET:
        match_set_nd_target_masked(match, &value->ipv6, &mask->ipv6);
        break;

    case MFF_IP_FRAG:
        match_set_nw_frag_masked(match, value->u8, mask->u8);
        break;

    /* The ARP address fields reuse the IPv4 setters and the CIDR check. */
    case MFF_ARP_SPA:
        match_set_nw_src_masked(match, value->be32, mask->be32);
        goto cidr_check;

    case MFF_ARP_TPA:
        match_set_nw_dst_masked(match, value->be32, mask->be32);
        goto cidr_check;

    case MFF_TCP_SRC:
    case MFF_UDP_SRC:
    case MFF_SCTP_SRC:
        match_set_tp_src_masked(match, value->be16, mask->be16);
        break;

    case MFF_TCP_DST:
    case MFF_UDP_DST:
    case MFF_SCTP_DST:
        match_set_tp_dst_masked(match, value->be16, mask->be16);
        break;

    case MFF_N_IDS:
    default:
        NOT_REACHED();
    }

    return mf->usable_protocols_bitwise;

cidr_check:
    return ip_is_cidr(mask->be32) ? mf->usable_protocols :
            mf->usable_protocols_bitwise;
}

/* Common validation for mf_check_src() and mf_check_dst().  'type' is only
 * used to word the log messages.
 *
 * Checks, in order, that 'sf' names a field, that it is at least one bit
 * wide, that it lies within the field, and -- if 'flow' is nonnull -- that
 * 'flow' meets the field's prerequisites.  Returns 0 if every check passes;
 * otherwise logs a rate-limited warning and returns an OpenFlow error
 * code. */
static enum ofperr
mf_check__(const struct mf_subfield *sf, const struct flow *flow,
           const char *type)
{
    if (!sf->field) {
        VLOG_WARN_RL(&rl, "unknown %s field", type);
        return OFPERR_OFPBAC_BAD_SET_TYPE;
    } else if (!sf->n_bits) {
        VLOG_WARN_RL(&rl, "zero bit %s field %s", type, sf->field->name);
        return OFPERR_OFPBAC_BAD_SET_LEN;
    } else if (sf->ofs >= sf->field->n_bits) {
        VLOG_WARN_RL(&rl, "bit offset %d exceeds %d-bit width of %s field %s",
                     sf->ofs, sf->field->n_bits, type, sf->field->name);
        return OFPERR_OFPBAC_BAD_SET_LEN;
    } else if (sf->ofs + sf->n_bits > sf->field->n_bits) {
        VLOG_WARN_RL(&rl, "bit offset %d and width %d exceeds %d-bit width "
                     "of %s field %s", sf->ofs, sf->n_bits,
                     sf->field->n_bits, type, sf->field->name);
        return OFPERR_OFPBAC_BAD_SET_LEN;
    } else if (flow && !mf_are_prereqs_ok(sf->field, flow)) {
        VLOG_WARN_RL(&rl, "%s field %s lacks correct prerequisites",
                     type, sf->field->name);
        return OFPERR_OFPBAC_MATCH_INCONSISTENT;
    } else {
        return 0;
    }
}

/* Checks whether 'sf' is valid for reading a subfield out of 'flow'.  Returns
 * 0 if so, otherwise an OpenFlow error code (e.g. as returned by
 * ofp_mkerr()). */
enum ofperr
mf_check_src(const struct mf_subfield *sf, const struct flow *flow)
{
    return mf_check__(sf, flow, "source");
}

/* Checks whether 'sf' is valid for writing a subfield into 'flow'.  Returns 0
 * if so, otherwise an OpenFlow error code (e.g. as returned by
 * ofp_mkerr()). */
enum ofperr
mf_check_dst(const struct mf_subfield *sf, const struct flow *flow)
{
    int error = mf_check__(sf, flow, "destination");
    /* On top of the common checks, a destination must be writable. */
    if (!error && !sf->field->writable) {
        VLOG_WARN_RL(&rl, "destination field %s is not writable",
                     sf->field->name);
        return OFPERR_OFPBAC_BAD_SET_ARGUMENT;
    }
    return error;
}

/* Copies the value and wildcard bit pattern for 'mf' from 'match' into the
 * 'value' and 'mask', respectively.
*/ void mf_get(const struct mf_field *mf, const struct match *match, union mf_value *value, union mf_value *mask) { mf_get_value(mf, &match->flow, value); mf_get_mask(mf, &match->wc, mask); } /* Assigns a random value for field 'mf' to 'value'. */ void mf_random_value(const struct mf_field *mf, union mf_value *value) { random_bytes(value, mf->n_bytes); switch (mf->id) { case MFF_TUN_ID: case MFF_TUN_SRC: case MFF_TUN_DST: case MFF_TUN_TOS: case MFF_TUN_TTL: case MFF_TUN_FLAGS: case MFF_METADATA: case MFF_IN_PORT: case MFF_PKT_MARK: case MFF_SKB_PRIORITY: CASE_MFF_REGS: case MFF_ETH_SRC: case MFF_ETH_DST: case MFF_ETH_TYPE: case MFF_VLAN_TCI: case MFF_IPV4_SRC: case MFF_IPV4_DST: case MFF_IPV6_SRC: case MFF_IPV6_DST: case MFF_IP_PROTO: case MFF_IP_TTL: case MFF_ARP_SPA: case MFF_ARP_TPA: case MFF_ARP_SHA: case MFF_ARP_THA: case MFF_TCP_SRC: case MFF_TCP_DST: case MFF_UDP_SRC: case MFF_UDP_DST: case MFF_SCTP_SRC: case MFF_SCTP_DST: case MFF_ICMPV4_TYPE: case MFF_ICMPV4_CODE: case MFF_ICMPV6_TYPE: case MFF_ICMPV6_CODE: case MFF_ND_TARGET: case MFF_ND_SLL: case MFF_ND_TLL: break; case MFF_IN_PORT_OXM: value->be32 = ofputil_port_to_ofp11(u16_to_ofp(ntohs(value->be16))); break; case MFF_IPV6_LABEL: value->be32 &= ~htonl(IPV6_LABEL_MASK); break; case MFF_IP_DSCP: value->u8 &= IP_DSCP_MASK; break; case MFF_IP_DSCP_SHIFTED: value->u8 &= IP_DSCP_MASK >> 2; break; case MFF_IP_ECN: value->u8 &= IP_ECN_MASK; break; case MFF_IP_FRAG: value->u8 &= FLOW_NW_FRAG_MASK; break; case MFF_ARP_OP: value->be16 &= htons(0xff); break; case MFF_DL_VLAN: value->be16 &= htons(VLAN_VID_MASK); break; case MFF_VLAN_VID: value->be16 &= htons(VLAN_VID_MASK | VLAN_CFI); break; case MFF_DL_VLAN_PCP: case MFF_VLAN_PCP: value->u8 &= 0x07; break; case MFF_MPLS_LABEL: value->be32 &= htonl(MPLS_LABEL_MASK >> MPLS_LABEL_SHIFT); break; case MFF_MPLS_TC: value->u8 &= MPLS_TC_MASK >> MPLS_TC_SHIFT; break; case MFF_MPLS_BOS: value->u8 &= MPLS_BOS_MASK >> MPLS_BOS_SHIFT; break; case MFF_N_IDS: default: 
NOT_REACHED(); } } static char * mf_from_integer_string(const struct mf_field *mf, const char *s, uint8_t *valuep, uint8_t *maskp) { unsigned long long int integer, mask; char *tail; int i; errno = 0; integer = strtoull(s, &tail, 0); if (errno || (*tail != '\0' && *tail != '/')) { goto syntax_error; } if (*tail == '/') { mask = strtoull(tail + 1, &tail, 0); if (errno || *tail != '\0') { goto syntax_error; } } else { mask = ULLONG_MAX; } for (i = mf->n_bytes - 1; i >= 0; i--) { valuep[i] = integer; maskp[i] = mask; integer >>= 8; mask >>= 8; } if (integer) { return xasprintf("%s: value too large for %u-byte field %s", s, mf->n_bytes, mf->name); } return NULL; syntax_error: return xasprintf("%s: bad syntax for %s", s, mf->name); } static char * mf_from_ethernet_string(const struct mf_field *mf, const char *s, uint8_t mac[ETH_ADDR_LEN], uint8_t mask[ETH_ADDR_LEN]) { int n; ovs_assert(mf->n_bytes == ETH_ADDR_LEN); n = -1; if (sscanf(s, ETH_ADDR_SCAN_FMT"%n", ETH_ADDR_SCAN_ARGS(mac), &n) > 0 && n == strlen(s)) { memset(mask, 0xff, ETH_ADDR_LEN); return NULL; } n = -1; if (sscanf(s, ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT"%n", ETH_ADDR_SCAN_ARGS(mac), ETH_ADDR_SCAN_ARGS(mask), &n) > 0 && n == strlen(s)) { return NULL; } return xasprintf("%s: invalid Ethernet address", s); } static char * mf_from_ipv4_string(const struct mf_field *mf, const char *s, ovs_be32 *ip, ovs_be32 *mask) { int prefix; ovs_assert(mf->n_bytes == sizeof *ip); if (sscanf(s, IP_SCAN_FMT"/"IP_SCAN_FMT, IP_SCAN_ARGS(ip), IP_SCAN_ARGS(mask)) == IP_SCAN_COUNT * 2) { /* OK. 
*/ } else if (sscanf(s, IP_SCAN_FMT"/%d", IP_SCAN_ARGS(ip), &prefix) == IP_SCAN_COUNT + 1) { if (prefix <= 0 || prefix > 32) { return xasprintf("%s: network prefix bits not between 1 and " "32", s); } else if (prefix == 32) { *mask = htonl(UINT32_MAX); } else { *mask = htonl(((1u << prefix) - 1) << (32 - prefix)); } } else if (sscanf(s, IP_SCAN_FMT, IP_SCAN_ARGS(ip)) == IP_SCAN_COUNT) { *mask = htonl(UINT32_MAX); } else { return xasprintf("%s: invalid IP address", s); } return NULL; } static char * mf_from_ipv6_string(const struct mf_field *mf, const char *s, struct in6_addr *value, struct in6_addr *mask) { char *str = xstrdup(s); char *save_ptr = NULL; const char *name, *netmask; int retval; ovs_assert(mf->n_bytes == sizeof *value); name = strtok_r(str, "/", &save_ptr); retval = name ? lookup_ipv6(name, value) : EINVAL; if (retval) { char *err; err = xasprintf("%s: could not convert to IPv6 address", str); free(str); return err; } netmask = strtok_r(NULL, "/", &save_ptr); if (netmask) { if (inet_pton(AF_INET6, netmask, mask) != 1) { int prefix = atoi(netmask); if (prefix <= 0 || prefix > 128) { free(str); return xasprintf("%s: prefix bits not between 1 and 128", s); } else { *mask = ipv6_create_mask(prefix); } } } else { *mask = in6addr_exact; } free(str); return NULL; } static char * mf_from_ofp_port_string(const struct mf_field *mf, const char *s, ovs_be16 *valuep, ovs_be16 *maskp) { ofp_port_t port; ovs_assert(mf->n_bytes == sizeof(ovs_be16)); if (ofputil_port_from_string(s, &port)) { *valuep = htons(ofp_to_u16(port)); *maskp = htons(UINT16_MAX); return NULL; } return xasprintf("%s: port value out of range for %s", s, mf->name); } static char * mf_from_ofp_port_string32(const struct mf_field *mf, const char *s, ovs_be32 *valuep, ovs_be32 *maskp) { ofp_port_t port; ovs_assert(mf->n_bytes == sizeof(ovs_be32)); if (ofputil_port_from_string(s, &port)) { *valuep = ofputil_port_to_ofp11(port); *maskp = htonl(UINT32_MAX); return NULL; } return xasprintf("%s: port 
value out of range for %s", s, mf->name); } struct frag_handling { const char *name; uint8_t mask; uint8_t value; }; static const struct frag_handling all_frags[] = { #define A FLOW_NW_FRAG_ANY #define L FLOW_NW_FRAG_LATER /* name mask value */ { "no", A|L, 0 }, { "first", A|L, A }, { "later", A|L, A|L }, { "no", A, 0 }, { "yes", A, A }, { "not_later", L, 0 }, { "later", L, L }, #undef A #undef L }; static char * mf_from_frag_string(const char *s, uint8_t *valuep, uint8_t *maskp) { const struct frag_handling *h; for (h = all_frags; h < &all_frags[ARRAY_SIZE(all_frags)]; h++) { if (!strcasecmp(s, h->name)) { /* We force the upper bits of the mask on to make mf_parse_value() * happy (otherwise it will never think it's an exact match.) */ *maskp = h->mask | ~FLOW_NW_FRAG_MASK; *valuep = h->value; return NULL; } } return xasprintf("%s: unknown fragment type (valid types are \"no\", " "\"yes\", \"first\", \"later\", \"not_first\"", s); } static int parse_flow_tun_flags(const char *s_, const char *(*bit_to_string)(uint32_t), ovs_be16 *res) { uint32_t result = 0; char *save_ptr = NULL; char *name; int rc = 0; char *s = xstrdup(s_); for (name = strtok_r((char *)s, " |", &save_ptr); name; name = strtok_r(NULL, " |", &save_ptr)) { int name_len; unsigned long long int flags; uint32_t bit; int n0; if (sscanf(name, "%lli%n", &flags, &n0) > 0 && n0 > 0) { result |= flags; continue; } name_len = strlen(name); for (bit = 1; bit; bit <<= 1) { const char *fname = bit_to_string(bit); size_t len; if (!fname) { continue; } len = strlen(fname); if (len != name_len) { continue; } if (!strncmp(name, fname, len)) { result |= bit; break; } } if (!bit) { rc = -ENOENT; goto out; } } *res = htons(result); out: free(s); return rc; } static char * mf_from_tun_flags_string(const char *s, ovs_be16 *valuep, ovs_be16 *maskp) { if (!parse_flow_tun_flags(s, flow_tun_flag_to_string, valuep)) { *maskp = htons(UINT16_MAX); return NULL; } return xasprintf("%s: unknown tunnel flags (valid flags are \"df\", 
" "\"csum\", \"key\"", s); } /* Parses 's', a string value for field 'mf', into 'value' and 'mask'. Returns * NULL if successful, otherwise a malloc()'d string describing the error. */ char * mf_parse(const struct mf_field *mf, const char *s, union mf_value *value, union mf_value *mask) { if (!strcmp(s, "*")) { memset(value, 0, mf->n_bytes); memset(mask, 0, mf->n_bytes); return NULL; } switch (mf->string) { case MFS_DECIMAL: case MFS_HEXADECIMAL: return mf_from_integer_string(mf, s, (uint8_t *) value, (uint8_t *) mask); case MFS_ETHERNET: return mf_from_ethernet_string(mf, s, value->mac, mask->mac); case MFS_IPV4: return mf_from_ipv4_string(mf, s, &value->be32, &mask->be32); case MFS_IPV6: return mf_from_ipv6_string(mf, s, &value->ipv6, &mask->ipv6); case MFS_OFP_PORT: return mf_from_ofp_port_string(mf, s, &value->be16, &mask->be16); case MFS_OFP_PORT_OXM: return mf_from_ofp_port_string32(mf, s, &value->be32, &mask->be32); case MFS_FRAG: return mf_from_frag_string(s, &value->u8, &mask->u8); case MFS_TNL_FLAGS: ovs_assert(mf->n_bytes == sizeof(ovs_be16)); return mf_from_tun_flags_string(s, &value->be16, &mask->be16); } NOT_REACHED(); } /* Parses 's', a string value for field 'mf', into 'value'. Returns NULL if * successful, otherwise a malloc()'d string describing the error. 
*/ char * mf_parse_value(const struct mf_field *mf, const char *s, union mf_value *value) { union mf_value mask; char *error; error = mf_parse(mf, s, value, &mask); if (error) { return error; } if (!is_all_ones((const uint8_t *) &mask, mf->n_bytes)) { return xasprintf("%s: wildcards not allowed here", s); } return NULL; } static void mf_format_integer_string(const struct mf_field *mf, const uint8_t *valuep, const uint8_t *maskp, struct ds *s) { unsigned long long int integer; int i; ovs_assert(mf->n_bytes <= 8); integer = 0; for (i = 0; i < mf->n_bytes; i++) { integer = (integer << 8) | valuep[i]; } if (mf->string == MFS_HEXADECIMAL) { ds_put_format(s, "%#llx", integer); } else { ds_put_format(s, "%lld", integer); } if (maskp) { unsigned long long int mask; mask = 0; for (i = 0; i < mf->n_bytes; i++) { mask = (mask << 8) | maskp[i]; } /* I guess we could write the mask in decimal for MFS_DECIMAL but I'm * not sure that that a bit-mask written in decimal is ever easier to * understand than the same bit-mask written in hexadecimal. */ ds_put_format(s, "/%#llx", mask); } } static void mf_format_frag_string(uint8_t value, uint8_t mask, struct ds *s) { const struct frag_handling *h; mask &= FLOW_NW_FRAG_MASK; value &= mask; for (h = all_frags; h < &all_frags[ARRAY_SIZE(all_frags)]; h++) { if (value == h->value && mask == h->mask) { ds_put_cstr(s, h->name); return; } } ds_put_cstr(s, ""); } static void mf_format_tnl_flags_string(const ovs_be16 *valuep, struct ds *s) { format_flags(s, flow_tun_flag_to_string, ntohs(*valuep), '|'); } /* Appends to 's' a string representation of field 'mf' whose value is in * 'value' and 'mask'. 'mask' may be NULL to indicate an exact match. 
*/ void mf_format(const struct mf_field *mf, const union mf_value *value, const union mf_value *mask, struct ds *s) { if (mask) { if (is_all_zeros((const uint8_t *) mask, mf->n_bytes)) { ds_put_cstr(s, "ANY"); return; } else if (is_all_ones((const uint8_t *) mask, mf->n_bytes)) { mask = NULL; } } switch (mf->string) { case MFS_OFP_PORT_OXM: if (!mask) { ofp_port_t port; ofputil_port_from_ofp11(value->be32, &port); ofputil_format_port(port, s); break; } /* fall through */ case MFS_OFP_PORT: if (!mask) { ofputil_format_port(u16_to_ofp(ntohs(value->be16)), s); break; } /* fall through */ case MFS_DECIMAL: case MFS_HEXADECIMAL: mf_format_integer_string(mf, (uint8_t *) value, (uint8_t *) mask, s); break; case MFS_ETHERNET: eth_format_masked(value->mac, mask->mac, s); break; case MFS_IPV4: ip_format_masked(value->be32, mask ? mask->be32 : htonl(UINT32_MAX), s); break; case MFS_IPV6: print_ipv6_masked(s, &value->ipv6, mask ? &mask->ipv6 : NULL); break; case MFS_FRAG: mf_format_frag_string(value->u8, mask ? mask->u8 : UINT8_MAX, s); break; case MFS_TNL_FLAGS: mf_format_tnl_flags_string(&value->be16, s); break; default: NOT_REACHED(); } } /* Makes subfield 'sf' within 'flow' exactly match the 'sf->n_bits' * least-significant bits in 'x'. */ void mf_write_subfield_flow(const struct mf_subfield *sf, const union mf_subvalue *x, struct flow *flow) { const struct mf_field *field = sf->field; union mf_value value; mf_get_value(field, flow, &value); bitwise_copy(x, sizeof *x, 0, &value, field->n_bytes, sf->ofs, sf->n_bits); mf_set_flow_value(field, &value, flow); } /* Makes subfield 'sf' within 'match' exactly match the 'sf->n_bits' * least-significant bits in 'x'. 
*/ void mf_write_subfield(const struct mf_subfield *sf, const union mf_subvalue *x, struct match *match) { const struct mf_field *field = sf->field; union mf_value value, mask; mf_get(field, match, &value, &mask); bitwise_copy(x, sizeof *x, 0, &value, field->n_bytes, sf->ofs, sf->n_bits); bitwise_one ( &mask, field->n_bytes, sf->ofs, sf->n_bits); mf_set(field, &value, &mask, match); } /* Initializes 'x' to the value of 'sf' within 'flow'. 'sf' must be valid for * reading 'flow', e.g. as checked by mf_check_src(). */ void mf_read_subfield(const struct mf_subfield *sf, const struct flow *flow, union mf_subvalue *x) { union mf_value value; mf_get_value(sf->field, flow, &value); memset(x, 0, sizeof *x); bitwise_copy(&value, sf->field->n_bytes, sf->ofs, x, sizeof *x, 0, sf->n_bits); } /* Returns the value of 'sf' within 'flow'. 'sf' must be valid for reading * 'flow', e.g. as checked by mf_check_src() and sf->n_bits must be 64 or * less. */ uint64_t mf_get_subfield(const struct mf_subfield *sf, const struct flow *flow) { union mf_value value; mf_get_value(sf->field, flow, &value); return bitwise_get(&value, sf->field->n_bytes, sf->ofs, sf->n_bits); } /* Formats 'sf' into 's' in a format normally acceptable to * mf_parse_subfield(). (It won't be acceptable if sf->field is NULL or if * sf->field has no NXM name.) 
*/ void mf_format_subfield(const struct mf_subfield *sf, struct ds *s) { if (!sf->field) { ds_put_cstr(s, ""); } else if (sf->field->nxm_name) { ds_put_cstr(s, sf->field->nxm_name); } else if (sf->field->nxm_header) { uint32_t header = sf->field->nxm_header; ds_put_format(s, "%d:%d", NXM_VENDOR(header), NXM_FIELD(header)); } else { ds_put_cstr(s, sf->field->name); } if (sf->field && sf->ofs == 0 && sf->n_bits == sf->field->n_bits) { ds_put_cstr(s, "[]"); } else if (sf->n_bits == 1) { ds_put_format(s, "[%d]", sf->ofs); } else { ds_put_format(s, "[%d..%d]", sf->ofs, sf->ofs + sf->n_bits - 1); } } static const struct mf_field * mf_parse_subfield_name(const char *name, int name_len, bool *wild) { int i; *wild = name_len > 2 && !memcmp(&name[name_len - 2], "_W", 2); if (*wild) { name_len -= 2; } for (i = 0; i < MFF_N_IDS; i++) { const struct mf_field *mf = mf_from_id(i); if (mf->nxm_name && !strncmp(mf->nxm_name, name, name_len) && mf->nxm_name[name_len] == '\0') { return mf; } if (mf->oxm_name && !strncmp(mf->oxm_name, name, name_len) && mf->oxm_name[name_len] == '\0') { return mf; } } return NULL; } /* Parses a subfield from the beginning of '*sp' into 'sf'. If successful, * returns NULL and advances '*sp' to the first byte following the parsed * string. On failure, returns a malloc()'d error message, does not modify * '*sp', and does not properly initialize 'sf'. * * The syntax parsed from '*sp' takes the form "header[start..end]" where * 'header' is the name of an NXM field and 'start' and 'end' are (inclusive) * bit indexes. "..end" may be omitted to indicate a single bit. "start..end" * may both be omitted (the [] are still required) to indicate an entire * field. 
*/
char * WARN_UNUSED_RESULT
mf_parse_subfield__(struct mf_subfield *sf, const char **sp)
{
    const struct mf_field *field;
    const char *name;
    int start, end;
    const char *s;
    int name_len;
    bool wild;
    int n;

    s = *sp;
    name = s;
    name_len = strcspn(s, "[");
    if (s[name_len] != '[') {
        return xasprintf("%s: missing [ looking for field name", *sp);
    }

    field = mf_parse_subfield_name(name, name_len, &wild);
    if (!field) {
        return xasprintf("%s: unknown field `%.*s'", *sp, name_len, s);
    }

    s += name_len;

    /* sscanf()'s return value only counts conversions, so it cannot tell
     * whether the closing ']' was present.  The trailing %n is stored only if
     * everything before it matched, so 'n' stays -1 for input such as
     * "[0..5" or "[3..]" and becomes the bracket expression's length
     * otherwise. */
    n = -1;
    if (sscanf(s, "[%d..%d]%n", &start, &end, &n) == 2 && n > 0) {
        /* Nothing to do. */
    } else {
        n = -1;
        if (sscanf(s, "[%d]%n", &start, &n) == 1 && n > 0) {
            end = start;
        } else if (!strncmp(s, "[]", 2)) {
            start = 0;
            end = field->n_bits - 1;
            n = 2;
        } else {
            return xasprintf("%s: syntax error expecting [] or [<bit>] or "
                             "[<start>..<end>]", *sp);
        }
    }
    s += n;

    if (start > end) {
        return xasprintf("%s: starting bit %d is after ending bit %d",
                         *sp, start, end);
    } else if (start >= field->n_bits) {
        /* 'field->n_bits' is unsigned, so a negative 'start' also lands
         * here. */
        return xasprintf("%s: starting bit %d is not valid because field is "
                         "only %u bits wide", *sp, start, field->n_bits);
    } else if (end >= field->n_bits){
        return xasprintf("%s: ending bit %d is not valid because field is "
                         "only %u bits wide", *sp, end, field->n_bits);
    }

    sf->field = field;
    sf->ofs = start;
    sf->n_bits = end - start + 1;

    *sp = s;
    return NULL;
}

/* Parses a subfield from the entirety of 's' into 'sf'.  Returns NULL if
 * successful, otherwise a malloc()'d string describing the error.  The caller
 * is responsible for freeing the returned string.
 *
 * The syntax parsed from 's' takes the form "header[start..end]" where
 * 'header' is the name of an NXM field and 'start' and 'end' are (inclusive)
 * bit indexes.  "..end" may be omitted to indicate a single bit.  "start..end"
 * may both be omitted (the [] are still required) to indicate an entire
 * field.
*/ char * WARN_UNUSED_RESULT mf_parse_subfield(struct mf_subfield *sf, const char *s) { char *error = mf_parse_subfield__(sf, &s); if (!error && s[0]) { error = xstrdup("unexpected input following field syntax"); } return error; } void mf_format_subvalue(const union mf_subvalue *subvalue, struct ds *s) { int i; for (i = 0; i < ARRAY_SIZE(subvalue->u8); i++) { if (subvalue->u8[i]) { ds_put_format(s, "0x%"PRIx8, subvalue->u8[i]); for (i++; i < ARRAY_SIZE(subvalue->u8); i++) { ds_put_format(s, "%02"PRIx8, subvalue->u8[i]); } return; } } ds_put_char(s, '0'); } openvswitch-2.0.1+git20140120/lib/meta-flow.h000066400000000000000000000344621226605124000202750ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef META_FLOW_H #define META_FLOW_H 1 #include #include #include #include "flow.h" #include "ofp-errors.h" #include "ofp-util.h" #include "packets.h" struct ds; struct match; /* The comment on each of these indicates the member in "union mf_value" used * to represent its value. */ enum mf_field_id { /* Metadata. 
*/ MFF_TUN_ID, /* be64 */ MFF_TUN_SRC, /* be32 */ MFF_TUN_DST, /* be32 */ MFF_TUN_FLAGS, /* be16 */ MFF_TUN_TTL, /* u8 */ MFF_TUN_TOS, /* u8 */ MFF_METADATA, /* be64 */ MFF_IN_PORT, /* be16 */ MFF_IN_PORT_OXM, /* be32 */ MFF_SKB_PRIORITY, /* be32 */ MFF_PKT_MARK, /* be32 */ #if FLOW_N_REGS > 0 MFF_REG0, /* be32 */ #endif #if FLOW_N_REGS > 1 MFF_REG1, /* be32 */ #endif #if FLOW_N_REGS > 2 MFF_REG2, /* be32 */ #endif #if FLOW_N_REGS > 3 MFF_REG3, /* be32 */ #endif #if FLOW_N_REGS > 4 MFF_REG4, /* be32 */ #endif #if FLOW_N_REGS > 5 MFF_REG5, /* be32 */ #endif #if FLOW_N_REGS > 6 MFF_REG6, /* be32 */ #endif #if FLOW_N_REGS > 7 MFF_REG7, /* be32 */ #endif /* L2. */ MFF_ETH_SRC, /* mac */ MFF_ETH_DST, /* mac */ MFF_ETH_TYPE, /* be16 */ MFF_VLAN_TCI, /* be16 */ MFF_DL_VLAN, /* be16 (OpenFlow 1.0 compatibility) */ MFF_VLAN_VID, /* be16 (OpenFlow 1.2 compatibility) */ MFF_DL_VLAN_PCP, /* u8 (OpenFlow 1.0 compatibility) */ MFF_VLAN_PCP, /* be16 (OpenFlow 1.2 compatibility) */ /* L2.5 */ MFF_MPLS_LABEL, /* be32 */ MFF_MPLS_TC, /* u8 */ MFF_MPLS_BOS, /* u8 */ /* L3. */ MFF_IPV4_SRC, /* be32 */ MFF_IPV4_DST, /* be32 */ MFF_IPV6_SRC, /* ipv6 */ MFF_IPV6_DST, /* ipv6 */ MFF_IPV6_LABEL, /* be32 */ /* The IPv4/IPv6 DSCP field has two different views: * * - MFF_IP_DSCP has the DSCP in bits 2-7, their bit positions in the * IPv4 and IPv6 "traffic class" field, as used in OpenFlow 1.0 and 1.1 * flow format and in NXM's NXM_OF_IP_TOS. * * - MFF_IP_DSCP_SHIFTED has the DSCP in bits 0-5, shifted right two bits * from their positions in the IPv4 and IPv6 "traffic class" field, as * used in OpenFlow 1.2+ OXM's OXM_OF_IP_DSCP. 
*/ MFF_IP_PROTO, /* u8 (used for IPv4 or IPv6) */ MFF_IP_DSCP, /* u8 (used for IPv4 or IPv6) */ MFF_IP_DSCP_SHIFTED, /* u8 (used for IPv4 or IPv6) (OF1.2 compat) */ MFF_IP_ECN, /* u8 (used for IPv4 or IPv6) */ MFF_IP_TTL, /* u8 (used for IPv4 or IPv6) */ MFF_IP_FRAG, /* u8 (used for IPv4 or IPv6) */ MFF_ARP_OP, /* be16 */ MFF_ARP_SPA, /* be32 */ MFF_ARP_TPA, /* be32 */ MFF_ARP_SHA, /* mac */ MFF_ARP_THA, /* mac */ /* L4. */ MFF_TCP_SRC, /* be16 (used for IPv4 or IPv6) */ MFF_TCP_DST, /* be16 (used for IPv4 or IPv6) */ MFF_UDP_SRC, /* be16 (used for IPv4 or IPv6) */ MFF_UDP_DST, /* be16 (used for IPv4 or IPv6) */ MFF_SCTP_SRC, /* be16 (used for IPv4 or IPv6) */ MFF_SCTP_DST, /* be16 (used for IPv4 or IPv6) */ MFF_ICMPV4_TYPE, /* u8 */ MFF_ICMPV4_CODE, /* u8 */ MFF_ICMPV6_TYPE, /* u8 */ MFF_ICMPV6_CODE, /* u8 */ /* ICMPv6 Neighbor Discovery. */ MFF_ND_TARGET, /* ipv6 */ MFF_ND_SLL, /* mac */ MFF_ND_TLL, /* mac */ MFF_N_IDS }; /* Use this macro as CASE_MFF_REGS: in a switch statement to choose all of the * MFF_REGx cases. 
*/ #if FLOW_N_REGS == 1 # define CASE_MFF_REGS \ case MFF_REG0 #elif FLOW_N_REGS == 2 # define CASE_MFF_REGS \ case MFF_REG0: case MFF_REG1 #elif FLOW_N_REGS == 3 # define CASE_MFF_REGS \ case MFF_REG0: case MFF_REG1: case MFF_REG2 #elif FLOW_N_REGS == 4 # define CASE_MFF_REGS \ case MFF_REG0: case MFF_REG1: case MFF_REG2: case MFF_REG3 #elif FLOW_N_REGS == 5 # define CASE_MFF_REGS \ case MFF_REG0: case MFF_REG1: case MFF_REG2: case MFF_REG3: \ case MFF_REG4 #elif FLOW_N_REGS == 6 # define CASE_MFF_REGS \ case MFF_REG0: case MFF_REG1: case MFF_REG2: case MFF_REG3: \ case MFF_REG4: case MFF_REG5 #elif FLOW_N_REGS == 7 # define CASE_MFF_REGS \ case MFF_REG0: case MFF_REG1: case MFF_REG2: case MFF_REG3: \ case MFF_REG4: case MFF_REG5: case MFF_REG6 #elif FLOW_N_REGS == 8 # define CASE_MFF_REGS \ case MFF_REG0: case MFF_REG1: case MFF_REG2: case MFF_REG3: \ case MFF_REG4: case MFF_REG5: case MFF_REG6: case MFF_REG7 #else # error #endif /* Prerequisites for matching a field. * * A field may only be matched if the correct lower-level protocols are also * matched. For example, the TCP port may be matched only if the Ethernet type * matches ETH_TYPE_IP and the IP protocol matches IPPROTO_TCP. */ enum mf_prereqs { MFP_NONE, /* L2 requirements. */ MFP_ARP, MFP_VLAN_VID, MFP_IPV4, MFP_IPV6, MFP_IP_ANY, /* L2.5 requirements. */ MFP_MPLS, /* L2+L3 requirements. */ MFP_TCP, /* On IPv4 or IPv6. */ MFP_UDP, /* On IPv4 or IPv6. */ MFP_SCTP, /* On IPv4 or IPv6. */ MFP_ICMPV4, MFP_ICMPV6, /* L2+L3+L4 requirements. */ MFP_ND, MFP_ND_SOLICIT, MFP_ND_ADVERT }; /* Forms of partial-field masking allowed for a field. * * Every field may be masked as a whole. */ enum mf_maskable { MFM_NONE, /* No sub-field masking. */ MFM_FULLY, /* Every bit is individually maskable. */ }; /* How to format or parse a field's value. */ enum mf_string { /* Integer formats. * * The particular MFS_* constant sets the output format. On input, either * decimal or hexadecimal (prefixed with 0x) is accepted. 
*/ MFS_DECIMAL, MFS_HEXADECIMAL, /* Other formats. */ MFS_ETHERNET, MFS_IPV4, MFS_IPV6, MFS_OFP_PORT, /* An OpenFlow port number or name. */ MFS_OFP_PORT_OXM, /* An OpenFlow port number or name (32-bit). */ MFS_FRAG, /* no, yes, first, later, not_later */ MFS_TNL_FLAGS, /* FLOW_TNL_F_* flags */ }; struct mf_field { /* Identification. */ enum mf_field_id id; /* MFF_*. */ const char *name; /* Name of this field, e.g. "eth_type". */ const char *extra_name; /* Alternate name, e.g. "dl_type", or NULL. */ /* Size. * * Most fields have n_bytes * 8 == n_bits. There are a few exceptions: * * - "dl_vlan" is 2 bytes but only 12 bits. * - "dl_vlan_pcp" is 1 byte but only 3 bits. * - "is_frag" is 1 byte but only 2 bits. * - "ipv6_label" is 4 bytes but only 20 bits. * - "mpls_label" is 4 bytes but only 20 bits. * - "mpls_tc" is 1 byte but only 3 bits. * - "mpls_bos" is 1 byte but only 1 bit. */ unsigned int n_bytes; /* Width of the field in bytes. */ unsigned int n_bits; /* Number of significant bits in field. */ /* Properties. */ enum mf_maskable maskable; enum mf_string string; enum mf_prereqs prereqs; bool writable; /* May be written by actions? */ /* NXM and OXM properties. * * There are the following possibilities for these members for a given * mf_field: * * - Neither NXM nor OXM defines such a field: these members will all be * zero or NULL. * * - NXM and OXM both define such a field: nxm_header and oxm_header will * both be nonzero and different, similarly for nxm_name and oxm_name. * * - Only NXM or only OXM defines such a field: nxm_header and oxm_header * will both have the same value (either an OXM_* or NXM_* value) and * similarly for nxm_name and oxm_name. * * Thus, 'nxm_header' is the appropriate header to use when outputting an * NXM formatted match, since it will be an NXM_* constant when possible * for compatibility with OpenFlow implementations that expect that, with * OXM_* constants used for fields that OXM adds. 
Conversely, 'oxm_header' * is the header to use when outputting an OXM formatted match. */ uint32_t nxm_header; /* An NXM_* (or OXM_*) constant. */ const char *nxm_name; /* The nxm_header constant's name. */ uint32_t oxm_header; /* An OXM_* (or NXM_*) constant. */ const char *oxm_name; /* The oxm_header constant's name. */ /* Usable protocols. * NXM and OXM are extensible, allowing later extensions to be sent in * earlier protocol versions, so this does not necessarily correspond to * the OpenFlow protocol version the field was introduced in. * Also, some field types are transparently mapped to each other via the * struct flow (like vlan and dscp/tos fields), so each variant supports * all protocols. */ enum ofputil_protocol usable_protocols; /* If fully/cidr masked. */ /* If partially/non-cidr masked. */ enum ofputil_protocol usable_protocols_bitwise; }; /* The representation of a field's value. */ union mf_value { uint8_t u8; ovs_be16 be16; ovs_be32 be32; ovs_be64 be64; uint8_t mac[ETH_ADDR_LEN]; struct in6_addr ipv6; }; BUILD_ASSERT_DECL(sizeof(union mf_value) == 16); /* Part of a field. */ struct mf_subfield { const struct mf_field *field; unsigned int ofs; /* Bit offset. */ unsigned int n_bits; /* Number of bits. */ }; /* Data for some part of an mf_field. * * The data is stored "right-justified". For example, if "union mf_subvalue * value" contains NXM_OF_VLAN_TCI[0..11], then one could access the * corresponding data in value.be16[7] as the bits in the mask htons(0xfff). */ union mf_subvalue { uint8_t u8[16]; ovs_be16 be16[8]; ovs_be32 be32[4]; ovs_be64 be64[2]; }; BUILD_ASSERT_DECL(sizeof(union mf_value) == sizeof (union mf_subvalue)); /* Finding mf_fields. */ const struct mf_field *mf_from_id(enum mf_field_id); const struct mf_field *mf_from_name(const char *name); const struct mf_field *mf_from_nxm_header(uint32_t nxm_header); const struct mf_field *mf_from_nxm_name(const char *nxm_name); /* Inspecting wildcarded bits. 
*/ bool mf_is_all_wild(const struct mf_field *, const struct flow_wildcards *); bool mf_is_mask_valid(const struct mf_field *, const union mf_value *mask); void mf_get_mask(const struct mf_field *, const struct flow_wildcards *, union mf_value *mask); /* Prerequisites. */ bool mf_are_prereqs_ok(const struct mf_field *, const struct flow *); void mf_force_prereqs(const struct mf_field *, struct match *); /* Field values. */ bool mf_is_value_valid(const struct mf_field *, const union mf_value *value); void mf_get_value(const struct mf_field *, const struct flow *, union mf_value *value); void mf_set_value(const struct mf_field *, const union mf_value *value, struct match *); void mf_set_flow_value(const struct mf_field *, const union mf_value *value, struct flow *); bool mf_is_zero(const struct mf_field *, const struct flow *); void mf_get(const struct mf_field *, const struct match *, union mf_value *value, union mf_value *mask); /* Returns the set of usable protocols. */ enum ofputil_protocol mf_set(const struct mf_field *, const union mf_value *value, const union mf_value *mask, struct match *); void mf_set_wild(const struct mf_field *, struct match *); void mf_random_value(const struct mf_field *, union mf_value *value); /* Subfields. 
*/ void mf_write_subfield_flow(const struct mf_subfield *, const union mf_subvalue *, struct flow *); void mf_write_subfield(const struct mf_subfield *, const union mf_subvalue *, struct match *); void mf_read_subfield(const struct mf_subfield *, const struct flow *, union mf_subvalue *); uint64_t mf_get_subfield(const struct mf_subfield *, const struct flow *); void mf_format_subfield(const struct mf_subfield *, struct ds *); char *mf_parse_subfield__(struct mf_subfield *sf, const char **s) WARN_UNUSED_RESULT; char *mf_parse_subfield(struct mf_subfield *, const char *s) WARN_UNUSED_RESULT; enum ofperr mf_check_src(const struct mf_subfield *, const struct flow *); enum ofperr mf_check_dst(const struct mf_subfield *, const struct flow *); /* Parsing and formatting. */ char *mf_parse(const struct mf_field *, const char *, union mf_value *value, union mf_value *mask); char *mf_parse_value(const struct mf_field *, const char *, union mf_value *); void mf_format(const struct mf_field *, const union mf_value *value, const union mf_value *mask, struct ds *); void mf_format_subvalue(const union mf_subvalue *subvalue, struct ds *s); #endif /* meta-flow.h */ openvswitch-2.0.1+git20140120/lib/multipath.c000066400000000000000000000224461226605124000204030ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "multipath.h" #include #include #include #include #include "dynamic-string.h" #include "nx-match.h" #include "ofp-actions.h" #include "ofp-errors.h" #include "ofp-util.h" #include "openflow/nicira-ext.h" #include "packets.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(multipath); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); /* Converts 'nam' into 'mp'. Returns 0 if successful, otherwise an * OFPERR_*. */ enum ofperr multipath_from_openflow(const struct nx_action_multipath *nam, struct ofpact_multipath *mp) { uint32_t n_links = ntohs(nam->max_link) + 1; size_t min_n_bits = log_2_ceil(n_links); ofpact_init_MULTIPATH(mp); mp->fields = ntohs(nam->fields); mp->basis = ntohs(nam->basis); mp->algorithm = ntohs(nam->algorithm); mp->max_link = ntohs(nam->max_link); mp->arg = ntohl(nam->arg); mp->dst.field = mf_from_nxm_header(ntohl(nam->dst)); mp->dst.ofs = nxm_decode_ofs(nam->ofs_nbits); mp->dst.n_bits = nxm_decode_n_bits(nam->ofs_nbits); if (!flow_hash_fields_valid(mp->fields)) { VLOG_WARN_RL(&rl, "unsupported fields %d", (int) mp->fields); return OFPERR_OFPBAC_BAD_ARGUMENT; } else if (mp->algorithm != NX_MP_ALG_MODULO_N && mp->algorithm != NX_MP_ALG_HASH_THRESHOLD && mp->algorithm != NX_MP_ALG_HRW && mp->algorithm != NX_MP_ALG_ITER_HASH) { VLOG_WARN_RL(&rl, "unsupported algorithm %d", (int) mp->algorithm); return OFPERR_OFPBAC_BAD_ARGUMENT; } else if (mp->dst.n_bits < min_n_bits) { VLOG_WARN_RL(&rl, "multipath action requires at least %zu bits for " "%"PRIu32" links", min_n_bits, n_links); return OFPERR_OFPBAC_BAD_ARGUMENT; } return multipath_check(mp, NULL); } /* Checks that 'mp' is valid on flow. Returns 0 if it is valid, otherwise an * OFPERR_*. */ enum ofperr multipath_check(const struct ofpact_multipath *mp, const struct flow *flow) { return mf_check_dst(&mp->dst, flow); } /* Converts 'mp' into an OpenFlow NXAST_MULTIPATH action, which it appends to * 'openflow'. 
*/ void multipath_to_nxast(const struct ofpact_multipath *mp, struct ofpbuf *openflow) { struct nx_action_multipath *nam = ofputil_put_NXAST_MULTIPATH(openflow); nam->fields = htons(mp->fields); nam->basis = htons(mp->basis); nam->algorithm = htons(mp->algorithm); nam->max_link = htons(mp->max_link); nam->arg = htonl(mp->arg); nam->ofs_nbits = nxm_encode_ofs_nbits(mp->dst.ofs, mp->dst.n_bits); nam->dst = htonl(mp->dst.field->nxm_header); } /* multipath_execute(). */ static uint16_t multipath_algorithm(uint32_t hash, enum nx_mp_algorithm, unsigned int n_links, unsigned int arg); /* Executes 'mp' based on the current contents of 'flow', writing the results * back into 'flow'. Sets fields in 'wc' that were used to calculate * the result. */ void multipath_execute(const struct ofpact_multipath *mp, struct flow *flow, struct flow_wildcards *wc) { /* Calculate value to store. */ uint32_t hash = flow_hash_fields(flow, mp->fields, mp->basis); uint16_t link = multipath_algorithm(hash, mp->algorithm, mp->max_link + 1, mp->arg); flow_mask_hash_fields(flow, wc, mp->fields); nxm_reg_load(&mp->dst, link, flow, wc); } static uint16_t algorithm_hrw(uint32_t hash, unsigned int n_links) { uint32_t best_weight; uint16_t best_link; unsigned int link; best_link = 0; best_weight = hash_2words(hash, 0); for (link = 1; link < n_links; link++) { uint32_t weight = hash_2words(hash, link); if (weight > best_weight) { best_link = link; best_weight = weight; } } return best_link; } /* Works for 'x' in the range [1,65536], which is all we need. 
*/ static unsigned int round_up_pow2(unsigned int x) { x--; x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; return x + 1; } static uint16_t algorithm_iter_hash(uint32_t hash, unsigned int n_links, unsigned int modulo) { uint16_t link; int i; if (modulo < n_links || modulo / 2 > n_links) { modulo = round_up_pow2(n_links); } i = 0; do { link = hash_2words(hash, i++) % modulo; } while (link >= n_links); return link; } static uint16_t multipath_algorithm(uint32_t hash, enum nx_mp_algorithm algorithm, unsigned int n_links, unsigned int arg) { switch (algorithm) { case NX_MP_ALG_MODULO_N: return hash % n_links; case NX_MP_ALG_HASH_THRESHOLD: if (n_links == 1) { return 0; } return hash / (UINT32_MAX / n_links + 1); case NX_MP_ALG_HRW: return (n_links <= 64 ? algorithm_hrw(hash, n_links) : algorithm_iter_hash(hash, n_links, 0)); case NX_MP_ALG_ITER_HASH: return algorithm_iter_hash(hash, n_links, arg); } NOT_REACHED(); } /* Parses 's_' as a set of arguments to the "multipath" action and initializes * 'mp' accordingly. ovs-ofctl(8) describes the format parsed. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. 
The caller is responsible for freeing the returned string.*/ static char * WARN_UNUSED_RESULT multipath_parse__(struct ofpact_multipath *mp, const char *s_, char *s) { char *save_ptr = NULL; char *fields, *basis, *algorithm, *n_links_str, *arg, *dst; char *error; int n_links; fields = strtok_r(s, ", ", &save_ptr); basis = strtok_r(NULL, ", ", &save_ptr); algorithm = strtok_r(NULL, ", ", &save_ptr); n_links_str = strtok_r(NULL, ", ", &save_ptr); arg = strtok_r(NULL, ", ", &save_ptr); dst = strtok_r(NULL, ", ", &save_ptr); if (!dst) { return xasprintf("%s: not enough arguments to multipath action", s_); } ofpact_init_MULTIPATH(mp); if (!strcasecmp(fields, "eth_src")) { mp->fields = NX_HASH_FIELDS_ETH_SRC; } else if (!strcasecmp(fields, "symmetric_l4")) { mp->fields = NX_HASH_FIELDS_SYMMETRIC_L4; } else { return xasprintf("%s: unknown fields `%s'", s_, fields); } mp->basis = atoi(basis); if (!strcasecmp(algorithm, "modulo_n")) { mp->algorithm = NX_MP_ALG_MODULO_N; } else if (!strcasecmp(algorithm, "hash_threshold")) { mp->algorithm = NX_MP_ALG_HASH_THRESHOLD; } else if (!strcasecmp(algorithm, "hrw")) { mp->algorithm = NX_MP_ALG_HRW; } else if (!strcasecmp(algorithm, "iter_hash")) { mp->algorithm = NX_MP_ALG_ITER_HASH; } else { return xasprintf("%s: unknown algorithm `%s'", s_, algorithm); } n_links = atoi(n_links_str); if (n_links < 1 || n_links > 65536) { return xasprintf("%s: n_links %d is not in valid range 1 to 65536", s_, n_links); } mp->max_link = n_links - 1; mp->arg = atoi(arg); error = mf_parse_subfield(&mp->dst, dst); if (error) { return error; } if (mp->dst.n_bits < 16 && n_links > (1u << mp->dst.n_bits)) { return xasprintf("%s: %d-bit destination field has %u possible " "values, less than specified n_links %d", s_, mp->dst.n_bits, 1u << mp->dst.n_bits, n_links); } return NULL; } /* Parses 's_' as a set of arguments to the "multipath" action and initializes * 'mp' accordingly. ovs-ofctl(8) describes the format parsed. 
* * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ char * WARN_UNUSED_RESULT multipath_parse(struct ofpact_multipath *mp, const char *s_) { char *s = xstrdup(s_); char *error = multipath_parse__(mp, s_, s); free(s); return error; } /* Appends a description of 'mp' to 's', in the format that ovs-ofctl(8) * describes. */ void multipath_format(const struct ofpact_multipath *mp, struct ds *s) { const char *fields, *algorithm; fields = flow_hash_fields_to_str(mp->fields); switch (mp->algorithm) { case NX_MP_ALG_MODULO_N: algorithm = "modulo_n"; break; case NX_MP_ALG_HASH_THRESHOLD: algorithm = "hash_threshold"; break; case NX_MP_ALG_HRW: algorithm = "hrw"; break; case NX_MP_ALG_ITER_HASH: algorithm = "iter_hash"; break; default: algorithm = ""; } ds_put_format(s, "multipath(%s,%"PRIu16",%s,%d,%"PRIu16",", fields, mp->basis, algorithm, mp->max_link + 1, mp->arg); mf_format_subfield(&mp->dst, s); ds_put_char(s, ')'); } openvswitch-2.0.1+git20140120/lib/multipath.h000066400000000000000000000031251226605124000204010ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef MULTIPATH_H #define MULTIPATH_H 1 #include #include "compiler.h" #include "ofp-errors.h" struct ds; struct flow; struct flow_wildcards; struct nx_action_multipath; struct ofpact_multipath; struct ofpbuf; /* NXAST_MULTIPATH helper functions. 
*
 * See include/openflow/nicira-ext.h for NXAST_MULTIPATH specification. */

/* NOTE(review): the next four functions are implemented outside the part of
 * the file visible here; their exact contracts should be checked against
 * lib/multipath.c. */
enum ofperr multipath_from_openflow(const struct nx_action_multipath *,
                                    struct ofpact_multipath *);
enum ofperr multipath_check(const struct ofpact_multipath *,
                            const struct flow *);
void multipath_to_nxast(const struct ofpact_multipath *,
                        struct ofpbuf *openflow);

void multipath_execute(const struct ofpact_multipath *, struct flow *,
                       struct flow_wildcards *);

/* Parses the textual arguments of a "multipath" action.  Returns NULL on
 * success, otherwise a malloc()'d error string that the caller must free. */
char *multipath_parse(struct ofpact_multipath *, const char *)
    WARN_UNUSED_RESULT;
/* Appends a textual description of the action to the given 'struct ds'. */
void multipath_format(const struct ofpact_multipath *, struct ds *);

#endif /* multipath.h */
openvswitch-2.0.1+git20140120/lib/netdev-bsd.c000066400000000000000000001365101226605124000204250ustar00rootroot00000000000000/*
 * Copyright (c) 2011, 2013 Gaetano Catalli.
 * Copyright (c) 2013 YAMAMOTO Takashi.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

/* NOTE(review): every bare "#include" below has lost its <header> name in
 * this copy of the file; the names must be restored from the upstream
 * source before this can compile. */
#include

#include "netdev-provider.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef HAVE_NET_IF_MIB_H
#include
#endif
#include
#include
#include
#include
#if defined(__NetBSD__)
#include
#include
#include
#endif

#include "rtbsd.h"
#include "coverage.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "ovs-thread.h"
#include "packets.h"
#include "poll-loop.h"
#include "socket-util.h"
#include "shash.h"
#include "svec.h"
#include "util.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(netdev_bsd);

/* Receive handle for a BSD netdev. */
struct netdev_rx_bsd {
    struct netdev_rx up;

    /* Packet capture descriptor for a system network device.
     * For a tap device this is NULL. */
    pcap_t *pcap_handle;

    /* Selectable file descriptor for the network device.
     * This descriptor will be used for polling operations. */
    int fd;
};

/* A BSD network device, either a "system" device or a "tap" device. */
struct netdev_bsd {
    struct netdev up;

    /* Never changes after initialization. */
    char *kernel_name;

    /* Protects all members below. */
    struct ovs_mutex mutex;

    unsigned int cache_valid;   /* Bitmap of VALID_* bits: which of the
                                 * cached members below are current. */
    unsigned int change_seq;    /* Bumped on every change; never left at
                                 * zero (see netdev_bsd_changed()). */

    /* Cached device properties; each is only meaningful while the matching
     * VALID_* bit is set in 'cache_valid'. */
    int ifindex;
    uint8_t etheraddr[ETH_ADDR_LEN];
    struct in_addr in4;
    struct in_addr netmask;
    struct in6_addr in6;
    int mtu;
    int carrier;

    int tap_fd;         /* TAP character device, if any, otherwise -1. */

    /* Used for sending packets on non-tap devices. */
    pcap_t *pcap;
    int fd;
};


/* Bits for 'cache_valid' in struct netdev_bsd. */
enum {
    VALID_IFINDEX = 1 << 0,
    VALID_ETHERADDR = 1 << 1,
    VALID_IN4 = 1 << 2,
    VALID_IN6 = 1 << 3,
    VALID_MTU = 1 << 4,
    VALID_CARRIER = 1 << 5
};

/* Snapshot length passed to pcap_open_live(). */
#define PCAP_SNAPLEN 2048


/*
 * Notifier used to invalidate device information in case of status change.
 *
 * It is registered with 'rtbsd_notifier_register()' by
 * 'cache_notifier_ref()' when the first device is constructed, by either
 * 'netdev_bsd_construct_tap()' or 'netdev_bsd_construct_system()'.
 *
 * The callback associated with this notifier ('netdev_bsd_cache_cb()') will
 * invalidate cached information about the device.
*/
static struct rtbsd_notifier netdev_bsd_cache_notifier;
/* Number of constructed devices currently sharing the notifier above. */
static int cache_notifier_refcount;

/* Rate limit shared by the rate-limited log messages in this file. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

/* Helpers defined later in this file. */
static void destroy_tap(int fd, const char *name);
static int get_flags(const struct netdev *, int *flagsp);
static int set_flags(const char *, int flags);
static int do_set_addr(struct netdev *netdev,
                       unsigned long ioctl_nr, const char *ioctl_name,
                       struct in_addr addr);
static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]);
static int set_etheraddr(const char *netdev_name, int hwaddr_family,
                         int hwaddr_len, const uint8_t[ETH_ADDR_LEN]);
static int get_ifindex(const struct netdev *, int *ifindexp);

static int ifr_get_flags(const struct ifreq *);
static void ifr_set_flags(struct ifreq *, int flags);

#ifdef __NetBSD__
static int af_link_ioctl(unsigned long command, const void *arg);
#endif

static void netdev_bsd_run(void);

/* Returns true if 'netdev_class' is one of the classes implemented in this
 * file.  Both classes share netdev_bsd_run() as their "run" callback, so
 * that pointer identifies them. */
static bool
is_netdev_bsd_class(const struct netdev_class *netdev_class)
{
    return netdev_class->run == netdev_bsd_run;
}

/* Converts 'netdev' to the enclosing struct netdev_bsd, asserting that it
 * really belongs to one of this file's classes. */
static struct netdev_bsd *
netdev_bsd_cast(const struct netdev *netdev)
{
    ovs_assert(is_netdev_bsd_class(netdev_get_class(netdev)));
    return CONTAINER_OF(netdev, struct netdev_bsd, up);
}

/* Converts 'rx' to the enclosing struct netdev_rx_bsd, asserting that its
 * netdev belongs to one of this file's classes. */
static struct netdev_rx_bsd *
netdev_rx_bsd_cast(const struct netdev_rx *rx)
{
    ovs_assert(is_netdev_bsd_class(netdev_get_class(rx->netdev)));
    return CONTAINER_OF(rx, struct netdev_rx_bsd, up);
}

/* Returns the kernel's name for 'netdev'.  The string is owned by the
 * device; the caller must not free it. */
static const char *
netdev_get_kernel_name(const struct netdev *netdev)
{
    return netdev_bsd_cast(netdev)->kernel_name;
}

/*
 * Perform periodic work needed by netdev. In BSD netdevs it checks for any
 * interface status changes, and eventually calls all the user callbacks.
 */
static void
netdev_bsd_run(void)
{
    rtbsd_notifier_run();
}

/*
 * Arranges for poll_block() to wake up if the "run" member function needs to
 * be called.
*/
static void
netdev_bsd_wait(void)
{
    rtbsd_notifier_wait();
}

/* Records that 'dev' changed by advancing its change sequence number.  The
 * sequence number skips zero when it wraps around. */
static void
netdev_bsd_changed(struct netdev_bsd *dev)
{
    dev->change_seq++;
    if (!dev->change_seq) {
        dev->change_seq++;
    }
}

/* Invalidate cache in case of interface status change.
 *
 * If 'change' is nonnull, only the device named 'change->if_name' is
 * invalidated (and only if it belongs to one of this file's classes).  If
 * 'change' is null, every device of class netdev_bsd_class is invalidated. */
static void
netdev_bsd_cache_cb(const struct rtbsd_change *change,
                    void *aux OVS_UNUSED)
{
    struct netdev_bsd *dev;

    if (change) {
        struct netdev *base_dev = netdev_from_name(change->if_name);

        if (base_dev) {
            const struct netdev_class *netdev_class =
                                                netdev_get_class(base_dev);

            if (is_netdev_bsd_class(netdev_class)) {
                dev = netdev_bsd_cast(base_dev);
                dev->cache_valid = 0;
                netdev_bsd_changed(dev);
            }
            /* Drop the reference obtained from netdev_from_name(). */
            netdev_close(base_dev);
        }
    } else {
        /*
         * XXX the API is lacking, we should be able to iterate on the list of
         * netdevs without having to store the info in a temp shash.
         */
        struct shash device_shash;
        struct shash_node *node;

        shash_init(&device_shash);
        netdev_get_devices(&netdev_bsd_class, &device_shash);
        SHASH_FOR_EACH (node, &device_shash) {
            struct netdev *netdev = node->data;
            dev = netdev_bsd_cast(netdev);
            dev->cache_valid = 0;
            netdev_bsd_changed(dev);
            netdev_close(netdev);
        }
        shash_destroy(&device_shash);
    }
}

/* Takes a reference on the shared cache notifier, registering it on the
 * first reference.  Returns 0 on success, otherwise the nonzero value
 * returned by rtbsd_notifier_register(), in which case no reference is
 * taken. */
static int
cache_notifier_ref(void)
{
    int ret = 0;

    if (!cache_notifier_refcount) {
        ret = rtbsd_notifier_register(&netdev_bsd_cache_notifier,
                                                netdev_bsd_cache_cb, NULL);
        if (ret) {
            return ret;
        }
    }
    cache_notifier_refcount++;
    return 0;
}

/* Releases a reference on the shared cache notifier, unregistering it when
 * the last reference goes away.  Always returns 0. */
static int
cache_notifier_unref(void)
{
    cache_notifier_refcount--;
    if (cache_notifier_refcount == 0) {
        rtbsd_notifier_unregister(&netdev_bsd_cache_notifier);
    }
    return 0;
}

/* Allocates a zeroed struct netdev_bsd and returns its embedded netdev. */
static struct netdev *
netdev_bsd_alloc(void)
{
    struct netdev_bsd *netdev = xzalloc(sizeof *netdev);
    return &netdev->up;
}

/* "construct" callback of the "system" class. */
static int
netdev_bsd_construct_system(struct netdev *netdev_)
{
    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
    enum netdev_flags flags;
    int error;

    error = cache_notifier_ref();
    if (error) {
        return error;
    }

    ovs_mutex_init(&netdev->mutex);
    netdev->change_seq = 1;
    /* A system device has no tap character device. */
    netdev->tap_fd = -1;
netdev->kernel_name = xstrdup(netdev_->name); /* Verify that the netdev really exists by attempting to read its flags */ error = netdev_get_flags(netdev_, &flags); if (error == ENXIO) { free(netdev->kernel_name); cache_notifier_unref(); return error; } return 0; } static int netdev_bsd_construct_tap(struct netdev *netdev_) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); const char *name = netdev_->name; int error = 0; struct ifreq ifr; char *kernel_name = NULL; error = cache_notifier_ref(); if (error) { goto error; } memset(&ifr, 0, sizeof(ifr)); /* Create a tap device by opening /dev/tap. The TAPGIFNAME ioctl is used * to retrieve the name of the tap device. */ ovs_mutex_init(&netdev->mutex); netdev->tap_fd = open("/dev/tap", O_RDWR); netdev->change_seq = 1; if (netdev->tap_fd < 0) { error = errno; VLOG_WARN("opening \"/dev/tap\" failed: %s", ovs_strerror(error)); goto error_unref_notifier; } /* Retrieve tap name (e.g. tap0) */ if (ioctl(netdev->tap_fd, TAPGIFNAME, &ifr) == -1) { /* XXX Need to destroy the device? */ error = errno; close(netdev->tap_fd); goto error_unref_notifier; } /* Change the name of the tap device */ #if defined(SIOCSIFNAME) ifr.ifr_data = (void *)name; error = af_inet_ioctl(SIOCSIFNAME, &ifr); if (error) { destroy_tap(netdev->tap_fd, ifr.ifr_name); goto error_unref_notifier; } kernel_name = xstrdup(name); #else /* * NetBSD doesn't support inteface renaming. */ VLOG_INFO("tap %s is created for bridge %s", ifr.ifr_name, name); kernel_name = xstrdup(ifr.ifr_name); #endif /* set non-blocking. 
*/ error = set_nonblocking(netdev->tap_fd); if (error) { destroy_tap(netdev->tap_fd, kernel_name); goto error_unref_notifier; } /* Turn device UP */ ifr_set_flags(&ifr, IFF_UP); strncpy(ifr.ifr_name, kernel_name, sizeof ifr.ifr_name); error = af_inet_ioctl(SIOCSIFFLAGS, &ifr); if (error) { destroy_tap(netdev->tap_fd, kernel_name); goto error_unref_notifier; } netdev->kernel_name = kernel_name; return 0; error_unref_notifier: ovs_mutex_destroy(&netdev->mutex); cache_notifier_unref(); error: free(kernel_name); return error; } static void netdev_bsd_destruct(struct netdev *netdev_) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); cache_notifier_unref(); if (netdev->tap_fd >= 0) { destroy_tap(netdev->tap_fd, netdev_get_kernel_name(netdev_)); } if (netdev->pcap) { pcap_close(netdev->pcap); } free(netdev->kernel_name); ovs_mutex_destroy(&netdev->mutex); } static void netdev_bsd_dealloc(struct netdev *netdev_) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); free(netdev); } static int netdev_bsd_open_pcap(const char *name, pcap_t **pcapp, int *fdp) { char errbuf[PCAP_ERRBUF_SIZE]; pcap_t *pcap = NULL; int one = 1; int error; int fd; /* Open the pcap device. The device is opened in non-promiscuous mode * because the interface flags are manually set by the caller. */ errbuf[0] = '\0'; pcap = pcap_open_live(name, PCAP_SNAPLEN, 0, 1000, errbuf); if (!pcap) { VLOG_ERR_RL(&rl, "%s: pcap_open_live failed: %s", name, errbuf); error = EIO; goto error; } if (errbuf[0] != '\0') { VLOG_WARN_RL(&rl, "%s: pcap_open_live: %s", name, errbuf); } /* Get the underlying fd. */ fd = pcap_get_selectable_fd(pcap); if (fd == -1) { VLOG_WARN_RL(&rl, "%s: no selectable file descriptor", name); error = errno; goto error; } /* Set non-blocking mode. 
Also the BIOCIMMEDIATE ioctl must be called * on the file descriptor returned by pcap_get_selectable_fd to achieve * a real non-blocking behaviour.*/ error = pcap_setnonblock(pcap, 1, errbuf); if (error == -1) { error = errno; goto error; } /* This call assure that reads return immediately upon packet * reception. Otherwise, a read will block until either the kernel * buffer becomes full or a timeout occurs. */ if (ioctl(fd, BIOCIMMEDIATE, &one) < 0 ) { VLOG_ERR_RL(&rl, "ioctl(BIOCIMMEDIATE) on %s device failed: %s", name, ovs_strerror(errno)); error = errno; goto error; } /* Capture only incoming packets. */ error = pcap_setdirection(pcap, PCAP_D_IN); if (error == -1) { error = errno; goto error; } *pcapp = pcap; *fdp = fd; return 0; error: if (pcap) { pcap_close(pcap); } *pcapp = NULL; *fdp = -1; return error; } static struct netdev_rx * netdev_bsd_rx_alloc(void) { struct netdev_rx_bsd *rx = xzalloc(sizeof *rx); return &rx->up; } static int netdev_bsd_rx_construct(struct netdev_rx *rx_) { struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_); struct netdev *netdev_ = rx->up.netdev; struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); int error; if (!strcmp(netdev_get_type(netdev_), "tap")) { rx->pcap_handle = NULL; rx->fd = netdev->tap_fd; error = 0; } else { ovs_mutex_lock(&netdev->mutex); error = netdev_bsd_open_pcap(netdev_get_kernel_name(netdev_), &rx->pcap_handle, &rx->fd); if (!error) { netdev_bsd_changed(netdev); } ovs_mutex_unlock(&netdev->mutex); } return error; } static void netdev_bsd_rx_destruct(struct netdev_rx *rx_) { struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_); if (rx->pcap_handle) { pcap_close(rx->pcap_handle); } } static void netdev_bsd_rx_dealloc(struct netdev_rx *rx_) { struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_); free(rx); } /* The recv callback of the netdev class returns the number of bytes of the * received packet. * * This can be done by the pcap_next() function. 
Unfortunately pcap_next() does * not make difference between a missing packet on the capture interface and * an error during the file capture. We can use the pcap_dispatch() function * instead, which is able to distinguish between errors and null packet. * * To make pcap_dispatch() returns the number of bytes read from the interface * we need to define the following callback and argument. */ struct pcap_arg { void *data; int size; int retval; }; /* * This callback will be executed on every captured packet. * * If the packet captured by pcap_dispatch() does not fit the pcap buffer, * pcap returns a truncated packet and we follow this behavior. * * The argument args->retval is the packet size in bytes. */ static void proc_pkt(u_char *args_, const struct pcap_pkthdr *hdr, const u_char *packet) { struct pcap_arg *args = (struct pcap_arg *)args_; if (args->size < hdr->len) { VLOG_WARN_RL(&rl, "packet truncated"); args->retval = args->size; } else { args->retval = hdr->len; } /* copy the packet to our buffer */ memcpy(args->data, packet, args->retval); } /* * This function attempts to receive a packet from the specified network * device. It is assumed that the network device is a system device or a tap * device opened as a system one. In this case the read operation is performed * from rx->pcap. */ static int netdev_rx_bsd_recv_pcap(struct netdev_rx_bsd *rx, void *data, size_t size) { struct pcap_arg arg; int ret; /* prepare the pcap argument to store the packet */ arg.size = size; arg.data = data; for (;;) { ret = pcap_dispatch(rx->pcap_handle, 1, proc_pkt, (u_char *) &arg); if (ret > 0) { return arg.retval; /* arg.retval < 0 is handled in the caller */ } if (ret == -1) { if (errno == EINTR) { continue; } } return -EAGAIN; } } /* * This function attempts to receive a packet from the specified network * device. It is assumed that the network device is a tap device and * 'rx->fd' is initialized with the tap file descriptor. 
*/ static int netdev_rx_bsd_recv_tap(struct netdev_rx_bsd *rx, void *data, size_t size) { for (;;) { ssize_t retval = read(rx->fd, data, size); if (retval >= 0) { return retval; } else if (errno != EINTR) { if (errno != EAGAIN) { VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s", ovs_strerror(errno), netdev_rx_get_name(&rx->up)); } return -errno; } } } static int netdev_bsd_rx_recv(struct netdev_rx *rx_, void *data, size_t size) { struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_); return (rx->pcap_handle ? netdev_rx_bsd_recv_pcap(rx, data, size) : netdev_rx_bsd_recv_tap(rx, data, size)); } /* * Registers with the poll loop to wake up from the next call to poll_block() * when a packet is ready to be received with netdev_rx_recv() on 'rx'. */ static void netdev_bsd_rx_wait(struct netdev_rx *rx_) { struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_); poll_fd_wait(rx->fd, POLLIN); } /* Discards all packets waiting to be received from 'rx'. */ static int netdev_bsd_rx_drain(struct netdev_rx *rx_) { struct ifreq ifr; struct netdev_rx_bsd *rx = netdev_rx_bsd_cast(rx_); strcpy(ifr.ifr_name, netdev_get_kernel_name(netdev_rx_get_netdev(rx_))); if (ioctl(rx->fd, BIOCFLUSH, &ifr) == -1) { VLOG_DBG_RL(&rl, "%s: ioctl(BIOCFLUSH) failed: %s", netdev_rx_get_name(rx_), ovs_strerror(errno)); return errno; } return 0; } /* * Send a packet on the specified network device. The device could be either a * system or a tap device. 
*/
static int
netdev_bsd_send(struct netdev *netdev_, const void *data, size_t size)
{
    struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
    const char *name = netdev_get_name(netdev_);
    int error;

    ovs_mutex_lock(&dev->mutex);
    /* A non-tap device sends through pcap; the handle is opened lazily on
     * the first send and then kept in 'dev->pcap'. */
    if (dev->tap_fd < 0 && !dev->pcap) {
        error = netdev_bsd_open_pcap(name, &dev->pcap, &dev->fd);
    } else {
        error = 0;
    }

    /* Loop only to retry after EINTR: every other outcome either sets
     * 'error' (ending the loop) or breaks out on success. */
    while (!error) {
        ssize_t retval;
        if (dev->tap_fd >= 0) {
            retval = write(dev->tap_fd, data, size);
        } else {
            retval = pcap_inject(dev->pcap, data, size);
        }
        if (retval < 0) {
            if (errno == EINTR) {
                continue;
            } else {
                error = errno;
                /* EAGAIN is expected on a non-blocking descriptor, so it is
                 * not worth a warning. */
                if (error != EAGAIN) {
                    VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: "
                                 "%s", name, ovs_strerror(error));
                }
            }
        } else if (retval != size) {
            VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of "
                         "%zu) on %s", retval, size, name);
            error = EMSGSIZE;
        } else {
            break;
        }
    }

    ovs_mutex_unlock(&dev->mutex);
    return error;
}

/*
 * Registers with the poll loop to wake up from the next call to poll_block()
 * when the packet transmission queue has sufficient room to transmit a packet
 * with netdev_send().
 */
static void
netdev_bsd_send_wait(struct netdev *netdev_)
{
    struct netdev_bsd *dev = netdev_bsd_cast(netdev_);

    ovs_mutex_lock(&dev->mutex);
    if (dev->tap_fd >= 0) {
        /* TAP device always accepts packets. */
        poll_immediate_wake();
    } else if (dev->pcap) {
        poll_fd_wait(dev->fd, POLLOUT);
    } else {
        /* We haven't even tried to send a packet yet. */
        poll_immediate_wake();
    }
    ovs_mutex_unlock(&dev->mutex);
}

/*
 * Attempts to set 'netdev''s MAC address to 'mac'. Returns 0 if successful,
 * otherwise a positive errno value.
*/
static int
netdev_bsd_set_etheraddr(struct netdev *netdev_,
                         const uint8_t mac[ETH_ADDR_LEN])
{
    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
    int error = 0;

    ovs_mutex_lock(&netdev->mutex);
    /* Skip the system call when the cached address is valid and already
     * equals 'mac'. */
    if (!(netdev->cache_valid & VALID_ETHERADDR)
        || !eth_addr_equals(netdev->etheraddr, mac)) {
        error = set_etheraddr(netdev_get_kernel_name(netdev_), AF_LINK,
                              ETH_ADDR_LEN, mac);
        if (!error) {
            netdev->cache_valid |= VALID_ETHERADDR;
            memcpy(netdev->etheraddr, mac, ETH_ADDR_LEN);
            netdev_bsd_changed(netdev);
        }
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/*
 * Copies 'netdev''s MAC address into 'mac', fetching it from the system and
 * caching it first if the cached copy is not valid.  Returns 0 if
 * successful, otherwise a nonzero error value, in which case 'mac' is not
 * modified.
 */
static int
netdev_bsd_get_etheraddr(const struct netdev *netdev_,
                         uint8_t mac[ETH_ADDR_LEN])
{
    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
    int error = 0;

    ovs_mutex_lock(&netdev->mutex);
    if (!(netdev->cache_valid & VALID_ETHERADDR)) {
        error = get_etheraddr(netdev_get_kernel_name(netdev_),
                              netdev->etheraddr);
        if (!error) {
            netdev->cache_valid |= VALID_ETHERADDR;
        }
    }
    if (!error) {
        memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN);
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/*
 * Returns the maximum size of transmitted (and received) packets on 'netdev',
 * in bytes, not including the hardware header; thus, this is typically 1500
 * bytes for Ethernet devices.
*/ static int netdev_bsd_get_mtu(const struct netdev *netdev_, int *mtup) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); int error = 0; ovs_mutex_lock(&netdev->mutex); if (!(netdev->cache_valid & VALID_MTU)) { struct ifreq ifr; error = af_inet_ifreq_ioctl(netdev_get_kernel_name(netdev_), &ifr, SIOCGIFMTU, "SIOCGIFMTU"); if (!error) { netdev->mtu = ifr.ifr_mtu; netdev->cache_valid |= VALID_MTU; } } if (!error) { *mtup = netdev->mtu; } ovs_mutex_unlock(&netdev->mutex); return 0; } static int netdev_bsd_get_ifindex(const struct netdev *netdev_) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); int ifindex, error; ovs_mutex_lock(&netdev->mutex); error = get_ifindex(netdev_, &ifindex); ovs_mutex_unlock(&netdev->mutex); return error ? -error : ifindex; } static int netdev_bsd_get_carrier(const struct netdev *netdev_, bool *carrier) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); int error = 0; ovs_mutex_lock(&netdev->mutex); if (!(netdev->cache_valid & VALID_CARRIER)) { struct ifmediareq ifmr; memset(&ifmr, 0, sizeof(ifmr)); strncpy(ifmr.ifm_name, netdev_get_kernel_name(netdev_), sizeof ifmr.ifm_name); error = af_inet_ioctl(SIOCGIFMEDIA, &ifmr); if (!error) { netdev->carrier = (ifmr.ifm_status & IFM_ACTIVE) == IFM_ACTIVE; netdev->cache_valid |= VALID_CARRIER; /* If the interface doesn't report whether the media is active, * just assume it is active. 
*/ if ((ifmr.ifm_status & IFM_AVALID) == 0) { netdev->carrier = true; } } else { VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFMEDIA) failed: %s", netdev_get_name(netdev_), ovs_strerror(error)); } } if (!error) { *carrier = netdev->carrier; } ovs_mutex_unlock(&netdev->mutex); return error; } static void convert_stats(struct netdev_stats *stats, const struct if_data *ifd) { /* * note: UINT64_MAX means unsupported */ stats->rx_packets = ifd->ifi_ipackets; stats->tx_packets = ifd->ifi_opackets; stats->rx_bytes = ifd->ifi_obytes; stats->tx_bytes = ifd->ifi_ibytes; stats->rx_errors = ifd->ifi_ierrors; stats->tx_errors = ifd->ifi_oerrors; stats->rx_dropped = ifd->ifi_iqdrops; stats->tx_dropped = UINT64_MAX; stats->multicast = ifd->ifi_imcasts; stats->collisions = ifd->ifi_collisions; stats->rx_length_errors = UINT64_MAX; stats->rx_over_errors = UINT64_MAX; stats->rx_crc_errors = UINT64_MAX; stats->rx_frame_errors = UINT64_MAX; stats->rx_fifo_errors = UINT64_MAX; stats->rx_missed_errors = UINT64_MAX; stats->tx_aborted_errors = UINT64_MAX; stats->tx_carrier_errors = UINT64_MAX; stats->tx_fifo_errors = UINT64_MAX; stats->tx_heartbeat_errors = UINT64_MAX; stats->tx_window_errors = UINT64_MAX; } /* Retrieves current device stats for 'netdev'. 
*/ static int netdev_bsd_get_stats(const struct netdev *netdev_, struct netdev_stats *stats) { #if defined(__FreeBSD__) int if_count, i; int mib[6]; size_t len; struct ifmibdata ifmd; mib[0] = CTL_NET; mib[1] = PF_LINK; mib[2] = NETLINK_GENERIC; mib[3] = IFMIB_SYSTEM; mib[4] = IFMIB_IFCOUNT; len = sizeof(if_count); if (sysctl(mib, 5, &if_count, &len, (void *)0, 0) == -1) { VLOG_DBG_RL(&rl, "%s: sysctl failed: %s", netdev_get_name(netdev_), ovs_strerror(errno)); return errno; } mib[5] = IFDATA_GENERAL; mib[3] = IFMIB_IFDATA; len = sizeof(ifmd); for (i = 1; i <= if_count; i++) { mib[4] = i; //row if (sysctl(mib, 6, &ifmd, &len, (void *)0, 0) == -1) { VLOG_DBG_RL(&rl, "%s: sysctl failed: %s", netdev_get_name(netdev_), ovs_strerror(errno)); return errno; } else if (!strcmp(ifmd.ifmd_name, netdev_get_name(netdev_))) { convert_stats(stats, &ifmd.ifmd_data); break; } } return 0; #elif defined(__NetBSD__) struct ifdatareq ifdr; int error; memset(&ifdr, 0, sizeof(ifdr)); strncpy(ifdr.ifdr_name, netdev_get_kernel_name(netdev_), sizeof(ifdr.ifdr_name)); error = af_link_ioctl(SIOCGIFDATA, &ifdr); if (!error) { convert_stats(stats, &ifdr.ifdr_data); } return error; #else #error not implemented #endif } static uint32_t netdev_bsd_parse_media(int media) { uint32_t supported = 0; bool half_duplex = media & IFM_HDX ? true : false; switch (IFM_SUBTYPE(media)) { case IFM_10_2: case IFM_10_5: case IFM_10_STP: case IFM_10_T: supported |= half_duplex ? NETDEV_F_10MB_HD : NETDEV_F_10MB_FD; supported |= NETDEV_F_COPPER; break; case IFM_10_FL: supported |= half_duplex ? NETDEV_F_10MB_HD : NETDEV_F_10MB_FD; supported |= NETDEV_F_FIBER; break; case IFM_100_T2: case IFM_100_T4: case IFM_100_TX: case IFM_100_VG: supported |= half_duplex ? NETDEV_F_100MB_HD : NETDEV_F_100MB_FD; supported |= NETDEV_F_COPPER; break; case IFM_100_FX: supported |= half_duplex ? NETDEV_F_100MB_HD : NETDEV_F_100MB_FD; supported |= NETDEV_F_FIBER; break; case IFM_1000_CX: case IFM_1000_T: supported |= half_duplex ? 
NETDEV_F_1GB_HD : NETDEV_F_1GB_FD; supported |= NETDEV_F_COPPER; break; case IFM_1000_LX: case IFM_1000_SX: supported |= half_duplex ? NETDEV_F_1GB_HD : NETDEV_F_1GB_FD; supported |= NETDEV_F_FIBER; break; case IFM_10G_CX4: supported |= NETDEV_F_10GB_FD; supported |= NETDEV_F_COPPER; break; case IFM_10G_LR: case IFM_10G_SR: supported |= NETDEV_F_10GB_FD; supported |= NETDEV_F_FIBER; break; default: return 0; } if (IFM_SUBTYPE(media) == IFM_AUTO) { supported |= NETDEV_F_AUTONEG; } /* if (media & IFM_ETH_FMASK) { supported |= NETDEV_F_PAUSE; } */ return supported; } /* * Stores the features supported by 'netdev' into each of '*current', * '*advertised', '*supported', and '*peer' that are non-null. Each value is a * bitmap of "enum ofp_port_features" bits, in host byte order. Returns 0 if * successful, otherwise a positive errno value. On failure, all of the * passed-in values are set to 0. */ static int netdev_bsd_get_features(const struct netdev *netdev, enum netdev_features *current, uint32_t *advertised, enum netdev_features *supported, uint32_t *peer) { struct ifmediareq ifmr; int *media_list; int i; int error; /* XXX Look into SIOCGIFCAP instead of SIOCGIFMEDIA */ memset(&ifmr, 0, sizeof(ifmr)); strncpy(ifmr.ifm_name, netdev_get_name(netdev), sizeof ifmr.ifm_name); /* We make two SIOCGIFMEDIA ioctl calls. The first to determine the * number of supported modes, and a second with a buffer to retrieve * them. 
*/ error = af_inet_ioctl(SIOCGIFMEDIA, &ifmr); if (error) { VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFMEDIA) failed: %s", netdev_get_name(netdev), ovs_strerror(error)); return error; } media_list = xcalloc(ifmr.ifm_count, sizeof(int)); ifmr.ifm_ulist = media_list; if (IFM_TYPE(ifmr.ifm_current) != IFM_ETHER) { VLOG_DBG_RL(&rl, "%s: doesn't appear to be ethernet", netdev_get_name(netdev)); error = EINVAL; goto cleanup; } error = af_inet_ioctl(SIOCGIFMEDIA, &ifmr); if (error) { VLOG_DBG_RL(&rl, "%s: ioctl(SIOCGIFMEDIA) failed: %s", netdev_get_name(netdev), ovs_strerror(error)); goto cleanup; } /* Current settings. */ *current = netdev_bsd_parse_media(ifmr.ifm_active); /* Advertised features. */ *advertised = netdev_bsd_parse_media(ifmr.ifm_current); /* Supported features. */ *supported = 0; for (i = 0; i < ifmr.ifm_count; i++) { *supported |= netdev_bsd_parse_media(ifmr.ifm_ulist[i]); } /* Peer advertisements. */ *peer = 0; /* XXX */ error = 0; cleanup: free(media_list); return error; } /* * If 'netdev' has an assigned IPv4 address, sets '*in4' to that address and * '*netmask' to its netmask and returns true. Otherwise, returns false. */ static int netdev_bsd_get_in4(const struct netdev *netdev_, struct in_addr *in4, struct in_addr *netmask) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); int error = 0; ovs_mutex_lock(&netdev->mutex); if (!(netdev->cache_valid & VALID_IN4)) { struct ifreq ifr; ifr.ifr_addr.sa_family = AF_INET; error = af_inet_ifreq_ioctl(netdev_get_kernel_name(netdev_), &ifr, SIOCGIFADDR, "SIOCGIFADDR"); if (!error) { const struct sockaddr_in *sin; sin = (struct sockaddr_in *) &ifr.ifr_addr; netdev->in4 = sin->sin_addr; netdev->cache_valid |= VALID_IN4; error = af_inet_ifreq_ioctl(netdev_get_kernel_name(netdev_), &ifr, SIOCGIFNETMASK, "SIOCGIFNETMASK"); if (!error) { *netmask = sin->sin_addr; } } } if (!error) { *in4 = netdev->in4; *netmask = netdev->netmask; } ovs_mutex_unlock(&netdev->mutex); return error ? error : in4->s_addr == INADDR_ANY ? 
EADDRNOTAVAIL : 0; } /* * Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns a * positive errno value. */ static int netdev_bsd_set_in4(struct netdev *netdev_, struct in_addr addr, struct in_addr mask) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); int error; ovs_mutex_lock(&netdev->mutex); error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", addr); if (!error) { if (addr.s_addr != INADDR_ANY) { error = do_set_addr(netdev_, SIOCSIFNETMASK, "SIOCSIFNETMASK", mask); if (!error) { netdev->cache_valid |= VALID_IN4; netdev->in4 = addr; netdev->netmask = mask; } } netdev_bsd_changed(netdev); } ovs_mutex_unlock(&netdev->mutex); return error; } static int netdev_bsd_get_in6(const struct netdev *netdev_, struct in6_addr *in6) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); if (!(netdev->cache_valid & VALID_IN6)) { struct ifaddrs *ifa, *head; struct sockaddr_in6 *sin6; const char *netdev_name = netdev_get_name(netdev_); if (getifaddrs(&head) != 0) { VLOG_ERR("getifaddrs on %s device failed: %s", netdev_name, ovs_strerror(errno)); return errno; } for (ifa = head; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr->sa_family == AF_INET6 && !strcmp(ifa->ifa_name, netdev_name)) { sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (sin6) { memcpy(&netdev->in6, &sin6->sin6_addr, sin6->sin6_len); netdev->cache_valid |= VALID_IN6; *in6 = netdev->in6; freeifaddrs(head); return 0; } } } return EADDRNOTAVAIL; } *in6 = netdev->in6; return 0; } #if defined(__NetBSD__) static char * netdev_bsd_kernel_name_to_ovs_name(const char *kernel_name) { char *ovs_name = NULL; struct shash device_shash; struct shash_node *node; shash_init(&device_shash); netdev_get_devices(&netdev_tap_class, &device_shash); SHASH_FOR_EACH(node, &device_shash) { struct netdev *netdev = node->data; struct netdev_bsd * const dev = netdev_bsd_cast(netdev); if (!strcmp(dev->kernel_name, kernel_name)) { free(ovs_name); 
ovs_name = xstrdup(netdev_get_name(&dev->up)); } netdev_close(netdev); } shash_destroy(&device_shash); return ovs_name ? ovs_name : xstrdup(kernel_name); } #endif static int netdev_bsd_get_next_hop(const struct in_addr *host OVS_UNUSED, struct in_addr *next_hop OVS_UNUSED, char **netdev_name OVS_UNUSED) { #if defined(__NetBSD__) static int seq = 0; struct sockaddr_in sin; struct sockaddr_dl sdl; int s; int i; struct { struct rt_msghdr h; char space[512]; } buf; struct rt_msghdr *rtm = &buf.h; const pid_t pid = getpid(); char *cp; ssize_t ssz; bool gateway = false; char *ifname = NULL; int saved_errno; memset(next_hop, 0, sizeof(*next_hop)); *netdev_name = NULL; memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_port = 0; sin.sin_addr = *host; memset(&sdl, 0, sizeof(sdl)); sdl.sdl_len = sizeof(sdl); sdl.sdl_family = AF_LINK; s = socket(PF_ROUTE, SOCK_RAW, 0); memset(&buf, 0, sizeof(buf)); rtm->rtm_flags = RTF_HOST|RTF_UP; rtm->rtm_version = RTM_VERSION; rtm->rtm_addrs = RTA_DST|RTA_IFP; cp = (void *)&buf.space; memcpy(cp, &sin, sizeof(sin)); RT_ADVANCE(cp, (struct sockaddr *)(void *)&sin); memcpy(cp, &sdl, sizeof(sdl)); RT_ADVANCE(cp, (struct sockaddr *)(void *)&sdl); rtm->rtm_msglen = cp - (char *)(void *)rtm; rtm->rtm_seq = ++seq; rtm->rtm_type = RTM_GET; rtm->rtm_pid = pid; write(s, rtm, rtm->rtm_msglen); memset(&buf, 0, sizeof(buf)); do { ssz = read(s, &buf, sizeof(buf)); } while (ssz > 0 && (rtm->rtm_seq != seq || rtm->rtm_pid != pid)); saved_errno = errno; close(s); if (ssz <= 0) { if (ssz < 0) { return saved_errno; } return EPIPE; /* XXX */ } cp = (void *)&buf.space; for (i = 1; i; i <<= 1) { if ((rtm->rtm_addrs & i) != 0) { const struct sockaddr *sa = (const void *)cp; if ((i == RTA_GATEWAY) && sa->sa_family == AF_INET) { const struct sockaddr_in * const sin = (const struct sockaddr_in *)sa; *next_hop = sin->sin_addr; gateway = true; } if ((i == RTA_IFP) && sa->sa_family == AF_LINK) { const struct sockaddr_dl * const 
sdl = (const struct sockaddr_dl *)sa;
                char *kernel_name;

                /* 'sdl_data' starts with the interface name, 'sdl_nlen'
                 * bytes long and not null-terminated. */
                kernel_name = xmemdup0(sdl->sdl_data, sdl->sdl_nlen);
                ifname = netdev_bsd_kernel_name_to_ovs_name(kernel_name);
                free(kernel_name);
            }
            RT_ADVANCE(cp, sa);
        }
    }
    if (ifname == NULL) {
        return ENXIO;
    }
    /* A route without a gateway means that 'host' is reached directly. */
    if (!gateway) {
        *next_hop = *host;
    }
    *netdev_name = ifname;
    VLOG_DBG("host " IP_FMT " next-hop " IP_FMT " if %s",
      IP_ARGS(host->s_addr), IP_ARGS(next_hop->s_addr), *netdev_name);
    return 0;
#else
    return EOPNOTSUPP;
#endif
}

/* Looks up 'ip' in the system's ARP table (NetBSD only) and, if an entry
 * with an Ethernet-sized link-level address is found, copies that address
 * into 'mac'.
 *
 * Returns 0 if successful, ENXIO if there is no suitable entry, or the errno
 * value from sysctl().  On platforms other than NetBSD, returns
 * EOPNOTSUPP. */
static int
netdev_bsd_arp_lookup(const struct netdev *netdev OVS_UNUSED,
                      ovs_be32 ip OVS_UNUSED,
                      uint8_t mac[ETH_ADDR_LEN] OVS_UNUSED)
{
#if defined(__NetBSD__)
    const struct rt_msghdr *rtm;
    size_t needed;
    char *buf;
    const char *cp;
    const char *ep;
    int mib[6];
    int error;

    buf = NULL;
    mib[0] = CTL_NET;
    mib[1] = PF_ROUTE;
    mib[2] = 0;
    mib[3] = AF_INET;
    mib[4] = NET_RT_FLAGS;
    mib[5] = RTF_LLINFO;
    /* The first call, with a null buffer, only reports the size needed. */
    if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1) {
        error = errno;
        goto error;
    }
    buf = xmalloc(needed);
    if (sysctl(mib, 6, buf, &needed, NULL, 0) == -1) {
        error = errno;
        goto error;
    }
    ep = buf + needed;
    /* The buffer holds a sequence of routing messages, each 'rtm_msglen'
     * bytes long.  In each one the header is followed by the IP address and
     * then, after rounding up, by the link-level address. */
    for (cp = buf; cp < ep; cp += rtm->rtm_msglen) {
        const struct sockaddr_inarp *sina;
        const struct sockaddr_dl *sdl;

        rtm = (const void *)cp;
        sina = (const void *)(rtm + 1);
        if (ip != sina->sin_addr.s_addr) {
            continue;
        }
        sdl = (const void *)
           ((const char *)(const void *)sina + RT_ROUNDUP(sina->sin_len));
        if (sdl->sdl_alen == ETH_ADDR_LEN) {
            /* The address follows the 'sdl_nlen'-byte interface name. */
            memcpy(mac, &sdl->sdl_data[sdl->sdl_nlen], ETH_ADDR_LEN);
            error = 0;
            /* Despite its name, the label is the common exit path, used
             * for success as well. */
            goto error;
        }
    }
    error = ENXIO;
error:
    free(buf);
    return error;
#else
    return EOPNOTSUPP;
#endif
}

/* Fills in '*sa' as an AF_INET socket address for 'addr', with port 0. */
static void
make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr)
{
    struct sockaddr_in sin;

    memset(&sin, 0, sizeof sin);
    sin.sin_family = AF_INET;
    sin.sin_addr = addr;
    sin.sin_port = 0;

    memset(sa, 0, sizeof *sa);
    memcpy(sa, &sin, sizeof sin);
}

/* Issues the address-setting ioctl 'ioctl_nr' on 'netdev' with 'addr' as
 * its argument.  'ioctl_name' is passed on to af_inet_ifreq_ioctl().
 * Returns the value that af_inet_ifreq_ioctl() returns. */
static int
do_set_addr(struct netdev *netdev,
            unsigned long ioctl_nr, const char *ioctl_name,
            struct in_addr addr)
{
    struct ifreq ifr;
    make_in4_sockaddr(&ifr.ifr_addr,
addr); return af_inet_ifreq_ioctl(netdev_get_kernel_name(netdev), &ifr, ioctl_nr, ioctl_name); }

/* Translates the NETDEV_UP and NETDEV_PROMISC bits of 'nd' into the
 * corresponding IFF_* interface flag bits and returns the result.  Where
 * the platform defines IFF_PPROMISC it is set together with IFF_PROMISC. */
static int
nd_to_iff_flags(enum netdev_flags nd)
{
    int iff = 0;
    if (nd & NETDEV_UP) {
        iff |= IFF_UP;
    }
    if (nd & NETDEV_PROMISC) {
        iff |= IFF_PROMISC;
#if defined(IFF_PPROMISC)
        iff |= IFF_PPROMISC;
#endif
    }
    return iff;
}

/* Translates the IFF_UP and IFF_PROMISC bits of 'iff' into the
 * corresponding NETDEV_* bits and returns the result.  Other bits in 'iff'
 * are ignored. */
static int
iff_to_nd_flags(int iff)
{
    enum netdev_flags nd = 0;
    if (iff & IFF_UP) {
        nd |= NETDEV_UP;
    }
    if (iff & IFF_PROMISC) {
        nd |= NETDEV_PROMISC;
    }
    return nd;
}

/* Clears the flags in 'off' and then sets the flags in 'on' on 'netdev_'.
 * Stores the flags that were in effect beforehand in '*old_flagsp' (only
 * when reading the current flags succeeded).  Returns 0 on success or the
 * error returned by get_flags() or set_flags(). */
static int
netdev_bsd_update_flags(struct netdev *netdev_, enum netdev_flags off,
                        enum netdev_flags on, enum netdev_flags *old_flagsp)
{
    struct netdev_bsd *netdev = netdev_bsd_cast(netdev_);
    int old_flags, new_flags;
    int error;

    error = get_flags(netdev_, &old_flags);
    if (!error) {
        *old_flagsp = iff_to_nd_flags(old_flags);
        new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on);
        /* Only touch the device when something would actually change. */
        if (new_flags != old_flags) {
            error = set_flags(netdev_get_kernel_name(netdev_), new_flags);
            /* NOTE(review): the change notification is issued even when
             * set_flags() failed -- confirm that this is intended. */
            netdev_bsd_changed(netdev);
        }
    }
    return error;
}

/* Returns the current value of 'netdev''s change_seq member. */
static unsigned int
netdev_bsd_change_seq(const struct netdev *netdev)
{
    return netdev_bsd_cast(netdev)->change_seq;
}

const struct netdev_class netdev_bsd_class = { "system", NULL, /* init */ netdev_bsd_run, netdev_bsd_wait, netdev_bsd_alloc, netdev_bsd_construct_system, netdev_bsd_destruct, netdev_bsd_dealloc, NULL, /* get_config */ NULL, /* set_config */ NULL, /* get_tunnel_config */ netdev_bsd_send, netdev_bsd_send_wait, netdev_bsd_set_etheraddr, netdev_bsd_get_etheraddr, netdev_bsd_get_mtu, NULL, /* set_mtu */ netdev_bsd_get_ifindex, netdev_bsd_get_carrier, NULL, /* get_carrier_resets */ NULL, /* set_miimon_interval */ netdev_bsd_get_stats, NULL, /* set_stats */ netdev_bsd_get_features, NULL, /* set_advertisement */ NULL, /* set_policing */ NULL, /* get_qos_type */ NULL, /* get_qos_capabilities */ NULL, /* get_qos */ NULL, /* set_qos */ NULL, /* get_queue */ NULL, /* set_queue */ NULL, /* delete_queue */ NULL, /* get_queue_stats */ NULL, /* queue_dump_start */
NULL, /* queue_dump_next */ NULL, /* queue_dump_done */ NULL, /* dump_queue_stats */ netdev_bsd_get_in4, netdev_bsd_set_in4, netdev_bsd_get_in6, NULL, /* add_router */ netdev_bsd_get_next_hop, NULL, /* get_status */ netdev_bsd_arp_lookup, /* arp_lookup */ netdev_bsd_update_flags, netdev_bsd_change_seq, netdev_bsd_rx_alloc, netdev_bsd_rx_construct, netdev_bsd_rx_destruct, netdev_bsd_rx_dealloc, netdev_bsd_rx_recv, netdev_bsd_rx_wait, netdev_bsd_rx_drain, }; const struct netdev_class netdev_tap_class = { "tap", NULL, /* init */ netdev_bsd_run, netdev_bsd_wait, netdev_bsd_alloc, netdev_bsd_construct_tap, netdev_bsd_destruct, netdev_bsd_dealloc, NULL, /* get_config */ NULL, /* set_config */ NULL, /* get_tunnel_config */ netdev_bsd_send, netdev_bsd_send_wait, netdev_bsd_set_etheraddr, netdev_bsd_get_etheraddr, netdev_bsd_get_mtu, NULL, /* set_mtu */ netdev_bsd_get_ifindex, netdev_bsd_get_carrier, NULL, /* get_carrier_resets */ NULL, /* set_miimon_interval */ netdev_bsd_get_stats, NULL, /* set_stats */ netdev_bsd_get_features, NULL, /* set_advertisement */ NULL, /* set_policing */ NULL, /* get_qos_type */ NULL, /* get_qos_capabilities */ NULL, /* get_qos */ NULL, /* set_qos */ NULL, /* get_queue */ NULL, /* set_queue */ NULL, /* delete_queue */ NULL, /* get_queue_stats */ NULL, /* queue_dump_start */ NULL, /* queue_dump_next */ NULL, /* queue_dump_done */ NULL, /* dump_queue_stats */ netdev_bsd_get_in4, netdev_bsd_set_in4, netdev_bsd_get_in6, NULL, /* add_router */ netdev_bsd_get_next_hop, NULL, /* get_status */ netdev_bsd_arp_lookup, /* arp_lookup */ netdev_bsd_update_flags, netdev_bsd_change_seq, netdev_bsd_rx_alloc, netdev_bsd_rx_construct, netdev_bsd_rx_destruct, netdev_bsd_rx_dealloc, netdev_bsd_rx_recv, netdev_bsd_rx_wait, netdev_bsd_rx_drain, }; static void destroy_tap(int fd, const char *name) { struct ifreq ifr; close(fd); strcpy(ifr.ifr_name, name); /* XXX What to do if this call fails? 
*/ af_inet_ioctl(SIOCIFDESTROY, &ifr); } static int get_flags(const struct netdev *netdev, int *flags) { struct ifreq ifr; int error; error = af_inet_ifreq_ioctl(netdev_get_kernel_name(netdev), &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS"); *flags = ifr_get_flags(&ifr); return error; } static int set_flags(const char *name, int flags) { struct ifreq ifr; ifr_set_flags(&ifr, flags); return af_inet_ifreq_ioctl(name, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS"); } static int get_ifindex(const struct netdev *netdev_, int *ifindexp) { struct netdev_bsd *netdev = netdev_bsd_cast(netdev_); *ifindexp = 0; if (!(netdev->cache_valid & VALID_IFINDEX)) { int ifindex = if_nametoindex(netdev_get_name(netdev_)); if (ifindex <= 0) { return errno; } netdev->cache_valid |= VALID_IFINDEX; netdev->ifindex = ifindex; } *ifindexp = netdev->ifindex; return 0; } static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]) { struct ifaddrs *head; struct ifaddrs *ifa; struct sockaddr_dl *sdl; if (getifaddrs(&head) != 0) { VLOG_ERR("getifaddrs on %s device failed: %s", netdev_name, ovs_strerror(errno)); return errno; } for (ifa = head; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr->sa_family == AF_LINK) { if (!strcmp(ifa->ifa_name, netdev_name)) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl) { memcpy(ea, LLADDR(sdl), sdl->sdl_alen); freeifaddrs(head); return 0; } } } } VLOG_ERR("could not find ethernet address for %s device", netdev_name); freeifaddrs(head); return ENODEV; } static int set_etheraddr(const char *netdev_name OVS_UNUSED, int hwaddr_family OVS_UNUSED, int hwaddr_len OVS_UNUSED, const uint8_t mac[ETH_ADDR_LEN] OVS_UNUSED) { #if defined(__FreeBSD__) struct ifreq ifr; int error; memset(&ifr, 0, sizeof ifr); strncpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); ifr.ifr_addr.sa_family = hwaddr_family; ifr.ifr_addr.sa_len = hwaddr_len; memcpy(ifr.ifr_addr.sa_data, mac, hwaddr_len); error = af_inet_ioctl(SIOCSIFLLADDR, &ifr); if (error) { VLOG_ERR("ioctl(SIOCSIFLLADDR) on %s 
device failed: %s", netdev_name, ovs_strerror(error)); return error; } return 0; #elif defined(__NetBSD__) struct if_laddrreq req; struct sockaddr_dl *sdl; struct sockaddr_storage oldaddr; int error; /* * get the old address, add new one, and then remove old one. */ if (hwaddr_len != ETH_ADDR_LEN) { /* just to be safe about sockaddr storage size */ return EOPNOTSUPP; } memset(&req, 0, sizeof(req)); strncpy(req.iflr_name, netdev_name, sizeof(req.iflr_name)); req.addr.ss_len = sizeof(req.addr); req.addr.ss_family = hwaddr_family; sdl = (struct sockaddr_dl *)&req.addr; sdl->sdl_alen = hwaddr_len; error = af_link_ioctl(SIOCGLIFADDR, &req); if (error) { return error; } if (!memcmp(&sdl->sdl_data[sdl->sdl_nlen], mac, hwaddr_len)) { return 0; } oldaddr = req.addr; memset(&req, 0, sizeof(req)); strncpy(req.iflr_name, netdev_name, sizeof(req.iflr_name)); req.flags = IFLR_ACTIVE; sdl = (struct sockaddr_dl *)&req.addr; sdl->sdl_len = offsetof(struct sockaddr_dl, sdl_data) + hwaddr_len; sdl->sdl_alen = hwaddr_len; sdl->sdl_family = hwaddr_family; memcpy(sdl->sdl_data, mac, hwaddr_len); error = af_link_ioctl(SIOCALIFADDR, &req); if (error) { return error; } memset(&req, 0, sizeof(req)); strncpy(req.iflr_name, netdev_name, sizeof(req.iflr_name)); req.addr = oldaddr; return af_link_ioctl(SIOCDLIFADDR, &req); #else #error not implemented #endif } static int ifr_get_flags(const struct ifreq *ifr) { #ifdef HAVE_STRUCT_IFREQ_IFR_FLAGSHIGH return (ifr->ifr_flagshigh << 16) | ifr->ifr_flags; #else return ifr->ifr_flags; #endif } static void ifr_set_flags(struct ifreq *ifr, int flags) { ifr->ifr_flags = flags; #ifdef HAVE_STRUCT_IFREQ_IFR_FLAGSHIGH ifr->ifr_flagshigh = flags >> 16; #endif } /* Calls ioctl() on an AF_LINK sock, passing the specified 'command' and * 'arg'. Returns 0 if successful, otherwise a positive errno value. 
*/ int af_link_ioctl(unsigned long command, const void *arg) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static int sock; if (ovsthread_once_start(&once)) { sock = socket(AF_LINK, SOCK_DGRAM, 0); if (sock < 0) { sock = -errno; VLOG_ERR("failed to create link socket: %s", ovs_strerror(errno)); } ovsthread_once_done(&once); } return (sock < 0 ? -sock : ioctl(sock, command, arg) == -1 ? errno : 0); } openvswitch-2.0.1+git20140120/lib/netdev-dummy.c000066400000000000000000000620541226605124000210110ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "dummy.h" #include #include "flow.h" #include "list.h" #include "netdev-provider.h" #include "netdev-vport.h" #include "odp-util.h" #include "ofp-print.h" #include "ofpbuf.h" #include "packets.h" #include "poll-loop.h" #include "shash.h" #include "sset.h" #include "stream.h" #include "unaligned.h" #include "unixctl.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(netdev_dummy); struct dummy_stream { struct stream *stream; struct ofpbuf rxbuf; struct list txq; }; /* Protects 'dummy_list'. */ static struct ovs_mutex dummy_list_mutex = OVS_MUTEX_INITIALIZER; /* Contains all 'struct dummy_dev's. */ static struct list dummy_list OVS_GUARDED_BY(dummy_list_mutex) = LIST_INITIALIZER(&dummy_list); struct netdev_dummy { struct netdev up; /* In dummy_list. 
*/ struct list list_node OVS_GUARDED_BY(dummy_list_mutex); /* Protects all members below. */ struct ovs_mutex mutex OVS_ACQ_AFTER(dummy_list_mutex); uint8_t hwaddr[ETH_ADDR_LEN] OVS_GUARDED; int mtu OVS_GUARDED; struct netdev_stats stats OVS_GUARDED; enum netdev_flags flags OVS_GUARDED; unsigned int change_seq OVS_GUARDED; int ifindex OVS_GUARDED; struct pstream *pstream OVS_GUARDED; struct dummy_stream *streams OVS_GUARDED; size_t n_streams OVS_GUARDED; struct list rxes OVS_GUARDED; /* List of child "netdev_rx_dummy"s. */ };

/* Max 'recv_queue_len' in struct netdev_rx_dummy. */
#define NETDEV_DUMMY_MAX_QUEUE 100

struct netdev_rx_dummy {
    struct netdev_rx up;
    struct list node;           /* In netdev_dummy's "rxes" list. */
    struct list recv_queue;
    int recv_queue_len;         /* list_size(&recv_queue). */
    bool listening;
};

static unixctl_cb_func netdev_dummy_set_admin_state;

static int netdev_dummy_construct(struct netdev *);
static void netdev_dummy_poll_notify(struct netdev_dummy *netdev)
    OVS_REQUIRES(netdev->mutex);
static void netdev_dummy_queue_packet(struct netdev_dummy *, struct ofpbuf *);

static void dummy_stream_close(struct dummy_stream *);

/* Returns true if 'class' is a dummy netdev class, identified by its
 * 'construct' callback being netdev_dummy_construct(). */
static bool
is_dummy_class(const struct netdev_class *class)
{
    return class->construct == netdev_dummy_construct;
}

/* Returns the struct netdev_dummy that contains 'netdev', asserting that
 * 'netdev' belongs to a dummy class. */
static struct netdev_dummy *
netdev_dummy_cast(const struct netdev *netdev)
{
    ovs_assert(is_dummy_class(netdev_get_class(netdev)));
    return CONTAINER_OF(netdev, struct netdev_dummy, up);
}

/* Returns the struct netdev_rx_dummy that contains 'rx', asserting that
 * 'rx' belongs to a netdev of a dummy class. */
static struct netdev_rx_dummy *
netdev_rx_dummy_cast(const struct netdev_rx *rx)
{
    ovs_assert(is_dummy_class(netdev_get_class(rx->netdev)));
    return CONTAINER_OF(rx, struct netdev_rx_dummy, up);
}

/* Moves the stream at '*src' to '*dst'.  '*src' must still be valid memory
 * when this is called and must not be used as a stream afterward.
 *
 * A plain struct assignment is not enough because the 'txq' list head is
 * embedded in the struct: the queued buffers (or, for an empty queue, the
 * head itself) point at the head's old address and must be re-aimed. */
static void
dummy_stream_relocate(struct dummy_stream *dst, struct dummy_stream *src)
{
    *dst = *src;
    if (list_is_empty(&src->txq)) {
        list_init(&dst->txq);
    } else {
        dst->txq.next->prev = &dst->txq;
        dst->txq.prev->next = &dst->txq;
    }
}

/* Performs periodic work for every dummy netdev: accepts at most one new
 * connection on the device's passive stream, then for each connected stream
 * sends the buffer at the front of its transmit queue and receives incoming
 * data.  Each frame on a stream is preceded by a 2-byte big-endian length;
 * a completely received frame is queued on the device as a packet.  Streams
 * that fail or violate the framing are closed and removed. */
static void
netdev_dummy_run(void)
{
    struct netdev_dummy *dev;

    ovs_mutex_lock(&dummy_list_mutex);
    LIST_FOR_EACH (dev, list_node, &dummy_list) {
        size_t i;

        ovs_mutex_lock(&dev->mutex);

        if (dev->pstream) {
            struct stream *new_stream;
            int error;

            error = pstream_accept(dev->pstream, &new_stream);
            if (!error) {
                struct dummy_stream *new_streams;
                struct dummy_stream *s;
                size_t j;

                /* Grow the array by hand instead of with xrealloc(), so
                 * that every existing stream's embedded list head can be
                 * fixed up while the old array is still valid. */
                new_streams = xmalloc((dev->n_streams + 1)
                                      * sizeof *new_streams);
                for (j = 0; j < dev->n_streams; j++) {
                    dummy_stream_relocate(&new_streams[j], &dev->streams[j]);
                }
                free(dev->streams);
                dev->streams = new_streams;

                s = &dev->streams[dev->n_streams++];
                s->stream = new_stream;
                ofpbuf_init(&s->rxbuf, 2048);
                list_init(&s->txq);
            } else if (error != EAGAIN) {
                VLOG_WARN("%s: accept failed (%s)",
                          pstream_get_name(dev->pstream), ovs_strerror(error));
                pstream_close(dev->pstream);
                dev->pstream = NULL;
            }
        }

        /* 'i' is advanced only when the stream in slot 'i' survives; after
         * a removal the slot holds a different stream that still needs to
         * be serviced. */
        for (i = 0; i < dev->n_streams; ) {
            struct dummy_stream *s = &dev->streams[i];
            int error = 0;
            size_t n;

            stream_run(s->stream);

            /* Transmit: push out (part of) the oldest queued buffer. */
            if (!list_is_empty(&s->txq)) {
                struct ofpbuf *txbuf;
                int retval;

                txbuf = ofpbuf_from_list(list_front(&s->txq));
                retval = stream_send(s->stream, txbuf->data, txbuf->size);
                if (retval > 0) {
                    ofpbuf_pull(txbuf, retval);
                    if (!txbuf->size) {
                        list_remove(&txbuf->list_node);
                        ofpbuf_delete(txbuf);
                    }
                } else if (retval != -EAGAIN) {
                    error = -retval;
                }
            }

            /* Receive: work out how many bytes are still missing, first of
             * the 2-byte length prefix and then of the frame itself. */
            if (!error) {
                if (s->rxbuf.size < 2) {
                    n = 2 - s->rxbuf.size;
                } else {
                    uint16_t frame_len;

                    frame_len = ntohs(get_unaligned_be16(s->rxbuf.data));
                    if (frame_len < ETH_HEADER_LEN) {
                        error = EPROTO;
                        n = 0;
                    } else {
                        n = (2 + frame_len) - s->rxbuf.size;
                    }
                }
            }
            if (!error) {
                int retval;

                ofpbuf_prealloc_tailroom(&s->rxbuf, n);
                retval = stream_recv(s->stream, ofpbuf_tail(&s->rxbuf), n);
                if (retval > 0) {
                    s->rxbuf.size += retval;
                    if (retval == n && s->rxbuf.size > 2) {
                        /* Whole frame received: strip the length prefix
                         * and hand a copy to the device. */
                        ofpbuf_pull(&s->rxbuf, 2);
                        netdev_dummy_queue_packet(dev,
                                                  ofpbuf_clone(&s->rxbuf));
                        ofpbuf_clear(&s->rxbuf);
                    }
                } else if (retval != -EAGAIN) {
                    /* 0 means the peer closed the connection: a protocol
                     * error in mid-frame, a clean EOF otherwise. */
                    error = (retval < 0 ? -retval
                             : s->rxbuf.size ? EPROTO
                             : EOF);
                }
            }

            if (error) {
                size_t last;

                VLOG_DBG("%s: closing connection (%s)",
                         stream_get_name(s->stream),
                         ovs_retval_to_string(error));
                dummy_stream_close(s);

                /* Fill the hole with the last stream, if there is one. */
                last = --dev->n_streams;
                if (i != last) {
                    dummy_stream_relocate(s, &dev->streams[last]);
                }
            } else {
                i++;
            }
        }

        ovs_mutex_unlock(&dev->mutex);
    }
    ovs_mutex_unlock(&dummy_list_mutex);
}

/* Closes the connection of 's' and releases its receive buffer and all
 * buffers still waiting in its transmit queue.  Does not free 's' itself. */
static void
dummy_stream_close(struct dummy_stream *s)
{
    stream_close(s->stream);
    ofpbuf_uninit(&s->rxbuf);
    ofpbuf_list_delete(&s->txq);
}

/* Registers with the poll loop everything netdev_dummy_run() acts on: new
 * connections on each passive stream, and for each connected stream its
 * own run-wait, readiness to send (only if something is queued) and
 * readiness to receive. */
static void
netdev_dummy_wait(void)
{
    struct netdev_dummy *dev;

    ovs_mutex_lock(&dummy_list_mutex);
    LIST_FOR_EACH (dev, list_node, &dummy_list) {
        size_t i;

        ovs_mutex_lock(&dev->mutex);
        if (dev->pstream) {
            pstream_wait(dev->pstream);
        }
        for (i = 0; i < dev->n_streams; i++) {
            struct dummy_stream *s = &dev->streams[i];

            stream_run_wait(s->stream);
            if (!list_is_empty(&s->txq)) {
                stream_send_wait(s->stream);
            }
            stream_recv_wait(s->stream);
        }
        ovs_mutex_unlock(&dev->mutex);
    }
    ovs_mutex_unlock(&dummy_list_mutex);
}

/* Allocates a zero-filled struct netdev_dummy and returns its embedded
 * struct netdev. */
static struct netdev *
netdev_dummy_alloc(void)
{
    struct netdev_dummy *netdev = xzalloc(sizeof *netdev);
    return &netdev->up;
}

/* Initializes 'netdev_' as a dummy device and appends it to 'dummy_list'.
 * The Ethernet address is aa:55 followed by the four bytes of a counter
 * that starts at 0xaa550000 and is incremented for each device.  The MTU
 * starts at 1500, no flags are set, and the ifindex is -EOPNOTSUPP until
 * configured.  Always returns 0. */
static int
netdev_dummy_construct(struct netdev *netdev_)
{
    static atomic_uint next_n = ATOMIC_VAR_INIT(0xaa550000);
    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
    unsigned int n;

    atomic_add(&next_n, 1, &n);

    ovs_mutex_init(&netdev->mutex);
    ovs_mutex_lock(&netdev->mutex);
    netdev->hwaddr[0] = 0xaa;
    netdev->hwaddr[1] = 0x55;
    netdev->hwaddr[2] = n >> 24;
    netdev->hwaddr[3] = n >> 16;
    netdev->hwaddr[4] = n >> 8;
    netdev->hwaddr[5] = n;
    netdev->mtu = 1500;
    netdev->flags = 0;
    netdev->change_seq = 1;
    netdev->ifindex = -EOPNOTSUPP;

    netdev->pstream = NULL;
    netdev->streams = NULL;
    netdev->n_streams = 0;

    list_init(&netdev->rxes);
    ovs_mutex_unlock(&netdev->mutex);

    ovs_mutex_lock(&dummy_list_mutex);
    list_push_back(&dummy_list, &netdev->list_node);
    ovs_mutex_unlock(&dummy_list_mutex);

    return 0;
}

static void netdev_dummy_destruct(struct netdev *netdev_) { struct netdev_dummy *netdev = netdev_dummy_cast(netdev_); size_t i;
ovs_mutex_lock(&dummy_list_mutex); list_remove(&netdev->list_node); ovs_mutex_unlock(&dummy_list_mutex); ovs_mutex_lock(&netdev->mutex); pstream_close(netdev->pstream); for (i = 0; i < netdev->n_streams; i++) { dummy_stream_close(&netdev->streams[i]); } free(netdev->streams); ovs_mutex_unlock(&netdev->mutex); ovs_mutex_destroy(&netdev->mutex); }

/* Frees the struct netdev_dummy that contains 'netdev_'. */
static void
netdev_dummy_dealloc(struct netdev *netdev_)
{
    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);

    free(netdev);
}

/* Adds the current configuration of 'netdev_' to 'args': "ifindex" if a
 * nonnegative ifindex is configured, and "pstream" if a passive stream is
 * open.  Always returns 0. */
static int
netdev_dummy_get_config(const struct netdev *netdev_, struct smap *args)
{
    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);

    ovs_mutex_lock(&netdev->mutex);
    if (netdev->ifindex >= 0) {
        smap_add_format(args, "ifindex", "%d", netdev->ifindex);
    }
    if (netdev->pstream) {
        smap_add(args, "pstream", pstream_get_name(netdev->pstream));
    }
    ovs_mutex_unlock(&netdev->mutex);
    return 0;
}

/* Applies the "ifindex" and "pstream" keys of 'args' to 'netdev_'.  A
 * missing "ifindex" resets it to -EOPNOTSUPP.  The passive stream is closed
 * and, if "pstream" is given, reopened whenever the requested name differs
 * from the one currently open.  A failure to open the stream is only
 * logged; the function always returns 0. */
static int
netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args)
{
    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
    const char *pstream;

    ovs_mutex_lock(&netdev->mutex);
    netdev->ifindex = smap_get_int(args, "ifindex", -EOPNOTSUPP);

    pstream = smap_get(args, "pstream");
    if (!pstream
        || !netdev->pstream
        || strcmp(pstream_get_name(netdev->pstream), pstream)) {
        pstream_close(netdev->pstream);
        netdev->pstream = NULL;

        if (pstream) {
            int error;

            error = pstream_open(pstream, &netdev->pstream, DSCP_DEFAULT);
            if (error) {
                VLOG_WARN("%s: open failed (%s)",
                          pstream, ovs_strerror(error));
            }
        }
    }
    ovs_mutex_unlock(&netdev->mutex);
    return 0;
}

/* Allocates a zero-filled struct netdev_rx_dummy and returns its embedded
 * struct netdev_rx. */
static struct netdev_rx *
netdev_dummy_rx_alloc(void)
{
    struct netdev_rx_dummy *rx = xzalloc(sizeof *rx);
    return &rx->up;
}

/* Adds 'rx_' to its netdev's "rxes" list and gives it an empty receive
 * queue.  Always returns 0. */
static int
netdev_dummy_rx_construct(struct netdev_rx *rx_)
{
    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
    struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);

    ovs_mutex_lock(&netdev->mutex);
    list_push_back(&netdev->rxes, &rx->node);
    list_init(&rx->recv_queue);
    rx->recv_queue_len = 0;
    ovs_mutex_unlock(&netdev->mutex);

    return 0;
}

/* Removes 'rx_' from its netdev's "rxes" list and deletes all packets
 * still waiting in its receive queue. */
static void
netdev_dummy_rx_destruct(struct netdev_rx *rx_)
{
    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
    struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);

    ovs_mutex_lock(&netdev->mutex);
    list_remove(&rx->node);
    ofpbuf_list_delete(&rx->recv_queue);
    ovs_mutex_unlock(&netdev->mutex);
}

/* Frees the struct netdev_rx_dummy that contains 'rx_'. */
static void
netdev_dummy_rx_dealloc(struct netdev_rx *rx_)
{
    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);

    free(rx);
}

/* Removes the oldest packet from 'rx_''s receive queue and copies it into
 * 'buffer', which has room for 'size' bytes.  Returns the packet length on
 * success, -EAGAIN if the queue is empty, or -EMSGSIZE if the packet does
 * not fit; in the last case the packet is dropped. */
static int
netdev_dummy_rx_recv(struct netdev_rx *rx_, void *buffer, size_t size)
{
    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
    struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);
    struct ofpbuf *packet;
    int retval;

    /* Hold the mutex only while dequeuing; the copy below works on a
     * packet that is no longer in the queue. */
    ovs_mutex_lock(&netdev->mutex);
    if (!list_is_empty(&rx->recv_queue)) {
        packet = ofpbuf_from_list(list_pop_front(&rx->recv_queue));
        rx->recv_queue_len--;
    } else {
        packet = NULL;
    }
    ovs_mutex_unlock(&netdev->mutex);

    if (!packet) {
        return -EAGAIN;
    }

    if (packet->size <= size) {
        memcpy(buffer, packet->data, packet->size);
        retval = packet->size;
    } else {
        retval = -EMSGSIZE;
    }
    ofpbuf_delete(packet);

    return retval;
}

/* Arranges for the poll loop to wake up immediately if 'rx_' has packets
 * waiting. */
static void
netdev_dummy_rx_wait(struct netdev_rx *rx_)
{
    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
    struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);

    ovs_mutex_lock(&netdev->mutex);
    if (!list_is_empty(&rx->recv_queue)) {
        poll_immediate_wake();
    }
    ovs_mutex_unlock(&netdev->mutex);
}

/* Discards all packets waiting in 'rx_''s receive queue.  Always returns
 * 0. */
static int
netdev_dummy_rx_drain(struct netdev_rx *rx_)
{
    struct netdev_rx_dummy *rx = netdev_rx_dummy_cast(rx_);
    struct netdev_dummy *netdev = netdev_dummy_cast(rx->up.netdev);

    ovs_mutex_lock(&netdev->mutex);
    ofpbuf_list_delete(&rx->recv_queue);
    rx->recv_queue_len = 0;
    ovs_mutex_unlock(&netdev->mutex);

    return 0;
}

/* "Sends" the 'size' bytes at 'buffer' on 'netdev': updates the transmit
 * counters and queues a copy of the frame, preceded by its length as a
 * 2-byte big-endian value, on every connected stream whose transmit queue
 * holds fewer than NETDEV_DUMMY_MAX_QUEUE buffers.
 *
 * Returns 0 on success, or EMSGSIZE if the frame is shorter than an
 * Ethernet header or longer than the MTU plus the Ethernet header (plus a
 * VLAN header when the frame's Ethernet type is ETH_TYPE_VLAN). */
static int
netdev_dummy_send(struct netdev *netdev, const void *buffer, size_t size)
{
    struct netdev_dummy *dev = netdev_dummy_cast(netdev);
    size_t i;

    if (size < ETH_HEADER_LEN) {
        return EMSGSIZE;
    } else {
        const struct eth_header *eth = buffer;
        int max_size;

        ovs_mutex_lock(&dev->mutex);
        max_size = dev->mtu + ETH_HEADER_LEN;
        ovs_mutex_unlock(&dev->mutex);

        if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
            max_size += VLAN_HEADER_LEN;
        }
        if (size > max_size) {
            return EMSGSIZE;
        }
    }

    ovs_mutex_lock(&dev->mutex);
    dev->stats.tx_packets++;
    dev->stats.tx_bytes += size;

    for (i = 0; i < dev->n_streams; i++) {
        struct dummy_stream *s = &dev->streams[i];

        /* Streams whose queue is full silently miss this frame. */
        if (list_size(&s->txq) < NETDEV_DUMMY_MAX_QUEUE) {
            struct ofpbuf *b;

            /* 2 bytes of headroom leave space for the length prefix. */
            b = ofpbuf_clone_data_with_headroom(buffer, size, 2);
            put_unaligned_be16(ofpbuf_push_uninit(b, 2), htons(size));
            list_push_back(&s->txq, &b->list_node);
        }
    }
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Sets the Ethernet address of 'netdev' to 'mac', recording a change only
 * if the address actually differs.  Always returns 0. */
static int
netdev_dummy_set_etheraddr(struct netdev *netdev,
                           const uint8_t mac[ETH_ADDR_LEN])
{
    struct netdev_dummy *dev = netdev_dummy_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    if (!eth_addr_equals(dev->hwaddr, mac)) {
        memcpy(dev->hwaddr, mac, ETH_ADDR_LEN);
        netdev_dummy_poll_notify(dev);
    }
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Copies the Ethernet address of 'netdev' into 'mac'.  Always returns 0. */
static int
netdev_dummy_get_etheraddr(const struct netdev *netdev,
                           uint8_t mac[ETH_ADDR_LEN])
{
    struct netdev_dummy *dev = netdev_dummy_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    memcpy(mac, dev->hwaddr, ETH_ADDR_LEN);
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Stores the MTU of 'netdev' in '*mtup'.  Always returns 0. */
static int
netdev_dummy_get_mtu(const struct netdev *netdev, int *mtup)
{
    struct netdev_dummy *dev = netdev_dummy_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    *mtup = dev->mtu;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Sets the MTU of 'netdev' to 'mtu' without any validation.  Always
 * returns 0. */
static int
netdev_dummy_set_mtu(const struct netdev *netdev, int mtu)
{
    struct netdev_dummy *dev = netdev_dummy_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    dev->mtu = mtu;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Copies the statistics of 'netdev' into '*stats'.  Always returns 0. */
static int
netdev_dummy_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
{
    struct netdev_dummy *dev = netdev_dummy_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    *stats = dev->stats;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Replaces the statistics of 'netdev' with '*stats'.  Always returns 0. */
static int
netdev_dummy_set_stats(struct netdev *netdev, const struct netdev_stats *stats)
{
    struct netdev_dummy *dev = netdev_dummy_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    dev->stats = *stats;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Returns the configured ifindex of 'netdev'.  This is the stored value
 * as-is, so it is -EOPNOTSUPP when no ifindex has been configured. */
static int
netdev_dummy_get_ifindex(const struct netdev *netdev)
{
    struct netdev_dummy *dev = netdev_dummy_cast(netdev);
    int ifindex;

    ovs_mutex_lock(&dev->mutex);
    ifindex = dev->ifindex;
    ovs_mutex_unlock(&dev->mutex);

    return ifindex;
}

/* Sets the flags in 'on' and then clears the flags in 'off' on 'netdev',
 * storing the previous flags in '*old_flagsp' and recording a change if the
 * flags differ afterward.  Only NETDEV_UP and NETDEV_PROMISC are supported;
 * any other bit in 'off' or 'on' yields EINVAL with nothing modified.
 * Otherwise returns 0.  The caller must hold 'netdev->mutex'. */
static int
netdev_dummy_update_flags__(struct netdev_dummy *netdev,
                            enum netdev_flags off, enum netdev_flags on,
                            enum netdev_flags *old_flagsp)
    OVS_REQUIRES(netdev->mutex)
{
    if ((off | on) & ~(NETDEV_UP | NETDEV_PROMISC)) {
        return EINVAL;
    }

    *old_flagsp = netdev->flags;
    netdev->flags |= on;
    netdev->flags &= ~off;
    if (*old_flagsp != netdev->flags) {
        netdev_dummy_poll_notify(netdev);
    }

    return 0;
}

/* Locking wrapper around netdev_dummy_update_flags__(); same arguments and
 * return value. */
static int
netdev_dummy_update_flags(struct netdev *netdev_,
                          enum netdev_flags off, enum netdev_flags on,
                          enum netdev_flags *old_flagsp)
{
    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = netdev_dummy_update_flags__(netdev, off, on, old_flagsp);
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Returns the current change sequence number of 'netdev_'. */
static unsigned int
netdev_dummy_change_seq(const struct netdev *netdev_)
{
    struct netdev_dummy *netdev = netdev_dummy_cast(netdev_);
    unsigned int change_seq;

    ovs_mutex_lock(&netdev->mutex);
    change_seq = netdev->change_seq;
    ovs_mutex_unlock(&netdev->mutex);

    return change_seq;
}

/* Helper functions.
*/

/* Records a change to 'dev' by incrementing its change sequence number.
 * If the increment wraps the counter to 0 it is incremented once more, so
 * the stored value is never 0. */
static void
netdev_dummy_poll_notify(struct netdev_dummy *dev)
{
    dev->change_seq++;
    if (!dev->change_seq) {
        dev->change_seq++;
    }
}

/* The "dummy" netdev class.  Members are listed positionally; NULL marks
 * an operation that dummy devices do not implement. */
static const struct netdev_class dummy_class = {
    "dummy",
    NULL,                       /* init */
    netdev_dummy_run,
    netdev_dummy_wait,

    netdev_dummy_alloc,
    netdev_dummy_construct,
    netdev_dummy_destruct,
    netdev_dummy_dealloc,
    netdev_dummy_get_config,
    netdev_dummy_set_config,
    NULL,                       /* get_tunnel_config */

    netdev_dummy_send,          /* send */
    NULL,                       /* send_wait */

    netdev_dummy_set_etheraddr,
    netdev_dummy_get_etheraddr,
    netdev_dummy_get_mtu,
    netdev_dummy_set_mtu,
    netdev_dummy_get_ifindex,
    NULL,                       /* get_carrier */
    NULL,                       /* get_carrier_resets */
    NULL,                       /* get_miimon */
    netdev_dummy_get_stats,
    netdev_dummy_set_stats,

    NULL,                       /* get_features */
    NULL,                       /* set_advertisements */

    NULL,                       /* set_policing */
    NULL,                       /* get_qos_types */
    NULL,                       /* get_qos_capabilities */
    NULL,                       /* get_qos */
    NULL,                       /* set_qos */
    NULL,                       /* get_queue */
    NULL,                       /* set_queue */
    NULL,                       /* delete_queue */
    NULL,                       /* get_queue_stats */
    NULL,                       /* queue_dump_start */
    NULL,                       /* queue_dump_next */
    NULL,                       /* queue_dump_done */
    NULL,                       /* dump_queue_stats */

    NULL,                       /* get_in4 */
    NULL,                       /* set_in4 */
    NULL,                       /* get_in6 */
    NULL,                       /* add_router */
    NULL,                       /* get_next_hop */
    NULL,                       /* get_status */
    NULL,                       /* arp_lookup */

    netdev_dummy_update_flags,

    netdev_dummy_change_seq,

    netdev_dummy_rx_alloc,
    netdev_dummy_rx_construct,
    netdev_dummy_rx_destruct,
    netdev_dummy_rx_dealloc,
    netdev_dummy_rx_recv,
    netdev_dummy_rx_wait,
    netdev_dummy_rx_drain,
};

/* Builds a packet from 's', which is tried first as a hex dump of an
 * Ethernet frame and then as a datapath flow key from which a matching
 * packet is composed.  Returns the new packet, or NULL if 's' can be
 * parsed as neither. */
static struct ofpbuf *
eth_from_packet_or_flow(const char *s)
{
    enum odp_key_fitness fitness;
    struct ofpbuf *packet;
    struct ofpbuf odp_key;
    struct flow flow;
    int error;

    if (!eth_from_hex(s, &packet)) {
        return packet;
    }

    /* Convert string to datapath key.
     *
     * It would actually be nicer to parse an OpenFlow-like flow key here, but
     * the code for that currently calls exit() on parse error.  We have to
     * settle for parsing a datapath key for now.
     */
    ofpbuf_init(&odp_key, 0);
    error = odp_flow_from_string(s, NULL, &odp_key, NULL);
    if (error) {
        ofpbuf_uninit(&odp_key);
        return NULL;
    }

    /* Convert odp_key to flow. */
    fitness = odp_flow_key_to_flow(odp_key.data, odp_key.size, &flow);
    if (fitness == ODP_FIT_ERROR) {
        ofpbuf_uninit(&odp_key);
        return NULL;
    }

    packet = ofpbuf_new(0);
    flow_compose(packet, &flow);

    ofpbuf_uninit(&odp_key);
    return packet;
}

/* Appends 'packet' to 'rx''s receive queue and updates the queue length.
 * Does not check the queue limit. */
static void
netdev_dummy_queue_packet__(struct netdev_rx_dummy *rx, struct ofpbuf *packet)
{
    list_push_back(&rx->recv_queue, &packet->list_node);
    rx->recv_queue_len++;
}

/* Delivers 'packet' to every receiver of 'dummy' whose queue holds fewer
 * than NETDEV_DUMMY_MAX_QUEUE packets.  Takes ownership of 'packet': the
 * last eligible receiver gets 'packet' itself, earlier ones get clones,
 * and the packet is deleted if no receiver is eligible. */
static void
netdev_dummy_queue_packet(struct netdev_dummy *dummy, struct ofpbuf *packet)
{
    struct netdev_rx_dummy *rx, *prev;

    /* Queuing lags one receiver behind the iteration, so that a clone is
     * made only once another eligible receiver is known to follow. */
    prev = NULL;
    LIST_FOR_EACH (rx, node, &dummy->rxes) {
        if (rx->recv_queue_len < NETDEV_DUMMY_MAX_QUEUE) {
            if (prev) {
                netdev_dummy_queue_packet__(prev, ofpbuf_clone(packet));
            }
            prev = rx;
        }
    }
    if (prev) {
        netdev_dummy_queue_packet__(prev, packet);
    } else {
        ofpbuf_delete(packet);
    }
}

/* unixctl handler for "netdev-dummy/receive NAME PACKET|FLOW...".
 * Makes the dummy device named argv[1] receive one packet for each
 * remaining argument.  Stops with an error reply at the first argument
 * that cannot be parsed; packets from earlier arguments stay queued. */
static void
netdev_dummy_receive(struct unixctl_conn *conn,
                     int argc, const char *argv[], void *aux OVS_UNUSED)
{
    struct netdev_dummy *dummy_dev;
    struct netdev *netdev;
    int i;

    netdev = netdev_from_name(argv[1]);
    if (!netdev || !is_dummy_class(netdev->netdev_class)) {
        unixctl_command_reply_error(conn, "no such dummy netdev");
        goto exit;
    }
    dummy_dev = netdev_dummy_cast(netdev);

    for (i = 2; i < argc; i++) {
        struct ofpbuf *packet;

        packet = eth_from_packet_or_flow(argv[i]);
        if (!packet) {
            unixctl_command_reply_error(conn, "bad packet syntax");
            goto exit;
        }

        ovs_mutex_lock(&dummy_dev->mutex);
        dummy_dev->stats.rx_packets++;
        dummy_dev->stats.rx_bytes += packet->size;
        netdev_dummy_queue_packet(dummy_dev, packet);
        ovs_mutex_unlock(&dummy_dev->mutex);
    }

    unixctl_command_reply(conn, NULL);

exit:
    /* Reached on success and on error alike; 'netdev' may be NULL here. */
    netdev_close(netdev);
}

/* Sets NETDEV_UP on 'dev' if 'admin_state' is true, otherwise clears it.
 * The caller must hold 'dev->mutex'. */
static void
netdev_dummy_set_admin_state__(struct netdev_dummy *dev, bool admin_state)
    OVS_REQUIRES(dev->mutex)
{
    enum netdev_flags old_flags;

    if (admin_state) {
        netdev_dummy_update_flags__(dev, 0, NETDEV_UP, &old_flags);
    } else {
        netdev_dummy_update_flags__(dev, NETDEV_UP, 0, &old_flags);
    }
}

/* unixctl handler for "netdev-dummy/set-admin-state [netdev] up|down".
 * The last argument selects the state.  With a device name only that dummy
 * device is changed; without one every dummy device is changed. */
static void
netdev_dummy_set_admin_state(struct unixctl_conn *conn, int argc,
                             const char *argv[], void *aux OVS_UNUSED)
{
    bool up;

    if (!strcasecmp(argv[argc - 1], "up")) {
        up = true;
    } else if ( !strcasecmp(argv[argc - 1], "down")) {
        up = false;
    } else {
        unixctl_command_reply_error(conn, "Invalid Admin State");
        return;
    }

    if (argc > 2) {
        struct netdev *netdev = netdev_from_name(argv[1]);
        if (netdev && is_dummy_class(netdev->netdev_class)) {
            struct netdev_dummy *dummy_dev = netdev_dummy_cast(netdev);

            ovs_mutex_lock(&dummy_dev->mutex);
            netdev_dummy_set_admin_state__(dummy_dev, up);
            ovs_mutex_unlock(&dummy_dev->mutex);

            netdev_close(netdev);
        } else {
            unixctl_command_reply_error(conn, "Unknown Dummy Interface");
            netdev_close(netdev);
            return;
        }
    } else {
        struct netdev_dummy *netdev;

        ovs_mutex_lock(&dummy_list_mutex);
        LIST_FOR_EACH (netdev, list_node, &dummy_list) {
            ovs_mutex_lock(&netdev->mutex);
            netdev_dummy_set_admin_state__(netdev, up);
            ovs_mutex_unlock(&netdev->mutex);
        }
        ovs_mutex_unlock(&dummy_list_mutex);
    }
    unixctl_command_reply(conn, "OK");
}

void netdev_dummy_register(bool override) { unixctl_command_register("netdev-dummy/receive", "NAME PACKET|FLOW...", 2, INT_MAX, netdev_dummy_receive, NULL); unixctl_command_register("netdev-dummy/set-admin-state", "[netdev] up|down", 1, 2, netdev_dummy_set_admin_state, NULL); if (override) { struct sset types; const char *type; sset_init(&types); netdev_enumerate_types(&types); SSET_FOR_EACH (type, &types) { if (!strcmp(type, "patch")) { continue; } if (!netdev_unregister_provider(type)) { struct netdev_class *class; int error; class = xmemdup(&dummy_class, sizeof dummy_class); class->type = xstrdup(type); error = netdev_register_provider(class); if (error) { VLOG_ERR("%s: failed to register netdev provider (%s)", type, ovs_strerror(error)); free(CONST_CAST(char *, class->type)); free(class); } } } sset_destroy(&types); }
netdev_register_provider(&dummy_class); netdev_vport_tunnel_register(); } openvswitch-2.0.1+git20140120/lib/netdev-linux.c000066400000000000000000004421741226605124000210220ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "netdev-linux.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "coverage.h" #include "dpif-linux.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "hash.h" #include "hmap.h" #include "netdev-provider.h" #include "netdev-vport.h" #include "netlink-notifier.h" #include "netlink-socket.h" #include "netlink.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "packets.h" #include "poll-loop.h" #include "rtnetlink-link.h" #include "shash.h" #include "socket-util.h" #include "sset.h" #include "timer.h" #include "unaligned.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(netdev_linux); COVERAGE_DEFINE(netdev_set_policing); COVERAGE_DEFINE(netdev_arp_lookup); COVERAGE_DEFINE(netdev_get_ifindex); COVERAGE_DEFINE(netdev_get_hwaddr); COVERAGE_DEFINE(netdev_set_hwaddr); COVERAGE_DEFINE(netdev_get_ethtool); COVERAGE_DEFINE(netdev_set_ethtool); /* These were introduced in Linux 2.6.14, so they might be missing if we have * old 
headers. */
#ifndef ADVERTISED_Pause
#define ADVERTISED_Pause                (1 << 13)
#endif
#ifndef ADVERTISED_Asym_Pause
#define ADVERTISED_Asym_Pause           (1 << 14)
#endif

/* These were introduced in Linux 2.6.24, so they might be missing if we
 * have old headers. */
#ifndef ETHTOOL_GFLAGS
#define ETHTOOL_GFLAGS       0x00000025 /* Get flags bitmap(ethtool_value) */
#endif
#ifndef ETHTOOL_SFLAGS
#define ETHTOOL_SFLAGS       0x00000026 /* Set flags bitmap(ethtool_value) */
#endif

/* This was introduced in Linux 2.6.25, so it might be missing if we have old
 * headers. */
#ifndef TC_RTAB_SIZE
#define TC_RTAB_SIZE 1024
#endif

/* Bits for the 'cache_valid' member of struct netdev_linux.  A cached value
 * (or cached error code) is only meaningful while its bit is set. */
enum {
    VALID_IFINDEX           = 1 << 0,
    VALID_ETHERADDR         = 1 << 1,
    VALID_IN4               = 1 << 2,
    VALID_IN6               = 1 << 3,
    VALID_MTU               = 1 << 4,
    VALID_POLICING          = 1 << 5,
    VALID_VPORT_STAT_ERROR  = 1 << 6,
    VALID_DRVINFO           = 1 << 7,
    VALID_FEATURES          = 1 << 8,
};

/* Traffic control. */

/* An instance of a traffic control class.  Always associated with a particular
 * network device.
 *
 * Each TC implementation subclasses this with whatever additional data it
 * needs. */
struct tc {
    const struct tc_ops *ops;   /* Implementation that owns this instance. */
    struct hmap queues;         /* Contains "struct tc_queue"s.
                                 * Read by generic TC layer.
                                 * Written only by TC implementation. */
};

/* Static initializer for a 'struct tc' named TC that uses implementation
 * OPS. */
#define TC_INITIALIZER(TC, OPS) { OPS, HMAP_INITIALIZER(&(TC)->queues) }

/* One traffic control queue.
 *
 * Each TC implementation subclasses this with whatever additional data it
 * needs. */
struct tc_queue {
    struct hmap_node hmap_node; /* In struct tc's "queues" hmap. */
    unsigned int queue_id;      /* OpenFlow queue ID. */
    long long int created;      /* Time queue was created, in msecs. */
};

/* A particular kind of traffic control.  Each implementation generally maps to
 * one particular Linux qdisc class.
 *
 * The functions below return 0 if successful or a positive errno value on
 * failure, except where otherwise noted.  All of them must be provided, except
 * where otherwise noted. */
struct tc_ops {
    /* Name used by kernel in the TCA_KIND attribute of tcmsg, e.g. "htb".
     * This is null for tc_ops_default and tc_ops_other, for which there are no
     * appropriate values. */
    const char *linux_name;

    /* Name used in OVS database, e.g. "linux-htb".  Must be nonnull. */
    const char *ovs_name;

    /* Number of supported OpenFlow queues, 0 for qdiscs that have no
     * queues.  The queues are numbered 0 through n_queues - 1. */
    unsigned int n_queues;

    /* Called to install this TC class on 'netdev'.  The implementation should
     * make the Netlink calls required to set up 'netdev' with the right qdisc
     * and configure it according to 'details'.  The implementation may assume
     * that the current qdisc is the default; that is, there is no need for it
     * to delete the current qdisc before installing itself.
     *
     * The contents of 'details' should be documented as valid for 'ovs_name'
     * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
     * (which is built as ovs-vswitchd.conf.db(8)).
     *
     * This function must return 0 if and only if it sets 'netdev->tc' to an
     * initialized 'struct tc'.
     *
     * (This function is null for tc_ops_other, which cannot be installed.  For
     * other TC classes it should always be nonnull.) */
    int (*tc_install)(struct netdev *netdev, const struct smap *details);

    /* Called when the netdev code determines (through a Netlink query) that
     * this TC class's qdisc is installed on 'netdev', but we didn't install
     * it ourselves and so don't know any of the details.
     *
     * 'nlmsg' is the kernel reply to a RTM_GETQDISC Netlink message for
     * 'netdev'.  The TCA_KIND attribute of 'nlmsg' is 'linux_name'.  The
     * implementation should parse the other attributes of 'nlmsg' as
     * necessary to determine its configuration.  If necessary it should also
     * use Netlink queries to determine the configuration of queues on
     * 'netdev'.
     *
     * This function must return 0 if and only if it sets 'netdev->tc' to an
     * initialized 'struct tc'. */
    int (*tc_load)(struct netdev *netdev, struct ofpbuf *nlmsg);

    /* Destroys the data structures allocated by the implementation as part of
     * 'tc'.  (This includes destroying 'tc->queues' by calling
     * tc_destroy(tc).)
     *
     * The implementation should not need to perform any Netlink calls.  If
     * desirable, the caller is responsible for deconfiguring the kernel qdisc.
     * (But it may not be desirable.)
     *
     * This function may be null if 'tc' is trivial. */
    void (*tc_destroy)(struct tc *tc);

    /* Retrieves details of 'netdev->tc' configuration into 'details'.
     *
     * The implementation should not need to perform any Netlink calls, because
     * the 'tc_install' or 'tc_load' that instantiated 'netdev->tc' should have
     * cached the configuration.
     *
     * The contents of 'details' should be documented as valid for 'ovs_name'
     * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
     * (which is built as ovs-vswitchd.conf.db(8)).
     *
     * This function may be null if 'tc' is not configurable. */
    int (*qdisc_get)(const struct netdev *netdev, struct smap *details);

    /* Reconfigures 'netdev->tc' according to 'details', performing any
     * required Netlink calls to complete the reconfiguration.
     *
     * The contents of 'details' should be documented as valid for 'ovs_name'
     * in the "other_config" column in the "QoS" table in vswitchd/vswitch.xml
     * (which is built as ovs-vswitchd.conf.db(8)).
     *
     * This function may be null if 'tc' is not configurable. */
    int (*qdisc_set)(struct netdev *, const struct smap *details);

    /* Retrieves details of 'queue' on 'netdev->tc' into 'details'.  'queue' is
     * one of the 'struct tc_queue's within 'netdev->tc->queues'.
     *
     * The contents of 'details' should be documented as valid for 'ovs_name'
     * in the "other_config" column in the "Queue" table in
     * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)).
     *
     * The implementation should not need to perform any Netlink calls, because
     * the 'tc_install' or 'tc_load' that instantiated 'netdev->tc' should have
     * cached the queue configuration.
     *
     * This function may be null if 'tc' does not have queues ('n_queues' is
     * 0). */
    int (*class_get)(const struct netdev *netdev, const struct tc_queue *queue,
                     struct smap *details);

    /* Configures or reconfigures 'queue_id' on 'netdev->tc' according to
     * 'details', performing any required Netlink calls to complete the
     * reconfiguration.  The caller ensures that 'queue_id' is less than
     * 'n_queues'.
     *
     * The contents of 'details' should be documented as valid for 'ovs_name'
     * in the "other_config" column in the "Queue" table in
     * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)).
     *
     * This function may be null if 'tc' does not have queues or its queues are
     * not configurable. */
    int (*class_set)(struct netdev *, unsigned int queue_id,
                     const struct smap *details);

    /* Deletes 'queue' from 'netdev->tc'.  'queue' is one of the 'struct
     * tc_queue's within 'netdev->tc->queues'.
     *
     * This function may be null if 'tc' does not have queues or its queues
     * cannot be deleted. */
    int (*class_delete)(struct netdev *, struct tc_queue *queue);

    /* Obtains stats for 'queue' from 'netdev->tc'.  'queue' is one of the
     * 'struct tc_queue's within 'netdev->tc->queues'.
     *
     * On success, initializes '*stats'.
     *
     * This function may be null if 'tc' does not have queues or if it cannot
     * report queue statistics. */
    int (*class_get_stats)(const struct netdev *netdev,
                           const struct tc_queue *queue,
                           struct netdev_queue_stats *stats);

    /* Extracts queue stats from 'nlmsg', which is a response to a
     * RTM_GETTCLASS message, and passes them to 'cb' along with 'aux'.
     *
     * This function may be null if 'tc' does not have queues or if it cannot
     * report queue statistics. */
    int (*class_dump_stats)(const struct netdev *netdev,
                            const struct ofpbuf *nlmsg,
                            netdev_dump_queue_stats_cb *cb, void *aux);
};

/* Initializes 'tc' to use implementation 'ops' with an empty set of
 * queues. */
static void
tc_init(struct tc *tc, const struct tc_ops *ops)
{
    tc->ops = ops;
    hmap_init(&tc->queues);
}

/* Destroys the 'queues' hmap of 'tc'.  Does not free 'tc' itself. */
static void
tc_destroy(struct tc *tc)
{
    hmap_destroy(&tc->queues);
}

static const struct tc_ops tc_ops_htb;
static const struct tc_ops tc_ops_hfsc;
static const struct tc_ops tc_ops_default;
static const struct tc_ops tc_ops_other;

/* Null-terminated table of the known traffic control implementations. */
static const struct tc_ops *const tcs[] = {
    &tc_ops_htb,                /* Hierarchy token bucket (see tc-htb(8)). */
    &tc_ops_hfsc,               /* Hierarchical fair service curve. */
    &tc_ops_default,            /* Default qdisc (see tc-pfifo_fast(8)). */
    &tc_ops_other,              /* Some other qdisc. */
    NULL
};

static unsigned int tc_make_handle(unsigned int major, unsigned int minor);
static unsigned int tc_get_major(unsigned int handle);
static unsigned int tc_get_minor(unsigned int handle);

static unsigned int tc_ticks_to_bytes(unsigned int rate, unsigned int ticks);
static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size);
static unsigned int tc_buffer_per_jiffy(unsigned int rate);

static struct tcmsg *tc_make_request(const struct netdev *, int type,
                                     unsigned int flags, struct ofpbuf *);
static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp);
static int tc_add_del_ingress_qdisc(struct netdev *netdev, bool add);
static int tc_add_policer(struct netdev *netdev, int kbits_rate,
                          int kbits_burst);

static int tc_parse_qdisc(const struct ofpbuf *, const char **kind,
                          struct nlattr **options);
static int tc_parse_class(const struct ofpbuf *, unsigned int *queue_id,
                          struct nlattr **options,
                          struct netdev_queue_stats *);
static int tc_query_class(const struct netdev *,
                          unsigned int handle, unsigned int parent,
                          struct ofpbuf **replyp);
static int tc_delete_class(const struct netdev *, unsigned int handle);

static int tc_del_qdisc(struct netdev *netdev);
static int tc_query_qdisc(const struct netdev *netdev);

static int
tc_calc_cell_log(unsigned int mtu);
static void tc_fill_rate(struct tc_ratespec *rate, uint64_t bps, int mtu);
static void tc_put_rtab(struct ofpbuf *, uint16_t type,
                        const struct tc_ratespec *rate);
static int tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes);

/* Per-device state for the Linux netdev implementations in this file. */
struct netdev_linux {
    struct netdev up;

    /* Protects all members below. */
    struct ovs_mutex mutex;

    unsigned int cache_valid;   /* Bitmap of VALID_* flags. */
    unsigned int change_seq;    /* Bumped on every change; never left at 0
                                 * (see netdev_linux_changed()). */

    bool miimon;                    /* Link status of last poll. */
    long long int miimon_interval;  /* Miimon Poll rate. Disabled if <= 0. */
    struct timer miimon_timer;

    /* The following are figured out "on demand" only.  They are only valid
     * when the corresponding VALID_* bit in 'cache_valid' is set. */
    int ifindex;
    uint8_t etheraddr[ETH_ADDR_LEN];
    struct in_addr address, netmask;
    struct in6_addr in6;
    int mtu;
    unsigned int ifi_flags;
    long long int carrier_resets;
    uint32_t kbits_rate;        /* Policing data. */
    uint32_t kbits_burst;
    int vport_stats_error;      /* Cached error code from vport_get_stats().
                                   0 or an errno value. */
    int netdev_mtu_error;       /* Cached error code from SIOCGIFMTU or SIOCSIFMTU. */
    int ether_addr_error;       /* Cached error code from set/get etheraddr. */
    int netdev_policing_error;  /* Cached error code from set policing. */
    int get_features_error;     /* Cached error code from ETHTOOL_GSET. */
    int get_ifindex_error;      /* Cached error code from SIOCGIFINDEX. */

    enum netdev_features current;    /* Cached from ETHTOOL_GSET. */
    enum netdev_features advertised; /* Cached from ETHTOOL_GSET. */
    enum netdev_features supported;  /* Cached from ETHTOOL_GSET. */

    struct ethtool_drvinfo drvinfo;  /* Cached from ETHTOOL_GDRVINFO. */
    struct tc *tc;

    /* For devices of class netdev_tap_class only. */
    int tap_fd;
};

/* Receive handle for a netdev_linux device. */
struct netdev_rx_linux {
    struct netdev_rx up;
    bool is_tap;                /* True if 'fd' is the device's shared tap fd
                                 * rather than a private packet socket. */
    int fd;
};

/* This is set pretty low because we probably won't learn anything from the
 * additional log messages.
*/
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

static void netdev_linux_run(void);

static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
                                   int cmd, const char *cmd_name);
static int netdev_linux_get_ipv4(const struct netdev *, struct in_addr *,
                                 int cmd, const char *cmd_name);
static int get_flags(const struct netdev *, unsigned int *flags);
static int set_flags(const char *, unsigned int flags);
static int update_flags(struct netdev_linux *netdev, enum netdev_flags off,
                        enum netdev_flags on, enum netdev_flags *old_flagsp)
    OVS_REQUIRES(netdev->mutex);
static int do_get_ifindex(const char *netdev_name);
static int get_ifindex(const struct netdev *, int *ifindexp);
static int do_set_addr(struct netdev *netdev,
                       int ioctl_nr, const char *ioctl_name,
                       struct in_addr addr);
static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]);
static int set_etheraddr(const char *netdev_name, const uint8_t[ETH_ADDR_LEN]);
static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats);
static int get_stats_via_proc(const char *netdev_name,
                              struct netdev_stats *stats);
static int af_packet_sock(void);
static void netdev_linux_miimon_run(void);
static void netdev_linux_miimon_wait(void);

/* Returns true if 'netdev_class' is one of the classes implemented in this
 * file, recognized by its 'run' callback being netdev_linux_run(). */
static bool
is_netdev_linux_class(const struct netdev_class *netdev_class)
{
    return netdev_class->run == netdev_linux_run;
}

/* Returns true if 'netdev' is of class netdev_tap_class. */
static bool
is_tap_netdev(const struct netdev *netdev)
{
    return netdev_get_class(netdev) == &netdev_tap_class;
}

/* Converts 'netdev' to its containing struct netdev_linux.  Asserts that
 * 'netdev' belongs to one of this file's classes. */
static struct netdev_linux *
netdev_linux_cast(const struct netdev *netdev)
{
    ovs_assert(is_netdev_linux_class(netdev_get_class(netdev)));

    return CONTAINER_OF(netdev, struct netdev_linux, up);
}

/* Converts 'rx' to its containing struct netdev_rx_linux.  Asserts that the
 * netdev behind 'rx' belongs to one of this file's classes. */
static struct netdev_rx_linux *
netdev_rx_linux_cast(const struct netdev_rx *rx)
{
    ovs_assert(is_netdev_linux_class(netdev_get_class(rx->netdev)));
    return CONTAINER_OF(rx, struct netdev_rx_linux, up);
}

static void netdev_linux_update(struct netdev_linux *netdev,
                                const struct rtnetlink_link_change *)
    OVS_REQUIRES(netdev->mutex);
static void netdev_linux_changed(struct netdev_linux *netdev,
                                 unsigned int ifi_flags, unsigned int mask)
    OVS_REQUIRES(netdev->mutex);

/* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK changes, or NULL
 * if no such socket could be created. */
static struct nl_sock *
netdev_linux_notify_sock(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    static struct nl_sock *sock;

    /* The socket is created at most once; a failed attempt is not retried. */
    if (ovsthread_once_start(&once)) {
        int error;

        error = nl_sock_create(NETLINK_ROUTE, &sock);
        if (!error) {
            error = nl_sock_join_mcgroup(sock, RTNLGRP_LINK);
            if (error) {
                nl_sock_destroy(sock);
                sock = NULL;
            }
        }
        ovsthread_once_done(&once);
    }

    return sock;
}

/* Periodic processing for this file's classes: runs MII monitoring, then
 * drains the link notification socket and applies each change to the
 * matching device. */
static void
netdev_linux_run(void)
{
    struct nl_sock *sock;
    int error;

    netdev_linux_miimon_run();

    sock = netdev_linux_notify_sock();
    if (!sock) {
        return;
    }

    do {
        /* Note: this shadows the file-level 'rl' with a stricter limit. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        uint64_t buf_stub[4096 / 8];
        struct ofpbuf buf;

        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
        error = nl_sock_recv(sock, &buf, false);
        if (!error) {
            struct rtnetlink_link_change change;

            if (rtnetlink_link_parse(&buf, &change)) {
                struct netdev *netdev_ = netdev_from_name(change.ifname);
                if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
                    struct netdev_linux *netdev = netdev_linux_cast(netdev_);

                    ovs_mutex_lock(&netdev->mutex);
                    netdev_linux_update(netdev, &change);
                    ovs_mutex_unlock(&netdev->mutex);
                }
                netdev_close(netdev_);
            }
        } else if (error == ENOBUFS) {
            /* Notifications were lost, so discard whatever is queued and
             * mark every device of netdev_linux_class as changed, dropping
             * all of its cached state (mask 0). */
            struct shash device_shash;
            struct shash_node *node;

            nl_sock_drain(sock);

            shash_init(&device_shash);
            netdev_get_devices(&netdev_linux_class, &device_shash);
            SHASH_FOR_EACH (node, &device_shash) {
                struct netdev *netdev_ = node->data;
                struct netdev_linux *netdev = netdev_linux_cast(netdev_);
                unsigned int flags;

                ovs_mutex_lock(&netdev->mutex);
                get_flags(netdev_, &flags);
                netdev_linux_changed(netdev, flags, 0);
                ovs_mutex_unlock(&netdev->mutex);

                netdev_close(netdev_);
            }
            shash_destroy(&device_shash);
        } else if (error
                   != EAGAIN) {
            VLOG_WARN_RL(&rl, "error reading or parsing netlink (%s)",
                         ovs_strerror(error));
        }
        ofpbuf_uninit(&buf);
    } while (!error);
}

/* Arranges for the poll loop to wake up when MII monitoring is due or when
 * the link notification socket becomes readable. */
static void
netdev_linux_wait(void)
{
    struct nl_sock *sock;

    netdev_linux_miimon_wait();
    sock = netdev_linux_notify_sock();
    if (sock) {
        nl_sock_wait(sock, POLLIN);
    }
}

/* Records a change to 'dev': bumps 'change_seq' (skipping 0), counts a
 * carrier reset if IFF_RUNNING differs between the old flags and
 * 'ifi_flags', stores 'ifi_flags', and keeps only the 'cache_valid' bits
 * that are set in 'mask'. */
static void
netdev_linux_changed(struct netdev_linux *dev,
                     unsigned int ifi_flags, unsigned int mask)
    OVS_REQUIRES(dev->mutex)
{
    dev->change_seq++;
    if (!dev->change_seq) {
        dev->change_seq++;
    }

    if ((dev->ifi_flags ^ ifi_flags) & IFF_RUNNING) {
        dev->carrier_resets++;
    }
    dev->ifi_flags = ifi_flags;

    dev->cache_valid &= mask;
}

/* Applies link change notification 'change' to 'dev'.  For RTM_NEWLINK the
 * MTU, Ethernet address and ifindex caches are refreshed from the message;
 * for any other message type all cached state is invalidated. */
static void
netdev_linux_update(struct netdev_linux *dev,
                    const struct rtnetlink_link_change *change)
    OVS_REQUIRES(dev->mutex)
{
    if (change->nlmsg_type == RTM_NEWLINK) {
        /* Keep drv-info */
        netdev_linux_changed(dev, change->ifi_flags, VALID_DRVINFO);

        /* Update netdev from rtnl-change msg. */
        if (change->mtu) {
            dev->mtu = change->mtu;
            dev->cache_valid |= VALID_MTU;
            dev->netdev_mtu_error = 0;
        }

        if (!eth_addr_is_zero(change->addr)) {
            memcpy(dev->etheraddr, change->addr, ETH_ADDR_LEN);
            dev->cache_valid |= VALID_ETHERADDR;
            dev->ether_addr_error = 0;
        }

        dev->ifindex = change->ifi_index;
        dev->cache_valid |= VALID_IFINDEX;
        dev->get_ifindex_error = 0;

    } else {
        netdev_linux_changed(dev, change->ifi_flags, 0);
    }
}

/* Allocates a zero-filled struct netdev_linux and returns its embedded
 * struct netdev. */
static struct netdev *
netdev_linux_alloc(void)
{
    struct netdev_linux *netdev = xzalloc(sizeof *netdev);
    return &netdev->up;
}

/* Construction steps shared by all classes in this file: initializes the
 * mutex and starts 'change_seq' at 1. */
static void
netdev_linux_common_construct(struct netdev_linux *netdev)
{
    ovs_mutex_init(&netdev->mutex);
    netdev->change_seq = 1;
}

/* Creates system and internal devices.
*/ static int netdev_linux_construct(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); int error; netdev_linux_common_construct(netdev); error = get_flags(&netdev->up, &netdev->ifi_flags); if (error == ENODEV) { if (netdev->up.netdev_class != &netdev_internal_class) { /* The device does not exist, so don't allow it to be opened. */ return ENODEV; } else { /* "Internal" netdevs have to be created as netdev objects before * they exist in the kernel, because creating them in the kernel * happens by passing a netdev object to dpif_port_add(). * Therefore, ignore the error. */ } } return 0; } /* For most types of netdevs we open the device for each call of * netdev_open(). However, this is not the case with tap devices, * since it is only possible to open the device once. In this * situation we share a single file descriptor, and consequently * buffers, across all readers. Therefore once data is read it will * be unavailable to other reads for tap devices. */ static int netdev_linux_construct_tap(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); static const char tap_dev[] = "/dev/net/tun"; const char *name = netdev_->name; struct ifreq ifr; int error; netdev_linux_common_construct(netdev); /* Open tap device. */ netdev->tap_fd = open(tap_dev, O_RDWR); if (netdev->tap_fd < 0) { error = errno; VLOG_WARN("opening \"%s\" failed: %s", tap_dev, ovs_strerror(error)); return error; } /* Create tap device. */ ifr.ifr_flags = IFF_TAP | IFF_NO_PI; ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name); if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) { VLOG_WARN("%s: creating tap device failed: %s", name, ovs_strerror(errno)); error = errno; goto error_close; } /* Make non-blocking. 
*/ error = set_nonblocking(netdev->tap_fd); if (error) { goto error_close; } return 0; error_close: close(netdev->tap_fd); return error; } static void netdev_linux_destruct(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); if (netdev->tc && netdev->tc->ops->tc_destroy) { netdev->tc->ops->tc_destroy(netdev->tc); } if (netdev_get_class(netdev_) == &netdev_tap_class && netdev->tap_fd >= 0) { close(netdev->tap_fd); } ovs_mutex_destroy(&netdev->mutex); } static void netdev_linux_dealloc(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); free(netdev); } static struct netdev_rx * netdev_linux_rx_alloc(void) { struct netdev_rx_linux *rx = xzalloc(sizeof *rx); return &rx->up; } static int netdev_linux_rx_construct(struct netdev_rx *rx_) { struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_); struct netdev *netdev_ = rx->up.netdev; struct netdev_linux *netdev = netdev_linux_cast(netdev_); int error; ovs_mutex_lock(&netdev->mutex); rx->is_tap = is_tap_netdev(netdev_); if (rx->is_tap) { rx->fd = netdev->tap_fd; } else { struct sockaddr_ll sll; int ifindex; /* Result of tcpdump -dd inbound */ static const struct sock_filter filt[] = { { 0x28, 0, 0, 0xfffff004 }, /* ldh [0] */ { 0x15, 0, 1, 0x00000004 }, /* jeq #4 jt 2 jf 3 */ { 0x6, 0, 0, 0x00000000 }, /* ret #0 */ { 0x6, 0, 0, 0x0000ffff } /* ret #65535 */ }; static const struct sock_fprog fprog = { ARRAY_SIZE(filt), (struct sock_filter *) filt }; /* Create file descriptor. */ rx->fd = socket(PF_PACKET, SOCK_RAW, 0); if (rx->fd < 0) { error = errno; VLOG_ERR("failed to create raw socket (%s)", ovs_strerror(error)); goto error; } /* Set non-blocking mode. */ error = set_nonblocking(rx->fd); if (error) { goto error; } /* Get ethernet device index. */ error = get_ifindex(&netdev->up, &ifindex); if (error) { goto error; } /* Bind to specific ethernet device. 
*/ memset(&sll, 0, sizeof sll); sll.sll_family = AF_PACKET; sll.sll_ifindex = ifindex; sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL); if (bind(rx->fd, (struct sockaddr *) &sll, sizeof sll) < 0) { error = errno; VLOG_ERR("%s: failed to bind raw socket (%s)", netdev_get_name(netdev_), ovs_strerror(error)); goto error; } /* Filter for only inbound packets. */ error = setsockopt(rx->fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof fprog); if (error) { error = errno; VLOG_ERR("%s: failed to attach filter (%s)", netdev_get_name(netdev_), ovs_strerror(error)); goto error; } } ovs_mutex_unlock(&netdev->mutex); return 0; error: if (rx->fd >= 0) { close(rx->fd); } ovs_mutex_unlock(&netdev->mutex); return error; } static void netdev_linux_rx_destruct(struct netdev_rx *rx_) { struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_); if (!rx->is_tap) { close(rx->fd); } } static void netdev_linux_rx_dealloc(struct netdev_rx *rx_) { struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_); free(rx); } static int netdev_linux_rx_recv(struct netdev_rx *rx_, void *data, size_t size) { struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_); ssize_t retval; do { retval = (rx->is_tap ? read(rx->fd, data, size) : recv(rx->fd, data, size, MSG_TRUNC)); } while (retval < 0 && errno == EINTR); if (retval >= 0) { return retval > size ? 
-EMSGSIZE : retval; } else { if (errno != EAGAIN) { VLOG_WARN_RL(&rl, "error receiving Ethernet packet on %s: %s", ovs_strerror(errno), netdev_rx_get_name(rx_)); } return -errno; } } static void netdev_linux_rx_wait(struct netdev_rx *rx_) { struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_); poll_fd_wait(rx->fd, POLLIN); } static int netdev_linux_rx_drain(struct netdev_rx *rx_) { struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_); if (rx->is_tap) { struct ifreq ifr; int error = af_inet_ifreq_ioctl(netdev_rx_get_name(rx_), &ifr, SIOCGIFTXQLEN, "SIOCGIFTXQLEN"); if (error) { return error; } drain_fd(rx->fd, ifr.ifr_qlen); return 0; } else { return drain_rcvbuf(rx->fd); } } /* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive * errno value. Returns EAGAIN without blocking if the packet cannot be queued * immediately. Returns EMSGSIZE if a partial packet was transmitted or if * the packet is too big or too small to transmit on the device. * * The caller retains ownership of 'buffer' in all cases. * * The kernel maintains a packet transmission queue, so the caller is not * expected to do additional queuing of packets. */ static int netdev_linux_send(struct netdev *netdev_, const void *data, size_t size) { for (;;) { ssize_t retval; if (!is_tap_netdev(netdev_)) { /* Use our AF_PACKET socket to send to this device. */ struct sockaddr_ll sll; struct msghdr msg; struct iovec iov; int ifindex; int sock; sock = af_packet_sock(); if (sock < 0) { return -sock; } ifindex = netdev_get_ifindex(netdev_); if (ifindex < 0) { return -ifindex; } /* We don't bother setting most fields in sockaddr_ll because the * kernel ignores them for SOCK_RAW. 
*/ memset(&sll, 0, sizeof sll); sll.sll_family = AF_PACKET; sll.sll_ifindex = ifindex; iov.iov_base = CONST_CAST(void *, data); iov.iov_len = size; msg.msg_name = &sll; msg.msg_namelen = sizeof sll; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; retval = sendmsg(sock, &msg, 0); } else { /* Use the tap fd to send to this device. This is essential for * tap devices, because packets sent to a tap device with an * AF_PACKET socket will loop back to be *received* again on the * tap device. This doesn't occur on other interface types * because we attach a socket filter to the rx socket. */ struct netdev_linux *netdev = netdev_linux_cast(netdev_); retval = write(netdev->tap_fd, data, size); } if (retval < 0) { /* The Linux AF_PACKET implementation never blocks waiting for room * for packets, instead returning ENOBUFS. Translate this into * EAGAIN for the caller. */ if (errno == ENOBUFS) { return EAGAIN; } else if (errno == EINTR) { continue; } else if (errno != EAGAIN) { VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s", netdev_get_name(netdev_), ovs_strerror(errno)); } return errno; } else if (retval != size) { VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%zd bytes of " "%zu) on %s", retval, size, netdev_get_name(netdev_)); return EMSGSIZE; } else { return 0; } } } /* Registers with the poll loop to wake up from the next call to poll_block() * when the packet transmission queue has sufficient room to transmit a packet * with netdev_send(). * * The kernel maintains a packet transmission queue, so the client is not * expected to do additional queuing of packets. Thus, this function is * unlikely to ever be used. It is included for completeness. */ static void netdev_linux_send_wait(struct netdev *netdev) { if (is_tap_netdev(netdev)) { /* TAP device always accepts packets.*/ poll_immediate_wake(); } } /* Attempts to set 'netdev''s MAC address to 'mac'. 
Returns 0 if successful,
 * otherwise a positive errno value.
 *
 * A request that matches the cached address, or that follows a cached
 * failure, is answered from the cache without contacting the kernel. */
static int
netdev_linux_set_etheraddr(struct netdev *netdev_,
                           const uint8_t mac[ETH_ADDR_LEN])
{
    struct netdev_linux *dev = netdev_linux_cast(netdev_);
    const bool tap = is_tap_netdev(netdev_);
    enum netdev_flags saved_flags = 0;
    int retval;

    ovs_mutex_lock(&dev->mutex);

    if (dev->cache_valid & VALID_ETHERADDR) {
        retval = dev->ether_addr_error;
        if (retval != 0 || eth_addr_equals(dev->etheraddr, mac)) {
            ovs_mutex_unlock(&dev->mutex);
            return retval;
        }
        dev->cache_valid &= ~VALID_ETHERADDR;
    }

    /* A tap device has to be down while its address is changed. */
    if (tap) {
        update_flags(dev, NETDEV_UP, 0, &saved_flags);
    }

    retval = set_etheraddr(netdev_get_name(netdev_), mac);
    if (retval == 0) {
        memcpy(dev->etheraddr, mac, ETH_ADDR_LEN);
    }
    if (retval == 0 || retval == ENODEV) {
        /* Success and "no such device" are both remembered in the cache. */
        dev->ether_addr_error = retval;
        dev->cache_valid |= VALID_ETHERADDR;
    }

    /* Bring the tap device back up if it was up to begin with. */
    if (tap && (saved_flags & NETDEV_UP)) {
        update_flags(dev, 0, NETDEV_UP, &saved_flags);
    }

    ovs_mutex_unlock(&dev->mutex);
    return retval;
}

/* Copies 'netdev''s MAC address to 'mac' which is passed as param.
*/ static int netdev_linux_get_etheraddr(const struct netdev *netdev_, uint8_t mac[ETH_ADDR_LEN]) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); int error; ovs_mutex_lock(&netdev->mutex); if (!(netdev->cache_valid & VALID_ETHERADDR)) { netdev->ether_addr_error = get_etheraddr(netdev_get_name(netdev_), netdev->etheraddr); netdev->cache_valid |= VALID_ETHERADDR; } error = netdev->ether_addr_error; if (!error) { memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN); } ovs_mutex_unlock(&netdev->mutex); return error; } static int netdev_linux_get_mtu__(struct netdev_linux *netdev, int *mtup) { int error; if (!(netdev->cache_valid & VALID_MTU)) { struct ifreq ifr; netdev->netdev_mtu_error = af_inet_ifreq_ioctl( netdev_get_name(&netdev->up), &ifr, SIOCGIFMTU, "SIOCGIFMTU"); netdev->mtu = ifr.ifr_mtu; netdev->cache_valid |= VALID_MTU; } error = netdev->netdev_mtu_error; if (!error) { *mtup = netdev->mtu; } return error; } /* Returns the maximum size of transmitted (and received) packets on 'netdev', * in bytes, not including the hardware header; thus, this is typically 1500 * bytes for Ethernet devices. */ static int netdev_linux_get_mtu(const struct netdev *netdev_, int *mtup) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); int error; ovs_mutex_lock(&netdev->mutex); error = netdev_linux_get_mtu__(netdev, mtup); ovs_mutex_unlock(&netdev->mutex); return error; } /* Sets the maximum size of transmitted (MTU) for given device using linux * networking ioctl interface. 
*/ static int netdev_linux_set_mtu(const struct netdev *netdev_, int mtu) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); struct ifreq ifr; int error; ovs_mutex_lock(&netdev->mutex); if (netdev->cache_valid & VALID_MTU) { error = netdev->netdev_mtu_error; if (error || netdev->mtu == mtu) { goto exit; } netdev->cache_valid &= ~VALID_MTU; } ifr.ifr_mtu = mtu; error = af_inet_ifreq_ioctl(netdev_get_name(netdev_), &ifr, SIOCSIFMTU, "SIOCSIFMTU"); if (!error || error == ENODEV) { netdev->netdev_mtu_error = error; netdev->mtu = ifr.ifr_mtu; netdev->cache_valid |= VALID_MTU; } exit: ovs_mutex_unlock(&netdev->mutex); return error; } /* Returns the ifindex of 'netdev', if successful, as a positive number. * On failure, returns a negative errno value. */ static int netdev_linux_get_ifindex(const struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); int ifindex, error; ovs_mutex_lock(&netdev->mutex); error = get_ifindex(netdev_, &ifindex); ovs_mutex_unlock(&netdev->mutex); return error ? 
-error : ifindex; } static int netdev_linux_get_carrier(const struct netdev *netdev_, bool *carrier) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); ovs_mutex_lock(&netdev->mutex); if (netdev->miimon_interval > 0) { *carrier = netdev->miimon; } else { *carrier = (netdev->ifi_flags & IFF_RUNNING) != 0; } ovs_mutex_unlock(&netdev->mutex); return 0; } static long long int netdev_linux_get_carrier_resets(const struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); long long int carrier_resets; ovs_mutex_lock(&netdev->mutex); carrier_resets = netdev->carrier_resets; ovs_mutex_unlock(&netdev->mutex); return carrier_resets; } static int netdev_linux_do_miimon(const char *name, int cmd, const char *cmd_name, struct mii_ioctl_data *data) { struct ifreq ifr; int error; memset(&ifr, 0, sizeof ifr); memcpy(&ifr.ifr_data, data, sizeof *data); error = af_inet_ifreq_ioctl(name, &ifr, cmd, cmd_name); memcpy(data, &ifr.ifr_data, sizeof *data); return error; } static int netdev_linux_get_miimon(const char *name, bool *miimon) { struct mii_ioctl_data data; int error; *miimon = false; memset(&data, 0, sizeof data); error = netdev_linux_do_miimon(name, SIOCGMIIPHY, "SIOCGMIIPHY", &data); if (!error) { /* data.phy_id is filled out by previous SIOCGMIIPHY miimon call. 
*/
        data.reg_num = MII_BMSR;
        error = netdev_linux_do_miimon(name, SIOCGMIIREG, "SIOCGMIIREG",
                                       &data);

        if (!error) {
            *miimon = !!(data.val_out & BMSR_LSTATUS);
        } else {
            VLOG_WARN_RL(&rl, "%s: failed to query MII", name);
        }
    } else {
        struct ethtool_cmd ecmd;

        VLOG_DBG_RL(&rl, "%s: failed to query MII, falling back to ethtool",
                    name);

        COVERAGE_INC(netdev_get_ethtool);
        memset(&ecmd, 0, sizeof ecmd);
        error = netdev_linux_do_ethtool(name, &ecmd, ETHTOOL_GLINK,
                                        "ETHTOOL_GLINK");
        if (!error) {
            struct ethtool_value eval;

            memcpy(&eval, &ecmd, sizeof eval);
            *miimon = !!eval.data;
        } else {
            VLOG_WARN_RL(&rl, "%s: ethtool link status failed", name);
        }
    }

    return error;
}

/* Sets the MII link-monitoring poll interval of 'netdev_' to 'interval'.
 *
 * A positive 'interval' is raised to at least 100; any other value is stored
 * as 0, which netdev_linux_miimon_run() and netdev_linux_miimon_wait() treat
 * as "do not poll".  When the stored value actually changes, the device's
 * miimon timer is marked expired.
 *
 * Always returns 0. */
static int
netdev_linux_set_miimon_interval(struct netdev *netdev_,
                                 long long int interval)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);

    ovs_mutex_lock(&netdev->mutex);
    interval = interval > 0 ? MAX(interval, 100) : 0;
    if (netdev->miimon_interval != interval) {
        netdev->miimon_interval = interval;
        timer_set_expired(&netdev->miimon_timer);
    }
    ovs_mutex_unlock(&netdev->mutex);

    return 0;
}

/* Polls link status for every device of class 'netdev_linux_class' that has
 * a positive miimon interval and whose miimon timer has expired.  If the
 * polled status differs from the cached 'dev->miimon', the cache is updated
 * and netdev_linux_changed() is called.  The timer is then re-armed with the
 * device's interval. */
static void
netdev_linux_miimon_run(void)
{
    struct shash device_shash;
    struct shash_node *node;

    shash_init(&device_shash);
    netdev_get_devices(&netdev_linux_class, &device_shash);
    SHASH_FOR_EACH (node, &device_shash) {
        struct netdev *netdev = node->data;
        struct netdev_linux *dev = netdev_linux_cast(netdev);
        bool miimon;

        ovs_mutex_lock(&dev->mutex);
        if (dev->miimon_interval > 0 && timer_expired(&dev->miimon_timer)) {
            /* NOTE(review): the return value is ignored here, so 'miimon' is
             * only meaningful if netdev_linux_get_miimon() stores a value on
             * every path -- confirm against its (not visible) beginning. */
            netdev_linux_get_miimon(dev->up.name, &miimon);
            if (miimon != dev->miimon) {
                dev->miimon = miimon;
                netdev_linux_changed(dev, dev->ifi_flags, 0);
            }

            timer_set_duration(&dev->miimon_timer, dev->miimon_interval);
        }
        ovs_mutex_unlock(&dev->mutex);
        /* Presumably drops the reference taken by netdev_get_devices(). */
        netdev_close(netdev);
    }

    shash_destroy(&device_shash);
}

/* Calls timer_wait() on the miimon timer of every device of class
 * 'netdev_linux_class' that has a positive miimon interval. */
static void
netdev_linux_miimon_wait(void)
{
    struct shash device_shash;
    struct shash_node *node;

    shash_init(&device_shash);
    netdev_get_devices(&netdev_linux_class, &device_shash);
    SHASH_FOR_EACH (node, &device_shash) {
        struct netdev *netdev = node->data;
        struct netdev_linux *dev = netdev_linux_cast(netdev);

        ovs_mutex_lock(&dev->mutex);
        if (dev->miimon_interval > 0) {
            timer_wait(&dev->miimon_timer);
        }
        ovs_mutex_unlock(&dev->mutex);
        /* Presumably drops the reference taken by netdev_get_devices(). */
        netdev_close(netdev);
    }
    shash_destroy(&device_shash);
}

/* Checks whether we can use RTM_GETLINK to get network device statistics.
 * In pre-2.6.19 kernels, this was only available if wireless extensions were
 * enabled.
 *
 * The probe is made against the "lo" device.  Returns true if netlink stats
 * work, false if the caller should fall back to /proc. */
static bool
check_for_working_netlink_stats(void)
{
    /* Decide on the netdev_get_stats() implementation to use.  Netlink is
     * preferable, so if that works, we'll use it. */
    int ifindex = do_get_ifindex("lo");
    if (ifindex < 0) {
        VLOG_WARN("failed to get ifindex for lo, "
                  "obtaining netdev stats from proc");
        return false;
    } else {
        struct netdev_stats stats;
        int error = get_stats_via_netlink(ifindex, &stats);
        if (!error) {
            VLOG_DBG("obtaining netdev stats via rtnetlink");
            return true;
        } else {
            VLOG_INFO("RTM_GETLINK failed (%s), obtaining netdev stats "
                      "via proc (you are probably running a pre-2.6.19 "
                      "kernel)", ovs_strerror(error));
            return false;
        }
    }
}

/* Exchanges the values stored in '*a' and '*b'. */
static void
swap_uint64(uint64_t *a, uint64_t *b)
{
    uint64_t tmp = *a;
    *a = *b;
    *b = tmp;
}

/* Copies 'src' into 'dst', performing format conversion in the process.
 *
 * 'src' is allowed to be misaligned.
*/
static void
netdev_stats_from_ovs_vport_stats(struct netdev_stats *dst,
                                  const struct ovs_vport_stats *src)
{
    dst->rx_packets = get_unaligned_u64(&src->rx_packets);
    dst->tx_packets = get_unaligned_u64(&src->tx_packets);
    dst->rx_bytes = get_unaligned_u64(&src->rx_bytes);
    dst->tx_bytes = get_unaligned_u64(&src->tx_bytes);
    dst->rx_errors = get_unaligned_u64(&src->rx_errors);
    dst->tx_errors = get_unaligned_u64(&src->tx_errors);
    dst->rx_dropped = get_unaligned_u64(&src->rx_dropped);
    dst->tx_dropped = get_unaligned_u64(&src->tx_dropped);
    /* The remaining counters are not taken from 'src'; zero them. */
    dst->multicast = 0;
    dst->collisions = 0;
    dst->rx_length_errors = 0;
    dst->rx_over_errors = 0;
    dst->rx_crc_errors = 0;
    dst->rx_frame_errors = 0;
    dst->rx_fifo_errors = 0;
    dst->rx_missed_errors = 0;
    dst->tx_aborted_errors = 0;
    dst->tx_carrier_errors = 0;
    dst->tx_fifo_errors = 0;
    dst->tx_heartbeat_errors = 0;
    dst->tx_window_errors = 0;
}

/* Looks up the vport named after 'netdev' with dpif_linux_vport_get() and
 * converts its statistics into '*stats'.
 *
 * Returns 0 on success, the lookup's error if it fails, or EOPNOTSUPP if the
 * reply carries no statistics. */
static int
get_stats_via_vport__(const struct netdev *netdev, struct netdev_stats *stats)
{
    struct dpif_linux_vport reply;
    struct ofpbuf *buf;
    int error;

    error = dpif_linux_vport_get(netdev_get_name(netdev), &reply, &buf);
    if (error) {
        return error;
    } else if (!reply.stats) {
        ofpbuf_delete(buf);
        return EOPNOTSUPP;
    }

    netdev_stats_from_ovs_vport_stats(stats, reply.stats);

    ofpbuf_delete(buf);

    return 0;
}

/* Fills '*stats' from the vport layer unless an earlier failure is cached
 * (VALID_VPORT_STAT_ERROR set and 'vport_stats_error' nonzero).  The outcome
 * of the attempt is stored in 'netdev->vport_stats_error' for callers to
 * inspect; failures other than ENOENT are logged. */
static void
get_stats_via_vport(const struct netdev *netdev_,
                    struct netdev_stats *stats)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);

    if (!netdev->vport_stats_error ||
        !(netdev->cache_valid & VALID_VPORT_STAT_ERROR)) {
        int error;

        error = get_stats_via_vport__(netdev_, stats);
        if (error && error != ENOENT) {
            VLOG_WARN_RL(&rl, "%s: obtaining netdev stats via vport failed "
                         "(%s)", netdev_get_name(netdev_), ovs_strerror(error));
        }
        netdev->vport_stats_error = error;
        netdev->cache_valid |= VALID_VPORT_STAT_ERROR;
    }
}

/* Retrieves statistics for 'netdev_' into '*stats' through netlink or,
 * failing that, through get_stats_via_proc().  Which of the two is used is
 * decided once per process by check_for_working_netlink_stats().
 *
 * Returns 0 on success, otherwise the error from the chosen back end (which
 * is also logged). */
static int
netdev_linux_sys_get_stats(const struct netdev *netdev_,
                           struct netdev_stats *stats)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    static int use_netlink_stats;
    int error;

    if (ovsthread_once_start(&once)) {
        use_netlink_stats = check_for_working_netlink_stats();
        ovsthread_once_done(&once);
    }

    if (use_netlink_stats) {
        int ifindex;

        error = get_ifindex(netdev_, &ifindex);
        if (!error) {
            error = get_stats_via_netlink(ifindex, stats);
        }
    } else {
        error = get_stats_via_proc(netdev_get_name(netdev_), stats);
    }

    if (error) {
        VLOG_WARN_RL(&rl, "%s: linux-sys get stats failed %d",
                      netdev_get_name(netdev_), error);
    }
    return error;

}

/* Retrieves current device stats for 'netdev-linux'.
 *
 * Vport stats and system stats are both requested.  If only one source works,
 * that source is reported; if both work, the system error/drop counters are
 * added on top of the vport counters.  An error is returned only when the
 * system query fails and the vport query failed too. */
static int
netdev_linux_get_stats(const struct netdev *netdev_,
                       struct netdev_stats *stats)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    struct netdev_stats dev_stats;
    int error;

    ovs_mutex_lock(&netdev->mutex);
    get_stats_via_vport(netdev_, stats);
    error = netdev_linux_sys_get_stats(netdev_, &dev_stats);
    if (error) {
        /* System stats failed; vport stats alone are good enough. */
        if (!netdev->vport_stats_error) {
            error = 0;
        }
    } else if (netdev->vport_stats_error) {
        /* stats not available from OVS then use ioctl stats. */
        *stats = dev_stats;
    } else {
        stats->rx_errors           += dev_stats.rx_errors;
        stats->tx_errors           += dev_stats.tx_errors;
        stats->rx_dropped          += dev_stats.rx_dropped;
        stats->tx_dropped          += dev_stats.tx_dropped;
        stats->multicast           += dev_stats.multicast;
        stats->collisions          += dev_stats.collisions;
        stats->rx_length_errors    += dev_stats.rx_length_errors;
        stats->rx_over_errors      += dev_stats.rx_over_errors;
        stats->rx_crc_errors       += dev_stats.rx_crc_errors;
        stats->rx_frame_errors     += dev_stats.rx_frame_errors;
        stats->rx_fifo_errors      += dev_stats.rx_fifo_errors;
        stats->rx_missed_errors    += dev_stats.rx_missed_errors;
        stats->tx_aborted_errors   += dev_stats.tx_aborted_errors;
        stats->tx_carrier_errors   += dev_stats.tx_carrier_errors;
        stats->tx_fifo_errors      += dev_stats.tx_fifo_errors;
        stats->tx_heartbeat_errors += dev_stats.tx_heartbeat_errors;
        stats->tx_window_errors    += dev_stats.tx_window_errors;
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Retrieves current device stats for 'netdev-tap' netdev or
 * netdev-internal.
 *
 * Same source selection as netdev_linux_get_stats(), except that system
 * counters are mirrored (rx <-> tx) before being reported or merged. */
static int
netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    struct netdev_stats dev_stats;
    int error;

    ovs_mutex_lock(&netdev->mutex);
    get_stats_via_vport(netdev_, stats);
    error = netdev_linux_sys_get_stats(netdev_, &dev_stats);
    if (error) {
        /* System stats failed; vport stats alone are good enough. */
        if (!netdev->vport_stats_error) {
            error = 0;
        }
    } else if (netdev->vport_stats_error) {
        /* Transmit and receive stats will appear to be swapped relative to the
         * other ports since we are the one sending the data, not a remote
         * computer.  For consistency, we swap them back here. This does not
         * apply if we are getting stats from the vport layer because it always
         * tracks stats from the perspective of the switch. */

        *stats = dev_stats;
        swap_uint64(&stats->rx_packets, &stats->tx_packets);
        swap_uint64(&stats->rx_bytes, &stats->tx_bytes);
        swap_uint64(&stats->rx_errors, &stats->tx_errors);
        swap_uint64(&stats->rx_dropped, &stats->tx_dropped);
        /* Direction-specific detail counters are cleared rather than
         * swapped. */
        stats->rx_length_errors = 0;
        stats->rx_over_errors = 0;
        stats->rx_crc_errors = 0;
        stats->rx_frame_errors = 0;
        stats->rx_fifo_errors = 0;
        stats->rx_missed_errors = 0;
        stats->tx_aborted_errors = 0;
        stats->tx_carrier_errors = 0;
        stats->tx_fifo_errors = 0;
        stats->tx_heartbeat_errors = 0;
        stats->tx_window_errors = 0;
    } else {
        /* Both sources worked: merge, crossing rx and tx. */
        stats->rx_dropped          += dev_stats.tx_dropped;
        stats->tx_dropped          += dev_stats.rx_dropped;

        stats->rx_errors           += dev_stats.tx_errors;
        stats->tx_errors           += dev_stats.rx_errors;

        stats->multicast           += dev_stats.multicast;
        stats->collisions          += dev_stats.collisions;
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Retrieves stats for an internal device from the vport layer only.  Returns
 * the (possibly cached) result recorded in 'vport_stats_error'. */
static int
netdev_internal_get_stats(const struct netdev *netdev_,
                          struct netdev_stats *stats)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    get_stats_via_vport(netdev_, stats);
    error = netdev->vport_stats_error;
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Pushes the packet, byte, error and drop counters from 'stats' to the vport
 * named after 'netdev' with an OVS_VPORT_CMD_SET transaction.
 *
 * Returns the transaction result, with ENODEV translated to EOPNOTSUPP. */
static int
netdev_internal_set_stats(struct netdev *netdev,
                          const struct netdev_stats *stats)
{
    struct ovs_vport_stats vport_stats;
    struct dpif_linux_vport vport;
    int err;

    vport_stats.rx_packets = stats->rx_packets;
    vport_stats.tx_packets = stats->tx_packets;
    vport_stats.rx_bytes = stats->rx_bytes;
    vport_stats.tx_bytes = stats->tx_bytes;
    vport_stats.rx_errors = stats->rx_errors;
    vport_stats.tx_errors = stats->tx_errors;
    vport_stats.rx_dropped = stats->rx_dropped;
    vport_stats.tx_dropped = stats->tx_dropped;

    dpif_linux_vport_init(&vport);
    vport.cmd = OVS_VPORT_CMD_SET;
    vport.name = netdev_get_name(netdev);
    vport.stats = &vport_stats;

    err = dpif_linux_vport_transact(&vport, NULL, NULL);

    /* If the vport layer doesn't know about the device, that doesn't mean it
     * doesn't exist (after all we were able to open it when netdev_open() was
     * called), it just means that it isn't attached and we'll be getting
     * stats a different way. */
    if (err == ENODEV) {
        err = EOPNOTSUPP;
    }

    return err;
}

/* Refreshes 'netdev->supported', 'netdev->advertised' and 'netdev->current'
 * from an ETHTOOL_GSET query, unless VALID_FEATURES is already set.
 *
 * The outcome (success or error) is cached in 'netdev->get_features_error'
 * and VALID_FEATURES is set either way, so a failed query is not retried
 * until the cache bit is cleared elsewhere. */
static void
netdev_linux_read_features(struct netdev_linux *netdev)
{
    struct ethtool_cmd ecmd;
    uint32_t speed;
    int error;

    if (netdev->cache_valid & VALID_FEATURES) {
        return;
    }

    COVERAGE_INC(netdev_get_ethtool);
    memset(&ecmd, 0, sizeof ecmd);
    error = netdev_linux_do_ethtool(netdev->up.name, &ecmd,
                                    ETHTOOL_GSET, "ETHTOOL_GSET");
    if (error) {
        goto out;
    }

    /* Supported features. */
    netdev->supported = 0;
    if (ecmd.supported & SUPPORTED_10baseT_Half) {
        netdev->supported |= NETDEV_F_10MB_HD;
    }
    if (ecmd.supported & SUPPORTED_10baseT_Full) {
        netdev->supported |= NETDEV_F_10MB_FD;
    }
    if (ecmd.supported & SUPPORTED_100baseT_Half)  {
        netdev->supported |= NETDEV_F_100MB_HD;
    }
    if (ecmd.supported & SUPPORTED_100baseT_Full) {
        netdev->supported |= NETDEV_F_100MB_FD;
    }
    if (ecmd.supported & SUPPORTED_1000baseT_Half) {
        netdev->supported |= NETDEV_F_1GB_HD;
    }
    if (ecmd.supported & SUPPORTED_1000baseT_Full) {
        netdev->supported |= NETDEV_F_1GB_FD;
    }
    if (ecmd.supported & SUPPORTED_10000baseT_Full) {
        netdev->supported |= NETDEV_F_10GB_FD;
    }
    if (ecmd.supported & SUPPORTED_TP) {
        netdev->supported |= NETDEV_F_COPPER;
    }
    if (ecmd.supported & SUPPORTED_FIBRE) {
        netdev->supported |= NETDEV_F_FIBER;
    }
    if (ecmd.supported & SUPPORTED_Autoneg) {
        netdev->supported |= NETDEV_F_AUTONEG;
    }
    if (ecmd.supported & SUPPORTED_Pause) {
        netdev->supported |= NETDEV_F_PAUSE;
    }
    if (ecmd.supported & SUPPORTED_Asym_Pause) {
        netdev->supported |= NETDEV_F_PAUSE_ASYM;
    }

    /* Advertised features. */
    netdev->advertised = 0;
    if (ecmd.advertising & ADVERTISED_10baseT_Half) {
        netdev->advertised |= NETDEV_F_10MB_HD;
    }
    if (ecmd.advertising & ADVERTISED_10baseT_Full) {
        netdev->advertised |= NETDEV_F_10MB_FD;
    }
    if (ecmd.advertising & ADVERTISED_100baseT_Half) {
        netdev->advertised |= NETDEV_F_100MB_HD;
    }
    if (ecmd.advertising & ADVERTISED_100baseT_Full) {
        netdev->advertised |= NETDEV_F_100MB_FD;
    }
    if (ecmd.advertising & ADVERTISED_1000baseT_Half) {
        netdev->advertised |= NETDEV_F_1GB_HD;
    }
    if (ecmd.advertising & ADVERTISED_1000baseT_Full) {
        netdev->advertised |= NETDEV_F_1GB_FD;
    }
    if (ecmd.advertising & ADVERTISED_10000baseT_Full) {
        netdev->advertised |= NETDEV_F_10GB_FD;
    }
    if (ecmd.advertising & ADVERTISED_TP) {
        netdev->advertised |= NETDEV_F_COPPER;
    }
    if (ecmd.advertising & ADVERTISED_FIBRE) {
        netdev->advertised |= NETDEV_F_FIBER;
    }
    if (ecmd.advertising & ADVERTISED_Autoneg) {
        netdev->advertised |= NETDEV_F_AUTONEG;
    }
    if (ecmd.advertising & ADVERTISED_Pause) {
        netdev->advertised |= NETDEV_F_PAUSE;
    }
    if (ecmd.advertising & ADVERTISED_Asym_Pause) {
        netdev->advertised |= NETDEV_F_PAUSE_ASYM;
    }

    /* Current settings. */
    speed = ecmd.speed;
    if (speed == SPEED_10) {
        netdev->current = ecmd.duplex ? NETDEV_F_10MB_FD : NETDEV_F_10MB_HD;
    } else if (speed == SPEED_100) {
        netdev->current = ecmd.duplex ? NETDEV_F_100MB_FD : NETDEV_F_100MB_HD;
    } else if (speed == SPEED_1000) {
        netdev->current = ecmd.duplex ? NETDEV_F_1GB_FD : NETDEV_F_1GB_HD;
    } else if (speed == SPEED_10000) {
        netdev->current = NETDEV_F_10GB_FD;
    } else if (speed == 40000) {
        netdev->current = NETDEV_F_40GB_FD;
    } else if (speed == 100000) {
        netdev->current = NETDEV_F_100GB_FD;
    } else if (speed == 1000000) {
        netdev->current = NETDEV_F_1TB_FD;
    } else {
        /* Unrecognized speed: report no speed/duplex bit. */
        netdev->current = 0;
    }

    if (ecmd.port == PORT_TP) {
        netdev->current |= NETDEV_F_COPPER;
    } else if (ecmd.port == PORT_FIBRE) {
        netdev->current |= NETDEV_F_FIBER;
    }

    if (ecmd.autoneg) {
        netdev->current |= NETDEV_F_AUTONEG;
    }

out:
    netdev->cache_valid |= VALID_FEATURES;
    netdev->get_features_error = error;
}

/* Stores the features supported by 'netdev' into each of '*current',
 * '*advertised', '*supported', and '*peer'.  Each value is a bitmap of
 * NETDEV_* bits.  Returns 0 if successful, otherwise a positive errno value.
 *
 * On failure the output parameters are left untouched.  '*peer' is always
 * reported as 0. */
static int
netdev_linux_get_features(const struct netdev *netdev_,
                          enum netdev_features *current,
                          enum netdev_features *advertised,
                          enum netdev_features *supported,
                          enum netdev_features *peer)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    netdev_linux_read_features(netdev);
    if (!netdev->get_features_error) {
        *current = netdev->current;
        *advertised = netdev->advertised;
        *supported = netdev->supported;
        *peer = 0;              /* XXX */
    }
    error = netdev->get_features_error;
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Set the features advertised by 'netdev' to 'advertise'.
*/
static int
netdev_linux_set_advertisements(struct netdev *netdev_,
                                enum netdev_features advertise)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    struct ethtool_cmd ecmd;
    int error;

    ovs_mutex_lock(&netdev->mutex);

    /* Read the current settings so that only 'advertising' is modified. */
    COVERAGE_INC(netdev_get_ethtool);
    memset(&ecmd, 0, sizeof ecmd);
    error = netdev_linux_do_ethtool(netdev_get_name(netdev_), &ecmd,
                                    ETHTOOL_GSET, "ETHTOOL_GSET");
    if (error) {
        goto exit;
    }

    ecmd.advertising = 0;
    if (advertise & NETDEV_F_10MB_HD) {
        ecmd.advertising |= ADVERTISED_10baseT_Half;
    }
    if (advertise & NETDEV_F_10MB_FD) {
        ecmd.advertising |= ADVERTISED_10baseT_Full;
    }
    if (advertise & NETDEV_F_100MB_HD) {
        ecmd.advertising |= ADVERTISED_100baseT_Half;
    }
    if (advertise & NETDEV_F_100MB_FD) {
        ecmd.advertising |= ADVERTISED_100baseT_Full;
    }
    if (advertise & NETDEV_F_1GB_HD) {
        ecmd.advertising |= ADVERTISED_1000baseT_Half;
    }
    if (advertise & NETDEV_F_1GB_FD) {
        ecmd.advertising |= ADVERTISED_1000baseT_Full;
    }
    if (advertise & NETDEV_F_10GB_FD) {
        ecmd.advertising |= ADVERTISED_10000baseT_Full;
    }
    if (advertise & NETDEV_F_COPPER) {
        ecmd.advertising |= ADVERTISED_TP;
    }
    if (advertise & NETDEV_F_FIBER) {
        ecmd.advertising |= ADVERTISED_FIBRE;
    }
    if (advertise & NETDEV_F_AUTONEG) {
        ecmd.advertising |= ADVERTISED_Autoneg;
    }
    if (advertise & NETDEV_F_PAUSE) {
        ecmd.advertising |= ADVERTISED_Pause;
    }
    if (advertise & NETDEV_F_PAUSE_ASYM) {
        ecmd.advertising |= ADVERTISED_Asym_Pause;
    }
    COVERAGE_INC(netdev_set_ethtool);
    error = netdev_linux_do_ethtool(netdev_get_name(netdev_), &ecmd,
                                    ETHTOOL_SSET, "ETHTOOL_SSET");

exit:
    ovs_mutex_unlock(&netdev->mutex);
    return error;
}

/* Attempts to set input rate limiting (policing) policy.  Returns 0 if
 * successful, otherwise a positive errno value.
 *
 * A 'kbits_rate' of 0 removes policing.  Results are cached: a repeated
 * request with unchanged values, or any request after a cached error,
 * returns the cached result without touching the kernel. */
static int
netdev_linux_set_policing(struct netdev *netdev_,
                          uint32_t kbits_rate, uint32_t kbits_burst)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    const char *netdev_name = netdev_get_name(netdev_);
    int error;

    kbits_burst = (!kbits_rate ? 0       /* Force to 0 if no rate specified. */
                   : !kbits_burst ? 1000 /* Default to 1000 kbits if 0. */
                   : kbits_burst);       /* Stick with user-specified value. */

    ovs_mutex_lock(&netdev->mutex);
    if (netdev->cache_valid & VALID_POLICING) {
        error = netdev->netdev_policing_error;
        if (error || (netdev->kbits_rate == kbits_rate &&
                      netdev->kbits_burst == kbits_burst)) {
            /* Assume that settings haven't changed since we last set them. */
            goto out;
        }
        netdev->cache_valid &= ~VALID_POLICING;
    }

    COVERAGE_INC(netdev_set_policing);
    /* Remove any existing ingress qdisc. */
    error = tc_add_del_ingress_qdisc(netdev_, false);
    if (error) {
        VLOG_WARN_RL(&rl, "%s: removing policing failed: %s",
                     netdev_name, ovs_strerror(error));
        goto out;
    }

    if (kbits_rate) {
        error = tc_add_del_ingress_qdisc(netdev_, true);
        if (error) {
            VLOG_WARN_RL(&rl, "%s: adding policing qdisc failed: %s",
                         netdev_name, ovs_strerror(error));
            goto out;
        }

        error = tc_add_policer(netdev_, kbits_rate, kbits_burst);
        if (error){
            VLOG_WARN_RL(&rl, "%s: adding policing action failed: %s",
                    netdev_name, ovs_strerror(error));
            goto out;
        }
    }

    netdev->kbits_rate = kbits_rate;
    netdev->kbits_burst = kbits_burst;

out:
    /* Only success and ENODEV are cached; other errors are retried on the
     * next call. */
    if (!error || error == ENODEV) {
        netdev->netdev_policing_error = error;
        netdev->cache_valid |= VALID_POLICING;
    }
    ovs_mutex_unlock(&netdev->mutex);
    return error;
}

/* Adds to 'types' the OVS name of every entry in 'tcs' that has a
 * 'tc_install' function and a nonempty 'ovs_name'.  Always returns 0. */
static int
netdev_linux_get_qos_types(const struct netdev *netdev OVS_UNUSED,
                           struct sset *types)
{
    const struct tc_ops *const *opsp;

    for (opsp = tcs; *opsp != NULL; opsp++) {
        const struct tc_ops *ops = *opsp;
        if (ops->tc_install && ops->ovs_name[0] != '\0') {
            sset_add(types, ops->ovs_name);
        }
    }
    return 0;
}

/* Returns the entry of 'tcs' whose 'ovs_name' equals 'name', or NULL if
 * there is none. */
static const struct tc_ops *
tc_lookup_ovs_name(const char *name)
{
    const struct tc_ops *const *opsp;

    for (opsp = tcs; *opsp != NULL; opsp++) {
        const struct tc_ops *ops = *opsp;
        if (!strcmp(name, ops->ovs_name)) {
            return ops;
        }
    }
    return NULL;
}

/* Returns the entry of 'tcs' whose 'linux_name' equals 'name', or NULL if
 * there is none.  Entries with a null 'linux_name' are skipped. */
static const struct tc_ops *
tc_lookup_linux_name(const char *name)
{
    const struct tc_ops *const *opsp;

    for (opsp = tcs; *opsp != NULL; opsp++) {
        const struct tc_ops *ops = *opsp;
        if (ops->linux_name && !strcmp(name, ops->linux_name)) {
            return ops;
        }
    }
    return NULL;
}

/* Searches the queue map of 'netdev_''s traffic-control state for a queue
 * with id 'queue_id', looking only in the bucket for 'hash'.  Returns the
 * queue, or NULL if it is absent.  Dereferences 'netdev->tc', which must
 * therefore be non-null. */
static struct tc_queue *
tc_find_queue__(const struct netdev *netdev_, unsigned int queue_id,
                size_t hash)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    struct tc_queue *queue;

    HMAP_FOR_EACH_IN_BUCKET (queue, hmap_node, hash, &netdev->tc->queues) {
        if (queue->queue_id == queue_id) {
            return queue;
        }
    }
    return NULL;
}

/* Like tc_find_queue__(), computing the hash as hash_int(queue_id, 0). */
static struct tc_queue *
tc_find_queue(const struct netdev *netdev, unsigned int queue_id)
{
    return tc_find_queue__(netdev, queue_id, hash_int(queue_id, 0));
}

/* Stores in 'caps->n_queues' the queue count of the QoS discipline whose OVS
 * name is 'type'.  Returns 0, or EOPNOTSUPP if 'type' is unknown. */
static int
netdev_linux_get_qos_capabilities(const struct netdev *netdev OVS_UNUSED,
                                  const char *type,
                                  struct netdev_qos_capabilities *caps)
{
    const struct tc_ops *ops = tc_lookup_ovs_name(type);
    if (!ops) {
        return EOPNOTSUPP;
    }

    caps->n_queues = ops->n_queues;
    return 0;
}

/* Stores the OVS name of 'netdev_''s current qdisc in '*typep' and, if the
 * discipline provides 'qdisc_get', lets it fill in 'details'.  Returns 0 on
 * success, otherwise the error from tc_query_qdisc() or 'qdisc_get'. */
static int
netdev_linux_get_qos(const struct netdev *netdev_,
                     const char **typep, struct smap *details)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = tc_query_qdisc(netdev_);
    if (!error) {
        *typep = netdev->tc->ops->ovs_name;
        error = (netdev->tc->ops->qdisc_get
                 ? netdev->tc->ops->qdisc_get(netdev_, details)
                 : 0);
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Configures 'netdev_' to use the QoS discipline named 'type' with
 * 'details'.  If that discipline is already installed it is reconfigured in
 * place via 'qdisc_set' (when provided); otherwise the existing qdisc is
 * deleted and the new one installed.
 *
 * Returns 0 on success, EOPNOTSUPP if 'type' is unknown or cannot be
 * installed, otherwise the error from the failing step. */
static int
netdev_linux_set_qos(struct netdev *netdev_,
                     const char *type, const struct smap *details)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    const struct tc_ops *new_ops;
    int error;

    new_ops = tc_lookup_ovs_name(type);
    if (!new_ops || !new_ops->tc_install) {
        return EOPNOTSUPP;
    }

    ovs_mutex_lock(&netdev->mutex);
    error = tc_query_qdisc(netdev_);
    if (error) {
        goto exit;
    }

    if (new_ops == netdev->tc->ops) {
        error = new_ops->qdisc_set ? new_ops->qdisc_set(netdev_, details) : 0;
    } else {
        /* Delete existing qdisc. */
        error = tc_del_qdisc(netdev_);
        if (error) {
            goto exit;
        }
        ovs_assert(netdev->tc == NULL);

        /* Install new qdisc. */
        error = new_ops->tc_install(netdev_, details);
        ovs_assert((error == 0) == (netdev->tc != NULL));
    }

exit:
    ovs_mutex_unlock(&netdev->mutex);
    return error;
}

/* Retrieves the configuration of queue 'queue_id' on 'netdev_' into
 * 'details' through the discipline's 'class_get'.  Returns ENOENT if no such
 * queue exists.
 *
 * NOTE(review): 'class_get' is called without a null check here, unlike in
 * netdev_linux_queue_dump_start() -- confirm every discipline with queues
 * provides it. */
static int
netdev_linux_get_queue(const struct netdev *netdev_,
                       unsigned int queue_id, struct smap *details)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = tc_query_qdisc(netdev_);
    if (!error) {
        struct tc_queue *queue = tc_find_queue(netdev_, queue_id);
        error = (queue
                ? netdev->tc->ops->class_get(netdev_, queue, details)
                : ENOENT);
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Configures queue 'queue_id' on 'netdev_' from 'details' through the
 * discipline's 'class_set'.  Returns EINVAL if 'queue_id' is not below the
 * discipline's 'n_queues' or the discipline has no 'class_set'. */
static int
netdev_linux_set_queue(struct netdev *netdev_,
                       unsigned int queue_id, const struct smap *details)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = tc_query_qdisc(netdev_);
    if (!error) {
        error = (queue_id < netdev->tc->ops->n_queues
                 && netdev->tc->ops->class_set
                 ? netdev->tc->ops->class_set(netdev_, queue_id, details)
                 : EINVAL);
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Deletes queue 'queue_id' from 'netdev_' through the discipline's
 * 'class_delete'.  Returns EINVAL if the discipline has no 'class_delete',
 * ENOENT if no such queue exists. */
static int
netdev_linux_delete_queue(struct netdev *netdev_, unsigned int queue_id)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = tc_query_qdisc(netdev_);
    if (!error) {
        if (netdev->tc->ops->class_delete) {
            struct tc_queue *queue = tc_find_queue(netdev_, queue_id);
            error = (queue
                     ? netdev->tc->ops->class_delete(netdev_, queue)
                     : ENOENT);
        } else {
            error = EINVAL;
        }
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Retrieves statistics for queue 'queue_id' on 'netdev_' into '*stats'.
 * 'stats->created' is taken from the cached queue; the rest is filled in by
 * the discipline's 'class_get_stats'.  Returns EOPNOTSUPP if the discipline
 * has no 'class_get_stats', ENOENT if no such queue exists. */
static int
netdev_linux_get_queue_stats(const struct netdev *netdev_,
                             unsigned int queue_id,
                             struct netdev_queue_stats *stats)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = tc_query_qdisc(netdev_);
    if (!error) {
        if (netdev->tc->ops->class_get_stats) {
            const struct tc_queue *queue = tc_find_queue(netdev_, queue_id);
            if (queue) {
                stats->created = queue->created;
                error = netdev->tc->ops->class_get_stats(netdev_, queue,
                                                         stats);
            } else {
                error = ENOENT;
            }
        } else {
            error = EOPNOTSUPP;
        }
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Starts an RTM_GETTCLASS netlink dump for 'netdev' in '*dump'.  Returns
 * false, without starting a dump, if the request cannot be built. */
static bool
start_queue_dump(const struct netdev *netdev, struct nl_dump *dump)
{
    struct ofpbuf request;
    struct tcmsg *tcmsg;

    tcmsg = tc_make_request(netdev, RTM_GETTCLASS, 0, &request);
    if (!tcmsg) {
        return false;
    }
    tcmsg->tcm_parent = 0;
    nl_dump_start(dump, NETLINK_ROUTE, &request);
    ofpbuf_uninit(&request);
    return true;
}

/* Iteration state for the queue_dump_{start,next,done} functions: a snapshot
 * of the queue ids present when the dump started. */
struct netdev_linux_queue_state {
    unsigned int *queues;       /* Snapshot of queue ids (malloc'd). */
    size_t cur_queue;           /* Index of the next id to visit. */
    size_t n_queues;            /* Number of elements in 'queues'. */
};

/* Begins a queue dump on 'netdev_' by snapshotting its queue ids into a
 * newly allocated state stored in '*statep'.  On error '*statep' is left
 * untouched.  Returns EOPNOTSUPP if the discipline has no 'class_get'. */
static int
netdev_linux_queue_dump_start(const struct netdev *netdev_, void **statep)
{
    const struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = tc_query_qdisc(netdev_);
    if (!error) {
        if (netdev->tc->ops->class_get) {
            struct netdev_linux_queue_state *state;
            struct tc_queue *queue;
            size_t i;

            *statep = state = xmalloc(sizeof *state);
            state->n_queues = hmap_count(&netdev->tc->queues);
            state->cur_queue = 0;
            state->queues = xmalloc(state->n_queues * sizeof *state->queues);

            i = 0;
            HMAP_FOR_EACH (queue, hmap_node, &netdev->tc->queues) {
                state->queues[i++] = queue->queue_id;
            }
        } else {
            error = EOPNOTSUPP;
        }
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Advances the dump in 'state_' to the next snapshotted queue id that still
 * exists, storing its id in '*queue_idp' and its configuration in 'details'.
 * Ids whose queue has disappeared since the snapshot are skipped.  Returns
 * the result of 'class_get', or EOF once the snapshot is exhausted. */
static int
netdev_linux_queue_dump_next(const struct netdev *netdev_, void *state_,
                             unsigned int *queue_idp, struct smap *details)
{
    const struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    struct netdev_linux_queue_state *state = state_;
    int error = EOF;

    ovs_mutex_lock(&netdev->mutex);
    while (state->cur_queue < state->n_queues) {
        unsigned int queue_id = state->queues[state->cur_queue++];
        struct tc_queue *queue = tc_find_queue(netdev_, queue_id);

        if (queue) {
            *queue_idp = queue_id;
            error = netdev->tc->ops->class_get(netdev_, queue, details);
            break;
        }
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Frees the dump state allocated by netdev_linux_queue_dump_start().
 * Always returns 0. */
static int
netdev_linux_queue_dump_done(const struct netdev *netdev OVS_UNUSED,
                             void *state_)
{
    struct netdev_linux_queue_state *state = state_;

    free(state->queues);
    free(state);
    return 0;
}

/* Dumps the kernel's traffic classes for 'netdev_' and hands each reply to
 * the discipline's 'class_dump_stats' together with 'cb' and 'aux'.
 *
 * The dump runs to completion even if a reply fails; the last nonzero result
 * (from a reply or from nl_dump_done()) is returned.  Returns EOPNOTSUPP if
 * the discipline has no 'class_dump_stats', ENODEV if the dump cannot be
 * started. */
static int
netdev_linux_dump_queue_stats(const struct netdev *netdev_,
                              netdev_dump_queue_stats_cb *cb, void *aux)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = tc_query_qdisc(netdev_);
    if (!error) {
        struct nl_dump dump;

        if (!netdev->tc->ops->class_dump_stats) {
            error = EOPNOTSUPP;
        } else if (!start_queue_dump(netdev_, &dump)) {
            error = ENODEV;
        } else {
            struct ofpbuf msg;
            int retval;

            while (nl_dump_next(&dump, &msg)) {
                retval = netdev->tc->ops->class_dump_stats(netdev_, &msg,
                                                           cb, aux);
                if (retval) {
                    error = retval;
                }
            }

            retval = nl_dump_done(&dump);
            if (retval) {
                error = retval;
            }
        }
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Stores 'netdev_''s IPv4 address and netmask in '*address' and '*netmask',
 * querying the kernel (SIOCGIFADDR, SIOCGIFNETMASK) on first use and caching
 * the result under VALID_IN4.
 *
 * Returns 0 on success, EADDRNOTAVAIL if the address is INADDR_ANY,
 * otherwise the error from the failing query. */
static int
netdev_linux_get_in4(const struct netdev *netdev_,
                     struct in_addr *address, struct in_addr *netmask)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    if (!(netdev->cache_valid & VALID_IN4)) {
        error = netdev_linux_get_ipv4(netdev_, &netdev->address,
                                      SIOCGIFADDR, "SIOCGIFADDR");
        if (!error) {
            error = netdev_linux_get_ipv4(netdev_, &netdev->netmask,
                                          SIOCGIFNETMASK, "SIOCGIFNETMASK");
            if (!error) {
                netdev->cache_valid |= VALID_IN4;
            }
        }
    } else {
        error = 0;
    }

    if (!error) {
        if (netdev->address.s_addr != INADDR_ANY) {
            *address = netdev->address;
            *netmask = netdev->netmask;
        } else {
            error = EADDRNOTAVAIL;
        }
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Assigns IPv4 'address' (and, unless it is INADDR_ANY, 'netmask') to
 * 'netdev_' and updates the cached values.
 *
 * NOTE(review): the cache is updated as soon as SIOCSIFADDR succeeds, so if
 * SIOCSIFNETMASK then fails the cached netmask may not match the kernel's. */
static int
netdev_linux_set_in4(struct netdev *netdev_, struct in_addr address,
                     struct in_addr netmask)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = do_set_addr(netdev_, SIOCSIFADDR, "SIOCSIFADDR", address);
    if (!error) {
        netdev->cache_valid |= VALID_IN4;
        netdev->address = address;
        netdev->netmask = netmask;
        if (address.s_addr != INADDR_ANY) {
            error = do_set_addr(netdev_, SIOCSIFNETMASK,
                                "SIOCSIFNETMASK", netmask);
        }
    }
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Parses 'line', one line in the format this file reads from
 * /proc/net/if_inet6: 16 two-digit hex address bytes, four hex fields that
 * are skipped, then an interface name of at most 16 characters.  Stores the
 * address in '*in6' and the name in 'ifname'.  Returns true only if all 17
 * values were converted. */
static bool
parse_if_inet6_line(const char *line,
                    struct in6_addr *in6, char ifname[16 + 1])
{
    uint8_t *s6 = in6->s6_addr;
#define X8 "%2"SCNx8
    return sscanf(line,
                  " "X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8 X8
                  "%*x %*x %*x %*x %16s\n",
                  &s6[0], &s6[1], &s6[2], &s6[3],
                  &s6[4], &s6[5], &s6[6], &s6[7],
                  &s6[8], &s6[9], &s6[10], &s6[11],
                  &s6[12], &s6[13], &s6[14], &s6[15],
                  ifname) == 17;
}

/* Stores in '*in6' the first IPv6 address listed for 'netdev' in
 * /proc/net/if_inet6, or in6addr_any if none is listed or the file cannot be
 * opened.  The result is cached under VALID_IN6.  Always returns 0.
*/ static int netdev_linux_get_in6(const struct netdev *netdev_, struct in6_addr *in6) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); ovs_mutex_lock(&netdev->mutex); if (!(netdev->cache_valid & VALID_IN6)) { FILE *file; char line[128]; netdev->in6 = in6addr_any; file = fopen("/proc/net/if_inet6", "r"); if (file != NULL) { const char *name = netdev_get_name(netdev_); while (fgets(line, sizeof line, file)) { struct in6_addr in6_tmp; char ifname[16 + 1]; if (parse_if_inet6_line(line, &in6_tmp, ifname) && !strcmp(name, ifname)) { netdev->in6 = in6_tmp; break; } } fclose(file); } netdev->cache_valid |= VALID_IN6; } *in6 = netdev->in6; ovs_mutex_unlock(&netdev->mutex); return 0; } static void make_in4_sockaddr(struct sockaddr *sa, struct in_addr addr) { struct sockaddr_in sin; memset(&sin, 0, sizeof sin); sin.sin_family = AF_INET; sin.sin_addr = addr; sin.sin_port = 0; memset(sa, 0, sizeof *sa); memcpy(sa, &sin, sizeof sin); } static int do_set_addr(struct netdev *netdev, int ioctl_nr, const char *ioctl_name, struct in_addr addr) { struct ifreq ifr; make_in4_sockaddr(&ifr.ifr_addr, addr); return af_inet_ifreq_ioctl(netdev_get_name(netdev), &ifr, ioctl_nr, ioctl_name); } /* Adds 'router' as a default IP gateway. 
*/ static int netdev_linux_add_router(struct netdev *netdev OVS_UNUSED, struct in_addr router) { struct in_addr any = { INADDR_ANY }; struct rtentry rt; int error; memset(&rt, 0, sizeof rt); make_in4_sockaddr(&rt.rt_dst, any); make_in4_sockaddr(&rt.rt_gateway, router); make_in4_sockaddr(&rt.rt_genmask, any); rt.rt_flags = RTF_UP | RTF_GATEWAY; error = af_inet_ioctl(SIOCADDRT, &rt); if (error) { VLOG_WARN("ioctl(SIOCADDRT): %s", ovs_strerror(error)); } return error; } static int netdev_linux_get_next_hop(const struct in_addr *host, struct in_addr *next_hop, char **netdev_name) { static const char fn[] = "/proc/net/route"; FILE *stream; char line[256]; int ln; *netdev_name = NULL; stream = fopen(fn, "r"); if (stream == NULL) { VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, ovs_strerror(errno)); return errno; } ln = 0; while (fgets(line, sizeof line, stream)) { if (++ln >= 2) { char iface[17]; ovs_be32 dest, gateway, mask; int refcnt, metric, mtu; unsigned int flags, use, window, irtt; if (sscanf(line, "%16s %"SCNx32" %"SCNx32" %04X %d %u %d %"SCNx32 " %d %u %u\n", iface, &dest, &gateway, &flags, &refcnt, &use, &metric, &mask, &mtu, &window, &irtt) != 11) { VLOG_WARN_RL(&rl, "%s: could not parse line %d: %s", fn, ln, line); continue; } if (!(flags & RTF_UP)) { /* Skip routes that aren't up. */ continue; } /* The output of 'dest', 'mask', and 'gateway' were given in * network byte order, so we don't need need any endian * conversions here. */ if ((dest & mask) == (host->s_addr & mask)) { if (!gateway) { /* The host is directly reachable. */ next_hop->s_addr = 0; } else { /* To reach the host, we must go through a gateway. 
*/ next_hop->s_addr = gateway; } *netdev_name = xstrdup(iface); fclose(stream); return 0; } } } fclose(stream); return ENXIO; } static int netdev_linux_get_status(const struct netdev *netdev_, struct smap *smap) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); int error = 0; ovs_mutex_lock(&netdev->mutex); if (!(netdev->cache_valid & VALID_DRVINFO)) { struct ethtool_cmd *cmd = (struct ethtool_cmd *) &netdev->drvinfo; COVERAGE_INC(netdev_get_ethtool); memset(&netdev->drvinfo, 0, sizeof netdev->drvinfo); error = netdev_linux_do_ethtool(netdev->up.name, cmd, ETHTOOL_GDRVINFO, "ETHTOOL_GDRVINFO"); if (!error) { netdev->cache_valid |= VALID_DRVINFO; } } if (!error) { smap_add(smap, "driver_name", netdev->drvinfo.driver); smap_add(smap, "driver_version", netdev->drvinfo.version); smap_add(smap, "firmware_version", netdev->drvinfo.fw_version); } ovs_mutex_unlock(&netdev->mutex); return error; } static int netdev_internal_get_status(const struct netdev *netdev OVS_UNUSED, struct smap *smap) { smap_add(smap, "driver_name", "openvswitch"); return 0; } /* Looks up the ARP table entry for 'ip' on 'netdev'. If one exists and can be * successfully retrieved, it stores the corresponding MAC address in 'mac' and * returns 0. Otherwise, it returns a positive errno value; in particular, * ENXIO indicates that there is not ARP table entry for 'ip' on 'netdev'. 
*/
static int
netdev_linux_arp_lookup(const struct netdev *netdev,
                        ovs_be32 ip, uint8_t mac[ETH_ADDR_LEN])
{
    struct arpreq r;
    struct sockaddr_in sin;
    int retval;

    /* Build the SIOCGARP request: protocol address 'ip', Ethernet hardware
     * type, restricted to the device named after 'netdev'. */
    memset(&r, 0, sizeof r);
    memset(&sin, 0, sizeof sin);
    sin.sin_family = AF_INET;
    sin.sin_addr.s_addr = ip;
    sin.sin_port = 0;
    memcpy(&r.arp_pa, &sin, sizeof sin);
    r.arp_ha.sa_family = ARPHRD_ETHER;
    r.arp_flags = 0;
    ovs_strzcpy(r.arp_dev, netdev_get_name(netdev), sizeof r.arp_dev);
    COVERAGE_INC(netdev_arp_lookup);
    retval = af_inet_ioctl(SIOCGARP, &r);
    if (!retval) {
        memcpy(mac, r.arp_ha.sa_data, ETH_ADDR_LEN);
    } else if (retval != ENXIO) {
        /* ENXIO just means "no entry", so it is not worth a warning. */
        VLOG_WARN_RL(&rl, "%s: could not look up ARP entry for "IP_FMT": %s",
                     netdev_get_name(netdev), IP_ARGS(ip),
                     ovs_strerror(retval));
    }
    return retval;
}

/* Translates the NETDEV_UP and NETDEV_PROMISC bits of 'nd' into the
 * corresponding IFF_UP and IFF_PROMISC bits.  Other bits are ignored. */
static int
nd_to_iff_flags(enum netdev_flags nd)
{
    int iff = 0;
    if (nd & NETDEV_UP) {
        iff |= IFF_UP;
    }
    if (nd & NETDEV_PROMISC) {
        iff |= IFF_PROMISC;
    }
    return iff;
}

/* Inverse of nd_to_iff_flags(): translates the IFF_UP and IFF_PROMISC bits
 * of 'iff' into NETDEV_UP and NETDEV_PROMISC.  Other bits are ignored. */
static int
iff_to_nd_flags(int iff)
{
    enum netdev_flags nd = 0;
    if (iff & IFF_UP) {
        nd |= NETDEV_UP;
    }
    if (iff & IFF_PROMISC) {
        nd |= NETDEV_PROMISC;
    }
    return nd;
}

/* Clears the flags in 'off' and sets the flags in 'on' on 'netdev', storing
 * the flags that were cached in 'netdev->ifi_flags' beforehand into
 * '*old_flagsp'.  set_flags() is called only when the resulting flag word
 * actually differs from the cached one; afterwards the cache is refreshed
 * with get_flags(), whose return value is not checked.
 *
 * Returns 0 if nothing had to change, otherwise the result of set_flags(). */
static int
update_flags(struct netdev_linux *netdev, enum netdev_flags off,
             enum netdev_flags on, enum netdev_flags *old_flagsp)
    OVS_REQUIRES(netdev->mutex)
{
    int old_flags, new_flags;
    int error = 0;

    old_flags = netdev->ifi_flags;
    *old_flagsp = iff_to_nd_flags(old_flags);
    new_flags = (old_flags & ~nd_to_iff_flags(off)) | nd_to_iff_flags(on);
    if (new_flags != old_flags) {
        error = set_flags(netdev_get_name(&netdev->up), new_flags);
        get_flags(&netdev->up, &netdev->ifi_flags);
    }

    return error;
}

/* Locked wrapper around update_flags(): takes 'netdev->mutex' for the
 * duration of the call and returns update_flags()'s result. */
static int
netdev_linux_update_flags(struct netdev *netdev_, enum netdev_flags off,
                          enum netdev_flags on, enum netdev_flags *old_flagsp)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    int error;

    ovs_mutex_lock(&netdev->mutex);
    error = update_flags(netdev, off, on, old_flagsp);
    ovs_mutex_unlock(&netdev->mutex);

    return error;
}

/* Returns a snapshot of 'netdev->change_seq', read with 'netdev->mutex'
 * held. */
static unsigned int
netdev_linux_change_seq(const struct netdev *netdev_)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    unsigned int change_seq;

    ovs_mutex_lock(&netdev->mutex);
    change_seq = netdev->change_seq;
    ovs_mutex_unlock(&netdev->mutex);

    return change_seq;
}

/* Expands to a positional initializer for a 'struct netdev_class' that shares
 * every callback between the Linux netdev flavors except the six passed as
 * macro arguments.  The entries are positional, so their order here must
 * follow the member order of the struct being initialized. */
#define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS, SET_STATS,  \
                           GET_FEATURES, GET_STATUS)            \
{                                                               \
    NAME,                                                       \
                                                                \
    NULL,                                                       \
    netdev_linux_run,                                           \
    netdev_linux_wait,                                          \
                                                                \
    netdev_linux_alloc,                                         \
    CONSTRUCT,                                                  \
    netdev_linux_destruct,                                      \
    netdev_linux_dealloc,                                       \
    NULL,                       /* get_config */                \
    NULL,                       /* set_config */                \
    NULL,                       /* get_tunnel_config */         \
                                                                \
    netdev_linux_send,                                          \
    netdev_linux_send_wait,                                     \
                                                                \
    netdev_linux_set_etheraddr,                                 \
    netdev_linux_get_etheraddr,                                 \
    netdev_linux_get_mtu,                                       \
    netdev_linux_set_mtu,                                       \
    netdev_linux_get_ifindex,                                   \
    netdev_linux_get_carrier,                                   \
    netdev_linux_get_carrier_resets,                            \
    netdev_linux_set_miimon_interval,                           \
    GET_STATS,                                                  \
    SET_STATS,                                                  \
                                                                \
    GET_FEATURES,                                               \
    netdev_linux_set_advertisements,                            \
                                                                \
    netdev_linux_set_policing,                                  \
    netdev_linux_get_qos_types,                                 \
    netdev_linux_get_qos_capabilities,                          \
    netdev_linux_get_qos,                                       \
    netdev_linux_set_qos,                                       \
    netdev_linux_get_queue,                                     \
    netdev_linux_set_queue,                                     \
    netdev_linux_delete_queue,                                  \
    netdev_linux_get_queue_stats,                               \
    netdev_linux_queue_dump_start,                              \
    netdev_linux_queue_dump_next,                               \
    netdev_linux_queue_dump_done,                               \
    netdev_linux_dump_queue_stats,                              \
                                                                \
    netdev_linux_get_in4,                                       \
    netdev_linux_set_in4,                                       \
    netdev_linux_get_in6,                                       \
    netdev_linux_add_router,                                    \
    netdev_linux_get_next_hop,                                  \
    GET_STATUS,                                                 \
    netdev_linux_arp_lookup,                                    \
                                                                \
    netdev_linux_update_flags,                                  \
                                                                \
    netdev_linux_change_seq,                                    \
                                                                \
    netdev_linux_rx_alloc,                                      \
    netdev_linux_rx_construct,                                  \
    netdev_linux_rx_destruct,                                   \
    netdev_linux_rx_dealloc,                                    \
    netdev_linux_rx_recv,                                       \
    netdev_linux_rx_wait,                                       \
    netdev_linux_rx_drain,                                      \
}

/* The "system" class. */
const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        NULL,                    /* set_stats */
        netdev_linux_get_features,
        netdev_linux_get_status);

/* The "tap" class: differs from "system" in its constructor and statistics
 * callback. */
const struct netdev_class netdev_tap_class =
    NETDEV_LINUX_CLASS(
        "tap",
        netdev_linux_construct_tap,
        netdev_tap_get_stats,
        NULL,                   /* set_stats */
        netdev_linux_get_features,
        netdev_linux_get_status);

/* The "internal" class: has its own statistics getter and setter, reports no
 * features, and uses netdev_internal_get_status(). */
const struct netdev_class netdev_internal_class =
    NETDEV_LINUX_CLASS(
        "internal",
        netdev_linux_construct,
        netdev_internal_get_stats,
        netdev_internal_set_stats,
        NULL,                  /* get_features */
        netdev_internal_get_status);

/* HTB traffic control class. */

/* Upper bound on the minor number accepted as an HTB queue by the parsers
 * in this file; also the 'n_queues' value advertised in tc_ops_htb. */
#define HTB_N_QUEUES 0xf000

/* Per-netdev HTB qdisc state. */
struct htb {
    struct tc tc;
    unsigned int max_rate;      /* In bytes/s. */
};

/* Per-queue HTB class state. */
struct htb_class {
    struct tc_queue tc_queue;
    unsigned int min_rate;      /* In bytes/s. */
    unsigned int max_rate;      /* In bytes/s. */
    unsigned int burst;         /* In bytes. */
    unsigned int priority;      /* Lower values are higher priorities. */
};

/* Returns the 'struct htb' that embeds 'netdev_''s current 'tc' member.  Only
 * meaningful while that tc was installed by htb_install__(). */
static struct htb *
htb_get__(const struct netdev *netdev_)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    return CONTAINER_OF(netdev->tc, struct htb, tc);
}

/* Allocates a new 'struct htb' with the given 'max_rate' and makes it
 * 'netdev_''s tc.  Whatever 'netdev->tc' pointed to before is not released
 * here. */
static void
htb_install__(struct netdev *netdev_, uint64_t max_rate)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    struct htb *htb;

    htb = xmalloc(sizeof *htb);
    tc_init(&htb->tc, &tc_ops_htb);
    htb->max_rate = max_rate;   /* Narrowed to 'unsigned int' on store. */

    netdev->tc = &htb->tc;
}

/* Create an HTB qdisc.
 *
 * Equivalent to "tc qdisc add dev <dev> root handle 1: htb default 1". */
static int
htb_setup_qdisc__(struct netdev *netdev)
{
    size_t opt_offset;
    struct tc_htb_glob opt;
    struct ofpbuf request;
    struct tcmsg *tcmsg;

    /* Remove whatever root qdisc is there first; the result is ignored. */
    tc_del_qdisc(netdev);

    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
                            NLM_F_EXCL | NLM_F_CREATE, &request);
    if (!tcmsg) {
        return ENODEV;
    }
    tcmsg->tcm_handle = tc_make_handle(1, 0);
    tcmsg->tcm_parent = TC_H_ROOT;

    nl_msg_put_string(&request, TCA_KIND, "htb");

    memset(&opt, 0, sizeof opt);
    opt.rate2quantum = 10;
    opt.version = 3;
    opt.defcls = 1;

    opt_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
    nl_msg_put_unspec(&request, TCA_HTB_INIT, &opt, sizeof opt);
    nl_msg_end_nested(&request, opt_offset);

    return tc_transact(&request, NULL);
}

/* Equivalent to "tc class replace <dev> classid <handle> parent <parent> htb
 * rate <min_rate>bps ceil <max_rate>bps burst <burst>b prio <priority>".
*/
/* Installs or replaces HTB class 'handle' under 'parent' on 'netdev' using
 * the rates, burst and priority in 'class'.  The device MTU is needed to size
 * the rate tables and buffers, so the function fails early if it cannot be
 * obtained.
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
htb_setup_class__(struct netdev *netdev, unsigned int handle,
                  unsigned int parent, struct htb_class *class)
{
    struct tc_htb_opt htb_opt;
    struct ofpbuf req;
    struct tcmsg *tcm;
    size_t nest_start;
    int retval;
    int mtu;

    retval = netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu);
    if (retval) {
        VLOG_WARN_RL(&rl, "cannot set up HTB on device %s that lacks MTU",
                     netdev_get_name(netdev));
        return retval;
    }

    /* Describe the class: guaranteed rate, ceiling, the two token buffers and
     * the priority. */
    memset(&htb_opt, 0, sizeof htb_opt);
    tc_fill_rate(&htb_opt.rate, class->min_rate, mtu);
    tc_fill_rate(&htb_opt.ceil, class->max_rate, mtu);
    htb_opt.buffer = tc_calc_buffer(htb_opt.rate.rate, mtu, class->burst);
    htb_opt.cbuffer = tc_calc_buffer(htb_opt.ceil.rate, mtu, class->burst);
    htb_opt.prio = class->priority;

    tcm = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &req);
    if (tcm == NULL) {
        return ENODEV;
    }
    tcm->tcm_handle = handle;
    tcm->tcm_parent = parent;

    /* TCA_KIND followed by a nested TCA_OPTIONS holding the parameters and
     * the two rate tables. */
    nl_msg_put_string(&req, TCA_KIND, "htb");
    nest_start = nl_msg_start_nested(&req, TCA_OPTIONS);
    nl_msg_put_unspec(&req, TCA_HTB_PARMS, &htb_opt, sizeof htb_opt);
    tc_put_rtab(&req, TCA_HTB_RTAB, &htb_opt.rate);
    tc_put_rtab(&req, TCA_HTB_CTAB, &htb_opt.ceil);
    nl_msg_end_nested(&req, nest_start);

    retval = tc_transact(&req, NULL);
    if (!retval) {
        return 0;
    }

    VLOG_WARN_RL(&rl, "failed to replace %s class %u:%u, parent %u:%u, "
                 "min_rate=%u max_rate=%u burst=%u prio=%u (%s)",
                 netdev_get_name(netdev),
                 tc_get_major(handle), tc_get_minor(handle),
                 tc_get_major(parent), tc_get_minor(parent),
                 class->min_rate, class->max_rate,
                 class->burst, class->priority, ovs_strerror(retval));
    return retval;
}

/* Parses Netlink attributes in 'nl_options' for HTB parameters and stores a
 * description of them into 'class'.  The description complies with the
 * specification given in the vswitch database documentation for linux-htb
 * queue details.
*/
static int
htb_parse_tca_options__(struct nlattr *nl_options, struct htb_class *class)
{
    /* TCA_HTB_PARMS is mandatory and must be at least as large as
     * 'struct tc_htb_opt'. */
    static const struct nl_policy tca_htb_policy[] = {
        [TCA_HTB_PARMS] = { .type = NL_A_UNSPEC, .optional = false,
                            .min_len = sizeof(struct tc_htb_opt) },
    };

    struct nlattr *attrs[ARRAY_SIZE(tca_htb_policy)];
    const struct tc_htb_opt *htb;

    if (!nl_parse_nested(nl_options, tca_htb_policy,
                         attrs, ARRAY_SIZE(tca_htb_policy))) {
        VLOG_WARN_RL(&rl, "failed to parse HTB class options");
        return EPROTO;
    }

    htb = nl_attr_get(attrs[TCA_HTB_PARMS]);
    class->min_rate = htb->rate.rate;
    class->max_rate = htb->ceil.rate;
    /* The kernel reports the buffer in ticks; convert it back to bytes. */
    class->burst = tc_ticks_to_bytes(htb->rate.rate, htb->buffer);
    class->priority = htb->prio;
    return 0;
}

/* Parses the class description in 'tcmsg'.  If 'queue_id' is nonnull, stores
 * the queue number (class minor number minus 1) into it, failing with EPROTO
 * unless the class handle is 1:N with 0 < N <= HTB_N_QUEUES.  If 'options' is
 * nonnull, fills it in from the class's TCA_OPTIONS.  'stats' is passed
 * through to tc_parse_class().
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
htb_parse_tcmsg__(struct ofpbuf *tcmsg, unsigned int *queue_id,
                  struct htb_class *options,
                  struct netdev_queue_stats *stats)
{
    struct nlattr *nl_options;
    unsigned int handle;
    int error;

    error = tc_parse_class(tcmsg, &handle, &nl_options, stats);
    if (!error && queue_id) {
        unsigned int major = tc_get_major(handle);
        unsigned int minor = tc_get_minor(handle);
        if (major == 1 && minor > 0 && minor <= HTB_N_QUEUES) {
            *queue_id = minor - 1;
        } else {
            error = EPROTO;
        }
    }
    if (!error && options) {
        error = htb_parse_tca_options__(nl_options, options);
    }
    return error;
}

/* Fills in 'hc' with the parameters for the qdisc-level (1:fffe) class from
 * 'details'.  "max-rate" is given in bits/s and stored in bytes/s; when it is
 * missing or zero, the rate derived from the device's current features is
 * used, with 100 * 1000 * 1000 passed to netdev_features_to_bps() as its
 * second argument.  'min_rate' is set equal to 'max_rate', and burst and
 * priority to 0. */
static void
htb_parse_qdisc_details__(struct netdev *netdev_,
                          const struct smap *details, struct htb_class *hc)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    const char *max_rate_s;

    max_rate_s = smap_get(details, "max-rate");
    hc->max_rate = max_rate_s ? strtoull(max_rate_s, NULL, 10) / 8 : 0;
    if (!hc->max_rate) {
        enum netdev_features current;

        netdev_linux_read_features(netdev);
        current = !netdev->get_features_error ? netdev->current : 0;
        hc->max_rate = netdev_features_to_bps(current, 100 * 1000 * 1000) / 8;
    }
    hc->min_rate = hc->max_rate;
    hc->burst = 0;
    hc->priority = 0;
}

/* Fills in 'hc' with the per-queue class parameters from 'details'
 * ("min-rate", "max-rate", "burst" and "priority"), clamping the rates
 * against the qdisc's 'max_rate' and the device MTU as described in the
 * comments below.  Rates and burst are divided by 8 before being stored.
 *
 * Returns 0 if successful, otherwise the error from obtaining the MTU. */
static int
htb_parse_class_details__(struct netdev *netdev,
                          const struct smap *details, struct htb_class *hc)
{
    const struct htb *htb = htb_get__(netdev);
    const char *min_rate_s = smap_get(details, "min-rate");
    const char *max_rate_s = smap_get(details, "max-rate");
    const char *burst_s = smap_get(details, "burst");
    const char *priority_s = smap_get(details, "priority");
    int mtu, error;

    error = netdev_linux_get_mtu__(netdev_linux_cast(netdev), &mtu);
    if (error) {
        VLOG_WARN_RL(&rl, "cannot parse HTB class on device %s that lacks MTU",
                     netdev_get_name(netdev));
        return error;
    }

    /* HTB requires at least an mtu sized min-rate to send any traffic even
     * on uncongested links. */
    hc->min_rate = min_rate_s ? strtoull(min_rate_s, NULL, 10) / 8 : 0;
    hc->min_rate = MAX(hc->min_rate, mtu);
    hc->min_rate = MIN(hc->min_rate, htb->max_rate);

    /* max-rate */
    hc->max_rate = (max_rate_s
                    ? strtoull(max_rate_s, NULL, 10) / 8
                    : htb->max_rate);
    hc->max_rate = MAX(hc->max_rate, hc->min_rate);
    hc->max_rate = MIN(hc->max_rate, htb->max_rate);

    /* burst
     *
     * According to hints in the documentation that I've read, it is important
     * that 'burst' be at least as big as the largest frame that might be
     * transmitted.  Also, making 'burst' a bit bigger than necessary is OK,
     * but having it a bit too small is a problem.  Since netdev_get_mtu()
     * doesn't include the Ethernet header, we need to add at least 14 (18?) to
     * the MTU.  We actually add 64, instead of 14, as a guard against
     * additional headers get tacked on somewhere that we're not aware of. */
    hc->burst = burst_s ? strtoull(burst_s, NULL, 10) / 8 : 0;
    hc->burst = MAX(hc->burst, mtu + 64);

    /* priority */
    hc->priority = priority_s ? strtoul(priority_s, NULL, 10) : 0;

    return 0;
}

/* Queries the kernel for class 'handle' under 'parent' on 'netdev' and
 * parses the reply into 'options' and 'stats' (either may be null).
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
htb_query_class__(const struct netdev *netdev, unsigned int handle,
                  unsigned int parent, struct htb_class *options,
                  struct netdev_queue_stats *stats)
{
    struct ofpbuf *reply;
    int error;

    error = tc_query_class(netdev, handle, parent, &reply);
    if (!error) {
        error = htb_parse_tcmsg__(reply, NULL, options, stats);
        ofpbuf_delete(reply);
    }
    return error;
}

/* 'tc_install' callback: creates the HTB qdisc, then the 1:fffe class that
 * carries the qdisc-wide rate from 'details', and only then records the new
 * tc on 'netdev'.  Returns the first error encountered, or 0. */
static int
htb_tc_install(struct netdev *netdev, const struct smap *details)
{
    int error;

    error = htb_setup_qdisc__(netdev);
    if (!error) {
        struct htb_class hc;

        htb_parse_qdisc_details__(netdev, details, &hc);
        error = htb_setup_class__(netdev, tc_make_handle(1, 0xfffe),
                                  tc_make_handle(1, 0), &hc);
        if (!error) {
            htb_install__(netdev, hc.max_rate);
        }
    }
    return error;
}

/* Returns the 'struct htb_class' that embeds 'queue'. */
static struct htb_class *
htb_class_cast__(const struct tc_queue *queue)
{
    return CONTAINER_OF(queue, struct htb_class, tc_queue);
}

/* Records the parameters in 'hc' for queue 'queue_id' in 'netdev''s HTB
 * state, creating and inserting a new queue entry if none exists yet.  This
 * only updates the in-memory copy; nothing is sent to the kernel. */
static void
htb_update_queue__(struct netdev *netdev, unsigned int queue_id,
                   const struct htb_class *hc)
{
    struct htb *htb = htb_get__(netdev);
    size_t hash = hash_int(queue_id, 0);
    struct tc_queue *queue;
    struct htb_class *hcp;

    queue = tc_find_queue__(netdev, queue_id, hash);
    if (queue) {
        hcp = htb_class_cast__(queue);
    } else {
        hcp = xmalloc(sizeof *hcp);
        queue = &hcp->tc_queue;
        queue->queue_id = queue_id;
        queue->created = time_msec();
        hmap_insert(&htb->tc.queues, &queue->hmap_node, hash);
    }

    hcp->min_rate = hc->min_rate;
    hcp->max_rate = hc->max_rate;
    hcp->burst = hc->burst;
    hcp->priority = hc->priority;
}

/* 'tc_load' callback: rebuilds the in-memory HTB state from what is in the
 * kernel.  Returns ENODEV if the queue dump cannot be started, otherwise
 * 0. */
static int
htb_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
{
    struct ofpbuf msg;
    struct nl_dump dump;
    struct htb_class hc;

    /* Get qdisc options.  The query result is not checked: if it fails,
     * 'hc.max_rate' keeps the 0 assigned here. */
    hc.max_rate = 0;
    htb_query_class__(netdev, tc_make_handle(1, 0xfffe), 0, &hc, NULL);
    htb_install__(netdev, hc.max_rate);

    /* Get queues.  Classes that fail to parse are skipped. */
    if (!start_queue_dump(netdev, &dump)) {
        return ENODEV;
    }
    while (nl_dump_next(&dump, &msg)) {
        unsigned int queue_id;

        if (!htb_parse_tcmsg__(&msg, &queue_id, &hc, NULL)) {
            htb_update_queue__(netdev, queue_id, &hc);
        }
    }
    nl_dump_done(&dump);

    return 0;
}

/* 'tc_destroy' callback: frees every queue entry, then the tc itself. */
static void
htb_tc_destroy(struct tc *tc)
{
    struct htb *htb = CONTAINER_OF(tc, struct htb, tc);
    struct htb_class *hc, *next;

    HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, &htb->tc.queues) {
        hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
        free(hc);
    }
    tc_destroy(tc);
    free(htb);
}

/* 'qdisc_get' callback: reports the qdisc's "max-rate" in bits/s. */
static int
htb_qdisc_get(const struct netdev *netdev, struct smap *details)
{
    const struct htb *htb = htb_get__(netdev);
    smap_add_format(details, "max-rate", "%llu", 8ULL * htb->max_rate);
    return 0;
}

/* 'qdisc_set' callback: reconfigures the 1:fffe class from 'details' and, on
 * success, updates the cached 'max_rate'. */
static int
htb_qdisc_set(struct netdev *netdev, const struct smap *details)
{
    struct htb_class hc;
    int error;

    htb_parse_qdisc_details__(netdev, details, &hc);
    error = htb_setup_class__(netdev, tc_make_handle(1, 0xfffe),
                              tc_make_handle(1, 0), &hc);
    if (!error) {
        htb_get__(netdev)->max_rate = hc.max_rate;
    }
    return error;
}

/* 'class_get' callback: reports the cached parameters of 'queue' in
 * 'details'.  "max-rate" is omitted when it equals "min-rate" and "priority"
 * is omitted when it is 0. */
static int
htb_class_get(const struct netdev *netdev OVS_UNUSED,
              const struct tc_queue *queue, struct smap *details)
{
    const struct htb_class *hc = htb_class_cast__(queue);

    smap_add_format(details, "min-rate", "%llu", 8ULL * hc->min_rate);
    if (hc->min_rate != hc->max_rate) {
        smap_add_format(details, "max-rate", "%llu", 8ULL * hc->max_rate);
    }
    smap_add_format(details, "burst", "%llu", 8ULL * hc->burst);
    if (hc->priority) {
        smap_add_format(details, "priority", "%u", hc->priority);
    }
    return 0;
}

/* 'class_set' callback: configures kernel class 1:(queue_id + 1) under
 * 1:fffe from 'details', then mirrors the result in the in-memory queue
 * table. */
static int
htb_class_set(struct netdev *netdev, unsigned int queue_id,
              const struct smap *details)
{
    struct htb_class hc;
    int error;

    error = htb_parse_class_details__(netdev, details, &hc);
    if (error) {
        return error;
    }

    error = htb_setup_class__(netdev, tc_make_handle(1, queue_id + 1),
                              tc_make_handle(1, 0xfffe), &hc);
    if (error) {
        return error;
    }

    htb_update_queue__(netdev, queue_id, &hc);
    return 0;
}

/* 'class_delete' callback: deletes the kernel class for 'queue' and, only if
 * that succeeds, removes and frees the in-memory entry. */
static int
htb_class_delete(struct netdev *netdev, struct tc_queue *queue)
{
    struct htb_class *hc = htb_class_cast__(queue);
    struct htb *htb = htb_get__(netdev);
    int error;

    error = tc_delete_class(netdev, tc_make_handle(1, queue->queue_id + 1));
    if (!error) {
        hmap_remove(&htb->tc.queues, &hc->tc_queue.hmap_node);
        free(hc);
    }
    return error;
}

/* 'class_get_stats' callback: queries the kernel for the statistics of
 * 'queue' and stores them in 'stats'. */
static int
htb_class_get_stats(const struct netdev *netdev,
                    const struct tc_queue *queue,
                    struct netdev_queue_stats *stats)
{
    return htb_query_class__(netdev, tc_make_handle(1, queue->queue_id + 1),
                             tc_make_handle(1, 0xfffe), NULL, stats);
}

/* 'class_dump_stats' callback: parses the class in 'nlmsg' and, if its
 * handle is 1:N with 0 < N <= HTB_N_QUEUES, passes queue N - 1 and its
 * statistics to 'cb'.  Other classes are silently ignored.
 *
 * Returns 0 unless 'nlmsg' cannot be parsed. */
static int
htb_class_dump_stats(const struct netdev *netdev OVS_UNUSED,
                     const struct ofpbuf *nlmsg,
                     netdev_dump_queue_stats_cb *cb, void *aux)
{
    struct netdev_queue_stats stats;
    unsigned int handle, major, minor;
    int error;

    error = tc_parse_class(nlmsg, &handle, NULL, &stats);
    if (error) {
        return error;
    }

    major = tc_get_major(handle);
    minor = tc_get_minor(handle);
    if (major == 1 && minor > 0 && minor <= HTB_N_QUEUES) {
        (*cb)(minor - 1, &stats, aux);
    }
    return 0;
}

static const struct tc_ops tc_ops_htb = {
    "htb",                      /* linux_name */
    "linux-htb",                /* ovs_name */
    HTB_N_QUEUES,               /* n_queues */
    htb_tc_install,             /* tc_install */
    htb_tc_load,                /* tc_load */
    htb_tc_destroy,             /* tc_destroy */
    htb_qdisc_get,              /* qdisc_get */
    htb_qdisc_set,              /* qdisc_set */
    htb_class_get,              /* class_get */
    htb_class_set,              /* class_set */
    htb_class_delete,           /* class_delete */
    htb_class_get_stats,        /* class_get_stats */
    htb_class_dump_stats        /* class_dump_stats */
};

/* "linux-hfsc" traffic control class.
*/

/* Upper bound on the minor number accepted as an HFSC queue by the parsers
 * in this file; also the 'n_queues' value advertised in tc_ops_hfsc. */
#define HFSC_N_QUEUES 0xf000

/* Per-netdev HFSC qdisc state. */
struct hfsc {
    struct tc tc;
    uint32_t max_rate;          /* Stored as bits/s divided by 8. */
};

/* Per-queue HFSC class state. */
struct hfsc_class {
    struct tc_queue tc_queue;
    uint32_t min_rate;          /* Stored as bits/s divided by 8. */
    uint32_t max_rate;          /* Stored as bits/s divided by 8. */
};

/* Returns the 'struct hfsc' that embeds 'netdev_''s current 'tc' member.
 * Only meaningful while that tc was installed by hfsc_install__(). */
static struct hfsc *
hfsc_get__(const struct netdev *netdev_)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    return CONTAINER_OF(netdev->tc, struct hfsc, tc);
}

/* Returns the 'struct hfsc_class' that embeds 'queue'. */
static struct hfsc_class *
hfsc_class_cast__(const struct tc_queue *queue)
{
    return CONTAINER_OF(queue, struct hfsc_class, tc_queue);
}

/* Allocates a new 'struct hfsc' with the given 'max_rate' and makes it
 * 'netdev_''s tc.  Whatever 'netdev->tc' pointed to before is not released
 * here. */
static void
hfsc_install__(struct netdev *netdev_, uint32_t max_rate)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    struct hfsc *hfsc;

    hfsc = xmalloc(sizeof *hfsc);
    tc_init(&hfsc->tc, &tc_ops_hfsc);
    hfsc->max_rate = max_rate;
    netdev->tc = &hfsc->tc;
}

/* Records the rates in 'hc' for queue 'queue_id' in 'netdev''s HFSC state,
 * creating and inserting a new queue entry if none exists yet.  This only
 * updates the in-memory copy; nothing is sent to the kernel. */
static void
hfsc_update_queue__(struct netdev *netdev, unsigned int queue_id,
                    const struct hfsc_class *hc)
{
    size_t hash;
    struct hfsc *hfsc;
    struct hfsc_class *hcp;
    struct tc_queue *queue;

    hfsc = hfsc_get__(netdev);
    hash = hash_int(queue_id, 0);

    queue = tc_find_queue__(netdev, queue_id, hash);
    if (queue) {
        hcp = hfsc_class_cast__(queue);
    } else {
        hcp = xmalloc(sizeof *hcp);
        queue = &hcp->tc_queue;
        queue->queue_id = queue_id;
        queue->created = time_msec();
        hmap_insert(&hfsc->tc.queues, &queue->hmap_node, hash);
    }

    hcp->min_rate = hc->min_rate;
    hcp->max_rate = hc->max_rate;
}

/* Parses the HFSC service curves nested in 'nl_options' into 'class'.
 *
 * Only the configuration that hfsc_setup_class__() itself produces is
 * accepted: all three curves (RSC, FSC, USC) present and linear (m1 == 0 and
 * d == 0), RSC and FSC with the same m2, and that m2 no greater than USC's
 * m2.  Anything else is rejected with EPROTO and a rate-limited warning.
 *
 * Returns 0 if successful, otherwise EPROTO. */
static int
hfsc_parse_tca_options__(struct nlattr *nl_options, struct hfsc_class *class)
{
    const struct tc_service_curve *rsc, *fsc, *usc;
    static const struct nl_policy tca_hfsc_policy[] = {
        [TCA_HFSC_RSC] = {
            .type = NL_A_UNSPEC,
            .optional = false,
            .min_len = sizeof(struct tc_service_curve),
        },
        [TCA_HFSC_FSC] = {
            .type = NL_A_UNSPEC,
            .optional = false,
            .min_len = sizeof(struct tc_service_curve),
        },
        [TCA_HFSC_USC] = {
            .type = NL_A_UNSPEC,
            .optional = false,
            .min_len = sizeof(struct tc_service_curve),
        },
    };
    struct nlattr *attrs[ARRAY_SIZE(tca_hfsc_policy)];

    if (!nl_parse_nested(nl_options, tca_hfsc_policy,
                         attrs, ARRAY_SIZE(tca_hfsc_policy))) {
        VLOG_WARN_RL(&rl, "failed to parse HFSC class options");
        return EPROTO;
    }

    rsc = nl_attr_get(attrs[TCA_HFSC_RSC]);
    fsc = nl_attr_get(attrs[TCA_HFSC_FSC]);
    usc = nl_attr_get(attrs[TCA_HFSC_USC]);

    if (rsc->m1 != 0 || rsc->d != 0 ||
        fsc->m1 != 0 || fsc->d != 0 ||
        usc->m1 != 0 || usc->d != 0) {
        VLOG_WARN_RL(&rl, "failed to parse HFSC class options. "
                     "Non-linear service curves are not supported.");
        return EPROTO;
    }

    if (rsc->m2 != fsc->m2) {
        VLOG_WARN_RL(&rl, "failed to parse HFSC class options. "
                     "Real-time service curves are not supported ");
        return EPROTO;
    }

    if (rsc->m2 > usc->m2) {
        VLOG_WARN_RL(&rl, "failed to parse HFSC class options. "
                     "Min-rate service curve is greater than "
                     "the max-rate service curve.");
        return EPROTO;
    }

    class->min_rate = fsc->m2;
    class->max_rate = usc->m2;
    return 0;
}

/* Parses the class description in 'tcmsg'.  If 'queue_id' is nonnull, stores
 * the queue number (class minor number minus 1) into it, failing with EPROTO
 * unless the class handle is 1:N with 0 < N <= HFSC_N_QUEUES.  If 'options'
 * is nonnull, fills it in from the class's TCA_OPTIONS.  'stats' is passed
 * through to tc_parse_class().
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
hfsc_parse_tcmsg__(struct ofpbuf *tcmsg, unsigned int *queue_id,
                   struct hfsc_class *options,
                   struct netdev_queue_stats *stats)
{
    int error;
    unsigned int handle;
    struct nlattr *nl_options;

    error = tc_parse_class(tcmsg, &handle, &nl_options, stats);
    if (error) {
        return error;
    }

    if (queue_id) {
        unsigned int major, minor;

        major = tc_get_major(handle);
        minor = tc_get_minor(handle);
        if (major == 1 && minor > 0 && minor <= HFSC_N_QUEUES) {
            *queue_id = minor - 1;
        } else {
            return EPROTO;
        }
    }

    if (options) {
        error = hfsc_parse_tca_options__(nl_options, options);
    }

    return error;
}

/* Queries the kernel for class 'handle' under 'parent' on 'netdev' and
 * parses the reply into 'options' and 'stats' (either may be null).
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
hfsc_query_class__(const struct netdev *netdev, unsigned int handle,
                   unsigned int parent, struct hfsc_class *options,
                   struct netdev_queue_stats *stats)
{
    int error;
    struct ofpbuf *reply;

    error = tc_query_class(netdev, handle, parent, &reply);
    if (error) {
        return error;
    }

    error = hfsc_parse_tcmsg__(reply, NULL, options, stats);
    ofpbuf_delete(reply);
    return error;
}

/* Fills in 'class' with the parameters for the qdisc-level (1:fffe) class
 * from 'details'.  "max-rate" is divided by 8 before use; when it is missing
 * or zero, the rate derived from the device's current features is used, with
 * 100 * 1000 * 1000 passed to netdev_features_to_bps() as its second
 * argument.  Both 'min_rate' and 'max_rate' are set to the result. */
static void
hfsc_parse_qdisc_details__(struct netdev *netdev_, const struct smap *details,
                           struct hfsc_class *class)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    uint32_t max_rate;
    const char *max_rate_s;

    max_rate_s = smap_get(details, "max-rate");
    max_rate = max_rate_s ? strtoull(max_rate_s, NULL, 10) / 8 : 0;

    if (!max_rate) {
        enum netdev_features current;

        netdev_linux_read_features(netdev);
        current = !netdev->get_features_error ? netdev->current : 0;
        max_rate = netdev_features_to_bps(current, 100 * 1000 * 1000) / 8;
    }

    class->min_rate = max_rate;
    class->max_rate = max_rate;
}

/* Fills in 'class' with the per-queue rates from 'details'.  "min-rate" is
 * clamped to the range [1, qdisc max_rate]; "max-rate" defaults to the qdisc
 * max_rate and is clamped to [min_rate, qdisc max_rate].  Always returns
 * 0. */
static int
hfsc_parse_class_details__(struct netdev *netdev,
                           const struct smap *details,
                           struct hfsc_class * class)
{
    const struct hfsc *hfsc;
    uint32_t min_rate, max_rate;
    const char *min_rate_s, *max_rate_s;

    hfsc = hfsc_get__(netdev);
    min_rate_s = smap_get(details, "min-rate");
    max_rate_s = smap_get(details, "max-rate");

    min_rate = min_rate_s ? strtoull(min_rate_s, NULL, 10) / 8 : 0;
    min_rate = MAX(min_rate, 1);
    min_rate = MIN(min_rate, hfsc->max_rate);

    max_rate = (max_rate_s
                ? strtoull(max_rate_s, NULL, 10) / 8
                : hfsc->max_rate);
    max_rate = MAX(max_rate, min_rate);
    max_rate = MIN(max_rate, hfsc->max_rate);

    class->min_rate = min_rate;
    class->max_rate = max_rate;

    return 0;
}

/* Create an HFSC qdisc.
 *
 * Equivalent to "tc qdisc add dev <dev> root handle 1: hfsc default 1". */
static int
hfsc_setup_qdisc__(struct netdev * netdev)
{
    struct tcmsg *tcmsg;
    struct ofpbuf request;
    struct tc_hfsc_qopt opt;

    /* Remove whatever root qdisc is there first; the result is ignored. */
    tc_del_qdisc(netdev);

    tcmsg = tc_make_request(netdev, RTM_NEWQDISC,
                            NLM_F_EXCL | NLM_F_CREATE, &request);

    if (!tcmsg) {
        return ENODEV;
    }

    tcmsg->tcm_handle = tc_make_handle(1, 0);
    tcmsg->tcm_parent = TC_H_ROOT;

    memset(&opt, 0, sizeof opt);
    opt.defcls = 1;

    nl_msg_put_string(&request, TCA_KIND, "hfsc");
    nl_msg_put_unspec(&request, TCA_OPTIONS, &opt, sizeof opt);

    return tc_transact(&request, NULL);
}

/* Create an HFSC class.
*
 * Equivalent to "tc class add <dev> parent <parent> classid <handle> hfsc
 * sc rate <min_rate> ul rate <max_rate>" */
static int
hfsc_setup_class__(struct netdev *netdev, unsigned int handle,
                   unsigned int parent, struct hfsc_class *class)
{
    int error;
    size_t opt_offset;
    struct tcmsg *tcmsg;
    struct ofpbuf request;
    struct tc_service_curve min, max;

    tcmsg = tc_make_request(netdev, RTM_NEWTCLASS, NLM_F_CREATE, &request);

    if (!tcmsg) {
        return ENODEV;
    }

    tcmsg->tcm_handle = handle;
    tcmsg->tcm_parent = parent;

    /* Linear curves only: m1 and d are zero, m2 carries the rate. */
    min.m1 = 0;
    min.d = 0;
    min.m2 = class->min_rate;

    max.m1 = 0;
    max.d = 0;
    max.m2 = class->max_rate;

    /* 'min' is used for both the RSC and FSC attributes; 'max' is the USC
     * attribute. */
    nl_msg_put_string(&request, TCA_KIND, "hfsc");
    opt_offset = nl_msg_start_nested(&request, TCA_OPTIONS);
    nl_msg_put_unspec(&request, TCA_HFSC_RSC, &min, sizeof min);
    nl_msg_put_unspec(&request, TCA_HFSC_FSC, &min, sizeof min);
    nl_msg_put_unspec(&request, TCA_HFSC_USC, &max, sizeof max);
    nl_msg_end_nested(&request, opt_offset);

    error = tc_transact(&request, NULL);
    if (error) {
        VLOG_WARN_RL(&rl, "failed to replace %s class %u:%u, parent %u:%u, "
                     "min-rate %ubps, max-rate %ubps (%s)",
                     netdev_get_name(netdev),
                     tc_get_major(handle), tc_get_minor(handle),
                     tc_get_major(parent), tc_get_minor(parent),
                     class->min_rate, class->max_rate, ovs_strerror(error));
    }

    return error;
}

/* 'tc_install' callback: creates the HFSC qdisc, then the 1:fffe class that
 * carries the qdisc-wide rate from 'details', and only then records the new
 * tc on 'netdev'.  Returns the first error encountered, or 0. */
static int
hfsc_tc_install(struct netdev *netdev, const struct smap *details)
{
    int error;
    struct hfsc_class class;

    error = hfsc_setup_qdisc__(netdev);

    if (error) {
        return error;
    }

    hfsc_parse_qdisc_details__(netdev, details, &class);
    error = hfsc_setup_class__(netdev, tc_make_handle(1, 0xfffe),
                               tc_make_handle(1, 0), &class);

    if (error) {
        return error;
    }

    hfsc_install__(netdev, class.max_rate);
    return 0;
}

/* 'tc_load' callback: rebuilds the in-memory HFSC state from what is in the
 * kernel.  Returns ENODEV if the queue dump cannot be started, otherwise
 * 0. */
static int
hfsc_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
{
    struct ofpbuf msg;
    struct nl_dump dump;
    struct hfsc_class hc;

    /* The query result is not checked: if it fails, 'hc.max_rate' keeps the
     * 0 assigned here. */
    hc.max_rate = 0;
    hfsc_query_class__(netdev, tc_make_handle(1, 0xfffe), 0, &hc, NULL);
    hfsc_install__(netdev, hc.max_rate);

    if (!start_queue_dump(netdev, &dump)) {
        return ENODEV;
    }

    /* Classes that fail to parse are skipped. */
    while (nl_dump_next(&dump, &msg)) {
        unsigned int queue_id;

        if (!hfsc_parse_tcmsg__(&msg, &queue_id, &hc, NULL)) {
            hfsc_update_queue__(netdev, queue_id, &hc);
        }
    }

    nl_dump_done(&dump);
    return 0;
}

/* 'tc_destroy' callback: frees every queue entry, then the tc itself. */
static void
hfsc_tc_destroy(struct tc *tc)
{
    struct hfsc *hfsc;
    struct hfsc_class *hc, *next;

    hfsc = CONTAINER_OF(tc, struct hfsc, tc);

    HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, &hfsc->tc.queues) {
        hmap_remove(&hfsc->tc.queues, &hc->tc_queue.hmap_node);
        free(hc);
    }

    tc_destroy(tc);
    free(hfsc);
}

/* 'qdisc_get' callback: reports the qdisc's "max-rate", multiplied back by
 * 8. */
static int
hfsc_qdisc_get(const struct netdev *netdev, struct smap *details)
{
    const struct hfsc *hfsc;
    hfsc = hfsc_get__(netdev);
    smap_add_format(details, "max-rate", "%llu", 8ULL * hfsc->max_rate);
    return 0;
}

/* 'qdisc_set' callback: reconfigures the 1:fffe class from 'details' and, on
 * success, updates the cached 'max_rate'. */
static int
hfsc_qdisc_set(struct netdev *netdev, const struct smap *details)
{
    int error;
    struct hfsc_class class;

    hfsc_parse_qdisc_details__(netdev, details, &class);
    error = hfsc_setup_class__(netdev, tc_make_handle(1, 0xfffe),
                               tc_make_handle(1, 0), &class);

    if (!error) {
        hfsc_get__(netdev)->max_rate = class.max_rate;
    }

    return error;
}

/* 'class_get' callback: reports the cached rates of 'queue' in 'details'.
 * "max-rate" is omitted when it equals "min-rate". */
static int
hfsc_class_get(const struct netdev *netdev OVS_UNUSED,
               const struct tc_queue *queue, struct smap *details)
{
    const struct hfsc_class *hc;

    hc = hfsc_class_cast__(queue);
    smap_add_format(details, "min-rate", "%llu", 8ULL * hc->min_rate);
    if (hc->min_rate != hc->max_rate) {
        smap_add_format(details, "max-rate", "%llu", 8ULL * hc->max_rate);
    }
    return 0;
}

/* 'class_set' callback: configures kernel class 1:(queue_id + 1) under
 * 1:fffe from 'details', then mirrors the result in the in-memory queue
 * table. */
static int
hfsc_class_set(struct netdev *netdev, unsigned int queue_id,
               const struct smap *details)
{
    int error;
    struct hfsc_class class;

    error = hfsc_parse_class_details__(netdev, details, &class);
    if (error) {
        return error;
    }

    error = hfsc_setup_class__(netdev, tc_make_handle(1, queue_id + 1),
                               tc_make_handle(1, 0xfffe), &class);
    if (error) {
        return error;
    }

    hfsc_update_queue__(netdev, queue_id, &class);
    return 0;
}

/* 'class_delete' callback: deletes the kernel class for 'queue' and, only if
 * that succeeds, removes and frees the in-memory entry. */
static int
hfsc_class_delete(struct netdev *netdev, struct tc_queue *queue)
{
    int error;
    struct hfsc *hfsc;
    struct hfsc_class *hc;

    hc = hfsc_class_cast__(queue);
    hfsc = hfsc_get__(netdev);

    error = tc_delete_class(netdev, tc_make_handle(1, queue->queue_id + 1));
    if (!error) {
        hmap_remove(&hfsc->tc.queues, &hc->tc_queue.hmap_node);
        free(hc);
    }
    return error;
}

/* 'class_get_stats' callback: queries the kernel for the statistics of
 * 'queue' and stores them in 'stats'. */
static int
hfsc_class_get_stats(const struct netdev *netdev, const struct tc_queue *queue,
                     struct netdev_queue_stats *stats)
{
    return hfsc_query_class__(netdev, tc_make_handle(1, queue->queue_id + 1),
                              tc_make_handle(1, 0xfffe), NULL, stats);
}

/* 'class_dump_stats' callback: parses the class in 'nlmsg' and, if its
 * handle is 1:N with 0 < N <= HFSC_N_QUEUES, passes queue N - 1 and its
 * statistics to 'cb'.  Other classes are silently ignored.
 *
 * Returns 0 unless 'nlmsg' cannot be parsed. */
static int
hfsc_class_dump_stats(const struct netdev *netdev OVS_UNUSED,
                      const struct ofpbuf *nlmsg,
                      netdev_dump_queue_stats_cb *cb, void *aux)
{
    struct netdev_queue_stats stats;
    unsigned int handle, major, minor;
    int error;

    error = tc_parse_class(nlmsg, &handle, NULL, &stats);
    if (error) {
        return error;
    }

    major = tc_get_major(handle);
    minor = tc_get_minor(handle);
    if (major == 1 && minor > 0 && minor <= HFSC_N_QUEUES) {
        (*cb)(minor - 1, &stats, aux);
    }
    return 0;
}

static const struct tc_ops tc_ops_hfsc = {
    "hfsc",                     /* linux_name */
    "linux-hfsc",               /* ovs_name */
    HFSC_N_QUEUES,              /* n_queues */
    hfsc_tc_install,            /* tc_install */
    hfsc_tc_load,               /* tc_load */
    hfsc_tc_destroy,            /* tc_destroy */
    hfsc_qdisc_get,             /* qdisc_get */
    hfsc_qdisc_set,             /* qdisc_set */
    hfsc_class_get,             /* class_get */
    hfsc_class_set,             /* class_set */
    hfsc_class_delete,          /* class_delete */
    hfsc_class_get_stats,       /* class_get_stats */
    hfsc_class_dump_stats       /* class_dump_stats */
};

/* "linux-default" traffic control class.
 *
 * This class represents the default, unnamed Linux qdisc.  It corresponds to
 * the "" (empty string) QoS type in the OVS database. */

/* Points 'netdev_''s tc at a single shared, statically allocated tc object
 * for the default class.  Nothing is allocated per device. */
static void
default_install__(struct netdev *netdev_)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    static const struct tc tc = TC_INITIALIZER(&tc, &tc_ops_default);

    /* Nothing but a tc class implementation is allowed to write to a tc.  This
     * class never does that, so we can legitimately use a const tc object. */
    netdev->tc = CONST_CAST(struct tc *, &tc);
}

/* 'tc_install' callback for the default class; 'details' is ignored.  Always
 * returns 0. */
static int
default_tc_install(struct netdev *netdev,
                   const struct smap *details OVS_UNUSED)
{
    default_install__(netdev);
    return 0;
}

/* 'tc_load' callback for the default class; 'nlmsg' is ignored.  Always
 * returns 0. */
static int
default_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
{
    default_install__(netdev);
    return 0;
}

static const struct tc_ops tc_ops_default = {
    NULL,                       /* linux_name */
    "",                         /* ovs_name */
    0,                          /* n_queues */
    default_tc_install,         /* tc_install */
    default_tc_load,            /* tc_load */
    NULL,                       /* tc_destroy */
    NULL,                       /* qdisc_get */
    NULL,                       /* qdisc_set */
    NULL,                       /* class_get */
    NULL,                       /* class_set */
    NULL,                       /* class_delete */
    NULL,                       /* class_get_stats */
    NULL                        /* class_dump_stats */
};

/* "linux-other" traffic control class.
 *
 * */

/* 'tc_load' callback for the "linux-other" class: points 'netdev_''s tc at a
 * single shared, statically allocated tc object.  This class has no
 * 'tc_install' callback (see tc_ops_other).  Always returns 0. */
static int
other_tc_load(struct netdev *netdev_, struct ofpbuf *nlmsg OVS_UNUSED)
{
    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    static const struct tc tc = TC_INITIALIZER(&tc, &tc_ops_other);

    /* Nothing but a tc class implementation is allowed to write to a tc.  This
     * class never does that, so we can legitimately use a const tc object. */
    netdev->tc = CONST_CAST(struct tc *, &tc);
    return 0;
}

static const struct tc_ops tc_ops_other = {
    NULL,                       /* linux_name */
    "linux-other",              /* ovs_name */
    0,                          /* n_queues */
    NULL,                       /* tc_install */
    other_tc_load,              /* tc_load */
    NULL,                       /* tc_destroy */
    NULL,                       /* qdisc_get */
    NULL,                       /* qdisc_set */
    NULL,                       /* class_get */
    NULL,                       /* class_set */
    NULL,                       /* class_delete */
    NULL,                       /* class_get_stats */
    NULL                        /* class_dump_stats */
};

/* Traffic control. */

/* Number of kernel "tc" ticks per second. */
static double ticks_per_s;

/* Number of kernel "jiffies" per second.  This is used for the purpose of
 * computing buffer sizes.  Generally kernel qdiscs need to be able to buffer
 * one jiffy's worth of data.
 *
 * There are two possibilities here:
 *
 *    - 'buffer_hz' is the kernel's real timer tick rate, a small number in the
 *      approximate range of 100 to 1024.  That means that we really need to
 *      make sure that the qdisc can buffer that much data.
* * - 'buffer_hz' is an absurdly large number. That means that the kernel * has finely granular timers and there's no need to fudge additional room * for buffers. (There's no extra effort needed to implement that: the * large 'buffer_hz' is used as a divisor, so practically any number will * come out as 0 in the division. Small integer results in the case of * really high dividends won't have any real effect anyhow.) */ static unsigned int buffer_hz; /* Returns tc handle 'major':'minor'. */ static unsigned int tc_make_handle(unsigned int major, unsigned int minor) { return TC_H_MAKE(major << 16, minor); } /* Returns the major number from 'handle'. */ static unsigned int tc_get_major(unsigned int handle) { return TC_H_MAJ(handle) >> 16; } /* Returns the minor number from 'handle'. */ static unsigned int tc_get_minor(unsigned int handle) { return TC_H_MIN(handle); } static struct tcmsg * tc_make_request(const struct netdev *netdev, int type, unsigned int flags, struct ofpbuf *request) { struct tcmsg *tcmsg; int ifindex; int error; error = get_ifindex(netdev, &ifindex); if (error) { return NULL; } ofpbuf_init(request, 512); nl_msg_put_nlmsghdr(request, sizeof *tcmsg, type, NLM_F_REQUEST | flags); tcmsg = ofpbuf_put_zeros(request, sizeof *tcmsg); tcmsg->tcm_family = AF_UNSPEC; tcmsg->tcm_ifindex = ifindex; /* Caller should fill in tcmsg->tcm_handle. */ /* Caller should fill in tcmsg->tcm_parent. */ return tcmsg; } static int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp) { int error = nl_transact(NETLINK_ROUTE, request, replyp); ofpbuf_uninit(request); return error; } /* Adds or deletes a root ingress qdisc on 'netdev'. We use this for * policing configuration. 
*
 * This function is equivalent to running the following when 'add' is true:
 *     /sbin/tc qdisc add dev <devname> handle ffff: ingress
 *
 * This function is equivalent to running the following when 'add' is false:
 *     /sbin/tc qdisc del dev <devname> handle ffff: ingress
 *
 * The configuration and stats may be seen with the following command:
 *     /sbin/tc -s qdisc show dev <devname>
 *
 * Returns 0 if successful, otherwise a positive errno value.
 */
static int
tc_add_del_ingress_qdisc(struct netdev *netdev, bool add)
{
    struct ofpbuf request;
    struct tcmsg *tcm;
    int nlmsg_type;
    int nlmsg_flags;
    int retval;

    if (add) {
        nlmsg_type = RTM_NEWQDISC;
        nlmsg_flags = NLM_F_EXCL | NLM_F_CREATE;
    } else {
        nlmsg_type = RTM_DELQDISC;
        nlmsg_flags = 0;
    }

    tcm = tc_make_request(netdev, nlmsg_type, nlmsg_flags, &request);
    if (tcm == NULL) {
        return ENODEV;
    }
    tcm->tcm_handle = tc_make_handle(0xffff, 0);
    tcm->tcm_parent = TC_H_INGRESS;
    nl_msg_put_string(&request, TCA_KIND, "ingress");
    nl_msg_put_unspec(&request, TCA_OPTIONS, NULL, 0);

    retval = tc_transact(&request, NULL);

    /* If we're deleting the qdisc, don't worry about some of the
     * error conditions. */
    if (!add && (retval == ENOENT || retval == EINVAL)) {
        return 0;
    }
    return retval;
}

/* Adds a policer to 'netdev' with a rate of 'kbits_rate' and a burst size
 * of 'kbits_burst'.
 *
 * This function is equivalent to running:
 *     /sbin/tc filter add dev <devname> parent ffff: protocol all prio 49
 *              basic police rate <kbits_rate>kbit burst <kbits_burst>k
 *              mtu 65535 drop
 *
 * The configuration and stats may be seen with the following command:
 *     /sbin/tc -s filter show <devname> eth0 parent ffff:
 *
 * Returns 0 if successful, otherwise a positive errno value.
*/ static int tc_add_policer(struct netdev *netdev, int kbits_rate, int kbits_burst) { struct tc_police tc_police; struct ofpbuf request; struct tcmsg *tcmsg; size_t basic_offset; size_t police_offset; int error; int mtu = 65535; memset(&tc_police, 0, sizeof tc_police); tc_police.action = TC_POLICE_SHOT; tc_police.mtu = mtu; tc_fill_rate(&tc_police.rate, (kbits_rate * 1000)/8, mtu); tc_police.burst = tc_bytes_to_ticks(tc_police.rate.rate, kbits_burst * 1024); tcmsg = tc_make_request(netdev, RTM_NEWTFILTER, NLM_F_EXCL | NLM_F_CREATE, &request); if (!tcmsg) { return ENODEV; } tcmsg->tcm_parent = tc_make_handle(0xffff, 0); tcmsg->tcm_info = tc_make_handle(49, (OVS_FORCE uint16_t) htons(ETH_P_ALL)); nl_msg_put_string(&request, TCA_KIND, "basic"); basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS); police_offset = nl_msg_start_nested(&request, TCA_BASIC_POLICE); nl_msg_put_unspec(&request, TCA_POLICE_TBF, &tc_police, sizeof tc_police); tc_put_rtab(&request, TCA_POLICE_RATE, &tc_police.rate); nl_msg_end_nested(&request, police_offset); nl_msg_end_nested(&request, basic_offset); error = tc_transact(&request, NULL); if (error) { return error; } return 0; } static void read_psched(void) { /* The values in psched are not individually very meaningful, but they are * important. The tables below show some values seen in the wild. * * Some notes: * * - "c" has always been a constant 1000000 since at least Linux 2.4.14. * (Before that, there are hints that it was 1000000000.) * * - "d" can be unrealistically large, see the comment on 'buffer_hz' * above. 
* * /proc/net/psched * ----------------------------------- * [1] 000c8000 000f4240 000f4240 00000064 * [2] 000003e8 00000400 000f4240 3b9aca00 * [3] 000003e8 00000400 000f4240 3b9aca00 * [4] 000003e8 00000400 000f4240 00000064 * [5] 000003e8 00000040 000f4240 3b9aca00 * [6] 000003e8 00000040 000f4240 000000f9 * * a b c d ticks_per_s buffer_hz * ------- --------- ---------- ------------- ----------- ------------- * [1] 819,200 1,000,000 1,000,000 100 819,200 100 * [2] 1,000 1,024 1,000,000 1,000,000,000 976,562 1,000,000,000 * [3] 1,000 1,024 1,000,000 1,000,000,000 976,562 1,000,000,000 * [4] 1,000 1,024 1,000,000 100 976,562 100 * [5] 1,000 64 1,000,000 1,000,000,000 15,625,000 1,000,000,000 * [6] 1,000 64 1,000,000 249 15,625,000 249 * * [1] 2.6.18-128.1.6.el5.xs5.5.0.505.1024xen from XenServer 5.5.0-24648p * [2] 2.6.26-1-686-bigmem from Debian lenny * [3] 2.6.26-2-sparc64 from Debian lenny * [4] 2.6.27.42-0.1.1.xs5.6.810.44.111163xen from XenServer 5.6.810-31078p * [5] 2.6.32.21.22 (approx.) 
from Ubuntu 10.04 on VMware Fusion * [6] 2.6.34 from kernel.org on KVM */ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static const char fn[] = "/proc/net/psched"; unsigned int a, b, c, d; FILE *stream; if (!ovsthread_once_start(&once)) { return; } ticks_per_s = 1.0; buffer_hz = 100; stream = fopen(fn, "r"); if (!stream) { VLOG_WARN("%s: open failed: %s", fn, ovs_strerror(errno)); goto exit; } if (fscanf(stream, "%x %x %x %x", &a, &b, &c, &d) != 4) { VLOG_WARN("%s: read failed", fn); fclose(stream); goto exit; } VLOG_DBG("%s: psched parameters are: %u %u %u %u", fn, a, b, c, d); fclose(stream); if (!a || !c) { VLOG_WARN("%s: invalid scheduler parameters", fn); goto exit; } ticks_per_s = (double) a * c / b; if (c == 1000000) { buffer_hz = d; } else { VLOG_WARN("%s: unexpected psched parameters: %u %u %u %u", fn, a, b, c, d); } VLOG_DBG("%s: ticks_per_s=%f buffer_hz=%u", fn, ticks_per_s, buffer_hz); exit: ovsthread_once_done(&once); } /* Returns the number of bytes that can be transmitted in 'ticks' ticks at a * rate of 'rate' bytes per second. */ static unsigned int tc_ticks_to_bytes(unsigned int rate, unsigned int ticks) { read_psched(); return (rate * ticks) / ticks_per_s; } /* Returns the number of ticks that it would take to transmit 'size' bytes at a * rate of 'rate' bytes per second. */ static unsigned int tc_bytes_to_ticks(unsigned int rate, unsigned int size) { read_psched(); return rate ? ((unsigned long long int) ticks_per_s * size) / rate : 0; } /* Returns the number of bytes that need to be reserved for qdisc buffering at * a transmission rate of 'rate' bytes per second. */ static unsigned int tc_buffer_per_jiffy(unsigned int rate) { read_psched(); return rate / buffer_hz; } /* Given Netlink 'msg' that describes a qdisc, extracts the name of the qdisc, * e.g. "htb", into '*kind' (if it is nonnull). 
If 'options' is nonnull, * extracts 'msg''s TCA_OPTIONS attributes into '*options' if it is present or * stores NULL into it if it is absent. * * '*kind' and '*options' point into 'msg', so they are owned by whoever owns * 'msg'. * * Returns 0 if successful, otherwise a positive errno value. */ static int tc_parse_qdisc(const struct ofpbuf *msg, const char **kind, struct nlattr **options) { static const struct nl_policy tca_policy[] = { [TCA_KIND] = { .type = NL_A_STRING, .optional = false }, [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = true }, }; struct nlattr *ta[ARRAY_SIZE(tca_policy)]; if (!nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct tcmsg), tca_policy, ta, ARRAY_SIZE(ta))) { VLOG_WARN_RL(&rl, "failed to parse qdisc message"); goto error; } if (kind) { *kind = nl_attr_get_string(ta[TCA_KIND]); } if (options) { *options = ta[TCA_OPTIONS]; } return 0; error: if (kind) { *kind = NULL; } if (options) { *options = NULL; } return EPROTO; } /* Given Netlink 'msg' that describes a class, extracts the queue ID (e.g. the * minor number of its class ID) into '*queue_id', its TCA_OPTIONS attribute * into '*options', and its queue statistics into '*stats'. Any of the output * arguments may be null. * * Returns 0 if successful, otherwise a positive errno value. 
*/
static int
tc_parse_class(const struct ofpbuf *msg, unsigned int *handlep,
               struct nlattr **options, struct netdev_queue_stats *stats)
{
    static const struct nl_policy tca_policy[] = {
        [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false },
        [TCA_STATS2] = { .type = NL_A_NESTED, .optional = false },
    };
    struct nlattr *attrs[ARRAY_SIZE(tca_policy)];
    bool ok;

    ok = nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct tcmsg),
                         tca_policy, attrs, ARRAY_SIZE(attrs));
    if (!ok) {
        VLOG_WARN_RL(&rl, "failed to parse class message");
    } else {
        if (handlep) {
            struct tcmsg *tc = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tc);
            *handlep = tc->tcm_handle;
        }

        if (options) {
            *options = attrs[TCA_OPTIONS];
        }

        if (stats) {
            const struct gnet_stats_queue *queue;
            struct gnet_stats_basic basic;

            static const struct nl_policy stats_policy[] = {
                [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC, .optional = false,
                                      .min_len = sizeof basic },
                [TCA_STATS_QUEUE] = { .type = NL_A_UNSPEC, .optional = false,
                                      .min_len = sizeof *queue },
            };
            struct nlattr *sa[ARRAY_SIZE(stats_policy)];

            ok = nl_parse_nested(attrs[TCA_STATS2], stats_policy,
                                 sa, ARRAY_SIZE(sa));
            if (!ok) {
                VLOG_WARN_RL(&rl, "failed to parse class stats");
            } else {
                /* Alignment issues screw up the length of struct
                 * gnet_stats_basic on some arch/bitsize combinations.  Newer
                 * versions of Linux have a struct gnet_stats_basic_packed,
                 * but we can't depend on that.  The easiest thing to do is
                 * just to make a copy. */
                memset(&basic, 0, sizeof basic);
                memcpy(&basic, nl_attr_get(sa[TCA_STATS_BASIC]),
                       MIN(nl_attr_get_size(sa[TCA_STATS_BASIC]),
                           sizeof basic));
                stats->tx_bytes = basic.bytes;
                stats->tx_packets = basic.packets;

                queue = nl_attr_get(sa[TCA_STATS_QUEUE]);
                stats->tx_errors = queue->drops;
            }
        }
    }

    if (ok) {
        return 0;
    }

    /* Either parse step failed: clear '*options' and '*stats'.  '*handlep'
     * keeps whatever was stored in it before the failure. */
    if (options) {
        *options = NULL;
    }
    if (stats) {
        memset(stats, 0, sizeof *stats);
    }
    return EPROTO;
}

/* Queries the kernel for class with identifier 'handle' and parent 'parent'
 * on 'netdev'.
*/ static int tc_query_class(const struct netdev *netdev, unsigned int handle, unsigned int parent, struct ofpbuf **replyp) { struct ofpbuf request; struct tcmsg *tcmsg; int error; tcmsg = tc_make_request(netdev, RTM_GETTCLASS, NLM_F_ECHO, &request); if (!tcmsg) { return ENODEV; } tcmsg->tcm_handle = handle; tcmsg->tcm_parent = parent; error = tc_transact(&request, replyp); if (error) { VLOG_WARN_RL(&rl, "query %s class %u:%u (parent %u:%u) failed (%s)", netdev_get_name(netdev), tc_get_major(handle), tc_get_minor(handle), tc_get_major(parent), tc_get_minor(parent), ovs_strerror(error)); } return error; } /* Equivalent to "tc class del dev handle ". */ static int tc_delete_class(const struct netdev *netdev, unsigned int handle) { struct ofpbuf request; struct tcmsg *tcmsg; int error; tcmsg = tc_make_request(netdev, RTM_DELTCLASS, 0, &request); if (!tcmsg) { return ENODEV; } tcmsg->tcm_handle = handle; tcmsg->tcm_parent = 0; error = tc_transact(&request, NULL); if (error) { VLOG_WARN_RL(&rl, "delete %s class %u:%u failed (%s)", netdev_get_name(netdev), tc_get_major(handle), tc_get_minor(handle), ovs_strerror(error)); } return error; } /* Equivalent to "tc qdisc del dev root". */ static int tc_del_qdisc(struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); struct ofpbuf request; struct tcmsg *tcmsg; int error; tcmsg = tc_make_request(netdev_, RTM_DELQDISC, 0, &request); if (!tcmsg) { return ENODEV; } tcmsg->tcm_handle = tc_make_handle(1, 0); tcmsg->tcm_parent = TC_H_ROOT; error = tc_transact(&request, NULL); if (error == EINVAL) { /* EINVAL probably means that the default qdisc was in use, in which * case we've accomplished our purpose. */ error = 0; } if (!error && netdev->tc) { if (netdev->tc->ops->tc_destroy) { netdev->tc->ops->tc_destroy(netdev->tc); } netdev->tc = NULL; } return error; } /* If 'netdev''s qdisc type and parameters are not yet known, queries the * kernel to determine what they are. 
Returns 0 if successful, otherwise a * positive errno value. */ static int tc_query_qdisc(const struct netdev *netdev_) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); struct ofpbuf request, *qdisc; const struct tc_ops *ops; struct tcmsg *tcmsg; int load_error; int error; if (netdev->tc) { return 0; } /* This RTM_GETQDISC is crafted to avoid OOPSing kernels that do not have * commit 53b0f08 "net_sched: Fix qdisc_notify()", which is anything before * 2.6.35 without that fix backported to it. * * To avoid the OOPS, we must not make a request that would attempt to dump * a "built-in" qdisc, that is, the default pfifo_fast qdisc or one of a * few others. There are a few ways that I can see to do this, but most of * them seem to be racy (and if you lose the race the kernel OOPSes). The * technique chosen here is to assume that any non-default qdisc that we * create will have a class with handle 1:0. The built-in qdiscs only have * a class with handle 0:0. * * We could check for Linux 2.6.35+ and use a more straightforward method * there. */ tcmsg = tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO, &request); if (!tcmsg) { return ENODEV; } tcmsg->tcm_handle = tc_make_handle(1, 0); tcmsg->tcm_parent = 0; /* Figure out what tc class to instantiate. */ error = tc_transact(&request, &qdisc); if (!error) { const char *kind; error = tc_parse_qdisc(qdisc, &kind, NULL); if (error) { ops = &tc_ops_other; } else { ops = tc_lookup_linux_name(kind); if (!ops) { static struct vlog_rate_limit rl2 = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_INFO_RL(&rl2, "unknown qdisc \"%s\"", kind); ops = &tc_ops_other; } } } else if (error == ENOENT) { /* Either it's a built-in qdisc, or it's a qdisc set up by some * other entity that doesn't have a handle 1:0. We will assume * that it's the system default qdisc. */ ops = &tc_ops_default; error = 0; } else { /* Who knows? Maybe the device got deleted. 
*/ VLOG_WARN_RL(&rl, "query %s qdisc failed (%s)", netdev_get_name(netdev_), ovs_strerror(error)); ops = &tc_ops_other; } /* Instantiate it. */ load_error = ops->tc_load(CONST_CAST(struct netdev *, netdev_), qdisc); ovs_assert((load_error == 0) == (netdev->tc != NULL)); ofpbuf_delete(qdisc); return error ? error : load_error; } /* Linux traffic control uses tables with 256 entries ("rtab" tables) to approximate the time to transmit packets of various lengths. For an MTU of 256 or less, each entry is exact; for an MTU of 257 through 512, each entry represents two possible packet lengths; for a MTU of 513 through 1024, four possible lengths; and so on. Returns, for the specified 'mtu', the number of bits that packet lengths need to be shifted right to fit within such a 256-entry table. */ static int tc_calc_cell_log(unsigned int mtu) { int cell_log; if (!mtu) { mtu = ETH_PAYLOAD_MAX; } mtu += ETH_HEADER_LEN + VLAN_HEADER_LEN; for (cell_log = 0; mtu >= 256; cell_log++) { mtu >>= 1; } return cell_log; } /* Initializes 'rate' properly for a rate of 'Bps' bytes per second with an MTU * of 'mtu'. */ static void tc_fill_rate(struct tc_ratespec *rate, uint64_t Bps, int mtu) { memset(rate, 0, sizeof *rate); rate->cell_log = tc_calc_cell_log(mtu); /* rate->overhead = 0; */ /* New in 2.6.24, not yet in some */ /* rate->cell_align = 0; */ /* distro headers. */ rate->mpu = ETH_TOTAL_MIN; rate->rate = Bps; } /* Appends to 'msg' an "rtab" table for the specified 'rate' as a Netlink * attribute of the specified "type". * * See tc_calc_cell_log() above for a description of "rtab"s. 
*/ static void tc_put_rtab(struct ofpbuf *msg, uint16_t type, const struct tc_ratespec *rate) { uint32_t *rtab; unsigned int i; rtab = nl_msg_put_unspec_uninit(msg, type, TC_RTAB_SIZE); for (i = 0; i < TC_RTAB_SIZE / sizeof *rtab; i++) { unsigned packet_size = (i + 1) << rate->cell_log; if (packet_size < rate->mpu) { packet_size = rate->mpu; } rtab[i] = tc_bytes_to_ticks(rate->rate, packet_size); } } /* Calculates the proper value of 'buffer' or 'cbuffer' in HTB options given a * rate of 'Bps' bytes per second, the specified 'mtu', and a user-requested * burst size of 'burst_bytes'. (If no value was requested, a 'burst_bytes' of * 0 is fine.) */ static int tc_calc_buffer(unsigned int Bps, int mtu, uint64_t burst_bytes) { unsigned int min_burst = tc_buffer_per_jiffy(Bps) + mtu; return tc_bytes_to_ticks(Bps, MAX(burst_bytes, min_burst)); } /* Linux-only functions declared in netdev-linux.h */ /* Modifies the 'flag' bit in ethtool's flags field for 'netdev'. If * 'enable' is true, the bit is set. Otherwise, it is cleared. */ int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag, const char *flag_name, bool enable) { const char *netdev_name = netdev_get_name(netdev); struct ethtool_value evalue; uint32_t new_flags; int error; COVERAGE_INC(netdev_get_ethtool); memset(&evalue, 0, sizeof evalue); error = netdev_linux_do_ethtool(netdev_name, (struct ethtool_cmd *)&evalue, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS"); if (error) { return error; } COVERAGE_INC(netdev_set_ethtool); evalue.data = new_flags = (evalue.data & ~flag) | (enable ? 
flag : 0); error = netdev_linux_do_ethtool(netdev_name, (struct ethtool_cmd *)&evalue, ETHTOOL_SFLAGS, "ETHTOOL_SFLAGS"); if (error) { return error; } COVERAGE_INC(netdev_get_ethtool); memset(&evalue, 0, sizeof evalue); error = netdev_linux_do_ethtool(netdev_name, (struct ethtool_cmd *)&evalue, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS"); if (error) { return error; } if (new_flags != evalue.data) { VLOG_WARN_RL(&rl, "attempt to %s ethtool %s flag on network " "device %s failed", enable ? "enable" : "disable", flag_name, netdev_name); return EOPNOTSUPP; } return 0; } /* Utility functions. */ /* Copies 'src' into 'dst', performing format conversion in the process. */ static void netdev_stats_from_rtnl_link_stats(struct netdev_stats *dst, const struct rtnl_link_stats *src) { dst->rx_packets = src->rx_packets; dst->tx_packets = src->tx_packets; dst->rx_bytes = src->rx_bytes; dst->tx_bytes = src->tx_bytes; dst->rx_errors = src->rx_errors; dst->tx_errors = src->tx_errors; dst->rx_dropped = src->rx_dropped; dst->tx_dropped = src->tx_dropped; dst->multicast = src->multicast; dst->collisions = src->collisions; dst->rx_length_errors = src->rx_length_errors; dst->rx_over_errors = src->rx_over_errors; dst->rx_crc_errors = src->rx_crc_errors; dst->rx_frame_errors = src->rx_frame_errors; dst->rx_fifo_errors = src->rx_fifo_errors; dst->rx_missed_errors = src->rx_missed_errors; dst->tx_aborted_errors = src->tx_aborted_errors; dst->tx_carrier_errors = src->tx_carrier_errors; dst->tx_fifo_errors = src->tx_fifo_errors; dst->tx_heartbeat_errors = src->tx_heartbeat_errors; dst->tx_window_errors = src->tx_window_errors; } static int get_stats_via_netlink(int ifindex, struct netdev_stats *stats) { /* Policy for RTNLGRP_LINK messages. * * There are *many* more fields in these messages, but currently we only * care about these fields. 
*/ static const struct nl_policy rtnlgrp_link_policy[] = { [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, [IFLA_STATS] = { .type = NL_A_UNSPEC, .optional = true, .min_len = sizeof(struct rtnl_link_stats) }, }; struct ofpbuf request; struct ofpbuf *reply; struct ifinfomsg *ifi; struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; int error; ofpbuf_init(&request, 0); nl_msg_put_nlmsghdr(&request, sizeof *ifi, RTM_GETLINK, NLM_F_REQUEST); ifi = ofpbuf_put_zeros(&request, sizeof *ifi); ifi->ifi_family = PF_UNSPEC; ifi->ifi_index = ifindex; error = nl_transact(NETLINK_ROUTE, &request, &reply); ofpbuf_uninit(&request); if (error) { return error; } if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg), rtnlgrp_link_policy, attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { ofpbuf_delete(reply); return EPROTO; } if (!attrs[IFLA_STATS]) { VLOG_WARN_RL(&rl, "RTM_GETLINK reply lacks stats"); ofpbuf_delete(reply); return EPROTO; } netdev_stats_from_rtnl_link_stats(stats, nl_attr_get(attrs[IFLA_STATS])); ofpbuf_delete(reply); return 0; } static int get_stats_via_proc(const char *netdev_name, struct netdev_stats *stats) { static const char fn[] = "/proc/net/dev"; char line[1024]; FILE *stream; int ln; stream = fopen(fn, "r"); if (!stream) { VLOG_WARN_RL(&rl, "%s: open failed: %s", fn, ovs_strerror(errno)); return errno; } ln = 0; while (fgets(line, sizeof line, stream)) { if (++ln >= 3) { char devname[16]; #define X64 "%"SCNu64 if (sscanf(line, " %15[^:]:" X64 X64 X64 X64 X64 X64 X64 "%*u" X64 X64 X64 X64 X64 X64 X64 "%*u", devname, &stats->rx_bytes, &stats->rx_packets, &stats->rx_errors, &stats->rx_dropped, &stats->rx_fifo_errors, &stats->rx_frame_errors, &stats->multicast, &stats->tx_bytes, &stats->tx_packets, &stats->tx_errors, &stats->tx_dropped, &stats->tx_fifo_errors, &stats->collisions, &stats->tx_carrier_errors) != 15) { VLOG_WARN_RL(&rl, "%s:%d: parse error", fn, ln); } else if (!strcmp(devname, netdev_name)) { stats->rx_length_errors = UINT64_MAX; 
stats->rx_over_errors = UINT64_MAX; stats->rx_crc_errors = UINT64_MAX; stats->rx_missed_errors = UINT64_MAX; stats->tx_aborted_errors = UINT64_MAX; stats->tx_heartbeat_errors = UINT64_MAX; stats->tx_window_errors = UINT64_MAX; fclose(stream); return 0; } } } VLOG_WARN_RL(&rl, "%s: no stats for %s", fn, netdev_name); fclose(stream); return ENODEV; } static int get_flags(const struct netdev *dev, unsigned int *flags) { struct ifreq ifr; int error; *flags = 0; error = af_inet_ifreq_ioctl(dev->name, &ifr, SIOCGIFFLAGS, "SIOCGIFFLAGS"); if (!error) { *flags = ifr.ifr_flags; } return error; } static int set_flags(const char *name, unsigned int flags) { struct ifreq ifr; ifr.ifr_flags = flags; return af_inet_ifreq_ioctl(name, &ifr, SIOCSIFFLAGS, "SIOCSIFFLAGS"); } static int do_get_ifindex(const char *netdev_name) { struct ifreq ifr; int error; ovs_strzcpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); COVERAGE_INC(netdev_get_ifindex); error = af_inet_ioctl(SIOCGIFINDEX, &ifr); if (error) { VLOG_WARN_RL(&rl, "ioctl(SIOCGIFINDEX) on %s device failed: %s", netdev_name, ovs_strerror(error)); return -error; } return ifr.ifr_ifindex; } static int get_ifindex(const struct netdev *netdev_, int *ifindexp) { struct netdev_linux *netdev = netdev_linux_cast(netdev_); if (!(netdev->cache_valid & VALID_IFINDEX)) { int ifindex = do_get_ifindex(netdev_get_name(netdev_)); if (ifindex < 0) { netdev->get_ifindex_error = -ifindex; netdev->ifindex = 0; } else { netdev->get_ifindex_error = 0; netdev->ifindex = ifindex; } netdev->cache_valid |= VALID_IFINDEX; } *ifindexp = netdev->ifindex; return netdev->get_ifindex_error; } static int get_etheraddr(const char *netdev_name, uint8_t ea[ETH_ADDR_LEN]) { struct ifreq ifr; int hwaddr_family; int error; memset(&ifr, 0, sizeof ifr); ovs_strzcpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); COVERAGE_INC(netdev_get_hwaddr); error = af_inet_ioctl(SIOCGIFHWADDR, &ifr); if (error) { /* ENODEV probably means that a vif disappeared asynchronously and 
* hasn't been removed from the database yet, so reduce the log level * to INFO for that case. */ VLOG(error == ENODEV ? VLL_INFO : VLL_ERR, "ioctl(SIOCGIFHWADDR) on %s device failed: %s", netdev_name, ovs_strerror(error)); return error; } hwaddr_family = ifr.ifr_hwaddr.sa_family; if (hwaddr_family != AF_UNSPEC && hwaddr_family != ARPHRD_ETHER) { VLOG_WARN("%s device has unknown hardware address family %d", netdev_name, hwaddr_family); } memcpy(ea, ifr.ifr_hwaddr.sa_data, ETH_ADDR_LEN); return 0; } static int set_etheraddr(const char *netdev_name, const uint8_t mac[ETH_ADDR_LEN]) { struct ifreq ifr; int error; memset(&ifr, 0, sizeof ifr); ovs_strzcpy(ifr.ifr_name, netdev_name, sizeof ifr.ifr_name); ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER; memcpy(ifr.ifr_hwaddr.sa_data, mac, ETH_ADDR_LEN); COVERAGE_INC(netdev_set_hwaddr); error = af_inet_ioctl(SIOCSIFHWADDR, &ifr); if (error) { VLOG_ERR("ioctl(SIOCSIFHWADDR) on %s device failed: %s", netdev_name, ovs_strerror(error)); } return error; } static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *ecmd, int cmd, const char *cmd_name) { struct ifreq ifr; int error; memset(&ifr, 0, sizeof ifr); ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name); ifr.ifr_data = (caddr_t) ecmd; ecmd->cmd = cmd; error = af_inet_ioctl(SIOCETHTOOL, &ifr); if (error) { if (error != EOPNOTSUPP) { VLOG_WARN_RL(&rl, "ethtool command %s on network device %s " "failed: %s", cmd_name, name, ovs_strerror(error)); } else { /* The device doesn't support this operation. That's pretty * common, so there's no point in logging anything. 
*/ } } return error; } static int netdev_linux_get_ipv4(const struct netdev *netdev, struct in_addr *ip, int cmd, const char *cmd_name) { struct ifreq ifr; int error; ifr.ifr_addr.sa_family = AF_INET; error = af_inet_ifreq_ioctl(netdev_get_name(netdev), &ifr, cmd, cmd_name); if (!error) { const struct sockaddr_in *sin = ALIGNED_CAST(struct sockaddr_in *, &ifr.ifr_addr); *ip = sin->sin_addr; } return error; } /* Returns an AF_PACKET raw socket or a negative errno value. */ static int af_packet_sock(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static int sock; if (ovsthread_once_start(&once)) { sock = socket(AF_PACKET, SOCK_RAW, 0); if (sock >= 0) { int error = set_nonblocking(sock); if (error) { close(sock); sock = -error; } } else { sock = -errno; VLOG_ERR("failed to create packet socket: %s", ovs_strerror(errno)); } ovsthread_once_done(&once); } return sock; } openvswitch-2.0.1+git20140120/lib/netdev-linux.h000066400000000000000000000017421226605124000210170ustar00rootroot00000000000000/* * Copyright (c) 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef NETDEV_LINUX_H #define NETDEV_LINUX_H 1 #include #include /* These functions are Linux specific, so they should be used directly only by * Linux-specific code. 
*/ struct netdev; int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag, const char *flag_name, bool enable); #endif /* netdev-linux.h */ openvswitch-2.0.1+git20140120/lib/netdev-provider.h000066400000000000000000000737621226605124000215250ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef NETDEV_PROVIDER_H #define NETDEV_PROVIDER_H 1 /* Generic interface to network devices. */ #include "netdev.h" #include "list.h" #include "shash.h" #include "smap.h" #ifdef __cplusplus extern "C" { #endif /* A network device (e.g. an Ethernet device). * * Network device implementations may read these members but should not modify * them. */ struct netdev { /* The following do not change during the lifetime of a struct netdev. */ char *name; /* Name of network device. */ const struct netdev_class *netdev_class; /* Functions to control this device. */ /* The following are protected by 'netdev_mutex' (internal to netdev.c). */ int ref_cnt; /* Times this devices was opened. */ struct shash_node *node; /* Pointer to element in global map. */ struct list saved_flags_list; /* Contains "struct netdev_saved_flags". 
*/ }; const char *netdev_get_type(const struct netdev *); const struct netdev_class *netdev_get_class(const struct netdev *); const char *netdev_get_name(const struct netdev *); struct netdev *netdev_from_name(const char *name); void netdev_get_devices(const struct netdev_class *, struct shash *device_list); /* A data structure for capturing packets received by a network device. * * Network device implementations may read these members but should not modify * them. * * None of these members change during the lifetime of a struct netdev_rx. */ struct netdev_rx { struct netdev *netdev; /* Owns a reference to the netdev. */ }; struct netdev *netdev_rx_get_netdev(const struct netdev_rx *); /* Network device class structure, to be defined by each implementation of a * network device. * * These functions return 0 if successful or a positive errno value on failure, * except where otherwise noted. * * * Data Structures * =============== * * These functions work primarily with two different kinds of data structures: * * - "struct netdev", which represents a network device. * * - "struct netdev_rx", which represents a handle for capturing packets * received on a network device * * Each of these data structures contains all of the implementation-independent * generic state for the respective concept, called the "base" state. None of * them contains any extra space for implementations to use. Instead, each * implementation is expected to declare its own data structure that contains * an instance of the generic data structure plus additional * implementation-specific members, called the "derived" state. The * implementation can use casts or (preferably) the CONTAINER_OF macro to * obtain access to derived state given only a pointer to the embedded generic * data structure. 
* * * Life Cycle * ========== * * Four stylized functions accompany each of these data structures: * * "alloc" "construct" "destruct" "dealloc" * ------------ ---------------- --------------- -------------- * netdev ->alloc ->construct ->destruct ->dealloc * netdev_rx ->rx_alloc ->rx_construct ->rx_destruct ->rx_dealloc * * Any instance of a given data structure goes through the following life * cycle: * * 1. The client calls the "alloc" function to obtain raw memory. If "alloc" * fails, skip all the other steps. * * 2. The client initializes all of the data structure's base state. If this * fails, skip to step 7. * * 3. The client calls the "construct" function. The implementation * initializes derived state. It may refer to the already-initialized * base state. If "construct" fails, skip to step 6. * * 4. The data structure is now initialized and in use. * * 5. When the data structure is no longer needed, the client calls the * "destruct" function. The implementation uninitializes derived state. * The base state has not been uninitialized yet, so the implementation * may still refer to it. * * 6. The client uninitializes all of the data structure's base state. * * 7. The client calls the "dealloc" to free the raw memory. The * implementation must not refer to base or derived state in the data * structure, because it has already been uninitialized. * * Each "alloc" function allocates and returns a new instance of the respective * data structure. The "alloc" function is not given any information about the * use of the new data structure, so it cannot perform much initialization. * Its purpose is just to ensure that the new data structure has enough room * for base and derived state. It may return a null pointer if memory is not * available, in which case none of the other functions is called. * * Each "construct" function initializes derived state in its respective data * structure. 
When "construct" is called, all of the base state has already * been initialized, so the "construct" function may refer to it. The * "construct" function is allowed to fail, in which case the client calls the * "dealloc" function (but not the "destruct" function). * * Each "destruct" function uninitializes and frees derived state in its * respective data structure. When "destruct" is called, the base state has * not yet been uninitialized, so the "destruct" function may refer to it. The * "destruct" function is not allowed to fail. * * Each "dealloc" function frees raw memory that was allocated by the * "alloc" function. The memory's base and derived members might not have ever * been initialized (but if "construct" returned successfully, then it has been * "destruct"ed already). The "dealloc" function is not allowed to fail. */ struct netdev_class { /* Type of netdevs in this class, e.g. "system", "tap", "gre", etc. * * One of the providers should supply a "system" type, since this is * the type assumed if no type is specified when opening a netdev. * The "system" type corresponds to an existing network device on * the system. */ const char *type; /* ## ------------------- ## */ /* ## Top-Level Functions ## */ /* ## ------------------- ## */ /* Called when the netdev provider is registered, typically at program * startup. Returning an error from this function will prevent any network * device in this class from being opened. * * This function may be set to null if a network device class needs no * initialization at registration time. */ int (*init)(void); /* Performs periodic work needed by netdevs of this class. May be null if * no periodic work is necessary. */ void (*run)(void); /* Arranges for poll_block() to wake up if the "run" member function needs * to be called. Implementations are additionally required to wake * whenever something changes in any of its netdevs which would cause their * ->change_seq() function to change its result. 
May be null if nothing is * needed here. */ void (*wait)(void); /* ## ---------------- ## */ /* ## netdev Functions ## */ /* ## ---------------- ## */ /* Life-cycle functions for a netdev. See the large comment above on * struct netdev_class. */ struct netdev *(*alloc)(void); int (*construct)(struct netdev *); void (*destruct)(struct netdev *); void (*dealloc)(struct netdev *); /* Fetches the device 'netdev''s configuration, storing it in 'args'. * The caller owns 'args' and pre-initializes it to an empty smap. * * If this netdev class does not have any configuration options, this may * be a null pointer. */ int (*get_config)(const struct netdev *netdev, struct smap *args); /* Changes the device 'netdev''s configuration to 'args'. * * If this netdev class does not support configuration, this may be a null * pointer. */ int (*set_config)(struct netdev *netdev, const struct smap *args); /* Returns the tunnel configuration of 'netdev'. If 'netdev' is * not a tunnel, returns null. * * If this function would always return null, it may be null instead. */ const struct netdev_tunnel_config * (*get_tunnel_config)(const struct netdev *netdev); /* Sends the 'size'-byte packet in 'buffer' on 'netdev'. Returns 0 if * successful, otherwise a positive errno value. Returns EAGAIN without * blocking if the packet cannot be queued immediately. Returns EMSGSIZE * if a partial packet was transmitted or if the packet is too big or too * small to transmit on the device. * * The caller retains ownership of 'buffer' in all cases. * * The network device is expected to maintain a packet transmission queue, * so that the caller does not ordinarily have to do additional queuing of * packets. * * May return EOPNOTSUPP if a network device does not implement packet * transmission through this interface. This function may be set to null * if it would always return EOPNOTSUPP anyhow. (This will prevent the * network device from being usefully used by the netdev-based "userspace * datapath". 
It will also prevent the OVS implementation of bonding from * working properly over 'netdev'.) */ int (*send)(struct netdev *netdev, const void *buffer, size_t size); /* Registers with the poll loop to wake up from the next call to * poll_block() when the packet transmission queue for 'netdev' has * sufficient room to transmit a packet with netdev_send(). * * The network device is expected to maintain a packet transmission queue, * so that the caller does not ordinarily have to do additional queuing of * packets. Thus, this function is unlikely to ever be useful. * * May be null if not needed, such as for a network device that does not * implement packet transmission through the 'send' member function. */ void (*send_wait)(struct netdev *netdev); /* Sets 'netdev''s Ethernet address to 'mac' */ int (*set_etheraddr)(struct netdev *netdev, const uint8_t mac[6]); /* Retrieves 'netdev''s Ethernet address into 'mac'. * * This address will be advertised as 'netdev''s MAC address through the * OpenFlow protocol, among other uses. */ int (*get_etheraddr)(const struct netdev *netdev, uint8_t mac[6]); /* Retrieves 'netdev''s MTU into '*mtup'. * * The MTU is the maximum size of transmitted (and received) packets, in * bytes, not including the hardware header; thus, this is typically 1500 * bytes for Ethernet devices. * * If 'netdev' does not have an MTU (e.g. as some tunnels do not), then * this function should return EOPNOTSUPP. This function may be set to * null if it would always return EOPNOTSUPP. */ int (*get_mtu)(const struct netdev *netdev, int *mtup); /* Sets 'netdev''s MTU to 'mtu'. * * If 'netdev' does not have an MTU (e.g. as some tunnels do not), then * this function should return EOPNOTSUPP. This function may be set to * null if it would always return EOPNOTSUPP. */ int (*set_mtu)(const struct netdev *netdev, int mtu); /* Returns the ifindex of 'netdev', if successful, as a positive number. * On failure, returns a negative errno value. 
* * The desired semantics of the ifindex value are a combination of those * specified by POSIX for if_nametoindex() and by SNMP for ifIndex. An * ifindex value should be unique within a host and remain stable at least * until reboot. SNMP says an ifindex "ranges between 1 and the value of * ifNumber" but many systems do not follow this rule anyhow. * * This function may be set to null if it would always return -EOPNOTSUPP. */ int (*get_ifindex)(const struct netdev *netdev); /* Sets 'carrier' to true if carrier is active (link light is on) on * 'netdev'. * * May be null if device does not provide carrier status (will be always * up as long as device is up). */ int (*get_carrier)(const struct netdev *netdev, bool *carrier); /* Returns the number of times 'netdev''s carrier has changed since being * initialized. * * If null, callers will assume the number of carrier resets is zero. */ long long int (*get_carrier_resets)(const struct netdev *netdev); /* Forces ->get_carrier() to poll 'netdev''s MII registers for link status * instead of checking 'netdev''s carrier. 'netdev''s MII registers will * be polled once ever 'interval' milliseconds. If 'netdev' does not * support MII, another method may be used as a fallback. If 'interval' is * less than or equal to zero, reverts ->get_carrier() to its normal * behavior. * * Most network devices won't support this feature and will set this * function pointer to NULL, which is equivalent to returning EOPNOTSUPP. */ int (*set_miimon_interval)(struct netdev *netdev, long long int interval); /* Retrieves current device stats for 'netdev' into 'stats'. * * A network device that supports some statistics but not others, it should * set the values of the unsupported statistics to all-1-bits * (UINT64_MAX). */ int (*get_stats)(const struct netdev *netdev, struct netdev_stats *); /* Sets the device stats for 'netdev' to 'stats'. 
* * Most network devices won't support this feature and will set this * function pointer to NULL, which is equivalent to returning EOPNOTSUPP. * * Some network devices might only allow setting their stats to 0. */ int (*set_stats)(struct netdev *netdev, const struct netdev_stats *); /* Stores the features supported by 'netdev' into each of '*current', * '*advertised', '*supported', and '*peer'. Each value is a bitmap of * NETDEV_F_* bits. * * This function may be set to null if it would always return EOPNOTSUPP. */ int (*get_features)(const struct netdev *netdev, enum netdev_features *current, enum netdev_features *advertised, enum netdev_features *supported, enum netdev_features *peer); /* Set the features advertised by 'netdev' to 'advertise', which is a * set of NETDEV_F_* bits. * * This function may be set to null for a network device that does not * support configuring advertisements. */ int (*set_advertisements)(struct netdev *netdev, enum netdev_features advertise); /* Attempts to set input rate limiting (policing) policy, such that up to * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative * burst size of 'kbits' kb. * * This function may be set to null if policing is not supported. */ int (*set_policing)(struct netdev *netdev, unsigned int kbits_rate, unsigned int kbits_burst); /* Adds to 'types' all of the forms of QoS supported by 'netdev', or leaves * it empty if 'netdev' does not support QoS. Any names added to 'types' * should be documented as valid for the "type" column in the "QoS" table * in vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). * * Every network device must support disabling QoS with a type of "", but * this function must not add "" to 'types'. * * The caller is responsible for initializing 'types' (e.g. with * sset_init()) before calling this function. The caller retains ownership * of 'types'. * * May be NULL if 'netdev' does not support QoS at all. 
*/ int (*get_qos_types)(const struct netdev *netdev, struct sset *types); /* Queries 'netdev' for its capabilities regarding the specified 'type' of * QoS. On success, initializes 'caps' with the QoS capabilities. * * Should return EOPNOTSUPP if 'netdev' does not support 'type'. May be * NULL if 'netdev' does not support QoS at all. */ int (*get_qos_capabilities)(const struct netdev *netdev, const char *type, struct netdev_qos_capabilities *caps); /* Queries 'netdev' about its currently configured form of QoS. If * successful, stores the name of the current form of QoS into '*typep' * and any details of configuration as string key-value pairs in * 'details'. * * A '*typep' of "" indicates that QoS is currently disabled on 'netdev'. * * The caller initializes 'details' before calling this function. The * caller takes ownership of the string key-values pairs added to * 'details'. * * The netdev retains ownership of '*typep'. * * '*typep' will be one of the types returned by netdev_get_qos_types() for * 'netdev'. The contents of 'details' should be documented as valid for * '*typep' in the "other_config" column in the "QoS" table in * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). * * May be NULL if 'netdev' does not support QoS at all. */ int (*get_qos)(const struct netdev *netdev, const char **typep, struct smap *details); /* Attempts to reconfigure QoS on 'netdev', changing the form of QoS to * 'type' with details of configuration from 'details'. * * On error, the previous QoS configuration is retained. * * When this function changes the type of QoS (not just 'details'), this * also resets all queue configuration for 'netdev' to their defaults * (which depend on the specific type of QoS). Otherwise, the queue * configuration for 'netdev' is unchanged. * * 'type' should be "" (to disable QoS) or one of the types returned by * netdev_get_qos_types() for 'netdev'. 
The contents of 'details' should * be documented as valid for the given 'type' in the "other_config" column * in the "QoS" table in vswitchd/vswitch.xml (which is built as * ovs-vswitchd.conf.db(8)). * * May be NULL if 'netdev' does not support QoS at all. */ int (*set_qos)(struct netdev *netdev, const char *type, const struct smap *details); /* Queries 'netdev' for information about the queue numbered 'queue_id'. * If successful, adds that information as string key-value pairs to * 'details'. Returns 0 if successful, otherwise a positive errno value. * * Should return EINVAL if 'queue_id' is greater than or equal to the * number of supported queues (as reported in the 'n_queues' member of * struct netdev_qos_capabilities by 'get_qos_capabilities'). * * The caller initializes 'details' before calling this function. The * caller takes ownership of the string key-values pairs added to * 'details'. * * The returned contents of 'details' should be documented as valid for the * given 'type' in the "other_config" column in the "Queue" table in * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). */ int (*get_queue)(const struct netdev *netdev, unsigned int queue_id, struct smap *details); /* Configures the queue numbered 'queue_id' on 'netdev' with the key-value * string pairs in 'details'. The contents of 'details' should be * documented as valid for the given 'type' in the "other_config" column in * the "Queue" table in vswitchd/vswitch.xml (which is built as * ovs-vswitchd.conf.db(8)). Returns 0 if successful, otherwise a positive * errno value. On failure, the given queue's configuration should be * unmodified. * * Should return EINVAL if 'queue_id' is greater than or equal to the * number of supported queues (as reported in the 'n_queues' member of * struct netdev_qos_capabilities by 'get_qos_capabilities'), or if * 'details' is invalid for the type of queue. * * This function does not modify 'details', and the caller retains * ownership of it. 
* * May be NULL if 'netdev' does not support QoS at all. */ int (*set_queue)(struct netdev *netdev, unsigned int queue_id, const struct smap *details); /* Attempts to delete the queue numbered 'queue_id' from 'netdev'. * * Should return EINVAL if 'queue_id' is greater than or equal to the * number of supported queues (as reported in the 'n_queues' member of * struct netdev_qos_capabilities by 'get_qos_capabilities'). Should * return EOPNOTSUPP if 'queue_id' is valid but may not be deleted (e.g. if * 'netdev' has a fixed set of queues with the current QoS mode). * * May be NULL if 'netdev' does not support QoS at all, or if all of its * QoS modes have fixed sets of queues. */ int (*delete_queue)(struct netdev *netdev, unsigned int queue_id); /* Obtains statistics about 'queue_id' on 'netdev'. Fills 'stats' with the * queue's statistics. May set individual members of 'stats' to all-1-bits * if the statistic is unavailable. * * May be NULL if 'netdev' does not support QoS at all. */ int (*get_queue_stats)(const struct netdev *netdev, unsigned int queue_id, struct netdev_queue_stats *stats); /* Attempts to begin dumping the queues in 'netdev'. On success, returns 0 * and initializes '*statep' with any data needed for iteration. On * failure, returns a positive errno value. * * May be NULL if 'netdev' does not support QoS at all. */ int (*queue_dump_start)(const struct netdev *netdev, void **statep); /* Attempts to retrieve another queue from 'netdev' for 'state', which was * initialized by a successful call to the 'queue_dump_start' function for * 'netdev'. On success, stores a queue ID into '*queue_id' and fills * 'details' with the configuration of the queue with that ID. Returns EOF * if the last queue has been dumped, or a positive errno value on error. * This function will not be called again once it returns nonzero once for * a given iteration (but the 'queue_dump_done' function will be called * afterward). 
* * The caller initializes and clears 'details' before calling this * function. The caller takes ownership of the string key-values pairs * added to 'details'. * * The returned contents of 'details' should be documented as valid for the * given 'type' in the "other_config" column in the "Queue" table in * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). * * May be NULL if 'netdev' does not support QoS at all. */ int (*queue_dump_next)(const struct netdev *netdev, void *state, unsigned int *queue_id, struct smap *details); /* Releases resources from 'netdev' for 'state', which was initialized by a * successful call to the 'queue_dump_start' function for 'netdev'. * * May be NULL if 'netdev' does not support QoS at all. */ int (*queue_dump_done)(const struct netdev *netdev, void *state); /* Iterates over all of 'netdev''s queues, calling 'cb' with the queue's * ID, its statistics, and the 'aux' specified by the caller. The order of * iteration is unspecified, but (when successful) each queue must be * visited exactly once. * * 'cb' will not modify or free the statistics passed in. */ int (*dump_queue_stats)(const struct netdev *netdev, void (*cb)(unsigned int queue_id, struct netdev_queue_stats *, void *aux), void *aux); /* If 'netdev' has an assigned IPv4 address, sets '*address' to that * address and '*netmask' to the associated netmask. * * The following error values have well-defined meanings: * * - EADDRNOTAVAIL: 'netdev' has no assigned IPv4 address. * * - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'. * * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ int (*get_in4)(const struct netdev *netdev, struct in_addr *address, struct in_addr *netmask); /* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. * * This function may be set to null if it would always return EOPNOTSUPP * anyhow. 
*/ int (*set_in4)(struct netdev *netdev, struct in_addr addr, struct in_addr mask); /* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address. * * The following error values have well-defined meanings: * * - EADDRNOTAVAIL: 'netdev' has no assigned IPv6 address. * * - EOPNOTSUPP: No IPv6 network stack attached to 'netdev'. * * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ int (*get_in6)(const struct netdev *netdev, struct in6_addr *in6); /* Adds 'router' as a default IP gateway for the TCP/IP stack that * corresponds to 'netdev'. * * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ int (*add_router)(struct netdev *netdev, struct in_addr router); /* Looks up the next hop for 'host'. If successful, stores the next hop * gateway's address (0 if 'host' is on a directly connected network) in * '*next_hop' and a copy of the name of the device to reach 'host' in * '*netdev_name', and returns 0. The caller is responsible for freeing * '*netdev_name' (by calling free()). * * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ int (*get_next_hop)(const struct in_addr *host, struct in_addr *next_hop, char **netdev_name); /* Retrieves driver information of the device. * * Populates 'smap' with key-value pairs representing the status of the * device. 'smap' is a set of key-value string pairs representing netdev * type specific information. For more information see * ovs-vswitchd.conf.db(5). * * The caller is responsible for destroying 'smap' and its data. * * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ int (*get_status)(const struct netdev *netdev, struct smap *smap); /* Looks up the ARP table entry for 'ip' on 'netdev' and stores the * corresponding MAC address in 'mac'. A return value of ENXIO, in * particular, indicates that there is no ARP table entry for 'ip' on * 'netdev'. 
* * This function may be set to null if it would always return EOPNOTSUPP * anyhow. */ int (*arp_lookup)(const struct netdev *netdev, ovs_be32 ip, uint8_t mac[6]); /* Retrieves the current set of flags on 'netdev' into '*old_flags'. Then, * turns off the flags that are set to 1 in 'off' and turns on the flags * that are set to 1 in 'on'. (No bit will be set to 1 in both 'off' and * 'on'; that is, off & on == 0.) * * This function may be invoked from a signal handler. Therefore, it * should not do anything that is not signal-safe (such as logging). */ int (*update_flags)(struct netdev *netdev, enum netdev_flags off, enum netdev_flags on, enum netdev_flags *old_flags); /* Returns a sequence number which indicates changes in one of 'netdev''s * properties. The returned sequence number must be nonzero so that * callers have a value which they may use as a reset when tracking * 'netdev'. * * Minimally, the returned sequence number is required to change whenever * 'netdev''s flags, features, ethernet address, or carrier changes. The * returned sequence number is allowed to change even when 'netdev' doesn't * change, although implementations should try to avoid this. */ unsigned int (*change_seq)(const struct netdev *netdev); /* ## ------------------- ## */ /* ## netdev_rx Functions ## */ /* ## ------------------- ## */ /* If a particular netdev class does not support receiving packets, all these * function pointers must be NULL. */ /* Life-cycle functions for a netdev_rx. See the large comment above on * struct netdev_class. */ struct netdev_rx *(*rx_alloc)(void); int (*rx_construct)(struct netdev_rx *); void (*rx_destruct)(struct netdev_rx *); void (*rx_dealloc)(struct netdev_rx *); /* Attempts to receive a packet from 'rx' into the 'size' bytes in * 'buffer'. If successful, returns the number of bytes in the received * packet, otherwise a negative errno value. Returns -EAGAIN immediately * if no packet is ready to be received. 
* * Must return -EMSGSIZE, and discard the packet, if the received packet * is longer than 'size' bytes. * * Specify NULL if this */ int (*rx_recv)(struct netdev_rx *rx, void *buffer, size_t size); /* Registers with the poll loop to wake up from the next call to * poll_block() when a packet is ready to be received with netdev_rx_recv() * on 'rx'. */ void (*rx_wait)(struct netdev_rx *rx); /* Discards all packets waiting to be received from 'rx'. */ int (*rx_drain)(struct netdev_rx *rx); }; int netdev_register_provider(const struct netdev_class *); int netdev_unregister_provider(const char *type); extern const struct netdev_class netdev_linux_class; extern const struct netdev_class netdev_internal_class; extern const struct netdev_class netdev_tap_class; #if defined(__FreeBSD__) || defined(__NetBSD__) extern const struct netdev_class netdev_bsd_class; #endif #ifdef __cplusplus } #endif #endif /* netdev.h */ openvswitch-2.0.1+git20140120/lib/netdev-vport.c000066400000000000000000000610631226605124000210270ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "netdev-vport.h" #include #include #include #include #include #include "byte-order.h" #include "daemon.h" #include "dirs.h" #include "dpif.h" #include "hash.h" #include "hmap.h" #include "list.h" #include "netdev-provider.h" #include "ofpbuf.h" #include "packets.h" #include "route-table.h" #include "shash.h" #include "socket-util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(netdev_vport); #define VXLAN_DST_PORT 4789 #define LISP_DST_PORT 4341 #define DEFAULT_TTL 64 struct netdev_vport { struct netdev up; /* Protects all members below. */ struct ovs_mutex mutex; unsigned int change_seq; uint8_t etheraddr[ETH_ADDR_LEN]; struct netdev_stats stats; /* Tunnels. */ struct netdev_tunnel_config tnl_cfg; /* Patch Ports. */ char *peer; }; struct vport_class { const char *dpif_port; struct netdev_class netdev_class; }; static int netdev_vport_construct(struct netdev *); static int get_patch_config(const struct netdev *netdev, struct smap *args); static int get_tunnel_config(const struct netdev *, struct smap *args); static void netdev_vport_poll_notify(struct netdev_vport *netdev) OVS_REQUIRES(netdev->mutex); static bool is_vport_class(const struct netdev_class *class) { return class->construct == netdev_vport_construct; } static const struct vport_class * vport_class_cast(const struct netdev_class *class) { ovs_assert(is_vport_class(class)); return CONTAINER_OF(class, struct vport_class, netdev_class); } static struct netdev_vport * netdev_vport_cast(const struct netdev *netdev) { ovs_assert(is_vport_class(netdev_get_class(netdev))); return CONTAINER_OF(netdev, struct netdev_vport, up); } static const struct netdev_tunnel_config * get_netdev_tunnel_config(const struct netdev *netdev) { return &netdev_vport_cast(netdev)->tnl_cfg; } bool netdev_vport_is_patch(const struct netdev *netdev) { const struct netdev_class *class = netdev_get_class(netdev); return class->get_config == get_patch_config; } static bool netdev_vport_needs_dst_port(const struct netdev 
*dev) { const struct netdev_class *class = netdev_get_class(dev); const char *type = netdev_get_type(dev); return (class->get_config == get_tunnel_config && (!strcmp("vxlan", type) || !strcmp("lisp", type))); } const char * netdev_vport_class_get_dpif_port(const struct netdev_class *class) { return is_vport_class(class) ? vport_class_cast(class)->dpif_port : NULL; } const char * netdev_vport_get_dpif_port(const struct netdev *netdev, char namebuf[], size_t bufsize) { if (netdev_vport_needs_dst_port(netdev)) { const struct netdev_vport *vport = netdev_vport_cast(netdev); const char *type = netdev_get_type(netdev); /* * Note: IFNAMSIZ is 16 bytes long. The maximum length of a VXLAN * or LISP port name below is 15 or 14 bytes respectively. Still, * assert here on the size of strlen(type) in case that changes * in the future. */ BUILD_ASSERT(NETDEV_VPORT_NAME_BUFSIZE >= IFNAMSIZ); ovs_assert(strlen(type) + 10 < IFNAMSIZ); snprintf(namebuf, bufsize, "%s_sys_%d", type, ntohs(vport->tnl_cfg.dst_port)); return namebuf; } else { const struct netdev_class *class = netdev_get_class(netdev); const char *dpif_port = netdev_vport_class_get_dpif_port(class); return dpif_port ? 
dpif_port : netdev_get_name(netdev); } } char * netdev_vport_get_dpif_port_strdup(const struct netdev *netdev) { char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; return xstrdup(netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf)); } static struct netdev * netdev_vport_alloc(void) { struct netdev_vport *netdev = xzalloc(sizeof *netdev); return &netdev->up; } static int netdev_vport_construct(struct netdev *netdev_) { struct netdev_vport *netdev = netdev_vport_cast(netdev_); ovs_mutex_init(&netdev->mutex); netdev->change_seq = 1; eth_addr_random(netdev->etheraddr); route_table_register(); return 0; } static void netdev_vport_destruct(struct netdev *netdev_) { struct netdev_vport *netdev = netdev_vport_cast(netdev_); route_table_unregister(); free(netdev->peer); ovs_mutex_destroy(&netdev->mutex); } static void netdev_vport_dealloc(struct netdev *netdev_) { struct netdev_vport *netdev = netdev_vport_cast(netdev_); free(netdev); } static int netdev_vport_set_etheraddr(struct netdev *netdev_, const uint8_t mac[ETH_ADDR_LEN]) { struct netdev_vport *netdev = netdev_vport_cast(netdev_); ovs_mutex_lock(&netdev->mutex); memcpy(netdev->etheraddr, mac, ETH_ADDR_LEN); netdev_vport_poll_notify(netdev); ovs_mutex_unlock(&netdev->mutex); return 0; } static int netdev_vport_get_etheraddr(const struct netdev *netdev_, uint8_t mac[ETH_ADDR_LEN]) { struct netdev_vport *netdev = netdev_vport_cast(netdev_); ovs_mutex_lock(&netdev->mutex); memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN); ovs_mutex_unlock(&netdev->mutex); return 0; } static int tunnel_get_status(const struct netdev *netdev_, struct smap *smap) { struct netdev_vport *netdev = netdev_vport_cast(netdev_); char iface[IFNAMSIZ]; ovs_be32 route; ovs_mutex_lock(&netdev->mutex); route = netdev->tnl_cfg.ip_dst; ovs_mutex_unlock(&netdev->mutex); if (route_table_get_name(route, iface)) { struct netdev *egress_netdev; smap_add(smap, "tunnel_egress_iface", iface); if (!netdev_open(iface, "system", &egress_netdev)) { smap_add(smap, 
"tunnel_egress_iface_carrier", netdev_get_carrier(egress_netdev) ? "up" : "down"); netdev_close(egress_netdev); } } return 0; } static int netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED, enum netdev_flags off, enum netdev_flags on OVS_UNUSED, enum netdev_flags *old_flagsp) { if (off & (NETDEV_UP | NETDEV_PROMISC)) { return EOPNOTSUPP; } *old_flagsp = NETDEV_UP | NETDEV_PROMISC; return 0; } static unsigned int netdev_vport_change_seq(const struct netdev *netdev) { return netdev_vport_cast(netdev)->change_seq; } static void netdev_vport_run(void) { route_table_run(); } static void netdev_vport_wait(void) { route_table_wait(); } /* Helper functions. */ static void netdev_vport_poll_notify(struct netdev_vport *ndv) { ndv->change_seq++; if (!ndv->change_seq) { ndv->change_seq++; } } /* Code specific to tunnel types. */ static ovs_be64 parse_key(const struct smap *args, const char *name, bool *present, bool *flow) { const char *s; *present = false; *flow = false; s = smap_get(args, name); if (!s) { s = smap_get(args, "key"); if (!s) { return 0; } } *present = true; if (!strcmp(s, "flow")) { *flow = true; return 0; } else { return htonll(strtoull(s, NULL, 0)); } } static int set_tunnel_config(struct netdev *dev_, const struct smap *args) { struct netdev_vport *dev = netdev_vport_cast(dev_); const char *name = netdev_get_name(dev_); const char *type = netdev_get_type(dev_); bool ipsec_mech_set, needs_dst_port, has_csum; struct netdev_tunnel_config tnl_cfg; struct smap_node *node; has_csum = strstr(type, "gre"); ipsec_mech_set = false; memset(&tnl_cfg, 0, sizeof tnl_cfg); needs_dst_port = netdev_vport_needs_dst_port(dev_); tnl_cfg.ipsec = strstr(type, "ipsec"); tnl_cfg.dont_fragment = true; SMAP_FOR_EACH (node, args) { if (!strcmp(node->key, "remote_ip")) { struct in_addr in_addr; if (!strcmp(node->value, "flow")) { tnl_cfg.ip_dst_flow = true; tnl_cfg.ip_dst = htonl(0); } else if (lookup_ip(node->value, &in_addr)) { VLOG_WARN("%s: bad %s 'remote_ip'", name, 
type); } else if (ip_is_multicast(in_addr.s_addr)) { VLOG_WARN("%s: multicast remote_ip="IP_FMT" not allowed", name, IP_ARGS(in_addr.s_addr)); return EINVAL; } else { tnl_cfg.ip_dst = in_addr.s_addr; } } else if (!strcmp(node->key, "local_ip")) { struct in_addr in_addr; if (!strcmp(node->value, "flow")) { tnl_cfg.ip_src_flow = true; tnl_cfg.ip_src = htonl(0); } else if (lookup_ip(node->value, &in_addr)) { VLOG_WARN("%s: bad %s 'local_ip'", name, type); } else { tnl_cfg.ip_src = in_addr.s_addr; } } else if (!strcmp(node->key, "tos")) { if (!strcmp(node->value, "inherit")) { tnl_cfg.tos_inherit = true; } else { char *endptr; int tos; tos = strtol(node->value, &endptr, 0); if (*endptr == '\0' && tos == (tos & IP_DSCP_MASK)) { tnl_cfg.tos = tos; } else { VLOG_WARN("%s: invalid TOS %s", name, node->value); } } } else if (!strcmp(node->key, "ttl")) { if (!strcmp(node->value, "inherit")) { tnl_cfg.ttl_inherit = true; } else { tnl_cfg.ttl = atoi(node->value); } } else if (!strcmp(node->key, "dst_port") && needs_dst_port) { tnl_cfg.dst_port = htons(atoi(node->value)); } else if (!strcmp(node->key, "csum") && has_csum) { if (!strcmp(node->value, "true")) { tnl_cfg.csum = true; } } else if (!strcmp(node->key, "df_default")) { if (!strcmp(node->value, "false")) { tnl_cfg.dont_fragment = false; } } else if (!strcmp(node->key, "peer_cert") && tnl_cfg.ipsec) { if (smap_get(args, "certificate")) { ipsec_mech_set = true; } else { const char *use_ssl_cert; /* If the "use_ssl_cert" is true, then "certificate" and * "private_key" will be pulled from the SSL table. The * use of this option is strongly discouraged, since it * will like be removed when multiple SSL configurations * are supported by OVS. 
*/ use_ssl_cert = smap_get(args, "use_ssl_cert"); if (!use_ssl_cert || strcmp(use_ssl_cert, "true")) { VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument", name); return EINVAL; } ipsec_mech_set = true; } } else if (!strcmp(node->key, "psk") && tnl_cfg.ipsec) { ipsec_mech_set = true; } else if (tnl_cfg.ipsec && (!strcmp(node->key, "certificate") || !strcmp(node->key, "private_key") || !strcmp(node->key, "use_ssl_cert"))) { /* Ignore options not used by the netdev. */ } else if (!strcmp(node->key, "key") || !strcmp(node->key, "in_key") || !strcmp(node->key, "out_key")) { /* Handled separately below. */ } else { VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->key); } } /* Add a default destination port for VXLAN if none specified. */ if (!strcmp(type, "vxlan") && !tnl_cfg.dst_port) { tnl_cfg.dst_port = htons(VXLAN_DST_PORT); } /* Add a default destination port for LISP if none specified. */ if (!strcmp(type, "lisp") && !tnl_cfg.dst_port) { tnl_cfg.dst_port = htons(LISP_DST_PORT); } if (tnl_cfg.ipsec) { static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; static pid_t pid = 0; ovs_mutex_lock(&mutex); if (pid <= 0) { char *file_name = xasprintf("%s/%s", ovs_rundir(), "ovs-monitor-ipsec.pid"); pid = read_pidfile(file_name); free(file_name); } ovs_mutex_unlock(&mutex); if (pid < 0) { VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon", name); return EINVAL; } if (smap_get(args, "peer_cert") && smap_get(args, "psk")) { VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name); return EINVAL; } if (!ipsec_mech_set) { VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument", name); return EINVAL; } } if (!tnl_cfg.ip_dst && !tnl_cfg.ip_dst_flow) { VLOG_ERR("%s: %s type requires valid 'remote_ip' argument", name, type); return EINVAL; } if (tnl_cfg.ip_src_flow && !tnl_cfg.ip_dst_flow) { VLOG_ERR("%s: %s type requires 'remote_ip=flow' with 'local_ip=flow'", name, type); return EINVAL; } if (!tnl_cfg.ttl) { tnl_cfg.ttl = DEFAULT_TTL; } 
tnl_cfg.in_key = parse_key(args, "in_key",
                           &tnl_cfg.in_key_present,
                           &tnl_cfg.in_key_flow);

    tnl_cfg.out_key = parse_key(args, "out_key",
                                &tnl_cfg.out_key_present,
                                &tnl_cfg.out_key_flow);

    /* Install the new configuration and call the poll-notify hook while
     * holding the device mutex. */
    ovs_mutex_lock(&dev->mutex);
    dev->tnl_cfg = tnl_cfg;
    netdev_vport_poll_notify(dev);
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Adds to 'args' key-value pairs describing the tunnel configuration of
 * 'dev'.  The configuration is copied out under the device mutex and then
 * formatted without the lock held.
 *
 * Keys that may be added: "remote_ip", "local_ip", "key" (or "in_key" and
 * "out_key" separately), "ttl", "tos", "dst_port", "csum" and "df_default".
 * Most are emitted only when they differ from the value tested for below.
 *
 * Always returns 0. */
static int
get_tunnel_config(const struct netdev *dev, struct smap *args)
{
    struct netdev_vport *netdev = netdev_vport_cast(dev);
    struct netdev_tunnel_config tnl_cfg;

    ovs_mutex_lock(&netdev->mutex);
    tnl_cfg = netdev->tnl_cfg;
    ovs_mutex_unlock(&netdev->mutex);

    if (tnl_cfg.ip_dst) {
        smap_add_format(args, "remote_ip", IP_FMT, IP_ARGS(tnl_cfg.ip_dst));
    } else if (tnl_cfg.ip_dst_flow) {
        smap_add(args, "remote_ip", "flow");
    }

    if (tnl_cfg.ip_src) {
        smap_add_format(args, "local_ip", IP_FMT, IP_ARGS(tnl_cfg.ip_src));
    } else if (tnl_cfg.ip_src_flow) {
        smap_add(args, "local_ip", "flow");
    }

    /* Report a single "key" when both directions agree, otherwise report
     * "in_key" and "out_key" individually. */
    if (tnl_cfg.in_key_flow && tnl_cfg.out_key_flow) {
        smap_add(args, "key", "flow");
    } else if (tnl_cfg.in_key_present && tnl_cfg.out_key_present
               && tnl_cfg.in_key == tnl_cfg.out_key) {
        smap_add_format(args, "key", "%"PRIu64, ntohll(tnl_cfg.in_key));
    } else {
        if (tnl_cfg.in_key_flow) {
            smap_add(args, "in_key", "flow");
        } else if (tnl_cfg.in_key_present) {
            smap_add_format(args, "in_key", "%"PRIu64,
                            ntohll(tnl_cfg.in_key));
        }

        if (tnl_cfg.out_key_flow) {
            smap_add(args, "out_key", "flow");
        } else if (tnl_cfg.out_key_present) {
            smap_add_format(args, "out_key", "%"PRIu64,
                            ntohll(tnl_cfg.out_key));
        }
    }

    if (tnl_cfg.ttl_inherit) {
        smap_add(args, "ttl", "inherit");
    } else if (tnl_cfg.ttl != DEFAULT_TTL) {
        smap_add_format(args, "ttl", "%"PRIu8, tnl_cfg.ttl);
    }

    if (tnl_cfg.tos_inherit) {
        smap_add(args, "tos", "inherit");
    } else if (tnl_cfg.tos) {
        smap_add_format(args, "tos", "0x%x", tnl_cfg.tos);
    }

    /* "dst_port" is reported only for "vxlan" and "lisp" devices, and only
     * when it differs from that type's *_DST_PORT constant. */
    if (tnl_cfg.dst_port) {
        uint16_t dst_port = ntohs(tnl_cfg.dst_port);
        const char *type = netdev_get_type(dev);

        if ((!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
            (!strcmp("lisp", type) && dst_port != LISP_DST_PORT)) {
            smap_add_format(args, "dst_port", "%d", dst_port);
        }
    }

    if (tnl_cfg.csum) {
        smap_add(args, "csum", "true");
    }

    if (!tnl_cfg.dont_fragment) {
        smap_add(args, "df_default", "false");
    }

    return 0;
}

/* Code specific to patch ports. */

/* If 'netdev' is a patch port, returns the name of its peer as a malloc()'d
 * string that the caller must free.
 *
 * If 'netdev' is not a patch port, returns NULL.  NULL is also returned for a
 * patch port whose 'peer' member is not set. */
char *
netdev_vport_patch_peer(const struct netdev *netdev_)
{
    char *peer = NULL;

    if (netdev_vport_is_patch(netdev_)) {
        struct netdev_vport *netdev = netdev_vport_cast(netdev_);

        /* Copy the peer name under the mutex so the caller gets a string
         * that cannot be freed behind its back by set_patch_config(). */
        ovs_mutex_lock(&netdev->mutex);
        if (netdev->peer) {
            peer = xstrdup(netdev->peer);
        }
        ovs_mutex_unlock(&netdev->mutex);
    }

    return peer;
}

/* Adds the packet and byte counts in 'stats' to the receive counters of
 * 'netdev'.  Does nothing if 'netdev' does not belong to a vport class. */
void
netdev_vport_inc_rx(const struct netdev *netdev,
                    const struct dpif_flow_stats *stats)
{
    if (is_vport_class(netdev_get_class(netdev))) {
        struct netdev_vport *dev = netdev_vport_cast(netdev);

        ovs_mutex_lock(&dev->mutex);
        dev->stats.rx_packets += stats->n_packets;
        dev->stats.rx_bytes += stats->n_bytes;
        ovs_mutex_unlock(&dev->mutex);
    }
}

/* Adds the packet and byte counts in 'stats' to the transmit counters of
 * 'netdev'.  Does nothing if 'netdev' does not belong to a vport class. */
void
netdev_vport_inc_tx(const struct netdev *netdev,
                    const struct dpif_flow_stats *stats)
{
    if (is_vport_class(netdev_get_class(netdev))) {
        struct netdev_vport *dev = netdev_vport_cast(netdev);

        ovs_mutex_lock(&dev->mutex);
        dev->stats.tx_packets += stats->n_packets;
        dev->stats.tx_bytes += stats->n_bytes;
        ovs_mutex_unlock(&dev->mutex);
    }
}

/* Adds a "peer" entry to 'args' if the patch port 'dev_' has a peer
 * configured.  Always returns 0. */
static int
get_patch_config(const struct netdev *dev_, struct smap *args)
{
    struct netdev_vport *dev = netdev_vport_cast(dev_);

    ovs_mutex_lock(&dev->mutex);
    if (dev->peer) {
        smap_add(args, "peer", dev->peer);
    }
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Configures the patch port 'dev_' from 'args', which must contain exactly
 * one entry, "peer", whose value differs from the port's own name.
 *
 * Returns 0 on success.  Logs an error and returns EINVAL, leaving the device
 * unchanged, if 'args' does not meet those requirements. */
static int
set_patch_config(struct netdev *dev_, const struct smap *args)
{
    struct netdev_vport *dev = netdev_vport_cast(dev_);
    const char *name = netdev_get_name(dev_);
    const char *peer;

    peer = smap_get(args, "peer");
    if (!peer) {
        VLOG_ERR("%s: patch type requires valid 'peer' argument", name);
        return EINVAL;
    }

    if (smap_count(args) > 1) {
        VLOG_ERR("%s: patch type takes only a 'peer' argument", name);
        return EINVAL;
    }

    if (!strcmp(name, peer)) {
        VLOG_ERR("%s: patch peer must not be self", name);
        return EINVAL;
    }

    /* Replace the stored peer name with a private copy of the new one. */
    ovs_mutex_lock(&dev->mutex);
    free(dev->peer);
    dev->peer = xstrdup(peer);
    netdev_vport_poll_notify(dev);
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Copies the counters accumulated in 'netdev' into '*stats' under the device
 * mutex.  Always returns 0. */
static int
get_stats(const struct netdev *netdev, struct netdev_stats *stats)
{
    struct netdev_vport *dev = netdev_vport_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    *stats = dev->stats;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

/* Expands to a positional list of initializers for the members that follow
 * the type name in a netdev class initializer (see its use in TUNNEL_CLASS
 * and netdev_vport_patch_register()).  Only the four config/status callbacks
 * vary between vport classes and are supplied as macro arguments.
 *
 * NOTE(review): the entries are positional, so their order presumably has to
 * match the member order of struct netdev_class, which is declared
 * elsewhere -- confirm before adding or reordering entries. */
#define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG,             \
                        GET_TUNNEL_CONFIG, GET_STATUS)      \
    NULL,                                                   \
    netdev_vport_run,                                       \
    netdev_vport_wait,                                      \
                                                            \
    netdev_vport_alloc,                                     \
    netdev_vport_construct,                                 \
    netdev_vport_destruct,                                  \
    netdev_vport_dealloc,                                   \
    GET_CONFIG,                                             \
    SET_CONFIG,                                             \
    GET_TUNNEL_CONFIG,                                      \
                                                            \
    NULL,                       /* send */                  \
    NULL,                       /* send_wait */             \
                                                            \
    netdev_vport_set_etheraddr,                             \
    netdev_vport_get_etheraddr,                             \
    NULL,                       /* get_mtu */               \
    NULL,                       /* set_mtu */               \
    NULL,                       /* get_ifindex */           \
    NULL,                       /* get_carrier */           \
    NULL,                       /* get_carrier_resets */    \
    NULL,                       /* get_miimon */            \
    get_stats,                                              \
    NULL,                       /* set_stats */             \
                                                            \
    NULL,                       /* get_features */          \
    NULL,                       /* set_advertisements */    \
                                                            \
    NULL,                       /* set_policing */          \
    NULL,                       /* get_qos_types */         \
    NULL,                       /* get_qos_capabilities */  \
    NULL,                       /* get_qos */               \
    NULL,                       /* set_qos */               \
    NULL,                       /* get_queue */             \
    NULL,                       /* set_queue */             \
    NULL,                       /* delete_queue */          \
    NULL,                       /* get_queue_stats */       \
    NULL,                       /* queue_dump_start */      \
    NULL,                       /* queue_dump_next */       \
    NULL,                       /* queue_dump_done */       \
    NULL,                       /* dump_queue_stats */      \
                                                            \
    NULL,                       /* get_in4 */               \
    NULL,                       /* set_in4 */               \
    NULL,                       /* get_in6 */               \
    NULL,                       /* add_router */            \
    NULL,                       /* get_next_hop */          \
    GET_STATUS,                                             \
    NULL,                       /* arp_lookup */            \
                                                            \
    netdev_vport_update_flags,                              \
                                                            \
    netdev_vport_change_seq,                                \
                                                            \
    NULL,                       /* rx_alloc */              \
    NULL,                       /* rx_construct */          \
    NULL,                       /* rx_destruct */           \
    NULL,                       /* rx_dealloc */            \
    NULL,                       /* rx_recv */               \
    NULL,                       /* rx_wait */               \
    NULL,                       /* rx_drain */

/* Expands to a 'struct vport_class' initializer for the tunnel type NAME
 * whose first member is DPIF_PORT and whose netdev class uses the
 * tunnel-specific config and status callbacks. */
#define TUNNEL_CLASS(NAME, DPIF_PORT)                       \
    { DPIF_PORT,                                            \
        { NAME, VPORT_FUNCTIONS(get_tunnel_config,          \
                                set_tunnel_config,          \
                                get_netdev_tunnel_config,   \
                                tunnel_get_status) }}

/* Registers the netdev classes for all tunnel types listed below.  Guarded
 * by an ovsthread_once, so repeated calls register them only once. */
void
netdev_vport_tunnel_register(void)
{
    static const struct vport_class vport_classes[] = {
        TUNNEL_CLASS("gre", "gre_system"),
        TUNNEL_CLASS("ipsec_gre", "gre_system"),
        TUNNEL_CLASS("gre64", "gre64_system"),
        TUNNEL_CLASS("ipsec_gre64", "gre64_system"),
        TUNNEL_CLASS("vxlan", "vxlan_system"),
        TUNNEL_CLASS("lisp", "lisp_system")
    };
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        int i;

        for (i = 0; i < ARRAY_SIZE(vport_classes); i++) {
            netdev_register_provider(&vport_classes[i].netdev_class);
        }
        ovsthread_once_done(&once);
    }
}

/* Registers the "patch" netdev class.  This function has no once-guard of
 * its own. */
void
netdev_vport_patch_register(void)
{
    static const struct vport_class patch_class =
        { NULL,
            { "patch", VPORT_FUNCTIONS(get_patch_config,
                                       set_patch_config,
                                       NULL,
                                       NULL) }};
    netdev_register_provider(&patch_class.netdev_class);
}
openvswitch-2.0.1+git20140120/lib/netdev-vport.h000066400000000000000000000030341226605124000210260ustar00rootroot00000000000000/*
 * Copyright (c) 2010, 2011, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ #ifndef NETDEV_VPORT_H #define NETDEV_VPORT_H 1 #include #include struct dpif_linux_vport; struct dpif_flow_stats; struct netdev; struct netdev_class; struct netdev_stats; void netdev_vport_tunnel_register(void); void netdev_vport_patch_register(void); bool netdev_vport_is_patch(const struct netdev *); char *netdev_vport_patch_peer(const struct netdev *netdev); void netdev_vport_inc_rx(const struct netdev *, const struct dpif_flow_stats *); void netdev_vport_inc_tx(const struct netdev *, const struct dpif_flow_stats *); const char *netdev_vport_class_get_dpif_port(const struct netdev_class *); enum { NETDEV_VPORT_NAME_BUFSIZE = 16 }; const char *netdev_vport_get_dpif_port(const struct netdev *, char namebuf[], size_t bufsize); char *netdev_vport_get_dpif_port_strdup(const struct netdev *); #endif /* netdev-vport.h */ openvswitch-2.0.1+git20140120/lib/netdev.c000066400000000000000000001500711226605124000176550ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

#include
#include "netdev.h"

#include
#include
#include
#include
#include
#include

#include "coverage.h"
#include "dpif.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "hash.h"
#include "list.h"
#include "netdev-provider.h"
#include "netdev-vport.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "packets.h"
#include "poll-loop.h"
#include "shash.h"
#include "smap.h"
#include "sset.h"
#include "svec.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(netdev);

COVERAGE_DEFINE(netdev_received);
COVERAGE_DEFINE(netdev_sent);
COVERAGE_DEFINE(netdev_add_router);
COVERAGE_DEFINE(netdev_get_stats);

/* Record of one flag change made through do_update_flags(), kept so that the
 * change can later be undone by netdev_restore_flags().  Each record holds a
 * reference on 'netdev'. */
struct netdev_saved_flags {
    struct netdev *netdev;
    struct list node;           /* In struct netdev's saved_flags_list. */
    enum netdev_flags saved_flags;  /* Mask of the flags that were changed. */
    enum netdev_flags saved_values; /* Values those flags were changed to. */
};

/* Protects 'netdev_shash' and the mutable members of struct netdev. */
static struct ovs_mutex netdev_mutex = OVS_MUTEX_INITIALIZER;

/* All created network devices. */
static struct shash netdev_shash OVS_GUARDED_BY(netdev_mutex)
    = SHASH_INITIALIZER(&netdev_shash);

/* Protects 'netdev_classes' against insertions or deletions.
 *
 * This is an rwlock not for performance reasons, but to allow recursive
 * acquisition when calling into providers.  For example, netdev_run() calls
 * into provider 'run' functions, which might reasonably want to call one of
 * the netdev functions that takes netdev_class_rwlock read-only. */
static struct ovs_rwlock netdev_class_rwlock OVS_ACQ_BEFORE(netdev_mutex)
    = OVS_RWLOCK_INITIALIZER;

/* Contains 'struct netdev_registered_class'es. */
static struct hmap netdev_classes OVS_GUARDED_BY(netdev_class_rwlock)
    = HMAP_INITIALIZER(&netdev_classes);

/* A registered netdev provider together with a count of the devices that
 * currently use it. */
struct netdev_registered_class {
    struct hmap_node hmap_node; /* In 'netdev_classes', by class->type. */
    const struct netdev_class *class;
    atomic_int ref_cnt;         /* Number of 'struct netdev's of this class. */
};

/* This is set pretty low because we probably won't learn anything from the
 * additional log messages.
*/
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

static void restore_all_flags(void *aux OVS_UNUSED);
void update_device_args(struct netdev *, const struct shash *args);

/* One-time module setup, guarded by an ovsthread_once.  Installs
 * restore_all_flags() as a fatal-signal hook and registers the built-in
 * netdev providers: patch ports always, the others depending on the
 * platform macros tested below. */
static void
netdev_initialize(void)
    OVS_EXCLUDED(netdev_class_rwlock, netdev_mutex)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);
        netdev_vport_patch_register();

#ifdef LINUX_DATAPATH
        netdev_register_provider(&netdev_linux_class);
        netdev_register_provider(&netdev_internal_class);
        netdev_register_provider(&netdev_tap_class);
        netdev_vport_tunnel_register();
#endif
#if defined(__FreeBSD__) || defined(__NetBSD__)
        netdev_register_provider(&netdev_tap_class);
        netdev_register_provider(&netdev_bsd_class);
#endif

        ovsthread_once_done(&once);
    }
}

/* Performs periodic work needed by all the various kinds of netdevs.
 *
 * If your program opens any netdevs, it must call this function within its
 * main poll loop.
 *
 * Calls the 'run' callback of every registered class that has one, with
 * 'netdev_class_rwlock' held for reading. */
void
netdev_run(void)
    OVS_EXCLUDED(netdev_class_rwlock, netdev_mutex)
{
    struct netdev_registered_class *rc;

    ovs_rwlock_rdlock(&netdev_class_rwlock);
    HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {
        if (rc->class->run) {
            rc->class->run();
        }
    }
    ovs_rwlock_unlock(&netdev_class_rwlock);
}

/* Arranges for poll_block() to wake up when netdev_run() needs to be called.
 *
 * If your program opens any netdevs, it must call this function within its
 * main poll loop.
*/
void
netdev_wait(void)
    OVS_EXCLUDED(netdev_class_rwlock, netdev_mutex)
{
    struct netdev_registered_class *rc;

    /* Call the 'wait' callback of every registered class that has one. */
    ovs_rwlock_rdlock(&netdev_class_rwlock);
    HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {
        if (rc->class->wait) {
            rc->class->wait();
        }
    }
    ovs_rwlock_unlock(&netdev_class_rwlock);
}

/* Returns the registration record for the class whose type is 'type', or
 * NULL if no such class is registered.  'type' must not be NULL because it
 * is hashed and compared as a string.
 *
 * The caller must hold 'netdev_class_rwlock'. */
static struct netdev_registered_class *
netdev_lookup_class(const char *type)
    OVS_REQ_RDLOCK(netdev_class_rwlock)
{
    struct netdev_registered_class *rc;

    HMAP_FOR_EACH_WITH_HASH (rc, hmap_node, hash_string(type, 0),
                             &netdev_classes) {
        if (!strcmp(type, rc->class->type)) {
            return rc;
        }
    }
    return NULL;
}

/* Initializes and registers a new netdev provider.  After successful
 * registration, new netdevs of that type can be opened using netdev_open().
 *
 * Returns 0 on success, EEXIST if a provider with the same type is already
 * registered, or the positive errno value returned by the provider's 'init'
 * callback if it fails (in which case the provider is not registered). */
int
netdev_register_provider(const struct netdev_class *new_class)
    OVS_EXCLUDED(netdev_class_rwlock, netdev_mutex)
{
    int error;

    ovs_rwlock_wrlock(&netdev_class_rwlock);
    if (netdev_lookup_class(new_class->type)) {
        VLOG_WARN("attempted to register duplicate netdev provider: %s",
                   new_class->type);
        error = EEXIST;
    } else {
        /* 'init' is optional; a class without one always succeeds. */
        error = new_class->init ? new_class->init() : 0;
        if (!error) {
            struct netdev_registered_class *rc;

            rc = xmalloc(sizeof *rc);
            hmap_insert(&netdev_classes, &rc->hmap_node,
                        hash_string(new_class->type, 0));
            rc->class = new_class;
            atomic_init(&rc->ref_cnt, 0);
        } else {
            VLOG_ERR("failed to initialize %s network device class: %s",
                     new_class->type, ovs_strerror(error));
        }
    }
    ovs_rwlock_unlock(&netdev_class_rwlock);

    return error;
}

/* Unregisters a netdev provider.  'type' must have been previously
 * registered and not currently be in use by any netdevs.  After unregistration
 * new netdevs of that type cannot be opened using netdev_open().
*/
int
netdev_unregister_provider(const char *type)
    OVS_EXCLUDED(netdev_class_rwlock, netdev_mutex)
{
    struct netdev_registered_class *rc;
    int error;

    ovs_rwlock_wrlock(&netdev_class_rwlock);
    rc = netdev_lookup_class(type);
    if (!rc) {
        VLOG_WARN("attempted to unregister a netdev provider that is not "
                  "registered: %s", type);
        error = EAFNOSUPPORT;
    } else {
        int ref_cnt;

        /* Only a class with no live devices may be removed. */
        atomic_read(&rc->ref_cnt, &ref_cnt);
        if (!ref_cnt) {
            hmap_remove(&netdev_classes, &rc->hmap_node);
            free(rc);
            error = 0;
        } else {
            VLOG_WARN("attempted to unregister in use netdev provider: %s",
                      type);
            error = EBUSY;
        }
    }
    ovs_rwlock_unlock(&netdev_class_rwlock);

    return error;
}

/* Clears 'types' and enumerates the types of all currently registered netdev
 * providers into it.  The caller must first initialize the sset. */
void
netdev_enumerate_types(struct sset *types)
    OVS_EXCLUDED(netdev_mutex)
{
    struct netdev_registered_class *rc;

    /* Make sure the built-in providers are registered before listing. */
    netdev_initialize();
    sset_clear(types);

    ovs_rwlock_rdlock(&netdev_class_rwlock);
    HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {
        sset_add(types, rc->class->type);
    }
    ovs_rwlock_unlock(&netdev_class_rwlock);
}

/* Check that the network device name is not the same as any of the registered
 * vport providers' dpif_port name (dpif_port is NULL if the vport provider
 * does not define it) or the datapath internal port name (e.g. ovs-system).
 *
 * Returns true if there is a name conflict, false otherwise.
*/ bool netdev_is_reserved_name(const char *name) OVS_EXCLUDED(netdev_mutex) { struct netdev_registered_class *rc; netdev_initialize(); ovs_rwlock_rdlock(&netdev_class_rwlock); HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) { const char *dpif_port = netdev_vport_class_get_dpif_port(rc->class); if (dpif_port && !strcmp(dpif_port, name)) { ovs_rwlock_unlock(&netdev_class_rwlock); return true; } } ovs_rwlock_unlock(&netdev_class_rwlock); if (!strncmp(name, "ovs-", 4)) { struct sset types; const char *type; sset_init(&types); dp_enumerate_types(&types); SSET_FOR_EACH (type, &types) { if (!strcmp(name+4, type)) { sset_destroy(&types); return true; } } sset_destroy(&types); } return false; } /* Opens the network device named 'name' (e.g. "eth0") of the specified 'type' * (e.g. "system") and returns zero if successful, otherwise a positive errno * value. On success, sets '*netdevp' to the new network device, otherwise to * null. * * Some network devices may need to be configured (with netdev_set_config()) * before they can be used. */ int netdev_open(const char *name, const char *type, struct netdev **netdevp) OVS_EXCLUDED(netdev_mutex) { struct netdev *netdev; int error; netdev_initialize(); ovs_rwlock_rdlock(&netdev_class_rwlock); ovs_mutex_lock(&netdev_mutex); netdev = shash_find_data(&netdev_shash, name); if (!netdev) { struct netdev_registered_class *rc; rc = netdev_lookup_class(type && type[0] ? 
type : "system"); if (rc) { netdev = rc->class->alloc(); if (netdev) { memset(netdev, 0, sizeof *netdev); netdev->netdev_class = rc->class; netdev->name = xstrdup(name); netdev->node = shash_add(&netdev_shash, name, netdev); list_init(&netdev->saved_flags_list); error = rc->class->construct(netdev); if (!error) { int old_ref_cnt; atomic_add(&rc->ref_cnt, 1, &old_ref_cnt); } else { free(netdev->name); ovs_assert(list_is_empty(&netdev->saved_flags_list)); shash_delete(&netdev_shash, netdev->node); rc->class->dealloc(netdev); } } else { error = ENOMEM; } } else { VLOG_WARN("could not create netdev %s of unknown type %s", name, type); error = EAFNOSUPPORT; } } else { error = 0; } ovs_mutex_unlock(&netdev_mutex); ovs_rwlock_unlock(&netdev_class_rwlock); if (!error) { netdev->ref_cnt++; *netdevp = netdev; } else { *netdevp = NULL; } return error; } /* Returns a reference to 'netdev_' for the caller to own. Returns null if * 'netdev_' is null. */ struct netdev * netdev_ref(const struct netdev *netdev_) OVS_EXCLUDED(netdev_mutex) { struct netdev *netdev = CONST_CAST(struct netdev *, netdev_); if (netdev) { ovs_mutex_lock(&netdev_mutex); ovs_assert(netdev->ref_cnt > 0); netdev->ref_cnt++; ovs_mutex_unlock(&netdev_mutex); } return netdev; } /* Reconfigures the device 'netdev' with 'args'. 'args' may be empty * or NULL if none are needed. */ int netdev_set_config(struct netdev *netdev, const struct smap *args) OVS_EXCLUDED(netdev_mutex) { if (netdev->netdev_class->set_config) { const struct smap no_args = SMAP_INITIALIZER(&no_args); return netdev->netdev_class->set_config(netdev, args ? args : &no_args); } else if (args && !smap_is_empty(args)) { VLOG_WARN("%s: arguments provided to device that is not configurable", netdev_get_name(netdev)); } return 0; } /* Returns the current configuration for 'netdev' in 'args'. The caller must * have already initialized 'args' with smap_init(). Returns 0 on success, in * which case 'args' will be filled with 'netdev''s configuration. 
On failure
 * returns a positive errno value, in which case 'args' will be empty.
 *
 * The caller owns 'args' and its contents and must eventually free them with
 * smap_destroy(). */
int
netdev_get_config(const struct netdev *netdev, struct smap *args)
    OVS_EXCLUDED(netdev_mutex)
{
    int error;

    /* Start from an empty map so stale entries never leak to the caller. */
    smap_clear(args);
    if (netdev->netdev_class->get_config) {
        error = netdev->netdev_class->get_config(netdev, args);
        if (error) {
            /* Discard anything the provider added before it failed. */
            smap_clear(args);
        }
    } else {
        error = 0;
    }

    return error;
}

/* Returns the result of the 'get_tunnel_config' callback of 'netdev''s class,
 * or NULL if the class does not provide that callback. */
const struct netdev_tunnel_config *
netdev_get_tunnel_config(const struct netdev *netdev)
    OVS_EXCLUDED(netdev_mutex)
{
    if (netdev->netdev_class->get_tunnel_config) {
        return netdev->netdev_class->get_tunnel_config(netdev);
    } else {
        return NULL;
    }
}

/* Drops one reference to 'dev'.  When the last reference goes away, the
 * device is destructed, removed from 'netdev_shash', and deallocated, and the
 * reference count of its registered class is decremented.
 *
 * The caller must hold 'netdev_mutex'; this function releases it on every
 * path. */
static void
netdev_unref(struct netdev *dev)
    OVS_RELEASES(netdev_mutex)
{
    ovs_assert(dev->ref_cnt);
    if (!--dev->ref_cnt) {
        /* Remember the class now: 'dev' is freed by dealloc() below. */
        const struct netdev_class *class = dev->netdev_class;
        struct netdev_registered_class *rc;
        int old_ref_cnt;

        dev->netdev_class->destruct(dev);

        shash_delete(&netdev_shash, dev->node);
        free(dev->name);
        dev->netdev_class->dealloc(dev);
        ovs_mutex_unlock(&netdev_mutex);

        /* 'netdev_mutex' is released before taking 'netdev_class_rwlock',
         * which is declared as acquired before 'netdev_mutex'. */
        ovs_rwlock_rdlock(&netdev_class_rwlock);
        rc = netdev_lookup_class(class->type);
        atomic_sub(&rc->ref_cnt, 1, &old_ref_cnt);
        ovs_assert(old_ref_cnt > 0);
        ovs_rwlock_unlock(&netdev_class_rwlock);
    } else {
        ovs_mutex_unlock(&netdev_mutex);
    }
}

/* Releases the caller's reference to 'netdev'.  The device is destroyed only
 * when this was its last reference.  Does nothing if 'netdev' is null. */
void
netdev_close(struct netdev *netdev)
    OVS_EXCLUDED(netdev_mutex)
{
    if (netdev) {
        /* netdev_unref() releases the mutex taken here. */
        ovs_mutex_lock(&netdev_mutex);
        netdev_unref(netdev);
    }
}

/* Parses 'netdev_name_', which is of the form [type@]name into its component
 * pieces.  'name' and 'type' must be freed by the caller.
*/ void netdev_parse_name(const char *netdev_name_, char **name, char **type) { char *netdev_name = xstrdup(netdev_name_); char *separator; separator = strchr(netdev_name, '@'); if (separator) { *separator = '\0'; *type = netdev_name; *name = xstrdup(separator + 1); } else { *name = netdev_name; *type = xstrdup("system"); } } int netdev_rx_open(struct netdev *netdev, struct netdev_rx **rxp) OVS_EXCLUDED(netdev_mutex) { int error; if (netdev->netdev_class->rx_alloc) { struct netdev_rx *rx = netdev->netdev_class->rx_alloc(); if (rx) { rx->netdev = netdev; error = netdev->netdev_class->rx_construct(rx); if (!error) { ovs_mutex_lock(&netdev_mutex); netdev->ref_cnt++; ovs_mutex_unlock(&netdev_mutex); *rxp = rx; return 0; } netdev->netdev_class->rx_dealloc(rx); } else { error = ENOMEM; } } else { error = EOPNOTSUPP; } *rxp = NULL; return error; } void netdev_rx_close(struct netdev_rx *rx) OVS_EXCLUDED(netdev_mutex) { if (rx) { struct netdev *netdev = rx->netdev; netdev->netdev_class->rx_destruct(rx); netdev->netdev_class->rx_dealloc(rx); netdev_close(netdev); } } int netdev_rx_recv(struct netdev_rx *rx, struct ofpbuf *buffer) { int retval; ovs_assert(buffer->size == 0); ovs_assert(ofpbuf_tailroom(buffer) >= ETH_TOTAL_MIN); retval = rx->netdev->netdev_class->rx_recv(rx, buffer->data, ofpbuf_tailroom(buffer)); if (retval >= 0) { COVERAGE_INC(netdev_received); buffer->size += retval; if (buffer->size < ETH_TOTAL_MIN) { ofpbuf_put_zeros(buffer, ETH_TOTAL_MIN - buffer->size); } return 0; } else { return -retval; } } void netdev_rx_wait(struct netdev_rx *rx) { rx->netdev->netdev_class->rx_wait(rx); } int netdev_rx_drain(struct netdev_rx *rx) { return (rx->netdev->netdev_class->rx_drain ? rx->netdev->netdev_class->rx_drain(rx) : 0); } /* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive * errno value. Returns EAGAIN without blocking if the packet cannot be queued * immediately. 
Returns EMSGSIZE if a partial packet was transmitted or if * the packet is too big or too small to transmit on the device. * * The caller retains ownership of 'buffer' in all cases. * * The kernel maintains a packet transmission queue, so the caller is not * expected to do additional queuing of packets. * * Some network devices may not implement support for this function. In such * cases this function will always return EOPNOTSUPP. */ int netdev_send(struct netdev *netdev, const struct ofpbuf *buffer) { int error; error = (netdev->netdev_class->send ? netdev->netdev_class->send(netdev, buffer->data, buffer->size) : EOPNOTSUPP); if (!error) { COVERAGE_INC(netdev_sent); } return error; } /* Registers with the poll loop to wake up from the next call to poll_block() * when the packet transmission queue has sufficient room to transmit a packet * with netdev_send(). * * The kernel maintains a packet transmission queue, so the client is not * expected to do additional queuing of packets. Thus, this function is * unlikely to ever be used. It is included for completeness. */ void netdev_send_wait(struct netdev *netdev) { if (netdev->netdev_class->send_wait) { netdev->netdev_class->send_wait(netdev); } } /* Attempts to set 'netdev''s MAC address to 'mac'. Returns 0 if successful, * otherwise a positive errno value. */ int netdev_set_etheraddr(struct netdev *netdev, const uint8_t mac[ETH_ADDR_LEN]) { return netdev->netdev_class->set_etheraddr(netdev, mac); } /* Retrieves 'netdev''s MAC address. If successful, returns 0 and copies the * the MAC address into 'mac'. On failure, returns a positive errno value and * clears 'mac' to all-zeros. */ int netdev_get_etheraddr(const struct netdev *netdev, uint8_t mac[ETH_ADDR_LEN]) { return netdev->netdev_class->get_etheraddr(netdev, mac); } /* Returns the name of the network device that 'netdev' represents, * e.g. "eth0". The caller must not modify or free the returned string. 
*/ const char * netdev_get_name(const struct netdev *netdev) { return netdev->name; } /* Retrieves the MTU of 'netdev'. The MTU is the maximum size of transmitted * (and received) packets, in bytes, not including the hardware header; thus, * this is typically 1500 bytes for Ethernet devices. * * If successful, returns 0 and stores the MTU size in '*mtup'. Returns * EOPNOTSUPP if 'netdev' does not have an MTU (as e.g. some tunnels do not). * On other failure, returns a positive errno value. On failure, sets '*mtup' * to 0. */ int netdev_get_mtu(const struct netdev *netdev, int *mtup) { const struct netdev_class *class = netdev->netdev_class; int error; error = class->get_mtu ? class->get_mtu(netdev, mtup) : EOPNOTSUPP; if (error) { *mtup = 0; if (error != EOPNOTSUPP) { VLOG_DBG_RL(&rl, "failed to retrieve MTU for network device %s: " "%s", netdev_get_name(netdev), ovs_strerror(error)); } } return error; } /* Sets the MTU of 'netdev'. The MTU is the maximum size of transmitted * (and received) packets, in bytes. * * If successful, returns 0. Returns EOPNOTSUPP if 'netdev' does not have an * MTU (as e.g. some tunnels do not). On other failure, returns a positive * errno value. */ int netdev_set_mtu(const struct netdev *netdev, int mtu) { const struct netdev_class *class = netdev->netdev_class; int error; error = class->set_mtu ? class->set_mtu(netdev, mtu) : EOPNOTSUPP; if (error && error != EOPNOTSUPP) { VLOG_DBG_RL(&rl, "failed to set MTU for network device %s: %s", netdev_get_name(netdev), ovs_strerror(error)); } return error; } /* Returns the ifindex of 'netdev', if successful, as a positive number. On * failure, returns a negative errno value. * * The desired semantics of the ifindex value are a combination of those * specified by POSIX for if_nametoindex() and by SNMP for ifIndex. An ifindex * value should be unique within a host and remain stable at least until * reboot. 
SNMP says an ifindex "ranges between 1 and the value of ifNumber" * but many systems do not follow this rule anyhow. * * Some network devices may not implement support for this function. In such * cases this function will always return -EOPNOTSUPP. */ int netdev_get_ifindex(const struct netdev *netdev) { int (*get_ifindex)(const struct netdev *); get_ifindex = netdev->netdev_class->get_ifindex; return get_ifindex ? get_ifindex(netdev) : -EOPNOTSUPP; } /* Stores the features supported by 'netdev' into each of '*current', * '*advertised', '*supported', and '*peer' that are non-null. Each value is a * bitmap of "enum ofp_port_features" bits, in host byte order. Returns 0 if * successful, otherwise a positive errno value. On failure, all of the * passed-in values are set to 0. * * Some network devices may not implement support for this function. In such * cases this function will always return EOPNOTSUPP. */ int netdev_get_features(const struct netdev *netdev, enum netdev_features *current, enum netdev_features *advertised, enum netdev_features *supported, enum netdev_features *peer) { int (*get_features)(const struct netdev *netdev, enum netdev_features *current, enum netdev_features *advertised, enum netdev_features *supported, enum netdev_features *peer); enum netdev_features dummy[4]; int error; if (!current) { current = &dummy[0]; } if (!advertised) { advertised = &dummy[1]; } if (!supported) { supported = &dummy[2]; } if (!peer) { peer = &dummy[3]; } get_features = netdev->netdev_class->get_features; error = get_features ? get_features(netdev, current, advertised, supported, peer) : EOPNOTSUPP; if (error) { *current = *advertised = *supported = *peer = 0; } return error; } /* Returns the maximum speed of a network connection that has the NETDEV_F_* * bits in 'features', in bits per second. If no bits that indicate a speed * are set in 'features', returns 'default_bps'. 
*/ uint64_t netdev_features_to_bps(enum netdev_features features, uint64_t default_bps) { enum { F_1000000MB = NETDEV_F_1TB_FD, F_100000MB = NETDEV_F_100GB_FD, F_40000MB = NETDEV_F_40GB_FD, F_10000MB = NETDEV_F_10GB_FD, F_1000MB = NETDEV_F_1GB_HD | NETDEV_F_1GB_FD, F_100MB = NETDEV_F_100MB_HD | NETDEV_F_100MB_FD, F_10MB = NETDEV_F_10MB_HD | NETDEV_F_10MB_FD }; return ( features & F_1000000MB ? UINT64_C(1000000000000) : features & F_100000MB ? UINT64_C(100000000000) : features & F_40000MB ? UINT64_C(40000000000) : features & F_10000MB ? UINT64_C(10000000000) : features & F_1000MB ? UINT64_C(1000000000) : features & F_100MB ? UINT64_C(100000000) : features & F_10MB ? UINT64_C(10000000) : default_bps); } /* Returns true if any of the NETDEV_F_* bits that indicate a full-duplex link * are set in 'features', otherwise false. */ bool netdev_features_is_full_duplex(enum netdev_features features) { return (features & (NETDEV_F_10MB_FD | NETDEV_F_100MB_FD | NETDEV_F_1GB_FD | NETDEV_F_10GB_FD | NETDEV_F_40GB_FD | NETDEV_F_100GB_FD | NETDEV_F_1TB_FD)) != 0; } /* Set the features advertised by 'netdev' to 'advertise'. Returns 0 if * successful, otherwise a positive errno value. */ int netdev_set_advertisements(struct netdev *netdev, enum netdev_features advertise) { return (netdev->netdev_class->set_advertisements ? netdev->netdev_class->set_advertisements( netdev, advertise) : EOPNOTSUPP); } /* If 'netdev' has an assigned IPv4 address, sets '*address' to that address * and '*netmask' to its netmask and returns 0. Otherwise, returns a positive * errno value and sets '*address' to 0 (INADDR_ANY). * * The following error values have well-defined meanings: * * - EADDRNOTAVAIL: 'netdev' has no assigned IPv4 address. * * - EOPNOTSUPP: No IPv4 network stack attached to 'netdev'. * * 'address' or 'netmask' or both may be null, in which case the address or * netmask is not reported. 
*/ int netdev_get_in4(const struct netdev *netdev, struct in_addr *address_, struct in_addr *netmask_) { struct in_addr address; struct in_addr netmask; int error; error = (netdev->netdev_class->get_in4 ? netdev->netdev_class->get_in4(netdev, &address, &netmask) : EOPNOTSUPP); if (address_) { address_->s_addr = error ? 0 : address.s_addr; } if (netmask_) { netmask_->s_addr = error ? 0 : netmask.s_addr; } return error; } /* Assigns 'addr' as 'netdev''s IPv4 address and 'mask' as its netmask. If * 'addr' is INADDR_ANY, 'netdev''s IPv4 address is cleared. Returns a * positive errno value. */ int netdev_set_in4(struct netdev *netdev, struct in_addr addr, struct in_addr mask) { return (netdev->netdev_class->set_in4 ? netdev->netdev_class->set_in4(netdev, addr, mask) : EOPNOTSUPP); } /* Obtains ad IPv4 address from device name and save the address in * in4. Returns 0 if successful, otherwise a positive errno value. */ int netdev_get_in4_by_name(const char *device_name, struct in_addr *in4) { struct netdev *netdev; int error; error = netdev_open(device_name, "system", &netdev); if (error) { in4->s_addr = htonl(0); return error; } error = netdev_get_in4(netdev, in4, NULL); netdev_close(netdev); return error; } /* Adds 'router' as a default IP gateway for the TCP/IP stack that corresponds * to 'netdev'. */ int netdev_add_router(struct netdev *netdev, struct in_addr router) { COVERAGE_INC(netdev_add_router); return (netdev->netdev_class->add_router ? netdev->netdev_class->add_router(netdev, router) : EOPNOTSUPP); } /* Looks up the next hop for 'host' for the TCP/IP stack that corresponds to * 'netdev'. If a route cannot not be determined, sets '*next_hop' to 0, * '*netdev_name' to null, and returns a positive errno value. Otherwise, if a * next hop is found, stores the next hop gateway's address (0 if 'host' is on * a directly connected network) in '*next_hop' and a copy of the name of the * device to reach 'host' in '*netdev_name', and returns 0. 
The caller is * responsible for freeing '*netdev_name' (by calling free()). */ int netdev_get_next_hop(const struct netdev *netdev, const struct in_addr *host, struct in_addr *next_hop, char **netdev_name) { int error = (netdev->netdev_class->get_next_hop ? netdev->netdev_class->get_next_hop( host, next_hop, netdev_name) : EOPNOTSUPP); if (error) { next_hop->s_addr = 0; *netdev_name = NULL; } return error; } /* Populates 'smap' with status information. * * Populates 'smap' with 'netdev' specific status information. This * information may be used to populate the status column of the Interface table * as defined in ovs-vswitchd.conf.db(5). */ int netdev_get_status(const struct netdev *netdev, struct smap *smap) { return (netdev->netdev_class->get_status ? netdev->netdev_class->get_status(netdev, smap) : EOPNOTSUPP); } /* If 'netdev' has an assigned IPv6 address, sets '*in6' to that address and * returns 0. Otherwise, returns a positive errno value and sets '*in6' to * all-zero-bits (in6addr_any). * * The following error values have well-defined meanings: * * - EADDRNOTAVAIL: 'netdev' has no assigned IPv6 address. * * - EOPNOTSUPP: No IPv6 network stack attached to 'netdev'. * * 'in6' may be null, in which case the address itself is not reported. */ int netdev_get_in6(const struct netdev *netdev, struct in6_addr *in6) { struct in6_addr dummy; int error; error = (netdev->netdev_class->get_in6 ? netdev->netdev_class->get_in6(netdev, in6 ? in6 : &dummy) : EOPNOTSUPP); if (error && in6) { memset(in6, 0, sizeof *in6); } return error; } /* On 'netdev', turns off the flags in 'off' and then turns on the flags in * 'on'. Returns 0 if successful, otherwise a positive errno value. 
*/
static int
do_update_flags(struct netdev *netdev, enum netdev_flags off,
                enum netdev_flags on, enum netdev_flags *old_flagsp,
                struct netdev_saved_flags **sfp)
    OVS_EXCLUDED(netdev_mutex)
{
    struct netdev_saved_flags *sf = NULL;
    enum netdev_flags old_flags;
    int error;

    /* Flags present in both 'off' and 'on' end up on, so they are removed
     * from the set passed to the provider as "off". */
    error = netdev->netdev_class->update_flags(netdev, off & ~on, on,
                                               &old_flags);
    if (error) {
        VLOG_WARN_RL(&rl, "failed to %s flags for network device %s: %s",
                     off || on ? "set" : "get", netdev_get_name(netdev),
                     ovs_strerror(error));
        old_flags = 0;
    } else if ((off || on) && sfp) {
        enum netdev_flags new_flags = (old_flags & ~off) | on;
        enum netdev_flags changed_flags = old_flags ^ new_flags;
        if (changed_flags) {
            /* Record which flags changed and what they changed to, so that
             * netdev_restore_flags() can undo exactly this change.  The
             * record holds its own reference on 'netdev'. */
            ovs_mutex_lock(&netdev_mutex);
            *sfp = sf = xmalloc(sizeof *sf);
            sf->netdev = netdev;
            list_push_front(&netdev->saved_flags_list, &sf->node);
            sf->saved_flags = changed_flags;
            sf->saved_values = changed_flags & new_flags;

            netdev->ref_cnt++;
            ovs_mutex_unlock(&netdev_mutex);
        }
    }

    if (old_flagsp) {
        *old_flagsp = old_flags;
    }
    /* '*sfp' is NULL on failure and when no flag actually changed. */
    if (sfp) {
        *sfp = sf;
    }

    return error;
}

/* Obtains the current flags for 'netdev' and stores them into '*flagsp'.
 * Returns 0 if successful, otherwise a positive errno value.  On failure,
 * stores 0 into '*flagsp'. */
int
netdev_get_flags(const struct netdev *netdev_, enum netdev_flags *flagsp)
{
    /* With 'off' and 'on' both 0 this is a pure query; the const cast is
     * needed only because do_update_flags() takes a non-const device. */
    struct netdev *netdev = CONST_CAST(struct netdev *, netdev_);
    return do_update_flags(netdev, 0, 0, flagsp, NULL);
}

/* Sets the flags for 'netdev' to 'flags'.
 * Returns 0 if successful, otherwise a positive errno value.
 *
 * 'sfp' is handled as described for netdev_turn_flags_on(). */
int
netdev_set_flags(struct netdev *netdev, enum netdev_flags flags,
                 struct netdev_saved_flags **sfp)
{
    /* An all-ones 'off' mask clears every flag that is not in 'flags'. */
    return do_update_flags(netdev, -1, flags, NULL, sfp);
}

/* Turns on the specified 'flags' on 'netdev':
 *
 *    - On success, returns 0.
If 'sfp' is nonnull, sets '*sfp' to a newly
 *      allocated 'struct netdev_saved_flags *' that may be passed to
 *      netdev_restore_flags() to restore the original values of 'flags' on
 *      'netdev' (this will happen automatically at program termination if
 *      netdev_restore_flags() is never called) , or to NULL if no flags were
 *      actually changed.
 *
 *    - On failure, returns a positive errno value.  If 'sfp' is nonnull, sets
 *      '*sfp' to NULL. */
int
netdev_turn_flags_on(struct netdev *netdev, enum netdev_flags flags,
                     struct netdev_saved_flags **sfp)
{
    return do_update_flags(netdev, 0, flags, NULL, sfp);
}

/* Turns off the specified 'flags' on 'netdev'.  See netdev_turn_flags_on() for
 * details of the interface. */
int
netdev_turn_flags_off(struct netdev *netdev, enum netdev_flags flags,
                      struct netdev_saved_flags **sfp)
{
    return do_update_flags(netdev, flags, 0, NULL, sfp);
}

/* Restores the flags that were saved in 'sf', and destroys 'sf'.
 * Does nothing if 'sf' is NULL.
 *
 * Also drops the device reference that 'sf' held.  The result of the
 * provider's 'update_flags' callback is not checked. */
void
netdev_restore_flags(struct netdev_saved_flags *sf)
    OVS_EXCLUDED(netdev_mutex)
{
    if (sf) {
        struct netdev *netdev = sf->netdev;
        enum netdev_flags old_flags;

        /* Turn off the changed flags that were set, and turn on the changed
         * flags that were cleared. */
        netdev->netdev_class->update_flags(netdev,
                                           sf->saved_flags & sf->saved_values,
                                           sf->saved_flags & ~sf->saved_values,
                                           &old_flags);

        /* netdev_unref() releases the mutex taken here. */
        ovs_mutex_lock(&netdev_mutex);
        list_remove(&sf->node);
        free(sf);
        netdev_unref(netdev);
    }
}

/* Looks up the ARP table entry for 'ip' on 'netdev'.  If one exists and can be
 * successfully retrieved, it stores the corresponding MAC address in 'mac' and
 * returns 0.  Otherwise, it returns a positive errno value; in particular,
 * ENXIO indicates that there is no ARP table entry for 'ip' on 'netdev'.
 *
 * On any failure, including EOPNOTSUPP when the device's class has no
 * 'arp_lookup' callback, 'mac' is cleared to all-zeros. */
int
netdev_arp_lookup(const struct netdev *netdev,
                  ovs_be32 ip, uint8_t mac[ETH_ADDR_LEN])
{
    int error = (netdev->netdev_class->arp_lookup
                 ? netdev->netdev_class->arp_lookup(netdev, ip, mac)
                 : EOPNOTSUPP);
    if (error) {
        memset(mac, 0, ETH_ADDR_LEN);
    }
    return error;
}

/* Returns true if carrier is active (link light is on) on 'netdev'.
*/ bool netdev_get_carrier(const struct netdev *netdev) { int error; enum netdev_flags flags; bool carrier; netdev_get_flags(netdev, &flags); if (!(flags & NETDEV_UP)) { return false; } if (!netdev->netdev_class->get_carrier) { return true; } error = netdev->netdev_class->get_carrier(netdev, &carrier); if (error) { VLOG_DBG("%s: failed to get network device carrier status, assuming " "down: %s", netdev_get_name(netdev), ovs_strerror(error)); carrier = false; } return carrier; } /* Returns the number of times 'netdev''s carrier has changed. */ long long int netdev_get_carrier_resets(const struct netdev *netdev) { return (netdev->netdev_class->get_carrier_resets ? netdev->netdev_class->get_carrier_resets(netdev) : 0); } /* Attempts to force netdev_get_carrier() to poll 'netdev''s MII registers for * link status instead of checking 'netdev''s carrier. 'netdev''s MII * registers will be polled once ever 'interval' milliseconds. If 'netdev' * does not support MII, another method may be used as a fallback. If * 'interval' is less than or equal to zero, reverts netdev_get_carrier() to * its normal behavior. * * Returns 0 if successful, otherwise a positive errno value. */ int netdev_set_miimon_interval(struct netdev *netdev, long long int interval) { return (netdev->netdev_class->set_miimon_interval ? netdev->netdev_class->set_miimon_interval(netdev, interval) : EOPNOTSUPP); } /* Retrieves current device stats for 'netdev'. */ int netdev_get_stats(const struct netdev *netdev, struct netdev_stats *stats) { int error; COVERAGE_INC(netdev_get_stats); error = (netdev->netdev_class->get_stats ? netdev->netdev_class->get_stats(netdev, stats) : EOPNOTSUPP); if (error) { memset(stats, 0xff, sizeof *stats); } return error; } /* Attempts to change the stats for 'netdev' to those provided in 'stats'. * Returns 0 if successful, otherwise a positive errno value. * * This will probably fail for most network devices. Some devices might only * allow setting their stats to 0. 
*/ int netdev_set_stats(struct netdev *netdev, const struct netdev_stats *stats) { return (netdev->netdev_class->set_stats ? netdev->netdev_class->set_stats(netdev, stats) : EOPNOTSUPP); } /* Attempts to set input rate limiting (policing) policy, such that up to * 'kbits_rate' kbps of traffic is accepted, with a maximum accumulative burst * size of 'kbits' kb. */ int netdev_set_policing(struct netdev *netdev, uint32_t kbits_rate, uint32_t kbits_burst) { return (netdev->netdev_class->set_policing ? netdev->netdev_class->set_policing(netdev, kbits_rate, kbits_burst) : EOPNOTSUPP); } /* Adds to 'types' all of the forms of QoS supported by 'netdev', or leaves it * empty if 'netdev' does not support QoS. Any names added to 'types' should * be documented as valid for the "type" column in the "QoS" table in * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). * * Every network device supports disabling QoS with a type of "", but this type * will not be added to 'types'. * * The caller must initialize 'types' (e.g. with sset_init()) before calling * this function. The caller is responsible for destroying 'types' (e.g. with * sset_destroy()) when it is no longer needed. * * Returns 0 if successful, otherwise a positive errno value. */ int netdev_get_qos_types(const struct netdev *netdev, struct sset *types) { const struct netdev_class *class = netdev->netdev_class; return (class->get_qos_types ? class->get_qos_types(netdev, types) : 0); } /* Queries 'netdev' for its capabilities regarding the specified 'type' of QoS, * which should be "" or one of the types returned by netdev_get_qos_types() * for 'netdev'. Returns 0 if successful, otherwise a positive errno value. * On success, initializes 'caps' with the QoS capabilities; on failure, clears * 'caps' to all zeros. 
*/ int netdev_get_qos_capabilities(const struct netdev *netdev, const char *type, struct netdev_qos_capabilities *caps) { const struct netdev_class *class = netdev->netdev_class; if (*type) { int retval = (class->get_qos_capabilities ? class->get_qos_capabilities(netdev, type, caps) : EOPNOTSUPP); if (retval) { memset(caps, 0, sizeof *caps); } return retval; } else { /* Every netdev supports turning off QoS. */ memset(caps, 0, sizeof *caps); return 0; } } /* Obtains the number of queues supported by 'netdev' for the specified 'type' * of QoS. Returns 0 if successful, otherwise a positive errno value. Stores * the number of queues (zero on failure) in '*n_queuesp'. * * This is just a simple wrapper around netdev_get_qos_capabilities(). */ int netdev_get_n_queues(const struct netdev *netdev, const char *type, unsigned int *n_queuesp) { struct netdev_qos_capabilities caps; int retval; retval = netdev_get_qos_capabilities(netdev, type, &caps); *n_queuesp = caps.n_queues; return retval; } /* Queries 'netdev' about its currently configured form of QoS. If successful, * stores the name of the current form of QoS into '*typep', stores any details * of configuration as string key-value pairs in 'details', and returns 0. On * failure, sets '*typep' to NULL and returns a positive errno value. * * A '*typep' of "" indicates that QoS is currently disabled on 'netdev'. * * The caller must initialize 'details' as an empty smap (e.g. with * smap_init()) before calling this function. The caller must free 'details' * when it is no longer needed (e.g. with smap_destroy()). * * The caller must not modify or free '*typep'. * * '*typep' will be one of the types returned by netdev_get_qos_types() for * 'netdev'. The contents of 'details' should be documented as valid for * '*typep' in the "other_config" column in the "QoS" table in * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). 
*/ int netdev_get_qos(const struct netdev *netdev, const char **typep, struct smap *details) { const struct netdev_class *class = netdev->netdev_class; int retval; if (class->get_qos) { retval = class->get_qos(netdev, typep, details); if (retval) { *typep = NULL; smap_clear(details); } return retval; } else { /* 'netdev' doesn't support QoS, so report that QoS is disabled. */ *typep = ""; return 0; } } /* Attempts to reconfigure QoS on 'netdev', changing the form of QoS to 'type' * with details of configuration from 'details'. Returns 0 if successful, * otherwise a positive errno value. On error, the previous QoS configuration * is retained. * * When this function changes the type of QoS (not just 'details'), this also * resets all queue configuration for 'netdev' to their defaults (which depend * on the specific type of QoS). Otherwise, the queue configuration for * 'netdev' is unchanged. * * 'type' should be "" (to disable QoS) or one of the types returned by * netdev_get_qos_types() for 'netdev'. The contents of 'details' should be * documented as valid for the given 'type' in the "other_config" column in the * "QoS" table in vswitchd/vswitch.xml (which is built as * ovs-vswitchd.conf.db(8)). * * NULL may be specified for 'details' if there are no configuration * details. */ int netdev_set_qos(struct netdev *netdev, const char *type, const struct smap *details) { const struct netdev_class *class = netdev->netdev_class; if (!type) { type = ""; } if (class->set_qos) { if (!details) { static const struct smap empty = SMAP_INITIALIZER(&empty); details = ∅ } return class->set_qos(netdev, type, details); } else { return *type ? EOPNOTSUPP : 0; } } /* Queries 'netdev' for information about the queue numbered 'queue_id'. If * successful, adds that information as string key-value pairs to 'details'. * Returns 0 if successful, otherwise a positive errno value. * * 'queue_id' must be less than the number of queues supported by 'netdev' for * the current form of QoS (e.g. 
as returned by netdev_get_n_queues(netdev)). * * The returned contents of 'details' should be documented as valid for the * given 'type' in the "other_config" column in the "Queue" table in * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). * * The caller must initialize 'details' (e.g. with smap_init()) before calling * this function. The caller must free 'details' when it is no longer needed * (e.g. with smap_destroy()). */ int netdev_get_queue(const struct netdev *netdev, unsigned int queue_id, struct smap *details) { const struct netdev_class *class = netdev->netdev_class; int retval; retval = (class->get_queue ? class->get_queue(netdev, queue_id, details) : EOPNOTSUPP); if (retval) { smap_clear(details); } return retval; } /* Configures the queue numbered 'queue_id' on 'netdev' with the key-value * string pairs in 'details'. The contents of 'details' should be documented * as valid for the given 'type' in the "other_config" column in the "Queue" * table in vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). * Returns 0 if successful, otherwise a positive errno value. On failure, the * given queue's configuration should be unmodified. * * 'queue_id' must be less than the number of queues supported by 'netdev' for * the current form of QoS (e.g. as returned by netdev_get_n_queues(netdev)). * * This function does not modify 'details', and the caller retains ownership of * it. */ int netdev_set_queue(struct netdev *netdev, unsigned int queue_id, const struct smap *details) { const struct netdev_class *class = netdev->netdev_class; return (class->set_queue ? class->set_queue(netdev, queue_id, details) : EOPNOTSUPP); } /* Attempts to delete the queue numbered 'queue_id' from 'netdev'. Some kinds * of QoS may have a fixed set of queues, in which case attempts to delete them * will fail with EOPNOTSUPP. * * Returns 0 if successful, otherwise a positive errno value. On failure, the * given queue will be unmodified. 
* * 'queue_id' must be less than the number of queues supported by 'netdev' for * the current form of QoS (e.g. as returned by * netdev_get_n_queues(netdev)). */ int netdev_delete_queue(struct netdev *netdev, unsigned int queue_id) { const struct netdev_class *class = netdev->netdev_class; return (class->delete_queue ? class->delete_queue(netdev, queue_id) : EOPNOTSUPP); } /* Obtains statistics about 'queue_id' on 'netdev'. On success, returns 0 and * fills 'stats' with the queue's statistics; individual members of 'stats' may * be set to all-1-bits if the statistic is unavailable. On failure, returns a * positive errno value and fills 'stats' with values indicating unsupported * statistics. */ int netdev_get_queue_stats(const struct netdev *netdev, unsigned int queue_id, struct netdev_queue_stats *stats) { const struct netdev_class *class = netdev->netdev_class; int retval; retval = (class->get_queue_stats ? class->get_queue_stats(netdev, queue_id, stats) : EOPNOTSUPP); if (retval) { stats->tx_bytes = UINT64_MAX; stats->tx_packets = UINT64_MAX; stats->tx_errors = UINT64_MAX; stats->created = LLONG_MIN; } return retval; } /* Initializes 'dump' to begin dumping the queues in a netdev. * * This function provides no status indication. An error status for the entire * dump operation is provided when it is completed by calling * netdev_queue_dump_done(). */ void netdev_queue_dump_start(struct netdev_queue_dump *dump, const struct netdev *netdev) { dump->netdev = netdev_ref(netdev); if (netdev->netdev_class->queue_dump_start) { dump->error = netdev->netdev_class->queue_dump_start(netdev, &dump->state); } else { dump->error = EOPNOTSUPP; } } /* Attempts to retrieve another queue from 'dump', which must have been * initialized with netdev_queue_dump_start(). On success, stores a new queue * ID into '*queue_id', fills 'details' with configuration details for the * queue, and returns true. On failure, returns false. 
* * Queues are not necessarily dumped in increasing order of queue ID (or any * other predictable order). * * Failure might indicate an actual error or merely that the last queue has * been dumped. An error status for the entire dump operation is provided when * it is completed by calling netdev_queue_dump_done(). * * The returned contents of 'details' should be documented as valid for the * given 'type' in the "other_config" column in the "Queue" table in * vswitchd/vswitch.xml (which is built as ovs-vswitchd.conf.db(8)). * * The caller must initialize 'details' (e.g. with smap_init()) before calling * this function. This function will clear and replace its contents. The * caller must free 'details' when it is no longer needed (e.g. with * smap_destroy()). */ bool netdev_queue_dump_next(struct netdev_queue_dump *dump, unsigned int *queue_id, struct smap *details) { const struct netdev *netdev = dump->netdev; if (dump->error) { return false; } dump->error = netdev->netdev_class->queue_dump_next(netdev, dump->state, queue_id, details); if (dump->error) { netdev->netdev_class->queue_dump_done(netdev, dump->state); return false; } return true; } /* Completes queue table dump operation 'dump', which must have been * initialized with netdev_queue_dump_start(). Returns 0 if the dump operation * was error-free, otherwise a positive errno value describing the problem. */ int netdev_queue_dump_done(struct netdev_queue_dump *dump) { const struct netdev *netdev = dump->netdev; if (!dump->error && netdev->netdev_class->queue_dump_done) { dump->error = netdev->netdev_class->queue_dump_done(netdev, dump->state); } netdev_close(dump->netdev); return dump->error == EOF ? 0 : dump->error; } /* Iterates over all of 'netdev''s queues, calling 'cb' with the queue's ID, * its statistics, and the 'aux' specified by the caller. The order of * iteration is unspecified, but (when successful) each queue is visited * exactly once. 
* * Calling this function may be more efficient than calling * netdev_get_queue_stats() for every queue. * * 'cb' must not modify or free the statistics passed in. * * Returns 0 if successful, otherwise a positive errno value. On error, some * configured queues may not have been included in the iteration. */ int netdev_dump_queue_stats(const struct netdev *netdev, netdev_dump_queue_stats_cb *cb, void *aux) { const struct netdev_class *class = netdev->netdev_class; return (class->dump_queue_stats ? class->dump_queue_stats(netdev, cb, aux) : EOPNOTSUPP); } /* Returns a sequence number which indicates changes in one of 'netdev''s * properties. The returned sequence will be nonzero so that callers have a * value which they may use as a reset when tracking 'netdev'. * * The returned sequence number will change whenever 'netdev''s flags, * features, ethernet address, or carrier changes. It may change for other * reasons as well, or no reason at all. */ unsigned int netdev_change_seq(const struct netdev *netdev) { return netdev->netdev_class->change_seq(netdev); } /* Returns the class type of 'netdev'. * * The caller must not free the returned value. */ const char * netdev_get_type(const struct netdev *netdev) { return netdev->netdev_class->type; } /* Returns the class associated with 'netdev'. */ const struct netdev_class * netdev_get_class(const struct netdev *netdev) { return netdev->netdev_class; } /* Returns the netdev with 'name' or NULL if there is none. * * The caller must free the returned netdev with netdev_close(). */ struct netdev * netdev_from_name(const char *name) OVS_EXCLUDED(netdev_mutex) { struct netdev *netdev; ovs_mutex_lock(&netdev_mutex); netdev = shash_find_data(&netdev_shash, name); if (netdev) { netdev->ref_cnt++; } ovs_mutex_unlock(&netdev_mutex); return netdev; } /* Fills 'device_list' with devices that match 'netdev_class'. * * The caller is responsible for initializing and destroying 'device_list' and * must close each device on the list. 
*/ void netdev_get_devices(const struct netdev_class *netdev_class, struct shash *device_list) OVS_EXCLUDED(netdev_mutex) { struct shash_node *node; ovs_mutex_lock(&netdev_mutex); SHASH_FOR_EACH (node, &netdev_shash) { struct netdev *dev = node->data; if (dev->netdev_class == netdev_class) { dev->ref_cnt++; shash_add(device_list, node->name, node->data); } } ovs_mutex_unlock(&netdev_mutex); } const char * netdev_get_type_from_name(const char *name) { struct netdev *dev = netdev_from_name(name); const char *type = dev ? netdev_get_type(dev) : NULL; netdev_close(dev); return type; } struct netdev * netdev_rx_get_netdev(const struct netdev_rx *rx) { ovs_assert(rx->netdev->ref_cnt > 0); return rx->netdev; } const char * netdev_rx_get_name(const struct netdev_rx *rx) { return netdev_get_name(netdev_rx_get_netdev(rx)); } static void restore_all_flags(void *aux OVS_UNUSED) { struct shash_node *node; SHASH_FOR_EACH (node, &netdev_shash) { struct netdev *netdev = node->data; const struct netdev_saved_flags *sf; enum netdev_flags saved_values; enum netdev_flags saved_flags; saved_values = saved_flags = 0; LIST_FOR_EACH (sf, node, &netdev->saved_flags_list) { saved_flags |= sf->saved_flags; saved_values &= ~sf->saved_flags; saved_values |= sf->saved_flags & sf->saved_values; } if (saved_flags) { enum netdev_flags old_flags; netdev->netdev_class->update_flags(netdev, saved_flags & saved_values, saved_flags & ~saved_values, &old_flags); } } } openvswitch-2.0.1+git20140120/lib/netdev.h000066400000000000000000000273271226605124000176710ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef NETDEV_H #define NETDEV_H 1 #include #include #include #include "openvswitch/types.h" #ifdef __cplusplus extern "C" { #endif /* Generic interface to network devices ("netdev"s). * * Every port on a switch must have a corresponding netdev that must minimally * support a few operations, such as the ability to read the netdev's MTU. * The PORTING file at the top of the source tree has more information in the * "Writing a netdev Provider" section. * * Thread-safety * ============= * * Most of the netdev functions are fully thread-safe: they may be called from * any number of threads on the same or different netdev objects. The * exceptions are: * * netdev_rx_recv() * netdev_rx_wait() * netdev_rx_drain() * * These functions are conditionally thread-safe: they may be called from * different threads only on different netdev_rx objects. (The client may * create multiple netdev_rx objects for a single netdev and access each * of those from a different thread.) * * NETDEV_QUEUE_FOR_EACH * netdev_queue_dump_next() * netdev_queue_dump_done() * * These functions are conditionally thread-safe: they may be called from * different threads only on different netdev_queue_dump objects. (The * client may create multiple netdev_queue_dump objects for a single * netdev and access each of those from a different thread.) */ struct netdev; struct netdev_class; struct netdev_rx; struct netdev_saved_flags; struct ofpbuf; struct in_addr; struct in6_addr; struct smap; struct sset; /* Network device statistics. * * Values of unsupported statistics are set to all-1-bits (UINT64_MAX).
*/ struct netdev_stats { uint64_t rx_packets; /* Total packets received. */ uint64_t tx_packets; /* Total packets transmitted. */ uint64_t rx_bytes; /* Total bytes received. */ uint64_t tx_bytes; /* Total bytes transmitted. */ uint64_t rx_errors; /* Bad packets received. */ uint64_t tx_errors; /* Packet transmit problems. */ uint64_t rx_dropped; /* No buffer space. */ uint64_t tx_dropped; /* No buffer space. */ uint64_t multicast; /* Multicast packets received. */ uint64_t collisions; /* Detailed receive errors. */ uint64_t rx_length_errors; uint64_t rx_over_errors; /* Receiver ring buff overflow. */ uint64_t rx_crc_errors; /* Recved pkt with crc error. */ uint64_t rx_frame_errors; /* Recv'd frame alignment error. */ uint64_t rx_fifo_errors; /* Recv'r fifo overrun . */ uint64_t rx_missed_errors; /* Receiver missed packet. */ /* Detailed transmit errors. */ uint64_t tx_aborted_errors; uint64_t tx_carrier_errors; uint64_t tx_fifo_errors; uint64_t tx_heartbeat_errors; uint64_t tx_window_errors; }; /* Configuration specific to tunnels. */ struct netdev_tunnel_config { bool in_key_present; bool in_key_flow; ovs_be64 in_key; bool out_key_present; bool out_key_flow; ovs_be64 out_key; ovs_be16 dst_port; bool ip_src_flow; bool ip_dst_flow; ovs_be32 ip_src; ovs_be32 ip_dst; uint8_t ttl; bool ttl_inherit; uint8_t tos; bool tos_inherit; bool csum; bool ipsec; bool dont_fragment; }; void netdev_run(void); void netdev_wait(void); void netdev_enumerate_types(struct sset *types); bool netdev_is_reserved_name(const char *name); /* Open and close. */ int netdev_open(const char *name, const char *type, struct netdev **); struct netdev *netdev_ref(const struct netdev *); void netdev_close(struct netdev *); void netdev_parse_name(const char *netdev_name, char **name, char **type); /* Options. 
*/ int netdev_set_config(struct netdev *, const struct smap *args); int netdev_get_config(const struct netdev *, struct smap *); const struct netdev_tunnel_config * netdev_get_tunnel_config(const struct netdev *); /* Basic properties. */ const char *netdev_get_name(const struct netdev *); const char *netdev_get_type(const struct netdev *); const char *netdev_get_type_from_name(const char *); int netdev_get_mtu(const struct netdev *, int *mtup); int netdev_set_mtu(const struct netdev *, int mtu); int netdev_get_ifindex(const struct netdev *); /* Packet reception. */ int netdev_rx_open(struct netdev *, struct netdev_rx **); void netdev_rx_close(struct netdev_rx *); const char *netdev_rx_get_name(const struct netdev_rx *); int netdev_rx_recv(struct netdev_rx *, struct ofpbuf *); void netdev_rx_wait(struct netdev_rx *); int netdev_rx_drain(struct netdev_rx *); /* Packet transmission. */ int netdev_send(struct netdev *, const struct ofpbuf *); void netdev_send_wait(struct netdev *); /* Hardware address. */ int netdev_set_etheraddr(struct netdev *, const uint8_t mac[6]); int netdev_get_etheraddr(const struct netdev *, uint8_t mac[6]); /* PHY interface. */ bool netdev_get_carrier(const struct netdev *); long long int netdev_get_carrier_resets(const struct netdev *); int netdev_set_miimon_interval(struct netdev *, long long int interval); /* Features. */ enum netdev_features { NETDEV_F_10MB_HD = 1 << 0, /* 10 Mb half-duplex rate support. */ NETDEV_F_10MB_FD = 1 << 1, /* 10 Mb full-duplex rate support. */ NETDEV_F_100MB_HD = 1 << 2, /* 100 Mb half-duplex rate support. */ NETDEV_F_100MB_FD = 1 << 3, /* 100 Mb full-duplex rate support. */ NETDEV_F_1GB_HD = 1 << 4, /* 1 Gb half-duplex rate support. */ NETDEV_F_1GB_FD = 1 << 5, /* 1 Gb full-duplex rate support. */ NETDEV_F_10GB_FD = 1 << 6, /* 10 Gb full-duplex rate support. */ NETDEV_F_40GB_FD = 1 << 7, /* 40 Gb full-duplex rate support. */ NETDEV_F_100GB_FD = 1 << 8, /* 100 Gb full-duplex rate support. 
*/ NETDEV_F_1TB_FD = 1 << 9, /* 1 Tb full-duplex rate support. */ NETDEV_F_OTHER = 1 << 10, /* Other rate, not in the list. */ NETDEV_F_COPPER = 1 << 11, /* Copper medium. */ NETDEV_F_FIBER = 1 << 12, /* Fiber medium. */ NETDEV_F_AUTONEG = 1 << 13, /* Auto-negotiation. */ NETDEV_F_PAUSE = 1 << 14, /* Pause. */ NETDEV_F_PAUSE_ASYM = 1 << 15, /* Asymmetric pause. */ }; int netdev_get_features(const struct netdev *, enum netdev_features *current, enum netdev_features *advertised, enum netdev_features *supported, enum netdev_features *peer); uint64_t netdev_features_to_bps(enum netdev_features features, uint64_t default_bps); bool netdev_features_is_full_duplex(enum netdev_features features); int netdev_set_advertisements(struct netdev *, enum netdev_features advertise); /* Flags. */ enum netdev_flags { NETDEV_UP = 0x0001, /* Device enabled? */ NETDEV_PROMISC = 0x0002, /* Promiscuous mode? */ NETDEV_LOOPBACK = 0x0004 /* This is a loopback device. */ }; int netdev_get_flags(const struct netdev *, enum netdev_flags *); int netdev_set_flags(struct netdev *, enum netdev_flags, struct netdev_saved_flags **); int netdev_turn_flags_on(struct netdev *, enum netdev_flags, struct netdev_saved_flags **); int netdev_turn_flags_off(struct netdev *, enum netdev_flags, struct netdev_saved_flags **); void netdev_restore_flags(struct netdev_saved_flags *); /* TCP/IP stack interface. 
*/ int netdev_get_in4(const struct netdev *, struct in_addr *address, struct in_addr *netmask); int netdev_set_in4(struct netdev *, struct in_addr addr, struct in_addr mask); int netdev_get_in4_by_name(const char *device_name, struct in_addr *in4); int netdev_get_in6(const struct netdev *, struct in6_addr *); int netdev_add_router(struct netdev *, struct in_addr router); int netdev_get_next_hop(const struct netdev *, const struct in_addr *host, struct in_addr *next_hop, char **); int netdev_get_status(const struct netdev *, struct smap *); int netdev_arp_lookup(const struct netdev *, ovs_be32 ip, uint8_t mac[6]); struct netdev *netdev_find_dev_by_in4(const struct in_addr *); /* Statistics. */ int netdev_get_stats(const struct netdev *, struct netdev_stats *); int netdev_set_stats(struct netdev *, const struct netdev_stats *); /* Quality of service. */ struct netdev_qos_capabilities { unsigned int n_queues; }; struct netdev_queue_stats { /* Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */ uint64_t tx_bytes; uint64_t tx_packets; uint64_t tx_errors; /* Time at which the queue was created, in msecs, LLONG_MIN if unknown. 
*/ long long int created; }; int netdev_set_policing(struct netdev *, uint32_t kbits_rate, uint32_t kbits_burst); int netdev_get_qos_types(const struct netdev *, struct sset *types); int netdev_get_qos_capabilities(const struct netdev *, const char *type, struct netdev_qos_capabilities *); int netdev_get_n_queues(const struct netdev *, const char *type, unsigned int *n_queuesp); int netdev_get_qos(const struct netdev *, const char **typep, struct smap *details); int netdev_set_qos(struct netdev *, const char *type, const struct smap *details); int netdev_get_queue(const struct netdev *, unsigned int queue_id, struct smap *details); int netdev_set_queue(struct netdev *, unsigned int queue_id, const struct smap *details); int netdev_delete_queue(struct netdev *, unsigned int queue_id); int netdev_get_queue_stats(const struct netdev *, unsigned int queue_id, struct netdev_queue_stats *); struct netdev_queue_dump { struct netdev *netdev; int error; void *state; }; void netdev_queue_dump_start(struct netdev_queue_dump *, const struct netdev *); bool netdev_queue_dump_next(struct netdev_queue_dump *, unsigned int *queue_id, struct smap *details); int netdev_queue_dump_done(struct netdev_queue_dump *); /* Iterates through each queue in NETDEV, using DUMP as state. Fills QUEUE_ID * and DETAILS with information about queues. The client must initialize and * destroy DETAILS. * * Arguments all have pointer type. * * If you break out of the loop, then you need to free the dump structure by * hand using netdev_queue_dump_done(). */ #define NETDEV_QUEUE_FOR_EACH(QUEUE_ID, DETAILS, DUMP, NETDEV) \ for (netdev_queue_dump_start(DUMP, NETDEV); \ (netdev_queue_dump_next(DUMP, QUEUE_ID, DETAILS) \ ? 
true \ : (netdev_queue_dump_done(DUMP), false)); \ ) typedef void netdev_dump_queue_stats_cb(unsigned int queue_id, struct netdev_queue_stats *, void *aux); int netdev_dump_queue_stats(const struct netdev *, netdev_dump_queue_stats_cb *, void *aux); unsigned int netdev_change_seq(const struct netdev *netdev); #ifdef __cplusplus } #endif #endif /* netdev.h */ openvswitch-2.0.1+git20140120/lib/netflow.h000066400000000000000000000065111226605124000200520ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef NETFLOW_H #define NETFLOW_H 1 /* NetFlow v5 protocol definitions. */ #include #include "openvswitch/types.h" #include "util.h" #define NETFLOW_V5_VERSION 5 /* Every NetFlow v5 message contains the header that follows. This is * followed by up to thirty records that describe a terminating flow. * We only send a single record per NetFlow message. */ struct netflow_v5_header { ovs_be16 version; /* NetFlow version is 5. */ ovs_be16 count; /* Number of records in this message. */ ovs_be32 sysuptime; /* System uptime in milliseconds. */ ovs_be32 unix_secs; /* Number of seconds since Unix epoch. */ ovs_be32 unix_nsecs; /* Number of residual nanoseconds after epoch seconds. */ ovs_be32 flow_seq; /* Number of flows since sending messages began. */ uint8_t engine_type; /* Engine type. */ uint8_t engine_id; /* Engine id. */ ovs_be16 sampling_interval; /* Set to zero. 
*/ }; BUILD_ASSERT_DECL(sizeof(struct netflow_v5_header) == 24); /* A NetFlow v5 description of a terminating flow. It is preceded by a * NetFlow v5 header. */ struct netflow_v5_record { ovs_be32 src_addr; /* Source IP address. */ ovs_be32 dst_addr; /* Destination IP address. */ ovs_be32 nexthop; /* IP address of next hop. Set to 0. */ ovs_be16 input; /* Input interface index. */ ovs_be16 output; /* Output interface index. */ ovs_be32 packet_count; /* Number of packets. */ ovs_be32 byte_count; /* Number of bytes. */ ovs_be32 init_time; /* Value of sysuptime on first packet. */ ovs_be32 used_time; /* Value of sysuptime on last packet. */ /* The 'src_port' and 'dst_port' identify the source and destination * port, respectively, for TCP and UDP. For ICMP, the high-order * byte identifies the type and low-order byte identifies the code * in the 'dst_port' field. */ ovs_be16 src_port; ovs_be16 dst_port; uint8_t pad1; uint8_t tcp_flags; /* Union of seen TCP flags. */ uint8_t ip_proto; /* IP protocol. */ uint8_t ip_tos; /* IP TOS value. */ ovs_be16 src_as; /* Source AS ID. Set to 0. */ ovs_be16 dst_as; /* Destination AS ID. Set to 0. */ uint8_t src_mask; /* Source mask bits. Set to 0. */ uint8_t dst_mask; /* Destination mask bits. Set to 0. */ uint8_t pad[2]; }; BUILD_ASSERT_DECL(sizeof(struct netflow_v5_record) == 48); #endif /* lib/netflow.h */ openvswitch-2.0.1+git20140120/lib/netlink-notifier.c000066400000000000000000000141051226605124000216460ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include "netlink-notifier.h" #include #include #include #include "coverage.h" #include "netlink.h" #include "netlink-socket.h" #include "ofpbuf.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(netlink_notifier); COVERAGE_DEFINE(nln_changed); static void nln_report(struct nln *nln, void *change); struct nln { struct nl_sock *notify_sock; /* Netlink socket. */ struct list all_notifiers; /* All nln notifiers. */ bool has_run; /* Guard for run and wait functions. */ /* Passed in by nln_create(). */ int multicast_group; /* Multicast group we listen on. */ int protocol; /* Protocol passed to nl_sock_create(). */ nln_parse_func *parse; /* Message parsing function. */ void *change; /* Change passed to parse. */ }; struct nln_notifier { struct nln *nln; /* Parent nln. */ struct list node; nln_notify_func *cb; void *aux; }; /* Creates an nln handle which may be used to manage change notifications. The * created handle will listen for netlink messages on 'multicast_group' using * netlink protocol 'protocol' (e.g. NETLINK_ROUTE, NETLINK_GENERIC, ...). * Incoming messages will be parsed with 'parse' which will be passed 'change' * as an argument. */ struct nln * nln_create(int protocol, int multicast_group, nln_parse_func *parse, void *change) { struct nln *nln; nln = xzalloc(sizeof *nln); nln->notify_sock = NULL; nln->protocol = protocol; nln->multicast_group = multicast_group; nln->parse = parse; nln->change = change; nln->has_run = false; list_init(&nln->all_notifiers); return nln; } /* Destroys 'nln' by freeing any memory it has reserved and closing any sockets * it has opened. * * The caller is responsible for destroying any notifiers created by this * 'nln' before destroying 'nln'. 
*/ void nln_destroy(struct nln *nln) { if (nln) { ovs_assert(list_is_empty(&nln->all_notifiers)); nl_sock_destroy(nln->notify_sock); free(nln); } } /* Registers 'cb' to be called with auxiliary data 'aux' with change * notifications. The notifier is stored in 'notifier', which the caller must * not modify or free. * * This is probably not the function you want. You should probably be using * message specific notifiers like rtnetlink_link_notifier_register(). * * Returns an initialized nln_notifier if successful, otherwise NULL. */ struct nln_notifier * nln_notifier_create(struct nln *nln, nln_notify_func *cb, void *aux) { struct nln_notifier *notifier; if (!nln->notify_sock) { struct nl_sock *sock; int error; error = nl_sock_create(nln->protocol, &sock); if (!error) { error = nl_sock_join_mcgroup(sock, nln->multicast_group); } if (error) { nl_sock_destroy(sock); VLOG_WARN("could not create netlink socket: %s", ovs_strerror(error)); return NULL; } nln->notify_sock = sock; } else { /* Catch up on notification work so that the new notifier won't * receive any stale notifications. */ nln_run(nln); } notifier = xmalloc(sizeof *notifier); list_push_back(&nln->all_notifiers, ¬ifier->node); notifier->cb = cb; notifier->aux = aux; notifier->nln = nln; return notifier; } /* Destroys 'notifier', which must have previously been created with * nln_notifier_register(). */ void nln_notifier_destroy(struct nln_notifier *notifier) { if (notifier) { struct nln *nln = notifier->nln; list_remove(¬ifier->node); if (list_is_empty(&nln->all_notifiers)) { nl_sock_destroy(nln->notify_sock); nln->notify_sock = NULL; } free(notifier); } } /* Calls all of the registered notifiers, passing along any as-yet-unreported * change events. 
*/ void nln_run(struct nln *nln) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (!nln->notify_sock || nln->has_run) { return; } nln->has_run = true; for (;;) { uint64_t buf_stub[4096 / 8]; struct ofpbuf buf; int error; ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); error = nl_sock_recv(nln->notify_sock, &buf, false); if (!error) { if (nln->parse(&buf, nln->change)) { nln_report(nln, nln->change); } else { VLOG_WARN_RL(&rl, "received bad netlink message"); nln_report(nln, NULL); } ofpbuf_uninit(&buf); } else if (error == EAGAIN) { return; } else { if (error == ENOBUFS) { VLOG_WARN_RL(&rl, "netlink receive buffer overflowed"); } else { VLOG_WARN_RL(&rl, "error reading netlink socket: %s", ovs_strerror(error)); } nln_report(nln, NULL); } } } /* Causes poll_block() to wake up when change notifications are ready. */ void nln_wait(struct nln *nln) { nln->has_run = false; if (nln->notify_sock) { nl_sock_wait(nln->notify_sock, POLLIN); } } static void nln_report(struct nln *nln, void *change) { struct nln_notifier *notifier; if (change) { COVERAGE_INC(nln_changed); } LIST_FOR_EACH (notifier, node, &nln->all_notifiers) { notifier->cb(change, notifier->aux); } } openvswitch-2.0.1+git20140120/lib/netlink-notifier.h000066400000000000000000000035011226605124000216510ustar00rootroot00000000000000/* * Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef NETLINK_NOTIFIER_H #define NETLINK_NOTIFIER_H 1 /* These functions are Linux specific, so they should be used directly only by * Linux-specific code. */ #include "list.h" struct nln; struct nln_notifier; struct nlattr; struct ofpbuf; /* Function called to report netlink notifications. 'change' describes the * specific change filled out by an nln_parse_func. It may be null if the * buffer of change information overflowed, in which case the function must * assume that everything may have changed. 'aux' is as specified in * nln_notifier_register(). */ typedef void nln_notify_func(const void *change, void *aux); /* Function called to parse incoming nln notifications. The 'buf' message * should be parsed into 'change' as specified in nln_create(). */ typedef bool nln_parse_func(struct ofpbuf *buf, void *change); struct nln *nln_create(int protocol, int multicast_group, nln_parse_func *, void *change); void nln_destroy(struct nln *); struct nln_notifier *nln_notifier_create(struct nln *, nln_notify_func *, void *aux); void nln_notifier_destroy(struct nln_notifier *); void nln_run(struct nln *); void nln_wait(struct nln *); #endif /* netlink-notifier.h */ openvswitch-2.0.1+git20140120/lib/netlink-protocol.h000066400000000000000000000114631226605124000217010ustar00rootroot00000000000000/* * Copyright (c) 2008, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef NETLINK_PROTOCOL_H #define NETLINK_PROTOCOL_H 1 /* Netlink protocol definitions. 
* * Netlink is a message framing format described in RFC 3549 and used heavily * in Linux to access the network stack. Open vSwitch uses AF_NETLINK sockets * for this purpose on Linux. But on all platforms, Open vSwitch uses Netlink * message framing internally for certain purposes. * * This header provides access to the Netlink message framing definitions * regardless of platform. On Linux, it includes the proper headers directly; * on other platforms it directly defines the structures and macros itself. */ #include #include #include "util.h" #ifdef HAVE_NETLINK #include #include #else #define NETLINK_GENERIC 16 struct sockaddr_nl { sa_family_t nl_family; unsigned short int nl_pad; uint32_t nl_pid; uint32_t nl_groups; }; BUILD_ASSERT_DECL(sizeof(struct sockaddr_nl) == 12); /* nlmsg_flags bits. */ #define NLM_F_REQUEST 0x001 #define NLM_F_MULTI 0x002 #define NLM_F_ACK 0x004 #define NLM_F_ECHO 0x008 #define NLM_F_ROOT 0x100 #define NLM_F_MATCH 0x200 #define NLM_F_ATOMIC 0x400 #define NLM_F_DUMP (NLM_F_ROOT | NLM_F_MATCH) /* nlmsg_type values. 
*/ #define NLMSG_NOOP 1 #define NLMSG_ERROR 2 #define NLMSG_DONE 3 #define NLMSG_OVERRUN 4 #define NLMSG_MIN_TYPE 0x10 struct nlmsghdr { uint32_t nlmsg_len; uint16_t nlmsg_type; uint16_t nlmsg_flags; uint32_t nlmsg_seq; uint32_t nlmsg_pid; }; BUILD_ASSERT_DECL(sizeof(struct nlmsghdr) == 16); #define NLMSG_ALIGNTO 4 #define NLMSG_ALIGN(SIZE) ROUND_UP(SIZE, NLMSG_ALIGNTO) #define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) struct nlmsgerr { int error; struct nlmsghdr msg; }; BUILD_ASSERT_DECL(sizeof(struct nlmsgerr) == 20); struct genlmsghdr { uint8_t cmd; uint8_t version; uint16_t reserved; }; BUILD_ASSERT_DECL(sizeof(struct genlmsghdr) == 4); #define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) struct nlattr { uint16_t nla_len; uint16_t nla_type; }; BUILD_ASSERT_DECL(sizeof(struct nlattr) == 4); #define NLA_ALIGNTO 4 #define NLA_ALIGN(SIZE) ROUND_UP(SIZE, NLA_ALIGNTO) #define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) #define GENL_MIN_ID NLMSG_MIN_TYPE #define GENL_MAX_ID 1023 #define GENL_ID_CTRL NLMSG_MIN_TYPE enum { CTRL_CMD_UNSPEC, CTRL_CMD_NEWFAMILY, CTRL_CMD_DELFAMILY, CTRL_CMD_GETFAMILY, CTRL_CMD_NEWOPS, CTRL_CMD_DELOPS, CTRL_CMD_GETOPS, __CTRL_CMD_MAX, }; #define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) enum { CTRL_ATTR_UNSPEC, CTRL_ATTR_FAMILY_ID, CTRL_ATTR_FAMILY_NAME, CTRL_ATTR_VERSION, CTRL_ATTR_HDRSIZE, CTRL_ATTR_MAXATTR, CTRL_ATTR_OPS, __CTRL_ATTR_MAX, }; #define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) enum { CTRL_ATTR_OP_UNSPEC, CTRL_ATTR_OP_ID, CTRL_ATTR_OP_FLAGS, __CTRL_ATTR_OP_MAX, }; #define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) #endif /* !HAVE_NETLINK */ /* These were introduced all together in 2.6.24. */ #ifndef NLA_TYPE_MASK #define NLA_F_NESTED (1 << 15) #define NLA_F_NET_BYTEORDER (1 << 14) #define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) #endif /* These were introduced all together in 2.6.14. (We want our programs to * support the newer kernel features even if compiled with older headers.) 
*/ #ifndef NETLINK_ADD_MEMBERSHIP #define NETLINK_ADD_MEMBERSHIP 1 #define NETLINK_DROP_MEMBERSHIP 2 #endif /* These were introduced all together in 2.6.23. (We want our programs to * support the newer kernel features even if compiled with older headers.) */ #ifndef CTRL_ATTR_MCAST_GRP_MAX #undef CTRL_ATTR_MAX #define CTRL_ATTR_MAX 7 #define CTRL_ATTR_MCAST_GROUPS 7 enum { CTRL_ATTR_MCAST_GRP_UNSPEC, CTRL_ATTR_MCAST_GRP_NAME, CTRL_ATTR_MCAST_GRP_ID, __CTRL_ATTR_MCAST_GRP_MAX, }; #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) #endif /* CTRL_ATTR_MCAST_GRP_MAX */ #endif /* netlink-protocol.h */ openvswitch-2.0.1+git20140120/lib/netlink-socket.c000066400000000000000000001124401226605124000213200ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "netlink-socket.h" #include #include #include #include #include #include #include "coverage.h" #include "dynamic-string.h" #include "hash.h" #include "hmap.h" #include "netlink.h" #include "netlink-protocol.h" #include "ofpbuf.h" #include "ovs-thread.h" #include "poll-loop.h" #include "socket-util.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(netlink_socket); COVERAGE_DEFINE(netlink_overflow); COVERAGE_DEFINE(netlink_received); COVERAGE_DEFINE(netlink_recv_jumbo); COVERAGE_DEFINE(netlink_send); COVERAGE_DEFINE(netlink_sent); /* Linux header file confusion causes this to be undefined. 
*/ #ifndef SOL_NETLINK #define SOL_NETLINK 270 #endif /* A single (bad) Netlink message can in theory dump out many, many log * messages, so the burst size is set quite high here to avoid missing useful * information. Also, at high logging levels we log *all* Netlink messages. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600); static uint32_t nl_sock_allocate_seq(struct nl_sock *, unsigned int n); static void log_nlmsg(const char *function, int error, const void *message, size_t size, int protocol); /* Netlink sockets. */ struct nl_sock { int fd; uint32_t next_seq; uint32_t pid; int protocol; unsigned int rcvbuf; /* Receive buffer size (SO_RCVBUF). */ }; /* Compile-time limit on iovecs, so that we can allocate a maximum-size array * of iovecs on the stack. */ #define MAX_IOVS 128 /* Maximum number of iovecs that may be passed to sendmsg, capped at a * minimum of _XOPEN_IOV_MAX (16) and a maximum of MAX_IOVS. * * Initialized by nl_sock_create(). */ static int max_iovs; static int nl_pool_alloc(int protocol, struct nl_sock **sockp); static void nl_pool_release(struct nl_sock *); /* Creates a new netlink socket for the given netlink 'protocol' * (NETLINK_ROUTE, NETLINK_GENERIC, ...). Returns 0 and sets '*sockp' to the * new socket if successful, otherwise returns a positive errno value. 
*/ int nl_sock_create(int protocol, struct nl_sock **sockp) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; struct nl_sock *sock; struct sockaddr_nl local, remote; socklen_t local_size; int rcvbuf; int retval = 0; if (ovsthread_once_start(&once)) { int save_errno = errno; errno = 0; max_iovs = sysconf(_SC_UIO_MAXIOV); if (max_iovs < _XOPEN_IOV_MAX) { if (max_iovs == -1 && errno) { VLOG_WARN("sysconf(_SC_UIO_MAXIOV): %s", ovs_strerror(errno)); } max_iovs = _XOPEN_IOV_MAX; } else if (max_iovs > MAX_IOVS) { max_iovs = MAX_IOVS; } errno = save_errno; ovsthread_once_done(&once); } *sockp = NULL; sock = xmalloc(sizeof *sock); sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol); if (sock->fd < 0) { VLOG_ERR("fcntl: %s", ovs_strerror(errno)); goto error; } sock->protocol = protocol; sock->next_seq = 1; rcvbuf = 1024 * 1024; if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE, &rcvbuf, sizeof rcvbuf)) { /* Only root can use SO_RCVBUFFORCE. Everyone else gets EPERM. * Warn only if the failure is therefore unexpected. */ if (errno != EPERM) { VLOG_WARN_RL(&rl, "setting %d-byte socket receive buffer failed " "(%s)", rcvbuf, ovs_strerror(errno)); } } retval = get_socket_rcvbuf(sock->fd); if (retval < 0) { retval = -retval; goto error; } sock->rcvbuf = retval; /* Connect to kernel (pid 0) as remote address. */ memset(&remote, 0, sizeof remote); remote.nl_family = AF_NETLINK; remote.nl_pid = 0; if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) { VLOG_ERR("connect(0): %s", ovs_strerror(errno)); goto error; } /* Obtain pid assigned by kernel. 
*/ local_size = sizeof local; if (getsockname(sock->fd, (struct sockaddr *) &local, &local_size) < 0) { VLOG_ERR("getsockname: %s", ovs_strerror(errno)); goto error; } if (local_size < sizeof local || local.nl_family != AF_NETLINK) { VLOG_ERR("getsockname returned bad Netlink name"); retval = EINVAL; goto error; } sock->pid = local.nl_pid; *sockp = sock; return 0; error: if (retval == 0) { retval = errno; if (retval == 0) { retval = EINVAL; } } if (sock->fd >= 0) { close(sock->fd); } free(sock); return retval; } /* Creates a new netlink socket for the same protocol as 'src'. Returns 0 and * sets '*sockp' to the new socket if successful, otherwise returns a positive * errno value. */ int nl_sock_clone(const struct nl_sock *src, struct nl_sock **sockp) { return nl_sock_create(src->protocol, sockp); } /* Destroys netlink socket 'sock'. */ void nl_sock_destroy(struct nl_sock *sock) { if (sock) { close(sock->fd); free(sock); } } /* Tries to add 'sock' as a listener for 'multicast_group'. Returns 0 if * successful, otherwise a positive errno value. * * A socket that is subscribed to a multicast group that receives asynchronous * notifications must not be used for Netlink transactions or dumps, because * transactions and dumps can cause notifications to be lost. * * Multicast group numbers are always positive. * * It is not an error to attempt to join a multicast group to which a socket * already belongs. */ int nl_sock_join_mcgroup(struct nl_sock *sock, unsigned int multicast_group) { if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &multicast_group, sizeof multicast_group) < 0) { VLOG_WARN("could not join multicast group %u (%s)", multicast_group, ovs_strerror(errno)); return errno; } return 0; } /* Tries to make 'sock' stop listening to 'multicast_group'. Returns 0 if * successful, otherwise a positive errno value. * * Multicast group numbers are always positive. 
* * It is not an error to attempt to leave a multicast group to which a socket * does not belong. * * On success, reading from 'sock' will still return any messages that were * received on 'multicast_group' before the group was left. */ int nl_sock_leave_mcgroup(struct nl_sock *sock, unsigned int multicast_group) { if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &multicast_group, sizeof multicast_group) < 0) { VLOG_WARN("could not leave multicast group %u (%s)", multicast_group, ovs_strerror(errno)); return errno; } return 0; } static int nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg, uint32_t nlmsg_seq, bool wait) { struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(msg); int error; nlmsg->nlmsg_len = msg->size; nlmsg->nlmsg_seq = nlmsg_seq; nlmsg->nlmsg_pid = sock->pid; do { int retval; retval = send(sock->fd, msg->data, msg->size, wait ? 0 : MSG_DONTWAIT); error = retval < 0 ? errno : 0; } while (error == EINTR); log_nlmsg(__func__, error, msg->data, msg->size, sock->protocol); if (!error) { COVERAGE_INC(netlink_sent); } return error; } /* Tries to send 'msg', which must contain a Netlink message, to the kernel on * 'sock'. nlmsg_len in 'msg' will be finalized to match msg->size, nlmsg_pid * will be set to 'sock''s pid, and nlmsg_seq will be initialized to a fresh * sequence number, before the message is sent. * * Returns 0 if successful, otherwise a positive errno value. If * 'wait' is true, then the send will wait until buffer space is ready; * otherwise, returns EAGAIN if the 'sock' send buffer is full. */ int nl_sock_send(struct nl_sock *sock, const struct ofpbuf *msg, bool wait) { return nl_sock_send_seq(sock, msg, nl_sock_allocate_seq(sock, 1), wait); } /* Tries to send 'msg', which must contain a Netlink message, to the kernel on * 'sock'. nlmsg_len in 'msg' will be finalized to match msg->size, nlmsg_pid * will be set to 'sock''s pid, and nlmsg_seq will be initialized to * 'nlmsg_seq', before the message is sent. 
*
 * Returns 0 if successful, otherwise a positive errno value.  If
 * 'wait' is true, then the send will wait until buffer space is ready;
 * otherwise, returns EAGAIN if the 'sock' send buffer is full.
 *
 * This function is suitable for sending a reply to a request that was received
 * with sequence number 'nlmsg_seq'.  Otherwise, use nl_sock_send() instead. */
int
nl_sock_send_seq(struct nl_sock *sock, const struct ofpbuf *msg,
                 uint32_t nlmsg_seq, bool wait)
{
    return nl_sock_send__(sock, msg, nlmsg_seq, wait);
}

/* Receives a single datagram from 'sock' into 'buf', replacing its previous
 * contents.  If 'wait' is false, the receive uses MSG_DONTWAIT.
 *
 * 'buf' must already have at least sizeof(struct nlmsghdr) bytes allocated.
 *
 * Returns 0 on success.  Otherwise returns a positive errno value: the
 * recvmsg() error, E2BIG if the datagram was truncated, or EPROTO if the
 * Netlink header is malformed.  'buf' is cleared before the receive, so it is
 * empty on every failure path. */
static int
nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* We can't accurately predict the size of the data to be received.  The
     * caller is supposed to have allocated enough space in 'buf' to handle the
     * "typical" case.  To handle exceptions, we make available enough space in
     * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable
     * figure since that's the maximum length of a Netlink attribute). */
    struct nlmsghdr *nlmsghdr;
    uint8_t tail[65536];
    struct iovec iov[2];
    struct msghdr msg;
    ssize_t retval;

    ovs_assert(buf->allocated >= sizeof *nlmsghdr);
    ofpbuf_clear(buf);

    /* Scatter the datagram: first into the caller's buffer, with any overflow
     * spilling into the on-stack 'tail'. */
    iov[0].iov_base = buf->base;
    iov[0].iov_len = buf->allocated;
    iov[1].iov_base = tail;
    iov[1].iov_len = sizeof tail;

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;

    /* Retry if interrupted by a signal. */
    do {
        retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
    } while (retval < 0 && errno == EINTR);

    if (retval < 0) {
        int error = errno;
        if (error == ENOBUFS) {
            /* Socket receive buffer overflow dropped one or more messages that
             * the kernel tried to send to us. */
            COVERAGE_INC(netlink_overflow);
        }
        return error;
    }

    /* The datagram did not fit even in 'buf' plus 'tail'. */
    if (msg.msg_flags & MSG_TRUNC) {
        VLOG_ERR_RL(&rl, "truncated message (longer than %zu bytes)",
                    sizeof tail);
        return E2BIG;
    }

    /* Sanity-check the header: the datagram must hold a whole nlmsghdr, and
     * the length it claims must cover the header and fit in what arrived. */
    nlmsghdr = buf->data;
    if (retval < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len > retval) {
        VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %zu)",
                    retval, sizeof *nlmsghdr);
        return EPROTO;
    }

    /* The part that landed in 'buf' is already in place.  Append whatever
     * spilled into 'tail'. */
    buf->size = MIN(retval, buf->allocated);
    if (retval > buf->allocated) {
        COVERAGE_INC(netlink_recv_jumbo);
        ofpbuf_put(buf, tail, retval - buf->allocated);
    }

    log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
    COVERAGE_INC(netlink_received);

    return 0;
}

/* Tries to receive a Netlink message from the kernel on 'sock' into 'buf'.  If
 * 'wait' is true, waits for a message to be ready.  Otherwise, fails with
 * EAGAIN if the 'sock' receive buffer is empty.
 *
 * The caller must have initialized 'buf' with an allocation of at least
 * NLMSG_HDRLEN bytes.  For best performance, the caller should allocate enough
 * space for a "typical" message.
 *
 * On success, returns 0 and replaces 'buf''s previous content by the received
 * message.  This function expands 'buf''s allocated memory, as necessary, to
 * hold the actual size of the received message.
 *
 * On failure, returns a positive errno value and clears 'buf' to zero length.
 * 'buf' retains its previous memory allocation.
 *
 * Regardless of success or failure, this function resets 'buf''s headroom to
 * 0.
*/ int nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait) { return nl_sock_recv__(sock, buf, wait); } static void nl_sock_record_errors__(struct nl_transaction **transactions, size_t n, int error) { size_t i; for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; txn->error = error; if (txn->reply) { ofpbuf_clear(txn->reply); } } } static int nl_sock_transact_multiple__(struct nl_sock *sock, struct nl_transaction **transactions, size_t n, size_t *done) { uint64_t tmp_reply_stub[1024 / 8]; struct nl_transaction tmp_txn; struct ofpbuf tmp_reply; uint32_t base_seq; struct iovec iovs[MAX_IOVS]; struct msghdr msg; int error; int i; base_seq = nl_sock_allocate_seq(sock, n); *done = 0; for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(txn->request); nlmsg->nlmsg_len = txn->request->size; nlmsg->nlmsg_seq = base_seq + i; nlmsg->nlmsg_pid = sock->pid; iovs[i].iov_base = txn->request->data; iovs[i].iov_len = txn->request->size; } memset(&msg, 0, sizeof msg); msg.msg_iov = iovs; msg.msg_iovlen = n; do { error = sendmsg(sock->fd, &msg, 0) < 0 ? errno : 0; } while (error == EINTR); for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; log_nlmsg(__func__, error, txn->request->data, txn->request->size, sock->protocol); } if (!error) { COVERAGE_ADD(netlink_sent, n); } if (error) { return error; } ofpbuf_use_stub(&tmp_reply, tmp_reply_stub, sizeof tmp_reply_stub); tmp_txn.request = NULL; tmp_txn.reply = &tmp_reply; tmp_txn.error = 0; while (n > 0) { struct nl_transaction *buf_txn, *txn; uint32_t seq; /* Find a transaction whose buffer we can use for receiving a reply. * If no such transaction is left, use tmp_txn. */ buf_txn = &tmp_txn; for (i = 0; i < n; i++) { if (transactions[i]->reply) { buf_txn = transactions[i]; break; } } /* Receive a reply. 
*/ error = nl_sock_recv__(sock, buf_txn->reply, false); if (error) { if (error == EAGAIN) { nl_sock_record_errors__(transactions, n, 0); *done += n; error = 0; } break; } /* Match the reply up with a transaction. */ seq = nl_msg_nlmsghdr(buf_txn->reply)->nlmsg_seq; if (seq < base_seq || seq >= base_seq + n) { VLOG_DBG_RL(&rl, "ignoring unexpected seq %#"PRIx32, seq); continue; } i = seq - base_seq; txn = transactions[i]; /* Fill in the results for 'txn'. */ if (nl_msg_nlmsgerr(buf_txn->reply, &txn->error)) { if (txn->reply) { ofpbuf_clear(txn->reply); } if (txn->error) { VLOG_DBG_RL(&rl, "received NAK error=%d (%s)", error, ovs_strerror(txn->error)); } } else { txn->error = 0; if (txn->reply && txn != buf_txn) { /* Swap buffers. */ struct ofpbuf *reply = buf_txn->reply; buf_txn->reply = txn->reply; txn->reply = reply; } } /* Fill in the results for transactions before 'txn'. (We have to do * this after the results for 'txn' itself because of the buffer swap * above.) */ nl_sock_record_errors__(transactions, i, 0); /* Advance. */ *done += i + 1; transactions += i + 1; n -= i + 1; base_seq += i + 1; } ofpbuf_uninit(&tmp_reply); return error; } /* Sends the 'request' member of the 'n' transactions in 'transactions' on * 'sock', in order, and receives responses to all of them. Fills in the * 'error' member of each transaction with 0 if it was successful, otherwise * with a positive errno value. If 'reply' is nonnull, then it will be filled * with the reply if the message receives a detailed reply. In other cases, * i.e. where the request failed or had no reply beyond an indication of * success, 'reply' will be cleared if it is nonnull. * * The caller is responsible for destroying each request and reply, and the * transactions array itself. * * Before sending each message, this function will finalize nlmsg_len in each * 'request' to match the ofpbuf's size, set nlmsg_pid to 'sock''s pid, and * initialize nlmsg_seq. * * Bare Netlink is an unreliable transport protocol. 
This function layers * reliable delivery and reply semantics on top of bare Netlink. See * nl_sock_transact() for some caveats. */ void nl_sock_transact_multiple(struct nl_sock *sock, struct nl_transaction **transactions, size_t n) { int max_batch_count; int error; if (!n) { return; } /* In theory, every request could have a 64 kB reply. But the default and * maximum socket rcvbuf size with typical Dom0 memory sizes both tend to * be a bit below 128 kB, so that would only allow a single message in a * "batch". So we assume that replies average (at most) 4 kB, which allows * a good deal of batching. * * In practice, most of the requests that we batch either have no reply at * all or a brief reply. */ max_batch_count = MAX(sock->rcvbuf / 4096, 1); max_batch_count = MIN(max_batch_count, max_iovs); while (n > 0) { size_t count, bytes; size_t done; /* Batch up to 'max_batch_count' transactions. But cap it at about a * page of requests total because big skbuffs are expensive to * allocate in the kernel. */ #if defined(PAGESIZE) enum { MAX_BATCH_BYTES = MAX(1, PAGESIZE - 512) }; #else enum { MAX_BATCH_BYTES = 4096 - 512 }; #endif bytes = transactions[0]->request->size; for (count = 1; count < n && count < max_batch_count; count++) { if (bytes + transactions[count]->request->size > MAX_BATCH_BYTES) { break; } bytes += transactions[count]->request->size; } error = nl_sock_transact_multiple__(sock, transactions, count, &done); transactions += done; n -= done; if (error == ENOBUFS) { VLOG_DBG_RL(&rl, "receive buffer overflow, resending request"); } else if (error) { VLOG_ERR_RL(&rl, "transaction error (%s)", ovs_strerror(error)); nl_sock_record_errors__(transactions, n, error); } } } /* Sends 'request' to the kernel via 'sock' and waits for a response. If * successful, returns 0. On failure, returns a positive errno value. 
* * If 'replyp' is nonnull, then on success '*replyp' is set to the kernel's * reply, which the caller is responsible for freeing with ofpbuf_delete(), and * on failure '*replyp' is set to NULL. If 'replyp' is null, then the kernel's * reply, if any, is discarded. * * Before the message is sent, nlmsg_len in 'request' will be finalized to * match msg->size, nlmsg_pid will be set to 'sock''s pid, nlmsg_seq will * be initialized, and NLM_F_ACK will be set in nlmsg_flags. * * The caller is responsible for destroying 'request'. * * Bare Netlink is an unreliable transport protocol. This function layers * reliable delivery and reply semantics on top of bare Netlink. * * In Netlink, sending a request to the kernel is reliable enough, because the * kernel will tell us if the message cannot be queued (and we will in that * case put it on the transmit queue and wait until it can be delivered). * * Receiving the reply is the real problem: if the socket buffer is full when * the kernel tries to send the reply, the reply will be dropped. However, the * kernel sets a flag that a reply has been dropped. The next call to recv * then returns ENOBUFS. We can then re-send the request. * * Caveats: * * 1. Netlink depends on sequence numbers to match up requests and * replies. The sender of a request supplies a sequence number, and * the reply echoes back that sequence number. * * This is fine, but (1) some kernel netlink implementations are * broken, in that they fail to echo sequence numbers and (2) this * function will drop packets with non-matching sequence numbers, so * that only a single request can be usefully transacted at a time. * * 2. Resending the request causes it to be re-executed, so the request * needs to be idempotent. 
*/ int nl_sock_transact(struct nl_sock *sock, const struct ofpbuf *request, struct ofpbuf **replyp) { struct nl_transaction *transactionp; struct nl_transaction transaction; transaction.request = CONST_CAST(struct ofpbuf *, request); transaction.reply = replyp ? ofpbuf_new(1024) : NULL; transactionp = &transaction; nl_sock_transact_multiple(sock, &transactionp, 1); if (replyp) { if (transaction.error) { ofpbuf_delete(transaction.reply); *replyp = NULL; } else { *replyp = transaction.reply; } } return transaction.error; } /* Drain all the messages currently in 'sock''s receive queue. */ int nl_sock_drain(struct nl_sock *sock) { return drain_rcvbuf(sock->fd); } /* Starts a Netlink "dump" operation, by sending 'request' to the kernel on a * Netlink socket created with the given 'protocol', and initializes 'dump' to * reflect the state of the operation. * * nlmsg_len in 'msg' will be finalized to match msg->size, and nlmsg_pid will * be set to the Netlink socket's pid, before the message is sent. NLM_F_DUMP * and NLM_F_ACK will be set in nlmsg_flags. * * The design of this Netlink socket library ensures that the dump is reliable. * * This function provides no status indication. An error status for the entire * dump operation is provided when it is completed by calling nl_dump_done(). * * The caller is responsible for destroying 'request'. */ void nl_dump_start(struct nl_dump *dump, int protocol, const struct ofpbuf *request) { ofpbuf_init(&dump->buffer, 4096); dump->status = nl_pool_alloc(protocol, &dump->sock); if (dump->status) { return; } nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK; dump->status = nl_sock_send__(dump->sock, request, nl_sock_allocate_seq(dump->sock, 1), true); dump->seq = nl_msg_nlmsghdr(request)->nlmsg_seq; } /* Helper function for nl_dump_next(). 
*/
/* Blocks until one datagram arrives on the dump's socket and stores it in
 * 'dump->buffer'.
 *
 * Returns 0 if the buffer now holds dump replies.  Returns EAGAIN if the
 * caller should simply try again (the receive reported EINTR, or the datagram
 * carries a sequence number other than the dump's).  Any other positive errno
 * value is a real failure: a receive error, or an error reported by the
 * kernel for the dump request (EPROTO if that error is 0 or EAGAIN). */
static int
nl_dump_recv(struct nl_dump *dump)
{
    struct nlmsghdr *nlmsghdr;
    int retval;

    retval = nl_sock_recv__(dump->sock, &dump->buffer, true);
    if (retval) {
        return retval == EINTR ? EAGAIN : retval;
    }

    /* Skip datagrams that belong to some other request. */
    nlmsghdr = nl_msg_nlmsghdr(&dump->buffer);
    if (dump->seq != nlmsghdr->nlmsg_seq) {
        VLOG_DBG_RL(&rl, "ignoring seq %#"PRIx32" != expected %#"PRIx32,
                    nlmsghdr->nlmsg_seq, dump->seq);
        return EAGAIN;
    }

    if (nl_msg_nlmsgerr(&dump->buffer, &retval)) {
        VLOG_INFO_RL(&rl, "netlink dump request error (%s)",
                     ovs_strerror(retval));
        /* Never report an error message as success or as "try again". */
        return retval && retval != EAGAIN ? retval : EPROTO;
    }

    return 0;
}

/* Attempts to retrieve another reply from 'dump', which must have been
 * initialized with nl_dump_start().
 *
 * If successful, returns true and points 'reply->data' and 'reply->size' to
 * the message that was retrieved.  The caller must not modify 'reply' (because
 * it points into the middle of a larger buffer).
 *
 * On failure, returns false and sets 'reply->data' to NULL and 'reply->size'
 * to 0.  Failure might indicate an actual error or merely the end of replies.
 * An error status for the entire dump operation is provided when it is
 * completed by calling nl_dump_done(). */
bool
nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply)
{
    struct nlmsghdr *nlmsghdr;

    reply->data = NULL;
    reply->size = 0;
    /* Once the dump has finished or failed, it stays that way. */
    if (dump->status) {
        return false;
    }

    /* Refill the buffer when all of its messages have been consumed,
     * retrying for as long as nl_dump_recv() asks for it with EAGAIN. */
    while (!dump->buffer.size) {
        int retval = nl_dump_recv(dump);
        if (retval) {
            ofpbuf_clear(&dump->buffer);
            if (retval != EAGAIN) {
                dump->status = retval;
                return false;
            }
        }
    }

    /* Take the next message out of the buffer. */
    nlmsghdr = nl_msg_next(&dump->buffer, reply);
    if (!nlmsghdr) {
        VLOG_WARN_RL(&rl, "netlink dump reply contains message fragment");
        dump->status = EPROTO;
        return false;
    } else if (nlmsghdr->nlmsg_type == NLMSG_DONE) {
        /* End of dump.  EOF is the "finished without error" status that
         * nl_dump_done() turns into 0. */
        dump->status = EOF;
        return false;
    }

    return true;
}

/* Completes Netlink dump operation 'dump', which must have been initialized
 * with nl_dump_start().  Returns 0 if the dump operation was error-free,
 * otherwise a positive errno value describing the problem.
*/ int nl_dump_done(struct nl_dump *dump) { /* Drain any remaining messages that the client didn't read. Otherwise the * kernel will continue to queue them up and waste buffer space. * * XXX We could just destroy and discard the socket in this case. */ while (!dump->status) { struct ofpbuf reply; if (!nl_dump_next(dump, &reply)) { ovs_assert(dump->status); } } nl_pool_release(dump->sock); ofpbuf_uninit(&dump->buffer); return dump->status == EOF ? 0 : dump->status; } /* Causes poll_block() to wake up when any of the specified 'events' (which is * a OR'd combination of POLLIN, POLLOUT, etc.) occur on 'sock'. */ void nl_sock_wait(const struct nl_sock *sock, short int events) { poll_fd_wait(sock->fd, events); } /* Returns the underlying fd for 'sock', for use in "poll()"-like operations * that can't use nl_sock_wait(). * * It's a little tricky to use the returned fd correctly, because nl_sock does * "copy on write" to allow a single nl_sock to be used for notifications, * transactions, and dumps. If 'sock' is used only for notifications and * transactions (and never for dump) then the usage is safe. */ int nl_sock_fd(const struct nl_sock *sock) { return sock->fd; } /* Returns the PID associated with this socket. */ uint32_t nl_sock_pid(const struct nl_sock *sock) { return sock->pid; } /* Miscellaneous. 
*/ struct genl_family { struct hmap_node hmap_node; uint16_t id; char *name; }; static struct hmap genl_families = HMAP_INITIALIZER(&genl_families); static const struct nl_policy family_policy[CTRL_ATTR_MAX + 1] = { [CTRL_ATTR_FAMILY_ID] = {.type = NL_A_U16}, [CTRL_ATTR_MCAST_GROUPS] = {.type = NL_A_NESTED, .optional = true}, }; static struct genl_family * find_genl_family_by_id(uint16_t id) { struct genl_family *family; HMAP_FOR_EACH_IN_BUCKET (family, hmap_node, hash_int(id, 0), &genl_families) { if (family->id == id) { return family; } } return NULL; } static void define_genl_family(uint16_t id, const char *name) { struct genl_family *family = find_genl_family_by_id(id); if (family) { if (!strcmp(family->name, name)) { return; } free(family->name); } else { family = xmalloc(sizeof *family); family->id = id; hmap_insert(&genl_families, &family->hmap_node, hash_int(id, 0)); } family->name = xstrdup(name); } static const char * genl_family_to_name(uint16_t id) { if (id == GENL_ID_CTRL) { return "control"; } else { struct genl_family *family = find_genl_family_by_id(id); return family ? 
family->name : "unknown"; } } static int do_lookup_genl_family(const char *name, struct nlattr **attrs, struct ofpbuf **replyp) { struct nl_sock *sock; struct ofpbuf request, *reply; int error; *replyp = NULL; error = nl_sock_create(NETLINK_GENERIC, &sock); if (error) { return error; } ofpbuf_init(&request, 0); nl_msg_put_genlmsghdr(&request, 0, GENL_ID_CTRL, NLM_F_REQUEST, CTRL_CMD_GETFAMILY, 1); nl_msg_put_string(&request, CTRL_ATTR_FAMILY_NAME, name); error = nl_sock_transact(sock, &request, &reply); ofpbuf_uninit(&request); if (error) { nl_sock_destroy(sock); return error; } if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN, family_policy, attrs, ARRAY_SIZE(family_policy)) || nl_attr_get_u16(attrs[CTRL_ATTR_FAMILY_ID]) == 0) { nl_sock_destroy(sock); ofpbuf_delete(reply); return EPROTO; } nl_sock_destroy(sock); *replyp = reply; return 0; } /* Finds the multicast group called 'group_name' in genl family 'family_name'. * When successful, writes its result to 'multicast_group' and returns 0. * Otherwise, clears 'multicast_group' and returns a positive error code. 
*/
int
nl_lookup_genl_mcgroup(const char *family_name, const char *group_name,
                       unsigned int *multicast_group)
{
    struct nlattr *family_attrs[ARRAY_SIZE(family_policy)];
    const struct nlattr *mc;
    struct ofpbuf *reply;
    unsigned int left;
    int error;

    *multicast_group = 0;
    error = do_lookup_genl_family(family_name, family_attrs, &reply);
    if (error) {
        return error;
    }

    /* The multicast group list is optional in 'family_policy', so it can be
     * absent from an otherwise valid reply. */
    if (!family_attrs[CTRL_ATTR_MCAST_GROUPS]) {
        error = EPROTO;
        goto exit;
    }

    /* Examine each nested group entry until one is named 'group_name'. */
    NL_NESTED_FOR_EACH (mc, left, family_attrs[CTRL_ATTR_MCAST_GROUPS]) {
        static const struct nl_policy mc_policy[] = {
            [CTRL_ATTR_MCAST_GRP_ID] = {.type = NL_A_U32},
            [CTRL_ATTR_MCAST_GRP_NAME] = {.type = NL_A_STRING},
        };

        struct nlattr *mc_attrs[ARRAY_SIZE(mc_policy)];
        const char *mc_name;

        if (!nl_parse_nested(mc, mc_policy, mc_attrs, ARRAY_SIZE(mc_policy))) {
            error = EPROTO;
            goto exit;
        }

        mc_name = nl_attr_get_string(mc_attrs[CTRL_ATTR_MCAST_GRP_NAME]);
        if (!strcmp(group_name, mc_name)) {
            *multicast_group =
                nl_attr_get_u32(mc_attrs[CTRL_ATTR_MCAST_GRP_ID]);
            error = 0;
            goto exit;
        }
    }
    /* No group with the requested name. */
    error = EPROTO;

exit:
    ofpbuf_delete(reply);
    return error;
}

/* If '*number' is 0, translates the given Generic Netlink family 'name' to a
 * number and stores it in '*number'.  If successful, returns 0 and the caller
 * may use '*number' as the family number.  On failure, returns a positive
 * errno value and '*number' caches the errno value. */
int
nl_lookup_genl_family(const char *name, int *number)
{
    if (*number == 0) {
        struct nlattr *attrs[ARRAY_SIZE(family_policy)];
        struct ofpbuf *reply;
        int error;

        error = do_lookup_genl_family(name, attrs, &reply);
        if (!error) {
            *number = nl_attr_get_u16(attrs[CTRL_ATTR_FAMILY_ID]);
            define_genl_family(*number, name);
        } else {
            /* Cache the failure as a negative value so that it cannot be
             * mistaken for a family number. */
            *number = -error;
        }
        ofpbuf_delete(reply);

        ovs_assert(*number != 0);
    }
    /* Positive: a family number (success).  Negative: a cached errno. */
    return *number > 0 ? 0 : -*number;
}

/* Cache of idle sockets for one Netlink protocol.  The first 'n' elements of
 * 'socks' are valid. */
struct nl_pool {
    struct nl_sock *socks[16];
    int n;
};

static struct ovs_mutex pool_mutex = OVS_MUTEX_INITIALIZER;
/* One pool per protocol number, indexed by protocol. */
static struct nl_pool pools[MAX_LINKS] OVS_GUARDED_BY(pool_mutex);

/* Obtains a socket for 'protocol' and stores it in '*sockp': an idle one from
 * the protocol's pool if there is one, otherwise a new one from
 * nl_sock_create().  Returns 0 if a pooled socket was used, otherwise
 * whatever nl_sock_create() returns. */
static int
nl_pool_alloc(int protocol, struct nl_sock **sockp)
{
    struct nl_sock *sock = NULL;
    struct nl_pool *pool;

    ovs_assert(protocol >= 0 && protocol < ARRAY_SIZE(pools));

    ovs_mutex_lock(&pool_mutex);
    pool = &pools[protocol];
    if (pool->n > 0) {
        sock = pool->socks[--pool->n];
    }
    ovs_mutex_unlock(&pool_mutex);

    if (sock) {
        *sockp = sock;
        return 0;
    } else {
        return nl_sock_create(protocol, sockp);
    }
}

/* Gives 'sock' back: it is stored in its protocol's pool if the pool has
 * room, otherwise it is destroyed.  Does nothing if 'sock' is null. */
static void
nl_pool_release(struct nl_sock *sock)
{
    if (sock) {
        struct nl_pool *pool = &pools[sock->protocol];

        ovs_mutex_lock(&pool_mutex);
        if (pool->n < ARRAY_SIZE(pool->socks)) {
            pool->socks[pool->n++] = sock;
            sock = NULL;
        }
        ovs_mutex_unlock(&pool_mutex);

        /* NOTE(review): 'sock' is NULL here when it was pooled above, so
         * this relies on nl_sock_destroy(NULL) being a no-op -- confirm. */
        nl_sock_destroy(sock);
    }
}

/* Runs nl_sock_transact() for 'request' on a socket obtained from the pool
 * for 'protocol' and returns its result.  If no socket can be obtained, sets
 * '*replyp' to NULL and returns the error from obtaining one. */
int
nl_transact(int protocol, const struct ofpbuf *request,
            struct ofpbuf **replyp)
{
    struct nl_sock *sock;
    int error;

    error = nl_pool_alloc(protocol, &sock);
    if (error) {
        *replyp = NULL;
        return error;
    }

    error = nl_sock_transact(sock, request, replyp);

    nl_pool_release(sock);
    return error;
}

/* Runs nl_sock_transact_multiple() for the 'n' 'transactions' on a socket
 * obtained from the pool for 'protocol'.  If no socket can be obtained, the
 * error is passed to nl_sock_record_errors__() for all 'n' transactions. */
void
nl_transact_multiple(int protocol,
                     struct nl_transaction **transactions, size_t n)
{
    struct nl_sock *sock;
    int error;

    error = nl_pool_alloc(protocol, &sock);
    if (!error) {
        nl_sock_transact_multiple(sock, transactions, n);
        nl_pool_release(sock);
    } else {
        nl_sock_record_errors__(transactions, n, error);
    }
}

/* Reserves 'n' consecutive sequence numbers on 'sock' and returns the first
 * of them. */
static uint32_t
nl_sock_allocate_seq(struct nl_sock *sock, unsigned int n)
{
    uint32_t seq = sock->next_seq;

    sock->next_seq += n;

    /* Make it impossible for the next request for sequence numbers to wrap
     * around to 0.  Start over with 1 to avoid ever using a sequence number of
     * 0, because the kernel uses sequence number 0 for notifications. */
    if (sock->next_seq >= UINT32_MAX / 2) {
        sock->next_seq = 1;
    }

    return seq;
}

/* Appends to 'ds' a one-line description of Netlink header 'h': length, type
 * (with a symbolic name where one is known), flags, sequence number, and
 * pid.  'protocol' decides whether the type is looked up as a Generic Netlink
 * family. */
static void
nlmsghdr_to_string(const struct nlmsghdr *h, int protocol, struct ds *ds)
{
    struct nlmsg_flag {
        unsigned int bits;
        const char *name;
    };
    /* Tested in this order; each match clears its bits from 'flags_left', so
     * an earlier multi-bit entry hides later entries that share its bits. */
    static const struct nlmsg_flag flags[] = {
        { NLM_F_REQUEST, "REQUEST" },
        { NLM_F_MULTI, "MULTI" },
        { NLM_F_ACK, "ACK" },
        { NLM_F_ECHO, "ECHO" },
        { NLM_F_DUMP, "DUMP" },
        { NLM_F_ROOT, "ROOT" },
        { NLM_F_MATCH, "MATCH" },
        { NLM_F_ATOMIC, "ATOMIC" },
    };
    const struct nlmsg_flag *flag;
    uint16_t flags_left;

    ds_put_format(ds, "nl(len:%"PRIu32", type=%"PRIu16,
                  h->nlmsg_len, h->nlmsg_type);
    if (h->nlmsg_type == NLMSG_NOOP) {
        ds_put_cstr(ds, "(no-op)");
    } else if (h->nlmsg_type == NLMSG_ERROR) {
        ds_put_cstr(ds, "(error)");
    } else if (h->nlmsg_type == NLMSG_DONE) {
        ds_put_cstr(ds, "(done)");
    } else if (h->nlmsg_type == NLMSG_OVERRUN) {
        ds_put_cstr(ds, "(overrun)");
    } else if (h->nlmsg_type < NLMSG_MIN_TYPE) {
        ds_put_cstr(ds, "(reserved)");
    } else if (protocol == NETLINK_GENERIC) {
        ds_put_format(ds, "(%s)", genl_family_to_name(h->nlmsg_type));
    } else {
        ds_put_cstr(ds, "(family-defined)");
    }
    ds_put_format(ds, ", flags=%"PRIx16, h->nlmsg_flags);
    flags_left = h->nlmsg_flags;
    for (flag = flags; flag < &flags[ARRAY_SIZE(flags)]; flag++) {
        if ((flags_left & flag->bits) == flag->bits) {
            ds_put_format(ds, "[%s]", flag->name);
            flags_left &= ~flag->bits;
        }
    }
    /* Whatever remains has no name in the table above. */
    if (flags_left) {
        ds_put_format(ds, "[OTHER:%"PRIx16"]", flags_left);
    }
    ds_put_format(ds, ", seq=%"PRIx32", pid=%"PRIu32,
                  h->nlmsg_seq, h->nlmsg_pid);
}

/* Returns a string describing the Netlink message in 'buffer': its header,
 * plus the error code and original header for NLMSG_ERROR, the status for
 * NLMSG_DONE, or the genl command and version when 'protocol' is
 * NETLINK_GENERIC.  Parts that do not fit in 'buffer' are shown as
 * "truncated".  The string is the 'ds' buffer itself; log_nlmsg() releases it
 * with free(). */
static char *
nlmsg_to_string(const struct ofpbuf *buffer, int protocol)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    const struct nlmsghdr *h = ofpbuf_at(buffer, 0, NLMSG_HDRLEN);
    if (h) {
        nlmsghdr_to_string(h, protocol, &ds);
        if (h->nlmsg_type == NLMSG_ERROR) {
            const struct nlmsgerr *e;
            e = ofpbuf_at(buffer, NLMSG_HDRLEN,
                          NLMSG_ALIGN(sizeof(struct nlmsgerr)));
            if (e) {
                ds_put_format(&ds, " error(%d", e->error);
                /* Only a negative code is decoded as an errno. */
                if (e->error < 0) {
                    ds_put_format(&ds, "(%s)", ovs_strerror(-e->error));
                }
                ds_put_cstr(&ds, ", in-reply-to(");
                nlmsghdr_to_string(&e->msg, protocol, &ds);
                ds_put_cstr(&ds, "))");
            } else {
                ds_put_cstr(&ds, " error(truncated)");
            }
        } else if (h->nlmsg_type == NLMSG_DONE) {
            int *error = ofpbuf_at(buffer, NLMSG_HDRLEN, sizeof *error);
            if (error) {
                ds_put_format(&ds, " done(%d", *error);
                if (*error < 0) {
                    ds_put_format(&ds, "(%s)", ovs_strerror(-*error));
                }
                ds_put_cstr(&ds, ")");
            } else {
                ds_put_cstr(&ds, " done(truncated)");
            }
        } else if (protocol == NETLINK_GENERIC) {
            struct genlmsghdr *genl = nl_msg_genlmsghdr(buffer);
            if (genl) {
                ds_put_format(&ds, ",genl(cmd=%"PRIu8",version=%"PRIu8")",
                              genl->cmd, genl->version);
            }
        }
    } else {
        ds_put_cstr(&ds, "nl(truncated)");
    }
    return ds.string;
}

/* Logs, at debug level and rate-limited, the 'size'-byte Netlink 'message'
 * together with the calling 'function' name and the text for 'error'.  Does
 * nothing, and builds no string, unless debug logging is enabled. */
static void
log_nlmsg(const char *function, int error,
          const void *message, size_t size, int protocol)
{
    struct ofpbuf buffer;
    char *nlmsg;

    if (!VLOG_IS_DBG_ENABLED()) {
        return;
    }

    ofpbuf_use_const(&buffer, message, size);
    nlmsg = nlmsg_to_string(&buffer, protocol);
    VLOG_DBG_RL(&rl, "%s (%s): %s", function, ovs_strerror(error), nlmsg);
    free(nlmsg);
}
openvswitch-2.0.1+git20140120/lib/netlink-socket.h000066400000000000000000000101061226605124000213210ustar00rootroot00000000000000
/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef NETLINK_SOCKET_H
#define NETLINK_SOCKET_H 1

/* Netlink socket definitions.
* * Netlink is a datagram-based network protocol primarily for communication * between user processes and the kernel, and mainly on Linux. Netlink is * specified in RFC 3549, "Linux Netlink as an IP Services Protocol". * * Netlink is not suitable for use in physical networks of heterogeneous * machines because host byte order is used throughout. * * This header file defines functions for working with Netlink sockets, which * are Linux-specific. For Netlink protocol definitions, see * netlink-protocol.h. For helper functions for working with Netlink messages, * see netlink.h. * * * Thread-safety * ============= * * Only a single thread may use a given nl_sock or nl_dump at one time. */ #include #include #include #include "ofpbuf.h" struct nl_sock; #ifndef HAVE_NETLINK #error "netlink-socket.h is only for hosts that support Netlink sockets" #endif /* Netlink sockets. */ int nl_sock_create(int protocol, struct nl_sock **); int nl_sock_clone(const struct nl_sock *, struct nl_sock **); void nl_sock_destroy(struct nl_sock *); int nl_sock_join_mcgroup(struct nl_sock *, unsigned int multicast_group); int nl_sock_leave_mcgroup(struct nl_sock *, unsigned int multicast_group); int nl_sock_send(struct nl_sock *, const struct ofpbuf *, bool wait); int nl_sock_send_seq(struct nl_sock *, const struct ofpbuf *, uint32_t nlmsg_seq, bool wait); int nl_sock_recv(struct nl_sock *, struct ofpbuf *, bool wait); int nl_sock_transact(struct nl_sock *, const struct ofpbuf *request, struct ofpbuf **replyp); int nl_sock_drain(struct nl_sock *); void nl_sock_wait(const struct nl_sock *, short int events); int nl_sock_fd(const struct nl_sock *); uint32_t nl_sock_pid(const struct nl_sock *); /* Batching transactions. */ struct nl_transaction { /* Filled in by client. */ struct ofpbuf *request; /* Request to send. */ /* The client must initialize 'reply' to one of: * * - NULL, if it does not care to examine the reply. 
* * - Otherwise, to an ofpbuf with a memory allocation of at least * NLMSG_HDRLEN bytes. */ struct ofpbuf *reply; /* Reply (empty if reply was an error code). */ int error; /* Positive errno value, 0 if no error. */ }; void nl_sock_transact_multiple(struct nl_sock *, struct nl_transaction **, size_t n); /* Transactions without an allocated socket. */ int nl_transact(int protocol, const struct ofpbuf *request, struct ofpbuf **replyp); void nl_transact_multiple(int protocol, struct nl_transaction **, size_t n); /* Table dumping. */ struct nl_dump { struct nl_sock *sock; /* Socket being dumped. */ uint32_t seq; /* Expected nlmsg_seq for replies. */ struct ofpbuf buffer; /* Receive buffer currently being iterated. */ int status; /* 0=OK, EOF=done, or positive errno value. */ }; void nl_dump_start(struct nl_dump *, int protocol, const struct ofpbuf *request); bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply); int nl_dump_done(struct nl_dump *); /* Miscellaneous */ int nl_lookup_genl_family(const char *name, int *number); int nl_lookup_genl_mcgroup(const char *family_name, const char *group_name, unsigned int *multicast_group); #endif /* netlink-socket.h */ openvswitch-2.0.1+git20140120/lib/netlink.c000066400000000000000000000622641226605124000200420ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "netlink.h" #include #include #include #include #include "coverage.h" #include "flow.h" #include "netlink-protocol.h" #include "ofpbuf.h" #include "timeval.h" #include "unaligned.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(netlink); /* A single (bad) Netlink message can in theory dump out many, many log * messages, so the burst size is set quite high here to avoid missing useful * information. Also, at high logging levels we log *all* Netlink messages. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600); /* Returns the nlmsghdr at the head of 'msg'. * * 'msg' must be at least as large as a nlmsghdr. */ struct nlmsghdr * nl_msg_nlmsghdr(const struct ofpbuf *msg) { return ofpbuf_at_assert(msg, 0, NLMSG_HDRLEN); } /* Returns the genlmsghdr just past 'msg''s nlmsghdr. * * Returns a null pointer if 'msg' is not large enough to contain an nlmsghdr * and a genlmsghdr. */ struct genlmsghdr * nl_msg_genlmsghdr(const struct ofpbuf *msg) { return ofpbuf_at(msg, NLMSG_HDRLEN, GENL_HDRLEN); } /* If 'buffer' is a NLMSG_ERROR message, stores 0 in '*errorp' if it is an ACK * message, otherwise a positive errno value, and returns true. If 'buffer' is * not an NLMSG_ERROR message, returns false. * * 'msg' must be at least as large as a nlmsghdr. */ bool nl_msg_nlmsgerr(const struct ofpbuf *msg, int *errorp) { if (nl_msg_nlmsghdr(msg)->nlmsg_type == NLMSG_ERROR) { struct nlmsgerr *err = ofpbuf_at(msg, NLMSG_HDRLEN, sizeof *err); int code = EPROTO; if (!err) { VLOG_ERR_RL(&rl, "received invalid nlmsgerr (%zd bytes < %zd)", msg->size, NLMSG_HDRLEN + sizeof *err); } else if (err->error <= 0 && err->error > INT_MIN) { code = -err->error; } if (errorp) { *errorp = code; } return true; } else { return false; } } /* Ensures that 'b' has room for at least 'size' bytes plus netlink padding at * its tail end, reallocating and copying its data if necessary. 
*/ void nl_msg_reserve(struct ofpbuf *msg, size_t size) { ofpbuf_prealloc_tailroom(msg, NLMSG_ALIGN(size)); } /* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty. * Uses the given 'type' and 'flags'. 'expected_payload' should be * an estimate of the number of payload bytes to be supplied; if the size of * the payload is unknown a value of 0 is acceptable. * * 'type' is ordinarily an enumerated value specific to the Netlink protocol * (e.g. RTM_NEWLINK, for NETLINK_ROUTE protocol). For Generic Netlink, 'type' * is the family number obtained via nl_lookup_genl_family(). * * 'flags' is a bit-mask that indicates what kind of request is being made. It * is often NLM_F_REQUEST indicating that a request is being made, commonly * or'd with NLM_F_ACK to request an acknowledgement. * * Sets the new nlmsghdr's nlmsg_len, nlmsg_seq, and nlmsg_pid fields to 0 for * now. Functions that send Netlink messages will fill these in just before * sending the message. * * nl_msg_put_genlmsghdr() is more convenient for composing a Generic Netlink * message. */ void nl_msg_put_nlmsghdr(struct ofpbuf *msg, size_t expected_payload, uint32_t type, uint32_t flags) { struct nlmsghdr *nlmsghdr; ovs_assert(msg->size == 0); nl_msg_reserve(msg, NLMSG_HDRLEN + expected_payload); nlmsghdr = nl_msg_put_uninit(msg, NLMSG_HDRLEN); nlmsghdr->nlmsg_len = 0; nlmsghdr->nlmsg_type = type; nlmsghdr->nlmsg_flags = flags; nlmsghdr->nlmsg_seq = 0; nlmsghdr->nlmsg_pid = 0; } /* Puts a nlmsghdr and genlmsghdr at the beginning of 'msg', which must be * initially empty. 'expected_payload' should be an estimate of the number of * payload bytes to be supplied; if the size of the payload is unknown a value * of 0 is acceptable. * * 'family' is the family number obtained via nl_lookup_genl_family(). * * 'flags' is a bit-mask that indicates what kind of request is being made. 
It * is often NLM_F_REQUEST indicating that a request is being made, commonly * or'd with NLM_F_ACK to request an acknowledgement. * * 'cmd' is an enumerated value specific to the Generic Netlink family * (e.g. CTRL_CMD_NEWFAMILY for the GENL_ID_CTRL family). * * 'version' is a version number specific to the family and command (often 1). * * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will * fill it in just before sending the message. * * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are * not Generic Netlink messages. */ void nl_msg_put_genlmsghdr(struct ofpbuf *msg, size_t expected_payload, int family, uint32_t flags, uint8_t cmd, uint8_t version) { struct genlmsghdr *genlmsghdr; nl_msg_put_nlmsghdr(msg, GENL_HDRLEN + expected_payload, family, flags); ovs_assert(msg->size == NLMSG_HDRLEN); genlmsghdr = nl_msg_put_uninit(msg, GENL_HDRLEN); genlmsghdr->cmd = cmd; genlmsghdr->version = version; genlmsghdr->reserved = 0; } /* Appends the 'size' bytes of data in 'p', plus Netlink padding if needed, to * the tail end of 'msg'. Data in 'msg' is reallocated and copied if * necessary. */ void nl_msg_put(struct ofpbuf *msg, const void *data, size_t size) { memcpy(nl_msg_put_uninit(msg, size), data, size); } /* Appends 'size' bytes of data, plus Netlink padding if needed, to the tail * end of 'msg', reallocating and copying its data if necessary. Returns a * pointer to the first byte of the new data, which is left uninitialized. */ void * nl_msg_put_uninit(struct ofpbuf *msg, size_t size) { size_t pad = NLMSG_ALIGN(size) - size; char *p = ofpbuf_put_uninit(msg, size + pad); if (pad) { memset(p + size, 0, pad); } return p; } /* Prepends the 'size' bytes of data in 'p', plus Netlink padding if needed, to * the head end of 'msg'. Data in 'msg' is reallocated and copied if * necessary. 
*/ void nl_msg_push(struct ofpbuf *msg, const void *data, size_t size) { memcpy(nl_msg_push_uninit(msg, size), data, size); } /* Prepends 'size' bytes of data, plus Netlink padding if needed, to the head * end of 'msg', reallocating and copying its data if necessary. Returns a * pointer to the first byte of the new data, which is left uninitialized. */ void * nl_msg_push_uninit(struct ofpbuf *msg, size_t size) { size_t pad = NLMSG_ALIGN(size) - size; char *p = ofpbuf_push_uninit(msg, size + pad); if (pad) { memset(p + size, 0, pad); } return p; } /* Appends a Netlink attribute of the given 'type' and room for 'size' bytes of * data as its payload, plus Netlink padding if needed, to the tail end of * 'msg', reallocating and copying its data if necessary. Returns a pointer to * the first byte of data in the attribute, which is left uninitialized. */ void * nl_msg_put_unspec_uninit(struct ofpbuf *msg, uint16_t type, size_t size) { size_t total_size = NLA_HDRLEN + size; struct nlattr* nla = nl_msg_put_uninit(msg, total_size); ovs_assert(NLA_ALIGN(total_size) <= UINT16_MAX); nla->nla_len = total_size; nla->nla_type = type; return nla + 1; } /* Appends a Netlink attribute of the given 'type' and room for 'size' bytes of * data as its payload, plus Netlink padding if needed, to the tail end of * 'msg', reallocating and copying its data if necessary. Returns a pointer to * the first byte of data in the attribute, which is zeroed. */ void * nl_msg_put_unspec_zero(struct ofpbuf *msg, uint16_t type, size_t size) { void *data = nl_msg_put_unspec_uninit(msg, type, size); memset(data, 0, size); return data; } /* Appends a Netlink attribute of the given 'type' and the 'size' bytes of * 'data' as its payload, to the tail end of 'msg', reallocating and copying * its data if necessary. Returns a pointer to the first byte of data in the * attribute, which is left uninitialized. 
*/ void nl_msg_put_unspec(struct ofpbuf *msg, uint16_t type, const void *data, size_t size) { memcpy(nl_msg_put_unspec_uninit(msg, type, size), data, size); } /* Appends a Netlink attribute of the given 'type' and no payload to 'msg'. * (Some Netlink protocols use the presence or absence of an attribute as a * Boolean flag.) */ void nl_msg_put_flag(struct ofpbuf *msg, uint16_t type) { nl_msg_put_unspec(msg, type, NULL, 0); } /* Appends a Netlink attribute of the given 'type' and the given 8-bit 'value' * to 'msg'. */ void nl_msg_put_u8(struct ofpbuf *msg, uint16_t type, uint8_t value) { nl_msg_put_unspec(msg, type, &value, sizeof value); } /* Appends a Netlink attribute of the given 'type' and the given 16-bit host * byte order 'value' to 'msg'. */ void nl_msg_put_u16(struct ofpbuf *msg, uint16_t type, uint16_t value) { nl_msg_put_unspec(msg, type, &value, sizeof value); } /* Appends a Netlink attribute of the given 'type' and the given 32-bit host * byte order 'value' to 'msg'. */ void nl_msg_put_u32(struct ofpbuf *msg, uint16_t type, uint32_t value) { nl_msg_put_unspec(msg, type, &value, sizeof value); } /* Appends a Netlink attribute of the given 'type' and the given 64-bit host * byte order 'value' to 'msg'. */ void nl_msg_put_u64(struct ofpbuf *msg, uint16_t type, uint64_t value) { nl_msg_put_unspec(msg, type, &value, sizeof value); } /* Appends a Netlink attribute of the given 'type' and the given 16-bit network * byte order 'value' to 'msg'. */ void nl_msg_put_be16(struct ofpbuf *msg, uint16_t type, ovs_be16 value) { nl_msg_put_unspec(msg, type, &value, sizeof value); } /* Appends a Netlink attribute of the given 'type' and the given 32-bit network * byte order 'value' to 'msg'. */ void nl_msg_put_be32(struct ofpbuf *msg, uint16_t type, ovs_be32 value) { nl_msg_put_unspec(msg, type, &value, sizeof value); } /* Appends a Netlink attribute of the given 'type' and the given 64-bit network * byte order 'value' to 'msg'. 
*/ void nl_msg_put_be64(struct ofpbuf *msg, uint16_t type, ovs_be64 value) { nl_msg_put_unspec(msg, type, &value, sizeof value); } /* Appends a Netlink attribute of the given 'type' and the given odp_port_t * 'value' to 'msg'. */ void nl_msg_put_odp_port(struct ofpbuf *msg, uint16_t type, odp_port_t value) { nl_msg_put_u32(msg, type, odp_to_u32(value)); } /* Appends a Netlink attribute of the given 'type' and the given * null-terminated string 'value' to 'msg'. */ void nl_msg_put_string(struct ofpbuf *msg, uint16_t type, const char *value) { nl_msg_put_unspec(msg, type, value, strlen(value) + 1); } /* Prepends a Netlink attribute of the given 'type' and room for 'size' bytes * of data as its payload, plus Netlink padding if needed, to the head end of * 'msg', reallocating and copying its data if necessary. Returns a pointer to * the first byte of data in the attribute, which is left uninitialized. */ void * nl_msg_push_unspec_uninit(struct ofpbuf *msg, uint16_t type, size_t size) { size_t total_size = NLA_HDRLEN + size; struct nlattr* nla = nl_msg_push_uninit(msg, total_size); ovs_assert(NLA_ALIGN(total_size) <= UINT16_MAX); nla->nla_len = total_size; nla->nla_type = type; return nla + 1; } /* Prepends a Netlink attribute of the given 'type' and the 'size' bytes of * 'data' as its payload, to the head end of 'msg', reallocating and copying * its data if necessary. Returns a pointer to the first byte of data in the * attribute, which is left uninitialized. */ void nl_msg_push_unspec(struct ofpbuf *msg, uint16_t type, const void *data, size_t size) { memcpy(nl_msg_push_unspec_uninit(msg, type, size), data, size); } /* Prepends a Netlink attribute of the given 'type' and no payload to 'msg'. * (Some Netlink protocols use the presence or absence of an attribute as a * Boolean flag.) 
*/
void
nl_msg_push_flag(struct ofpbuf *msg, uint16_t type)
{
    /* A flag attribute is a bare header: no data, zero-length payload. */
    nl_msg_push_unspec(msg, type, NULL, 0);
}

/* Prepends a Netlink attribute of the given 'type' and the given 8-bit 'value'
 * to 'msg'. */
void
nl_msg_push_u8(struct ofpbuf *msg, uint16_t type, uint8_t value)
{
    nl_msg_push_unspec(msg, type, &value, sizeof value);
}

/* Prepends a Netlink attribute of the given 'type' and the given 16-bit host
 * byte order 'value' to 'msg'. */
void
nl_msg_push_u16(struct ofpbuf *msg, uint16_t type, uint16_t value)
{
    nl_msg_push_unspec(msg, type, &value, sizeof value);
}

/* Prepends a Netlink attribute of the given 'type' and the given 32-bit host
 * byte order 'value' to 'msg'. */
void
nl_msg_push_u32(struct ofpbuf *msg, uint16_t type, uint32_t value)
{
    nl_msg_push_unspec(msg, type, &value, sizeof value);
}

/* Prepends a Netlink attribute of the given 'type' and the given 64-bit host
 * byte order 'value' to 'msg'. */
void
nl_msg_push_u64(struct ofpbuf *msg, uint16_t type, uint64_t value)
{
    nl_msg_push_unspec(msg, type, &value, sizeof value);
}

/* Prepends a Netlink attribute of the given 'type' and the given 16-bit
 * network byte order 'value' to 'msg'. */
void
nl_msg_push_be16(struct ofpbuf *msg, uint16_t type, ovs_be16 value)
{
    nl_msg_push_unspec(msg, type, &value, sizeof value);
}

/* Prepends a Netlink attribute of the given 'type' and the given 32-bit
 * network byte order 'value' to 'msg'. */
void
nl_msg_push_be32(struct ofpbuf *msg, uint16_t type, ovs_be32 value)
{
    nl_msg_push_unspec(msg, type, &value, sizeof value);
}

/* Prepends a Netlink attribute of the given 'type' and the given 64-bit
 * network byte order 'value' to 'msg'. */
void
nl_msg_push_be64(struct ofpbuf *msg, uint16_t type, ovs_be64 value)
{
    nl_msg_push_unspec(msg, type, &value, sizeof value);
}

/* Prepends a Netlink attribute of the given 'type' and the given
 * null-terminated string 'value' to 'msg'.
*/ void nl_msg_push_string(struct ofpbuf *msg, uint16_t type, const char *value) { nl_msg_push_unspec(msg, type, value, strlen(value) + 1); } /* Adds the header for nested Netlink attributes to 'msg', with the specified * 'type', and returns the header's offset within 'msg'. The caller should add * the content for the nested Netlink attribute to 'msg' (e.g. using the other * nl_msg_*() functions), and then pass the returned offset to * nl_msg_end_nested() to finish up the nested attributes. */ size_t nl_msg_start_nested(struct ofpbuf *msg, uint16_t type) { size_t offset = msg->size; nl_msg_put_unspec(msg, type, NULL, 0); return offset; } /* Finalizes a nested Netlink attribute in 'msg'. 'offset' should be the value * returned by nl_msg_start_nested(). */ void nl_msg_end_nested(struct ofpbuf *msg, size_t offset) { struct nlattr *attr = ofpbuf_at_assert(msg, offset, sizeof *attr); attr->nla_len = msg->size - offset; } /* Appends a nested Netlink attribute of the given 'type', with the 'size' * bytes of content starting at 'data', to 'msg'. */ void nl_msg_put_nested(struct ofpbuf *msg, uint16_t type, const void *data, size_t size) { size_t offset = nl_msg_start_nested(msg, type); nl_msg_put(msg, data, size); nl_msg_end_nested(msg, offset); } /* If 'buffer' begins with a valid "struct nlmsghdr", pulls the header and its * payload off 'buffer', stores header and payload in 'msg->data' and * 'msg->size', and returns a pointer to the header. * * If 'buffer' does not begin with a "struct nlmsghdr" or begins with one that * is invalid, returns NULL without modifying 'buffer'. 
*/ struct nlmsghdr * nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg) { if (buffer->size >= sizeof(struct nlmsghdr)) { struct nlmsghdr *nlmsghdr = nl_msg_nlmsghdr(buffer); size_t len = nlmsghdr->nlmsg_len; if (len >= sizeof *nlmsghdr && len <= buffer->size) { ofpbuf_use_const(msg, nlmsghdr, len); ofpbuf_pull(buffer, len); return nlmsghdr; } } msg->data = NULL; msg->size = 0; return NULL; } /* Attributes. */ /* Returns the bits of 'nla->nla_type' that are significant for determining its * type. */ int nl_attr_type(const struct nlattr *nla) { return nla->nla_type & NLA_TYPE_MASK; } /* Returns the first byte in the payload of attribute 'nla'. */ const void * nl_attr_get(const struct nlattr *nla) { ovs_assert(nla->nla_len >= NLA_HDRLEN); return nla + 1; } /* Returns the number of bytes in the payload of attribute 'nla'. */ size_t nl_attr_get_size(const struct nlattr *nla) { ovs_assert(nla->nla_len >= NLA_HDRLEN); return nla->nla_len - NLA_HDRLEN; } /* Asserts that 'nla''s payload is at least 'size' bytes long, and returns the * first byte of the payload. */ const void * nl_attr_get_unspec(const struct nlattr *nla, size_t size) { ovs_assert(nla->nla_len >= NLA_HDRLEN + size); return nla + 1; } /* Returns true if 'nla' is nonnull. (Some Netlink protocols use the presence * or absence of an attribute as a Boolean flag.) */ bool nl_attr_get_flag(const struct nlattr *nla) { return nla != NULL; } #define NL_ATTR_GET_AS(NLA, TYPE) \ (*(TYPE*) nl_attr_get_unspec(nla, sizeof(TYPE))) /* Returns the 8-bit value in 'nla''s payload. * * Asserts that 'nla''s payload is at least 1 byte long. */ uint8_t nl_attr_get_u8(const struct nlattr *nla) { return NL_ATTR_GET_AS(nla, uint8_t); } /* Returns the 16-bit host byte order value in 'nla''s payload. * * Asserts that 'nla''s payload is at least 2 bytes long. */ uint16_t nl_attr_get_u16(const struct nlattr *nla) { return NL_ATTR_GET_AS(nla, uint16_t); } /* Returns the 32-bit host byte order value in 'nla''s payload. 
* * Asserts that 'nla''s payload is at least 4 bytes long. */ uint32_t nl_attr_get_u32(const struct nlattr *nla) { return NL_ATTR_GET_AS(nla, uint32_t); } /* Returns the 64-bit host byte order value in 'nla''s payload. * * Asserts that 'nla''s payload is at least 8 bytes long. */ uint64_t nl_attr_get_u64(const struct nlattr *nla) { const ovs_32aligned_u64 *x = nl_attr_get_unspec(nla, sizeof *x); return get_32aligned_u64(x); } /* Returns the 16-bit network byte order value in 'nla''s payload. * * Asserts that 'nla''s payload is at least 2 bytes long. */ ovs_be16 nl_attr_get_be16(const struct nlattr *nla) { return NL_ATTR_GET_AS(nla, ovs_be16); } /* Returns the 32-bit network byte order value in 'nla''s payload. * * Asserts that 'nla''s payload is at least 4 bytes long. */ ovs_be32 nl_attr_get_be32(const struct nlattr *nla) { return NL_ATTR_GET_AS(nla, ovs_be32); } /* Returns the 64-bit network byte order value in 'nla''s payload. * * Asserts that 'nla''s payload is at least 8 bytes long. */ ovs_be64 nl_attr_get_be64(const struct nlattr *nla) { const ovs_32aligned_be64 *x = nl_attr_get_unspec(nla, sizeof *x); return get_32aligned_be64(x); } /* Returns the 32-bit odp_port_t value in 'nla''s payload. * * Asserts that 'nla''s payload is at least 4 bytes long. */ odp_port_t nl_attr_get_odp_port(const struct nlattr *nla) { return u32_to_odp(nl_attr_get_u32(nla)); } /* Returns the null-terminated string value in 'nla''s payload. * * Asserts that 'nla''s payload contains a null-terminated string. */ const char * nl_attr_get_string(const struct nlattr *nla) { ovs_assert(nla->nla_len > NLA_HDRLEN); ovs_assert(memchr(nl_attr_get(nla), '\0', nla->nla_len - NLA_HDRLEN)); return nl_attr_get(nla); } /* Initializes 'nested' to the payload of 'nla'. */ void nl_attr_get_nested(const struct nlattr *nla, struct ofpbuf *nested) { ofpbuf_use_const(nested, nl_attr_get(nla), nl_attr_get_size(nla)); } /* Default minimum payload size for each type of attribute. 
*/ static size_t min_attr_len(enum nl_attr_type type) { switch (type) { case NL_A_NO_ATTR: return 0; case NL_A_UNSPEC: return 0; case NL_A_U8: return 1; case NL_A_U16: return 2; case NL_A_U32: return 4; case NL_A_U64: return 8; case NL_A_STRING: return 1; case NL_A_FLAG: return 0; case NL_A_NESTED: return 0; case N_NL_ATTR_TYPES: default: NOT_REACHED(); } } /* Default maximum payload size for each type of attribute. */ static size_t max_attr_len(enum nl_attr_type type) { switch (type) { case NL_A_NO_ATTR: return SIZE_MAX; case NL_A_UNSPEC: return SIZE_MAX; case NL_A_U8: return 1; case NL_A_U16: return 2; case NL_A_U32: return 4; case NL_A_U64: return 8; case NL_A_STRING: return SIZE_MAX; case NL_A_FLAG: return SIZE_MAX; case NL_A_NESTED: return SIZE_MAX; case N_NL_ATTR_TYPES: default: NOT_REACHED(); } } bool nl_attr_validate(const struct nlattr *nla, const struct nl_policy *policy) { uint16_t type = nl_attr_type(nla); size_t min_len; size_t max_len; size_t len; if (policy->type == NL_A_NO_ATTR) { return true; } /* Figure out min and max length. */ min_len = policy->min_len; if (!min_len) { min_len = min_attr_len(policy->type); } max_len = policy->max_len; if (!max_len) { max_len = max_attr_len(policy->type); } /* Verify length. */ len = nl_attr_get_size(nla); if (len < min_len || len > max_len) { VLOG_DBG_RL(&rl, "attr %"PRIu16" length %zu not in " "allowed range %zu...%zu", type, len, min_len, max_len); return false; } /* Strings must be null terminated and must not have embedded nulls. */ if (policy->type == NL_A_STRING) { if (((char *) nla)[nla->nla_len - 1]) { VLOG_DBG_RL(&rl, "attr %"PRIu16" lacks null at end", type); return false; } if (memchr(nla + 1, '\0', len - 1) != NULL) { VLOG_DBG_RL(&rl, "attr %"PRIu16" has bad length", type); return false; } } return true; } /* Parses the 'msg' starting at the given 'nla_offset' as a sequence of Netlink * attributes. 
'policy[i]', for 0 <= i < n_attrs, specifies how the attribute * with nla_type == i is parsed; a pointer to attribute i is stored in * attrs[i]. Returns true if successful, false on failure. * * If the Netlink attributes in 'msg' follow a Netlink header and a Generic * Netlink header, then 'nla_offset' should be NLMSG_HDRLEN + GENL_HDRLEN. */ bool nl_policy_parse(const struct ofpbuf *msg, size_t nla_offset, const struct nl_policy policy[], struct nlattr *attrs[], size_t n_attrs) { struct nlattr *nla; size_t left; size_t i; memset(attrs, 0, n_attrs * sizeof *attrs); if (msg->size < nla_offset) { VLOG_DBG_RL(&rl, "missing headers in nl_policy_parse"); return false; } NL_ATTR_FOR_EACH (nla, left, ofpbuf_at(msg, nla_offset, 0), msg->size - nla_offset) { uint16_t type = nl_attr_type(nla); if (type < n_attrs && policy[type].type != NL_A_NO_ATTR) { const struct nl_policy *e = &policy[type]; if (!nl_attr_validate(nla, e)) { return false; } if (attrs[type]) { VLOG_DBG_RL(&rl, "duplicate attr %"PRIu16, type); } attrs[type] = nla; } } if (left) { VLOG_DBG_RL(&rl, "attributes followed by garbage"); return false; } for (i = 0; i < n_attrs; i++) { const struct nl_policy *e = &policy[i]; if (!e->optional && e->type != NL_A_NO_ATTR && !attrs[i]) { VLOG_DBG_RL(&rl, "required attr %zu missing", i); return false; } } return true; } /* Parses the Netlink attributes within 'nla'. 'policy[i]', for 0 <= i < * n_attrs, specifies how the attribute with nla_type == i is parsed; a pointer * to attribute i is stored in attrs[i]. Returns true if successful, false on * failure. 
*/ bool nl_parse_nested(const struct nlattr *nla, const struct nl_policy policy[], struct nlattr *attrs[], size_t n_attrs) { struct ofpbuf buf; nl_attr_get_nested(nla, &buf); return nl_policy_parse(&buf, 0, policy, attrs, n_attrs); } const struct nlattr * nl_attr_find__(const struct nlattr *attrs, size_t size, uint16_t type) { const struct nlattr *nla; size_t left; NL_ATTR_FOR_EACH (nla, left, attrs, size) { if (nl_attr_type(nla) == type) { return nla; } } return NULL; } /* Returns the first Netlink attribute within 'buf' with the specified 'type', * skipping a header of 'hdr_len' bytes at the beginning of 'buf'. * * This function does not validate the attribute's length. */ const struct nlattr * nl_attr_find(const struct ofpbuf *buf, size_t hdr_len, uint16_t type) { return nl_attr_find__(ofpbuf_at(buf, hdr_len, 0), buf->size - hdr_len, type); } /* Returns the first Netlink attribute within 'nla' with the specified * 'type'. * * This function does not validate the attribute's length. */ const struct nlattr * nl_attr_find_nested(const struct nlattr *nla, uint16_t type) { return nl_attr_find__(nl_attr_get(nla), nl_attr_get_size(nla), type); } openvswitch-2.0.1+git20140120/lib/netlink.h000066400000000000000000000210001226605124000200260ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef NETLINK_H #define NETLINK_H 1 /* Netlink message helpers. 
* * Netlink is a datagram-based network protocol primarily for communication * between user processes and the kernel, and mainly on Linux. Netlink is * specified in RFC 3549, "Linux Netlink as an IP Services Protocol". * * Netlink is not suitable for use in physical networks of heterogeneous * machines because host byte order is used throughout. * * This header file defines helper functions for working with Netlink messages. * For Netlink protocol definitions, see netlink-protocol.h. For * Linux-specific definitions for Netlink sockets, see netlink-socket.h. */ #include #include #include #include "netlink-protocol.h" #include "openvswitch/types.h" struct ofpbuf; struct nlattr; /* Accessing headers and data. */ struct nlmsghdr *nl_msg_nlmsghdr(const struct ofpbuf *); struct genlmsghdr *nl_msg_genlmsghdr(const struct ofpbuf *); bool nl_msg_nlmsgerr(const struct ofpbuf *, int *error); void nl_msg_reserve(struct ofpbuf *, size_t); /* Appending and prepending headers and raw data. */ void nl_msg_put_nlmsghdr(struct ofpbuf *, size_t expected_payload, uint32_t type, uint32_t flags); void nl_msg_put_genlmsghdr(struct ofpbuf *, size_t expected_payload, int family, uint32_t flags, uint8_t cmd, uint8_t version); void nl_msg_put(struct ofpbuf *, const void *, size_t); void *nl_msg_put_uninit(struct ofpbuf *, size_t); void nl_msg_push(struct ofpbuf *, const void *, size_t); void *nl_msg_push_uninit(struct ofpbuf *, size_t); /* Appending attributes. 
*/ void *nl_msg_put_unspec_uninit(struct ofpbuf *, uint16_t type, size_t); void *nl_msg_put_unspec_zero(struct ofpbuf *, uint16_t type, size_t); void nl_msg_put_unspec(struct ofpbuf *, uint16_t type, const void *, size_t); void nl_msg_put_flag(struct ofpbuf *, uint16_t type); void nl_msg_put_u8(struct ofpbuf *, uint16_t type, uint8_t value); void nl_msg_put_u16(struct ofpbuf *, uint16_t type, uint16_t value); void nl_msg_put_u32(struct ofpbuf *, uint16_t type, uint32_t value); void nl_msg_put_u64(struct ofpbuf *, uint16_t type, uint64_t value); void nl_msg_put_be16(struct ofpbuf *, uint16_t type, ovs_be16 value); void nl_msg_put_be32(struct ofpbuf *, uint16_t type, ovs_be32 value); void nl_msg_put_be64(struct ofpbuf *, uint16_t type, ovs_be64 value); void nl_msg_put_odp_port(struct ofpbuf *, uint16_t type, odp_port_t value); void nl_msg_put_string(struct ofpbuf *, uint16_t type, const char *value); size_t nl_msg_start_nested(struct ofpbuf *, uint16_t type); void nl_msg_end_nested(struct ofpbuf *, size_t offset); void nl_msg_put_nested(struct ofpbuf *, uint16_t type, const void *data, size_t size); /* Prepending attributes. */ void *nl_msg_push_unspec_uninit(struct ofpbuf *, uint16_t type, size_t); void nl_msg_push_unspec(struct ofpbuf *, uint16_t type, const void *, size_t); void nl_msg_push_flag(struct ofpbuf *, uint16_t type); void nl_msg_push_u8(struct ofpbuf *, uint16_t type, uint8_t value); void nl_msg_push_u16(struct ofpbuf *, uint16_t type, uint16_t value); void nl_msg_push_u32(struct ofpbuf *, uint16_t type, uint32_t value); void nl_msg_push_u64(struct ofpbuf *, uint16_t type, uint64_t value); void nl_msg_push_be16(struct ofpbuf *, uint16_t type, ovs_be16 value); void nl_msg_push_be32(struct ofpbuf *, uint16_t type, ovs_be32 value); void nl_msg_push_be64(struct ofpbuf *, uint16_t type, ovs_be64 value); void nl_msg_push_string(struct ofpbuf *, uint16_t type, const char *value); /* Separating buffers into individual messages. 
*/
struct nlmsghdr *nl_msg_next(struct ofpbuf *buffer, struct ofpbuf *msg);

/* Sizes of various attribute types, in bytes, including the attribute header
 * and padding. */
#define NL_ATTR_SIZE(PAYLOAD_SIZE) (NLA_HDRLEN + NLA_ALIGN(PAYLOAD_SIZE))
#define NL_A_U8_SIZE NL_ATTR_SIZE(sizeof(uint8_t))
#define NL_A_U16_SIZE NL_ATTR_SIZE(sizeof(uint16_t))
#define NL_A_U32_SIZE NL_ATTR_SIZE(sizeof(uint32_t))
#define NL_A_U64_SIZE NL_ATTR_SIZE(sizeof(uint64_t))
#define NL_A_BE16_SIZE NL_ATTR_SIZE(sizeof(ovs_be16))
#define NL_A_BE32_SIZE NL_ATTR_SIZE(sizeof(ovs_be32))
#define NL_A_BE64_SIZE NL_ATTR_SIZE(sizeof(ovs_be64))
#define NL_A_FLAG_SIZE NL_ATTR_SIZE(0)

/* Netlink attribute types.
 *
 * The NL_A_BE* names are aliases with the same numeric value as the
 * corresponding NL_A_U* names. */
enum nl_attr_type
{
    NL_A_NO_ATTR = 0,
    NL_A_UNSPEC,
    NL_A_U8,
    NL_A_U16,
    NL_A_BE16 = NL_A_U16,
    NL_A_U32,
    NL_A_BE32 = NL_A_U32,
    NL_A_U64,
    NL_A_BE64 = NL_A_U64,
    NL_A_STRING,
    NL_A_FLAG,
    NL_A_NESTED,
    N_NL_ATTR_TYPES
};

/* Netlink attribute iteration. */

/* Returns the address NLA_ALIGN(nla->nla_len) bytes past 'nla', that is, where
 * the attribute following 'nla' would start.  Performs no bounds checking. */
static inline struct nlattr *
nl_attr_next(const struct nlattr *nla)
{
    return (void *) ((uint8_t *) nla + NLA_ALIGN(nla->nla_len));
}

/* Returns true if an attribute header fits in the 'maxlen' bytes at 'nla', the
 * attribute's declared length covers at least that header, and the declared
 * length rounded up with NLA_ALIGN still fits within 'maxlen'.  'nla' is
 * dereferenced only after the first of these checks passes. */
static inline bool
nl_attr_is_valid(const struct nlattr *nla, size_t maxlen)
{
    return (maxlen >= sizeof *nla
            && nla->nla_len >= sizeof *nla
            && NLA_ALIGN(nla->nla_len) <= maxlen);
}

/* This macro is careful to check for attributes with bad lengths.
 *
 * After the loop ends, LEFT holds the number of bytes that were not
 * consumed. */
#define NL_ATTR_FOR_EACH(ITER, LEFT, ATTRS, ATTRS_LEN) \
    for ((ITER) = (ATTRS), (LEFT) = (ATTRS_LEN); \
         nl_attr_is_valid(ITER, LEFT); \
         (LEFT) -= NLA_ALIGN((ITER)->nla_len), (ITER) = nl_attr_next(ITER))

/* This macro does not check for attributes with bad lengths.  It should only
 * be used with messages from trusted sources or with messages that have
 * already been validated (e.g. with NL_ATTR_FOR_EACH). */
#define NL_ATTR_FOR_EACH_UNSAFE(ITER, LEFT, ATTRS, ATTRS_LEN) \
    for ((ITER) = (ATTRS), (LEFT) = (ATTRS_LEN); \
         (LEFT) > 0; \
         (LEFT) -= NLA_ALIGN((ITER)->nla_len), (ITER) = nl_attr_next(ITER))

/* These variants are convenient for iterating nested attributes.
 *
 * They iterate over the payload of attribute A.  Note that A appears twice in
 * each expansion, so it is evaluated more than once. */
#define NL_NESTED_FOR_EACH(ITER, LEFT, A) \
    NL_ATTR_FOR_EACH(ITER, LEFT, nl_attr_get(A), nl_attr_get_size(A))
#define NL_NESTED_FOR_EACH_UNSAFE(ITER, LEFT, A) \
    NL_ATTR_FOR_EACH_UNSAFE(ITER, LEFT, nl_attr_get(A), nl_attr_get_size(A))

/* Netlink attribute parsing. */
int nl_attr_type(const struct nlattr *);
const void *nl_attr_get(const struct nlattr *);
size_t nl_attr_get_size(const struct nlattr *);
const void *nl_attr_get_unspec(const struct nlattr *, size_t size);
bool nl_attr_get_flag(const struct nlattr *);
uint8_t nl_attr_get_u8(const struct nlattr *);
uint16_t nl_attr_get_u16(const struct nlattr *);
uint32_t nl_attr_get_u32(const struct nlattr *);
uint64_t nl_attr_get_u64(const struct nlattr *);
ovs_be16 nl_attr_get_be16(const struct nlattr *);
ovs_be32 nl_attr_get_be32(const struct nlattr *);
ovs_be64 nl_attr_get_be64(const struct nlattr *);
odp_port_t nl_attr_get_odp_port(const struct nlattr *);
const char *nl_attr_get_string(const struct nlattr *);
void nl_attr_get_nested(const struct nlattr *, struct ofpbuf *);

/* Netlink attribute policy.
 *
 * Specifies how to parse a single attribute from a Netlink message payload.
*/
struct nl_policy
{
    enum nl_attr_type type;
    size_t min_len, max_len;
    bool optional;
};

#define NL_POLICY_FOR(TYPE) \
    .type = NL_A_UNSPEC, .min_len = sizeof(TYPE), .max_len = sizeof(TYPE)

bool nl_attr_validate(const struct nlattr *, const struct nl_policy *);

bool nl_policy_parse(const struct ofpbuf *, size_t offset,
                     const struct nl_policy[],
                     struct nlattr *[], size_t n_attrs);
bool nl_parse_nested(const struct nlattr *, const struct nl_policy[],
                     struct nlattr *[], size_t n_attrs);

const struct nlattr *nl_attr_find(const struct ofpbuf *, size_t hdr_len,
                                  uint16_t type);
const struct nlattr *nl_attr_find_nested(const struct nlattr *, uint16_t type);
const struct nlattr *nl_attr_find__(const struct nlattr *attrs, size_t size,
                                    uint16_t type);

#endif /* netlink.h */
openvswitch-2.0.1+git20140120/lib/nx-match.c000066400000000000000000001313651226605124000201140ustar00rootroot00000000000000/*
 * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "nx-match.h"

#include <netinet/icmp6.h>
#include <stdlib.h>

#include "classifier.h"
#include "dynamic-string.h"
#include "meta-flow.h"
#include "ofp-actions.h"
#include "ofp-errors.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "openflow/nicira-ext.h"
#include "packets.h"
#include "unaligned.h"
#include "util.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(nx_match);

/* Rate limit for nx_match parse errors.  These always indicate a bug in the
 * peer and so there's not much point in showing a lot of them.
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); /* Returns the width of the data for a field with the given 'header', in * bytes. */ int nxm_field_bytes(uint32_t header) { unsigned int length = NXM_LENGTH(header); return NXM_HASMASK(header) ? length / 2 : length; } /* Returns the width of the data for a field with the given 'header', in * bits. */ int nxm_field_bits(uint32_t header) { return nxm_field_bytes(header) * 8; } /* nx_pull_match() and helpers. */ static uint32_t nx_entry_ok(const void *p, unsigned int match_len) { unsigned int payload_len; ovs_be32 header_be; uint32_t header; if (match_len < 4) { if (match_len) { VLOG_DBG_RL(&rl, "nx_match ends with partial (%u-byte) nxm_header", match_len); } return 0; } memcpy(&header_be, p, 4); header = ntohl(header_be); payload_len = NXM_LENGTH(header); if (!payload_len) { VLOG_DBG_RL(&rl, "nxm_entry %08"PRIx32" has invalid payload " "length 0", header); return 0; } if (match_len < payload_len + 4) { VLOG_DBG_RL(&rl, "%"PRIu32"-byte nxm_entry but only " "%u bytes left in nx_match", payload_len + 4, match_len); return 0; } return header; } /* Given NXM/OXM value 'value' and mask 'mask', each 'width' bytes long, * checks for any 1-bit in the value where there is a 0-bit in the mask. If it * finds one, logs a warning. */ static void check_mask_consistency(const uint8_t *p, const struct mf_field *mf) { unsigned int width = mf->n_bytes; const uint8_t *value = p + 4; const uint8_t *mask = p + 4 + width; unsigned int i; for (i = 0; i < width; i++) { if (value[i] & ~mask[i]) { if (!VLOG_DROP_WARN(&rl)) { char *s = nx_match_to_string(p, width * 2 + 4); VLOG_WARN_RL(&rl, "NXM/OXM entry %s has 1-bits in value for " "bits wildcarded by the mask. 
(Future versions " "of OVS may report this as an OpenFlow error.)", s); break; } } } } static enum ofperr nx_pull_raw(const uint8_t *p, unsigned int match_len, bool strict, struct match *match, ovs_be64 *cookie, ovs_be64 *cookie_mask) { uint32_t header; ovs_assert((cookie != NULL) == (cookie_mask != NULL)); match_init_catchall(match); if (cookie) { *cookie = *cookie_mask = htonll(0); } if (!match_len) { return 0; } for (; (header = nx_entry_ok(p, match_len)) != 0; p += 4 + NXM_LENGTH(header), match_len -= 4 + NXM_LENGTH(header)) { const struct mf_field *mf; enum ofperr error; mf = mf_from_nxm_header(header); if (!mf) { if (strict) { error = OFPERR_OFPBMC_BAD_FIELD; } else { continue; } } else if (!mf_are_prereqs_ok(mf, &match->flow)) { error = OFPERR_OFPBMC_BAD_PREREQ; } else if (!mf_is_all_wild(mf, &match->wc)) { error = OFPERR_OFPBMC_DUP_FIELD; } else { unsigned int width = mf->n_bytes; union mf_value value; memcpy(&value, p + 4, width); if (!mf_is_value_valid(mf, &value)) { error = OFPERR_OFPBMC_BAD_VALUE; } else if (!NXM_HASMASK(header)) { error = 0; mf_set_value(mf, &value, match); } else { union mf_value mask; memcpy(&mask, p + 4 + width, width); if (!mf_is_mask_valid(mf, &mask)) { error = OFPERR_OFPBMC_BAD_MASK; } else { error = 0; check_mask_consistency(p, mf); mf_set(mf, &value, &mask, match); } } } /* Check if the match is for a cookie rather than a classifier rule. 
*/ if ((header == NXM_NX_COOKIE || header == NXM_NX_COOKIE_W) && cookie) { if (*cookie_mask) { error = OFPERR_OFPBMC_DUP_FIELD; } else { unsigned int width = sizeof *cookie; memcpy(cookie, p + 4, width); if (NXM_HASMASK(header)) { memcpy(cookie_mask, p + 4 + width, width); } else { *cookie_mask = htonll(UINT64_MAX); } error = 0; } } if (error) { VLOG_DBG_RL(&rl, "bad nxm_entry %#08"PRIx32" (vendor=%"PRIu32", " "field=%"PRIu32", hasmask=%"PRIu32", len=%"PRIu32"), " "(%s)", header, NXM_VENDOR(header), NXM_FIELD(header), NXM_HASMASK(header), NXM_LENGTH(header), ofperr_to_string(error)); return error; } } return match_len ? OFPERR_OFPBMC_BAD_LEN : 0; } static enum ofperr nx_pull_match__(struct ofpbuf *b, unsigned int match_len, bool strict, struct match *match, ovs_be64 *cookie, ovs_be64 *cookie_mask) { uint8_t *p = NULL; if (match_len) { p = ofpbuf_try_pull(b, ROUND_UP(match_len, 8)); if (!p) { VLOG_DBG_RL(&rl, "nx_match length %u, rounded up to a " "multiple of 8, is longer than space in message (max " "length %zu)", match_len, b->size); return OFPERR_OFPBMC_BAD_LEN; } } return nx_pull_raw(p, match_len, strict, match, cookie, cookie_mask); } /* Parses the nx_match formatted match description in 'b' with length * 'match_len'. Stores the results in 'match'. If 'cookie' and 'cookie_mask' * are valid pointers, then stores the cookie and mask in them if 'b' contains * a "NXM_NX_COOKIE*" match. Otherwise, stores 0 in both. * * Fails with an error upon encountering an unknown NXM header. * * Returns 0 if successful, otherwise an OpenFlow error code. */ enum ofperr nx_pull_match(struct ofpbuf *b, unsigned int match_len, struct match *match, ovs_be64 *cookie, ovs_be64 *cookie_mask) { return nx_pull_match__(b, match_len, true, match, cookie, cookie_mask); } /* Behaves the same as nx_pull_match(), but skips over unknown NXM headers, * instead of failing with an error. 
*/ enum ofperr nx_pull_match_loose(struct ofpbuf *b, unsigned int match_len, struct match *match, ovs_be64 *cookie, ovs_be64 *cookie_mask) { return nx_pull_match__(b, match_len, false, match, cookie, cookie_mask); } static enum ofperr oxm_pull_match__(struct ofpbuf *b, bool strict, struct match *match) { struct ofp11_match_header *omh = b->data; uint8_t *p; uint16_t match_len; if (b->size < sizeof *omh) { return OFPERR_OFPBMC_BAD_LEN; } match_len = ntohs(omh->length); if (match_len < sizeof *omh) { return OFPERR_OFPBMC_BAD_LEN; } if (omh->type != htons(OFPMT_OXM)) { return OFPERR_OFPBMC_BAD_TYPE; } p = ofpbuf_try_pull(b, ROUND_UP(match_len, 8)); if (!p) { VLOG_DBG_RL(&rl, "oxm length %u, rounded up to a " "multiple of 8, is longer than space in message (max " "length %zu)", match_len, b->size); return OFPERR_OFPBMC_BAD_LEN; } return nx_pull_raw(p + sizeof *omh, match_len - sizeof *omh, strict, match, NULL, NULL); } /* Parses the oxm formatted match description preceded by a struct ofp11_match * in 'b' with length 'match_len'. Stores the result in 'match'. * * Fails with an error when encountering unknown OXM headers. * * Returns 0 if successful, otherwise an OpenFlow error code. */ enum ofperr oxm_pull_match(struct ofpbuf *b, struct match *match) { return oxm_pull_match__(b, true, match); } /* Behaves the same as oxm_pull_match() with one exception. Skips over unknown * PXM headers instead of failing with an error when they are encountered. */ enum ofperr oxm_pull_match_loose(struct ofpbuf *b, struct match *match) { return oxm_pull_match__(b, false, match); } /* nx_put_match() and helpers. * * 'put' functions whose names end in 'w' add a wildcarded field. * 'put' functions whose names end in 'm' add a field that might be wildcarded. * Other 'put' functions add exact-match fields. 
*/

/* Appends 'header', converted to network byte order, to 'b'. */
static void
nxm_put_header(struct ofpbuf *b, uint32_t header)
{
    ovs_be32 n_header = htonl(header);
    ofpbuf_put(b, &n_header, sizeof n_header);
}

/* Appends an exact-match entry with 'header' and 1-byte 'value' to 'b'. */
static void
nxm_put_8(struct ofpbuf *b, uint32_t header, uint8_t value)
{
    nxm_put_header(b, header);
    ofpbuf_put(b, &value, sizeof value);
}

/* Appends a 1-byte entry to 'b' according to 'mask': nothing for an all-zeros
 * mask, an exact-match entry for an all-ones mask, otherwise a masked entry
 * (wild header, value, mask). */
static void
nxm_put_8m(struct ofpbuf *b, uint32_t header, uint8_t value, uint8_t mask)
{
    switch (mask) {
    case 0:
        break;

    case UINT8_MAX:
        nxm_put_8(b, header, value);
        break;

    default:
        nxm_put_header(b, NXM_MAKE_WILD_HEADER(header));
        ofpbuf_put(b, &value, sizeof value);
        ofpbuf_put(b, &mask, sizeof mask);
    }
}

/* Appends an exact-match entry with 'header' and 2-byte 'value' to 'b'. */
static void
nxm_put_16(struct ofpbuf *b, uint32_t header, ovs_be16 value)
{
    nxm_put_header(b, header);
    ofpbuf_put(b, &value, sizeof value);
}

/* Appends 'header' followed by 2-byte 'value' and 'mask' to 'b'.  The caller
 * supplies the header as-is; this function does not convert it to its wild
 * form. */
static void
nxm_put_16w(struct ofpbuf *b, uint32_t header, ovs_be16 value, ovs_be16 mask)
{
    nxm_put_header(b, header);
    ofpbuf_put(b, &value, sizeof value);
    ofpbuf_put(b, &mask, sizeof mask);
}

/* Appends a 2-byte entry to 'b' according to 'mask': nothing for an all-zeros
 * mask, an exact-match entry for an all-ones mask, otherwise a masked
 * entry. */
static void
nxm_put_16m(struct ofpbuf *b, uint32_t header, ovs_be16 value, ovs_be16 mask)
{
    switch (mask) {
    case 0:
        break;

    case CONSTANT_HTONS(UINT16_MAX):
        nxm_put_16(b, header, value);
        break;

    default:
        nxm_put_16w(b, NXM_MAKE_WILD_HEADER(header), value, mask);
        break;
    }
}

/* Appends an exact-match entry with 'header' and 4-byte 'value' to 'b'. */
static void
nxm_put_32(struct ofpbuf *b, uint32_t header, ovs_be32 value)
{
    nxm_put_header(b, header);
    ofpbuf_put(b, &value, sizeof value);
}

/* Appends 'header' followed by 4-byte 'value' and 'mask' to 'b'. */
static void
nxm_put_32w(struct ofpbuf *b, uint32_t header, ovs_be32 value, ovs_be32 mask)
{
    nxm_put_header(b, header);
    ofpbuf_put(b, &value, sizeof value);
    ofpbuf_put(b, &mask, sizeof mask);
}

/* Appends a 4-byte entry to 'b' according to 'mask': nothing for an all-zeros
 * mask, an exact-match entry for an all-ones mask, otherwise a masked
 * entry. */
static void
nxm_put_32m(struct ofpbuf *b, uint32_t header, ovs_be32 value, ovs_be32 mask)
{
    switch (mask) {
    case 0:
        break;

    case CONSTANT_HTONL(UINT32_MAX):
        nxm_put_32(b, header, value);
        break;

    default:
        nxm_put_32w(b, NXM_MAKE_WILD_HEADER(header), value, mask);
        break;
    }
}

/* Appends an exact-match entry with 'header' and 8-byte 'value' to 'b'. */
static void
nxm_put_64(struct ofpbuf *b, uint32_t header, ovs_be64 value)
{
    nxm_put_header(b, header);
    ofpbuf_put(b, &value, sizeof value);
}

/* Appends 'header' followed by 8-byte 'value' and 'mask' to 'b'. */
static void
nxm_put_64w(struct ofpbuf *b, uint32_t header, ovs_be64 value, ovs_be64 mask)
{
    nxm_put_header(b, header);
    ofpbuf_put(b, &value, sizeof value);
    ofpbuf_put(b, &mask, sizeof mask);
}

/* Appends an 8-byte entry to 'b' according to 'mask': nothing for an
 * all-zeros mask, an exact-match entry for an all-ones mask, otherwise a
 * masked entry. */
static void
nxm_put_64m(struct ofpbuf *b, uint32_t header, ovs_be64 value, ovs_be64 mask)
{
    switch (mask) {
    case 0:
        break;

    case CONSTANT_HTONLL(UINT64_MAX):
        nxm_put_64(b, header, value);
        break;

    default:
        nxm_put_64w(b, NXM_MAKE_WILD_HEADER(header), value, mask);
        break;
    }
}

/* Appends an exact-match entry with 'header' and Ethernet address 'value' to
 * 'b'. */
static void
nxm_put_eth(struct ofpbuf *b, uint32_t header,
            const uint8_t value[ETH_ADDR_LEN])
{
    nxm_put_header(b, header);
    ofpbuf_put(b, value, ETH_ADDR_LEN);
}

/* Appends an Ethernet address entry to 'b' according to 'mask': nothing if
 * 'mask' is all zeros, an exact-match entry if eth_mask_is_exact() says so,
 * otherwise a masked entry. */
static void
nxm_put_eth_masked(struct ofpbuf *b, uint32_t header,
                   const uint8_t value[ETH_ADDR_LEN],
                   const uint8_t mask[ETH_ADDR_LEN])
{
    if (!eth_addr_is_zero(mask)) {
        if (eth_mask_is_exact(mask)) {
            nxm_put_eth(b, header, value);
        } else {
            nxm_put_header(b, NXM_MAKE_WILD_HEADER(header));
            ofpbuf_put(b, value, ETH_ADDR_LEN);
            ofpbuf_put(b, mask, ETH_ADDR_LEN);
        }
    }
}

/* Appends an IPv6 address entry to 'b' according to 'mask': nothing if
 * ipv6_mask_is_any(), an exact-match entry if ipv6_mask_is_exact(), otherwise
 * a masked entry. */
static void
nxm_put_ipv6(struct ofpbuf *b, uint32_t header,
             const struct in6_addr *value, const struct in6_addr *mask)
{
    if (ipv6_mask_is_any(mask)) {
        return;
    } else if (ipv6_mask_is_exact(mask)) {
        nxm_put_header(b, header);
        ofpbuf_put(b, value, sizeof *value);
    } else {
        nxm_put_header(b, NXM_MAKE_WILD_HEADER(header));
        ofpbuf_put(b, value, sizeof *value);
        ofpbuf_put(b, mask, sizeof *mask);
    }
}

/* Appends an NXM_NX_IP_FRAG entry for 'match' to 'b', unless the fragment
 * mask is zero. */
static void
nxm_put_frag(struct ofpbuf *b, const struct match *match)
{
    uint8_t nw_frag = match->flow.nw_frag;
    uint8_t nw_frag_mask = match->wc.masks.nw_frag;

    switch (nw_frag_mask) {
    case 0:
        break;

    case FLOW_NW_FRAG_MASK:
        nxm_put_8(b, NXM_NX_IP_FRAG, nw_frag);
        break;

    default:
        /* Only the bits within FLOW_NW_FRAG_MASK are passed on as the
         * mask. */
        nxm_put_8m(b, NXM_NX_IP_FRAG, nw_frag,
                   nw_frag_mask & FLOW_NW_FRAG_MASK);
        break;
    }
}

/* Appends to 'b' the entries shared by IPv4 and IPv6 matches: fragment, DSCP,
 * ECN, TTL (NXM only), IP protocol and, depending on the protocol,
 * TCP/UDP/SCTP ports or ICMP type and code.
 *
 * 'icmp_proto' is the IP protocol number treated as ICMP for this address
 * family; 'icmp_type' and 'icmp_code' are the headers to use for its type and
 * code.  'oxm' selects OXM headers over NXM ones where both exist. */
static void
nxm_put_ip(struct ofpbuf *b, const struct match *match,
           uint8_t icmp_proto, uint32_t icmp_type, uint32_t icmp_code,
           bool oxm)
{
    const struct flow *flow = &match->flow;

    nxm_put_frag(b, match);

    if (match->wc.masks.nw_tos & IP_DSCP_MASK) {
        if (oxm) {
            /* OXM gets the value shifted right by 2, NXM the value masked
             * with IP_DSCP_MASK. */
            nxm_put_8(b, OXM_OF_IP_DSCP, flow->nw_tos >> 2);
        } else {
            nxm_put_8(b, NXM_OF_IP_TOS, flow->nw_tos & IP_DSCP_MASK);
        }
    }

    if (match->wc.masks.nw_tos & IP_ECN_MASK) {
        nxm_put_8(b, oxm ? OXM_OF_IP_ECN : NXM_NX_IP_ECN,
                  flow->nw_tos & IP_ECN_MASK);
    }

    /* TTL is emitted only in NXM format. */
    if (!oxm && match->wc.masks.nw_ttl) {
        nxm_put_8(b, NXM_NX_IP_TTL, flow->nw_ttl);
    }

    if (match->wc.masks.nw_proto) {
        nxm_put_8(b, oxm ? OXM_OF_IP_PROTO : NXM_OF_IP_PROTO, flow->nw_proto);

        if (flow->nw_proto == IPPROTO_TCP) {
            nxm_put_16m(b, oxm ? OXM_OF_TCP_SRC : NXM_OF_TCP_SRC,
                        flow->tp_src, match->wc.masks.tp_src);
            nxm_put_16m(b, oxm ? OXM_OF_TCP_DST : NXM_OF_TCP_DST,
                        flow->tp_dst, match->wc.masks.tp_dst);
        } else if (flow->nw_proto == IPPROTO_UDP) {
            nxm_put_16m(b, oxm ? OXM_OF_UDP_SRC : NXM_OF_UDP_SRC,
                        flow->tp_src, match->wc.masks.tp_src);
            nxm_put_16m(b, oxm ? OXM_OF_UDP_DST : NXM_OF_UDP_DST,
                        flow->tp_dst, match->wc.masks.tp_dst);
        } else if (flow->nw_proto == IPPROTO_SCTP) {
            /* The OXM headers are used for SCTP regardless of 'oxm'. */
            nxm_put_16m(b, OXM_OF_SCTP_SRC, flow->tp_src,
                        match->wc.masks.tp_src);
            nxm_put_16m(b, OXM_OF_SCTP_DST, flow->tp_dst,
                        match->wc.masks.tp_dst);
        } else if (flow->nw_proto == icmp_proto) {
            /* ICMP type and code are read from tp_src and tp_dst. */
            if (match->wc.masks.tp_src) {
                nxm_put_8(b, icmp_type, ntohs(flow->tp_src));
            }
            if (match->wc.masks.tp_dst) {
                nxm_put_8(b, icmp_code, ntohs(flow->tp_dst));
            }
        }
    }
}

/* Appends to 'b' the nx_match format that expresses 'match'.  For Flow Mod and
 * Flow Stats Requests messages, a 'cookie' and 'cookie_mask' may be supplied.
 * Otherwise, 'cookie_mask' should be zero.
 *
 * 'oxm' selects OXM headers over NXM ones where both exist.
 *
 * This function can cause 'b''s data to be reallocated.
 *
 * Returns the number of bytes appended to 'b', excluding padding.
 *
 * If 'match' is a catch-all rule that matches every packet, then this function
 * appends nothing to 'b' and returns 0. */
static int
nx_put_raw(struct ofpbuf *b, bool oxm, const struct match *match,
           ovs_be64 cookie, ovs_be64 cookie_mask)
{
    const struct flow *flow = &match->flow;
    const size_t start_len = b->size;
    int match_len;
    int i;

    /* Build-time check on FLOW_WC_SEQ; presumably a reminder to revisit this
     * function when the flow structure changes -- TODO confirm. */
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20);

    /* Metadata. */
    if (match->wc.masks.in_port.ofp_port) {
        ofp_port_t in_port = flow->in_port.ofp_port;
        if (oxm) {
            nxm_put_32(b, OXM_OF_IN_PORT, ofputil_port_to_ofp11(in_port));
        } else {
            nxm_put_16(b, NXM_OF_IN_PORT, htons(ofp_to_u16(in_port)));
        }
    }

    /* Ethernet. */
    nxm_put_eth_masked(b, oxm ? OXM_OF_ETH_SRC : NXM_OF_ETH_SRC,
                       flow->dl_src, match->wc.masks.dl_src);
    nxm_put_eth_masked(b, oxm ? OXM_OF_ETH_DST : NXM_OF_ETH_DST,
                       flow->dl_dst, match->wc.masks.dl_dst);
    nxm_put_16m(b, oxm ? OXM_OF_ETH_TYPE : NXM_OF_ETH_TYPE,
                ofputil_dl_type_to_openflow(flow->dl_type),
                match->wc.masks.dl_type);

    /* 802.1Q. */
    if (oxm) {
        /* OXM splits the TCI into a VID (with CFI bit) entry and a PCP
         * entry. */
        ovs_be16 VID_CFI_MASK = htons(VLAN_VID_MASK | VLAN_CFI);
        ovs_be16 vid = flow->vlan_tci & VID_CFI_MASK;
        ovs_be16 mask = match->wc.masks.vlan_tci & VID_CFI_MASK;

        if (mask == htons(VLAN_VID_MASK | VLAN_CFI)) {
            nxm_put_16(b, OXM_OF_VLAN_VID, vid);
        } else if (mask) {
            nxm_put_16m(b, OXM_OF_VLAN_VID, vid, mask);
        }

        if (vid && vlan_tci_to_pcp(match->wc.masks.vlan_tci)) {
            nxm_put_8(b, OXM_OF_VLAN_PCP, vlan_tci_to_pcp(flow->vlan_tci));
        }

    } else {
        nxm_put_16m(b, NXM_OF_VLAN_TCI, flow->vlan_tci,
                    match->wc.masks.vlan_tci);
    }

    /* MPLS. */
    if (eth_type_mpls(flow->dl_type)) {
        if (match->wc.masks.mpls_lse & htonl(MPLS_TC_MASK)) {
            nxm_put_8(b, OXM_OF_MPLS_TC, mpls_lse_to_tc(flow->mpls_lse));
        }

        if (match->wc.masks.mpls_lse & htonl(MPLS_BOS_MASK)) {
            nxm_put_8(b, OXM_OF_MPLS_BOS, mpls_lse_to_bos(flow->mpls_lse));
        }

        if (match->wc.masks.mpls_lse & htonl(MPLS_LABEL_MASK)) {
            nxm_put_32(b, OXM_OF_MPLS_LABEL,
                       htonl(mpls_lse_to_label(flow->mpls_lse)));
        }
    }

    /* L3. */
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        /* IP. */
        nxm_put_32m(b, oxm ? OXM_OF_IPV4_SRC : NXM_OF_IP_SRC,
                    flow->nw_src, match->wc.masks.nw_src);
        nxm_put_32m(b, oxm ? OXM_OF_IPV4_DST : NXM_OF_IP_DST,
                    flow->nw_dst, match->wc.masks.nw_dst);
        nxm_put_ip(b, match, IPPROTO_ICMP,
                   oxm ? OXM_OF_ICMPV4_TYPE : NXM_OF_ICMP_TYPE,
                   oxm ? OXM_OF_ICMPV4_CODE : NXM_OF_ICMP_CODE, oxm);
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        /* IPv6. */
        nxm_put_ipv6(b, oxm ? OXM_OF_IPV6_SRC : NXM_NX_IPV6_SRC,
                     &flow->ipv6_src, &match->wc.masks.ipv6_src);
        nxm_put_ipv6(b, oxm ? OXM_OF_IPV6_DST : NXM_NX_IPV6_DST,
                     &flow->ipv6_dst, &match->wc.masks.ipv6_dst);
        nxm_put_ip(b, match, IPPROTO_ICMPV6,
                   oxm ? OXM_OF_ICMPV6_TYPE : NXM_NX_ICMPV6_TYPE,
                   oxm ? OXM_OF_ICMPV6_CODE : NXM_NX_ICMPV6_CODE, oxm);

        nxm_put_32m(b, oxm ? OXM_OF_IPV6_FLABEL : NXM_NX_IPV6_LABEL,
                    flow->ipv6_label, match->wc.masks.ipv6_label);

        /* Neighbor discovery: the ICMPv6 type is read from tp_src, and the
         * link-layer addresses from arp_sha/arp_tha. */
        if (flow->nw_proto == IPPROTO_ICMPV6
            && (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT) ||
                flow->tp_src == htons(ND_NEIGHBOR_ADVERT))) {
            nxm_put_ipv6(b, oxm ? OXM_OF_IPV6_ND_TARGET : NXM_NX_ND_TARGET,
                         &flow->nd_target, &match->wc.masks.nd_target);
            if (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT)) {
                nxm_put_eth_masked(b, oxm ? OXM_OF_IPV6_ND_SLL : NXM_NX_ND_SLL,
                                   flow->arp_sha, match->wc.masks.arp_sha);
            }
            if (flow->tp_src == htons(ND_NEIGHBOR_ADVERT)) {
                nxm_put_eth_masked(b, oxm ? OXM_OF_IPV6_ND_TLL : NXM_NX_ND_TLL,
                                   flow->arp_tha, match->wc.masks.arp_tha);
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
               flow->dl_type == htons(ETH_TYPE_RARP)) {
        /* ARP.  The opcode is read from nw_proto and the protocol addresses
         * from nw_src/nw_dst. */
        if (match->wc.masks.nw_proto) {
            nxm_put_16(b, oxm ? OXM_OF_ARP_OP : NXM_OF_ARP_OP,
                       htons(flow->nw_proto));
        }
        nxm_put_32m(b, oxm ? OXM_OF_ARP_SPA : NXM_OF_ARP_SPA,
                    flow->nw_src, match->wc.masks.nw_src);
        nxm_put_32m(b, oxm ? OXM_OF_ARP_TPA : NXM_OF_ARP_TPA,
                    flow->nw_dst, match->wc.masks.nw_dst);
        nxm_put_eth_masked(b, oxm ? OXM_OF_ARP_SHA : NXM_NX_ARP_SHA,
                           flow->arp_sha, match->wc.masks.arp_sha);
        nxm_put_eth_masked(b, oxm ? OXM_OF_ARP_THA : NXM_NX_ARP_THA,
                           flow->arp_tha, match->wc.masks.arp_tha);
    }

    /* Tunnel ID. */
    nxm_put_64m(b, oxm ? OXM_OF_TUNNEL_ID : NXM_NX_TUN_ID,
                flow->tunnel.tun_id, match->wc.masks.tunnel.tun_id);

    /* Other tunnel metadata. */
    nxm_put_32m(b, NXM_NX_TUN_IPV4_SRC,
                flow->tunnel.ip_src, match->wc.masks.tunnel.ip_src);
    nxm_put_32m(b, NXM_NX_TUN_IPV4_DST,
                flow->tunnel.ip_dst, match->wc.masks.tunnel.ip_dst);

    /* Registers. */
    for (i = 0; i < FLOW_N_REGS; i++) {
        nxm_put_32m(b, NXM_NX_REG(i),
                    htonl(flow->regs[i]), htonl(match->wc.masks.regs[i]));
    }

    /* Mark. */
    nxm_put_32m(b, NXM_NX_PKT_MARK, htonl(flow->pkt_mark),
                htonl(match->wc.masks.pkt_mark));

    /* OpenFlow 1.1+ Metadata. */
    nxm_put_64m(b, OXM_OF_METADATA, flow->metadata, match->wc.masks.metadata);

    /* Cookie.  Nothing is emitted when 'cookie_mask' is zero. */
    nxm_put_64m(b, NXM_NX_COOKIE, cookie, cookie_mask);

    match_len = b->size - start_len;
    return match_len;
}

/* Appends to 'b' the nx_match format that expresses 'match', plus enough zero
 * bytes to pad the nx_match out to a multiple of 8.  For Flow Mod and Flow
 * Stats Requests messages, a 'cookie' and 'cookie_mask' may be supplied.
 * Otherwise, 'cookie_mask' should be zero.
 *
 * This function can cause 'b''s data to be reallocated.
 *
 * Returns the number of bytes appended to 'b', excluding padding.  The return
 * value can be zero if it appended nothing at all to 'b' (which happens if
 * 'match' is a catch-all rule that matches every packet). */
int
nx_put_match(struct ofpbuf *b, const struct match *match,
             ovs_be64 cookie, ovs_be64 cookie_mask)
{
    int match_len = nx_put_raw(b, false, match, cookie, cookie_mask);

    ofpbuf_put_zeros(b, ROUND_UP(match_len, 8) - match_len);
    return match_len;
}

/* Appends to 'b' an struct ofp11_match_header followed by the oxm format that
 * expresses 'cr', plus enough zero bytes to pad the data appended out to a
 * multiple of 8.
 *
 * This function can cause 'b''s data to be reallocated.
 *
 * Returns the number of bytes appended to 'b', excluding the padding.  Never
 * returns zero.
*/ int oxm_put_match(struct ofpbuf *b, const struct match *match) { int match_len; struct ofp11_match_header *omh; size_t start_len = b->size; ovs_be64 cookie = htonll(0), cookie_mask = htonll(0); ofpbuf_put_uninit(b, sizeof *omh); match_len = nx_put_raw(b, true, match, cookie, cookie_mask) + sizeof *omh; ofpbuf_put_zeros(b, ROUND_UP(match_len, 8) - match_len); omh = ofpbuf_at(b, start_len, sizeof *omh); omh->type = htons(OFPMT_OXM); omh->length = htons(match_len); return match_len; } /* nx_match_to_string() and helpers. */ static void format_nxm_field_name(struct ds *, uint32_t header); char * nx_match_to_string(const uint8_t *p, unsigned int match_len) { uint32_t header; struct ds s; if (!match_len) { return xstrdup(""); } ds_init(&s); while ((header = nx_entry_ok(p, match_len)) != 0) { unsigned int length = NXM_LENGTH(header); unsigned int value_len = nxm_field_bytes(header); const uint8_t *value = p + 4; const uint8_t *mask = value + value_len; unsigned int i; if (s.length) { ds_put_cstr(&s, ", "); } format_nxm_field_name(&s, header); ds_put_char(&s, '('); for (i = 0; i < value_len; i++) { ds_put_format(&s, "%02x", value[i]); } if (NXM_HASMASK(header)) { ds_put_char(&s, '/'); for (i = 0; i < value_len; i++) { ds_put_format(&s, "%02x", mask[i]); } } ds_put_char(&s, ')'); p += 4 + length; match_len -= 4 + length; } if (match_len) { if (s.length) { ds_put_cstr(&s, ", "); } ds_put_format(&s, "<%u invalid bytes>", match_len); } return ds_steal_cstr(&s); } char * oxm_match_to_string(const struct ofpbuf *p, unsigned int match_len) { const struct ofp11_match_header *omh = p->data; uint16_t match_len_; struct ds s; ds_init(&s); if (match_len < sizeof *omh) { ds_put_format(&s, "", match_len); goto err; } if (omh->type != htons(OFPMT_OXM)) { ds_put_format(&s, "", ntohs(omh->type)); goto err; } match_len_ = ntohs(omh->length); if (match_len_ < sizeof *omh) { ds_put_format(&s, "", match_len_); goto err; } if (match_len_ != match_len) { ds_put_format(&s, "", match_len_, 
match_len); goto err; } return nx_match_to_string(ofpbuf_at(p, sizeof *omh, 0), match_len - sizeof *omh); err: return ds_steal_cstr(&s); } static void format_nxm_field_name(struct ds *s, uint32_t header) { const struct mf_field *mf = mf_from_nxm_header(header); if (mf) { ds_put_cstr(s, IS_OXM_HEADER(header) ? mf->oxm_name : mf->nxm_name); if (NXM_HASMASK(header)) { ds_put_cstr(s, "_W"); } } else if (header == NXM_NX_COOKIE) { ds_put_cstr(s, "NXM_NX_COOKIE"); } else if (header == NXM_NX_COOKIE_W) { ds_put_cstr(s, "NXM_NX_COOKIE_W"); } else { ds_put_format(s, "%d:%d", NXM_VENDOR(header), NXM_FIELD(header)); } } static uint32_t parse_nxm_field_name(const char *name, int name_len) { bool wild; int i; /* Check whether it's a field name. */ wild = name_len > 2 && !memcmp(&name[name_len - 2], "_W", 2); if (wild) { name_len -= 2; } for (i = 0; i < MFF_N_IDS; i++) { const struct mf_field *mf = mf_from_id(i); uint32_t header; if (mf->nxm_name && !strncmp(mf->nxm_name, name, name_len) && mf->nxm_name[name_len] == '\0') { header = mf->nxm_header; } else if (mf->oxm_name && !strncmp(mf->oxm_name, name, name_len) && mf->oxm_name[name_len] == '\0') { header = mf->oxm_header; } else { continue; } if (!wild) { return header; } else if (mf->maskable != MFM_NONE) { return NXM_MAKE_WILD_HEADER(header); } } if (!strncmp("NXM_NX_COOKIE", name, name_len) && (name_len == strlen("NXM_NX_COOKIE"))) { if (!wild) { return NXM_NX_COOKIE; } else { return NXM_NX_COOKIE_W; } } /* Check whether it's a 32-bit field header value as hex. * (This isn't ordinarily useful except for testing error behavior.) */ if (name_len == 8) { uint32_t header = hexits_value(name, name_len, NULL); if (header != UINT_MAX) { return header; } } return 0; } /* nx_match_from_string(). */ static int nx_match_from_string_raw(const char *s, struct ofpbuf *b) { const char *full_s = s; const size_t start_len = b->size; if (!strcmp(s, "")) { /* Ensure that 'b->data' isn't actually null. 
*/ ofpbuf_prealloc_tailroom(b, 1); return 0; } for (s += strspn(s, ", "); *s; s += strspn(s, ", ")) { const char *name; uint32_t header; int name_len; size_t n; name = s; name_len = strcspn(s, "("); if (s[name_len] != '(') { ovs_fatal(0, "%s: missing ( at end of nx_match", full_s); } header = parse_nxm_field_name(name, name_len); if (!header) { ovs_fatal(0, "%s: unknown field `%.*s'", full_s, name_len, s); } s += name_len + 1; nxm_put_header(b, header); s = ofpbuf_put_hex(b, s, &n); if (n != nxm_field_bytes(header)) { ovs_fatal(0, "%.2s: hex digits expected", s); } if (NXM_HASMASK(header)) { s += strspn(s, " "); if (*s != '/') { ovs_fatal(0, "%s: missing / in masked field %.*s", full_s, name_len, name); } s = ofpbuf_put_hex(b, s + 1, &n); if (n != nxm_field_bytes(header)) { ovs_fatal(0, "%.2s: hex digits expected", s); } } s += strspn(s, " "); if (*s != ')') { ovs_fatal(0, "%s: missing ) following field %.*s", full_s, name_len, name); } s++; } return b->size - start_len; } int nx_match_from_string(const char *s, struct ofpbuf *b) { int match_len = nx_match_from_string_raw(s, b); ofpbuf_put_zeros(b, ROUND_UP(match_len, 8) - match_len); return match_len; } int oxm_match_from_string(const char *s, struct ofpbuf *b) { int match_len; struct ofp11_match_header *omh; size_t start_len = b->size; ofpbuf_put_uninit(b, sizeof *omh); match_len = nx_match_from_string_raw(s, b) + sizeof *omh; ofpbuf_put_zeros(b, ROUND_UP(match_len, 8) - match_len); omh = ofpbuf_at(b, start_len, sizeof *omh); omh->type = htons(OFPMT_OXM); omh->length = htons(match_len); return match_len; } /* Parses 's' as a "move" action, in the form described in ovs-ofctl(8), into * '*move'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/
char * WARN_UNUSED_RESULT
nxm_parse_reg_move(struct ofpact_reg_move *move, const char *s)
{
    const char *full_s = s;
    char *error;

    /* The "__" variant advances 's' past the source subfield so that the
     * "->" separator can be checked next. */
    error = mf_parse_subfield__(&move->src, &s);
    if (error) {
        return error;
    }
    if (strncmp(s, "->", 2)) {
        return xasprintf("%s: missing `->' following source", full_s);
    }
    s += 2;
    error = mf_parse_subfield(&move->dst, s);
    if (error) {
        return error;
    }

    /* A move copies bit for bit, so both ends must have the same width. */
    if (move->src.n_bits != move->dst.n_bits) {
        return xasprintf("%s: source field is %d bits wide but destination is "
                         "%d bits wide", full_s,
                         move->src.n_bits, move->dst.n_bits);
    }
    return NULL;
}

/* Parses 's' as a "load" action, in the form described in ovs-ofctl(8), into
 * '*load'.
 *
 * The value is parsed with strtoull() in base 0, so decimal, octal ("0...")
 * and hexadecimal ("0x...") forms are accepted.  A string with no value at
 * all before the "->" is rejected rather than being treated as 0.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string. */
char * WARN_UNUSED_RESULT
nxm_parse_reg_load(struct ofpact_reg_load *load, const char *s)
{
    const char *full_s = s;
    uint64_t value = strtoull(s, (char **) &s, 0);
    char *error;

    if (strncmp(s, "->", 2)) {
        return xasprintf("%s: missing `->' following value", full_s);
    }
    if (s == full_s) {
        /* strtoull() performed no conversion: the input begins with "->".
         * Without this check such input would silently load the value 0. */
        return xasprintf("%s: missing value before `->'", full_s);
    }
    s += 2;
    error = mf_parse_subfield(&load->dst, s);
    if (error) {
        return error;
    }

    /* The shift is only well defined for widths below 64 bits; a 64-bit (or
     * wider) destination can hold any uint64_t. */
    if (load->dst.n_bits < 64 && (value >> load->dst.n_bits) != 0) {
        return xasprintf("%s: value %"PRIu64" does not fit into %d bits",
                         full_s, value, load->dst.n_bits);
    }

    /* The value occupies the second 64-bit word of the subvalue; the first
     * word is cleared. */
    load->subvalue.be64[0] = htonll(0);
    load->subvalue.be64[1] = htonll(value);
    return NULL;
}

/* nxm_format_reg_move(), nxm_format_reg_load().
*/ void nxm_format_reg_move(const struct ofpact_reg_move *move, struct ds *s) { ds_put_format(s, "move:"); mf_format_subfield(&move->src, s); ds_put_cstr(s, "->"); mf_format_subfield(&move->dst, s); } static void set_field_format(const struct ofpact_reg_load *load, struct ds *s) { const struct mf_field *mf = load->dst.field; union mf_value value; ovs_assert(load->ofpact.compat == OFPUTIL_OFPAT12_SET_FIELD); ds_put_format(s, "set_field:"); memset(&value, 0, sizeof value); bitwise_copy(&load->subvalue, sizeof load->subvalue, 0, &value, mf->n_bytes, 0, load->dst.n_bits); mf_format(mf, &value, NULL, s); ds_put_format(s, "->%s", mf->name); } static void load_format(const struct ofpact_reg_load *load, struct ds *s) { ds_put_cstr(s, "load:"); mf_format_subvalue(&load->subvalue, s); ds_put_cstr(s, "->"); mf_format_subfield(&load->dst, s); } void nxm_format_reg_load(const struct ofpact_reg_load *load, struct ds *s) { if (load->ofpact.compat == OFPUTIL_OFPAT12_SET_FIELD) { set_field_format(load, s); } else { load_format(load, s); } } enum ofperr nxm_reg_move_from_openflow(const struct nx_action_reg_move *narm, struct ofpbuf *ofpacts) { struct ofpact_reg_move *move; move = ofpact_put_REG_MOVE(ofpacts); move->src.field = mf_from_nxm_header(ntohl(narm->src)); move->src.ofs = ntohs(narm->src_ofs); move->src.n_bits = ntohs(narm->n_bits); move->dst.field = mf_from_nxm_header(ntohl(narm->dst)); move->dst.ofs = ntohs(narm->dst_ofs); move->dst.n_bits = ntohs(narm->n_bits); return nxm_reg_move_check(move, NULL); } enum ofperr nxm_reg_load_from_openflow(const struct nx_action_reg_load *narl, struct ofpbuf *ofpacts) { struct ofpact_reg_load *load; load = ofpact_put_REG_LOAD(ofpacts); load->dst.field = mf_from_nxm_header(ntohl(narl->dst)); load->dst.ofs = nxm_decode_ofs(narl->ofs_nbits); load->dst.n_bits = nxm_decode_n_bits(narl->ofs_nbits); load->subvalue.be64[1] = narl->value; /* Reject 'narl' if a bit numbered 'n_bits' or higher is set to 1 in * narl->value. 
*/ if (load->dst.n_bits < 64 && ntohll(narl->value) >> load->dst.n_bits) { return OFPERR_OFPBAC_BAD_ARGUMENT; } return nxm_reg_load_check(load, NULL); } enum ofperr nxm_reg_load_from_openflow12_set_field( const struct ofp12_action_set_field * oasf, struct ofpbuf *ofpacts) { uint16_t oasf_len = ntohs(oasf->len); uint32_t oxm_header = ntohl(oasf->dst); uint8_t oxm_length = NXM_LENGTH(oxm_header); struct ofpact_reg_load *load; const struct mf_field *mf; /* ofp12_action_set_field is padded to 64 bits by zero */ if (oasf_len != ROUND_UP(sizeof(*oasf) + oxm_length, 8)) { return OFPERR_OFPBAC_BAD_SET_LEN; } if (!is_all_zeros((const uint8_t *)(oasf) + sizeof *oasf + oxm_length, oasf_len - oxm_length - sizeof *oasf)) { return OFPERR_OFPBAC_BAD_SET_ARGUMENT; } if (NXM_HASMASK(oxm_header)) { return OFPERR_OFPBAC_BAD_SET_TYPE; } mf = mf_from_nxm_header(oxm_header); if (!mf) { return OFPERR_OFPBAC_BAD_SET_TYPE; } load = ofpact_put_REG_LOAD(ofpacts); ofpact_set_field_init(load, mf, oasf + 1); return nxm_reg_load_check(load, NULL); } enum ofperr nxm_reg_move_check(const struct ofpact_reg_move *move, const struct flow *flow) { enum ofperr error; error = mf_check_src(&move->src, flow); if (error) { return error; } return mf_check_dst(&move->dst, NULL); } enum ofperr nxm_reg_load_check(const struct ofpact_reg_load *load, const struct flow *flow) { return mf_check_dst(&load->dst, flow); } void nxm_reg_move_to_nxast(const struct ofpact_reg_move *move, struct ofpbuf *openflow) { struct nx_action_reg_move *narm; narm = ofputil_put_NXAST_REG_MOVE(openflow); narm->n_bits = htons(move->dst.n_bits); narm->src_ofs = htons(move->src.ofs); narm->dst_ofs = htons(move->dst.ofs); narm->src = htonl(move->src.field->nxm_header); narm->dst = htonl(move->dst.field->nxm_header); } static void reg_load_to_nxast(const struct ofpact_reg_load *load, struct ofpbuf *openflow) { struct nx_action_reg_load *narl; narl = ofputil_put_NXAST_REG_LOAD(openflow); narl->ofs_nbits = nxm_encode_ofs_nbits(load->dst.ofs, 
load->dst.n_bits); narl->dst = htonl(load->dst.field->nxm_header); narl->value = load->subvalue.be64[1]; } static void set_field_to_ofast(const struct ofpact_reg_load *load, struct ofpbuf *openflow) { const struct mf_field *mf = load->dst.field; uint16_t padded_value_len = ROUND_UP(mf->n_bytes, 8); struct ofp12_action_set_field *oasf; char *value; /* Set field is the only action of variable length (so far), * so handling the variable length portion is open-coded here */ oasf = ofputil_put_OFPAT12_SET_FIELD(openflow); oasf->dst = htonl(mf->oxm_header); oasf->len = htons(ntohs(oasf->len) + padded_value_len); value = ofpbuf_put_zeros(openflow, padded_value_len); bitwise_copy(&load->subvalue, sizeof load->subvalue, load->dst.ofs, value, mf->n_bytes, load->dst.ofs, load->dst.n_bits); } void nxm_reg_load_to_nxast(const struct ofpact_reg_load *load, struct ofpbuf *openflow) { if (load->ofpact.compat == OFPUTIL_OFPAT12_SET_FIELD) { struct ofp_header *oh = (struct ofp_header *)openflow->l2; switch(oh->version) { case OFP13_VERSION: case OFP12_VERSION: set_field_to_ofast(load, openflow); break; case OFP11_VERSION: case OFP10_VERSION: if (load->dst.n_bits < 64) { reg_load_to_nxast(load, openflow); } else { /* Split into 64bit chunks */ int chunk, ofs; for (ofs = 0; ofs < load->dst.n_bits; ofs += chunk) { struct ofpact_reg_load subload = *load; chunk = MIN(load->dst.n_bits - ofs, 64); subload.dst.field = load->dst.field; subload.dst.ofs = load->dst.ofs + ofs; subload.dst.n_bits = chunk; bitwise_copy(&load->subvalue, sizeof load->subvalue, ofs, &subload.subvalue, sizeof subload.subvalue, 0, chunk); reg_load_to_nxast(&subload, openflow); } } break; default: NOT_REACHED(); } } else { reg_load_to_nxast(load, openflow); } } /* nxm_execute_reg_move(), nxm_execute_reg_load(). 
*/ void nxm_execute_reg_move(const struct ofpact_reg_move *move, struct flow *flow, struct flow_wildcards *wc) { union mf_subvalue mask_value; union mf_value src_value; union mf_value dst_value; memset(&mask_value, 0xff, sizeof mask_value); mf_write_subfield_flow(&move->dst, &mask_value, &wc->masks); mf_write_subfield_flow(&move->src, &mask_value, &wc->masks); mf_get_value(move->dst.field, flow, &dst_value); mf_get_value(move->src.field, flow, &src_value); bitwise_copy(&src_value, move->src.field->n_bytes, move->src.ofs, &dst_value, move->dst.field->n_bytes, move->dst.ofs, move->src.n_bits); mf_set_flow_value(move->dst.field, &dst_value, flow); } void nxm_execute_reg_load(const struct ofpact_reg_load *load, struct flow *flow) { mf_write_subfield_flow(&load->dst, &load->subvalue, flow); } void nxm_reg_load(const struct mf_subfield *dst, uint64_t src_data, struct flow *flow, struct flow_wildcards *wc) { union mf_subvalue src_subvalue; union mf_subvalue mask_value; ovs_be64 src_data_be = htonll(src_data); memset(&mask_value, 0xff, sizeof mask_value); mf_write_subfield_flow(dst, &mask_value, &wc->masks); bitwise_copy(&src_data_be, sizeof src_data_be, 0, &src_subvalue, sizeof src_subvalue, 0, sizeof src_data_be * 8); mf_write_subfield_flow(dst, &src_subvalue, flow); } /* nxm_parse_stack_action, works for both push() and pop(). */ /* Parses 's' as a "push" or "pop" action, in the form described in * ovs-ofctl(8), into '*stack_action'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/ char * WARN_UNUSED_RESULT nxm_parse_stack_action(struct ofpact_stack *stack_action, const char *s) { char *error; error = mf_parse_subfield__(&stack_action->subfield, &s); if (error) { return error; } if (*s != '\0') { return xasprintf("%s: trailing garbage following push or pop", s); } return NULL; } void nxm_format_stack_push(const struct ofpact_stack *push, struct ds *s) { ds_put_cstr(s, "push:"); mf_format_subfield(&push->subfield, s); } void nxm_format_stack_pop(const struct ofpact_stack *pop, struct ds *s) { ds_put_cstr(s, "pop:"); mf_format_subfield(&pop->subfield, s); } /* Common set for both push and pop actions. */ static void stack_action_from_openflow__(const struct nx_action_stack *nasp, struct ofpact_stack *stack_action) { stack_action->subfield.field = mf_from_nxm_header(ntohl(nasp->field)); stack_action->subfield.ofs = ntohs(nasp->offset); stack_action->subfield.n_bits = ntohs(nasp->n_bits); } static void nxm_stack_to_nxast__(const struct ofpact_stack *stack_action, struct nx_action_stack *nasp) { nasp->offset = htons(stack_action->subfield.ofs); nasp->n_bits = htons(stack_action->subfield.n_bits); nasp->field = htonl(stack_action->subfield.field->nxm_header); } enum ofperr nxm_stack_push_from_openflow(const struct nx_action_stack *nasp, struct ofpbuf *ofpacts) { struct ofpact_stack *push; push = ofpact_put_STACK_PUSH(ofpacts); stack_action_from_openflow__(nasp, push); return nxm_stack_push_check(push, NULL); } enum ofperr nxm_stack_pop_from_openflow(const struct nx_action_stack *nasp, struct ofpbuf *ofpacts) { struct ofpact_stack *pop; pop = ofpact_put_STACK_POP(ofpacts); stack_action_from_openflow__(nasp, pop); return nxm_stack_pop_check(pop, NULL); } enum ofperr nxm_stack_push_check(const struct ofpact_stack *push, const struct flow *flow) { return mf_check_src(&push->subfield, flow); } enum ofperr nxm_stack_pop_check(const struct ofpact_stack *pop, const struct flow *flow) { return mf_check_dst(&pop->subfield, flow); } void 
nxm_stack_push_to_nxast(const struct ofpact_stack *stack, struct ofpbuf *openflow) { nxm_stack_to_nxast__(stack, ofputil_put_NXAST_STACK_PUSH(openflow)); } void nxm_stack_pop_to_nxast(const struct ofpact_stack *stack, struct ofpbuf *openflow) { nxm_stack_to_nxast__(stack, ofputil_put_NXAST_STACK_POP(openflow)); } /* nxm_execute_stack_push(), nxm_execute_stack_pop(). */ static void nx_stack_push(struct ofpbuf *stack, union mf_subvalue *v) { ofpbuf_put(stack, v, sizeof *v); } static union mf_subvalue * nx_stack_pop(struct ofpbuf *stack) { union mf_subvalue *v = NULL; if (stack->size) { stack->size -= sizeof *v; v = (union mf_subvalue *) ofpbuf_tail(stack); } return v; } void nxm_execute_stack_push(const struct ofpact_stack *push, const struct flow *flow, struct flow_wildcards *wc, struct ofpbuf *stack) { union mf_subvalue mask_value; union mf_subvalue dst_value; memset(&mask_value, 0xff, sizeof mask_value); mf_write_subfield_flow(&push->subfield, &mask_value, &wc->masks); mf_read_subfield(&push->subfield, flow, &dst_value); nx_stack_push(stack, &dst_value); } void nxm_execute_stack_pop(const struct ofpact_stack *pop, struct flow *flow, struct flow_wildcards *wc, struct ofpbuf *stack) { union mf_subvalue *src_value; src_value = nx_stack_pop(stack); /* Only pop if stack is not empty. Otherwise, give warning. */ if (src_value) { union mf_subvalue mask_value; memset(&mask_value, 0xff, sizeof mask_value); mf_write_subfield_flow(&pop->subfield, &mask_value, &wc->masks); mf_write_subfield_flow(&pop->subfield, src_value, flow); } else { if (!VLOG_DROP_WARN(&rl)) { char *flow_str = flow_to_string(flow); VLOG_WARN_RL(&rl, "Failed to pop from an empty stack. On flow \n" " %s", flow_str); free(flow_str); } } } openvswitch-2.0.1+git20140120/lib/nx-match.h000066400000000000000000000126041226605124000201130ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef NX_MATCH_H #define NX_MATCH_H 1 #include #include #include #include "compiler.h" #include "flow.h" #include "ofp-errors.h" #include "openvswitch/types.h" struct ds; struct match; struct mf_subfield; struct ofpact_reg_move; struct ofpact_reg_load; struct ofpact_stack; struct ofpbuf; struct nx_action_reg_load; struct nx_action_reg_move; /* Nicira Extended Match (NXM) flexible flow match helper functions. * * See include/openflow/nicira-ext.h for NXM specification. 
*/ enum ofperr nx_pull_match(struct ofpbuf *, unsigned int match_len, struct match *, ovs_be64 *cookie, ovs_be64 *cookie_mask); enum ofperr nx_pull_match_loose(struct ofpbuf *, unsigned int match_len, struct match *, ovs_be64 *cookie, ovs_be64 *cookie_mask); enum ofperr oxm_pull_match(struct ofpbuf *, struct match *); enum ofperr oxm_pull_match_loose(struct ofpbuf *, struct match *); int nx_put_match(struct ofpbuf *, const struct match *, ovs_be64 cookie, ovs_be64 cookie_mask); int oxm_put_match(struct ofpbuf *, const struct match *); char *nx_match_to_string(const uint8_t *, unsigned int match_len); char *oxm_match_to_string(const struct ofpbuf *, unsigned int match_len); int nx_match_from_string(const char *, struct ofpbuf *); int oxm_match_from_string(const char *, struct ofpbuf *); char *nxm_parse_reg_move(struct ofpact_reg_move *, const char *) WARN_UNUSED_RESULT; char *nxm_parse_reg_load(struct ofpact_reg_load *, const char *) WARN_UNUSED_RESULT; void nxm_format_reg_move(const struct ofpact_reg_move *, struct ds *); void nxm_format_reg_load(const struct ofpact_reg_load *, struct ds *); enum ofperr nxm_reg_move_from_openflow(const struct nx_action_reg_move *, struct ofpbuf *ofpacts); enum ofperr nxm_reg_load_from_openflow(const struct nx_action_reg_load *, struct ofpbuf *ofpacts); enum ofperr nxm_reg_load_from_openflow12_set_field( const struct ofp12_action_set_field * oasf, struct ofpbuf *ofpacts); enum ofperr nxm_reg_move_check(const struct ofpact_reg_move *, const struct flow *); enum ofperr nxm_reg_load_check(const struct ofpact_reg_load *, const struct flow *); void nxm_reg_move_to_nxast(const struct ofpact_reg_move *, struct ofpbuf *openflow); void nxm_reg_load_to_nxast(const struct ofpact_reg_load *, struct ofpbuf *openflow); void nxm_execute_reg_move(const struct ofpact_reg_move *, struct flow *, struct flow_wildcards *); void nxm_execute_reg_load(const struct ofpact_reg_load *, struct flow *); void nxm_reg_load(const struct mf_subfield *, uint64_t 
src_data, struct flow *, struct flow_wildcards *); char *nxm_parse_stack_action(struct ofpact_stack *, const char *) WARN_UNUSED_RESULT; void nxm_format_stack_push(const struct ofpact_stack *, struct ds *); void nxm_format_stack_pop(const struct ofpact_stack *, struct ds *); enum ofperr nxm_stack_push_from_openflow(const struct nx_action_stack *, struct ofpbuf *ofpacts); enum ofperr nxm_stack_pop_from_openflow(const struct nx_action_stack *, struct ofpbuf *ofpacts); enum ofperr nxm_stack_push_check(const struct ofpact_stack *, const struct flow *); enum ofperr nxm_stack_pop_check(const struct ofpact_stack *, const struct flow *); void nxm_stack_push_to_nxast(const struct ofpact_stack *, struct ofpbuf *openflow); void nxm_stack_pop_to_nxast(const struct ofpact_stack *, struct ofpbuf *openflow); void nxm_execute_stack_push(const struct ofpact_stack *, const struct flow *, struct flow_wildcards *, struct ofpbuf *); void nxm_execute_stack_pop(const struct ofpact_stack *, struct flow *, struct flow_wildcards *, struct ofpbuf *); int nxm_field_bytes(uint32_t header); int nxm_field_bits(uint32_t header); /* Dealing with the 'ofs_nbits' members in several Nicira extensions. */ static inline ovs_be16 nxm_encode_ofs_nbits(int ofs, int n_bits) { return htons((ofs << 6) | (n_bits - 1)); } static inline int nxm_decode_ofs(ovs_be16 ofs_nbits) { return ntohs(ofs_nbits) >> 6; } static inline int nxm_decode_n_bits(ovs_be16 ofs_nbits) { return (ntohs(ofs_nbits) & 0x3f) + 1; } /* This is my guess at the length of a "typical" nx_match, for use in * predicting space requirements. */ #define NXM_TYPICAL_LEN 64 #endif /* nx-match.h */ openvswitch-2.0.1+git20140120/lib/odp-execute.c000066400000000000000000000156201226605124000206120ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * Copyright (c) 2013 Simon Horman * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "odp-execute.h" #include #include #include #include "netlink.h" #include "ofpbuf.h" #include "odp-util.h" #include "packets.h" #include "util.h" static void odp_eth_set_addrs(struct ofpbuf *packet, const struct ovs_key_ethernet *eth_key) { struct eth_header *eh = packet->l2; memcpy(eh->eth_src, eth_key->eth_src, sizeof eh->eth_src); memcpy(eh->eth_dst, eth_key->eth_dst, sizeof eh->eth_dst); } static void odp_set_tunnel_action(const struct nlattr *a, struct flow_tnl *tun_key) { enum odp_key_fitness fitness; fitness = odp_tun_key_from_attr(a, tun_key); ovs_assert(fitness != ODP_FIT_ERROR); } static void odp_execute_set_action(struct ofpbuf *packet, const struct nlattr *a, struct flow *flow) { enum ovs_key_attr type = nl_attr_type(a); const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; const struct ovs_key_tcp *tcp_key; const struct ovs_key_udp *udp_key; const struct ovs_key_sctp *sctp_key; switch (type) { case OVS_KEY_ATTR_PRIORITY: flow->skb_priority = nl_attr_get_u32(a); break; case OVS_KEY_ATTR_TUNNEL: odp_set_tunnel_action(a, &flow->tunnel); break; case OVS_KEY_ATTR_SKB_MARK: flow->pkt_mark = nl_attr_get_u32(a); break; case OVS_KEY_ATTR_ETHERNET: odp_eth_set_addrs(packet, nl_attr_get_unspec(a, sizeof(struct ovs_key_ethernet))); break; case OVS_KEY_ATTR_IPV4: ipv4_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv4)); packet_set_ipv4(packet, ipv4_key->ipv4_src, ipv4_key->ipv4_dst, ipv4_key->ipv4_tos, ipv4_key->ipv4_ttl); break; case OVS_KEY_ATTR_IPV6: ipv6_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv6)); 
packet_set_ipv6(packet, ipv6_key->ipv6_proto, ipv6_key->ipv6_src, ipv6_key->ipv6_dst, ipv6_key->ipv6_tclass, ipv6_key->ipv6_label, ipv6_key->ipv6_hlimit); break; case OVS_KEY_ATTR_TCP: tcp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_tcp)); packet_set_tcp_port(packet, tcp_key->tcp_src, tcp_key->tcp_dst); break; case OVS_KEY_ATTR_UDP: udp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_udp)); packet_set_udp_port(packet, udp_key->udp_src, udp_key->udp_dst); break; case OVS_KEY_ATTR_SCTP: sctp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_sctp)); packet_set_sctp_port(packet, sctp_key->sctp_src, sctp_key->sctp_dst); break; case OVS_KEY_ATTR_MPLS: set_mpls_lse(packet, nl_attr_get_be32(a)); break; case OVS_KEY_ATTR_UNSPEC: case OVS_KEY_ATTR_ENCAP: case OVS_KEY_ATTR_ETHERTYPE: case OVS_KEY_ATTR_IN_PORT: case OVS_KEY_ATTR_VLAN: case OVS_KEY_ATTR_ICMP: case OVS_KEY_ATTR_ICMPV6: case OVS_KEY_ATTR_ARP: case OVS_KEY_ATTR_ND: case __OVS_KEY_ATTR_MAX: default: NOT_REACHED(); } } static void odp_execute_sample(void *dp, struct ofpbuf *packet, struct flow *key, const struct nlattr *action, void (*output)(void *dp, struct ofpbuf *packet, uint32_t out_port), void (*userspace)(void *dp, struct ofpbuf *packet, const struct flow *key, const struct nlattr *a)) { const struct nlattr *subactions = NULL; const struct nlattr *a; size_t left; NL_NESTED_FOR_EACH_UNSAFE (a, left, action) { int type = nl_attr_type(a); switch ((enum ovs_sample_attr) type) { case OVS_SAMPLE_ATTR_PROBABILITY: if (random_uint32() >= nl_attr_get_u32(a)) { return; } break; case OVS_SAMPLE_ATTR_ACTIONS: subactions = a; break; case OVS_SAMPLE_ATTR_UNSPEC: case __OVS_SAMPLE_ATTR_MAX: default: NOT_REACHED(); } } odp_execute_actions(dp, packet, key, nl_attr_get(subactions), nl_attr_get_size(subactions), output, userspace); } void odp_execute_actions(void *dp, struct ofpbuf *packet, struct flow *key, const struct nlattr *actions, size_t actions_len, void (*output)(void *dp, struct ofpbuf *packet, uint32_t 
out_port), void (*userspace)(void *dp, struct ofpbuf *packet, const struct flow *key, const struct nlattr *a)) { const struct nlattr *a; unsigned int left; NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) { int type = nl_attr_type(a); switch ((enum ovs_action_attr) type) { case OVS_ACTION_ATTR_OUTPUT: if (output) { output(dp, packet, nl_attr_get_u32(a)); } break; case OVS_ACTION_ATTR_USERSPACE: { if (userspace) { const struct nlattr *userdata; userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); userspace(dp, packet, key, userdata); } break; } case OVS_ACTION_ATTR_PUSH_VLAN: { const struct ovs_action_push_vlan *vlan = nl_attr_get(a); eth_push_vlan(packet, vlan->vlan_tci); break; } case OVS_ACTION_ATTR_POP_VLAN: eth_pop_vlan(packet); break; case OVS_ACTION_ATTR_PUSH_MPLS: { const struct ovs_action_push_mpls *mpls = nl_attr_get(a); push_mpls(packet, mpls->mpls_ethertype, mpls->mpls_lse); break; } case OVS_ACTION_ATTR_POP_MPLS: pop_mpls(packet, nl_attr_get_be16(a)); break; case OVS_ACTION_ATTR_SET: odp_execute_set_action(packet, nl_attr_get(a), key); break; case OVS_ACTION_ATTR_SAMPLE: odp_execute_sample(dp, packet, key, a, output, userspace); break; case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: NOT_REACHED(); } } } openvswitch-2.0.1+git20140120/lib/odp-execute.h000066400000000000000000000023421226605124000206140ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * Copyright (c) 2013 Simon Horman * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef EXECUTE_ACTIONS_H #define EXECUTE_ACTIONS_H 1 #include #include struct flow; struct nlattr; struct ofpbuf; void odp_execute_actions(void *dp, struct ofpbuf *packet, struct flow *key, const struct nlattr *actions, size_t actions_len, void (*output)(void *dp, struct ofpbuf *packet, uint32_t out_port), void (*userspace)(void *dp, struct ofpbuf *packet, const struct flow *key, const struct nlattr *a)); #endif openvswitch-2.0.1+git20140120/lib/odp-util.c000066400000000000000000004001751226605124000201300ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include "odp-util.h" #include #include #include #include #include #include #include #include "byte-order.h" #include "coverage.h" #include "dynamic-string.h" #include "flow.h" #include "netlink.h" #include "ofpbuf.h" #include "packets.h" #include "simap.h" #include "timeval.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(odp_util); /* The interface between userspace and kernel uses an "OVS_*" prefix. * Since this is fairly non-specific for the OVS userspace components, * "ODP_*" (Open vSwitch Datapath) is used as the prefix for * interactions with the datapath. */ /* The set of characters that may separate one action or one key attribute * from another. 
*/ static const char *delimiters = ", \t\r\n"; static int parse_odp_key_mask_attr(const char *, const struct simap *port_names, struct ofpbuf *, struct ofpbuf *); static void format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, struct ds *ds, bool verbose); /* Returns one the following for the action with the given OVS_ACTION_ATTR_* * 'type': * * - For an action whose argument has a fixed length, returned that * nonnegative length in bytes. * * - For an action with a variable-length argument, returns -2. * * - For an invalid 'type', returns -1. */ static int odp_action_len(uint16_t type) { if (type > OVS_ACTION_ATTR_MAX) { return -1; } switch ((enum ovs_action_attr) type) { case OVS_ACTION_ATTR_OUTPUT: return sizeof(uint32_t); case OVS_ACTION_ATTR_USERSPACE: return -2; case OVS_ACTION_ATTR_PUSH_VLAN: return sizeof(struct ovs_action_push_vlan); case OVS_ACTION_ATTR_POP_VLAN: return 0; case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls); case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16); case OVS_ACTION_ATTR_SET: return -2; case OVS_ACTION_ATTR_SAMPLE: return -2; case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: return -1; } return -1; } /* Returns a string form of 'attr'. The return value is either a statically * allocated constant string or the 'bufsize'-byte buffer 'namebuf'. 'bufsize' * should be at least OVS_KEY_ATTR_BUFSIZE. 
*/ enum { OVS_KEY_ATTR_BUFSIZE = 3 + INT_STRLEN(unsigned int) + 1 }; static const char * ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize) { switch (attr) { case OVS_KEY_ATTR_UNSPEC: return "unspec"; case OVS_KEY_ATTR_ENCAP: return "encap"; case OVS_KEY_ATTR_PRIORITY: return "skb_priority"; case OVS_KEY_ATTR_SKB_MARK: return "skb_mark"; case OVS_KEY_ATTR_TUNNEL: return "tunnel"; case OVS_KEY_ATTR_IN_PORT: return "in_port"; case OVS_KEY_ATTR_ETHERNET: return "eth"; case OVS_KEY_ATTR_VLAN: return "vlan"; case OVS_KEY_ATTR_ETHERTYPE: return "eth_type"; case OVS_KEY_ATTR_IPV4: return "ipv4"; case OVS_KEY_ATTR_IPV6: return "ipv6"; case OVS_KEY_ATTR_TCP: return "tcp"; case OVS_KEY_ATTR_UDP: return "udp"; case OVS_KEY_ATTR_SCTP: return "sctp"; case OVS_KEY_ATTR_ICMP: return "icmp"; case OVS_KEY_ATTR_ICMPV6: return "icmpv6"; case OVS_KEY_ATTR_ARP: return "arp"; case OVS_KEY_ATTR_ND: return "nd"; case OVS_KEY_ATTR_MPLS: return "mpls"; case __OVS_KEY_ATTR_MAX: default: snprintf(namebuf, bufsize, "key%u", (unsigned int) attr); return namebuf; } } static void format_generic_odp_action(struct ds *ds, const struct nlattr *a) { size_t len = nl_attr_get_size(a); ds_put_format(ds, "action%"PRId16, nl_attr_type(a)); if (len) { const uint8_t *unspec; unsigned int i; unspec = nl_attr_get(a); for (i = 0; i < len; i++) { ds_put_char(ds, i ? 
' ': '('); ds_put_format(ds, "%02x", unspec[i]); } ds_put_char(ds, ')'); } } static void format_odp_sample_action(struct ds *ds, const struct nlattr *attr) { static const struct nl_policy ovs_sample_policy[] = { { NL_A_NO_ATTR, 0, 0, false }, /* OVS_SAMPLE_ATTR_UNSPEC */ { NL_A_U32, 0, 0, false }, /* OVS_SAMPLE_ATTR_PROBABILITY */ { NL_A_NESTED, 0, 0, false }, /* OVS_SAMPLE_ATTR_ACTIONS */ }; struct nlattr *a[ARRAY_SIZE(ovs_sample_policy)]; double percentage; const struct nlattr *nla_acts; int len; ds_put_cstr(ds, "sample"); if (!nl_parse_nested(attr, ovs_sample_policy, a, ARRAY_SIZE(a))) { ds_put_cstr(ds, "(error)"); return; } percentage = (100.0 * nl_attr_get_u32(a[OVS_SAMPLE_ATTR_PROBABILITY])) / UINT32_MAX; ds_put_format(ds, "(sample=%.1f%%,", percentage); ds_put_cstr(ds, "actions("); nla_acts = nl_attr_get(a[OVS_SAMPLE_ATTR_ACTIONS]); len = nl_attr_get_size(a[OVS_SAMPLE_ATTR_ACTIONS]); format_odp_actions(ds, nla_acts, len); ds_put_format(ds, "))"); } static const char * slow_path_reason_to_string(enum slow_path_reason reason) { switch (reason) { case SLOW_CFM: return "cfm"; case SLOW_LACP: return "lacp"; case SLOW_STP: return "stp"; case SLOW_BFD: return "bfd"; case SLOW_CONTROLLER: return "controller"; case __SLOW_MAX: default: return NULL; } } static enum slow_path_reason string_to_slow_path_reason(const char *string) { enum slow_path_reason i; for (i = 1; i < __SLOW_MAX; i++) { if (!strcmp(string, slow_path_reason_to_string(i))) { return i; } } return 0; } static int parse_flags(const char *s, const char *(*bit_to_string)(uint32_t), uint32_t *res) { uint32_t result = 0; int n = 0; if (s[n] != '(') { return -EINVAL; } n++; while (s[n] != ')') { unsigned long long int flags; uint32_t bit; int n0; if (sscanf(&s[n], "%lli%n", &flags, &n0) > 0 && n0 > 0) { n += n0 + (s[n + n0] == ','); result |= flags; continue; } for (bit = 1; bit; bit <<= 1) { const char *name = bit_to_string(bit); size_t len; if (!name) { continue; } len = strlen(name); if (!strncmp(s + n, 
name, len) && (s[n + len] == ',' || s[n + len] == ')')) { result |= bit; n += len + (s[n + len] == ','); break; } } if (!bit) { return -EINVAL; } } n++; *res = result; return n; } static void format_odp_userspace_action(struct ds *ds, const struct nlattr *attr) { static const struct nl_policy ovs_userspace_policy[] = { { NL_A_NO_ATTR, 0, 0, false }, /* OVS_USERSPACE_ATTR_UNSPEC */ { NL_A_U32, 0, 0, false }, /* OVS_USERSPACE_ATTR_PID */ { NL_A_UNSPEC, 0, 0, true }, /* OVS_USERSPACE_ATTR_USERDATA */ }; struct nlattr *a[ARRAY_SIZE(ovs_userspace_policy)]; const struct nlattr *userdata_attr; if (!nl_parse_nested(attr, ovs_userspace_policy, a, ARRAY_SIZE(a))) { ds_put_cstr(ds, "userspace(error)"); return; } ds_put_format(ds, "userspace(pid=%"PRIu32, nl_attr_get_u32(a[OVS_USERSPACE_ATTR_PID])); userdata_attr = a[OVS_USERSPACE_ATTR_USERDATA]; if (userdata_attr) { const uint8_t *userdata = nl_attr_get(userdata_attr); size_t userdata_len = nl_attr_get_size(userdata_attr); bool userdata_unspec = true; union user_action_cookie cookie; if (userdata_len >= sizeof cookie.type && userdata_len <= sizeof cookie) { memset(&cookie, 0, sizeof cookie); memcpy(&cookie, userdata, userdata_len); userdata_unspec = false; if (userdata_len == sizeof cookie.sflow && cookie.type == USER_ACTION_COOKIE_SFLOW) { ds_put_format(ds, ",sFlow(" "vid=%"PRIu16",pcp=%"PRIu8",output=%"PRIu32")", vlan_tci_to_vid(cookie.sflow.vlan_tci), vlan_tci_to_pcp(cookie.sflow.vlan_tci), cookie.sflow.output); } else if (userdata_len == sizeof cookie.slow_path && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) { const char *reason; reason = slow_path_reason_to_string(cookie.slow_path.reason); reason = reason ? 
reason : ""; ds_put_format(ds, ",slow_path(%s)", reason); } else if (userdata_len == sizeof cookie.flow_sample && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) { ds_put_format(ds, ",flow_sample(probability=%"PRIu16 ",collector_set_id=%"PRIu32 ",obs_domain_id=%"PRIu32 ",obs_point_id=%"PRIu32")", cookie.flow_sample.probability, cookie.flow_sample.collector_set_id, cookie.flow_sample.obs_domain_id, cookie.flow_sample.obs_point_id); } else if (userdata_len >= sizeof cookie.ipfix && cookie.type == USER_ACTION_COOKIE_IPFIX) { ds_put_format(ds, ",ipfix"); } else { userdata_unspec = true; } } if (userdata_unspec) { size_t i; ds_put_format(ds, ",userdata("); for (i = 0; i < userdata_len; i++) { ds_put_format(ds, "%02x", userdata[i]); } ds_put_char(ds, ')'); } } ds_put_char(ds, ')'); } static void format_vlan_tci(struct ds *ds, ovs_be16 vlan_tci) { ds_put_format(ds, "vid=%"PRIu16",pcp=%d", vlan_tci_to_vid(vlan_tci), vlan_tci_to_pcp(vlan_tci)); if (!(vlan_tci & htons(VLAN_CFI))) { ds_put_cstr(ds, ",cfi=0"); } } static void format_mpls_lse(struct ds *ds, ovs_be32 mpls_lse) { ds_put_format(ds, "label=%"PRIu32",tc=%d,ttl=%d,bos=%d", mpls_lse_to_label(mpls_lse), mpls_lse_to_tc(mpls_lse), mpls_lse_to_ttl(mpls_lse), mpls_lse_to_bos(mpls_lse)); } static void format_mpls(struct ds *ds, const struct ovs_key_mpls *mpls_key, const struct ovs_key_mpls *mpls_mask) { ovs_be32 key = mpls_key->mpls_lse; if (mpls_mask == NULL) { format_mpls_lse(ds, key); } else { ovs_be32 mask = mpls_mask->mpls_lse; ds_put_format(ds, "label=%"PRIu32"/0x%x,tc=%d/%x,ttl=%d/0x%x,bos=%d/%x", mpls_lse_to_label(key), mpls_lse_to_label(mask), mpls_lse_to_tc(key), mpls_lse_to_tc(mask), mpls_lse_to_ttl(key), mpls_lse_to_ttl(mask), mpls_lse_to_bos(key), mpls_lse_to_bos(mask)); } } static void format_odp_action(struct ds *ds, const struct nlattr *a) { int expected_len; enum ovs_action_attr type = nl_attr_type(a); const struct ovs_action_push_vlan *vlan; expected_len = odp_action_len(nl_attr_type(a)); if (expected_len 
!= -2 && nl_attr_get_size(a) != expected_len) { ds_put_format(ds, "bad length %zu, expected %d for: ", nl_attr_get_size(a), expected_len); format_generic_odp_action(ds, a); return; } switch (type) { case OVS_ACTION_ATTR_OUTPUT: ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a)); break; case OVS_ACTION_ATTR_USERSPACE: format_odp_userspace_action(ds, a); break; case OVS_ACTION_ATTR_SET: ds_put_cstr(ds, "set("); format_odp_key_attr(nl_attr_get(a), NULL, ds, true); ds_put_cstr(ds, ")"); break; case OVS_ACTION_ATTR_PUSH_VLAN: vlan = nl_attr_get(a); ds_put_cstr(ds, "push_vlan("); if (vlan->vlan_tpid != htons(ETH_TYPE_VLAN)) { ds_put_format(ds, "tpid=0x%04"PRIx16",", ntohs(vlan->vlan_tpid)); } format_vlan_tci(ds, vlan->vlan_tci); ds_put_char(ds, ')'); break; case OVS_ACTION_ATTR_POP_VLAN: ds_put_cstr(ds, "pop_vlan"); break; case OVS_ACTION_ATTR_PUSH_MPLS: { const struct ovs_action_push_mpls *mpls = nl_attr_get(a); ds_put_cstr(ds, "push_mpls("); format_mpls_lse(ds, mpls->mpls_lse); ds_put_format(ds, ",eth_type=0x%"PRIx16")", ntohs(mpls->mpls_ethertype)); break; } case OVS_ACTION_ATTR_POP_MPLS: { ovs_be16 ethertype = nl_attr_get_be16(a); ds_put_format(ds, "pop_mpls(eth_type=0x%"PRIx16")", ntohs(ethertype)); break; } case OVS_ACTION_ATTR_SAMPLE: format_odp_sample_action(ds, a); break; case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: default: format_generic_odp_action(ds, a); break; } } void format_odp_actions(struct ds *ds, const struct nlattr *actions, size_t actions_len) { if (actions_len) { const struct nlattr *a; unsigned int left; NL_ATTR_FOR_EACH (a, left, actions, actions_len) { if (a != actions) { ds_put_char(ds, ','); } format_odp_action(ds, a); } if (left) { int i; if (left == actions_len) { ds_put_cstr(ds, ""); } ds_put_format(ds, ",***%u leftover bytes*** (", left); for (i = 0; i < left; i++) { ds_put_format(ds, "%02x", ((const uint8_t *) a)[i]); } ds_put_char(ds, ')'); } } else { ds_put_cstr(ds, "drop"); } } static int parse_odp_action(const char *s, const 
struct simap *port_names, struct ofpbuf *actions) { /* Many of the sscanf calls in this function use oversized destination * fields because some sscanf() implementations truncate the range of %i * directives, so that e.g. "%"SCNi16 interprets input of "0xfedc" as a * value of 0x7fff. The other alternatives are to allow only a single * radix (e.g. decimal or hexadecimal) or to write more sophisticated * parsers. * * The tun_id parser has to use an alternative approach because there is no * type larger than 64 bits. */ { unsigned long long int port; int n = -1; if (sscanf(s, "%lli%n", &port, &n) > 0 && n > 0) { nl_msg_put_u32(actions, OVS_ACTION_ATTR_OUTPUT, port); return n; } } if (port_names) { int len = strcspn(s, delimiters); struct simap_node *node; node = simap_find_len(port_names, s, len); if (node) { nl_msg_put_u32(actions, OVS_ACTION_ATTR_OUTPUT, node->data); return len; } } { unsigned long long int pid; unsigned long long int output; unsigned long long int probability; unsigned long long int collector_set_id; unsigned long long int obs_domain_id; unsigned long long int obs_point_id; int vid, pcp; int n = -1; if (sscanf(s, "userspace(pid=%lli)%n", &pid, &n) > 0 && n > 0) { odp_put_userspace_action(pid, NULL, 0, actions); return n; } else if (sscanf(s, "userspace(pid=%lli,sFlow(vid=%i," "pcp=%i,output=%lli))%n", &pid, &vid, &pcp, &output, &n) > 0 && n > 0) { union user_action_cookie cookie; uint16_t tci; tci = vid | (pcp << VLAN_PCP_SHIFT); if (tci) { tci |= VLAN_CFI; } cookie.type = USER_ACTION_COOKIE_SFLOW; cookie.sflow.vlan_tci = htons(tci); cookie.sflow.output = output; odp_put_userspace_action(pid, &cookie, sizeof cookie.sflow, actions); return n; } else if (sscanf(s, "userspace(pid=%lli,slow_path(%n", &pid, &n) > 0 && n > 0) { union user_action_cookie cookie; char reason[32]; if (s[n] == ')' && s[n + 1] == ')') { reason[0] = '\0'; n += 2; } else if (sscanf(s + n, "%31[^)]))", reason) > 0) { n += strlen(reason) + 2; } else { return -EINVAL; } cookie.type 
= USER_ACTION_COOKIE_SLOW_PATH; cookie.slow_path.unused = 0; cookie.slow_path.reason = string_to_slow_path_reason(reason); if (reason[0] && !cookie.slow_path.reason) { return -EINVAL; } odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, actions); return n; } else if (sscanf(s, "userspace(pid=%lli,flow_sample(probability=%lli," "collector_set_id=%lli,obs_domain_id=%lli," "obs_point_id=%lli))%n", &pid, &probability, &collector_set_id, &obs_domain_id, &obs_point_id, &n) > 0 && n > 0) { union user_action_cookie cookie; cookie.type = USER_ACTION_COOKIE_FLOW_SAMPLE; cookie.flow_sample.probability = probability; cookie.flow_sample.collector_set_id = collector_set_id; cookie.flow_sample.obs_domain_id = obs_domain_id; cookie.flow_sample.obs_point_id = obs_point_id; odp_put_userspace_action(pid, &cookie, sizeof cookie.flow_sample, actions); return n; } else if (sscanf(s, "userspace(pid=%lli,ipfix)%n", &pid, &n) > 0 && n > 0) { union user_action_cookie cookie; cookie.type = USER_ACTION_COOKIE_IPFIX; odp_put_userspace_action(pid, &cookie, sizeof cookie.ipfix, actions); return n; } else if (sscanf(s, "userspace(pid=%lli,userdata(%n", &pid, &n) > 0 && n > 0) { struct ofpbuf buf; char *end; ofpbuf_init(&buf, 16); end = ofpbuf_put_hex(&buf, &s[n], NULL); if (end[0] == ')' && end[1] == ')') { odp_put_userspace_action(pid, buf.data, buf.size, actions); ofpbuf_uninit(&buf); return (end + 2) - s; } } } if (!strncmp(s, "set(", 4)) { size_t start_ofs; int retval; start_ofs = nl_msg_start_nested(actions, OVS_ACTION_ATTR_SET); retval = parse_odp_key_mask_attr(s + 4, port_names, actions, NULL); if (retval < 0) { return retval; } if (s[retval + 4] != ')') { return -EINVAL; } nl_msg_end_nested(actions, start_ofs); return retval + 5; } { struct ovs_action_push_vlan push; int tpid = ETH_TYPE_VLAN; int vid, pcp; int cfi = 1; int n = -1; if ((sscanf(s, "push_vlan(vid=%i,pcp=%i)%n", &vid, &pcp, &n) > 0 && n > 0) || (sscanf(s, "push_vlan(vid=%i,pcp=%i,cfi=%i)%n", &vid, &pcp, &cfi, &n) 
> 0 && n > 0) || (sscanf(s, "push_vlan(tpid=%i,vid=%i,pcp=%i)%n", &tpid, &vid, &pcp, &n) > 0 && n > 0) || (sscanf(s, "push_vlan(tpid=%i,vid=%i,pcp=%i,cfi=%i)%n", &tpid, &vid, &pcp, &cfi, &n) > 0 && n > 0)) { push.vlan_tpid = htons(tpid); push.vlan_tci = htons((vid << VLAN_VID_SHIFT) | (pcp << VLAN_PCP_SHIFT) | (cfi ? VLAN_CFI : 0)); nl_msg_put_unspec(actions, OVS_ACTION_ATTR_PUSH_VLAN, &push, sizeof push); return n; } } if (!strncmp(s, "pop_vlan", 8)) { nl_msg_put_flag(actions, OVS_ACTION_ATTR_POP_VLAN); return 8; } { double percentage; int n = -1; if (sscanf(s, "sample(sample=%lf%%,actions(%n", &percentage, &n) > 0 && percentage >= 0. && percentage <= 100.0 && n > 0) { size_t sample_ofs, actions_ofs; double probability; probability = floor(UINT32_MAX * (percentage / 100.0) + .5); sample_ofs = nl_msg_start_nested(actions, OVS_ACTION_ATTR_SAMPLE); nl_msg_put_u32(actions, OVS_SAMPLE_ATTR_PROBABILITY, (probability <= 0 ? 0 : probability >= UINT32_MAX ? UINT32_MAX : probability)); actions_ofs = nl_msg_start_nested(actions, OVS_SAMPLE_ATTR_ACTIONS); for (;;) { int retval; n += strspn(s + n, delimiters); if (s[n] == ')') { break; } retval = parse_odp_action(s + n, port_names, actions); if (retval < 0) { return retval; } n += retval; } nl_msg_end_nested(actions, actions_ofs); nl_msg_end_nested(actions, sample_ofs); return s[n + 1] == ')' ? n + 2 : -EINVAL; } } return -EINVAL; } /* Parses the string representation of datapath actions, in the format output * by format_odp_action(). Returns 0 if successful, otherwise a positive errno * value. On success, the ODP actions are appended to 'actions' as a series of * Netlink attributes. On failure, no data is appended to 'actions'. Either * way, 'actions''s data might be reallocated. 
*/ int odp_actions_from_string(const char *s, const struct simap *port_names, struct ofpbuf *actions) { size_t old_size; if (!strcasecmp(s, "drop")) { return 0; } old_size = actions->size; for (;;) { int retval; s += strspn(s, delimiters); if (!*s) { return 0; } retval = parse_odp_action(s, port_names, actions); if (retval < 0 || !strchr(delimiters, s[retval])) { actions->size = old_size; return -retval; } s += retval; } return 0; } /* Returns the correct length of the payload for a flow key attribute of the * specified 'type', -1 if 'type' is unknown, or -2 if the attribute's payload * is variable length. */ static int odp_flow_key_attr_len(uint16_t type) { if (type > OVS_KEY_ATTR_MAX) { return -1; } switch ((enum ovs_key_attr) type) { case OVS_KEY_ATTR_ENCAP: return -2; case OVS_KEY_ATTR_PRIORITY: return 4; case OVS_KEY_ATTR_SKB_MARK: return 4; case OVS_KEY_ATTR_TUNNEL: return -2; case OVS_KEY_ATTR_IN_PORT: return 4; case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); case OVS_KEY_ATTR_VLAN: return sizeof(ovs_be16); case OVS_KEY_ATTR_ETHERTYPE: return 2; case OVS_KEY_ATTR_MPLS: return sizeof(struct ovs_key_mpls); case OVS_KEY_ATTR_IPV4: return sizeof(struct ovs_key_ipv4); case OVS_KEY_ATTR_IPV6: return sizeof(struct ovs_key_ipv6); case OVS_KEY_ATTR_TCP: return sizeof(struct ovs_key_tcp); case OVS_KEY_ATTR_UDP: return sizeof(struct ovs_key_udp); case OVS_KEY_ATTR_SCTP: return sizeof(struct ovs_key_sctp); case OVS_KEY_ATTR_ICMP: return sizeof(struct ovs_key_icmp); case OVS_KEY_ATTR_ICMPV6: return sizeof(struct ovs_key_icmpv6); case OVS_KEY_ATTR_ARP: return sizeof(struct ovs_key_arp); case OVS_KEY_ATTR_ND: return sizeof(struct ovs_key_nd); case OVS_KEY_ATTR_UNSPEC: case __OVS_KEY_ATTR_MAX: return -1; } return -1; } static void format_generic_odp_key(const struct nlattr *a, struct ds *ds) { size_t len = nl_attr_get_size(a); if (len) { const uint8_t *unspec; unsigned int i; unspec = nl_attr_get(a); for (i = 0; i < len; i++) { if (i) { ds_put_char(ds, ' 
'); } ds_put_format(ds, "%02x", unspec[i]); } } } static const char * ovs_frag_type_to_string(enum ovs_frag_type type) { switch (type) { case OVS_FRAG_TYPE_NONE: return "no"; case OVS_FRAG_TYPE_FIRST: return "first"; case OVS_FRAG_TYPE_LATER: return "later"; case __OVS_FRAG_TYPE_MAX: default: return ""; } } static int tunnel_key_attr_len(int type) { switch (type) { case OVS_TUNNEL_KEY_ATTR_ID: return 8; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: return 4; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: return 4; case OVS_TUNNEL_KEY_ATTR_TOS: return 1; case OVS_TUNNEL_KEY_ATTR_TTL: return 1; case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: return 0; case OVS_TUNNEL_KEY_ATTR_CSUM: return 0; case __OVS_TUNNEL_KEY_ATTR_MAX: return -1; } return -1; } enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) { unsigned int left; const struct nlattr *a; bool ttl = false; bool unknown = false; NL_NESTED_FOR_EACH(a, left, attr) { uint16_t type = nl_attr_type(a); size_t len = nl_attr_get_size(a); int expected_len = tunnel_key_attr_len(type); if (len != expected_len && expected_len >= 0) { return ODP_FIT_ERROR; } switch (type) { case OVS_TUNNEL_KEY_ATTR_ID: tun->tun_id = nl_attr_get_be64(a); tun->flags |= FLOW_TNL_F_KEY; break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: tun->ip_src = nl_attr_get_be32(a); break; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: tun->ip_dst = nl_attr_get_be32(a); break; case OVS_TUNNEL_KEY_ATTR_TOS: tun->ip_tos = nl_attr_get_u8(a); break; case OVS_TUNNEL_KEY_ATTR_TTL: tun->ip_ttl = nl_attr_get_u8(a); ttl = true; break; case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: tun->flags |= FLOW_TNL_F_DONT_FRAGMENT; break; case OVS_TUNNEL_KEY_ATTR_CSUM: tun->flags |= FLOW_TNL_F_CSUM; break; default: /* Allow this to show up as unexpected, if there are unknown * tunnel attribute, eventually resulting in ODP_FIT_TOO_MUCH. 
*/ unknown = true; break; } } if (!ttl) { return ODP_FIT_ERROR; } if (unknown) { return ODP_FIT_TOO_MUCH; } return ODP_FIT_PERFECT; } static void tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key) { size_t tun_key_ofs; tun_key_ofs = nl_msg_start_nested(a, OVS_KEY_ATTR_TUNNEL); if (tun_key->flags & FLOW_TNL_F_KEY) { nl_msg_put_be64(a, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id); } if (tun_key->ip_src) { nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ip_src); } if (tun_key->ip_dst) { nl_msg_put_be32(a, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ip_dst); } if (tun_key->ip_tos) { nl_msg_put_u8(a, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ip_tos); } nl_msg_put_u8(a, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ip_ttl); if (tun_key->flags & FLOW_TNL_F_DONT_FRAGMENT) { nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT); } if (tun_key->flags & FLOW_TNL_F_CSUM) { nl_msg_put_flag(a, OVS_TUNNEL_KEY_ATTR_CSUM); } nl_msg_end_nested(a, tun_key_ofs); } static bool odp_mask_attr_is_wildcard(const struct nlattr *ma) { return is_all_zeros(nl_attr_get(ma), nl_attr_get_size(ma)); } static bool odp_mask_attr_is_exact(const struct nlattr *ma) { bool is_exact = false; enum ovs_key_attr attr = nl_attr_type(ma); if (attr == OVS_KEY_ATTR_TUNNEL) { /* XXX this is a hack for now. Should change * the exact match dection to per field * instead of per attribute. */ struct flow_tnl tun_mask; memset(&tun_mask, 0, sizeof tun_mask); odp_tun_key_from_attr(ma, &tun_mask); if (tun_mask.flags == (FLOW_TNL_F_KEY | FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM)) { /* The flags are exact match, check the remaining fields. 
*/ tun_mask.flags = 0xffff; is_exact = is_all_ones((uint8_t *)&tun_mask, offsetof(struct flow_tnl, ip_ttl)); } } else { is_exact = is_all_ones(nl_attr_get(ma), nl_attr_get_size(ma)); } return is_exact; } static void format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, struct ds *ds, bool verbose) { struct flow_tnl tun_key; enum ovs_key_attr attr = nl_attr_type(a); char namebuf[OVS_KEY_ATTR_BUFSIZE]; int expected_len; bool is_exact; is_exact = ma ? odp_mask_attr_is_exact(ma) : true; ds_put_cstr(ds, ovs_key_attr_to_string(attr, namebuf, sizeof namebuf)); { expected_len = odp_flow_key_attr_len(nl_attr_type(a)); if (expected_len != -2) { bool bad_key_len = nl_attr_get_size(a) != expected_len; bool bad_mask_len = ma && nl_attr_get_size(a) != expected_len; if (bad_key_len || bad_mask_len) { if (bad_key_len) { ds_put_format(ds, "(bad key length %zu, expected %d)(", nl_attr_get_size(a), odp_flow_key_attr_len(nl_attr_type(a))); } format_generic_odp_key(a, ds); if (bad_mask_len) { ds_put_char(ds, '/'); ds_put_format(ds, "(bad mask length %zu, expected %d)(", nl_attr_get_size(ma), odp_flow_key_attr_len(nl_attr_type(ma))); } format_generic_odp_key(ma, ds); ds_put_char(ds, ')'); return; } } } ds_put_char(ds, '('); switch (attr) { case OVS_KEY_ATTR_ENCAP: if (ma && nl_attr_get_size(ma) && nl_attr_get_size(a)) { odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), nl_attr_get(ma), nl_attr_get_size(ma), ds, verbose); } else if (nl_attr_get_size(a)) { odp_flow_format(nl_attr_get(a), nl_attr_get_size(a), NULL, 0, ds, verbose); } break; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: ds_put_format(ds, "%#"PRIx32, nl_attr_get_u32(a)); if (!is_exact) { ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma)); } break; case OVS_KEY_ATTR_TUNNEL: memset(&tun_key, 0, sizeof tun_key); if (odp_tun_key_from_attr(a, &tun_key) == ODP_FIT_ERROR) { ds_put_format(ds, "error"); } else if (!is_exact) { struct flow_tnl tun_mask; memset(&tun_mask, 0, sizeof tun_mask); 
odp_tun_key_from_attr(ma, &tun_mask); ds_put_format(ds, "tun_id=%#"PRIx64"/%#"PRIx64 ",src="IP_FMT"/"IP_FMT",dst="IP_FMT"/"IP_FMT ",tos=%#"PRIx8"/%#"PRIx8",ttl=%"PRIu8"/%#"PRIx8 ",flags(", ntohll(tun_key.tun_id), ntohll(tun_mask.tun_id), IP_ARGS(tun_key.ip_src), IP_ARGS(tun_mask.ip_src), IP_ARGS(tun_key.ip_dst), IP_ARGS(tun_mask.ip_dst), tun_key.ip_tos, tun_mask.ip_tos, tun_key.ip_ttl, tun_mask.ip_ttl); format_flags(ds, flow_tun_flag_to_string, tun_key.flags, ','); /* XXX This code is correct, but enabling it would break the unit test. Disable it for now until the input parser is fixed. ds_put_char(ds, '/'); format_flags(ds, flow_tun_flag_to_string, tun_mask.flags, ','); */ ds_put_char(ds, ')'); } else { ds_put_format(ds, "tun_id=0x%"PRIx64",src="IP_FMT",dst="IP_FMT"," "tos=0x%"PRIx8",ttl=%"PRIu8",flags(", ntohll(tun_key.tun_id), IP_ARGS(tun_key.ip_src), IP_ARGS(tun_key.ip_dst), tun_key.ip_tos, tun_key.ip_ttl); format_flags(ds, flow_tun_flag_to_string, tun_key.flags, ','); ds_put_char(ds, ')'); } break; case OVS_KEY_ATTR_IN_PORT: ds_put_format(ds, "%"PRIu32, nl_attr_get_u32(a)); if (!is_exact) { ds_put_format(ds, "/%#"PRIx32, nl_attr_get_u32(ma)); } break; case OVS_KEY_ATTR_ETHERNET: if (!is_exact) { const struct ovs_key_ethernet *eth_mask = nl_attr_get(ma); const struct ovs_key_ethernet *eth_key = nl_attr_get(a); ds_put_format(ds, "src="ETH_ADDR_FMT"/"ETH_ADDR_FMT ",dst="ETH_ADDR_FMT"/"ETH_ADDR_FMT, ETH_ADDR_ARGS(eth_key->eth_src), ETH_ADDR_ARGS(eth_mask->eth_src), ETH_ADDR_ARGS(eth_key->eth_dst), ETH_ADDR_ARGS(eth_mask->eth_dst)); } else { const struct ovs_key_ethernet *eth_key = nl_attr_get(a); ds_put_format(ds, "src="ETH_ADDR_FMT",dst="ETH_ADDR_FMT, ETH_ADDR_ARGS(eth_key->eth_src), ETH_ADDR_ARGS(eth_key->eth_dst)); } break; case OVS_KEY_ATTR_VLAN: { ovs_be16 vlan_tci = nl_attr_get_be16(a); if (!is_exact) { ovs_be16 mask = nl_attr_get_be16(ma); ds_put_format(ds, "vid=%"PRIu16"/0x%"PRIx16",pcp=%d/0x%x,cfi=%d/%d", vlan_tci_to_vid(vlan_tci), vlan_tci_to_vid(mask), 
vlan_tci_to_pcp(vlan_tci), vlan_tci_to_pcp(mask), vlan_tci_to_cfi(vlan_tci), vlan_tci_to_cfi(mask)); } else { format_vlan_tci(ds, vlan_tci); } } break; case OVS_KEY_ATTR_MPLS: { const struct ovs_key_mpls *mpls_key = nl_attr_get(a); const struct ovs_key_mpls *mpls_mask = NULL; if (!is_exact) { mpls_mask = nl_attr_get(ma); } format_mpls(ds, mpls_key, mpls_mask); break; } case OVS_KEY_ATTR_ETHERTYPE: ds_put_format(ds, "0x%04"PRIx16, ntohs(nl_attr_get_be16(a))); if (!is_exact) { ds_put_format(ds, "/0x%04"PRIx16, ntohs(nl_attr_get_be16(ma))); } break; case OVS_KEY_ATTR_IPV4: if (!is_exact) { const struct ovs_key_ipv4 *ipv4_key = nl_attr_get(a); const struct ovs_key_ipv4 *ipv4_mask = nl_attr_get(ma); ds_put_format(ds, "src="IP_FMT"/"IP_FMT",dst="IP_FMT"/"IP_FMT ",proto=%"PRIu8"/%#"PRIx8",tos=%#"PRIx8"/%#"PRIx8 ",ttl=%"PRIu8"/%#"PRIx8",frag=%s/%#"PRIx8, IP_ARGS(ipv4_key->ipv4_src), IP_ARGS(ipv4_mask->ipv4_src), IP_ARGS(ipv4_key->ipv4_dst), IP_ARGS(ipv4_mask->ipv4_dst), ipv4_key->ipv4_proto, ipv4_mask->ipv4_proto, ipv4_key->ipv4_tos, ipv4_mask->ipv4_tos, ipv4_key->ipv4_ttl, ipv4_mask->ipv4_ttl, ovs_frag_type_to_string(ipv4_key->ipv4_frag), ipv4_mask->ipv4_frag); } else { const struct ovs_key_ipv4 *ipv4_key = nl_attr_get(a); ds_put_format(ds, "src="IP_FMT",dst="IP_FMT",proto=%"PRIu8 ",tos=%#"PRIx8",ttl=%"PRIu8",frag=%s", IP_ARGS(ipv4_key->ipv4_src), IP_ARGS(ipv4_key->ipv4_dst), ipv4_key->ipv4_proto, ipv4_key->ipv4_tos, ipv4_key->ipv4_ttl, ovs_frag_type_to_string(ipv4_key->ipv4_frag)); } break; case OVS_KEY_ATTR_IPV6: if (!is_exact) { const struct ovs_key_ipv6 *ipv6_key, *ipv6_mask; char src_str[INET6_ADDRSTRLEN]; char dst_str[INET6_ADDRSTRLEN]; char src_mask[INET6_ADDRSTRLEN]; char dst_mask[INET6_ADDRSTRLEN]; ipv6_key = nl_attr_get(a); inet_ntop(AF_INET6, ipv6_key->ipv6_src, src_str, sizeof src_str); inet_ntop(AF_INET6, ipv6_key->ipv6_dst, dst_str, sizeof dst_str); ipv6_mask = nl_attr_get(ma); inet_ntop(AF_INET6, ipv6_mask->ipv6_src, src_mask, sizeof src_mask); 
inet_ntop(AF_INET6, ipv6_mask->ipv6_dst, dst_mask, sizeof dst_mask); ds_put_format(ds, "src=%s/%s,dst=%s/%s,label=%#"PRIx32"/%#"PRIx32 ",proto=%"PRIu8"/%#"PRIx8",tclass=%#"PRIx8"/%#"PRIx8 ",hlimit=%"PRIu8"/%#"PRIx8",frag=%s/%#"PRIx8, src_str, src_mask, dst_str, dst_mask, ntohl(ipv6_key->ipv6_label), ntohl(ipv6_mask->ipv6_label), ipv6_key->ipv6_proto, ipv6_mask->ipv6_proto, ipv6_key->ipv6_tclass, ipv6_mask->ipv6_tclass, ipv6_key->ipv6_hlimit, ipv6_mask->ipv6_hlimit, ovs_frag_type_to_string(ipv6_key->ipv6_frag), ipv6_mask->ipv6_frag); } else { const struct ovs_key_ipv6 *ipv6_key; char src_str[INET6_ADDRSTRLEN]; char dst_str[INET6_ADDRSTRLEN]; ipv6_key = nl_attr_get(a); inet_ntop(AF_INET6, ipv6_key->ipv6_src, src_str, sizeof src_str); inet_ntop(AF_INET6, ipv6_key->ipv6_dst, dst_str, sizeof dst_str); ds_put_format(ds, "src=%s,dst=%s,label=%#"PRIx32",proto=%"PRIu8 ",tclass=%#"PRIx8",hlimit=%"PRIu8",frag=%s", src_str, dst_str, ntohl(ipv6_key->ipv6_label), ipv6_key->ipv6_proto, ipv6_key->ipv6_tclass, ipv6_key->ipv6_hlimit, ovs_frag_type_to_string(ipv6_key->ipv6_frag)); } break; case OVS_KEY_ATTR_TCP: if (!is_exact) { const struct ovs_key_tcp *tcp_mask = nl_attr_get(ma); const struct ovs_key_tcp *tcp_key = nl_attr_get(a); ds_put_format(ds, "src=%"PRIu16"/%#"PRIx16 ",dst=%"PRIu16"/%#"PRIx16, ntohs(tcp_key->tcp_src), ntohs(tcp_mask->tcp_src), ntohs(tcp_key->tcp_dst), ntohs(tcp_mask->tcp_dst)); } else { const struct ovs_key_tcp *tcp_key = nl_attr_get(a); ds_put_format(ds, "src=%"PRIu16",dst=%"PRIu16, ntohs(tcp_key->tcp_src), ntohs(tcp_key->tcp_dst)); } break; case OVS_KEY_ATTR_UDP: if (!is_exact) { const struct ovs_key_udp *udp_mask = nl_attr_get(ma); const struct ovs_key_udp *udp_key = nl_attr_get(a); ds_put_format(ds, "src=%"PRIu16"/%#"PRIx16 ",dst=%"PRIu16"/%#"PRIx16, ntohs(udp_key->udp_src), ntohs(udp_mask->udp_src), ntohs(udp_key->udp_dst), ntohs(udp_mask->udp_dst)); } else { const struct ovs_key_udp *udp_key = nl_attr_get(a); ds_put_format(ds, 
"src=%"PRIu16",dst=%"PRIu16, ntohs(udp_key->udp_src), ntohs(udp_key->udp_dst)); } break; case OVS_KEY_ATTR_SCTP: if (ma) { const struct ovs_key_sctp *sctp_mask = nl_attr_get(ma); const struct ovs_key_sctp *sctp_key = nl_attr_get(a); ds_put_format(ds, "src=%"PRIu16"/%#"PRIx16 ",dst=%"PRIu16"/%#"PRIx16, ntohs(sctp_key->sctp_src), ntohs(sctp_mask->sctp_src), ntohs(sctp_key->sctp_dst), ntohs(sctp_mask->sctp_dst)); } else { const struct ovs_key_sctp *sctp_key = nl_attr_get(a); ds_put_format(ds, "(src=%"PRIu16",dst=%"PRIu16")", ntohs(sctp_key->sctp_src), ntohs(sctp_key->sctp_dst)); } break; case OVS_KEY_ATTR_ICMP: if (!is_exact) { const struct ovs_key_icmp *icmp_mask = nl_attr_get(ma); const struct ovs_key_icmp *icmp_key = nl_attr_get(a); ds_put_format(ds, "type=%"PRIu8"/%#"PRIx8",code=%"PRIu8"/%#"PRIx8, icmp_key->icmp_type, icmp_mask->icmp_type, icmp_key->icmp_code, icmp_mask->icmp_code); } else { const struct ovs_key_icmp *icmp_key = nl_attr_get(a); ds_put_format(ds, "type=%"PRIu8",code=%"PRIu8, icmp_key->icmp_type, icmp_key->icmp_code); } break; case OVS_KEY_ATTR_ICMPV6: if (!is_exact) { const struct ovs_key_icmpv6 *icmpv6_mask = nl_attr_get(ma); const struct ovs_key_icmpv6 *icmpv6_key = nl_attr_get(a); ds_put_format(ds, "type=%"PRIu8"/%#"PRIx8",code=%"PRIu8"/%#"PRIx8, icmpv6_key->icmpv6_type, icmpv6_mask->icmpv6_type, icmpv6_key->icmpv6_code, icmpv6_mask->icmpv6_code); } else { const struct ovs_key_icmpv6 *icmpv6_key = nl_attr_get(a); ds_put_format(ds, "type=%"PRIu8",code=%"PRIu8, icmpv6_key->icmpv6_type, icmpv6_key->icmpv6_code); } break; case OVS_KEY_ATTR_ARP: if (!is_exact) { const struct ovs_key_arp *arp_mask = nl_attr_get(ma); const struct ovs_key_arp *arp_key = nl_attr_get(a); ds_put_format(ds, "sip="IP_FMT"/"IP_FMT",tip="IP_FMT"/"IP_FMT ",op=%"PRIu16"/%#"PRIx16 ",sha="ETH_ADDR_FMT"/"ETH_ADDR_FMT ",tha="ETH_ADDR_FMT"/"ETH_ADDR_FMT, IP_ARGS(arp_key->arp_sip), IP_ARGS(arp_mask->arp_sip), IP_ARGS(arp_key->arp_tip), IP_ARGS(arp_mask->arp_tip), 
ntohs(arp_key->arp_op), ntohs(arp_mask->arp_op), ETH_ADDR_ARGS(arp_key->arp_sha), ETH_ADDR_ARGS(arp_mask->arp_sha), ETH_ADDR_ARGS(arp_key->arp_tha), ETH_ADDR_ARGS(arp_mask->arp_tha)); } else { const struct ovs_key_arp *arp_key = nl_attr_get(a); ds_put_format(ds, "sip="IP_FMT",tip="IP_FMT",op=%"PRIu16"," "sha="ETH_ADDR_FMT",tha="ETH_ADDR_FMT, IP_ARGS(arp_key->arp_sip), IP_ARGS(arp_key->arp_tip), ntohs(arp_key->arp_op), ETH_ADDR_ARGS(arp_key->arp_sha), ETH_ADDR_ARGS(arp_key->arp_tha)); } break; case OVS_KEY_ATTR_ND: { const struct ovs_key_nd *nd_key, *nd_mask = NULL; char target[INET6_ADDRSTRLEN]; nd_key = nl_attr_get(a); if (!is_exact) { nd_mask = nl_attr_get(ma); } inet_ntop(AF_INET6, nd_key->nd_target, target, sizeof target); ds_put_format(ds, "target=%s", target); if (!is_exact) { inet_ntop(AF_INET6, nd_mask->nd_target, target, sizeof target); ds_put_format(ds, "/%s", target); } if (!eth_addr_is_zero(nd_key->nd_sll)) { ds_put_format(ds, ",sll="ETH_ADDR_FMT, ETH_ADDR_ARGS(nd_key->nd_sll)); if (!is_exact) { ds_put_format(ds, "/"ETH_ADDR_FMT, ETH_ADDR_ARGS(nd_mask->nd_sll)); } } if (!eth_addr_is_zero(nd_key->nd_tll)) { ds_put_format(ds, ",tll="ETH_ADDR_FMT, ETH_ADDR_ARGS(nd_key->nd_tll)); if (!is_exact) { ds_put_format(ds, "/"ETH_ADDR_FMT, ETH_ADDR_ARGS(nd_mask->nd_tll)); } } break; } case OVS_KEY_ATTR_UNSPEC: case __OVS_KEY_ATTR_MAX: default: format_generic_odp_key(a, ds); if (!is_exact) { ds_put_char(ds, '/'); format_generic_odp_key(ma, ds); } break; } ds_put_char(ds, ')'); } static struct nlattr * generate_all_wildcard_mask(struct ofpbuf *ofp, const struct nlattr *key) { const struct nlattr *a; unsigned int left; int type = nl_attr_type(key); int size = nl_attr_get_size(key); if (odp_flow_key_attr_len(type) >=0) { memset(nl_msg_put_unspec_uninit(ofp, type, size), 0, size); } else { size_t nested_mask; nested_mask = nl_msg_start_nested(ofp, type); NL_ATTR_FOR_EACH(a, left, key, nl_attr_get_size(key)) { generate_all_wildcard_mask(ofp, nl_attr_get(a)); } 
/* (End of generate_all_wildcard_mask(): close the nested attribute that was
 * opened for a nested key, then return 'ofp->base'.) */
        nl_msg_end_nested(ofp, nested_mask);
    }

    return ofp->base;
}

/* Appends to 'ds' a string representation of the 'key_len' bytes of
 * OVS_KEY_ATTR_* attributes in 'key'.  If non-null, additionally formats the
 * 'mask_len' bytes of 'mask' which apply to 'key'.
 *
 * Each key attribute is paired with the attribute of the same type found in
 * 'mask' (if any).  When a mask is supplied, a key attribute whose mask
 * attribute is missing, or is reported as a wildcard by
 * odp_mask_attr_is_wildcard(), is printed only if 'verbose' is true or
 * odp_flow_key_attr_len() returns -2 for its type (treated here as a nested
 * attribute).
 *
 * If NL_ATTR_FOR_EACH stops with 'left' bytes unconsumed, those bytes are
 * dumped in hex after a "leftover bytes" marker.  If 'key' contains no
 * OVS_KEY_ATTR_ETHERTYPE attribute but 'mask' does, the mask value is printed
 * as "eth_type(0/0x....)". */
void
odp_flow_format(const struct nlattr *key, size_t key_len,
                const struct nlattr *mask, size_t mask_len,
                struct ds *ds, bool verbose)
{
    if (key_len) {
        const struct nlattr *a;
        unsigned int left;
        bool has_ethtype_key = false;
        const struct nlattr *ma = NULL;
        struct ofpbuf ofp;      /* Scratch buffer for synthesized masks. */
        bool first_field = true;

        ofpbuf_init(&ofp, 100);
        NL_ATTR_FOR_EACH (a, left, key, key_len) {
            bool is_nested_attr;
            bool is_wildcard = false;
            int attr_type = nl_attr_type(a);

            if (attr_type == OVS_KEY_ATTR_ETHERTYPE) {
                has_ethtype_key = true;
            }

            is_nested_attr = (odp_flow_key_attr_len(attr_type) == -2);

            /* Without a mask, 'ma' stays NULL and nothing is a wildcard. */
            if (mask && mask_len) {
                ma = nl_attr_find__(mask, mask_len, nl_attr_type(a));
                is_wildcard = ma ? odp_mask_attr_is_wildcard(ma) : true;
            }

            if (verbose || !is_wildcard || is_nested_attr) {
                /* The key attribute has no counterpart in 'mask': build an
                 * all-zeros mask of the same layout in the scratch buffer so
                 * that the formatter still has a mask to print. */
                if (is_wildcard && !ma) {
                    ma = generate_all_wildcard_mask(&ofp, a);
                }
                if (!first_field) {
                    ds_put_char(ds, ',');
                }
                format_odp_key_attr(a, ma, ds, verbose);
                first_field = false;
            }
            /* Drop any synthesized mask before the next attribute. */
            ofpbuf_clear(&ofp);
        }
        ofpbuf_uninit(&ofp);

        if (left) {
            int i;

            /* NOTE(review): appending an empty string has no visible effect;
             * possibly a placeholder text was intended here -- confirm. */
            if (left == key_len) {
                ds_put_cstr(ds, "");
            }
            ds_put_format(ds, ",***%u leftover bytes*** (", left);
            for (i = 0; i < left; i++) {
                ds_put_format(ds, "%02x", ((const uint8_t *) a)[i]);
            }
            ds_put_char(ds, ')');
        }
        if (!has_ethtype_key) {
            /* The key carried no Ethertype; still show the Ethertype mask if
             * the mask has one. */
            ma = nl_attr_find__(mask, mask_len, OVS_KEY_ATTR_ETHERTYPE);
            if (ma) {
                ds_put_format(ds, ",eth_type(0/0x%04"PRIx16")",
                              ntohs(nl_attr_get_be16(ma)));
            }
        }
    } else {
        /* NOTE(review): as above, this appends an empty string -- confirm
         * whether a placeholder text was intended for an empty key. */
        ds_put_cstr(ds, "");
    }
}

/* Appends to 'ds' a string representation of the 'key_len' bytes of
 * OVS_KEY_ATTR_* attributes in 'key'.
*/ void odp_flow_key_format(const struct nlattr *key, size_t key_len, struct ds *ds) { odp_flow_format(key, key_len, NULL, 0, ds, true); } static void put_nd(struct ovs_key_nd* nd_key, const uint8_t *nd_sll, const uint8_t *nd_tll, struct ofpbuf *key) { if (nd_sll) { memcpy(nd_key->nd_sll, nd_sll, ETH_ADDR_LEN); } if (nd_tll) { memcpy(nd_key->nd_tll, nd_tll, ETH_ADDR_LEN); } nl_msg_put_unspec(key, OVS_KEY_ATTR_ND, nd_key, sizeof *nd_key); } static int put_nd_key(int n, const char *nd_target_s, const uint8_t *nd_sll, const uint8_t *nd_tll, struct ofpbuf *key) { struct ovs_key_nd nd_key; memset(&nd_key, 0, sizeof nd_key); if (inet_pton(AF_INET6, nd_target_s, nd_key.nd_target) != 1) { return -EINVAL; } put_nd(&nd_key, nd_sll, nd_tll, key); return n; } static int put_nd_mask(int n, const char *nd_target_s, const uint8_t *nd_sll, const uint8_t *nd_tll, struct ofpbuf *mask) { struct ovs_key_nd nd_mask; memset(&nd_mask, 0xff, sizeof nd_mask); if (strlen(nd_target_s) != 0 && inet_pton(AF_INET6, nd_target_s, nd_mask.nd_target) != 1) { return -EINVAL; } put_nd(&nd_mask, nd_sll, nd_tll, mask); return n; } static bool ovs_frag_type_from_string(const char *s, enum ovs_frag_type *type) { if (!strcasecmp(s, "no")) { *type = OVS_FRAG_TYPE_NONE; } else if (!strcasecmp(s, "first")) { *type = OVS_FRAG_TYPE_FIRST; } else if (!strcasecmp(s, "later")) { *type = OVS_FRAG_TYPE_LATER; } else { return false; } return true; } static ovs_be32 mpls_lse_from_components(int mpls_label, int mpls_tc, int mpls_ttl, int mpls_bos) { return (htonl((mpls_label << MPLS_LABEL_SHIFT) | (mpls_tc << MPLS_TC_SHIFT) | (mpls_ttl << MPLS_TTL_SHIFT) | (mpls_bos << MPLS_BOS_SHIFT))); } static int parse_odp_key_mask_attr(const char *s, const struct simap *port_names, struct ofpbuf *key, struct ofpbuf *mask) { /* Many of the sscanf calls in this function use oversized destination * fields because some sscanf() implementations truncate the range of %i * directives, so that e.g. 
"%"SCNi16 interprets input of "0xfedc" as a * value of 0x7fff. The other alternatives are to allow only a single * radix (e.g. decimal or hexadecimal) or to write more sophisticated * parsers. * * The tun_id parser has to use an alternative approach because there is no * type larger than 64 bits. */ { unsigned long long int priority; unsigned long long int priority_mask; int n = -1; if (mask && sscanf(s, "skb_priority(%lli/%lli)%n", &priority, &priority_mask, &n) > 0 && n > 0) { nl_msg_put_u32(key, OVS_KEY_ATTR_PRIORITY, priority); nl_msg_put_u32(mask, OVS_KEY_ATTR_PRIORITY, priority_mask); return n; } else if (sscanf(s, "skb_priority(%lli)%n", &priority, &n) > 0 && n > 0) { nl_msg_put_u32(key, OVS_KEY_ATTR_PRIORITY, priority); if (mask) { nl_msg_put_u32(mask, OVS_KEY_ATTR_PRIORITY, UINT32_MAX); } return n; } } { unsigned long long int mark; unsigned long long int mark_mask; int n = -1; if (mask && sscanf(s, "skb_mark(%lli/%lli)%n", &mark, &mark_mask, &n) > 0 && n > 0) { nl_msg_put_u32(key, OVS_KEY_ATTR_SKB_MARK, mark); nl_msg_put_u32(mask, OVS_KEY_ATTR_SKB_MARK, mark_mask); return n; } else if (sscanf(s, "skb_mark(%lli)%n", &mark, &n) > 0 && n > 0) { nl_msg_put_u32(key, OVS_KEY_ATTR_SKB_MARK, mark); if (mask) { nl_msg_put_u32(mask, OVS_KEY_ATTR_SKB_MARK, UINT32_MAX); } return n; } } { char tun_id_s[32]; int tos, tos_mask, ttl, ttl_mask; struct flow_tnl tun_key, tun_key_mask; unsigned long long tun_id_mask; int n = -1; if (mask && sscanf(s, "tunnel(tun_id=%31[x0123456789abcdefABCDEF]/%llx," "src="IP_SCAN_FMT"/"IP_SCAN_FMT",dst="IP_SCAN_FMT "/"IP_SCAN_FMT",tos=%i/%i,ttl=%i/%i,flags%n", tun_id_s, &tun_id_mask, IP_SCAN_ARGS(&tun_key.ip_src), IP_SCAN_ARGS(&tun_key_mask.ip_src), IP_SCAN_ARGS(&tun_key.ip_dst), IP_SCAN_ARGS(&tun_key_mask.ip_dst), &tos, &tos_mask, &ttl, &ttl_mask, &n) > 0 && n > 0) { int res; uint32_t flags; tun_key.tun_id = htonll(strtoull(tun_id_s, NULL, 0)); tun_key_mask.tun_id = htonll(tun_id_mask); tun_key.ip_tos = tos; tun_key_mask.ip_tos = tos_mask; 
tun_key.ip_ttl = ttl; tun_key_mask.ip_ttl = ttl_mask; res = parse_flags(&s[n], flow_tun_flag_to_string, &flags); tun_key.flags = flags; tun_key_mask.flags = UINT16_MAX; if (res < 0) { return res; } n += res; if (s[n] != ')') { return -EINVAL; } n++; tun_key_to_attr(key, &tun_key); if (mask) { tun_key_to_attr(mask, &tun_key_mask); } return n; } else if (sscanf(s, "tunnel(tun_id=%31[x0123456789abcdefABCDEF]," "src="IP_SCAN_FMT",dst="IP_SCAN_FMT ",tos=%i,ttl=%i,flags%n", tun_id_s, IP_SCAN_ARGS(&tun_key.ip_src), IP_SCAN_ARGS(&tun_key.ip_dst), &tos, &ttl, &n) > 0 && n > 0) { int res; uint32_t flags; tun_key.tun_id = htonll(strtoull(tun_id_s, NULL, 0)); tun_key.ip_tos = tos; tun_key.ip_ttl = ttl; res = parse_flags(&s[n], flow_tun_flag_to_string, &flags); tun_key.flags = flags; if (res < 0) { return res; } n += res; if (s[n] != ')') { return -EINVAL; } n++; tun_key_to_attr(key, &tun_key); if (mask) { memset(&tun_key, 0xff, sizeof tun_key); tun_key_to_attr(mask, &tun_key); } return n; } } { unsigned long long int in_port; unsigned long long int in_port_mask; int n = -1; if (mask && sscanf(s, "in_port(%lli/%lli)%n", &in_port, &in_port_mask, &n) > 0 && n > 0) { nl_msg_put_u32(key, OVS_KEY_ATTR_IN_PORT, in_port); nl_msg_put_u32(mask, OVS_KEY_ATTR_IN_PORT, in_port_mask); return n; } else if (sscanf(s, "in_port(%lli)%n", &in_port, &n) > 0 && n > 0) { nl_msg_put_u32(key, OVS_KEY_ATTR_IN_PORT, in_port); if (mask) { nl_msg_put_u32(mask, OVS_KEY_ATTR_IN_PORT, UINT32_MAX); } return n; } } if (port_names && !strncmp(s, "in_port(", 8)) { const char *name; const struct simap_node *node; int name_len; name = s + 8; name_len = strcspn(s, ")"); node = simap_find_len(port_names, name, name_len); if (node) { nl_msg_put_u32(key, OVS_KEY_ATTR_IN_PORT, node->data); if (mask) { nl_msg_put_u32(mask, OVS_KEY_ATTR_IN_PORT, UINT32_MAX); } return 8 + name_len + 1; } } { struct ovs_key_ethernet eth_key; struct ovs_key_ethernet eth_key_mask; int n = -1; if (mask && sscanf(s, 
"eth(src="ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT"," "dst="ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT")%n", ETH_ADDR_SCAN_ARGS(eth_key.eth_src), ETH_ADDR_SCAN_ARGS(eth_key_mask.eth_src), ETH_ADDR_SCAN_ARGS(eth_key.eth_dst), ETH_ADDR_SCAN_ARGS(eth_key_mask.eth_dst), &n) > 0 && n > 0) { nl_msg_put_unspec(key, OVS_KEY_ATTR_ETHERNET, ð_key, sizeof eth_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_ETHERNET, ð_key_mask, sizeof eth_key_mask); return n; } else if (sscanf(s, "eth(src="ETH_ADDR_SCAN_FMT",dst="ETH_ADDR_SCAN_FMT")%n", ETH_ADDR_SCAN_ARGS(eth_key.eth_src), ETH_ADDR_SCAN_ARGS(eth_key.eth_dst), &n) > 0 && n > 0) { nl_msg_put_unspec(key, OVS_KEY_ATTR_ETHERNET, ð_key, sizeof eth_key); if (mask) { memset(ð_key, 0xff, sizeof eth_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_ETHERNET, ð_key, sizeof eth_key); } return n; } } { uint16_t vid, vid_mask; int pcp, pcp_mask; int cfi, cfi_mask; int n = -1; if (mask && (sscanf(s, "vlan(vid=%"SCNi16"/%"SCNi16",pcp=%i/%i)%n", &vid, &vid_mask, &pcp, &pcp_mask, &n) > 0 && n > 0)) { nl_msg_put_be16(key, OVS_KEY_ATTR_VLAN, htons((vid << VLAN_VID_SHIFT) | (pcp << VLAN_PCP_SHIFT) | VLAN_CFI)); nl_msg_put_be16(mask, OVS_KEY_ATTR_VLAN, htons((vid_mask << VLAN_VID_SHIFT) | (pcp_mask << VLAN_PCP_SHIFT) | (1 << VLAN_CFI_SHIFT))); return n; } else if ((sscanf(s, "vlan(vid=%"SCNi16",pcp=%i)%n", &vid, &pcp, &n) > 0 && n > 0)) { nl_msg_put_be16(key, OVS_KEY_ATTR_VLAN, htons((vid << VLAN_VID_SHIFT) | (pcp << VLAN_PCP_SHIFT) | VLAN_CFI)); if (mask) { nl_msg_put_be16(mask, OVS_KEY_ATTR_VLAN, htons(UINT16_MAX)); } return n; } else if (mask && (sscanf(s, "vlan(vid=%"SCNi16"/%"SCNi16",pcp=%i/%i,cfi=%i/%i)%n", &vid, &vid_mask, &pcp, &pcp_mask, &cfi, &cfi_mask, &n) > 0 && n > 0)) { nl_msg_put_be16(key, OVS_KEY_ATTR_VLAN, htons((vid << VLAN_VID_SHIFT) | (pcp << VLAN_PCP_SHIFT) | (cfi ? 
VLAN_CFI : 0))); nl_msg_put_be16(mask, OVS_KEY_ATTR_VLAN, htons((vid_mask << VLAN_VID_SHIFT) | (pcp_mask << VLAN_PCP_SHIFT) | (cfi_mask << VLAN_CFI_SHIFT))); return n; } else if ((sscanf(s, "vlan(vid=%"SCNi16",pcp=%i,cfi=%i)%n", &vid, &pcp, &cfi, &n) > 0 && n > 0)) { nl_msg_put_be16(key, OVS_KEY_ATTR_VLAN, htons((vid << VLAN_VID_SHIFT) | (pcp << VLAN_PCP_SHIFT) | (cfi ? VLAN_CFI : 0))); if (mask) { nl_msg_put_be16(mask, OVS_KEY_ATTR_VLAN, htons(UINT16_MAX)); } return n; } } { int eth_type; int eth_type_mask; int n = -1; if (mask && sscanf(s, "eth_type(%i/%i)%n", ð_type, ð_type_mask, &n) > 0 && n > 0) { if (eth_type != 0) { nl_msg_put_be16(key, OVS_KEY_ATTR_ETHERTYPE, htons(eth_type)); } nl_msg_put_be16(mask, OVS_KEY_ATTR_ETHERTYPE, htons(eth_type_mask)); return n; } else if (sscanf(s, "eth_type(%i)%n", ð_type, &n) > 0 && n > 0) { nl_msg_put_be16(key, OVS_KEY_ATTR_ETHERTYPE, htons(eth_type)); if (mask) { nl_msg_put_be16(mask, OVS_KEY_ATTR_ETHERTYPE, htons(UINT16_MAX)); } return n; } } { int label, tc, ttl, bos; int label_mask, tc_mask, ttl_mask, bos_mask; int n = -1; if (mask && sscanf(s, "mpls(label=%"SCNi32"/%"SCNi32",tc=%i/%i,ttl=%i/%i,bos=%i/%i)%n", &label, &label_mask, &tc, &tc_mask, &ttl, &ttl_mask, &bos, &bos_mask, &n) > 0 && n > 0) { struct ovs_key_mpls *mpls, *mpls_mask; mpls = nl_msg_put_unspec_uninit(key, OVS_KEY_ATTR_MPLS, sizeof *mpls); mpls->mpls_lse = mpls_lse_from_components(label, tc, ttl, bos); mpls_mask = nl_msg_put_unspec_uninit(mask, OVS_KEY_ATTR_MPLS, sizeof *mpls_mask); mpls_mask->mpls_lse = mpls_lse_from_components( label_mask, tc_mask, ttl_mask, bos_mask); return n; } else if (sscanf(s, "mpls(label=%"SCNi32",tc=%i,ttl=%i,bos=%i)%n", &label, &tc, &ttl, &bos, &n) > 0 && n > 0) { struct ovs_key_mpls *mpls; mpls = nl_msg_put_unspec_uninit(key, OVS_KEY_ATTR_MPLS, sizeof *mpls); mpls->mpls_lse = mpls_lse_from_components(label, tc, ttl, bos); if (mask) { mpls = nl_msg_put_unspec_uninit(mask, OVS_KEY_ATTR_MPLS, sizeof *mpls); mpls->mpls_lse = 
htonl(UINT32_MAX); } return n; } } { ovs_be32 ipv4_src, ipv4_src_mask; ovs_be32 ipv4_dst, ipv4_dst_mask; int ipv4_proto, ipv4_proto_mask; int ipv4_tos, ipv4_tos_mask; int ipv4_ttl, ipv4_ttl_mask; char frag[8]; int ipv4_frag_mask; enum ovs_frag_type ipv4_frag; int n = -1; if (mask && sscanf(s, "ipv4(src="IP_SCAN_FMT"/"IP_SCAN_FMT"," "dst="IP_SCAN_FMT"/"IP_SCAN_FMT"," "proto=%i/%i,tos=%i/%i,ttl=%i/%i," "frag=%7[a-z]/%i)%n", IP_SCAN_ARGS(&ipv4_src), IP_SCAN_ARGS(&ipv4_src_mask), IP_SCAN_ARGS(&ipv4_dst), IP_SCAN_ARGS(&ipv4_dst_mask), &ipv4_proto, &ipv4_proto_mask, &ipv4_tos, &ipv4_tos_mask, &ipv4_ttl, &ipv4_ttl_mask, frag, &ipv4_frag_mask, &n) > 0 && n > 0 && ovs_frag_type_from_string(frag, &ipv4_frag)) { struct ovs_key_ipv4 ipv4_key; struct ovs_key_ipv4 ipv4_mask; ipv4_key.ipv4_src = ipv4_src; ipv4_key.ipv4_dst = ipv4_dst; ipv4_key.ipv4_proto = ipv4_proto; ipv4_key.ipv4_tos = ipv4_tos; ipv4_key.ipv4_ttl = ipv4_ttl; ipv4_key.ipv4_frag = ipv4_frag; nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof ipv4_key); ipv4_mask.ipv4_src = ipv4_src_mask; ipv4_mask.ipv4_dst = ipv4_dst_mask; ipv4_mask.ipv4_proto = ipv4_proto_mask; ipv4_mask.ipv4_tos = ipv4_tos_mask; ipv4_mask.ipv4_ttl = ipv4_ttl_mask; ipv4_mask.ipv4_frag = ipv4_frag_mask; nl_msg_put_unspec(mask, OVS_KEY_ATTR_IPV4, &ipv4_mask, sizeof ipv4_mask); return n; } else if (sscanf(s, "ipv4(src="IP_SCAN_FMT",dst="IP_SCAN_FMT"," "proto=%i,tos=%i,ttl=%i,frag=%7[a-z])%n", IP_SCAN_ARGS(&ipv4_src), IP_SCAN_ARGS(&ipv4_dst), &ipv4_proto, &ipv4_tos, &ipv4_ttl, frag, &n) > 0 && n > 0 && ovs_frag_type_from_string(frag, &ipv4_frag)) { struct ovs_key_ipv4 ipv4_key; ipv4_key.ipv4_src = ipv4_src; ipv4_key.ipv4_dst = ipv4_dst; ipv4_key.ipv4_proto = ipv4_proto; ipv4_key.ipv4_tos = ipv4_tos; ipv4_key.ipv4_ttl = ipv4_ttl; ipv4_key.ipv4_frag = ipv4_frag; nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof ipv4_key); if (mask) { memset(&ipv4_key, 0xff, sizeof ipv4_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_IPV4, &ipv4_key, 
sizeof ipv4_key); } return n; } } { char ipv6_src_s[IPV6_SCAN_LEN + 1]; char ipv6_src_mask_s[IPV6_SCAN_LEN + 1]; char ipv6_dst_s[IPV6_SCAN_LEN + 1]; char ipv6_dst_mask_s[IPV6_SCAN_LEN + 1]; int ipv6_label, ipv6_label_mask; int ipv6_proto, ipv6_proto_mask; int ipv6_tclass, ipv6_tclass_mask; int ipv6_hlimit, ipv6_hlimit_mask; char frag[8]; enum ovs_frag_type ipv6_frag; int ipv6_frag_mask; int n = -1; if (mask && sscanf(s, "ipv6(src="IPV6_SCAN_FMT"/"IPV6_SCAN_FMT",dst=" IPV6_SCAN_FMT"/"IPV6_SCAN_FMT"," "label=%i/%i,proto=%i/%i,tclass=%i/%i," "hlimit=%i/%i,frag=%7[a-z]/%i)%n", ipv6_src_s, ipv6_src_mask_s, ipv6_dst_s, ipv6_dst_mask_s, &ipv6_label, &ipv6_label_mask, &ipv6_proto, &ipv6_proto_mask, &ipv6_tclass, &ipv6_tclass_mask, &ipv6_hlimit, &ipv6_hlimit_mask, frag, &ipv6_frag_mask, &n) > 0 && n > 0 && ovs_frag_type_from_string(frag, &ipv6_frag)) { struct ovs_key_ipv6 ipv6_key; struct ovs_key_ipv6 ipv6_mask; if (inet_pton(AF_INET6, ipv6_src_s, &ipv6_key.ipv6_src) != 1 || inet_pton(AF_INET6, ipv6_dst_s, &ipv6_key.ipv6_dst) != 1 || inet_pton(AF_INET6, ipv6_src_mask_s, &ipv6_mask.ipv6_src) != 1 || inet_pton(AF_INET6, ipv6_dst_mask_s, &ipv6_mask.ipv6_dst) != 1) { return -EINVAL; } ipv6_key.ipv6_label = htonl(ipv6_label); ipv6_key.ipv6_proto = ipv6_proto; ipv6_key.ipv6_tclass = ipv6_tclass; ipv6_key.ipv6_hlimit = ipv6_hlimit; ipv6_key.ipv6_frag = ipv6_frag; nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV6, &ipv6_key, sizeof ipv6_key); ipv6_mask.ipv6_label = htonl(ipv6_label_mask); ipv6_mask.ipv6_proto = ipv6_proto_mask; ipv6_mask.ipv6_tclass = ipv6_tclass_mask; ipv6_mask.ipv6_hlimit = ipv6_hlimit_mask; ipv6_mask.ipv6_frag = ipv6_frag_mask; nl_msg_put_unspec(mask, OVS_KEY_ATTR_IPV6, &ipv6_mask, sizeof ipv6_mask); return n; } else if (sscanf(s, "ipv6(src="IPV6_SCAN_FMT",dst="IPV6_SCAN_FMT"," "label=%i,proto=%i,tclass=%i,hlimit=%i,frag=%7[a-z])%n", ipv6_src_s, ipv6_dst_s, &ipv6_label, &ipv6_proto, &ipv6_tclass, &ipv6_hlimit, frag, &n) > 0 && n > 0 && ovs_frag_type_from_string(frag, 
&ipv6_frag)) { struct ovs_key_ipv6 ipv6_key; if (inet_pton(AF_INET6, ipv6_src_s, &ipv6_key.ipv6_src) != 1 || inet_pton(AF_INET6, ipv6_dst_s, &ipv6_key.ipv6_dst) != 1) { return -EINVAL; } ipv6_key.ipv6_label = htonl(ipv6_label); ipv6_key.ipv6_proto = ipv6_proto; ipv6_key.ipv6_tclass = ipv6_tclass; ipv6_key.ipv6_hlimit = ipv6_hlimit; ipv6_key.ipv6_frag = ipv6_frag; nl_msg_put_unspec(key, OVS_KEY_ATTR_IPV6, &ipv6_key, sizeof ipv6_key); if (mask) { memset(&ipv6_key, 0xff, sizeof ipv6_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_IPV6, &ipv6_key, sizeof ipv6_key); } return n; } } { int tcp_src; int tcp_dst; int tcp_src_mask; int tcp_dst_mask; int n = -1; if (mask && sscanf(s, "tcp(src=%i/%i,dst=%i/%i)%n", &tcp_src, &tcp_src_mask, &tcp_dst, &tcp_dst_mask, &n) > 0 && n > 0) { struct ovs_key_tcp tcp_key; struct ovs_key_tcp tcp_mask; tcp_key.tcp_src = htons(tcp_src); tcp_key.tcp_dst = htons(tcp_dst); nl_msg_put_unspec(key, OVS_KEY_ATTR_TCP, &tcp_key, sizeof tcp_key); tcp_mask.tcp_src = htons(tcp_src_mask); tcp_mask.tcp_dst = htons(tcp_dst_mask); nl_msg_put_unspec(mask, OVS_KEY_ATTR_TCP, &tcp_mask, sizeof tcp_mask); return n; } else if (sscanf(s, "tcp(src=%i,dst=%i)%n",&tcp_src, &tcp_dst, &n) > 0 && n > 0) { struct ovs_key_tcp tcp_key; tcp_key.tcp_src = htons(tcp_src); tcp_key.tcp_dst = htons(tcp_dst); nl_msg_put_unspec(key, OVS_KEY_ATTR_TCP, &tcp_key, sizeof tcp_key); if (mask) { memset(&tcp_key, 0xff, sizeof tcp_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_TCP, &tcp_key, sizeof tcp_key); } return n; } } { int udp_src; int udp_dst; int udp_src_mask; int udp_dst_mask; int n = -1; if (mask && sscanf(s, "udp(src=%i/%i,dst=%i/%i)%n", &udp_src, &udp_src_mask, &udp_dst, &udp_dst_mask, &n) > 0 && n > 0) { struct ovs_key_udp udp_key; struct ovs_key_udp udp_mask; udp_key.udp_src = htons(udp_src); udp_key.udp_dst = htons(udp_dst); nl_msg_put_unspec(key, OVS_KEY_ATTR_UDP, &udp_key, sizeof udp_key); udp_mask.udp_src = htons(udp_src_mask); udp_mask.udp_dst = htons(udp_dst_mask); 
nl_msg_put_unspec(mask, OVS_KEY_ATTR_UDP, &udp_mask, sizeof udp_mask); return n; } if (sscanf(s, "udp(src=%i,dst=%i)%n", &udp_src, &udp_dst, &n) > 0 && n > 0) { struct ovs_key_udp udp_key; udp_key.udp_src = htons(udp_src); udp_key.udp_dst = htons(udp_dst); nl_msg_put_unspec(key, OVS_KEY_ATTR_UDP, &udp_key, sizeof udp_key); if (mask) { memset(&udp_key, 0xff, sizeof udp_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_UDP, &udp_key, sizeof udp_key); } return n; } } { int sctp_src; int sctp_dst; int sctp_src_mask; int sctp_dst_mask; int n = -1; if (mask && sscanf(s, "sctp(src=%i/%i,dst=%i/%i)%n", &sctp_src, &sctp_src_mask, &sctp_dst, &sctp_dst_mask, &n) > 0 && n > 0) { struct ovs_key_sctp sctp_key; struct ovs_key_sctp sctp_mask; sctp_key.sctp_src = htons(sctp_src); sctp_key.sctp_dst = htons(sctp_dst); nl_msg_put_unspec(key, OVS_KEY_ATTR_SCTP, &sctp_key, sizeof sctp_key); sctp_mask.sctp_src = htons(sctp_src_mask); sctp_mask.sctp_dst = htons(sctp_dst_mask); nl_msg_put_unspec(mask, OVS_KEY_ATTR_SCTP, &sctp_mask, sizeof sctp_mask); return n; } if (sscanf(s, "sctp(src=%i,dst=%i)%n", &sctp_src, &sctp_dst, &n) > 0 && n > 0) { struct ovs_key_sctp sctp_key; sctp_key.sctp_src = htons(sctp_src); sctp_key.sctp_dst = htons(sctp_dst); nl_msg_put_unspec(key, OVS_KEY_ATTR_SCTP, &sctp_key, sizeof sctp_key); if (mask) { memset(&sctp_key, 0xff, sizeof sctp_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_SCTP, &sctp_key, sizeof sctp_key); } return n; } } { int icmp_type; int icmp_code; int icmp_type_mask; int icmp_code_mask; int n = -1; if (mask && sscanf(s, "icmp(type=%i/%i,code=%i/%i)%n", &icmp_type, &icmp_type_mask, &icmp_code, &icmp_code_mask, &n) > 0 && n > 0) { struct ovs_key_icmp icmp_key; struct ovs_key_icmp icmp_mask; icmp_key.icmp_type = icmp_type; icmp_key.icmp_code = icmp_code; nl_msg_put_unspec(key, OVS_KEY_ATTR_ICMP, &icmp_key, sizeof icmp_key); icmp_mask.icmp_type = icmp_type_mask; icmp_mask.icmp_code = icmp_code_mask; nl_msg_put_unspec(mask, OVS_KEY_ATTR_ICMP, &icmp_mask, sizeof 
icmp_mask); return n; } else if (sscanf(s, "icmp(type=%i,code=%i)%n", &icmp_type, &icmp_code, &n) > 0 && n > 0) { struct ovs_key_icmp icmp_key; icmp_key.icmp_type = icmp_type; icmp_key.icmp_code = icmp_code; nl_msg_put_unspec(key, OVS_KEY_ATTR_ICMP, &icmp_key, sizeof icmp_key); if (mask) { memset(&icmp_key, 0xff, sizeof icmp_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_ICMP, &icmp_key, sizeof icmp_key); } return n; } } { struct ovs_key_icmpv6 icmpv6_key; struct ovs_key_icmpv6 icmpv6_mask; int icmpv6_type_mask; int icmpv6_code_mask; int n = -1; if (mask && sscanf(s, "icmpv6(type=%"SCNi8"/%i,code=%"SCNi8"/%i)%n", &icmpv6_key.icmpv6_type, &icmpv6_type_mask, &icmpv6_key.icmpv6_code, &icmpv6_code_mask, &n) > 0 && n > 0) { nl_msg_put_unspec(key, OVS_KEY_ATTR_ICMPV6, &icmpv6_key, sizeof icmpv6_key); icmpv6_mask.icmpv6_type = icmpv6_type_mask; icmpv6_mask.icmpv6_code = icmpv6_code_mask; nl_msg_put_unspec(mask, OVS_KEY_ATTR_ICMPV6, &icmpv6_mask, sizeof icmpv6_mask); return n; } else if (sscanf(s, "icmpv6(type=%"SCNi8",code=%"SCNi8")%n", &icmpv6_key.icmpv6_type, &icmpv6_key.icmpv6_code,&n) > 0 && n > 0) { nl_msg_put_unspec(key, OVS_KEY_ATTR_ICMPV6, &icmpv6_key, sizeof icmpv6_key); if (mask) { memset(&icmpv6_key, 0xff, sizeof icmpv6_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_ICMPV6, &icmpv6_key, sizeof icmpv6_key); } return n; } } { ovs_be32 arp_sip, arp_sip_mask; ovs_be32 arp_tip, arp_tip_mask; int arp_op, arp_op_mask; uint8_t arp_sha[ETH_ADDR_LEN]; uint8_t arp_sha_mask[ETH_ADDR_LEN]; uint8_t arp_tha[ETH_ADDR_LEN]; uint8_t arp_tha_mask[ETH_ADDR_LEN]; int n = -1; if (mask && sscanf(s, "arp(sip="IP_SCAN_FMT"/"IP_SCAN_FMT"," "tip="IP_SCAN_FMT"/"IP_SCAN_FMT"," "op=%i/%i,sha="ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT"," "tha="ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT")%n", IP_SCAN_ARGS(&arp_sip), IP_SCAN_ARGS(&arp_sip_mask), IP_SCAN_ARGS(&arp_tip), IP_SCAN_ARGS(&arp_tip_mask), &arp_op, &arp_op_mask, ETH_ADDR_SCAN_ARGS(arp_sha), ETH_ADDR_SCAN_ARGS(arp_sha_mask), ETH_ADDR_SCAN_ARGS(arp_tha), 
ETH_ADDR_SCAN_ARGS(arp_tha_mask), &n) > 0 && n > 0) { struct ovs_key_arp arp_key; struct ovs_key_arp arp_mask; memset(&arp_key, 0, sizeof arp_key); arp_key.arp_sip = arp_sip; arp_key.arp_tip = arp_tip; arp_key.arp_op = htons(arp_op); memcpy(arp_key.arp_sha, arp_sha, ETH_ADDR_LEN); memcpy(arp_key.arp_tha, arp_tha, ETH_ADDR_LEN); nl_msg_put_unspec(key, OVS_KEY_ATTR_ARP, &arp_key, sizeof arp_key); arp_mask.arp_sip = arp_sip_mask; arp_mask.arp_tip = arp_tip_mask; arp_mask.arp_op = htons(arp_op_mask); memcpy(arp_mask.arp_sha, arp_sha_mask, ETH_ADDR_LEN); memcpy(arp_mask.arp_tha, arp_tha_mask, ETH_ADDR_LEN); nl_msg_put_unspec(mask, OVS_KEY_ATTR_ARP, &arp_mask, sizeof arp_mask); return n; } else if (sscanf(s, "arp(sip="IP_SCAN_FMT",tip="IP_SCAN_FMT"," "op=%i,sha="ETH_ADDR_SCAN_FMT",tha="ETH_ADDR_SCAN_FMT")%n", IP_SCAN_ARGS(&arp_sip), IP_SCAN_ARGS(&arp_tip), &arp_op, ETH_ADDR_SCAN_ARGS(arp_sha), ETH_ADDR_SCAN_ARGS(arp_tha), &n) > 0 && n > 0) { struct ovs_key_arp arp_key; memset(&arp_key, 0, sizeof arp_key); arp_key.arp_sip = arp_sip; arp_key.arp_tip = arp_tip; arp_key.arp_op = htons(arp_op); memcpy(arp_key.arp_sha, arp_sha, ETH_ADDR_LEN); memcpy(arp_key.arp_tha, arp_tha, ETH_ADDR_LEN); nl_msg_put_unspec(key, OVS_KEY_ATTR_ARP, &arp_key, sizeof arp_key); if (mask) { memset(&arp_key, 0xff, sizeof arp_key); nl_msg_put_unspec(mask, OVS_KEY_ATTR_ARP, &arp_key, sizeof arp_key); } return n; } } { char nd_target_s[IPV6_SCAN_LEN + 1]; char nd_target_mask_s[IPV6_SCAN_LEN + 1]; uint8_t nd_sll[ETH_ADDR_LEN]; uint8_t nd_sll_mask[ETH_ADDR_LEN]; uint8_t nd_tll[ETH_ADDR_LEN]; uint8_t nd_tll_mask[ETH_ADDR_LEN]; int n = -1; nd_target_mask_s[0] = 0; memset(nd_sll_mask, 0xff, sizeof nd_sll_mask); memset(nd_tll_mask, 0xff, sizeof nd_tll_mask); if (mask && sscanf(s, "nd(target="IPV6_SCAN_FMT"/"IPV6_SCAN_FMT")%n", nd_target_s, nd_target_mask_s, &n) > 0 && n > 0) { put_nd_key(n, nd_target_s, NULL, NULL, key); put_nd_mask(n, nd_target_mask_s, NULL, NULL, mask); } else if (sscanf(s, 
"nd(target="IPV6_SCAN_FMT")%n", nd_target_s, &n) > 0 && n > 0) { put_nd_key(n, nd_target_s, NULL, NULL, key); if (mask) { put_nd_mask(n, nd_target_mask_s, NULL, NULL, mask); } } else if (mask && sscanf(s, "nd(target="IPV6_SCAN_FMT"/"IPV6_SCAN_FMT ",sll="ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT")%n", nd_target_s, nd_target_mask_s, ETH_ADDR_SCAN_ARGS(nd_sll), ETH_ADDR_SCAN_ARGS(nd_sll_mask), &n) > 0 && n > 0) { put_nd_key(n, nd_target_s, nd_sll, NULL, key); put_nd_mask(n, nd_target_mask_s, nd_sll_mask, NULL, mask); } else if (sscanf(s, "nd(target="IPV6_SCAN_FMT",sll="ETH_ADDR_SCAN_FMT")%n", nd_target_s, ETH_ADDR_SCAN_ARGS(nd_sll), &n) > 0 && n > 0) { put_nd_key(n, nd_target_s, nd_sll, NULL, key); if (mask) { put_nd_mask(n, nd_target_mask_s, nd_sll_mask, NULL, mask); } } else if (mask && sscanf(s, "nd(target="IPV6_SCAN_FMT"/"IPV6_SCAN_FMT ",tll="ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT")%n", nd_target_s, nd_target_mask_s, ETH_ADDR_SCAN_ARGS(nd_tll), ETH_ADDR_SCAN_ARGS(nd_tll_mask), &n) > 0 && n > 0) { put_nd_key(n, nd_target_s, NULL, nd_tll, key); put_nd_mask(n, nd_target_mask_s, NULL, nd_tll_mask, mask); } else if (sscanf(s, "nd(target="IPV6_SCAN_FMT",tll="ETH_ADDR_SCAN_FMT")%n", nd_target_s, ETH_ADDR_SCAN_ARGS(nd_tll), &n) > 0 && n > 0) { put_nd_key(n, nd_target_s, NULL, nd_tll, key); if (mask) { put_nd_mask(n, nd_target_mask_s, NULL, nd_tll_mask, mask); } } else if (mask && sscanf(s, "nd(target="IPV6_SCAN_FMT"/"IPV6_SCAN_FMT ",sll="ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT"," "tll="ETH_ADDR_SCAN_FMT"/"ETH_ADDR_SCAN_FMT")%n", nd_target_s, nd_target_mask_s, ETH_ADDR_SCAN_ARGS(nd_sll), ETH_ADDR_SCAN_ARGS(nd_sll_mask), ETH_ADDR_SCAN_ARGS(nd_tll), ETH_ADDR_SCAN_ARGS(nd_tll_mask), &n) > 0 && n > 0) { put_nd_key(n, nd_target_s, nd_sll, nd_tll, key); put_nd_mask(n, nd_target_mask_s, nd_sll_mask, nd_tll_mask, mask); } else if (sscanf(s, "nd(target="IPV6_SCAN_FMT",sll="ETH_ADDR_SCAN_FMT"," "tll="ETH_ADDR_SCAN_FMT")%n", nd_target_s, ETH_ADDR_SCAN_ARGS(nd_sll), 
/* (parse_odp_key_mask_attr(), continued: end of the last "nd(...)"
 * alternative.) */
                          ETH_ADDR_SCAN_ARGS(nd_tll), &n) > 0 && n > 0) {
            put_nd_key(n, nd_target_s, nd_sll, nd_tll, key);
            if (mask) {
                put_nd_mask(n, nd_target_mask_s,
                            nd_sll_mask, nd_tll_mask, mask);
            }
        }

        /* 'n' is only written by a "%n" that was reached, so it is no longer
         * -1 exactly when one of the "nd(...)" patterns matched in full.
         *
         * NOTE(review): the return values of the put_nd_key() and
         * put_nd_mask() calls are discarded, so an -EINVAL from them is
         * not reported to the caller -- confirm whether that is intended. */
        if (n != -1)
            return n;

    }

    /* "encap(...)": parse the comma/whitespace-separated attributes up to the
     * closing ')' recursively, wrapped in a nested OVS_KEY_ATTR_ENCAP
     * attribute in 'key' (and in 'mask', if any). */
    if (!strncmp(s, "encap(", 6)) {
        const char *start = s;
        size_t encap, encap_mask = 0;

        encap = nl_msg_start_nested(key, OVS_KEY_ATTR_ENCAP);
        if (mask) {
            encap_mask = nl_msg_start_nested(mask, OVS_KEY_ATTR_ENCAP);
        }

        s += 6;
        for (;;) {
            int retval;

            s += strspn(s, ", \t\r\n");
            if (!*s) {
                /* Input ended before the closing ')'. */
                return -EINVAL;
            } else if (*s == ')') {
                break;
            }

            retval = parse_odp_key_mask_attr(s, port_names, key, mask);
            if (retval < 0) {
                return retval;
            }
            s += retval;
        }
        s++;                    /* Skip the closing ')'. */

        nl_msg_end_nested(key, encap);
        if (mask) {
            nl_msg_end_nested(mask, encap_mask);
        }

        /* Number of bytes consumed, including "encap(" and ")". */
        return s - start;
    }

    /* No known attribute syntax matched at 's'. */
    return -EINVAL;
}

/* Parses the string representation of a datapath flow key, in the
 * format output by odp_flow_key_format().  Returns 0 if successful,
 * otherwise a positive errno value.  On success, the flow key is
 * appended to 'key' as a series of Netlink attributes.  On failure, no
 * data is appended to 'key'.  Either way, 'key''s data might be
 * reallocated.
 *
 * 'mask' is passed through to parse_odp_key_mask_attr() for every attribute.
 * Note that on failure only 'key''s size is restored here; anything already
 * appended to 'mask' is left in place.
 *
 * If 'port_names' is nonnull, it points to an simap that maps from a port name
 * to a port number.  (Port names may be used instead of port numbers in
 * in_port.)
 *
 * On success, the attributes appended to 'key' are individually syntactically
 * valid, but they may not be valid as a sequence.  'key' might, for example,
 * have duplicated keys.  odp_flow_key_to_flow() will detect those errors. */
int
odp_flow_from_string(const char *s, const struct simap *port_names,
                     struct ofpbuf *key, struct ofpbuf *mask)
{
    /* Remembered so that a failed parse can discard partial output. */
    const size_t old_size = key->size;
    for (;;) {
        int retval;

        /* Skip separators between attributes; end of string means done. */
        s += strspn(s, delimiters);
        if (!*s) {
            return 0;
        }

        retval = parse_odp_key_mask_attr(s, port_names, key, mask);
        if (retval < 0) {
            key->size = old_size;
            /* The parser reports negative errno values; callers of this
             * function get the positive form. */
            return -retval;
        }
        s += retval;
    }

    return 0;
}

/* Converts 'nw_frag' to an OVS_FRAG_TYPE_* value: 0 maps to
 * OVS_FRAG_TYPE_NONE, FLOW_NW_FRAG_ANY to OVS_FRAG_TYPE_FIRST, and any other
 * value to OVS_FRAG_TYPE_LATER. */
static uint8_t
ovs_to_odp_frag(uint8_t nw_frag)
{
    return (nw_frag == 0 ?
OVS_FRAG_TYPE_NONE : nw_frag == FLOW_NW_FRAG_ANY ? OVS_FRAG_TYPE_FIRST : OVS_FRAG_TYPE_LATER); } static uint8_t ovs_to_odp_frag_mask(uint8_t nw_frag_mask) { uint8_t frag_mask = ~(OVS_FRAG_TYPE_FIRST | OVS_FRAG_TYPE_LATER); frag_mask |= (nw_frag_mask & FLOW_NW_FRAG_ANY) ? OVS_FRAG_TYPE_FIRST : 0; frag_mask |= (nw_frag_mask & FLOW_NW_FRAG_LATER) ? OVS_FRAG_TYPE_LATER : 0; return frag_mask; } static void odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, const struct flow *flow, odp_port_t odp_in_port) { bool is_mask; struct ovs_key_ethernet *eth_key; size_t encap; /* We assume that if 'data' and 'flow' are not the same, we should * treat 'data' as a mask. */ is_mask = (data != flow); nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); if (flow->tunnel.ip_dst || is_mask) { tun_key_to_attr(buf, &data->tunnel); } nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark); /* Add an ingress port attribute if this is a mask or 'odp_in_port' * is not the magical value "ODPP_NONE". 
*/ if (is_mask || odp_in_port != ODPP_NONE) { nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); } eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, sizeof *eth_key); memcpy(eth_key->eth_src, data->dl_src, ETH_ADDR_LEN); memcpy(eth_key->eth_dst, data->dl_dst, ETH_ADDR_LEN); if (flow->vlan_tci != htons(0) || flow->dl_type == htons(ETH_TYPE_VLAN)) { if (is_mask) { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, htons(UINT16_MAX)); } else { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_TYPE_VLAN)); } nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, data->vlan_tci); encap = nl_msg_start_nested(buf, OVS_KEY_ATTR_ENCAP); if (flow->vlan_tci == htons(0)) { goto unencap; } } else { encap = 0; } if (ntohs(flow->dl_type) < ETH_TYPE_MIN) { /* For backwards compatibility with kernels that don't support * wildcarding, the following convention is used to encode the * OVS_KEY_ATTR_ETHERTYPE for key and mask: * * key mask matches * -------- -------- ------- * >0x5ff 0xffff Specified Ethernet II Ethertype. * >0x5ff 0 Any Ethernet II or non-Ethernet II frame. * 0xffff Any non-Ethernet II frame (except valid * 802.3 SNAP packet with valid eth_type). */ if (is_mask) { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, htons(UINT16_MAX)); } goto unencap; } nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type); if (flow->dl_type == htons(ETH_TYPE_IP)) { struct ovs_key_ipv4 *ipv4_key; ipv4_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_IPV4, sizeof *ipv4_key); ipv4_key->ipv4_src = data->nw_src; ipv4_key->ipv4_dst = data->nw_dst; ipv4_key->ipv4_proto = data->nw_proto; ipv4_key->ipv4_tos = data->nw_tos; ipv4_key->ipv4_ttl = data->nw_ttl; ipv4_key->ipv4_frag = is_mask ? 
ovs_to_odp_frag_mask(data->nw_frag) : ovs_to_odp_frag(data->nw_frag); } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { struct ovs_key_ipv6 *ipv6_key; ipv6_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_IPV6, sizeof *ipv6_key); memcpy(ipv6_key->ipv6_src, &data->ipv6_src, sizeof ipv6_key->ipv6_src); memcpy(ipv6_key->ipv6_dst, &data->ipv6_dst, sizeof ipv6_key->ipv6_dst); ipv6_key->ipv6_label = data->ipv6_label; ipv6_key->ipv6_proto = data->nw_proto; ipv6_key->ipv6_tclass = data->nw_tos; ipv6_key->ipv6_hlimit = data->nw_ttl; ipv6_key->ipv6_frag = is_mask ? ovs_to_odp_frag_mask(data->nw_frag) : ovs_to_odp_frag(data->nw_frag); } else if (flow->dl_type == htons(ETH_TYPE_ARP) || flow->dl_type == htons(ETH_TYPE_RARP)) { struct ovs_key_arp *arp_key; arp_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ARP, sizeof *arp_key); memset(arp_key, 0, sizeof *arp_key); arp_key->arp_sip = data->nw_src; arp_key->arp_tip = data->nw_dst; arp_key->arp_op = htons(data->nw_proto); memcpy(arp_key->arp_sha, data->arp_sha, ETH_ADDR_LEN); memcpy(arp_key->arp_tha, data->arp_tha, ETH_ADDR_LEN); } if (flow->mpls_depth) { struct ovs_key_mpls *mpls_key; mpls_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_MPLS, sizeof *mpls_key); mpls_key->mpls_lse = data->mpls_lse; } if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) { if (flow->nw_proto == IPPROTO_TCP) { struct ovs_key_tcp *tcp_key; tcp_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_TCP, sizeof *tcp_key); tcp_key->tcp_src = data->tp_src; tcp_key->tcp_dst = data->tp_dst; } else if (flow->nw_proto == IPPROTO_UDP) { struct ovs_key_udp *udp_key; udp_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_UDP, sizeof *udp_key); udp_key->udp_src = data->tp_src; udp_key->udp_dst = data->tp_dst; } else if (flow->nw_proto == IPPROTO_SCTP) { struct ovs_key_sctp *sctp_key; sctp_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_SCTP, sizeof *sctp_key); sctp_key->sctp_src = data->tp_src; sctp_key->sctp_dst = data->tp_dst; } else if (flow->dl_type == 
htons(ETH_TYPE_IP) && flow->nw_proto == IPPROTO_ICMP) {
            struct ovs_key_icmp *icmp_key;

            icmp_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ICMP,
                                                sizeof *icmp_key);
            /* The ICMP type and code are read from 'tp_src' and 'tp_dst'. */
            icmp_key->icmp_type = ntohs(data->tp_src);
            icmp_key->icmp_code = ntohs(data->tp_dst);
        } else if (flow->dl_type == htons(ETH_TYPE_IPV6)
                && flow->nw_proto == IPPROTO_ICMPV6) {
            struct ovs_key_icmpv6 *icmpv6_key;

            icmpv6_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ICMPV6,
                                                  sizeof *icmpv6_key);
            icmpv6_key->icmpv6_type = ntohs(data->tp_src);
            icmpv6_key->icmpv6_code = ntohs(data->tp_dst);

            /* An ND attribute is appended only when 'flow' has code 0 and a
             * neighbor solicit/advert type and, if 'is_mask' is set, only
             * when 'data' has both tp_src and tp_dst equal to 0xffff. */
            if (flow->tp_dst == htons(0)
                && (flow->tp_src == htons(ND_NEIGHBOR_SOLICIT)
                    || flow->tp_src == htons(ND_NEIGHBOR_ADVERT))
                && (!is_mask
                    || (data->tp_src == htons(0xffff)
                        && data->tp_dst == htons(0xffff)))) {

                struct ovs_key_nd *nd_key;

                nd_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ND,
                                                    sizeof *nd_key);
                memcpy(nd_key->nd_target, &data->nd_target,
                        sizeof nd_key->nd_target);
                /* The ND link-layer addresses are read from the ARP
                 * hardware address fields of 'data'. */
                memcpy(nd_key->nd_sll, data->arp_sha, ETH_ADDR_LEN);
                memcpy(nd_key->nd_tll, data->arp_tha, ETH_ADDR_LEN);
            }
        }
    }

unencap:
    /* Close the nested attribute if one was opened ('encap' nonzero). */
    if (encap) {
        nl_msg_end_nested(buf, encap);
    }
}

/* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'.
 * 'flow->in_port' is ignored (since it is likely to be an OpenFlow port
 * number rather than a datapath port number).  Instead, if 'odp_in_port'
 * is anything other than ODPP_NONE, it is included in 'buf' as the input
 * port.
 *
 * 'buf' must have at least ODPUTIL_FLOW_KEY_BYTES bytes of space, or be
 * capable of being expanded to allow for that much space.
 *
 * This is a thin wrapper: it passes 'flow' for both flow arguments of
 * odp_flow_key_from_flow__(). */
void
odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow,
                       odp_port_t odp_in_port)
{
    odp_flow_key_from_flow__(buf, flow, flow, odp_in_port);
}

/* Appends a representation of 'mask' as OVS_KEY_ATTR_* attributes to
 * 'buf'.  'flow' is used as a template to determine how to interpret
 * 'mask'.  For example, the 'dl_type' of 'mask' describes the mask, but
 * it doesn't indicate whether the other fields should be interpreted as
 * ARP, IPv4, IPv6, etc.
* * 'buf' must have at least ODPUTIL_FLOW_KEY_BYTES bytes of space, or be * capable of being expanded to allow for that much space. */ void odp_flow_key_from_mask(struct ofpbuf *buf, const struct flow *mask, const struct flow *flow, uint32_t odp_in_port_mask) { odp_flow_key_from_flow__(buf, mask, flow, u32_to_odp(odp_in_port_mask)); } uint32_t odp_flow_key_hash(const struct nlattr *key, size_t key_len) { BUILD_ASSERT_DECL(!(NLA_ALIGNTO % sizeof(uint32_t))); return hash_words(ALIGNED_CAST(const uint32_t *, key), key_len / sizeof(uint32_t), 0); } static void log_odp_key_attributes(struct vlog_rate_limit *rl, const char *title, uint64_t attrs, int out_of_range_attr, const struct nlattr *key, size_t key_len) { struct ds s; int i; if (VLOG_DROP_DBG(rl)) { return; } ds_init(&s); for (i = 0; i < 64; i++) { if (attrs & (UINT64_C(1) << i)) { char namebuf[OVS_KEY_ATTR_BUFSIZE]; ds_put_format(&s, " %s", ovs_key_attr_to_string(i, namebuf, sizeof namebuf)); } } if (out_of_range_attr) { ds_put_format(&s, " %d (and possibly others)", out_of_range_attr); } ds_put_cstr(&s, ": "); odp_flow_key_format(key, key_len, &s); VLOG_DBG("%s:%s", title, ds_cstr(&s)); ds_destroy(&s); } static bool odp_to_ovs_frag(uint8_t odp_frag, struct flow *flow) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (odp_frag > OVS_FRAG_TYPE_LATER) { VLOG_ERR_RL(&rl, "invalid frag %"PRIu8" in flow key", odp_frag); return false; } if (odp_frag != OVS_FRAG_TYPE_NONE) { flow->nw_frag |= FLOW_NW_FRAG_ANY; if (odp_frag == OVS_FRAG_TYPE_LATER) { flow->nw_frag |= FLOW_NW_FRAG_LATER; } } return true; } static bool parse_flow_nlattrs(const struct nlattr *key, size_t key_len, const struct nlattr *attrs[], uint64_t *present_attrsp, int *out_of_range_attrp) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); const struct nlattr *nla; uint64_t present_attrs; size_t left; BUILD_ASSERT(OVS_KEY_ATTR_MAX < CHAR_BIT * sizeof present_attrs); present_attrs = 0; *out_of_range_attrp = 0; 
NL_ATTR_FOR_EACH (nla, left, key, key_len) {
        uint16_t type = nl_attr_type(nla);
        size_t len = nl_attr_get_size(nla);
        int expected_len = odp_flow_key_attr_len(type);

        /* A negative 'expected_len' disables the length check. */
        if (len != expected_len && expected_len >= 0) {
            char namebuf[OVS_KEY_ATTR_BUFSIZE];

            VLOG_ERR_RL(&rl, "attribute %s has length %zu but should have "
                        "length %d", ovs_key_attr_to_string(type, namebuf,
                                                            sizeof namebuf),
                        len, expected_len);
            return false;
        }

        if (type > OVS_KEY_ATTR_MAX) {
            /* Only the most recent out-of-range type is remembered. */
            *out_of_range_attrp = type;
        } else {
            if (present_attrs & (UINT64_C(1) << type)) {
                char namebuf[OVS_KEY_ATTR_BUFSIZE];

                VLOG_ERR_RL(&rl, "duplicate %s attribute in flow key",
                            ovs_key_attr_to_string(type,
                                                   namebuf, sizeof namebuf));
                return false;
            }

            present_attrs |= UINT64_C(1) << type;
            attrs[type] = nla;
        }
    }
    if (left) {
        VLOG_ERR_RL(&rl, "trailing garbage in flow key");
        return false;
    }

    *present_attrsp = present_attrs;
    return true;
}

/* Compares the attribute bitmap 'present_attrs' against 'expected_attrs'.
 *
 * Returns ODP_FIT_TOO_LITTLE if any expected attribute is missing, otherwise
 * ODP_FIT_TOO_MUCH if any unexpected attribute is present or
 * 'out_of_range_attr' is nonzero, otherwise ODP_FIT_PERFECT.  A mismatch is
 * logged together with the flow key in the 'key_len' bytes at 'key'. */
static enum odp_key_fitness
check_expectations(uint64_t present_attrs, int out_of_range_attr,
                   uint64_t expected_attrs,
                   const struct nlattr *key, size_t key_len)
{
    uint64_t missing_attrs;
    uint64_t extra_attrs;

    /* Missing attributes take precedence over extra ones. */
    missing_attrs = expected_attrs & ~present_attrs;
    if (missing_attrs) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
        log_odp_key_attributes(&rl, "expected but not present",
                               missing_attrs, 0, key, key_len);
        return ODP_FIT_TOO_LITTLE;
    }

    extra_attrs = present_attrs & ~expected_attrs;
    if (extra_attrs || out_of_range_attr) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
        log_odp_key_attributes(&rl, "present but not expected",
                               extra_attrs, out_of_range_attr, key, key_len);
        return ODP_FIT_TOO_MUCH;
    }

    return ODP_FIT_PERFECT;
}

/* Fills in 'flow->dl_type' from the OVS_KEY_ATTR_ETHERTYPE attribute in
 * 'attrs', if 'present_attrs' says it is present, and in that case adds the
 * attribute to '*expected_attrs'.
 *
 * The call parses a mask when 'flow' and 'src_flow' are different objects;
 * 'src_flow' then supplies the flow that the mask applies to.
 *
 * Returns false if the Ethertype (or its absence) is invalid, otherwise
 * true. */
static bool
parse_ethertype(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
                uint64_t present_attrs, uint64_t *expected_attrs,
                struct flow *flow, const struct flow *src_flow)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
    bool is_mask = flow != src_flow;

    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE)) {
        flow->dl_type = nl_attr_get_be16(attrs[OVS_KEY_ATTR_ETHERTYPE]);
        if (!is_mask && ntohs(flow->dl_type) < ETH_TYPE_MIN) {
            VLOG_ERR_RL(&rl, "invalid Ethertype %"PRIu16" in flow key",
                        ntohs(flow->dl_type));
            return false;
        }
        /* When the flow's dl_type is below ETH_TYPE_MIN, the only mask
         * accepted for it is the exact-match value 0xffff. */
        if (is_mask && ntohs(src_flow->dl_type) < ETH_TYPE_MIN &&
            flow->dl_type != htons(0xffff)) {
            return false;
        }
        *expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERTYPE;
    } else {
        if (!is_mask) {
            flow->dl_type = htons(FLOW_DL_TYPE_NONE);
        } else if (ntohs(src_flow->dl_type) < ETH_TYPE_MIN) {
            /* See comments in odp_flow_key_from_flow__(). */
            VLOG_ERR_RL(&rl, "mask expected for non-Ethernet II frame");
            return false;
        }
    }
    return true;
}

/* Parses the MPLS, IPv4, IPv6 or ARP attribute selected by
 * 'src_flow->dl_type', followed by the transport-layer attributes, from
 * 'attrs' into 'flow', and returns the fitness computed by
 * check_expectations().  The call parses a mask when 'src_flow' and 'flow'
 * are different objects. */
static enum odp_key_fitness
parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
                  uint64_t present_attrs, int out_of_range_attr,
                  uint64_t expected_attrs, struct flow *flow,
                  const struct nlattr *key, size_t key_len,
                  const struct flow *src_flow)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
    bool is_mask = src_flow != flow;
    const void *check_start = NULL;
    size_t check_len = 0;
    enum ovs_key_attr expected_bit = 0xff;

    if (eth_type_mpls(src_flow->dl_type)) {
        if (!is_mask) {
            expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_MPLS);

            if (!(present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_MPLS))) {
                return ODP_FIT_TOO_LITTLE;
            }
            flow->mpls_lse = nl_attr_get_be32(attrs[OVS_KEY_ATTR_MPLS]);
            flow->mpls_depth++;
        } else if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_MPLS)) {
            flow->mpls_lse = nl_attr_get_be32(attrs[OVS_KEY_ATTR_MPLS]);

            /* A nonzero LSE mask is accepted only together with an
             * exact-match (0xffff) dl_type mask. */
            if (flow->mpls_lse != 0 && flow->dl_type != htons(0xffff)) {
                return ODP_FIT_ERROR;
            }
            expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_MPLS);
            if (flow->mpls_lse) {
                /* XXX Is this needed?
*/ flow->mpls_depth = 0xffff; } } goto done; } else if (src_flow->dl_type == htons(ETH_TYPE_IP)) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IPV4; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; ipv4_key = nl_attr_get(attrs[OVS_KEY_ATTR_IPV4]); flow->nw_src = ipv4_key->ipv4_src; flow->nw_dst = ipv4_key->ipv4_dst; flow->nw_proto = ipv4_key->ipv4_proto; flow->nw_tos = ipv4_key->ipv4_tos; flow->nw_ttl = ipv4_key->ipv4_ttl; if (is_mask) { flow->nw_frag = ipv4_key->ipv4_frag; check_start = ipv4_key; check_len = sizeof *ipv4_key; expected_bit = OVS_KEY_ATTR_IPV4; } else if (!odp_to_ovs_frag(ipv4_key->ipv4_frag, flow)) { return ODP_FIT_ERROR; } } } else if (src_flow->dl_type == htons(ETH_TYPE_IPV6)) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IPV6; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV6)) { const struct ovs_key_ipv6 *ipv6_key; ipv6_key = nl_attr_get(attrs[OVS_KEY_ATTR_IPV6]); memcpy(&flow->ipv6_src, ipv6_key->ipv6_src, sizeof flow->ipv6_src); memcpy(&flow->ipv6_dst, ipv6_key->ipv6_dst, sizeof flow->ipv6_dst); flow->ipv6_label = ipv6_key->ipv6_label; flow->nw_proto = ipv6_key->ipv6_proto; flow->nw_tos = ipv6_key->ipv6_tclass; flow->nw_ttl = ipv6_key->ipv6_hlimit; if (is_mask) { flow->nw_frag = ipv6_key->ipv6_frag; check_start = ipv6_key; check_len = sizeof *ipv6_key; expected_bit = OVS_KEY_ATTR_IPV6; } else if (!odp_to_ovs_frag(ipv6_key->ipv6_frag, flow)) { return ODP_FIT_ERROR; } } } else if (src_flow->dl_type == htons(ETH_TYPE_ARP) || src_flow->dl_type == htons(ETH_TYPE_RARP)) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ARP; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ARP)) { const struct ovs_key_arp *arp_key; arp_key = nl_attr_get(attrs[OVS_KEY_ATTR_ARP]); flow->nw_src = arp_key->arp_sip; flow->nw_dst = arp_key->arp_tip; if (!is_mask && (arp_key->arp_op & htons(0xff00))) { VLOG_ERR_RL(&rl, "unsupported ARP opcode %"PRIu16" in flow " "key", 
ntohs(arp_key->arp_op)); return ODP_FIT_ERROR; } flow->nw_proto = ntohs(arp_key->arp_op); memcpy(flow->arp_sha, arp_key->arp_sha, ETH_ADDR_LEN); memcpy(flow->arp_tha, arp_key->arp_tha, ETH_ADDR_LEN); if (is_mask) { check_start = arp_key; check_len = sizeof *arp_key; expected_bit = OVS_KEY_ATTR_ARP; } } } else { goto done; } if (is_mask) { if (!is_all_zeros(check_start, check_len) && flow->dl_type != htons(0xffff)) { return ODP_FIT_ERROR; } else { expected_attrs |= UINT64_C(1) << expected_bit; } } expected_bit = OVS_KEY_ATTR_UNSPEC; if (src_flow->nw_proto == IPPROTO_TCP && (src_flow->dl_type == htons(ETH_TYPE_IP) || src_flow->dl_type == htons(ETH_TYPE_IPV6)) && !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TCP; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TCP)) { const struct ovs_key_tcp *tcp_key; tcp_key = nl_attr_get(attrs[OVS_KEY_ATTR_TCP]); flow->tp_src = tcp_key->tcp_src; flow->tp_dst = tcp_key->tcp_dst; expected_bit = OVS_KEY_ATTR_TCP; } } else if (src_flow->nw_proto == IPPROTO_UDP && (src_flow->dl_type == htons(ETH_TYPE_IP) || src_flow->dl_type == htons(ETH_TYPE_IPV6)) && !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_UDP; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_UDP)) { const struct ovs_key_udp *udp_key; udp_key = nl_attr_get(attrs[OVS_KEY_ATTR_UDP]); flow->tp_src = udp_key->udp_src; flow->tp_dst = udp_key->udp_dst; expected_bit = OVS_KEY_ATTR_UDP; } } else if (src_flow->nw_proto == IPPROTO_SCTP && (src_flow->dl_type == htons(ETH_TYPE_IP) || src_flow->dl_type == htons(ETH_TYPE_IPV6)) && !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_SCTP; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_SCTP)) { const struct ovs_key_sctp *sctp_key; sctp_key = nl_attr_get(attrs[OVS_KEY_ATTR_SCTP]); flow->tp_src = sctp_key->sctp_src; flow->tp_dst = sctp_key->sctp_dst; expected_bit 
= OVS_KEY_ATTR_SCTP; } } else if (src_flow->nw_proto == IPPROTO_ICMP && src_flow->dl_type == htons(ETH_TYPE_IP) && !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ICMP; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ICMP)) { const struct ovs_key_icmp *icmp_key; icmp_key = nl_attr_get(attrs[OVS_KEY_ATTR_ICMP]); flow->tp_src = htons(icmp_key->icmp_type); flow->tp_dst = htons(icmp_key->icmp_code); expected_bit = OVS_KEY_ATTR_ICMP; } } else if (src_flow->nw_proto == IPPROTO_ICMPV6 && src_flow->dl_type == htons(ETH_TYPE_IPV6) && !(src_flow->nw_frag & FLOW_NW_FRAG_LATER)) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ICMPV6; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ICMPV6)) { const struct ovs_key_icmpv6 *icmpv6_key; icmpv6_key = nl_attr_get(attrs[OVS_KEY_ATTR_ICMPV6]); flow->tp_src = htons(icmpv6_key->icmpv6_type); flow->tp_dst = htons(icmpv6_key->icmpv6_code); expected_bit = OVS_KEY_ATTR_ICMPV6; if (src_flow->tp_dst == htons(0) && (src_flow->tp_src == htons(ND_NEIGHBOR_SOLICIT) || src_flow->tp_src == htons(ND_NEIGHBOR_ADVERT))) { if (!is_mask) { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ND; } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ND)) { const struct ovs_key_nd *nd_key; nd_key = nl_attr_get(attrs[OVS_KEY_ATTR_ND]); memcpy(&flow->nd_target, nd_key->nd_target, sizeof flow->nd_target); memcpy(flow->arp_sha, nd_key->nd_sll, ETH_ADDR_LEN); memcpy(flow->arp_tha, nd_key->nd_tll, ETH_ADDR_LEN); if (is_mask) { if (!is_all_zeros((const uint8_t *) nd_key, sizeof *nd_key) && (flow->tp_src != htons(0xffff) || flow->tp_dst != htons(0xffff))) { return ODP_FIT_ERROR; } else { expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ND; } } } } } } if (is_mask && expected_bit != OVS_KEY_ATTR_UNSPEC) { if ((flow->tp_src || flow->tp_dst) && flow->nw_proto != 0xff) { return ODP_FIT_ERROR; } else { expected_attrs |= UINT64_C(1) << expected_bit; } } done: return check_expectations(present_attrs, 
out_of_range_attr, expected_attrs, key, key_len); } /* Parse 802.1Q header then encapsulated L3 attributes. */ static enum odp_key_fitness parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], uint64_t present_attrs, int out_of_range_attr, uint64_t expected_attrs, struct flow *flow, const struct nlattr *key, size_t key_len, const struct flow *src_flow) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); bool is_mask = src_flow != flow; const struct nlattr *encap = (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ENCAP) ? attrs[OVS_KEY_ATTR_ENCAP] : NULL); enum odp_key_fitness encap_fitness; enum odp_key_fitness fitness; ovs_be16 tci; /* Calculate fitness of outer attributes. */ if (!is_mask) { expected_attrs |= ((UINT64_C(1) << OVS_KEY_ATTR_VLAN) | (UINT64_C(1) << OVS_KEY_ATTR_ENCAP)); } else { if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_VLAN)) { expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_VLAN); } if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ENCAP)) { expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_ENCAP); } } fitness = check_expectations(present_attrs, out_of_range_attr, expected_attrs, key, key_len); /* Get the VLAN TCI value. */ if (!is_mask && !(present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_VLAN))) { return ODP_FIT_TOO_LITTLE; } else { tci = (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_VLAN) ? nl_attr_get_be16(attrs[OVS_KEY_ATTR_VLAN]) : htons(0)); if (!is_mask) { if (tci == htons(0)) { /* Corner case for a truncated 802.1Q header. */ if (fitness == ODP_FIT_PERFECT && nl_attr_get_size(encap)) { return ODP_FIT_TOO_MUCH; } return fitness; } else if (!(tci & htons(VLAN_CFI))) { VLOG_ERR_RL(&rl, "OVS_KEY_ATTR_VLAN 0x%04"PRIx16" is nonzero " "but CFI bit is not set", ntohs(tci)); return ODP_FIT_ERROR; } } /* Set vlan_tci. * Remove the TPID from dl_type since it's not the real Ethertype. 
*/ flow->dl_type = htons(0); flow->vlan_tci = tci; } if (is_mask && !(present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ENCAP))) { return fitness; } /* Now parse the encapsulated attributes. */ if (!parse_flow_nlattrs(nl_attr_get(encap), nl_attr_get_size(encap), attrs, &present_attrs, &out_of_range_attr)) { return ODP_FIT_ERROR; } expected_attrs = 0; if (!parse_ethertype(attrs, present_attrs, &expected_attrs, flow, src_flow)) { return ODP_FIT_ERROR; } encap_fitness = parse_l2_5_onward(attrs, present_attrs, out_of_range_attr, expected_attrs, flow, key, key_len, src_flow); /* The overall fitness is the worse of the outer and inner attributes. */ return MAX(fitness, encap_fitness); } static enum odp_key_fitness odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, struct flow *flow, const struct flow *src_flow) { const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1]; uint64_t expected_attrs; uint64_t present_attrs; int out_of_range_attr; bool is_mask = src_flow != flow; memset(flow, 0, sizeof *flow); /* Parse attributes. */ if (!parse_flow_nlattrs(key, key_len, attrs, &present_attrs, &out_of_range_attr)) { return ODP_FIT_ERROR; } expected_attrs = 0; /* Metadata. 
*/
    /* Each metadata attribute is optional; one that is present is copied
     * into 'flow' and added to 'expected_attrs'. */
    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_PRIORITY)) {
        flow->skb_priority = nl_attr_get_u32(attrs[OVS_KEY_ATTR_PRIORITY]);
        expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_PRIORITY;
    }

    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK)) {
        flow->pkt_mark = nl_attr_get_u32(attrs[OVS_KEY_ATTR_SKB_MARK]);
        expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK;
    }

    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) {
        enum odp_key_fitness res;

        res = odp_tun_key_from_attr(attrs[OVS_KEY_ATTR_TUNNEL], &flow->tunnel);
        if (res == ODP_FIT_ERROR) {
            return ODP_FIT_ERROR;
        } else if (res == ODP_FIT_PERFECT) {
            /* The tunnel attribute counts as expected only when it was
             * parsed with a perfect fit. */
            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUNNEL;
        }
    }

    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IN_PORT)) {
        flow->in_port.odp_port
            = nl_attr_get_odp_port(attrs[OVS_KEY_ATTR_IN_PORT]);
        expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IN_PORT;
    } else if (!is_mask) {
        flow->in_port.odp_port = ODPP_NONE;
    }

    /* Ethernet header. */
    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_ETHERNET)) {
        const struct ovs_key_ethernet *eth_key;

        eth_key = nl_attr_get(attrs[OVS_KEY_ATTR_ETHERNET]);
        memcpy(flow->dl_src, eth_key->eth_src, ETH_ADDR_LEN);
        memcpy(flow->dl_dst, eth_key->eth_dst, ETH_ADDR_LEN);
        if (is_mask) {
            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
        }
    }
    /* A flow (as opposed to a mask) always expects an Ethernet header. */
    if (!is_mask) {
        expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_ETHERNET;
    }

    /* Get Ethertype or 802.1Q TPID or FLOW_DL_TYPE_NONE. */
    if (!parse_ethertype(attrs, present_attrs, &expected_attrs, flow,
        src_flow)) {
        return ODP_FIT_ERROR;
    }

    /* The 802.1Q path is taken for a mask when the flow it applies to has
     * the CFI bit set in its vlan_tci, and for a flow when its dl_type is
     * the VLAN Ethertype. */
    if ((is_mask && (src_flow->vlan_tci & htons(VLAN_CFI))) ||
        (!is_mask && src_flow->dl_type == htons(ETH_TYPE_VLAN))) {
        return parse_8021q_onward(attrs, present_attrs, out_of_range_attr,
                                  expected_attrs, flow, key, key_len, src_flow);
    }
    if (is_mask) {
        /* The vlan_tci mask defaults to 0xffff unless the key supplies
         * one. */
        flow->vlan_tci = htons(0xffff);
        if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_VLAN)) {
            flow->vlan_tci = nl_attr_get_be16(attrs[OVS_KEY_ATTR_VLAN]);
            expected_attrs |= (UINT64_C(1) << OVS_KEY_ATTR_VLAN);
        }
    }
    return parse_l2_5_onward(attrs, present_attrs, out_of_range_attr,
                             expected_attrs, flow, key, key_len, src_flow);
}

/* Converts the 'key_len' bytes of OVS_KEY_ATTR_* attributes in 'key' to a flow
 * structure in 'flow'.  Returns an ODP_FIT_* value that indicates how well
 * 'key' fits our expectations for what a flow key should contain.
 *
 * The 'in_port' will be the datapath's understanding of the port.  The
 * caller will need to translate with odp_port_to_ofp_port() if the
 * OpenFlow port is needed.
 *
 * This function doesn't take the packet itself as an argument because none of
 * the currently understood OVS_KEY_ATTR_* attributes require it.  Currently,
 * it is always possible to infer which additional attribute(s) should appear
 * by looking at the attributes for lower-level protocols, e.g. if the network
 * protocol in OVS_KEY_ATTR_IPV4 or OVS_KEY_ATTR_IPV6 is IPPROTO_TCP then we
 * know that a OVS_KEY_ATTR_TCP attribute must appear and that otherwise it
 * must be absent. */
enum odp_key_fitness
odp_flow_key_to_flow(const struct nlattr *key, size_t key_len,
                     struct flow *flow)
{
   /* Passing 'flow' as both the destination and the source flow selects the
    * non-mask mode of odp_flow_key_to_flow__(). */
   return odp_flow_key_to_flow__(key, key_len, flow, flow);
}

/* Converts the 'key_len' bytes of OVS_KEY_ATTR_* attributes in 'key' to a mask
 * structure in 'mask'.  'flow' must be a previously translated flow
 * corresponding to 'mask'.
Returns an ODP_FIT_* value that indicates how well * 'key' fits our expectations for what a flow key should contain. */ enum odp_key_fitness odp_flow_key_to_mask(const struct nlattr *key, size_t key_len, struct flow *mask, const struct flow *flow) { return odp_flow_key_to_flow__(key, key_len, mask, flow); } /* Returns 'fitness' as a string, for use in debug messages. */ const char * odp_key_fitness_to_string(enum odp_key_fitness fitness) { switch (fitness) { case ODP_FIT_PERFECT: return "OK"; case ODP_FIT_TOO_MUCH: return "too_much"; case ODP_FIT_TOO_LITTLE: return "too_little"; case ODP_FIT_ERROR: return "error"; default: return ""; } } /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies * Netlink PID 'pid'. If 'userdata' is nonnull, adds a userdata attribute * whose contents are the 'userdata_size' bytes at 'userdata' and returns the * offset within 'odp_actions' of the start of the cookie. (If 'userdata' is * null, then the return value is not meaningful.) */ size_t odp_put_userspace_action(uint32_t pid, const void *userdata, size_t userdata_size, struct ofpbuf *odp_actions) { size_t userdata_ofs; size_t offset; offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE); nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid); if (userdata) { userdata_ofs = odp_actions->size + NLA_HDRLEN; /* The OVS kernel module before OVS 1.11 and the upstream Linux kernel * module before Linux 3.10 required the userdata to be exactly 8 bytes * long: * * - The kernel rejected shorter userdata with -ERANGE. * * - The kernel silently dropped userdata beyond the first 8 bytes. * * Thus, for maximum compatibility, always put at least 8 bytes. (We * separately disable features that required more than 8 bytes.) 
*/ memcpy(nl_msg_put_unspec_zero(odp_actions, OVS_USERSPACE_ATTR_USERDATA, MAX(8, userdata_size)), userdata, userdata_size); } else { userdata_ofs = 0; } nl_msg_end_nested(odp_actions, offset); return userdata_ofs; } void odp_put_tunnel_action(const struct flow_tnl *tunnel, struct ofpbuf *odp_actions) { size_t offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET); tun_key_to_attr(odp_actions, tunnel); nl_msg_end_nested(odp_actions, offset); } /* The commit_odp_actions() function and its helpers. */ static void commit_set_action(struct ofpbuf *odp_actions, enum ovs_key_attr key_type, const void *key, size_t key_size) { size_t offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SET); nl_msg_put_unspec(odp_actions, key_type, key, key_size); nl_msg_end_nested(odp_actions, offset); } void odp_put_pkt_mark_action(const uint32_t pkt_mark, struct ofpbuf *odp_actions) { commit_set_action(odp_actions, OVS_KEY_ATTR_SKB_MARK, &pkt_mark, sizeof(pkt_mark)); } /* If any of the flow key data that ODP actions can modify are different in * 'base->tunnel' and 'flow->tunnel', appends a set_tunnel ODP action to * 'odp_actions' that change the flow tunneling information in key from * 'base->tunnel' into 'flow->tunnel', and then changes 'base->tunnel' in the * same way. In other words, operates the same as commit_odp_actions(), but * only on tunneling information. */ void commit_odp_tunnel_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions) { /* A valid IPV4_TUNNEL must have non-zero ip_dst. 
*/ if (flow->tunnel.ip_dst) { if (!memcmp(&base->tunnel, &flow->tunnel, sizeof base->tunnel)) { return; } memcpy(&base->tunnel, &flow->tunnel, sizeof base->tunnel); odp_put_tunnel_action(&base->tunnel, odp_actions); } } static void commit_set_ether_addr_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { struct ovs_key_ethernet eth_key; if (eth_addr_equals(base->dl_src, flow->dl_src) && eth_addr_equals(base->dl_dst, flow->dl_dst)) { return; } memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); memcpy(base->dl_src, flow->dl_src, ETH_ADDR_LEN); memcpy(base->dl_dst, flow->dl_dst, ETH_ADDR_LEN); memcpy(eth_key.eth_src, base->dl_src, ETH_ADDR_LEN); memcpy(eth_key.eth_dst, base->dl_dst, ETH_ADDR_LEN); commit_set_action(odp_actions, OVS_KEY_ATTR_ETHERNET, ð_key, sizeof(eth_key)); } static void commit_vlan_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { if (base->vlan_tci == flow->vlan_tci) { return; } memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci); if (base->vlan_tci & htons(VLAN_CFI)) { nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_POP_VLAN); } if (flow->vlan_tci & htons(VLAN_CFI)) { struct ovs_action_push_vlan vlan; vlan.vlan_tpid = htons(ETH_TYPE_VLAN); vlan.vlan_tci = flow->vlan_tci; nl_msg_put_unspec(odp_actions, OVS_ACTION_ATTR_PUSH_VLAN, &vlan, sizeof vlan); } base->vlan_tci = flow->vlan_tci; } static void commit_mpls_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { if (flow->mpls_lse == base->mpls_lse && flow->mpls_depth == base->mpls_depth) { return; } memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse); if (flow->mpls_depth < base->mpls_depth) { if (base->mpls_depth - flow->mpls_depth > 1) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); VLOG_WARN_RL(&rl, "Multiple mpls_pop actions 
reduced to " " a single mpls_pop action"); } nl_msg_put_be16(odp_actions, OVS_ACTION_ATTR_POP_MPLS, flow->dl_type); } else if (flow->mpls_depth > base->mpls_depth) { struct ovs_action_push_mpls *mpls; if (flow->mpls_depth - base->mpls_depth > 1) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); VLOG_WARN_RL(&rl, "Multiple mpls_push actions reduced to " " a single mpls_push action"); } mpls = nl_msg_put_unspec_uninit(odp_actions, OVS_ACTION_ATTR_PUSH_MPLS, sizeof *mpls); memset(mpls, 0, sizeof *mpls); mpls->mpls_ethertype = flow->dl_type; mpls->mpls_lse = flow->mpls_lse; } else { struct ovs_key_mpls mpls_key; mpls_key.mpls_lse = flow->mpls_lse; commit_set_action(odp_actions, OVS_KEY_ATTR_MPLS, &mpls_key, sizeof(mpls_key)); } base->dl_type = flow->dl_type; base->mpls_lse = flow->mpls_lse; base->mpls_depth = flow->mpls_depth; } static void commit_set_ipv4_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { struct ovs_key_ipv4 ipv4_key; if (base->nw_src == flow->nw_src && base->nw_dst == flow->nw_dst && base->nw_tos == flow->nw_tos && base->nw_ttl == flow->nw_ttl && base->nw_frag == flow->nw_frag) { return; } memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src); memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst); memset(&wc->masks.nw_tos, 0xff, sizeof wc->masks.nw_tos); memset(&wc->masks.nw_ttl, 0xff, sizeof wc->masks.nw_ttl); memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); memset(&wc->masks.nw_frag, 0xff, sizeof wc->masks.nw_frag); ipv4_key.ipv4_src = base->nw_src = flow->nw_src; ipv4_key.ipv4_dst = base->nw_dst = flow->nw_dst; ipv4_key.ipv4_tos = base->nw_tos = flow->nw_tos; ipv4_key.ipv4_ttl = base->nw_ttl = flow->nw_ttl; ipv4_key.ipv4_proto = base->nw_proto; ipv4_key.ipv4_frag = ovs_to_odp_frag(base->nw_frag); commit_set_action(odp_actions, OVS_KEY_ATTR_IPV4, &ipv4_key, sizeof(ipv4_key)); } static void commit_set_ipv6_action(const struct flow *flow, struct flow *base, 
struct ofpbuf *odp_actions, struct flow_wildcards *wc) { struct ovs_key_ipv6 ipv6_key; if (ipv6_addr_equals(&base->ipv6_src, &flow->ipv6_src) && ipv6_addr_equals(&base->ipv6_dst, &flow->ipv6_dst) && base->ipv6_label == flow->ipv6_label && base->nw_tos == flow->nw_tos && base->nw_ttl == flow->nw_ttl && base->nw_frag == flow->nw_frag) { return; } memset(&wc->masks.ipv6_src, 0xff, sizeof wc->masks.ipv6_src); memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst); memset(&wc->masks.ipv6_label, 0xff, sizeof wc->masks.ipv6_label); memset(&wc->masks.nw_tos, 0xff, sizeof wc->masks.nw_tos); memset(&wc->masks.nw_ttl, 0xff, sizeof wc->masks.nw_ttl); memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); memset(&wc->masks.nw_frag, 0xff, sizeof wc->masks.nw_frag); base->ipv6_src = flow->ipv6_src; memcpy(&ipv6_key.ipv6_src, &base->ipv6_src, sizeof(ipv6_key.ipv6_src)); base->ipv6_dst = flow->ipv6_dst; memcpy(&ipv6_key.ipv6_dst, &base->ipv6_dst, sizeof(ipv6_key.ipv6_dst)); ipv6_key.ipv6_label = base->ipv6_label = flow->ipv6_label; ipv6_key.ipv6_tclass = base->nw_tos = flow->nw_tos; ipv6_key.ipv6_hlimit = base->nw_ttl = flow->nw_ttl; ipv6_key.ipv6_proto = base->nw_proto; ipv6_key.ipv6_frag = ovs_to_odp_frag(base->nw_frag); commit_set_action(odp_actions, OVS_KEY_ATTR_IPV6, &ipv6_key, sizeof(ipv6_key)); } static void commit_set_nw_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { /* Check if flow really have an IP header. 
*/ if (!flow->nw_proto) { return; } if (base->dl_type == htons(ETH_TYPE_IP)) { commit_set_ipv4_action(flow, base, odp_actions, wc); } else if (base->dl_type == htons(ETH_TYPE_IPV6)) { commit_set_ipv6_action(flow, base, odp_actions, wc); } } static void commit_set_port_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { if (!is_ip_any(base) || (!base->tp_src && !base->tp_dst)) { return; } if (base->tp_src == flow->tp_src && base->tp_dst == flow->tp_dst) { return; } memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src); memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst); if (flow->nw_proto == IPPROTO_TCP) { struct ovs_key_tcp port_key; port_key.tcp_src = base->tp_src = flow->tp_src; port_key.tcp_dst = base->tp_dst = flow->tp_dst; commit_set_action(odp_actions, OVS_KEY_ATTR_TCP, &port_key, sizeof(port_key)); } else if (flow->nw_proto == IPPROTO_UDP) { struct ovs_key_udp port_key; port_key.udp_src = base->tp_src = flow->tp_src; port_key.udp_dst = base->tp_dst = flow->tp_dst; commit_set_action(odp_actions, OVS_KEY_ATTR_UDP, &port_key, sizeof(port_key)); } else if (flow->nw_proto == IPPROTO_SCTP) { struct ovs_key_sctp port_key; port_key.sctp_src = base->tp_src = flow->tp_src; port_key.sctp_dst = base->tp_dst = flow->tp_dst; commit_set_action(odp_actions, OVS_KEY_ATTR_SCTP, &port_key, sizeof(port_key)); } } static void commit_set_priority_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { if (base->skb_priority == flow->skb_priority) { return; } memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority); base->skb_priority = flow->skb_priority; commit_set_action(odp_actions, OVS_KEY_ATTR_PRIORITY, &base->skb_priority, sizeof(base->skb_priority)); } static void commit_set_pkt_mark_action(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { if (base->pkt_mark == flow->pkt_mark) { return; } 
memset(&wc->masks.pkt_mark, 0xff, sizeof wc->masks.pkt_mark); base->pkt_mark = flow->pkt_mark; odp_put_pkt_mark_action(base->pkt_mark, odp_actions); } /* If any of the flow key data that ODP actions can modify are different in * 'base' and 'flow', appends ODP actions to 'odp_actions' that change the flow * key from 'base' into 'flow', and then changes 'base' the same way. Does not * commit set_tunnel actions. Users should call commit_odp_tunnel_action() * in addition to this function if needed. Sets fields in 'wc' that are * used as part of the action. */ void commit_odp_actions(const struct flow *flow, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { commit_set_ether_addr_action(flow, base, odp_actions, wc); commit_vlan_action(flow, base, odp_actions, wc); commit_set_nw_action(flow, base, odp_actions, wc); commit_set_port_action(flow, base, odp_actions, wc); /* Committing MPLS actions should occur after committing nw and port * actions. This is because committing MPLS actions may alter a packet so * that it is no longer IP and thus nw and port actions are no longer valid. */ commit_mpls_action(flow, base, odp_actions, wc); commit_set_priority_action(flow, base, odp_actions, wc); commit_set_pkt_mark_action(flow, base, odp_actions, wc); } openvswitch-2.0.1+git20140120/lib/odp-util.h000066400000000000000000000212041226605124000201250ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ODP_UTIL_H #define ODP_UTIL_H 1 #include #include #include #include #include #include "hash.h" #include "openflow/openflow.h" #include "util.h" struct ds; struct flow; struct flow_tnl; struct flow_wildcards; struct nlattr; struct ofpbuf; struct simap; #define ODPP_LOCAL ODP_PORT_C(OVSP_LOCAL) #define ODPP_NONE ODP_PORT_C(UINT32_MAX) void format_odp_actions(struct ds *, const struct nlattr *odp_actions, size_t actions_len); int odp_actions_from_string(const char *, const struct simap *port_names, struct ofpbuf *odp_actions); /* The maximum number of bytes that odp_flow_key_from_flow() appends to a * buffer. This is the upper bound on the length of a nlattr-formatted flow * key that ovs-vswitchd fully understands. * * OVS doesn't insist that ovs-vswitchd and the datapath have exactly the same * idea of a flow, so therefore this value isn't necessarily an upper bound on * the length of a flow key that the datapath can pass to ovs-vswitchd. 
* * The longest nlattr-formatted flow key appended by odp_flow_key_from_flow() * would be: * * struct pad nl hdr total * ------ --- ------ ----- * OVS_KEY_ATTR_PRIORITY 4 -- 4 8 * OVS_KEY_ATTR_TUNNEL 0 -- 4 4 * - OVS_TUNNEL_KEY_ATTR_ID 8 -- 4 12 * - OVS_TUNNEL_KEY_ATTR_IPV4_SRC 4 -- 4 8 * - OVS_TUNNEL_KEY_ATTR_IPV4_DST 4 -- 4 8 * - OVS_TUNNEL_KEY_ATTR_TOS 1 3 4 8 * - OVS_TUNNEL_KEY_ATTR_TTL 1 3 4 8 * - OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT 0 -- 4 4 * - OVS_TUNNEL_KEY_ATTR_CSUM 0 -- 4 4 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8 * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) * OVS_KEY_ATTR_8021Q 4 -- 4 8 * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation) * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype) * OVS_KEY_ATTR_IPV6 40 -- 4 44 * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ---------------------------------------------------------- * total 208 * * We include some slack space in case the calculation isn't quite right or we * add another field and forget to adjust this value. */ #define ODPUTIL_FLOW_KEY_BYTES 256 /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow * key. An array of "struct nlattr" might not, in theory, be sufficiently * aligned because it only contains 16-bit types. 
*/ struct odputil_keybuf { uint32_t keybuf[DIV_ROUND_UP(ODPUTIL_FLOW_KEY_BYTES, 4)]; }; enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *, struct flow_tnl *); void odp_flow_format(const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, struct ds *, bool verbose); void odp_flow_key_format(const struct nlattr *, size_t, struct ds *); int odp_flow_from_string(const char *s, const struct simap *port_names, struct ofpbuf *, struct ofpbuf *); void odp_flow_key_from_flow(struct ofpbuf *, const struct flow *, odp_port_t odp_in_port); void odp_flow_key_from_mask(struct ofpbuf *, const struct flow *mask, const struct flow *flow, uint32_t odp_in_port); uint32_t odp_flow_key_hash(const struct nlattr *, size_t); /* How well a kernel-provided flow key (a sequence of OVS_KEY_ATTR_* * attributes) matches OVS userspace expectations. * * These values are arranged so that greater values are "more important" than * lesser ones. In particular, a single flow key can fit the descriptions for * both ODP_FIT_TOO_LITTLE and ODP_FIT_TOO_MUCH. Such a key is treated as * ODP_FIT_TOO_LITTLE. */ enum odp_key_fitness { ODP_FIT_PERFECT, /* The key had exactly the fields we expect. */ ODP_FIT_TOO_MUCH, /* The key had fields we don't understand. */ ODP_FIT_TOO_LITTLE, /* The key lacked fields we expected to see. */ ODP_FIT_ERROR, /* The key was invalid. */ }; enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *, size_t, struct flow *); enum odp_key_fitness odp_flow_key_to_mask(const struct nlattr *key, size_t len, struct flow *mask, const struct flow *flow); const char *odp_key_fitness_to_string(enum odp_key_fitness); void commit_odp_tunnel_action(const struct flow *, struct flow *base, struct ofpbuf *odp_actions); void commit_odp_actions(const struct flow *, struct flow *base, struct ofpbuf *odp_actions, struct flow_wildcards *wc); /* ofproto-dpif interface. * * The following types and functions are logically part of ofproto-dpif. 
* ofproto-dpif puts values of these types into the flows that it installs in * the kernel datapath, though, so ovs-dpctl needs to interpret them so that * it can print flows in a more human-readable manner. */ enum user_action_cookie_type { USER_ACTION_COOKIE_UNSPEC, USER_ACTION_COOKIE_SFLOW, /* Packet for per-bridge sFlow sampling. */ USER_ACTION_COOKIE_SLOW_PATH, /* Userspace must process this flow. */ USER_ACTION_COOKIE_FLOW_SAMPLE, /* Packet for per-flow sampling. */ USER_ACTION_COOKIE_IPFIX, /* Packet for per-bridge IPFIX sampling. */ }; /* user_action_cookie is passed as argument to OVS_ACTION_ATTR_USERSPACE. * Since it is passed to kernel as u64, its size has to be 8 bytes. */ union user_action_cookie { uint16_t type; /* enum user_action_cookie_type. */ struct { uint16_t type; /* USER_ACTION_COOKIE_SFLOW. */ ovs_be16 vlan_tci; /* Destination VLAN TCI. */ uint32_t output; /* SFL_FLOW_SAMPLE_TYPE 'output' value. */ } sflow; struct { uint16_t type; /* USER_ACTION_COOKIE_SLOW_PATH. */ uint16_t unused; uint32_t reason; /* enum slow_path_reason. */ } slow_path; struct { uint16_t type; /* USER_ACTION_COOKIE_FLOW_SAMPLE. */ uint16_t probability; /* Sampling probability. */ uint32_t collector_set_id; /* ID of IPFIX collector set. */ uint32_t obs_domain_id; /* Observation Domain ID. */ uint32_t obs_point_id; /* Observation Point ID. */ } flow_sample; struct { uint16_t type; /* USER_ACTION_COOKIE_IPFIX. */ } ipfix; }; BUILD_ASSERT_DECL(sizeof(union user_action_cookie) == 16); size_t odp_put_userspace_action(uint32_t pid, const void *userdata, size_t userdata_size, struct ofpbuf *odp_actions); void odp_put_tunnel_action(const struct flow_tnl *tunnel, struct ofpbuf *odp_actions); void odp_put_pkt_mark_action(const uint32_t pkt_mark, struct ofpbuf *odp_actions); /* Reasons why a subfacet might not be fast-pathable. */ enum slow_path_reason { SLOW_CFM = 1, /* CFM packets need per-packet processing. */ SLOW_LACP, /* LACP packets need per-packet processing. 
*/ SLOW_STP, /* STP packets need per-packet processing. */ SLOW_BFD, /* BFD packets need per-packet processing. */ SLOW_CONTROLLER, /* Packets must go to OpenFlow controller. */ __SLOW_MAX }; #endif /* odp-util.h */ openvswitch-2.0.1+git20140120/lib/ofp-actions.c000066400000000000000000002256101226605124000206140ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ofp-actions.h" #include "bundle.h" #include "byte-order.h" #include "compiler.h" #include "dynamic-string.h" #include "learn.h" #include "meta-flow.h" #include "multipath.h" #include "nx-match.h" #include "ofp-util.h" #include "ofpbuf.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ofp_actions); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); /* Converting OpenFlow 1.0 to ofpacts. 
*/ static enum ofperr output_from_openflow10(const struct ofp10_action_output *oao, struct ofpbuf *out) { struct ofpact_output *output; output = ofpact_put_OUTPUT(out); output->port = u16_to_ofp(ntohs(oao->port)); output->max_len = ntohs(oao->max_len); return ofputil_check_output_port(output->port, OFPP_MAX); } static enum ofperr enqueue_from_openflow10(const struct ofp10_action_enqueue *oae, struct ofpbuf *out) { struct ofpact_enqueue *enqueue; enqueue = ofpact_put_ENQUEUE(out); enqueue->port = u16_to_ofp(ntohs(oae->port)); enqueue->queue = ntohl(oae->queue_id); if (ofp_to_u16(enqueue->port) >= ofp_to_u16(OFPP_MAX) && enqueue->port != OFPP_IN_PORT && enqueue->port != OFPP_LOCAL) { return OFPERR_OFPBAC_BAD_OUT_PORT; } return 0; } static void resubmit_from_openflow(const struct nx_action_resubmit *nar, struct ofpbuf *out) { struct ofpact_resubmit *resubmit; resubmit = ofpact_put_RESUBMIT(out); resubmit->ofpact.compat = OFPUTIL_NXAST_RESUBMIT; resubmit->in_port = u16_to_ofp(ntohs(nar->in_port)); resubmit->table_id = 0xff; } static enum ofperr resubmit_table_from_openflow(const struct nx_action_resubmit *nar, struct ofpbuf *out) { struct ofpact_resubmit *resubmit; if (nar->pad[0] || nar->pad[1] || nar->pad[2]) { return OFPERR_OFPBAC_BAD_ARGUMENT; } resubmit = ofpact_put_RESUBMIT(out); resubmit->ofpact.compat = OFPUTIL_NXAST_RESUBMIT_TABLE; resubmit->in_port = u16_to_ofp(ntohs(nar->in_port)); resubmit->table_id = nar->table; return 0; } static enum ofperr output_reg_from_openflow(const struct nx_action_output_reg *naor, struct ofpbuf *out) { struct ofpact_output_reg *output_reg; if (!is_all_zeros(naor->zero, sizeof naor->zero)) { return OFPERR_OFPBAC_BAD_ARGUMENT; } output_reg = ofpact_put_OUTPUT_REG(out); output_reg->src.field = mf_from_nxm_header(ntohl(naor->src)); output_reg->src.ofs = nxm_decode_ofs(naor->ofs_nbits); output_reg->src.n_bits = nxm_decode_n_bits(naor->ofs_nbits); output_reg->max_len = ntohs(naor->max_len); return mf_check_src(&output_reg->src, NULL); 
} static void fin_timeout_from_openflow(const struct nx_action_fin_timeout *naft, struct ofpbuf *out) { struct ofpact_fin_timeout *oft; oft = ofpact_put_FIN_TIMEOUT(out); oft->fin_idle_timeout = ntohs(naft->fin_idle_timeout); oft->fin_hard_timeout = ntohs(naft->fin_hard_timeout); } static void controller_from_openflow(const struct nx_action_controller *nac, struct ofpbuf *out) { struct ofpact_controller *oc; oc = ofpact_put_CONTROLLER(out); oc->max_len = ntohs(nac->max_len); oc->controller_id = ntohs(nac->controller_id); oc->reason = nac->reason; } static enum ofperr metadata_from_nxast(const struct nx_action_write_metadata *nawm, struct ofpbuf *out) { struct ofpact_metadata *om; if (!is_all_zeros(nawm->zeros, sizeof nawm->zeros)) { return OFPERR_NXBRC_MUST_BE_ZERO; } om = ofpact_put_WRITE_METADATA(out); om->metadata = nawm->metadata; om->mask = nawm->mask; return 0; } static void note_from_openflow(const struct nx_action_note *nan, struct ofpbuf *out) { struct ofpact_note *note; unsigned int length; length = ntohs(nan->len) - offsetof(struct nx_action_note, note); note = ofpact_put(out, OFPACT_NOTE, offsetof(struct ofpact_note, data) + length); note->length = length; memcpy(note->data, nan->note, length); } static enum ofperr dec_ttl_from_openflow(struct ofpbuf *out, enum ofputil_action_code compat) { uint16_t id = 0; struct ofpact_cnt_ids *ids; enum ofperr error = 0; ids = ofpact_put_DEC_TTL(out); ids->ofpact.compat = compat; ids->n_controllers = 1; ofpbuf_put(out, &id, sizeof id); ids = out->l2; ofpact_update_len(out, &ids->ofpact); return error; } static enum ofperr dec_ttl_cnt_ids_from_openflow(const struct nx_action_cnt_ids *nac_ids, struct ofpbuf *out) { struct ofpact_cnt_ids *ids; size_t ids_size; int i; ids = ofpact_put_DEC_TTL(out); ids->ofpact.compat = OFPUTIL_NXAST_DEC_TTL_CNT_IDS; ids->n_controllers = ntohs(nac_ids->n_controllers); ids_size = ntohs(nac_ids->len) - sizeof *nac_ids; if (!is_all_zeros(nac_ids->zeros, sizeof nac_ids->zeros)) { return 
OFPERR_NXBRC_MUST_BE_ZERO; } if (ids_size < ids->n_controllers * sizeof(ovs_be16)) { VLOG_WARN_RL(&rl, "Nicira action dec_ttl_cnt_ids only has %zu bytes " "allocated for controller ids. %zu bytes are required for " "%"PRIu16" controllers.", ids_size, ids->n_controllers * sizeof(ovs_be16), ids->n_controllers); return OFPERR_OFPBAC_BAD_LEN; } for (i = 0; i < ids->n_controllers; i++) { uint16_t id = ntohs(((ovs_be16 *)(nac_ids + 1))[i]); ofpbuf_put(out, &id, sizeof id); ids = out->l2; } ofpact_update_len(out, &ids->ofpact); return 0; } static enum ofperr sample_from_openflow(const struct nx_action_sample *nas, struct ofpbuf *out) { struct ofpact_sample *sample; sample = ofpact_put_SAMPLE(out); sample->probability = ntohs(nas->probability); sample->collector_set_id = ntohl(nas->collector_set_id); sample->obs_domain_id = ntohl(nas->obs_domain_id); sample->obs_point_id = ntohl(nas->obs_point_id); if (sample->probability == 0) { return OFPERR_OFPBAC_BAD_ARGUMENT; } return 0; } static enum ofperr decode_nxast_action(const union ofp_action *a, enum ofputil_action_code *code) { const struct nx_action_header *nah = (const struct nx_action_header *) a; uint16_t len = ntohs(a->header.len); if (len < sizeof(struct nx_action_header)) { return OFPERR_OFPBAC_BAD_LEN; } else if (a->vendor.vendor != CONSTANT_HTONL(NX_VENDOR_ID)) { return OFPERR_OFPBAC_BAD_VENDOR; } switch (nah->subtype) { #define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) \ case CONSTANT_HTONS(ENUM): \ if (EXTENSIBLE \ ? len >= sizeof(struct STRUCT) \ : len == sizeof(struct STRUCT)) { \ *code = OFPUTIL_##ENUM; \ return 0; \ } else { \ return OFPERR_OFPBAC_BAD_LEN; \ } \ NOT_REACHED(); #include "ofp-util.def" case CONSTANT_HTONS(NXAST_SNAT__OBSOLETE): case CONSTANT_HTONS(NXAST_DROP_SPOOFED_ARP__OBSOLETE): default: return OFPERR_OFPBAC_BAD_TYPE; } } /* Parses 'a' to determine its type. On success stores the correct type into * '*code' and returns 0. 
On failure returns an OFPERR_* error code and * '*code' is indeterminate. * * The caller must have already verified that 'a''s length is potentially * correct (that is, a->header.len is nonzero and a multiple of sizeof(union * ofp_action) and no longer than the amount of space allocated to 'a'). * * This function verifies that 'a''s length is correct for the type of action * that it represents. */ static enum ofperr decode_openflow10_action(const union ofp_action *a, enum ofputil_action_code *code) { switch (a->type) { case CONSTANT_HTONS(OFPAT10_VENDOR): return decode_nxast_action(a, code); #define OFPAT10_ACTION(ENUM, STRUCT, NAME) \ case CONSTANT_HTONS(ENUM): \ if (a->header.len == htons(sizeof(struct STRUCT))) { \ *code = OFPUTIL_##ENUM; \ return 0; \ } else { \ return OFPERR_OFPBAC_BAD_LEN; \ } \ break; #include "ofp-util.def" default: return OFPERR_OFPBAC_BAD_TYPE; } } static enum ofperr ofpact_from_nxast(const union ofp_action *a, enum ofputil_action_code code, struct ofpbuf *out) { const struct nx_action_resubmit *nar; const struct nx_action_set_tunnel *nast; const struct nx_action_set_queue *nasq; const struct nx_action_note *nan; const struct nx_action_set_tunnel64 *nast64; const struct nx_action_write_metadata *nawm; struct ofpact_tunnel *tunnel; enum ofperr error = 0; switch (code) { case OFPUTIL_ACTION_INVALID: #define OFPAT10_ACTION(ENUM, STRUCT, NAME) case OFPUTIL_##ENUM: #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) case OFPUTIL_##ENUM: #include "ofp-util.def" NOT_REACHED(); case OFPUTIL_NXAST_RESUBMIT: resubmit_from_openflow((const struct nx_action_resubmit *) a, out); break; case OFPUTIL_NXAST_SET_TUNNEL: nast = (const struct nx_action_set_tunnel *) a; tunnel = ofpact_put_SET_TUNNEL(out); tunnel->ofpact.compat = code; tunnel->tun_id = ntohl(nast->tun_id); break; case OFPUTIL_NXAST_WRITE_METADATA: nawm = ALIGNED_CAST(const struct nx_action_write_metadata *, a); error = metadata_from_nxast(nawm, out); break; case OFPUTIL_NXAST_SET_QUEUE: 
nasq = (const struct nx_action_set_queue *) a; ofpact_put_SET_QUEUE(out)->queue_id = ntohl(nasq->queue_id); break; case OFPUTIL_NXAST_POP_QUEUE: ofpact_put_POP_QUEUE(out); break; case OFPUTIL_NXAST_REG_MOVE: error = nxm_reg_move_from_openflow( (const struct nx_action_reg_move *) a, out); break; case OFPUTIL_NXAST_REG_LOAD: error = nxm_reg_load_from_openflow( ALIGNED_CAST(const struct nx_action_reg_load *, a), out); break; case OFPUTIL_NXAST_STACK_PUSH: error = nxm_stack_push_from_openflow( (const struct nx_action_stack *) a, out); break; case OFPUTIL_NXAST_STACK_POP: error = nxm_stack_pop_from_openflow( (const struct nx_action_stack *) a, out); break; case OFPUTIL_NXAST_NOTE: nan = (const struct nx_action_note *) a; note_from_openflow(nan, out); break; case OFPUTIL_NXAST_SET_TUNNEL64: nast64 = ALIGNED_CAST(const struct nx_action_set_tunnel64 *, a); tunnel = ofpact_put_SET_TUNNEL(out); tunnel->ofpact.compat = code; tunnel->tun_id = ntohll(nast64->tun_id); break; case OFPUTIL_NXAST_MULTIPATH: error = multipath_from_openflow((const struct nx_action_multipath *) a, ofpact_put_MULTIPATH(out)); break; case OFPUTIL_NXAST_BUNDLE: case OFPUTIL_NXAST_BUNDLE_LOAD: error = bundle_from_openflow((const struct nx_action_bundle *) a, out); break; case OFPUTIL_NXAST_OUTPUT_REG: error = output_reg_from_openflow( (const struct nx_action_output_reg *) a, out); break; case OFPUTIL_NXAST_RESUBMIT_TABLE: nar = (const struct nx_action_resubmit *) a; error = resubmit_table_from_openflow(nar, out); break; case OFPUTIL_NXAST_LEARN: error = learn_from_openflow( ALIGNED_CAST(const struct nx_action_learn *, a), out); break; case OFPUTIL_NXAST_EXIT: ofpact_put_EXIT(out); break; case OFPUTIL_NXAST_DEC_TTL: error = dec_ttl_from_openflow(out, code); break; case OFPUTIL_NXAST_DEC_TTL_CNT_IDS: error = dec_ttl_cnt_ids_from_openflow( (const struct nx_action_cnt_ids *) a, out); break; case OFPUTIL_NXAST_FIN_TIMEOUT: fin_timeout_from_openflow( (const struct nx_action_fin_timeout *) a, out); break; case 
OFPUTIL_NXAST_CONTROLLER: controller_from_openflow((const struct nx_action_controller *) a, out); break; case OFPUTIL_NXAST_PUSH_MPLS: { struct nx_action_push_mpls *nxapm = (struct nx_action_push_mpls *)a; if (!eth_type_mpls(nxapm->ethertype)) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_PUSH_MPLS(out)->ethertype = nxapm->ethertype; break; } case OFPUTIL_NXAST_SET_MPLS_TTL: { struct nx_action_mpls_ttl *nxamt = (struct nx_action_mpls_ttl *)a; ofpact_put_SET_MPLS_TTL(out)->ttl = nxamt->ttl; break; } case OFPUTIL_NXAST_DEC_MPLS_TTL: ofpact_put_DEC_MPLS_TTL(out); break; case OFPUTIL_NXAST_POP_MPLS: { struct nx_action_pop_mpls *nxapm = (struct nx_action_pop_mpls *)a; if (eth_type_mpls(nxapm->ethertype)) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_POP_MPLS(out)->ethertype = nxapm->ethertype; break; } case OFPUTIL_NXAST_SAMPLE: error = sample_from_openflow( (const struct nx_action_sample *) a, out); break; } return error; } static enum ofperr ofpact_from_openflow10(const union ofp_action *a, struct ofpbuf *out) { enum ofputil_action_code code; enum ofperr error; error = decode_openflow10_action(a, &code); if (error) { return error; } switch (code) { case OFPUTIL_ACTION_INVALID: #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) case OFPUTIL_##ENUM: #include "ofp-util.def" NOT_REACHED(); case OFPUTIL_OFPAT10_OUTPUT: return output_from_openflow10(&a->output10, out); case OFPUTIL_OFPAT10_SET_VLAN_VID: if (a->vlan_vid.vlan_vid & ~htons(0xfff)) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_SET_VLAN_VID(out)->vlan_vid = ntohs(a->vlan_vid.vlan_vid); break; case OFPUTIL_OFPAT10_SET_VLAN_PCP: if (a->vlan_pcp.vlan_pcp & ~7) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_SET_VLAN_PCP(out)->vlan_pcp = a->vlan_pcp.vlan_pcp; break; case OFPUTIL_OFPAT10_STRIP_VLAN: ofpact_put_STRIP_VLAN(out); break; case OFPUTIL_OFPAT10_SET_DL_SRC: memcpy(ofpact_put_SET_ETH_SRC(out)->mac, ((const struct ofp_action_dl_addr *) a)->dl_addr, ETH_ADDR_LEN); break; case 
OFPUTIL_OFPAT10_SET_DL_DST: memcpy(ofpact_put_SET_ETH_DST(out)->mac, ((const struct ofp_action_dl_addr *) a)->dl_addr, ETH_ADDR_LEN); break; case OFPUTIL_OFPAT10_SET_NW_SRC: ofpact_put_SET_IPV4_SRC(out)->ipv4 = a->nw_addr.nw_addr; break; case OFPUTIL_OFPAT10_SET_NW_DST: ofpact_put_SET_IPV4_DST(out)->ipv4 = a->nw_addr.nw_addr; break; case OFPUTIL_OFPAT10_SET_NW_TOS: if (a->nw_tos.nw_tos & ~IP_DSCP_MASK) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_SET_IPV4_DSCP(out)->dscp = a->nw_tos.nw_tos; break; case OFPUTIL_OFPAT10_SET_TP_SRC: ofpact_put_SET_L4_SRC_PORT(out)->port = ntohs(a->tp_port.tp_port); break; case OFPUTIL_OFPAT10_SET_TP_DST: ofpact_put_SET_L4_DST_PORT(out)->port = ntohs(a->tp_port.tp_port); break; case OFPUTIL_OFPAT10_ENQUEUE: error = enqueue_from_openflow10((const struct ofp10_action_enqueue *) a, out); break; #define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) case OFPUTIL_##ENUM: #include "ofp-util.def" return ofpact_from_nxast(a, code, out); } return error; } static inline union ofp_action * action_next(const union ofp_action *a) { return ((union ofp_action *) (void *) ((uint8_t *) a + ntohs(a->header.len))); } static inline bool action_is_valid(const union ofp_action *a, size_t n_actions) { uint16_t len = ntohs(a->header.len); return (!(len % OFP_ACTION_ALIGN) && len >= sizeof *a && len / sizeof *a <= n_actions); } /* This macro is careful to check for actions with bad lengths. 
*/ #define ACTION_FOR_EACH(ITER, LEFT, ACTIONS, N_ACTIONS) \ for ((ITER) = (ACTIONS), (LEFT) = (N_ACTIONS); \ (LEFT) > 0 && action_is_valid(ITER, LEFT); \ ((LEFT) -= ntohs((ITER)->header.len) / sizeof(union ofp_action), \ (ITER) = action_next(ITER))) static void log_bad_action(const union ofp_action *actions, size_t n_actions, size_t ofs, enum ofperr error) { if (!VLOG_DROP_WARN(&rl)) { struct ds s; ds_init(&s); ds_put_hex_dump(&s, actions, n_actions * sizeof *actions, 0, false); VLOG_WARN("bad action at offset %#zx (%s):\n%s", ofs * sizeof *actions, ofperr_get_name(error), ds_cstr(&s)); ds_destroy(&s); } } static enum ofperr ofpacts_from_openflow(const union ofp_action *in, size_t n_in, struct ofpbuf *out, enum ofperr (*ofpact_from_openflow)( const union ofp_action *a, struct ofpbuf *out)) { const union ofp_action *a; size_t left; ACTION_FOR_EACH (a, left, in, n_in) { enum ofperr error = ofpact_from_openflow(a, out); if (error) { log_bad_action(in, n_in, a - in, error); return error; } } if (left) { enum ofperr error = OFPERR_OFPBAC_BAD_LEN; log_bad_action(in, n_in, n_in - left, error); return error; } ofpact_pad(out); return 0; } static enum ofperr ofpacts_from_openflow10(const union ofp_action *in, size_t n_in, struct ofpbuf *out) { return ofpacts_from_openflow(in, n_in, out, ofpact_from_openflow10); } static enum ofperr ofpacts_pull_actions(struct ofpbuf *openflow, unsigned int actions_len, struct ofpbuf *ofpacts, enum ofperr (*translate)(const union ofp_action *actions, size_t n_actions, struct ofpbuf *ofpacts)) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); const union ofp_action *actions; enum ofperr error; ofpbuf_clear(ofpacts); if (actions_len % OFP_ACTION_ALIGN != 0) { VLOG_WARN_RL(&rl, "OpenFlow message actions length %u is not a " "multiple of %d", actions_len, OFP_ACTION_ALIGN); return OFPERR_OFPBRC_BAD_LEN; } actions = ofpbuf_try_pull(openflow, actions_len); if (actions == NULL) { VLOG_WARN_RL(&rl, "OpenFlow message actions length %u 
exceeds " "remaining message length (%zu)", actions_len, openflow->size); return OFPERR_OFPBRC_BAD_LEN; } error = translate(actions, actions_len / OFP_ACTION_ALIGN, ofpacts); if (error) { ofpbuf_clear(ofpacts); return error; } error = ofpacts_verify(ofpacts->data, ofpacts->size); if (error) { ofpbuf_clear(ofpacts); } return error; } /* Attempts to convert 'actions_len' bytes of OpenFlow 1.0 actions from the * front of 'openflow' into ofpacts. On success, replaces any existing content * in 'ofpacts' by the converted ofpacts; on failure, clears 'ofpacts'. * Returns 0 if successful, otherwise an OpenFlow error. * * The parsed actions are valid generically, but they may not be valid in a * specific context. For example, port numbers up to OFPP_MAX are valid * generically, but specific datapaths may only support port numbers in a * smaller range. Use ofpacts_check() to additional check whether actions are * valid in a specific context. */ enum ofperr ofpacts_pull_openflow10(struct ofpbuf *openflow, unsigned int actions_len, struct ofpbuf *ofpacts) { return ofpacts_pull_actions(openflow, actions_len, ofpacts, ofpacts_from_openflow10); } /* OpenFlow 1.1 actions. */ /* Parses 'a' to determine its type. On success stores the correct type into * '*code' and returns 0. On failure returns an OFPERR_* error code and * '*code' is indeterminate. * * The caller must have already verified that 'a''s length is potentially * correct (that is, a->header.len is nonzero and a multiple of sizeof(union * ofp_action) and no longer than the amount of space allocated to 'a'). * * This function verifies that 'a''s length is correct for the type of action * that it represents. 
*/ static enum ofperr decode_openflow11_action(const union ofp_action *a, enum ofputil_action_code *code) { uint16_t len; switch (a->type) { case CONSTANT_HTONS(OFPAT11_EXPERIMENTER): return decode_nxast_action(a, code); #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) \ case CONSTANT_HTONS(ENUM): \ len = ntohs(a->header.len); \ if (EXTENSIBLE \ ? len >= sizeof(struct STRUCT) \ : len == sizeof(struct STRUCT)) { \ *code = OFPUTIL_##ENUM; \ return 0; \ } else { \ return OFPERR_OFPBAC_BAD_LEN; \ } \ NOT_REACHED(); #include "ofp-util.def" default: return OFPERR_OFPBAC_BAD_TYPE; } } static enum ofperr output_from_openflow11(const struct ofp11_action_output *oao, struct ofpbuf *out) { struct ofpact_output *output; enum ofperr error; output = ofpact_put_OUTPUT(out); output->max_len = ntohs(oao->max_len); error = ofputil_port_from_ofp11(oao->port, &output->port); if (error) { return error; } return ofputil_check_output_port(output->port, OFPP_MAX); } static enum ofperr ofpact_from_openflow11(const union ofp_action *a, struct ofpbuf *out) { enum ofputil_action_code code; enum ofperr error; error = decode_openflow11_action(a, &code); if (error) { return error; } switch (code) { case OFPUTIL_ACTION_INVALID: #define OFPAT10_ACTION(ENUM, STRUCT, NAME) case OFPUTIL_##ENUM: #include "ofp-util.def" NOT_REACHED(); case OFPUTIL_OFPAT11_OUTPUT: return output_from_openflow11((const struct ofp11_action_output *) a, out); case OFPUTIL_OFPAT11_SET_VLAN_VID: if (a->vlan_vid.vlan_vid & ~htons(0xfff)) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_SET_VLAN_VID(out)->vlan_vid = ntohs(a->vlan_vid.vlan_vid); break; case OFPUTIL_OFPAT11_SET_VLAN_PCP: if (a->vlan_pcp.vlan_pcp & ~7) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_SET_VLAN_PCP(out)->vlan_pcp = a->vlan_pcp.vlan_pcp; break; case OFPUTIL_OFPAT11_PUSH_VLAN: if (((const struct ofp11_action_push *)a)->ethertype != htons(ETH_TYPE_VLAN_8021Q)) { /* XXX 802.1AD(QinQ) isn't supported at the moment */ return 
OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_PUSH_VLAN(out); break; case OFPUTIL_OFPAT11_POP_VLAN: ofpact_put_STRIP_VLAN(out); break; case OFPUTIL_OFPAT11_SET_QUEUE: ofpact_put_SET_QUEUE(out)->queue_id = ntohl(((const struct ofp11_action_set_queue *)a)->queue_id); break; case OFPUTIL_OFPAT11_SET_DL_SRC: memcpy(ofpact_put_SET_ETH_SRC(out)->mac, ((const struct ofp_action_dl_addr *) a)->dl_addr, ETH_ADDR_LEN); break; case OFPUTIL_OFPAT11_SET_DL_DST: memcpy(ofpact_put_SET_ETH_DST(out)->mac, ((const struct ofp_action_dl_addr *) a)->dl_addr, ETH_ADDR_LEN); break; case OFPUTIL_OFPAT11_DEC_NW_TTL: dec_ttl_from_openflow(out, code); break; case OFPUTIL_OFPAT11_SET_NW_SRC: ofpact_put_SET_IPV4_SRC(out)->ipv4 = a->nw_addr.nw_addr; break; case OFPUTIL_OFPAT11_SET_NW_DST: ofpact_put_SET_IPV4_DST(out)->ipv4 = a->nw_addr.nw_addr; break; case OFPUTIL_OFPAT11_SET_NW_TOS: if (a->nw_tos.nw_tos & ~IP_DSCP_MASK) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_SET_IPV4_DSCP(out)->dscp = a->nw_tos.nw_tos; break; case OFPUTIL_OFPAT11_SET_TP_SRC: ofpact_put_SET_L4_SRC_PORT(out)->port = ntohs(a->tp_port.tp_port); break; case OFPUTIL_OFPAT11_SET_TP_DST: ofpact_put_SET_L4_DST_PORT(out)->port = ntohs(a->tp_port.tp_port); break; case OFPUTIL_OFPAT12_SET_FIELD: return nxm_reg_load_from_openflow12_set_field( (const struct ofp12_action_set_field *)a, out); case OFPUTIL_OFPAT11_SET_MPLS_TTL: { struct ofp11_action_mpls_ttl *oamt = (struct ofp11_action_mpls_ttl *)a; ofpact_put_SET_MPLS_TTL(out)->ttl = oamt->mpls_ttl; break; } case OFPUTIL_OFPAT11_DEC_MPLS_TTL: ofpact_put_DEC_MPLS_TTL(out); break; case OFPUTIL_OFPAT11_PUSH_MPLS: { struct ofp11_action_push *oap = (struct ofp11_action_push *)a; if (!eth_type_mpls(oap->ethertype)) { return OFPERR_OFPBAC_BAD_ARGUMENT; } ofpact_put_PUSH_MPLS(out)->ethertype = oap->ethertype; break; } case OFPUTIL_OFPAT11_POP_MPLS: { struct ofp11_action_pop_mpls *oapm = (struct ofp11_action_pop_mpls *)a; if (eth_type_mpls(oapm->ethertype)) { return OFPERR_OFPBAC_BAD_ARGUMENT; } 
ofpact_put_POP_MPLS(out)->ethertype = oapm->ethertype; break; } #define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) case OFPUTIL_##ENUM: #include "ofp-util.def" return ofpact_from_nxast(a, code, out); } return error; } static enum ofperr ofpacts_from_openflow11(const union ofp_action *in, size_t n_in, struct ofpbuf *out) { return ofpacts_from_openflow(in, n_in, out, ofpact_from_openflow11); } /* OpenFlow 1.1 instructions. */ #define DEFINE_INST(ENUM, STRUCT, EXTENSIBLE, NAME) \ static inline const struct STRUCT * \ instruction_get_##ENUM(const struct ofp11_instruction *inst)\ { \ ovs_assert(inst->type == htons(ENUM)); \ return ALIGNED_CAST(struct STRUCT *, inst); \ } \ \ static inline void \ instruction_init_##ENUM(struct STRUCT *s) \ { \ memset(s, 0, sizeof *s); \ s->type = htons(ENUM); \ s->len = htons(sizeof *s); \ } \ \ static inline struct STRUCT * \ instruction_put_##ENUM(struct ofpbuf *buf) \ { \ struct STRUCT *s = ofpbuf_put_uninit(buf, sizeof *s); \ instruction_init_##ENUM(s); \ return s; \ } OVS_INSTRUCTIONS #undef DEFINE_INST struct instruction_type_info { enum ovs_instruction_type type; const char *name; }; static const struct instruction_type_info inst_info[] = { #define DEFINE_INST(ENUM, STRUCT, EXTENSIBLE, NAME) {OVSINST_##ENUM, NAME}, OVS_INSTRUCTIONS #undef DEFINE_INST }; const char * ovs_instruction_name_from_type(enum ovs_instruction_type type) { return inst_info[type].name; } int ovs_instruction_type_from_name(const char *name) { const struct instruction_type_info *p; for (p = inst_info; p < &inst_info[ARRAY_SIZE(inst_info)]; p++) { if (!strcasecmp(name, p->name)) { return p->type; } } return -1; } enum ovs_instruction_type ovs_instruction_type_from_ofpact_type(enum ofpact_type type) { switch (type) { case OFPACT_METER: return OVSINST_OFPIT13_METER; case OFPACT_CLEAR_ACTIONS: return OVSINST_OFPIT11_CLEAR_ACTIONS; case OFPACT_WRITE_METADATA: return OVSINST_OFPIT11_WRITE_METADATA; case OFPACT_GOTO_TABLE: return OVSINST_OFPIT11_GOTO_TABLE; case 
OFPACT_OUTPUT: case OFPACT_CONTROLLER: case OFPACT_ENQUEUE: case OFPACT_OUTPUT_REG: case OFPACT_BUNDLE: case OFPACT_SET_VLAN_VID: case OFPACT_SET_VLAN_PCP: case OFPACT_STRIP_VLAN: case OFPACT_PUSH_VLAN: case OFPACT_SET_ETH_SRC: case OFPACT_SET_ETH_DST: case OFPACT_SET_IPV4_SRC: case OFPACT_SET_IPV4_DST: case OFPACT_SET_IPV4_DSCP: case OFPACT_SET_L4_SRC_PORT: case OFPACT_SET_L4_DST_PORT: case OFPACT_REG_MOVE: case OFPACT_REG_LOAD: case OFPACT_STACK_PUSH: case OFPACT_STACK_POP: case OFPACT_DEC_TTL: case OFPACT_SET_MPLS_TTL: case OFPACT_DEC_MPLS_TTL: case OFPACT_PUSH_MPLS: case OFPACT_POP_MPLS: case OFPACT_SET_TUNNEL: case OFPACT_SET_QUEUE: case OFPACT_POP_QUEUE: case OFPACT_FIN_TIMEOUT: case OFPACT_RESUBMIT: case OFPACT_LEARN: case OFPACT_MULTIPATH: case OFPACT_NOTE: case OFPACT_EXIT: case OFPACT_SAMPLE: default: return OVSINST_OFPIT11_APPLY_ACTIONS; } } static inline struct ofp11_instruction * instruction_next(const struct ofp11_instruction *inst) { return ((struct ofp11_instruction *) (void *) ((uint8_t *) inst + ntohs(inst->len))); } static inline bool instruction_is_valid(const struct ofp11_instruction *inst, size_t n_instructions) { uint16_t len = ntohs(inst->len); return (!(len % OFP11_INSTRUCTION_ALIGN) && len >= sizeof *inst && len / sizeof *inst <= n_instructions); } /* This macro is careful to check for instructions with bad lengths. 
*/ #define INSTRUCTION_FOR_EACH(ITER, LEFT, INSTRUCTIONS, N_INSTRUCTIONS) \ for ((ITER) = (INSTRUCTIONS), (LEFT) = (N_INSTRUCTIONS); \ (LEFT) > 0 && instruction_is_valid(ITER, LEFT); \ ((LEFT) -= (ntohs((ITER)->len) \ / sizeof(struct ofp11_instruction)), \ (ITER) = instruction_next(ITER))) static enum ofperr decode_openflow11_instruction(const struct ofp11_instruction *inst, enum ovs_instruction_type *type) { uint16_t len = ntohs(inst->len); switch (inst->type) { case CONSTANT_HTONS(OFPIT11_EXPERIMENTER): return OFPERR_OFPBIC_BAD_EXPERIMENTER; #define DEFINE_INST(ENUM, STRUCT, EXTENSIBLE, NAME) \ case CONSTANT_HTONS(ENUM): \ if (EXTENSIBLE \ ? len >= sizeof(struct STRUCT) \ : len == sizeof(struct STRUCT)) { \ *type = OVSINST_##ENUM; \ return 0; \ } else { \ return OFPERR_OFPBIC_BAD_LEN; \ } OVS_INSTRUCTIONS #undef DEFINE_INST default: return OFPERR_OFPBIC_UNKNOWN_INST; } } static enum ofperr decode_openflow11_instructions(const struct ofp11_instruction insts[], size_t n_insts, const struct ofp11_instruction *out[]) { const struct ofp11_instruction *inst; size_t left; memset(out, 0, N_OVS_INSTRUCTIONS * sizeof *out); INSTRUCTION_FOR_EACH (inst, left, insts, n_insts) { enum ovs_instruction_type type; enum ofperr error; error = decode_openflow11_instruction(inst, &type); if (error) { return error; } if (out[type]) { return OFPERR_ONFBIC_DUP_INSTRUCTION; } out[type] = inst; } if (left) { VLOG_WARN_RL(&rl, "bad instruction format at offset %zu", (n_insts - left) * sizeof *inst); return OFPERR_OFPBIC_BAD_LEN; } return 0; } static void get_actions_from_instruction(const struct ofp11_instruction *inst, const union ofp_action **actions, size_t *n_actions) { *actions = ALIGNED_CAST(const union ofp_action *, inst + 1); *n_actions = (ntohs(inst->len) - sizeof *inst) / OFP11_INSTRUCTION_ALIGN; } /* Attempts to convert 'actions_len' bytes of OpenFlow 1.1 actions from the * front of 'openflow' into ofpacts. 
On success, replaces any existing content * in 'ofpacts' by the converted ofpacts; on failure, clears 'ofpacts'. * Returns 0 if successful, otherwise an OpenFlow error. * * In most places in OpenFlow 1.1 and 1.2, actions appear encapsulated in * instructions, so you should call ofpacts_pull_openflow11_instructions() * instead of this function. * * The parsed actions are valid generically, but they may not be valid in a * specific context. For example, port numbers up to OFPP_MAX are valid * generically, but specific datapaths may only support port numbers in a * smaller range. Use ofpacts_check() to additional check whether actions are * valid in a specific context. */ enum ofperr ofpacts_pull_openflow11_actions(struct ofpbuf *openflow, unsigned int actions_len, struct ofpbuf *ofpacts) { return ofpacts_pull_actions(openflow, actions_len, ofpacts, ofpacts_from_openflow11); } enum ofperr ofpacts_pull_openflow11_instructions(struct ofpbuf *openflow, unsigned int instructions_len, struct ofpbuf *ofpacts) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); const struct ofp11_instruction *instructions; const struct ofp11_instruction *insts[N_OVS_INSTRUCTIONS]; enum ofperr error; ofpbuf_clear(ofpacts); if (instructions_len % OFP11_INSTRUCTION_ALIGN != 0) { VLOG_WARN_RL(&rl, "OpenFlow message instructions length %u is not a " "multiple of %d", instructions_len, OFP11_INSTRUCTION_ALIGN); error = OFPERR_OFPBIC_BAD_LEN; goto exit; } instructions = ofpbuf_try_pull(openflow, instructions_len); if (instructions == NULL) { VLOG_WARN_RL(&rl, "OpenFlow message instructions length %u exceeds " "remaining message length (%zu)", instructions_len, openflow->size); error = OFPERR_OFPBIC_BAD_LEN; goto exit; } error = decode_openflow11_instructions( instructions, instructions_len / OFP11_INSTRUCTION_ALIGN, insts); if (error) { goto exit; } if (insts[OVSINST_OFPIT13_METER]) { const struct ofp13_instruction_meter *oim; struct ofpact_meter *om; oim = ALIGNED_CAST(const struct 
ofp13_instruction_meter *, insts[OVSINST_OFPIT13_METER]); om = ofpact_put_METER(ofpacts); om->meter_id = ntohl(oim->meter_id); } if (insts[OVSINST_OFPIT11_APPLY_ACTIONS]) { const union ofp_action *actions; size_t n_actions; get_actions_from_instruction(insts[OVSINST_OFPIT11_APPLY_ACTIONS], &actions, &n_actions); error = ofpacts_from_openflow11(actions, n_actions, ofpacts); if (error) { goto exit; } } if (insts[OVSINST_OFPIT11_CLEAR_ACTIONS]) { instruction_get_OFPIT11_CLEAR_ACTIONS( insts[OVSINST_OFPIT11_CLEAR_ACTIONS]); ofpact_put_CLEAR_ACTIONS(ofpacts); } /* XXX Write-Actions */ if (insts[OVSINST_OFPIT11_WRITE_METADATA]) { const struct ofp11_instruction_write_metadata *oiwm; struct ofpact_metadata *om; oiwm = ALIGNED_CAST(const struct ofp11_instruction_write_metadata *, insts[OVSINST_OFPIT11_WRITE_METADATA]); om = ofpact_put_WRITE_METADATA(ofpacts); om->metadata = oiwm->metadata; om->mask = oiwm->metadata_mask; } if (insts[OVSINST_OFPIT11_GOTO_TABLE]) { const struct ofp11_instruction_goto_table *oigt; struct ofpact_goto_table *ogt; oigt = instruction_get_OFPIT11_GOTO_TABLE( insts[OVSINST_OFPIT11_GOTO_TABLE]); ogt = ofpact_put_GOTO_TABLE(ofpacts); ogt->table_id = oigt->table_id; } if (insts[OVSINST_OFPIT11_WRITE_ACTIONS]) { error = OFPERR_OFPBIC_UNSUP_INST; goto exit; } error = ofpacts_verify(ofpacts->data, ofpacts->size); exit: if (error) { ofpbuf_clear(ofpacts); } return error; } /* May modify flow->dl_type, caller must restore it. 
*/ static enum ofperr ofpact_check__(const struct ofpact *a, struct flow *flow, ofp_port_t max_ports, uint8_t table_id) { const struct ofpact_enqueue *enqueue; switch (a->type) { case OFPACT_OUTPUT: return ofputil_check_output_port(ofpact_get_OUTPUT(a)->port, max_ports); case OFPACT_CONTROLLER: return 0; case OFPACT_ENQUEUE: enqueue = ofpact_get_ENQUEUE(a); if (ofp_to_u16(enqueue->port) >= ofp_to_u16(max_ports) && enqueue->port != OFPP_IN_PORT && enqueue->port != OFPP_LOCAL) { return OFPERR_OFPBAC_BAD_OUT_PORT; } return 0; case OFPACT_OUTPUT_REG: return mf_check_src(&ofpact_get_OUTPUT_REG(a)->src, flow); case OFPACT_BUNDLE: return bundle_check(ofpact_get_BUNDLE(a), max_ports, flow); case OFPACT_SET_VLAN_VID: case OFPACT_SET_VLAN_PCP: case OFPACT_STRIP_VLAN: case OFPACT_PUSH_VLAN: case OFPACT_SET_ETH_SRC: case OFPACT_SET_ETH_DST: case OFPACT_SET_IPV4_SRC: case OFPACT_SET_IPV4_DST: case OFPACT_SET_IPV4_DSCP: case OFPACT_SET_L4_SRC_PORT: case OFPACT_SET_L4_DST_PORT: return 0; case OFPACT_REG_MOVE: return nxm_reg_move_check(ofpact_get_REG_MOVE(a), flow); case OFPACT_REG_LOAD: return nxm_reg_load_check(ofpact_get_REG_LOAD(a), flow); case OFPACT_STACK_PUSH: return nxm_stack_push_check(ofpact_get_STACK_PUSH(a), flow); case OFPACT_STACK_POP: return nxm_stack_pop_check(ofpact_get_STACK_POP(a), flow); case OFPACT_DEC_TTL: case OFPACT_SET_MPLS_TTL: case OFPACT_DEC_MPLS_TTL: case OFPACT_SET_TUNNEL: case OFPACT_SET_QUEUE: case OFPACT_POP_QUEUE: case OFPACT_FIN_TIMEOUT: case OFPACT_RESUBMIT: return 0; case OFPACT_LEARN: return learn_check(ofpact_get_LEARN(a), flow); case OFPACT_MULTIPATH: return multipath_check(ofpact_get_MULTIPATH(a), flow); case OFPACT_NOTE: case OFPACT_EXIT: return 0; case OFPACT_PUSH_MPLS: flow->dl_type = ofpact_get_PUSH_MPLS(a)->ethertype; return 0; case OFPACT_POP_MPLS: flow->dl_type = ofpact_get_POP_MPLS(a)->ethertype; return 0; case OFPACT_SAMPLE: return 0; case OFPACT_CLEAR_ACTIONS: case OFPACT_WRITE_METADATA: return 0; case OFPACT_METER: { uint32_t mid 
= ofpact_get_METER(a)->meter_id; if (mid == 0 || mid > OFPM13_MAX) { return OFPERR_OFPMMFC_INVALID_METER; } return 0; } case OFPACT_GOTO_TABLE: if (ofpact_get_GOTO_TABLE(a)->table_id <= table_id) { return OFPERR_OFPBRC_BAD_TABLE_ID; } return 0; default: NOT_REACHED(); } } /* Checks that the 'ofpacts_len' bytes of actions in 'ofpacts' are * appropriate for a packet with the prerequisites satisfied by 'flow' in a * switch with no more than 'max_ports' ports. * * May temporarily modify 'flow', but restores the changes before returning. */ enum ofperr ofpacts_check(const struct ofpact ofpacts[], size_t ofpacts_len, struct flow *flow, ofp_port_t max_ports, uint8_t table_id) { const struct ofpact *a; ovs_be16 dl_type = flow->dl_type; enum ofperr error = 0; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { error = ofpact_check__(a, flow, max_ports, table_id); if (error) { break; } } flow->dl_type = dl_type; /* Restore. */ return error; } /* Verifies that the 'ofpacts_len' bytes of actions in 'ofpacts' are * in the appropriate order as defined by the OpenFlow spec. */ enum ofperr ofpacts_verify(const struct ofpact ofpacts[], size_t ofpacts_len) { const struct ofpact *a; enum ovs_instruction_type inst; inst = OVSINST_OFPIT11_APPLY_ACTIONS; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { enum ovs_instruction_type next; next = ovs_instruction_type_from_ofpact_type(a->type); if (inst != OVSINST_OFPIT11_APPLY_ACTIONS && next <= inst) { const char *name = ovs_instruction_name_from_type(inst); const char *next_name = ovs_instruction_name_from_type(next); if (next == inst) { VLOG_WARN("duplicate %s instruction not allowed, for OpenFlow " "1.1+ compatibility", name); } else { VLOG_WARN("invalid instruction ordering: %s must appear " "before %s, for OpenFlow 1.1+ compatibility", next_name, name); } return OFPERR_OFPBAC_UNSUPPORTED_ORDER; } inst = next; } return 0; } /* Converting ofpacts to Nicira OpenFlow extensions. 
*/ static void ofpact_output_reg_to_nxast(const struct ofpact_output_reg *output_reg, struct ofpbuf *out) { struct nx_action_output_reg *naor = ofputil_put_NXAST_OUTPUT_REG(out); naor->ofs_nbits = nxm_encode_ofs_nbits(output_reg->src.ofs, output_reg->src.n_bits); naor->src = htonl(output_reg->src.field->nxm_header); naor->max_len = htons(output_reg->max_len); } static void ofpact_resubmit_to_nxast(const struct ofpact_resubmit *resubmit, struct ofpbuf *out) { struct nx_action_resubmit *nar; if (resubmit->table_id == 0xff && resubmit->ofpact.compat != OFPUTIL_NXAST_RESUBMIT_TABLE) { nar = ofputil_put_NXAST_RESUBMIT(out); } else { nar = ofputil_put_NXAST_RESUBMIT_TABLE(out); nar->table = resubmit->table_id; } nar->in_port = htons(ofp_to_u16(resubmit->in_port)); } static void ofpact_set_tunnel_to_nxast(const struct ofpact_tunnel *tunnel, struct ofpbuf *out) { uint64_t tun_id = tunnel->tun_id; if (tun_id <= UINT32_MAX && tunnel->ofpact.compat != OFPUTIL_NXAST_SET_TUNNEL64) { ofputil_put_NXAST_SET_TUNNEL(out)->tun_id = htonl(tun_id); } else { ofputil_put_NXAST_SET_TUNNEL64(out)->tun_id = htonll(tun_id); } } static void ofpact_write_metadata_to_nxast(const struct ofpact_metadata *om, struct ofpbuf *out) { struct nx_action_write_metadata *nawm; nawm = ofputil_put_NXAST_WRITE_METADATA(out); nawm->metadata = om->metadata; nawm->mask = om->mask; } static void ofpact_note_to_nxast(const struct ofpact_note *note, struct ofpbuf *out) { size_t start_ofs = out->size; struct nx_action_note *nan; unsigned int remainder; unsigned int len; nan = ofputil_put_NXAST_NOTE(out); out->size -= sizeof nan->note; ofpbuf_put(out, note->data, note->length); len = out->size - start_ofs; remainder = len % OFP_ACTION_ALIGN; if (remainder) { ofpbuf_put_zeros(out, OFP_ACTION_ALIGN - remainder); } nan = ofpbuf_at(out, start_ofs, sizeof *nan); nan->len = htons(out->size - start_ofs); } static void ofpact_controller_to_nxast(const struct ofpact_controller *oc, struct ofpbuf *out) { struct 
nx_action_controller *nac; nac = ofputil_put_NXAST_CONTROLLER(out); nac->max_len = htons(oc->max_len); nac->controller_id = htons(oc->controller_id); nac->reason = oc->reason; } static void ofpact_dec_ttl_to_nxast(const struct ofpact_cnt_ids *oc_ids, struct ofpbuf *out) { if (oc_ids->ofpact.compat == OFPUTIL_NXAST_DEC_TTL) { ofputil_put_NXAST_DEC_TTL(out); } else { struct nx_action_cnt_ids *nac_ids = ofputil_put_NXAST_DEC_TTL_CNT_IDS(out); int ids_len = ROUND_UP(2 * oc_ids->n_controllers, OFP_ACTION_ALIGN); ovs_be16 *ids; size_t i; nac_ids->len = htons(ntohs(nac_ids->len) + ids_len); nac_ids->n_controllers = htons(oc_ids->n_controllers); ids = ofpbuf_put_zeros(out, ids_len); for (i = 0; i < oc_ids->n_controllers; i++) { ids[i] = htons(oc_ids->cnt_ids[i]); } } } static void ofpact_fin_timeout_to_nxast(const struct ofpact_fin_timeout *fin_timeout, struct ofpbuf *out) { struct nx_action_fin_timeout *naft = ofputil_put_NXAST_FIN_TIMEOUT(out); naft->fin_idle_timeout = htons(fin_timeout->fin_idle_timeout); naft->fin_hard_timeout = htons(fin_timeout->fin_hard_timeout); } static void ofpact_sample_to_nxast(const struct ofpact_sample *os, struct ofpbuf *out) { struct nx_action_sample *nas; nas = ofputil_put_NXAST_SAMPLE(out); nas->probability = htons(os->probability); nas->collector_set_id = htonl(os->collector_set_id); nas->obs_domain_id = htonl(os->obs_domain_id); nas->obs_point_id = htonl(os->obs_point_id); } static void ofpact_to_nxast(const struct ofpact *a, struct ofpbuf *out) { switch (a->type) { case OFPACT_CONTROLLER: ofpact_controller_to_nxast(ofpact_get_CONTROLLER(a), out); break; case OFPACT_OUTPUT_REG: ofpact_output_reg_to_nxast(ofpact_get_OUTPUT_REG(a), out); break; case OFPACT_BUNDLE: bundle_to_nxast(ofpact_get_BUNDLE(a), out); break; case OFPACT_REG_MOVE: nxm_reg_move_to_nxast(ofpact_get_REG_MOVE(a), out); break; case OFPACT_REG_LOAD: nxm_reg_load_to_nxast(ofpact_get_REG_LOAD(a), out); break; case OFPACT_STACK_PUSH: 
nxm_stack_push_to_nxast(ofpact_get_STACK_PUSH(a), out); break; case OFPACT_STACK_POP: nxm_stack_pop_to_nxast(ofpact_get_STACK_POP(a), out); break; case OFPACT_DEC_TTL: ofpact_dec_ttl_to_nxast(ofpact_get_DEC_TTL(a), out); break; case OFPACT_SET_MPLS_TTL: ofputil_put_NXAST_SET_MPLS_TTL(out)->ttl = ofpact_get_SET_MPLS_TTL(a)->ttl; break; case OFPACT_DEC_MPLS_TTL: ofputil_put_NXAST_DEC_MPLS_TTL(out); break; case OFPACT_SET_TUNNEL: ofpact_set_tunnel_to_nxast(ofpact_get_SET_TUNNEL(a), out); break; case OFPACT_WRITE_METADATA: ofpact_write_metadata_to_nxast(ofpact_get_WRITE_METADATA(a), out); break; case OFPACT_SET_QUEUE: ofputil_put_NXAST_SET_QUEUE(out)->queue_id = htonl(ofpact_get_SET_QUEUE(a)->queue_id); break; case OFPACT_POP_QUEUE: ofputil_put_NXAST_POP_QUEUE(out); break; case OFPACT_FIN_TIMEOUT: ofpact_fin_timeout_to_nxast(ofpact_get_FIN_TIMEOUT(a), out); break; case OFPACT_RESUBMIT: ofpact_resubmit_to_nxast(ofpact_get_RESUBMIT(a), out); break; case OFPACT_LEARN: learn_to_nxast(ofpact_get_LEARN(a), out); break; case OFPACT_MULTIPATH: multipath_to_nxast(ofpact_get_MULTIPATH(a), out); break; case OFPACT_NOTE: ofpact_note_to_nxast(ofpact_get_NOTE(a), out); break; case OFPACT_EXIT: ofputil_put_NXAST_EXIT(out); break; case OFPACT_PUSH_MPLS: ofputil_put_NXAST_PUSH_MPLS(out)->ethertype = ofpact_get_PUSH_MPLS(a)->ethertype; break; case OFPACT_POP_MPLS: ofputil_put_NXAST_POP_MPLS(out)->ethertype = ofpact_get_POP_MPLS(a)->ethertype; break; case OFPACT_SAMPLE: ofpact_sample_to_nxast(ofpact_get_SAMPLE(a), out); break; case OFPACT_OUTPUT: case OFPACT_ENQUEUE: case OFPACT_SET_VLAN_VID: case OFPACT_SET_VLAN_PCP: case OFPACT_STRIP_VLAN: case OFPACT_PUSH_VLAN: case OFPACT_SET_ETH_SRC: case OFPACT_SET_ETH_DST: case OFPACT_SET_IPV4_SRC: case OFPACT_SET_IPV4_DST: case OFPACT_SET_IPV4_DSCP: case OFPACT_SET_L4_SRC_PORT: case OFPACT_SET_L4_DST_PORT: case OFPACT_CLEAR_ACTIONS: case OFPACT_GOTO_TABLE: case OFPACT_METER: NOT_REACHED(); } } /* Converting ofpacts to OpenFlow 1.0. 
*/ static void ofpact_output_to_openflow10(const struct ofpact_output *output, struct ofpbuf *out) { struct ofp10_action_output *oao; oao = ofputil_put_OFPAT10_OUTPUT(out); oao->port = htons(ofp_to_u16(output->port)); oao->max_len = htons(output->max_len); } static void ofpact_enqueue_to_openflow10(const struct ofpact_enqueue *enqueue, struct ofpbuf *out) { struct ofp10_action_enqueue *oae; oae = ofputil_put_OFPAT10_ENQUEUE(out); oae->port = htons(ofp_to_u16(enqueue->port)); oae->queue_id = htonl(enqueue->queue); } static void ofpact_to_openflow10(const struct ofpact *a, struct ofpbuf *out) { switch (a->type) { case OFPACT_OUTPUT: ofpact_output_to_openflow10(ofpact_get_OUTPUT(a), out); break; case OFPACT_ENQUEUE: ofpact_enqueue_to_openflow10(ofpact_get_ENQUEUE(a), out); break; case OFPACT_SET_VLAN_VID: ofputil_put_OFPAT10_SET_VLAN_VID(out)->vlan_vid = htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid); break; case OFPACT_SET_VLAN_PCP: ofputil_put_OFPAT10_SET_VLAN_PCP(out)->vlan_pcp = ofpact_get_SET_VLAN_PCP(a)->vlan_pcp; break; case OFPACT_STRIP_VLAN: ofputil_put_OFPAT10_STRIP_VLAN(out); break; case OFPACT_SET_ETH_SRC: memcpy(ofputil_put_OFPAT10_SET_DL_SRC(out)->dl_addr, ofpact_get_SET_ETH_SRC(a)->mac, ETH_ADDR_LEN); break; case OFPACT_SET_ETH_DST: memcpy(ofputil_put_OFPAT10_SET_DL_DST(out)->dl_addr, ofpact_get_SET_ETH_DST(a)->mac, ETH_ADDR_LEN); break; case OFPACT_SET_IPV4_SRC: ofputil_put_OFPAT10_SET_NW_SRC(out)->nw_addr = ofpact_get_SET_IPV4_SRC(a)->ipv4; break; case OFPACT_SET_IPV4_DST: ofputil_put_OFPAT10_SET_NW_DST(out)->nw_addr = ofpact_get_SET_IPV4_DST(a)->ipv4; break; case OFPACT_SET_IPV4_DSCP: ofputil_put_OFPAT10_SET_NW_TOS(out)->nw_tos = ofpact_get_SET_IPV4_DSCP(a)->dscp; break; case OFPACT_SET_L4_SRC_PORT: ofputil_put_OFPAT10_SET_TP_SRC(out)->tp_port = htons(ofpact_get_SET_L4_SRC_PORT(a)->port); break; case OFPACT_SET_L4_DST_PORT: ofputil_put_OFPAT10_SET_TP_DST(out)->tp_port = htons(ofpact_get_SET_L4_DST_PORT(a)->port); break; case OFPACT_PUSH_VLAN: case 
OFPACT_CLEAR_ACTIONS: case OFPACT_GOTO_TABLE: case OFPACT_METER: /* XXX */ break; case OFPACT_CONTROLLER: case OFPACT_OUTPUT_REG: case OFPACT_BUNDLE: case OFPACT_REG_MOVE: case OFPACT_REG_LOAD: case OFPACT_STACK_PUSH: case OFPACT_STACK_POP: case OFPACT_DEC_TTL: case OFPACT_SET_MPLS_TTL: case OFPACT_DEC_MPLS_TTL: case OFPACT_SET_TUNNEL: case OFPACT_WRITE_METADATA: case OFPACT_SET_QUEUE: case OFPACT_POP_QUEUE: case OFPACT_FIN_TIMEOUT: case OFPACT_RESUBMIT: case OFPACT_LEARN: case OFPACT_MULTIPATH: case OFPACT_NOTE: case OFPACT_EXIT: case OFPACT_PUSH_MPLS: case OFPACT_POP_MPLS: case OFPACT_SAMPLE: ofpact_to_nxast(a, out); break; } } /* Converts the 'ofpacts_len' bytes of ofpacts in 'ofpacts' into OpenFlow 1.0 * actions in 'openflow', appending the actions to any existing data in * 'openflow'. */ void ofpacts_put_openflow10(const struct ofpact ofpacts[], size_t ofpacts_len, struct ofpbuf *openflow) { const struct ofpact *a; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { ofpact_to_openflow10(a, openflow); } } /* Converting ofpacts to OpenFlow 1.1. 
*/ static void ofpact_output_to_openflow11(const struct ofpact_output *output, struct ofpbuf *out) { struct ofp11_action_output *oao; oao = ofputil_put_OFPAT11_OUTPUT(out); oao->port = ofputil_port_to_ofp11(output->port); oao->max_len = htons(output->max_len); } static void ofpact_dec_ttl_to_openflow11(const struct ofpact_cnt_ids *dec_ttl, struct ofpbuf *out) { if (dec_ttl->n_controllers == 1 && dec_ttl->cnt_ids[0] == 0 && (!dec_ttl->ofpact.compat || dec_ttl->ofpact.compat == OFPUTIL_OFPAT11_DEC_NW_TTL)) { ofputil_put_OFPAT11_DEC_NW_TTL(out); } else { ofpact_dec_ttl_to_nxast(dec_ttl, out); } } static void ofpact_to_openflow11(const struct ofpact *a, struct ofpbuf *out) { switch (a->type) { case OFPACT_OUTPUT: return ofpact_output_to_openflow11(ofpact_get_OUTPUT(a), out); case OFPACT_ENQUEUE: /* XXX */ break; case OFPACT_SET_VLAN_VID: ofputil_put_OFPAT11_SET_VLAN_VID(out)->vlan_vid = htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid); break; case OFPACT_SET_VLAN_PCP: ofputil_put_OFPAT11_SET_VLAN_PCP(out)->vlan_pcp = ofpact_get_SET_VLAN_PCP(a)->vlan_pcp; break; case OFPACT_STRIP_VLAN: ofputil_put_OFPAT11_POP_VLAN(out); break; case OFPACT_PUSH_VLAN: /* XXX ETH_TYPE_VLAN_8021AD case */ ofputil_put_OFPAT11_PUSH_VLAN(out)->ethertype = htons(ETH_TYPE_VLAN_8021Q); break; case OFPACT_SET_QUEUE: ofputil_put_OFPAT11_SET_QUEUE(out)->queue_id = htonl(ofpact_get_SET_QUEUE(a)->queue_id); break; case OFPACT_SET_ETH_SRC: memcpy(ofputil_put_OFPAT11_SET_DL_SRC(out)->dl_addr, ofpact_get_SET_ETH_SRC(a)->mac, ETH_ADDR_LEN); break; case OFPACT_SET_ETH_DST: memcpy(ofputil_put_OFPAT11_SET_DL_DST(out)->dl_addr, ofpact_get_SET_ETH_DST(a)->mac, ETH_ADDR_LEN); break; case OFPACT_SET_IPV4_SRC: ofputil_put_OFPAT11_SET_NW_SRC(out)->nw_addr = ofpact_get_SET_IPV4_SRC(a)->ipv4; break; case OFPACT_SET_IPV4_DST: ofputil_put_OFPAT11_SET_NW_DST(out)->nw_addr = ofpact_get_SET_IPV4_DST(a)->ipv4; break; case OFPACT_SET_IPV4_DSCP: ofputil_put_OFPAT11_SET_NW_TOS(out)->nw_tos = ofpact_get_SET_IPV4_DSCP(a)->dscp; 
break; case OFPACT_SET_L4_SRC_PORT: ofputil_put_OFPAT11_SET_TP_SRC(out)->tp_port = htons(ofpact_get_SET_L4_SRC_PORT(a)->port); break; case OFPACT_SET_L4_DST_PORT: ofputil_put_OFPAT11_SET_TP_DST(out)->tp_port = htons(ofpact_get_SET_L4_DST_PORT(a)->port); break; case OFPACT_DEC_TTL: ofpact_dec_ttl_to_openflow11(ofpact_get_DEC_TTL(a), out); break; case OFPACT_SET_MPLS_TTL: ofputil_put_OFPAT11_SET_MPLS_TTL(out)->mpls_ttl = ofpact_get_SET_MPLS_TTL(a)->ttl; break; case OFPACT_DEC_MPLS_TTL: ofputil_put_OFPAT11_DEC_MPLS_TTL(out); break; case OFPACT_WRITE_METADATA: /* OpenFlow 1.1 uses OFPIT_WRITE_METADATA to express this action. */ break; case OFPACT_PUSH_MPLS: ofputil_put_OFPAT11_PUSH_MPLS(out)->ethertype = ofpact_get_PUSH_MPLS(a)->ethertype; break; case OFPACT_POP_MPLS: ofputil_put_OFPAT11_POP_MPLS(out)->ethertype = ofpact_get_POP_MPLS(a)->ethertype; break; case OFPACT_CLEAR_ACTIONS: case OFPACT_GOTO_TABLE: case OFPACT_METER: NOT_REACHED(); case OFPACT_CONTROLLER: case OFPACT_OUTPUT_REG: case OFPACT_BUNDLE: case OFPACT_REG_MOVE: case OFPACT_REG_LOAD: case OFPACT_STACK_PUSH: case OFPACT_STACK_POP: case OFPACT_SET_TUNNEL: case OFPACT_POP_QUEUE: case OFPACT_FIN_TIMEOUT: case OFPACT_RESUBMIT: case OFPACT_LEARN: case OFPACT_MULTIPATH: case OFPACT_NOTE: case OFPACT_EXIT: case OFPACT_SAMPLE: ofpact_to_nxast(a, out); break; } } /* Converts the ofpacts in 'ofpacts' (terminated by OFPACT_END) into OpenFlow * 1.1 actions in 'openflow', appending the actions to any existing data in * 'openflow'. */ size_t ofpacts_put_openflow11_actions(const struct ofpact ofpacts[], size_t ofpacts_len, struct ofpbuf *openflow) { const struct ofpact *a; size_t start_size = openflow->size; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { ofpact_to_openflow11(a, openflow); } return openflow->size - start_size; } static void ofpacts_update_instruction_actions(struct ofpbuf *openflow, size_t ofs) { struct ofp11_instruction_actions *oia; /* Update the instruction's length (or, if it's empty, delete it). 
*/ oia = ofpbuf_at_assert(openflow, ofs, sizeof *oia); if (openflow->size > ofs + sizeof *oia) { oia->len = htons(openflow->size - ofs); } else { openflow->size = ofs; } } void ofpacts_put_openflow11_instructions(const struct ofpact ofpacts[], size_t ofpacts_len, struct ofpbuf *openflow) { const struct ofpact *a; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { switch (ovs_instruction_type_from_ofpact_type(a->type)) { case OVSINST_OFPIT11_CLEAR_ACTIONS: instruction_put_OFPIT11_CLEAR_ACTIONS(openflow); break; case OVSINST_OFPIT11_GOTO_TABLE: { struct ofp11_instruction_goto_table *oigt; oigt = instruction_put_OFPIT11_GOTO_TABLE(openflow); oigt->table_id = ofpact_get_GOTO_TABLE(a)->table_id; memset(oigt->pad, 0, sizeof oigt->pad); break; } case OVSINST_OFPIT11_WRITE_METADATA: { const struct ofpact_metadata *om; struct ofp11_instruction_write_metadata *oiwm; om = ofpact_get_WRITE_METADATA(a); oiwm = instruction_put_OFPIT11_WRITE_METADATA(openflow); oiwm->metadata = om->metadata; oiwm->metadata_mask = om->mask; break; } case OVSINST_OFPIT13_METER: { const struct ofpact_meter *om; struct ofp13_instruction_meter *oim; om = ofpact_get_METER(a); oim = instruction_put_OFPIT13_METER(openflow); oim->meter_id = htonl(om->meter_id); break; } case OVSINST_OFPIT11_APPLY_ACTIONS: { const size_t ofs = openflow->size; const size_t ofpacts_len_left = (uint8_t*)ofpact_end(ofpacts, ofpacts_len) - (uint8_t*)a; const struct ofpact *action; const struct ofpact *processed = a; instruction_put_OFPIT11_APPLY_ACTIONS(openflow); OFPACT_FOR_EACH(action, a, ofpacts_len_left) { if (ovs_instruction_type_from_ofpact_type(action->type) != OVSINST_OFPIT11_APPLY_ACTIONS) { break; } ofpact_to_openflow11(action, openflow); processed = action; } ofpacts_update_instruction_actions(openflow, ofs); a = processed; break; } case OVSINST_OFPIT11_WRITE_ACTIONS: NOT_REACHED(); } } } /* Returns true if 'action' outputs to 'port', false otherwise. 
*/ static bool ofpact_outputs_to_port(const struct ofpact *ofpact, ofp_port_t port) { switch (ofpact->type) { case OFPACT_OUTPUT: return ofpact_get_OUTPUT(ofpact)->port == port; case OFPACT_ENQUEUE: return ofpact_get_ENQUEUE(ofpact)->port == port; case OFPACT_CONTROLLER: return port == OFPP_CONTROLLER; case OFPACT_OUTPUT_REG: case OFPACT_BUNDLE: case OFPACT_SET_VLAN_VID: case OFPACT_SET_VLAN_PCP: case OFPACT_STRIP_VLAN: case OFPACT_PUSH_VLAN: case OFPACT_SET_ETH_SRC: case OFPACT_SET_ETH_DST: case OFPACT_SET_IPV4_SRC: case OFPACT_SET_IPV4_DST: case OFPACT_SET_IPV4_DSCP: case OFPACT_SET_L4_SRC_PORT: case OFPACT_SET_L4_DST_PORT: case OFPACT_REG_MOVE: case OFPACT_REG_LOAD: case OFPACT_STACK_PUSH: case OFPACT_STACK_POP: case OFPACT_DEC_TTL: case OFPACT_SET_MPLS_TTL: case OFPACT_DEC_MPLS_TTL: case OFPACT_SET_TUNNEL: case OFPACT_WRITE_METADATA: case OFPACT_SET_QUEUE: case OFPACT_POP_QUEUE: case OFPACT_FIN_TIMEOUT: case OFPACT_RESUBMIT: case OFPACT_LEARN: case OFPACT_MULTIPATH: case OFPACT_NOTE: case OFPACT_EXIT: case OFPACT_PUSH_MPLS: case OFPACT_POP_MPLS: case OFPACT_SAMPLE: case OFPACT_CLEAR_ACTIONS: case OFPACT_GOTO_TABLE: case OFPACT_METER: default: return false; } } /* Returns true if any action in the 'ofpacts_len' bytes of 'ofpacts' outputs * to 'port', false otherwise. */ bool ofpacts_output_to_port(const struct ofpact *ofpacts, size_t ofpacts_len, ofp_port_t port) { const struct ofpact *a; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { if (ofpact_outputs_to_port(a, port)) { return true; } } return false; } bool ofpacts_equal(const struct ofpact *a, size_t a_len, const struct ofpact *b, size_t b_len) { return a_len == b_len && !memcmp(a, b, a_len); } /* Finds the OFPACT_METER action, if any, in the 'ofpacts_len' bytes of * 'ofpacts'. If found, returns its meter ID; if not, returns 0. * * This function relies on the order of 'ofpacts' being correct (as checked by * ofpacts_verify()). 
 */
uint32_t
ofpacts_get_meter(const struct ofpact ofpacts[], size_t ofpacts_len)
{
    const struct ofpact *a;

    OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
        enum ovs_instruction_type inst;

        inst = ovs_instruction_type_from_ofpact_type(a->type);
        if (a->type == OFPACT_METER) {
            return ofpact_get_METER(a)->meter_id;
        } else if (inst > OVSINST_OFPIT13_METER) {
            /* This action maps to an instruction type that compares greater
             * than the meter instruction, so in a correctly ordered list no
             * meter can follow: stop scanning. */
            break;
        }
    }

    return 0;
}

/* Formatting ofpacts. */

/* Appends 'note' to 'string' as "note:" followed by each of its
 * 'note->length' data bytes printed as two hex digits, with '.' between
 * consecutive bytes. */
static void
print_note(const struct ofpact_note *note, struct ds *string)
{
    size_t i;

    ds_put_cstr(string, "note:");
    for (i = 0; i < note->length; i++) {
        if (i) {
            ds_put_char(string, '.');
        }
        ds_put_format(string, "%02"PRIx8, note->data[i]);
    }
}

/* Appends "dec_ttl" to 's'.  Only when the action's 'compat' is
 * OFPUTIL_NXAST_DEC_TTL_CNT_IDS is it followed by a parenthesized,
 * comma-separated list of the 'ids->n_controllers' controller IDs. */
static void
print_dec_ttl(const struct ofpact_cnt_ids *ids,
              struct ds *s)
{
    size_t i;

    ds_put_cstr(s, "dec_ttl");
    if (ids->ofpact.compat == OFPUTIL_NXAST_DEC_TTL_CNT_IDS) {
        ds_put_cstr(s, "(");
        for (i = 0; i < ids->n_controllers; i++) {
            if (i) {
                ds_put_cstr(s, ",");
            }
            ds_put_format(s, "%"PRIu16, ids->cnt_ids[i]);
        }
        ds_put_cstr(s, ")");
    }
}

/* Appends "fin_timeout(...)" to 's', listing only the timeouts in
 * 'fin_timeout' that are nonzero. */
static void
print_fin_timeout(const struct ofpact_fin_timeout *fin_timeout,
                  struct ds *s)
{
    ds_put_cstr(s, "fin_timeout(");
    if (fin_timeout->fin_idle_timeout) {
        ds_put_format(s, "idle_timeout=%"PRIu16",",
                      fin_timeout->fin_idle_timeout);
    }
    if (fin_timeout->fin_hard_timeout) {
        ds_put_format(s, "hard_timeout=%"PRIu16",",
                      fin_timeout->fin_hard_timeout);
    }
    /* NOTE(review): ds_chomp() presumably drops the trailing ',' left by the
     * last field printed -- confirm against dynamic-string.h. */
    ds_chomp(s, ',');
    ds_put_char(s, ')');
}

/* Appends a text representation of the single action 'a' to 's'.  The switch
 * has one case per OFPACT_* type and no default. */
static void
ofpact_format(const struct ofpact *a, struct ds *s)
{
    const struct ofpact_enqueue *enqueue;
    const struct ofpact_resubmit *resubmit;
    const struct ofpact_controller *controller;
    const struct ofpact_metadata *metadata;
    const struct ofpact_tunnel *tunnel;
    const struct ofpact_sample *sample;
    ofp_port_t port;

    switch (a->type) {
    case OFPACT_OUTPUT:
        port = ofpact_get_OUTPUT(a)->port;
        if (ofp_to_u16(port) < ofp_to_u16(OFPP_MAX)) {
            /* Port numbers below OFPP_MAX are printed numerically. */
            ds_put_format(s, "output:%"PRIu16, port);
        } else {
            /* Other ports go through ofputil_format_port(); only output to
             * the controller additionally carries its 'max_len'. */
            ofputil_format_port(port, s);
            if (port == OFPP_CONTROLLER) {
                ds_put_format(s, ":%"PRIu16, ofpact_get_OUTPUT(a)->max_len);
            }
        }
        break;

    case OFPACT_CONTROLLER:
        controller = ofpact_get_CONTROLLER(a);
        if (controller->reason == OFPR_ACTION &&
            controller->controller_id == 0) {
            /* Short form when reason and controller ID are OFPR_ACTION
             * and 0. */
            ds_put_format(s, "CONTROLLER:%"PRIu16,
                          ofpact_get_CONTROLLER(a)->max_len);
        } else {
            enum ofp_packet_in_reason reason = controller->reason;

            /* Long form: print only the fields that differ from
             * OFPR_ACTION, UINT16_MAX and 0 respectively. */
            ds_put_cstr(s, "controller(");
            if (reason != OFPR_ACTION) {
                char reasonbuf[OFPUTIL_PACKET_IN_REASON_BUFSIZE];

                ds_put_format(s, "reason=%s,",
                              ofputil_packet_in_reason_to_string(
                                  reason, reasonbuf, sizeof reasonbuf));
            }
            if (controller->max_len != UINT16_MAX) {
                ds_put_format(s, "max_len=%"PRIu16",", controller->max_len);
            }
            if (controller->controller_id != 0) {
                ds_put_format(s, "id=%"PRIu16",", controller->controller_id);
            }
            ds_chomp(s, ',');
            ds_put_char(s, ')');
        }
        break;

    case OFPACT_ENQUEUE:
        enqueue = ofpact_get_ENQUEUE(a);
        ds_put_format(s, "enqueue:");
        ofputil_format_port(enqueue->port, s);
        ds_put_format(s, "q%"PRIu32, enqueue->queue);
        break;

    case OFPACT_OUTPUT_REG:
        ds_put_cstr(s, "output:");
        mf_format_subfield(&ofpact_get_OUTPUT_REG(a)->src, s);
        break;

    case OFPACT_BUNDLE:
        bundle_format(ofpact_get_BUNDLE(a), s);
        break;

    case OFPACT_SET_VLAN_VID:
        ds_put_format(s, "mod_vlan_vid:%"PRIu16,
                      ofpact_get_SET_VLAN_VID(a)->vlan_vid);
        break;

    case OFPACT_SET_VLAN_PCP:
        ds_put_format(s, "mod_vlan_pcp:%"PRIu8,
                      ofpact_get_SET_VLAN_PCP(a)->vlan_pcp);
        break;

    case OFPACT_STRIP_VLAN:
        ds_put_cstr(s, "strip_vlan");
        break;

    case OFPACT_PUSH_VLAN:
        /* XXX 802.1AD case*/
        ds_put_format(s, "push_vlan:%#"PRIx16, ETH_TYPE_VLAN_8021Q);
        break;

    case OFPACT_SET_ETH_SRC:
        ds_put_format(s, "mod_dl_src:"ETH_ADDR_FMT,
                      ETH_ADDR_ARGS(ofpact_get_SET_ETH_SRC(a)->mac));
        break;

    case OFPACT_SET_ETH_DST:
        ds_put_format(s, "mod_dl_dst:"ETH_ADDR_FMT,
                      ETH_ADDR_ARGS(ofpact_get_SET_ETH_DST(a)->mac));
        break;

    case OFPACT_SET_IPV4_SRC:
        ds_put_format(s, "mod_nw_src:"IP_FMT,
                      IP_ARGS(ofpact_get_SET_IPV4_SRC(a)->ipv4));
        break;

    case OFPACT_SET_IPV4_DST:
        ds_put_format(s, "mod_nw_dst:"IP_FMT,
                      IP_ARGS(ofpact_get_SET_IPV4_DST(a)->ipv4));
        break;

    case OFPACT_SET_IPV4_DSCP:
        ds_put_format(s, "mod_nw_tos:%d", ofpact_get_SET_IPV4_DSCP(a)->dscp);
        break;

    case OFPACT_SET_L4_SRC_PORT:
        ds_put_format(s, "mod_tp_src:%d", ofpact_get_SET_L4_SRC_PORT(a)->port);
        break;

    case OFPACT_SET_L4_DST_PORT:
        ds_put_format(s, "mod_tp_dst:%d", ofpact_get_SET_L4_DST_PORT(a)->port);
        break;

    case OFPACT_REG_MOVE:
        nxm_format_reg_move(ofpact_get_REG_MOVE(a), s);
        break;

    case OFPACT_REG_LOAD:
        nxm_format_reg_load(ofpact_get_REG_LOAD(a), s);
        break;

    case OFPACT_STACK_PUSH:
        nxm_format_stack_push(ofpact_get_STACK_PUSH(a), s);
        break;

    case OFPACT_STACK_POP:
        nxm_format_stack_pop(ofpact_get_STACK_POP(a), s);
        break;

    case OFPACT_DEC_TTL:
        print_dec_ttl(ofpact_get_DEC_TTL(a), s);
        break;

    case OFPACT_SET_MPLS_TTL:
        ds_put_format(s, "set_mpls_ttl(%"PRIu8")",
                      ofpact_get_SET_MPLS_TTL(a)->ttl);
        break;

    case OFPACT_DEC_MPLS_TTL:
        ds_put_cstr(s, "dec_mpls_ttl");
        break;

    case OFPACT_SET_TUNNEL:
        tunnel = ofpact_get_SET_TUNNEL(a);
        /* Print as "set_tunnel64" when the ID does not fit in 32 bits or
         * when 'compat' records NXAST_SET_TUNNEL64. */
        ds_put_format(s, "set_tunnel%s:%#"PRIx64,
                      (tunnel->tun_id > UINT32_MAX
                       || a->compat == OFPUTIL_NXAST_SET_TUNNEL64 ? "64" : ""),
                      tunnel->tun_id);
        break;

    case OFPACT_SET_QUEUE:
        ds_put_format(s, "set_queue:%"PRIu32,
                      ofpact_get_SET_QUEUE(a)->queue_id);
        break;

    case OFPACT_POP_QUEUE:
        ds_put_cstr(s, "pop_queue");
        break;

    case OFPACT_FIN_TIMEOUT:
        print_fin_timeout(ofpact_get_FIN_TIMEOUT(a), s);
        break;

    case OFPACT_RESUBMIT:
        resubmit = ofpact_get_RESUBMIT(a);
        /* NOTE(review): table_id 255 and in_port OFPP_IN_PORT look like the
         * "not specified" values here -- they are simply omitted from the
         * output; confirm against the parser. */
        if (resubmit->in_port != OFPP_IN_PORT && resubmit->table_id == 255) {
            ds_put_cstr(s, "resubmit:");
            ofputil_format_port(resubmit->in_port, s);
        } else {
            ds_put_format(s, "resubmit(");
            if (resubmit->in_port != OFPP_IN_PORT) {
                ofputil_format_port(resubmit->in_port, s);
            }
            ds_put_char(s, ',');
            if (resubmit->table_id != 255) {
                ds_put_format(s, "%"PRIu8, resubmit->table_id);
            }
            ds_put_char(s, ')');
        }
        break;

    case OFPACT_LEARN:
        learn_format(ofpact_get_LEARN(a), s);
        break;

    case OFPACT_MULTIPATH:
        multipath_format(ofpact_get_MULTIPATH(a), s);
        break;

    case OFPACT_NOTE:
        print_note(ofpact_get_NOTE(a), s);
        break;

    case OFPACT_PUSH_MPLS:
        ds_put_format(s, "push_mpls:0x%04"PRIx16,
                      ntohs(ofpact_get_PUSH_MPLS(a)->ethertype));
        break;

    case OFPACT_POP_MPLS:
        ds_put_format(s, "pop_mpls:0x%04"PRIx16,
                      ntohs(ofpact_get_POP_MPLS(a)->ethertype));
        break;

    case OFPACT_EXIT:
        ds_put_cstr(s, "exit");
        break;

    case OFPACT_SAMPLE:
        sample = ofpact_get_SAMPLE(a);
        ds_put_format(
            s, "sample(probability=%"PRIu16",collector_set_id=%"PRIu32
            ",obs_domain_id=%"PRIu32",obs_point_id=%"PRIu32")",
            sample->probability, sample->collector_set_id,
            sample->obs_domain_id, sample->obs_point_id);
        break;

    /* The instruction-type actions below take their keyword from
     * ovs_instruction_name_from_type(). */
    case OFPACT_CLEAR_ACTIONS:
        ds_put_format(s, "%s",
                      ovs_instruction_name_from_type(
                          OVSINST_OFPIT11_CLEAR_ACTIONS));
        break;

    case OFPACT_WRITE_METADATA:
        metadata = ofpact_get_WRITE_METADATA(a);
        ds_put_format(s, "%s:%#"PRIx64,
                      ovs_instruction_name_from_type(
                          OVSINST_OFPIT11_WRITE_METADATA),
                      ntohll(metadata->metadata));
        /* The "/mask" suffix is printed only for a mask that is not
         * all-ones. */
        if (metadata->mask != htonll(UINT64_MAX)) {
            ds_put_format(s, "/%#"PRIx64, ntohll(metadata->mask));
        }
        break;

    case OFPACT_GOTO_TABLE:
        ds_put_format(s, "%s:%"PRIu8,
                      ovs_instruction_name_from_type(
                          OVSINST_OFPIT11_GOTO_TABLE),
                      ofpact_get_GOTO_TABLE(a)->table_id);
        break;

    case OFPACT_METER:
        ds_put_format(s, "%s:%"PRIu32,
                      ovs_instruction_name_from_type(OVSINST_OFPIT13_METER),
                      ofpact_get_METER(a)->meter_id);
        break;
    }
}

/* Appends a string representing the 'ofpacts_len' bytes of ofpacts in
 * 'ofpacts' to 'string'.  The output starts with "actions=" and is "drop"
 * when 'ofpacts_len' is zero; otherwise the actions are separated by
 * commas. */
void
ofpacts_format(const struct ofpact *ofpacts, size_t ofpacts_len,
               struct ds *string)
{
    ds_put_cstr(string, "actions=");
    if (!ofpacts_len) {
        ds_put_cstr(string, "drop");
    } else {
        const struct ofpact *a;

        OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
            /* Separator before every action except the first. */
            if (a != ofpacts) {
                ds_put_cstr(string, ",");
            }

            /* XXX write-actions */
            ofpact_format(a, string);
        }
    }
}

/* Internal use by helpers. */

/* Pads 'ofpacts' with ofpact_pad(), appends 'len' uninitialized bytes,
 * initializes them with ofpact_init() as an action of the given 'type', and
 * returns the new action.  Also points 'ofpacts->l2' at the new action. */
void *
ofpact_put(struct ofpbuf *ofpacts, enum ofpact_type type, size_t len)
{
    struct ofpact *ofpact;

    ofpact_pad(ofpacts);
    ofpact = ofpacts->l2 = ofpbuf_put_uninit(ofpacts, len);
    ofpact_init(ofpact, type, len);
    return ofpact;
}

/* Zeros the 'len' bytes at 'ofpact', then sets its type to 'type', its
 * 'compat' to OFPUTIL_ACTION_INVALID and its length to 'len'. */
void
ofpact_init(struct ofpact *ofpact, enum ofpact_type type, size_t len)
{
    memset(ofpact, 0, len);
    ofpact->type = type;
    ofpact->compat = OFPUTIL_ACTION_INVALID;
    ofpact->len = len;
}

/* Updates 'ofpact->len' to the number of bytes in the tail of 'ofpacts'
 * starting at 'ofpact'.
 *
 * This is the correct way to update a variable-length ofpact's length after
 * adding the variable-length part of the payload.  (See the large comment
 * near the end of ofp-actions.h for more information.) */
void
ofpact_update_len(struct ofpbuf *ofpacts, struct ofpact *ofpact)
{
    /* 'ofpact' must be the action most recently recorded in 'l2'. */
    ovs_assert(ofpact == ofpacts->l2);
    ofpact->len = (char *) ofpbuf_tail(ofpacts) - (char *) ofpact;
}

/* Pads out 'ofpacts' to a multiple of OFPACT_ALIGNTO bytes in length.  Each
 * ofpact_put_<ENUM>() calls this function automatically beforehand, but the
 * client must call this itself after adding the final ofpact to an array of
 * them.
 *
 * (The consequences of failing to call this function are probably not dire.
 * OFPACT_FOR_EACH will calculate a pointer beyond the end of the ofpacts, but
 * not dereference it.  That's undefined behavior, technically, but it will not
 * cause a real problem on common systems.  Still, it seems better to call
 * it.) */
void
ofpact_pad(struct ofpbuf *ofpacts)
{
    unsigned int rem = ofpacts->size % OFPACT_ALIGNTO;
    if (rem) {
        ofpbuf_put_zeros(ofpacts, OFPACT_ALIGNTO - rem);
    }
}

/* Sets up 'load' as a set-field style load of the whole of field 'mf':
 * marks its 'compat' as OFPUTIL_OFPAT12_SET_FIELD, makes the destination
 * subfield cover all 'mf->n_bits' bits starting at offset 0, and copies
 * 'mf->n_bits' bits from the 'mf->n_bytes'-byte value at 'src' into
 * 'load->subvalue'. */
void
ofpact_set_field_init(struct ofpact_reg_load *load, const struct mf_field *mf,
                      const void *src)
{
    load->ofpact.compat = OFPUTIL_OFPAT12_SET_FIELD;
    load->dst.field = mf;
    load->dst.ofs = 0;
    load->dst.n_bits = mf->n_bits;
    bitwise_copy(src, mf->n_bytes, load->dst.ofs,
                 &load->subvalue, sizeof load->subvalue, 0, mf->n_bits);
}
openvswitch-2.0.1+git20140120/lib/ofp-actions.h000066400000000000000000000565661226605124000206350ustar00rootroot00000000000000
/*
 * Copyright (c) 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OFP_ACTIONS_H
#define OFP_ACTIONS_H 1

/* NOTE(review): the first #include below has no header name; it looks like a
 * <...> system header was stripped from this copy -- restore it from the
 * upstream file. */
#include #include "meta-flow.h"
#include "ofp-errors.h"
#include "ofp-util.h"
#include "openflow/openflow.h"
#include "openflow/nicira-ext.h"
#include "openvswitch/types.h"

/* List of OVS abstracted actions.
 *
 * This macro is used directly only internally by this header, but the list is
 * still of interest to developers.
 *
 * Each DEFINE_OFPACT invocation has the following parameters:
 *
 * 1. <ENUM>, used below in the enum definition of OFPACT_<ENUM>, and
 *    elsewhere.
 *
 * 2. <STRUCT> corresponding to a structure "struct <STRUCT>", that must be
 *    defined below.  This structure must be an abstract definition of the
 *    action.  Its first member must have type "struct ofpact" and name
 *    "ofpact".  It may be fixed length or end with a flexible array member
 *    (e.g. "int member[];").
 *
 * 3. <MEMBER>, which has one of two possible values:
 *
 *        - If "struct <STRUCT>" is fixed-length, it must be "ofpact".
 *
 *        - If "struct <STRUCT>" is variable-length, it must be the name of the
 *          flexible array member.
 */
#define OFPACTS                                                     \
    /* Output. */                                                   \
    DEFINE_OFPACT(OUTPUT,          ofpact_output,        ofpact)    \
    DEFINE_OFPACT(CONTROLLER,      ofpact_controller,    ofpact)    \
    DEFINE_OFPACT(ENQUEUE,         ofpact_enqueue,       ofpact)    \
    DEFINE_OFPACT(OUTPUT_REG,      ofpact_output_reg,    ofpact)    \
    DEFINE_OFPACT(BUNDLE,          ofpact_bundle,        slaves)    \
                                                                    \
    /* Header changes. */                                           \
    DEFINE_OFPACT(SET_VLAN_VID,    ofpact_vlan_vid,      ofpact)    \
    DEFINE_OFPACT(SET_VLAN_PCP,    ofpact_vlan_pcp,      ofpact)    \
    DEFINE_OFPACT(STRIP_VLAN,      ofpact_null,          ofpact)    \
    DEFINE_OFPACT(PUSH_VLAN,       ofpact_null,          ofpact)    \
    DEFINE_OFPACT(SET_ETH_SRC,     ofpact_mac,           ofpact)    \
    DEFINE_OFPACT(SET_ETH_DST,     ofpact_mac,           ofpact)    \
    DEFINE_OFPACT(SET_IPV4_SRC,    ofpact_ipv4,          ofpact)    \
    DEFINE_OFPACT(SET_IPV4_DST,    ofpact_ipv4,          ofpact)    \
    DEFINE_OFPACT(SET_IPV4_DSCP,   ofpact_dscp,          ofpact)    \
    DEFINE_OFPACT(SET_L4_SRC_PORT, ofpact_l4_port,       ofpact)    \
    DEFINE_OFPACT(SET_L4_DST_PORT, ofpact_l4_port,       ofpact)    \
    DEFINE_OFPACT(REG_MOVE,        ofpact_reg_move,      ofpact)    \
    DEFINE_OFPACT(REG_LOAD,        ofpact_reg_load,      ofpact)    \
    DEFINE_OFPACT(STACK_PUSH,      ofpact_stack,         ofpact)    \
    DEFINE_OFPACT(STACK_POP,       ofpact_stack,         ofpact)    \
    DEFINE_OFPACT(DEC_TTL,         ofpact_cnt_ids,       cnt_ids)   \
    DEFINE_OFPACT(SET_MPLS_TTL,    ofpact_mpls_ttl,      ofpact)    \
    DEFINE_OFPACT(DEC_MPLS_TTL,    ofpact_null,          ofpact)    \
    DEFINE_OFPACT(PUSH_MPLS,       ofpact_push_mpls,     ofpact)    \
    DEFINE_OFPACT(POP_MPLS,        ofpact_pop_mpls,      ofpact)    \
                                                                    \
    /* Metadata. */                                                 \
    DEFINE_OFPACT(SET_TUNNEL,      ofpact_tunnel,        ofpact)    \
    DEFINE_OFPACT(SET_QUEUE,       ofpact_queue,         ofpact)    \
    DEFINE_OFPACT(POP_QUEUE,       ofpact_null,          ofpact)    \
    DEFINE_OFPACT(FIN_TIMEOUT,     ofpact_fin_timeout,   ofpact)    \
                                                                    \
    /* Flow table interaction. */                                   \
    DEFINE_OFPACT(RESUBMIT,        ofpact_resubmit,      ofpact)    \
    DEFINE_OFPACT(LEARN,           ofpact_learn,         specs)     \
                                                                    \
    /* Arithmetic. */                                               \
    DEFINE_OFPACT(MULTIPATH,       ofpact_multipath,     ofpact)    \
                                                                    \
    /* Other. */                                                    \
    DEFINE_OFPACT(NOTE,            ofpact_note,          data)      \
    DEFINE_OFPACT(EXIT,            ofpact_null,          ofpact)    \
    DEFINE_OFPACT(SAMPLE,          ofpact_sample,        ofpact)    \
                                                                    \
    /* Instructions */                                              \
    DEFINE_OFPACT(METER,           ofpact_meter,         ofpact)    \
    /* XXX Write-Actions */                                         \
    DEFINE_OFPACT(CLEAR_ACTIONS,   ofpact_null,          ofpact)    \
    DEFINE_OFPACT(WRITE_METADATA,  ofpact_metadata,      ofpact)    \
    DEFINE_OFPACT(GOTO_TABLE,      ofpact_goto_table,    ofpact)

/* enum ofpact_type, with a member OFPACT_<ENUM> for each action. */
enum OVS_PACKED_ENUM ofpact_type {
#define DEFINE_OFPACT(ENUM, STRUCT, MEMBER) OFPACT_##ENUM,
    OFPACTS
#undef DEFINE_OFPACT
};

/* N_OFPACTS, the number of values of "enum ofpact_type".  Each OFPACTS entry
 * expands to "+ 1", so the initializer sums to the number of entries. */
enum {
    N_OFPACTS =
#define DEFINE_OFPACT(ENUM, STRUCT, MEMBER) + 1
    OFPACTS
#undef DEFINE_OFPACT
};

/* Header for an action.
 *
 * Each action is a structure "struct ofpact_*" that begins with "struct
 * ofpact", usually followed by other data that describes the action.  Actions
 * are padded out to a multiple of OFPACT_ALIGNTO bytes in length.
 *
 * The 'compat' member is special:
 *
 *     - Most "struct ofpact"s correspond to one particular kind of OpenFlow
 *       action, at least in a given OpenFlow version.  For example,
 *       OFPACT_SET_VLAN_VID corresponds to OFPAT10_SET_VLAN_VID in OpenFlow
 *       1.0.
 *
 *       For such actions, the 'compat' member is not meaningful and generally
 *       should be zero.
 *
 *     - A few "struct ofpact"s correspond to multiple OpenFlow actions.  For
 *       example, OFPACT_SET_TUNNEL can be NXAST_SET_TUNNEL or
 *       NXAST_SET_TUNNEL64.
In these cases, if the "struct ofpact" originated
 *       from OpenFlow, then we want to make sure that, if it gets translated
 *       back to OpenFlow later, it is translated back to the same action type.
 *       (Otherwise, we'd violate the promise made in DESIGN, in the "Action
 *       Reproduction" section.)
 *
 *       For such actions, the 'compat' member should be the original action
 *       type.  (If the action didn't originate from OpenFlow, then setting
 *       'compat' to zero should be fine: code to translate the ofpact to
 *       OpenFlow must tolerate this case.)
 */
struct ofpact {
    enum ofpact_type type;      /* OFPACT_*. */
    enum ofputil_action_code compat; /* Original type when added, if any. */
    uint16_t len;               /* Length of the action, in bytes, including
                                 * struct ofpact, excluding padding. */
};

#ifdef __GNUC__
/* Make sure that OVS_PACKED_ENUM really worked. */
BUILD_ASSERT_DECL(sizeof(struct ofpact) == 4);
#endif

/* Alignment. */
#define OFPACT_ALIGNTO 8
#define OFPACT_ALIGN(SIZE) ROUND_UP(SIZE, OFPACT_ALIGNTO)

/* Returns the action that follows 'ofpact': the address 'ofpact->len' bytes
 * past 'ofpact', rounded up with OFPACT_ALIGN to account for padding. */
static inline struct ofpact *
ofpact_next(const struct ofpact *ofpact)
{
    return (void *) ((uint8_t *) ofpact + OFPACT_ALIGN(ofpact->len));
}

/* Returns the address 'ofpacts_len' bytes past 'ofpacts', i.e. just past the
 * last action in the array. */
static inline struct ofpact *
ofpact_end(const struct ofpact *ofpacts, size_t ofpacts_len)
{
    return (void *) ((uint8_t *) ofpacts + ofpacts_len);
}

/* Assigns POS to each ofpact, in turn, in the OFPACTS_LEN bytes of ofpacts
 * starting at OFPACTS. */
#define OFPACT_FOR_EACH(POS, OFPACTS, OFPACTS_LEN)                      \
    for ((POS) = (OFPACTS); (POS) < ofpact_end(OFPACTS, OFPACTS_LEN);  \
         (POS) = ofpact_next(POS))

/* Action structure for each OFPACT_*. */

/* OFPACT_STRIP_VLAN, OFPACT_PUSH_VLAN, OFPACT_DEC_MPLS_TTL, OFPACT_POP_QUEUE,
 * OFPACT_EXIT, OFPACT_CLEAR_ACTIONS.
 *
 * Used for OFPAT10_STRIP_VLAN, NXAST_POP_QUEUE, NXAST_EXIT,
 * OFPAT11_POP_VLAN, OFPIT11_CLEAR_ACTIONS.
 *
 * Action structure for actions that do not have any extra data beyond the
 * action type. */
struct ofpact_null {
    struct ofpact ofpact;
};

/* OFPACT_OUTPUT.
 *
 * Used for OFPAT10_OUTPUT. */
struct ofpact_output {
    struct ofpact ofpact;
    ofp_port_t port;            /* Output port. */
    uint16_t max_len;           /* Max send len, for port OFPP_CONTROLLER. */
};

/* OFPACT_CONTROLLER.
 *
 * Used for NXAST_CONTROLLER. */
struct ofpact_controller {
    struct ofpact ofpact;
    uint16_t max_len;           /* Maximum length to send to controller. */
    uint16_t controller_id;     /* Controller ID to send packet-in. */
    enum ofp_packet_in_reason reason; /* Reason to put in packet-in. */
};

/* OFPACT_ENQUEUE.
 *
 * Used for OFPAT10_ENQUEUE. */
struct ofpact_enqueue {
    struct ofpact ofpact;
    ofp_port_t port;
    uint32_t queue;
};

/* OFPACT_OUTPUT_REG.
 *
 * Used for NXAST_OUTPUT_REG. */
struct ofpact_output_reg {
    struct ofpact ofpact;
    struct mf_subfield src;
    uint16_t max_len;
};

/* OFPACT_BUNDLE.
 *
 * Used for NXAST_BUNDLE. */
struct ofpact_bundle {
    struct ofpact ofpact;

    /* Slave choice algorithm to apply to hash value. */
    enum nx_bd_algorithm algorithm;

    /* What fields to hash and how. */
    enum nx_hash_fields fields;
    uint16_t basis;             /* Universal hash parameter. */

    struct mf_subfield dst;

    /* Slaves for output. */
    unsigned int n_slaves;
    ofp_port_t slaves[];
};

/* OFPACT_SET_VLAN_VID.
 *
 * Used for OFPAT10_SET_VLAN_VID. */
struct ofpact_vlan_vid {
    struct ofpact ofpact;
    uint16_t vlan_vid;          /* VLAN VID in low 12 bits, 0 in other bits. */
};

/* OFPACT_SET_VLAN_PCP.
 *
 * Used for OFPAT10_SET_VLAN_PCP. */
struct ofpact_vlan_pcp {
    struct ofpact ofpact;
    uint8_t vlan_pcp;           /* VLAN PCP in low 3 bits, 0 in other bits. */
};

/* OFPACT_SET_ETH_SRC, OFPACT_SET_ETH_DST.
 *
 * Used for OFPAT10_SET_DL_SRC, OFPAT10_SET_DL_DST. */
struct ofpact_mac {
    struct ofpact ofpact;
    uint8_t mac[ETH_ADDR_LEN];
};

/* OFPACT_SET_IPV4_SRC, OFPACT_SET_IPV4_DST.
 *
 * Used for OFPAT10_SET_NW_SRC, OFPAT10_SET_NW_DST. */
struct ofpact_ipv4 {
    struct ofpact ofpact;
    ovs_be32 ipv4;
};

/* OFPACT_SET_IPV4_DSCP.
 *
 * Used for OFPAT10_SET_NW_TOS. */
struct ofpact_dscp {
    struct ofpact ofpact;
    uint8_t dscp;               /* DSCP in high 6 bits, rest ignored. */
};

/* OFPACT_SET_L4_SRC_PORT, OFPACT_SET_L4_DST_PORT.
 *
 * Used for OFPAT10_SET_TP_SRC, OFPAT10_SET_TP_DST. */
struct ofpact_l4_port {
    struct ofpact ofpact;
    uint16_t port;              /* TCP or UDP port number. */
};

/* OFPACT_REG_MOVE.
 *
 * Used for NXAST_REG_MOVE. */
struct ofpact_reg_move {
    struct ofpact ofpact;
    struct mf_subfield src;
    struct mf_subfield dst;
};

/* OFPACT_STACK_PUSH, OFPACT_STACK_POP.
 *
 * Used for NXAST_STACK_PUSH and NXAST_STACK_POP. */
struct ofpact_stack {
    struct ofpact ofpact;
    struct mf_subfield subfield;
};

/* OFPACT_REG_LOAD.
 *
 * Used for NXAST_REG_LOAD, OFPAT12_SET_FIELD. */
struct ofpact_reg_load {
    struct ofpact ofpact;
    struct mf_subfield dst;
    union mf_subvalue subvalue; /* Least-significant bits are used. */
};

/* OFPACT_PUSH_MPLS.
 *
 * Used for NXAST_PUSH_MPLS, OFPAT11_PUSH_MPLS. */
struct ofpact_push_mpls {
    struct ofpact ofpact;
    ovs_be16 ethertype;
};

/* OFPACT_POP_MPLS.
 *
 * Used for NXAST_POP_MPLS, OFPAT11_POP_MPLS. */
struct ofpact_pop_mpls {
    struct ofpact ofpact;
    ovs_be16 ethertype;
};

/* OFPACT_SET_TUNNEL.
 *
 * Used for NXAST_SET_TUNNEL, NXAST_SET_TUNNEL64. */
struct ofpact_tunnel {
    struct ofpact ofpact;
    uint64_t tun_id;
};

/* OFPACT_SET_QUEUE.
 *
 * Used for NXAST_SET_QUEUE. */
struct ofpact_queue {
    struct ofpact ofpact;
    uint32_t queue_id;
};

/* OFPACT_FIN_TIMEOUT.
 *
 * Used for NXAST_FIN_TIMEOUT. */
struct ofpact_fin_timeout {
    struct ofpact ofpact;
    uint16_t fin_idle_timeout;
    uint16_t fin_hard_timeout;
};

/* OFPACT_WRITE_METADATA.
 *
 * Used for NXAST_WRITE_METADATA. */
struct ofpact_metadata {
    struct ofpact ofpact;
    ovs_be64 metadata;
    ovs_be64 mask;
};

/* OFPACT_METER.
 *
 * Used for OFPIT13_METER. */
struct ofpact_meter {
    struct ofpact ofpact;
    uint32_t meter_id;
};

/* OFPACT_RESUBMIT.
 *
 * Used for NXAST_RESUBMIT, NXAST_RESUBMIT_TABLE. */
struct ofpact_resubmit {
    struct ofpact ofpact;
    ofp_port_t in_port;
    uint8_t table_id;
};

/* Part of struct ofpact_learn, below. */
struct ofpact_learn_spec {
    int n_bits;                 /* Number of bits in source and dest. */

    int src_type;               /* One of NX_LEARN_SRC_*. */
    struct mf_subfield src;     /* NX_LEARN_SRC_FIELD only. */
    union mf_subvalue src_imm;  /* NX_LEARN_SRC_IMMEDIATE only. */

    int dst_type;             /* One of NX_LEARN_DST_*. */
    struct mf_subfield dst;   /* NX_LEARN_DST_MATCH, NX_LEARN_DST_LOAD only. */
};

/* OFPACT_LEARN.
 *
 * Used for NXAST_LEARN. */
struct ofpact_learn {
    struct ofpact ofpact;

    uint16_t idle_timeout;      /* Idle time before discarding (seconds). */
    uint16_t hard_timeout;      /* Max time before discarding (seconds). */
    uint16_t priority;          /* Priority level of flow entry. */
    uint64_t cookie;            /* Cookie for new flow. */
    enum ofputil_flow_mod_flags flags;
    uint8_t table_id;           /* Table to insert flow entry. */
    uint16_t fin_idle_timeout;  /* Idle timeout after FIN, if nonzero. */
    uint16_t fin_hard_timeout;  /* Hard timeout after FIN, if nonzero. */

    unsigned int n_specs;
    struct ofpact_learn_spec specs[];
};

/* OFPACT_MULTIPATH.
 *
 * Used for NXAST_MULTIPATH. */
struct ofpact_multipath {
    struct ofpact ofpact;

    /* What fields to hash and how. */
    enum nx_hash_fields fields;
    uint16_t basis;             /* Universal hash parameter. */

    /* Multipath link choice algorithm to apply to hash value. */
    enum nx_mp_algorithm algorithm;
    uint16_t max_link;          /* Number of output links, minus 1. */
    uint32_t arg;               /* Algorithm-specific argument. */

    /* Where to store the result. */
    struct mf_subfield dst;
};

/* OFPACT_NOTE.
 *
 * Used for NXAST_NOTE. */
struct ofpact_note {
    struct ofpact ofpact;
    size_t length;
    uint8_t data[];
};

/* OFPACT_SAMPLE.
 *
 * Used for NXAST_SAMPLE. */
struct ofpact_sample {
    struct ofpact ofpact;
    uint16_t probability;  /* Always >0. */
    uint32_t collector_set_id;
    uint32_t obs_domain_id;
    uint32_t obs_point_id;
};

/* OFPACT_DEC_TTL.
 *
 * Used for OFPAT11_DEC_NW_TTL, NXAST_DEC_TTL and NXAST_DEC_TTL_CNT_IDS. */
struct ofpact_cnt_ids {
    struct ofpact ofpact;

    /* Controller ids. */
    unsigned int n_controllers;
    uint16_t cnt_ids[];
};

/* OFPACT_SET_MPLS_TTL.
 *
 * Used for NXAST_SET_MPLS_TTL */
struct ofpact_mpls_ttl {
    struct ofpact ofpact;

    uint8_t ttl;
};

/* OFPACT_GOTO_TABLE
 *
 * Used for OFPIT11_GOTO_TABLE */
struct ofpact_goto_table {
    struct ofpact ofpact;
    uint8_t table_id;
};

/* Converting OpenFlow to ofpacts. */
enum ofperr ofpacts_pull_openflow10(struct ofpbuf *openflow,
                                    unsigned int actions_len,
                                    struct ofpbuf *ofpacts);
enum ofperr ofpacts_pull_openflow11_actions(struct ofpbuf *openflow,
                                            unsigned int actions_len,
                                            struct ofpbuf *ofpacts);
enum ofperr ofpacts_pull_openflow11_instructions(struct ofpbuf *openflow,
                                                 unsigned int instructions_len,
                                                 struct ofpbuf *ofpacts);
enum ofperr ofpacts_check(const struct ofpact[], size_t ofpacts_len,
                          struct flow *, ofp_port_t max_ports,
                          uint8_t table_id);
enum ofperr ofpacts_verify(const struct ofpact ofpacts[], size_t ofpacts_len);

/* Converting ofpacts to OpenFlow. */
void ofpacts_put_openflow10(const struct ofpact[], size_t ofpacts_len,
                            struct ofpbuf *openflow);
size_t ofpacts_put_openflow11_actions(const struct ofpact[], size_t ofpacts_len,
                                      struct ofpbuf *openflow);
void ofpacts_put_openflow11_instructions(const struct ofpact[],
                                         size_t ofpacts_len,
                                         struct ofpbuf *openflow);

/* Working with ofpacts. */
bool ofpacts_output_to_port(const struct ofpact[], size_t ofpacts_len,
                            ofp_port_t port);
bool ofpacts_equal(const struct ofpact a[], size_t a_len,
                   const struct ofpact b[], size_t b_len);
uint32_t ofpacts_get_meter(const struct ofpact[], size_t ofpacts_len);

/* Formatting ofpacts.
 *
 * (For parsing ofpacts, see ofp-parse.h.) */
void ofpacts_format(const struct ofpact[], size_t ofpacts_len, struct ds *);

/* Internal use by the helpers below.
 */
void ofpact_init(struct ofpact *, enum ofpact_type, size_t len);
void *ofpact_put(struct ofpbuf *, enum ofpact_type, size_t len);

/* For each OFPACT_<ENUM> with a corresponding struct <STRUCT>, this defines
 * the following commonly useful functions:
 *
 *   struct <STRUCT> *ofpact_put_<ENUM>(struct ofpbuf *ofpacts);
 *
 *     Appends a new 'ofpact', of length OFPACT_<ENUM>_RAW_SIZE, to 'ofpacts',
 *     initializes it with ofpact_init_<ENUM>(), and returns it.  Also sets
 *     'ofpacts->l2' to the returned action.
 *
 *     After using this function to add a variable-length action, add the
 *     elements of the flexible array (e.g. with ofpbuf_put()), then use
 *     ofpact_update_len() to update the length embedded into the action.
 *     (Keep in mind the need to refresh the structure from 'ofpacts->l2' after
 *     adding data to 'ofpacts'.)
 *
 *   struct <STRUCT> *ofpact_get_<ENUM>(const struct ofpact *ofpact);
 *
 *     Returns 'ofpact' cast to "struct <STRUCT> *".  'ofpact->type' must be
 *     OFPACT_<ENUM>.
 *
 * as well as the following more rarely useful definitions:
 *
 *   void ofpact_init_<ENUM>(struct <STRUCT> *ofpact);
 *
 *     Initializes the parts of 'ofpact' that identify it as having type
 *     OFPACT_<ENUM> and length OFPACT_<ENUM>_RAW_SIZE and zeros the rest.
 *
 *   OFPACT_<ENUM>_RAW_SIZE
 *
 *     The size of the action structure.  For a fixed-length action, this is
 *     sizeof(struct <STRUCT>).  For a variable-length action, this is the
 *     offset to the variable-length part.
 *
 *   OFPACT_<ENUM>_SIZE
 *
 *     An integer constant, the value of OFPACT_<ENUM>_RAW_SIZE rounded up to a
 *     multiple of OFPACT_ALIGNTO.
 */
#define DEFINE_OFPACT(ENUM, STRUCT, MEMBER)                             \
    BUILD_ASSERT_DECL(offsetof(struct STRUCT, ofpact) == 0);            \
                                                                        \
    enum { OFPACT_##ENUM##_RAW_SIZE                                     \
           = (offsetof(struct STRUCT, MEMBER)                           \
              ? offsetof(struct STRUCT, MEMBER)                         \
              : sizeof(struct STRUCT)) };                               \
                                                                        \
    enum { OFPACT_##ENUM##_SIZE                                         \
           = ROUND_UP(OFPACT_##ENUM##_RAW_SIZE, OFPACT_ALIGNTO) };      \
                                                                        \
    static inline struct STRUCT *                                       \
    ofpact_get_##ENUM(const struct ofpact *ofpact)                      \
    {                                                                   \
        ovs_assert(ofpact->type == OFPACT_##ENUM);                      \
        return ALIGNED_CAST(struct STRUCT *, ofpact);                   \
    }                                                                   \
                                                                        \
    static inline struct STRUCT *                                       \
    ofpact_put_##ENUM(struct ofpbuf *ofpacts)                           \
    {                                                                   \
        return ofpact_put(ofpacts, OFPACT_##ENUM,                       \
                          OFPACT_##ENUM##_RAW_SIZE);                    \
    }                                                                   \
                                                                        \
    static inline void                                                  \
    ofpact_init_##ENUM(struct STRUCT *ofpact)                           \
    {                                                                   \
        ofpact_init(&ofpact->ofpact, OFPACT_##ENUM,                     \
                    OFPACT_##ENUM##_RAW_SIZE);                          \
    }
/* Instantiate the definitions above once per entry in OFPACTS. */
OFPACTS
#undef DEFINE_OFPACT

/* Functions to use after adding ofpacts to a buffer. */
void ofpact_update_len(struct ofpbuf *, struct ofpact *);
void ofpact_pad(struct ofpbuf *);

/* OpenFlow 1.1 instructions.
 * The order is sorted in execution order. Not in the value of OFPIT11_xxx.
 * It is enforced on parser from text string.
 *
 * Each DEFINE_INST entry supplies four arguments, named ENUM, STRUCT,
 * EXTENSIBLE and NAME by the expansions below; NAME is the instruction's
 * text keyword.
 */
#define OVS_INSTRUCTIONS                                    \
    DEFINE_INST(OFPIT13_METER,                              \
                ofp13_instruction_meter,          false,    \
                "meter")                                    \
                                                            \
    DEFINE_INST(OFPIT11_APPLY_ACTIONS,                      \
                ofp11_instruction_actions,        true,     \
                "apply_actions")                            \
                                                            \
    DEFINE_INST(OFPIT11_CLEAR_ACTIONS,                      \
                ofp11_instruction,                false,    \
                "clear_actions")                            \
                                                            \
    DEFINE_INST(OFPIT11_WRITE_ACTIONS,                      \
                ofp11_instruction_actions,        true,     \
                "write_actions")                            \
                                                            \
    DEFINE_INST(OFPIT11_WRITE_METADATA,                     \
                ofp11_instruction_write_metadata, false,    \
                "write_metadata")                           \
                                                            \
    DEFINE_INST(OFPIT11_GOTO_TABLE,                         \
                ofp11_instruction_goto_table,     false,    \
                "goto_table")

/* enum ovs_instruction_type, with a member OVSINST_<ENUM> for each entry in
 * OVS_INSTRUCTIONS, in the order listed there. */
enum ovs_instruction_type {
#define DEFINE_INST(ENUM, STRUCT, EXTENSIBLE, NAME) OVSINST_##ENUM,
    OVS_INSTRUCTIONS
#undef DEFINE_INST
};

/* N_OVS_INSTRUCTIONS, the number of entries in OVS_INSTRUCTIONS (each entry
 * expands to "+ 1"). */
enum {
#define DEFINE_INST(ENUM, STRUCT, EXTENSIBLE, NAME) + 1
    N_OVS_INSTRUCTIONS = OVS_INSTRUCTIONS
#undef DEFINE_INST
};


const char *ovs_instruction_name_from_type(enum ovs_instruction_type type);
int ovs_instruction_type_from_name(const char *name);
enum ovs_instruction_type ovs_instruction_type_from_ofpact_type(
    enum
ofpact_type);

void ofpact_set_field_init(struct ofpact_reg_load *load,
                           const struct mf_field *mf, const void *src);

#endif /* ofp-actions.h */
openvswitch-2.0.1+git20140120/lib/ofp-errors.c000066400000000000000000000255101226605124000204650ustar00rootroot00000000000000
/*
 * Copyright (c) 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): two #include directives below have no header name; it looks
 * like <...> system headers were stripped from this copy -- restore them from
 * the upstream file. */
#include #include "ofp-errors.h"
#include #include "byte-order.h"
#include "dynamic-string.h"
#include "ofp-msgs.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ofp_errors);

/* One error's encoding within a particular domain: vendor ID (zero is
 * treated as "no vendor" by the encoder below), type and code.  The encoder
 * treats a negative 'code' as "not encodable in this domain". */
struct triplet {
    uint32_t vendor;
    int type, code;
};

#include "ofp-errors.inc"

/* Returns an ofperr_domain that corresponds to the OpenFlow version number
 * 'version' (one of the possible values of struct ofp_header's 'version'
 * member).  Returns NULL if the version isn't defined or isn't understood by
 * OVS. */
static const struct ofperr_domain *
ofperr_domain_from_version(enum ofp_version version)
{
    switch (version) {
    case OFP10_VERSION:
        return &ofperr_of10;
    case OFP11_VERSION:
        return &ofperr_of11;
    case OFP12_VERSION:
        return &ofperr_of12;
    case OFP13_VERSION:
        return &ofperr_of13;
    default:
        return NULL;
    }
}

/* Returns the name (e.g. "OpenFlow 1.0") of OpenFlow version 'version', or
 * NULL if 'version' is unknown. */
const char *
ofperr_domain_get_name(enum ofp_version version)
{
    const struct ofperr_domain *domain = ofperr_domain_from_version(version);
    return domain ?
domain->name : NULL;
}

/* Returns true if 'error' is a valid OFPERR_* value, false otherwise. */
bool
ofperr_is_valid(enum ofperr error)
{
    return error >= OFPERR_OFS && error < OFPERR_OFS + OFPERR_N_ERRORS;
}

/* Returns the OFPERR_* value that corresponds to 'vendor', 'type' and 'code'
 * within 'version', or 0 if either no such OFPERR_* value exists or 'version'
 * is unknown. */
static enum ofperr
ofperr_decode(enum ofp_version version,
              uint32_t vendor, uint16_t type, uint16_t code)
{
    const struct ofperr_domain *domain = ofperr_domain_from_version(version);
    return domain ? domain->decode(vendor, type, code) : 0;
}

/* Returns the name of 'error', e.g. "OFPBRC_BAD_TYPE" if 'error' is
 * OFPBRC_BAD_TYPE, or "" if 'error' is not a valid OFPERR_* value.
 *
 * Consider ofperr_to_string() instead, if the error code might be an errno
 * value. */
const char *
ofperr_get_name(enum ofperr error)
{
    return (ofperr_is_valid(error)
            ? error_names[error - OFPERR_OFS]
            : "");
}

/* Returns the OFPERR_* value that corresponds to 'name', 0 if none exists.
 * For example, returns OFPERR_OFPHFC_INCOMPATIBLE if 'name' is
 * "OFPHFC_INCOMPATIBLE".
 *
 * This is probably useful only for debugging and testing. */
enum ofperr
ofperr_from_name(const char *name)
{
    int i;

    /* Linear scan; entry 'i' of error_names[] names error OFPERR_OFS + i. */
    for (i = 0; i < OFPERR_N_ERRORS; i++) {
        if (!strcmp(name, error_names[i])) {
            return i + OFPERR_OFS;
        }
    }
    return 0;
}

/* Returns an extended description name of 'error', e.g. "ofp_header.type not
 * supported." if 'error' is OFPBRC_BAD_TYPE, or "" if 'error' is not
 * a valid OFPERR_* value. */
const char *
ofperr_get_description(enum ofperr error)
{
    return (ofperr_is_valid(error)
            ?
error_comments[error - OFPERR_OFS]
            : "");
}

/* Returns the entry for 'error' in 'domain''s table of triplets.
 *
 * 'error' must be a valid OFPERR_* code (this is asserted). */
static const struct triplet *
ofperr_get_triplet__(enum ofperr error, const struct ofperr_domain *domain)
{
    size_t ofs = error - OFPERR_OFS;

    ovs_assert(ofperr_is_valid(error));
    return &domain->errors[ofs];
}

/* Builds and returns an OFPT_ERROR message for 'error' in OpenFlow version
 * 'ofp_version' with transaction ID 'xid', carrying the 'data_len' bytes at
 * 'data' as its payload.
 *
 * Falls back to the OpenFlow 1.0 domain if 'ofp_version' is unknown, and to
 * OFPERR_NXBRC_UNENCODABLE_ERROR if 'error' is invalid or cannot be encoded
 * in the chosen domain; both fallbacks are logged (rate-limited). */
static struct ofpbuf *
ofperr_encode_msg__(enum ofperr error, enum ofp_version ofp_version,
                    ovs_be32 xid, const void *data, size_t data_len)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
    const struct ofperr_domain *domain;
    const struct triplet *triplet;
    struct ofp_error_msg *oem;
    struct ofpbuf *buf;

    /* Get the error domain for 'ofp_version', or fall back to OF1.0. */
    domain = ofperr_domain_from_version(ofp_version);
    if (!domain) {
        VLOG_ERR_RL(&rl, "cannot encode error for unknown OpenFlow "
                    "version 0x%02x", ofp_version);
        domain = &ofperr_of10;
    }

    /* Make sure 'error' is valid in 'domain', or use a fallback error. */
    if (!ofperr_is_valid(error)) {
        /* 'error' seems likely to be a system errno value. */
        VLOG_ERR_RL(&rl, "invalid OpenFlow error code %d (%s)",
                    error, ovs_strerror(error));
        error = OFPERR_NXBRC_UNENCODABLE_ERROR;
    } else if (domain->errors[error - OFPERR_OFS].code < 0) {
        VLOG_ERR_RL(&rl, "cannot encode %s for %s",
                    ofperr_get_name(error), domain->name);
        error = OFPERR_NXBRC_UNENCODABLE_ERROR;
    }

    triplet = ofperr_get_triplet__(error, domain);
    if (!triplet->vendor) {
        /* Standard error: type and code go directly in the error header. */
        buf = ofpraw_alloc_xid(OFPRAW_OFPT_ERROR, domain->version, xid,
                               sizeof *oem + data_len);

        oem = ofpbuf_put_uninit(buf, sizeof *oem);
        oem->type = htons(triplet->type);
        oem->code = htons(triplet->code);
    } else if (ofp_version <= OFP11_VERSION) {
        struct nx_vendor_error *nve;

        /* Vendor error, OpenFlow 1.1 and earlier: the error header holds
         * NXET_VENDOR/NXVC_VENDOR_ERROR and a struct nx_vendor_error with the
         * real vendor, type and code follows it. */
        buf = ofpraw_alloc_xid(OFPRAW_OFPT_ERROR, domain->version, xid,
                               sizeof *oem + sizeof *nve + data_len);

        oem = ofpbuf_put_uninit(buf, sizeof *oem);
        oem->type = htons(NXET_VENDOR);
        oem->code = htons(NXVC_VENDOR_ERROR);

        nve = ofpbuf_put_uninit(buf, sizeof *nve);
        nve->vendor = htonl(triplet->vendor);
        nve->type = htons(triplet->type);
        nve->code = htons(triplet->code);
    }
else {
        ovs_be32 vendor = htonl(triplet->vendor);

        /* Vendor error, later versions: the error header's type is
         * OFPET12_EXPERIMENTER, its 'code' field carries the triplet's
         * *type*, and the 32-bit vendor ID follows.  The triplet's code is
         * not written in this layout. */
        buf = ofpraw_alloc_xid(OFPRAW_OFPT_ERROR, domain->version, xid,
                               sizeof *oem + sizeof(uint32_t) + data_len);

        oem = ofpbuf_put_uninit(buf, sizeof *oem);
        oem->type = htons(OFPET12_EXPERIMENTER);
        oem->code = htons(triplet->type);
        ofpbuf_put(buf, &vendor, sizeof vendor);
    }

    ofpbuf_put(buf, data, data_len);
    ofpmsg_update_length(buf);

    return buf;
}

/* Creates and returns an OpenFlow message of type OFPT_ERROR that conveys the
 * given 'error'.
 *
 * 'oh->version' determines the OpenFlow version of the error reply.
 * 'oh->xid' determines the xid of the error reply.
 * The error reply will contain an initial subsequence of 'oh', up to
 * 'oh->length' or 64 bytes, whichever is shorter.
 *
 * This function isn't appropriate for encoding OFPET_HELLO_FAILED error
 * messages.  Use ofperr_encode_hello() instead. */
struct ofpbuf *
ofperr_encode_reply(enum ofperr error, const struct ofp_header *oh)
{
    uint16_t len = ntohs(oh->length);

    return ofperr_encode_msg__(error, oh->version,
                               oh->xid, oh, MIN(len, 64));
}

/* Creates and returns an OpenFlow message of type OFPT_ERROR that conveys the
 * given 'error', encoded for OpenFlow version 'ofp_version' with an xid of
 * 0.  The error message will include the additional null-terminated text
 * string 's' (without its null terminator) as its payload.
 *
 * If 'ofp_version' is an unknown version then OFP10_VERSION is used.
 * OFPET_HELLO_FAILED error messages are supposed to be backward-compatible,
 * so in theory this should work. */
struct ofpbuf *
ofperr_encode_hello(enum ofperr error, enum ofp_version ofp_version,
                    const char *s)
{
    return ofperr_encode_msg__(error, ofp_version, htonl(0), s, strlen(s));
}

/* Returns the vendor ID used to encode 'error' in 'version', or -1 if
 * 'version' is unknown.
 *
 * 'error' must be a valid OFPERR_* code, as checked by ofperr_is_valid(). */
int
ofperr_get_vendor(enum ofperr error, enum ofp_version version)
{
    const struct ofperr_domain *domain = ofperr_domain_from_version(version);
    return domain ? ofperr_get_triplet__(error, domain)->vendor : -1;
}

/* Returns the value that would go into an OFPT_ERROR message's 'type' for
 * encoding 'error' in 'version'.  Returns -1 if 'error' is not encodable in
 * 'version' or 'version' is unknown.
 *
 * 'error' must be a valid OFPERR_* code, as checked by ofperr_is_valid(). */
int
ofperr_get_type(enum ofperr error, enum ofp_version version)
{
    const struct ofperr_domain *domain = ofperr_domain_from_version(version);
    return domain ? ofperr_get_triplet__(error, domain)->type : -1;
}

/* Returns the value that would go into an OFPT_ERROR message's 'code' for
 * encoding 'error' in 'version'.  Returns -1 if 'error' is not encodable in
 * 'version', 'version' is unknown or if 'error' represents a category
 * rather than a specific error.
 *
 * 'error' must be a valid OFPERR_* code, as checked by ofperr_is_valid(). */
int
ofperr_get_code(enum ofperr error, enum ofp_version version)
{
    const struct ofperr_domain *domain = ofperr_domain_from_version(version);
    return domain ? ofperr_get_triplet__(error, domain)->code : -1;
}

/* Tries to decode 'oh', which should be an OpenFlow OFPT_ERROR message.
 * Returns an OFPERR_* constant on success, 0 on failure.
 *
 * If 'payload' is nonnull, on success '*payload' is initialized to the
 * error's payload, and on failure it is cleared. */
enum ofperr
ofperr_decode_msg(const struct ofp_header *oh, struct ofpbuf *payload)
{
    const struct ofp_error_msg *oem;
    enum ofpraw raw;
    uint16_t type, code;
    enum ofperr error;
    uint32_t vendor;
    struct ofpbuf b;

    /* Clear '*payload' up front so that every failure path below leaves it
     * cleared. */
    if (payload) {
        memset(payload, 0, sizeof *payload);
    }

    /* Pull off the error message. */
    ofpbuf_use_const(&b, oh, ntohs(oh->length));
    error = ofpraw_pull(&raw, &b);
    if (error) {
        return 0;
    }
    oem = ofpbuf_pull(&b, sizeof *oem);

    /* Get the error type and code. */
    vendor = 0;
    type = ntohs(oem->type);
    code = ntohs(oem->code);
    if (type == NXET_VENDOR && code == NXVC_VENDOR_ERROR) {
        /* Nicira vendor-error layout: the real vendor, type and code are in
         * the struct nx_vendor_error that follows the error header. */
        const struct nx_vendor_error *nve = ofpbuf_try_pull(&b, sizeof *nve);
        if (!nve) {
            return 0;
        }

        vendor = ntohl(nve->vendor);
        type = ntohs(nve->type);
        code = ntohs(nve->code);
    } else if (type == OFPET12_EXPERIMENTER) {
        /* Experimenter layout: the header's 'code' field holds the error
         * type and a 32-bit vendor ID follows; the code is taken as 0. */
        const ovs_be32 *vendorp = ofpbuf_try_pull(&b, sizeof *vendorp);
        if (!vendorp) {
            return 0;
        }

        vendor = ntohl(*vendorp);
        type = code;
        code = 0;
    }

    /* Translate the error type and code into an ofperr. */
    error = ofperr_decode(oh->version, vendor, type, code);
    if (error && payload) {
        /* Whatever remains in 'b' after the headers is the payload. */
        ofpbuf_use_const(payload, b.data, b.size);
    }
    return error;
}

/* If 'error' is a valid OFPERR_* value, returns its name
 * (e.g. "OFPBRC_BAD_TYPE" for OFPBRC_BAD_TYPE).  Otherwise, assumes that
 * 'error' is a positive errno value and returns what ovs_strerror() produces
 * for 'error'.  */
const char *
ofperr_to_string(enum ofperr error)
{
    return (ofperr_is_valid(error)
            ? ofperr_get_name(error)
            : ovs_strerror(error));
}
openvswitch-2.0.1+git20140120/lib/ofp-errors.h000066400000000000000000000453451226605124000205020ustar00rootroot00000000000000
/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OFP_ERRORS_H
#define OFP_ERRORS_H 1

/* NOTE(review): three #include directives below have no header name; it
 * looks like <...> system headers were stripped from this copy -- restore
 * them from the upstream file. */
#include #include #include #include "openflow/openflow.h"

struct ds;
struct ofpbuf;

/* Error codes.
* * We embed system errno values and OpenFlow standard and vendor extension * error codes into the positive range of "int": * * - Errno values are assumed to use the range 1 through 2**30 - 1. * * (C and POSIX say that errno values are positive. We assume that they * are less than 2**30. They are actually less than 65536 on at least * Linux, FreeBSD, OpenBSD, and Windows.) * * - OpenFlow standard and vendor extension error codes use the range * starting at 2**30 (OFPERR_OFS). * * Zero and negative values are not used. */ #define OFPERR_OFS (1 << 30) /* OpenFlow error codes. * * The comments below are parsed by the extract-ofp-errors program at build * time and used to determine the mapping between "enum ofperr" constants and * error type/code values used in the OpenFlow protocol: * * - The first part of each comment specifies the vendor, OpenFlow versions, * type, and sometimes a code for each protocol that supports the error: * * # The vendor is OF for standard OpenFlow error codes. Otherwise it * is one of the *_VENDOR_ID codes defined in openflow-common.h. * * # The version can specify a specific OpenFlow version, a version * range delimited by "-", or an open-ended range with "+". * * # Standard OpenFlow errors have both a type and a code. Extension * errors generally have only a type, no code. There is one * exception: Nicira extension (NX) errors for OpenFlow 1.0 and 1.1 * have both a type and a code. (This means that the version * specification for NX errors may not include version 1.0 or 1.1 (or * both) along with version 1.2 or later, because the requirements * for those versions are different.) * * - Additional text is a human-readable description of the meaning of each * error, used to explain the error to the user. Any text enclosed in * square brackets is omitted; this can be used to explain rationale for * choice of error codes in the case where this is desirable. */ enum ofperr { /* Expected duplications.
*/

    /* Expected: 0x0,3,5 in OF1.1 means both OFPBIC_BAD_EXPERIMENTER and
     * OFPBIC_BAD_EXP_TYPE. */

/* ## ------------------ ## */
/* ## OFPET_HELLO_FAILED ## */
/* ## ------------------ ## */

    /* OF1.0+(0,0). No compatible version. */
    OFPERR_OFPHFC_INCOMPATIBLE = OFPERR_OFS,

    /* OF1.0+(0,1). Permissions error. */
    OFPERR_OFPHFC_EPERM,

/* ## ----------------- ## */
/* ## OFPET_BAD_REQUEST ## */
/* ## ----------------- ## */

    /* OF1.0+(1,0). ofp_header.version not supported. */
    OFPERR_OFPBRC_BAD_VERSION,

    /* OF1.0+(1,1). ofp_header.type not supported. */
    OFPERR_OFPBRC_BAD_TYPE,

    /* OF1.0+(1,2). ofp_stats_msg.type not supported. */
    OFPERR_OFPBRC_BAD_STAT,

    /* OF1.0+(1,3). Vendor not supported (in ofp_vendor_header or
     * ofp_stats_msg). */
    OFPERR_OFPBRC_BAD_VENDOR,

    /* OF1.0+(1,4). Vendor subtype not supported. */
    OFPERR_OFPBRC_BAD_SUBTYPE,

    /* OF1.0+(1,5). Permissions error. */
    OFPERR_OFPBRC_EPERM,

    /* OF1.0+(1,6). Wrong request length for type. */
    OFPERR_OFPBRC_BAD_LEN,

    /* OF1.0+(1,7). Specified buffer has already been used. */
    OFPERR_OFPBRC_BUFFER_EMPTY,

    /* OF1.0+(1,8). Specified buffer does not exist. */
    OFPERR_OFPBRC_BUFFER_UNKNOWN,

    /* NX1.0(1,512), OF1.1+(1,9). Specified table-id invalid or does not exist.
     * [ A non-standard error (1,512), formerly OFPERR_NXBRC_BAD_TABLE_ID,
     * is used for OpenFlow 1.0 as there seems to be no appropriate error
     * code defined in the specification. ] */
    OFPERR_OFPBRC_BAD_TABLE_ID,

    /* OF1.2+(1,10). Denied because controller is slave. */
    OFPERR_OFPBRC_IS_SLAVE,

    /* NX1.0-1.1(1,514), OF1.2+(1,11). Invalid port. [ A non-standard error
     * (1,514), formerly OFPERR_NXBRC_BAD_IN_PORT, is used for OpenFlow 1.0 and
     * 1.1 as there seems to be no appropriate error code defined in the
     * specifications. ] */
    OFPERR_OFPBRC_BAD_PORT,

    /* OF1.2+(1,12). Invalid packet in packet-out. */
    OFPERR_OFPBRC_BAD_PACKET,

    /* OF1.3+(1,13). Multipart request overflowed the assigned buffer. */
    OFPERR_OFPBRC_MULTIPART_BUFFER_OVERFLOW,

    /* NX1.0-1.1(1,256), NX1.2+(2). Invalid NXM flow match. */
    OFPERR_NXBRC_NXM_INVALID,

    /* NX1.0-1.1(1,257), NX1.2+(3). The nxm_type, or nxm_type taken in
     * combination with nxm_hasmask or nxm_length or both, is invalid or not
     * implemented. */
    OFPERR_NXBRC_NXM_BAD_TYPE,

    /* NX1.0-1.1(1,515), NX1.2+(4). Must-be-zero field had nonzero value. */
    OFPERR_NXBRC_MUST_BE_ZERO,

    /* NX1.0-1.1(1,516), NX1.2+(5). The reason in an ofp_port_status message
     * is not valid. */
    OFPERR_NXBRC_BAD_REASON,

    /* NX1.0-1.1(1,517), NX1.2+(6). The 'id' in an NXST_FLOW_MONITOR request
     * is the same as an existing monitor id (or two monitors in the same
     * NXST_FLOW_MONITOR request have the same 'id'). */
    OFPERR_NXBRC_FM_DUPLICATE_ID,

    /* NX1.0-1.1(1,518), NX1.2+(7). The 'flags' in an NXST_FLOW_MONITOR
     * request either does not specify at least one of the NXFMF_ADD,
     * NXFMF_DELETE, or NXFMF_MODIFY flags, or specifies a flag bit that is not
     * defined. */
    OFPERR_NXBRC_FM_BAD_FLAGS,

    /* NX1.0-1.1(1,519), NX1.2+(8). The 'id' in an NXT_FLOW_MONITOR_CANCEL
     * request is not the id of any existing monitor. */
    OFPERR_NXBRC_FM_BAD_ID,

    /* NX1.0-1.1(1,520), NX1.2+(9). The 'event' in an NXST_FLOW_MONITOR reply
     * does not specify one of the NXFME_ABBREV, NXFME_ADD, NXFME_DELETE, or
     * NXFME_MODIFY. */
    OFPERR_NXBRC_FM_BAD_EVENT,

    /* NX1.0-1.1(1,521), NX1.2+(10). The error that occurred cannot be
     * represented in this OpenFlow version. */
    OFPERR_NXBRC_UNENCODABLE_ERROR,

/* ## ---------------- ## */
/* ## OFPET_BAD_ACTION ## */
/* ## ---------------- ## */

    /* OF1.0+(2,0). Unknown action type. */
    OFPERR_OFPBAC_BAD_TYPE,

    /* OF1.0+(2,1). Length problem in actions. */
    OFPERR_OFPBAC_BAD_LEN,

    /* OF1.0+(2,2). Unknown experimenter id specified. */
    OFPERR_OFPBAC_BAD_VENDOR,

    /* OF1.0+(2,3). Unknown action type for experimenter id. */
    OFPERR_OFPBAC_BAD_VENDOR_TYPE,

    /* OF1.0+(2,4). Problem validating output port. */
    OFPERR_OFPBAC_BAD_OUT_PORT,

    /* OF1.0+(2,5). Bad action argument. */
    OFPERR_OFPBAC_BAD_ARGUMENT,

    /* OF1.0+(2,6). Permissions error. */
    OFPERR_OFPBAC_EPERM,

    /* OF1.0+(2,7). Can't handle this many actions. */
    OFPERR_OFPBAC_TOO_MANY,

    /* OF1.0+(2,8). Problem validating output queue. */
    OFPERR_OFPBAC_BAD_QUEUE,

    /* OF1.1+(2,9). Invalid group id in forward action. */
    OFPERR_OFPBAC_BAD_OUT_GROUP,

    /* NX1.0(1,522), OF1.1+(2,10). Action can't apply for this match or a
     * prerequisite for use of this field is unmet. */
    OFPERR_OFPBAC_MATCH_INCONSISTENT,

    /* OF1.1+(2,11). Action order is unsupported for the action list in an
     * Apply-Actions instruction */
    OFPERR_OFPBAC_UNSUPPORTED_ORDER,

    /* OF1.1+(2,12). Actions uses an unsupported tag/encap. */
    OFPERR_OFPBAC_BAD_TAG,

    /* NX1.0-1.1(1,523), OF1.2+(2,13). Action uses unknown or unsupported OXM
     * or NXM field. */
    OFPERR_OFPBAC_BAD_SET_TYPE,

    /* NX1.0-1.1(1,524), OF1.2+(2,14). Action references past the end of an
     * OXM or NXM field, or uses a length of zero. */
    OFPERR_OFPBAC_BAD_SET_LEN,

    /* NX1.0-1.1(1,525), OF1.2+(2,15). Action sets a field to an invalid or
     * unsupported value, or modifies a read-only field. */
    OFPERR_OFPBAC_BAD_SET_ARGUMENT,

    /* NX1.0-1.1(2,256), NX1.2+(11). Must-be-zero action argument had nonzero
     * value. */
    OFPERR_NXBAC_MUST_BE_ZERO,

/* ## --------------------- ## */
/* ## OFPET_BAD_INSTRUCTION ## */
/* ## --------------------- ## */

    /* OF1.1+(3,0). Unknown instruction. */
    OFPERR_OFPBIC_UNKNOWN_INST,

    /* OF1.1+(3,1). Switch or table does not support the instruction. */
    OFPERR_OFPBIC_UNSUP_INST,

    /* OF1.1+(3,2). Invalid Table-ID specified. */
    OFPERR_OFPBIC_BAD_TABLE_ID,

    /* OF1.1+(3,3). Metadata value unsupported by datapath. */
    OFPERR_OFPBIC_UNSUP_METADATA,

    /* OF1.1+(3,4). Metadata mask value unsupported by datapath. */
    OFPERR_OFPBIC_UNSUP_METADATA_MASK,

    /* OF1.1+(3,5). Unknown experimenter id specified. */
    OFPERR_OFPBIC_BAD_EXPERIMENTER,

    /* OF1.1(3,5), OF1.2+(3,6). Unknown instruction for experimenter id. */
    OFPERR_OFPBIC_BAD_EXP_TYPE,

    /* OF1.2+(3,7). Length problem in instructions. */
    OFPERR_OFPBIC_BAD_LEN,

    /* OF1.2+(3,8). Permissions error. */
    OFPERR_OFPBIC_EPERM,

    /* ONF1.1+(2600). Duplicate instruction. */
    OFPERR_ONFBIC_DUP_INSTRUCTION,

/* ## --------------- ## */
/* ## OFPET_BAD_MATCH ## */
/* ## --------------- ## */

    /* OF1.1+(4,0). Unsupported match type specified by the match */
    OFPERR_OFPBMC_BAD_TYPE,

    /* OF1.1+(4,1). Length problem in match. */
    OFPERR_OFPBMC_BAD_LEN,

    /* OF1.1+(4,2). Match uses an unsupported tag/encap. */
    OFPERR_OFPBMC_BAD_TAG,

    /* OF1.1+(4,3). Unsupported datalink addr mask - switch does not support
     * arbitrary datalink address mask. */
    OFPERR_OFPBMC_BAD_DL_ADDR_MASK,

    /* OF1.1+(4,4). Unsupported network addr mask - switch does not support
     * arbitrary network address mask. */
    OFPERR_OFPBMC_BAD_NW_ADDR_MASK,

    /* OF1.1+(4,5). Unsupported wildcard specified in the match. */
    OFPERR_OFPBMC_BAD_WILDCARDS,

    /* OF1.1+(4,6). Unsupported field in the match. */
    OFPERR_OFPBMC_BAD_FIELD,

    /* NX1.0(1,258), OF1.1+(4,7). Unsupported value in a match
     * field. */
    OFPERR_OFPBMC_BAD_VALUE,

    /* NX1.0-1.1(1,259), OF1.2+(4,8). Unsupported mask specified in the match,
     * field is not dl-address or nw-address. */
    OFPERR_OFPBMC_BAD_MASK,

    /* NX1.0-1.1(1,260), OF1.2+(4,9). A prerequisite was not met. */
    OFPERR_OFPBMC_BAD_PREREQ,

    /* NX1.0-1.1(1,261), OF1.2+(4,10). A field type was duplicated. */
    OFPERR_OFPBMC_DUP_FIELD,

    /* OF1.2+(4,11). Permissions error. */
    OFPERR_OFPBMC_EPERM,

/* ## --------------------- ## */
/* ## OFPET_FLOW_MOD_FAILED ## */
/* ## --------------------- ## */

    /* OF1.1+(5,0). Unspecified error. */
    OFPERR_OFPFMFC_UNKNOWN,

    /* OF1.0(3,0), OF1.1+(5,1). Flow not added because of full table(s). */
    OFPERR_OFPFMFC_TABLE_FULL,

    /* OF1.1+(5,2). Table does not exist */
    OFPERR_OFPFMFC_BAD_TABLE_ID,

    /* OF1.0(3,1), OF1.1+(5,3). Attempted to add overlapping flow with
     * CHECK_OVERLAP flag set. */
    OFPERR_OFPFMFC_OVERLAP,

    /* OF1.0(3,2), OF1.1+(5,4). Permissions error. */
    OFPERR_OFPFMFC_EPERM,

    /* OF1.1+(5,5). Flow not added because of unsupported idle/hard
     * timeout. */
    OFPERR_OFPFMFC_BAD_TIMEOUT,

    /* OF1.0(3,3). Flow not added because of non-zero idle/hard timeout. */
    OFPERR_OFPFMFC_BAD_EMERG_TIMEOUT,

    /* OF1.0(3,4), OF1.1+(5,6). Unsupported or unknown command. */
    OFPERR_OFPFMFC_BAD_COMMAND,

    /* NX1.0(3,258), NX1.1(5,258), OF1.2+(5,7). Unsupported or unknown
     * flags. */
    OFPERR_OFPFMFC_BAD_FLAGS,

    /* OF1.0(3,5). Unsupported action list - cannot process in the order
     * specified. */
    OFPERR_OFPFMFC_UNSUPPORTED,

    /* NX1.0-1.1(5,256), NX1.2+(12). Generic hardware error. */
    OFPERR_NXFMFC_HARDWARE,

    /* NX1.0-1.1(5,257), NX1.2+(13). A nonexistent table ID was specified in
     * the "command" field of struct ofp_flow_mod, when the
     * nxt_flow_mod_table_id extension is enabled. */
    OFPERR_NXFMFC_BAD_TABLE_ID,

/* ## ---------------------- ## */
/* ## OFPET_GROUP_MOD_FAILED ## */
/* ## ---------------------- ## */

    /* OF1.1+(6,0). Group not added because a group ADD attempted to replace
     * an already-present group. */
    OFPERR_OFPGMFC_GROUP_EXISTS,

    /* OF1.1+(6,1). Group not added because Group specified is invalid. */
    OFPERR_OFPGMFC_INVALID_GROUP,

    /* OF1.1+(6,2). Switch does not support unequal load sharing with select
     * groups. */
    OFPERR_OFPGMFC_WEIGHT_UNSUPPORTED,

    /* OF1.1+(6,3). The group table is full. */
    OFPERR_OFPGMFC_OUT_OF_GROUPS,

    /* OF1.1+(6,4). The maximum number of action buckets for a group has been
     * exceeded. */
    OFPERR_OFPGMFC_OUT_OF_BUCKETS,

    /* OF1.1+(6,5). Switch does not support groups that forward to groups. */
    OFPERR_OFPGMFC_CHAINING_UNSUPPORTED,

    /* OF1.1+(6,6). This group cannot watch the watch_port or watch_group
     * specified. */
    OFPERR_OFPGMFC_WATCH_UNSUPPORTED,

    /* OF1.1+(6,7). Group entry would cause a loop. */
    OFPERR_OFPGMFC_LOOP,

    /* OF1.1+(6,8). Group not modified because a group MODIFY attempted to
     * modify a non-existent group. */
    OFPERR_OFPGMFC_UNKNOWN_GROUP,

    /* OF1.2+(6,9). Group not deleted because another group is forwarding to it. */
    OFPERR_OFPGMFC_CHAINED_GROUP,

    /* OF1.2+(6,10). Unsupported or unknown group type. */
    OFPERR_OFPGMFC_BAD_TYPE,

    /* OF1.2+(6,11). Unsupported or unknown command. */
    OFPERR_OFPGMFC_BAD_COMMAND,

    /* OF1.2+(6,12). Error in bucket. */
    OFPERR_OFPGMFC_BAD_BUCKET,

    /* OF1.2+(6,13). Error in watch port/group. */
    OFPERR_OFPGMFC_BAD_WATCH,

    /* OF1.2+(6,14). Permissions error. */
    OFPERR_OFPGMFC_EPERM,

/* ## --------------------- ## */
/* ## OFPET_PORT_MOD_FAILED ## */
/* ## --------------------- ## */

    /* OF1.0(4,0), OF1.1+(7,0). Specified port does not exist. */
    OFPERR_OFPPMFC_BAD_PORT,

    /* OF1.0(4,1), OF1.1+(7,1). Specified hardware address does not match the
     * port number. */
    OFPERR_OFPPMFC_BAD_HW_ADDR,

    /* OF1.1+(7,2). Specified config is invalid. */
    OFPERR_OFPPMFC_BAD_CONFIG,

    /* OF1.1+(7,3). Specified advertise is invalid. */
    OFPERR_OFPPMFC_BAD_ADVERTISE,

    /* OF1.2+(7,4). Permissions error. */
    OFPERR_OFPPMFC_EPERM,

/* ## ---------------------- ## */
/* ## OFPET_TABLE_MOD_FAILED ## */
/* ## ---------------------- ## */

    /* OF1.1+(8,0). Specified table does not exist. */
    OFPERR_OFPTMFC_BAD_TABLE,

    /* OF1.1+(8,1). Specified config is invalid. */
    OFPERR_OFPTMFC_BAD_CONFIG,

    /* OF1.2+(8,2). Permissions error. */
    OFPERR_OFPTMFC_EPERM,

/* ## --------------------- ## */
/* ## OFPET_QUEUE_OP_FAILED ## */
/* ## --------------------- ## */

    /* OF1.0(5,0), OF1.1+(9,0). Invalid port (or port does not exist). */
    OFPERR_OFPQOFC_BAD_PORT,

    /* OF1.0(5,1), OF1.1+(9,1). Queue does not exist. */
    OFPERR_OFPQOFC_BAD_QUEUE,

    /* OF1.0(5,2), OF1.1+(9,2). Permissions error. */
    OFPERR_OFPQOFC_EPERM,

/* ## -------------------------- ## */
/* ## OFPET_SWITCH_CONFIG_FAILED ## */
/* ## -------------------------- ## */

    /* OF1.1+(10,0). Specified flags is invalid. */
    OFPERR_OFPSCFC_BAD_FLAGS,

    /* OF1.1+(10,1). Specified len is invalid. */
    OFPERR_OFPSCFC_BAD_LEN,

    /* OF1.2+(10,2). Permissions error. */
    OFPERR_OFPSCFC_EPERM,

/* ## ------------------------- ## */
/* ## OFPET_ROLE_REQUEST_FAILED ## */
/* ## ------------------------- ## */

    /* OF1.2+(11,0). Stale Message: old generation_id. */
    OFPERR_OFPRRFC_STALE,

    /* OF1.2+(11,1). Controller role change unsupported. */
    OFPERR_OFPRRFC_UNSUP,

    /* NX1.0-1.1(1,513), OF1.2+(11,2). Invalid role. */
    OFPERR_OFPRRFC_BAD_ROLE,

/* ## ---------------------- ## */
/* ## OFPET_METER_MOD_FAILED ## */
/* ## ---------------------- ## */

    /* OF1.3+(12,0). Unspecified error. */
    OFPERR_OFPMMFC_UNKNOWN,

    /* OF1.3+(12,1). Meter not added because a Meter ADD attempted to
     * replace an existing Meter. */
    OFPERR_OFPMMFC_METER_EXISTS,

    /* OF1.3+(12,2). Meter not added because Meter specified is invalid. */
    OFPERR_OFPMMFC_INVALID_METER,

    /* OF1.3+(12,3). Meter not modified because a Meter MODIFY attempted
     * to modify a non-existent Meter. */
    OFPERR_OFPMMFC_UNKNOWN_METER,

    /* OF1.3+(12,4). Unsupported or unknown command. */
    OFPERR_OFPMMFC_BAD_COMMAND,

    /* OF1.3+(12,5). Flag configuration unsupported. */
    OFPERR_OFPMMFC_BAD_FLAGS,

    /* OF1.3+(12,6). Rate unsupported. */
    OFPERR_OFPMMFC_BAD_RATE,

    /* OF1.3+(12,7). Burst size unsupported. */
    OFPERR_OFPMMFC_BAD_BURST,

    /* OF1.3+(12,8). Band unsupported. */
    OFPERR_OFPMMFC_BAD_BAND,

    /* OF1.3+(12,9). Band value unsupported. */
    OFPERR_OFPMMFC_BAD_BAND_VALUE,

    /* OF1.3+(12,10). No more meters available. */
    OFPERR_OFPMMFC_OUT_OF_METERS,

    /* OF1.3+(12,11). The maximum number of properties for a meter has
     * been exceeded. */
    OFPERR_OFPMMFC_OUT_OF_BANDS,

/* ## --------------------------- ## */
/* ## OFPET_TABLE_FEATURES_FAILED ## */
/* ## --------------------------- ## */

    /* OF1.3+(13,0). Specified table does not exist. */
    OFPERR_OFPTFFC_BAD_TABLE,

    /* OF1.3+(13,1). Invalid metadata mask. */
    OFPERR_OFPTFFC_BAD_METADATA,

    /* OF1.3+(13,2). Unknown property type. */
    OFPERR_OFPTFFC_BAD_TYPE,

    /* OF1.3+(13,3). Length problem in properties. */
    OFPERR_OFPTFFC_BAD_LEN,

    /* OF1.3+(13,4). Unsupported property value. */
    OFPERR_OFPTFFC_BAD_ARGUMENT,

    /* OF1.3+(13,5). Permissions error. */
    OFPERR_OFPTFFC_EPERM,

/* ## ------------------ ## */
/* ## OFPET_EXPERIMENTER ## */
/* ## ------------------ ## */
};

/* Queries about error domains and error codes. */
const char *ofperr_domain_get_name(enum ofp_version);

bool ofperr_is_valid(enum ofperr);

enum ofperr ofperr_from_name(const char *);

/* Decoding: returns the OFPERR_* value carried by an OFPT_ERROR message, or 0
 * on failure; a nonnull 'payload' receives the error's payload on success. */
enum ofperr ofperr_decode_msg(const struct ofp_header *,
                              struct ofpbuf *payload);

/* Encoding: each returns a newly built OFPT_ERROR message.
 * ofperr_encode_reply() answers the request given by the header and includes
 * up to 64 bytes of it; ofperr_encode_hello() carries a text string. */
struct ofpbuf *ofperr_encode_reply(enum ofperr, const struct ofp_header *);
struct ofpbuf *ofperr_encode_hello(enum ofperr, enum ofp_version ofp_version,
                                   const char *);

/* Components of an error's wire representation in the given version; each
 * returns -1 if the version is unknown. */
int ofperr_get_vendor(enum ofperr, enum ofp_version);
int ofperr_get_type(enum ofperr, enum ofp_version);
int ofperr_get_code(enum ofperr, enum ofp_version);

/* Human-readable names and descriptions.  ofperr_to_string() also accepts
 * values that are not valid OFPERR_* codes, treating them as errno values. */
const char *ofperr_get_name(enum ofperr);
const char *ofperr_get_description(enum ofperr);

void ofperr_format(struct ds *, enum ofperr);
const char *ofperr_to_string(enum ofperr);

#endif  /* ofp-errors.h */
openvswitch-2.0.1+git20140120/lib/ofp-msgs.c000066400000000000000000001022531226605124000201220ustar00rootroot00000000000000/*
 * Copyright (c) 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

#include
#include "ofp-msgs.h"
#include "byte-order.h"
#include "dynamic-string.h"
#include "hash.h"
#include "hmap.h"
#include "ofpbuf.h"
#include "openflow/nicira-ext.h"
#include "openflow/openflow.h"
#include "ovs-thread.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ofp_msgs);

/* Values of the 'type' member of struct ofp_header that ofphdrs_decode()
 * must recognize in order to find the vendor and statistics headers.  The
 * statistics request/reply types differ between OpenFlow 1.0 and later
 * versions. */
#define OFPT_VENDOR 4
#define OFPT10_STATS_REQUEST 16
#define OFPT10_STATS_REPLY 17
#define OFPT11_STATS_REQUEST 18
#define OFPT11_STATS_REPLY 19

/* Statistics type that marks a vendor-defined statistics message. */
#define OFPST_VENDOR 0xffff

/* A thin abstraction of OpenFlow headers:
 *
 * - 'version' and 'type' come straight from struct ofp_header, so these are
 * always present and meaningful.
 *
 * - 'stat' comes from the 'type' member in statistics messages only. It is
 * meaningful, therefore, only if 'version' and 'type' taken together
 * specify a statistics request or reply. Otherwise it is 0.
 *
 * - 'vendor' is meaningful only for vendor messages, that is, if 'version'
 * and 'type' specify a vendor message or if 'version' and 'type' specify
 * a statistics message and 'stat' specifies a vendor statistic type.
 * Otherwise it is 0.
 *
 * - 'subtype' is meaningful only for vendor messages and otherwise 0. It
 * specifies a vendor-defined subtype. There is no standard format for
 * these but 32 bits seems like it should be enough.
 *
 * The structure is hashed and compared as raw memory (see ofphdrs_hash() and
 * ofphdrs_equal()), which is why its size is pinned at 12 bytes below. */
struct ofphdrs {
    uint8_t version;            /* From ofp_header. */
    uint8_t type;               /* From ofp_header. */
    uint16_t stat;              /* From ofp10_stats_msg or ofp11_stats_msg. */
    uint32_t vendor;            /* From ofp_vendor_header,
                                 * ofp10_vendor_stats_msg, or
                                 * ofp11_vendor_stats_msg. */
    uint32_t subtype;           /* From nicira_header, nicira10_stats_msg, or
                                 * nicira11_stats_msg. */
};
BUILD_ASSERT_DECL(sizeof(struct ofphdrs) == 12);

/* A mapping from OpenFlow headers to OFPRAW_*. */
struct raw_instance {
    struct hmap_node hmap_node; /* In 'raw_instance_map'. */
    struct ofphdrs hdrs;        /* Key. */
    enum ofpraw raw;            /* Value. */
    unsigned int hdrs_len;      /* ofphdrs_len(hdrs). */
};

/* Information about a particular 'enum ofpraw'.
*/
struct raw_info {
    /* All possible instantiations of this OFPRAW_* into OpenFlow headers. */
    struct raw_instance *instances; /* max_version - min_version + 1 elems. */
    uint8_t min_version;        /* Presumably the lowest OpenFlow version with
                                 * an entry in 'instances' -- TODO confirm. */
    uint8_t max_version;        /* Presumably the highest such version. */

    /* Length constraints enforced by ofpraw_pull(): the message must be at
     * least the instance's 'hdrs_len' plus 'min_body' bytes long.  If
     * 'extra_multiple' is 0 the length must match exactly; if it is 1 any
     * greater length is accepted; otherwise the excess must be a multiple of
     * 'extra_multiple' bytes. */
    unsigned int min_body;
    unsigned int extra_multiple;

    enum ofptype type;          /* Presumably the OFPTYPE_* that this raw type
                                 * maps to -- TODO confirm. */
    const char *name;           /* Used by ofpraw_pull() in log messages. */
};

/* All understood OpenFlow message types, indexed by their 'struct ofphdrs'. */
static struct hmap raw_instance_map;
#include "ofp-msgs.inc"

static ovs_be32 alloc_xid(void);

/* ofphdrs functions. */
static uint32_t ofphdrs_hash(const struct ofphdrs *);
static bool ofphdrs_equal(const struct ofphdrs *a, const struct ofphdrs *b);
static enum ofperr ofphdrs_decode(struct ofphdrs *,
                                  const struct ofp_header *oh, size_t length);
static void ofphdrs_decode_assert(struct ofphdrs *,
                                  const struct ofp_header *oh, size_t length);
size_t ofphdrs_len(const struct ofphdrs *);

/* Lookups among 'enum ofpraw', 'struct raw_info', and 'struct ofphdrs'. */
static const struct raw_info *raw_info_get(enum ofpraw);
static struct raw_instance *raw_instance_get(const struct raw_info *,
                                             uint8_t version);

static enum ofperr ofpraw_from_ofphdrs(enum ofpraw *, const struct ofphdrs *);

/* Returns a transaction ID to use for an outgoing OpenFlow message.
*/ static ovs_be32 alloc_xid(void) { static atomic_uint32_t next_xid = ATOMIC_VAR_INIT(1); uint32_t xid; atomic_add(&next_xid, 1, &xid); return htonl(xid); } static uint32_t ofphdrs_hash(const struct ofphdrs *hdrs) { BUILD_ASSERT_DECL(sizeof *hdrs == 12); return hash_words((const uint32_t *) hdrs, 3, 0); } static bool ofphdrs_equal(const struct ofphdrs *a, const struct ofphdrs *b) { return !memcmp(a, b, sizeof *a); } static void log_bad_vendor(uint32_t vendor) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_WARN_RL(&rl, "OpenFlow message has unknown vendor %#"PRIx32, vendor); } static enum ofperr ofphdrs_decode(struct ofphdrs *hdrs, const struct ofp_header *oh, size_t length) { memset(hdrs, 0, sizeof *hdrs); if (length < sizeof *oh) { return OFPERR_OFPBRC_BAD_LEN; } /* Get base message version and type (OFPT_*). */ hdrs->version = oh->version; hdrs->type = oh->type; if (hdrs->type == OFPT_VENDOR) { /* Get vendor. */ const struct ofp_vendor_header *ovh; if (length < sizeof *ovh) { return OFPERR_OFPBRC_BAD_LEN; } ovh = (const struct ofp_vendor_header *) oh; hdrs->vendor = ntohl(ovh->vendor); if (hdrs->vendor == NX_VENDOR_ID) { /* Get Nicira message subtype (NXT_*). */ const struct nicira_header *nh; if (length < sizeof *nh) { return OFPERR_OFPBRC_BAD_LEN; } nh = (const struct nicira_header *) oh; hdrs->subtype = ntohl(nh->subtype); } else { log_bad_vendor(hdrs->vendor); return OFPERR_OFPBRC_BAD_VENDOR; } } else if (hdrs->version == OFP10_VERSION && (hdrs->type == OFPT10_STATS_REQUEST || hdrs->type == OFPT10_STATS_REPLY)) { const struct ofp10_stats_msg *osm; /* Get statistic type (OFPST_*). */ if (length < sizeof *osm) { return OFPERR_OFPBRC_BAD_LEN; } osm = (const struct ofp10_stats_msg *) oh; hdrs->stat = ntohs(osm->type); if (hdrs->stat == OFPST_VENDOR) { /* Get vendor. 
*/ const struct ofp10_vendor_stats_msg *ovsm; if (length < sizeof *ovsm) { return OFPERR_OFPBRC_BAD_LEN; } ovsm = (const struct ofp10_vendor_stats_msg *) oh; hdrs->vendor = ntohl(ovsm->vendor); if (hdrs->vendor == NX_VENDOR_ID) { /* Get Nicira statistic type (NXST_*). */ const struct nicira10_stats_msg *nsm; if (length < sizeof *nsm) { return OFPERR_OFPBRC_BAD_LEN; } nsm = (const struct nicira10_stats_msg *) oh; hdrs->subtype = ntohl(nsm->subtype); } else { log_bad_vendor(hdrs->vendor); return OFPERR_OFPBRC_BAD_VENDOR; } } } else if (hdrs->version != OFP10_VERSION && (hdrs->type == OFPT11_STATS_REQUEST || hdrs->type == OFPT11_STATS_REPLY)) { const struct ofp11_stats_msg *osm; /* Get statistic type (OFPST_*). */ if (length < sizeof *osm) { return OFPERR_OFPBRC_BAD_LEN; } osm = (const struct ofp11_stats_msg *) oh; hdrs->stat = ntohs(osm->type); if (hdrs->stat == OFPST_VENDOR) { /* Get vendor. */ const struct ofp11_vendor_stats_msg *ovsm; if (length < sizeof *ovsm) { return OFPERR_OFPBRC_BAD_LEN; } ovsm = (const struct ofp11_vendor_stats_msg *) oh; hdrs->vendor = ntohl(ovsm->vendor); if (hdrs->vendor == NX_VENDOR_ID) { /* Get Nicira statistic type (NXST_*). 
*/ const struct nicira11_stats_msg *nsm; if (length < sizeof *nsm) { return OFPERR_OFPBRC_BAD_LEN; } nsm = (const struct nicira11_stats_msg *) oh; hdrs->subtype = ntohl(nsm->subtype); } else { log_bad_vendor(hdrs->vendor); return OFPERR_OFPBRC_BAD_VENDOR; } } } return 0; } static void ofphdrs_decode_assert(struct ofphdrs *hdrs, const struct ofp_header *oh, size_t length) { enum ofperr error = ofphdrs_decode(hdrs, oh, length); ovs_assert(!error); } static bool ofphdrs_is_stat(const struct ofphdrs *hdrs) { switch ((enum ofp_version) hdrs->version) { case OFP10_VERSION: return (hdrs->type == OFPT10_STATS_REQUEST || hdrs->type == OFPT10_STATS_REPLY); case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: return (hdrs->type == OFPT11_STATS_REQUEST || hdrs->type == OFPT11_STATS_REPLY); } return false; } size_t ofphdrs_len(const struct ofphdrs *hdrs) { if (hdrs->type == OFPT_VENDOR) { return sizeof(struct nicira_header); } switch ((enum ofp_version) hdrs->version) { case OFP10_VERSION: if (hdrs->type == OFPT10_STATS_REQUEST || hdrs->type == OFPT10_STATS_REPLY) { return (hdrs->stat == OFPST_VENDOR ? sizeof(struct nicira10_stats_msg) : sizeof(struct ofp10_stats_msg)); } break; case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: if (hdrs->type == OFPT11_STATS_REQUEST || hdrs->type == OFPT11_STATS_REPLY) { return (hdrs->stat == OFPST_VENDOR ? sizeof(struct nicira11_stats_msg) : sizeof(struct ofp11_stats_msg)); } break; } return sizeof(struct ofp_header); } /* Determines the OFPRAW_* type of the OpenFlow message at 'oh', which has * length 'oh->length'. (The caller must ensure that 'oh->length' bytes of * data are readable at 'oh'.) On success, returns 0 and stores the type into * '*raw'. On failure, returns an OFPERR_* error code and zeros '*raw'. * * This function checks that 'oh' is a valid length for its particular type of * message, and returns an error if not. 
*/ enum ofperr ofpraw_decode(enum ofpraw *raw, const struct ofp_header *oh) { struct ofpbuf msg; ofpbuf_use_const(&msg, oh, ntohs(oh->length)); return ofpraw_pull(raw, &msg); } /* Does the same job as ofpraw_decode(), except that it assert-fails if * ofpraw_decode() would have reported an error. Thus, it's able to use the * return value for the OFPRAW_* message type instead of an error code. * * (It only makes sense to use this function if you previously called * ofpraw_decode() on the message and thus know that it's OK.) */ enum ofpraw ofpraw_decode_assert(const struct ofp_header *oh) { enum ofperr error; enum ofpraw raw; error = ofpraw_decode(&raw, oh); ovs_assert(!error); return raw; } /* Determines the OFPRAW_* type of the OpenFlow message in 'msg', which starts * at 'msg->data' and has length 'msg->size' bytes. On success, returns 0 and * stores the type into '*rawp'. On failure, returns an OFPERR_* error code * and zeros '*rawp'. * * This function checks that the message has a valid length for its particular * type of message, and returns an error if not. * * In addition to setting '*rawp', this function pulls off the OpenFlow header * (including the stats headers, vendor header, and any subtype header) with * ofpbuf_pull(). It also sets 'msg->l2' to the start of the OpenFlow header * and 'msg->l3' just beyond the headers (that is, to the final value of * msg->data). */ enum ofperr ofpraw_pull(enum ofpraw *rawp, struct ofpbuf *msg) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); const struct raw_instance *instance; const struct raw_info *info; struct ofphdrs hdrs; unsigned int min_len; unsigned int len; enum ofperr error; enum ofpraw raw; /* Set default outputs. 
*/ msg->l2 = msg->l3 = msg->data; *rawp = 0; len = msg->size; error = ofphdrs_decode(&hdrs, msg->data, len); if (error) { return error; } error = ofpraw_from_ofphdrs(&raw, &hdrs); if (error) { return error; } info = raw_info_get(raw); instance = raw_instance_get(info, hdrs.version); msg->l2 = ofpbuf_pull(msg, instance->hdrs_len); msg->l3 = msg->data; min_len = instance->hdrs_len + info->min_body; switch (info->extra_multiple) { case 0: if (len != min_len) { VLOG_WARN_RL(&rl, "received %s with incorrect length %u (expected " "length %u)", info->name, len, min_len); return OFPERR_OFPBRC_BAD_LEN; } break; case 1: if (len < min_len) { VLOG_WARN_RL(&rl, "received %s with incorrect length %u (expected " "length at least %u bytes)", info->name, len, min_len); return OFPERR_OFPBRC_BAD_LEN; } break; default: if (len < min_len || (len - min_len) % info->extra_multiple) { VLOG_WARN_RL(&rl, "received %s with incorrect length %u (must be " "exactly %u bytes or longer by an integer multiple " "of %u bytes)", info->name, len, min_len, info->extra_multiple); return OFPERR_OFPBRC_BAD_LEN; } break; } *rawp = raw; return 0; } /* Does the same job as ofpraw_pull(), except that it assert-fails if * ofpraw_pull() would have reported an error. Thus, it's able to use the * return value for the OFPRAW_* message type instead of an error code. * * (It only makes sense to use this function if you previously called * ofpraw_decode() on the message and thus know that it's OK.) */ enum ofpraw ofpraw_pull_assert(struct ofpbuf *msg) { enum ofperr error; enum ofpraw raw; error = ofpraw_pull(&raw, msg); ovs_assert(!error); return raw; } /* Determines the OFPRAW_* type of the OpenFlow message that starts at 'oh' and * has length 'length' bytes. On success, returns 0 and stores the type into * '*rawp'. On failure, returns an OFPERR_* error code and zeros '*rawp'. * * Unlike other functions for decoding message types, this one is not picky * about message length. 
For example, it will successfully decode a message * whose body is shorter than the minimum length for a message of its type. * Thus, this is the correct function to use for decoding the type of a message * that might have been truncated, such as the payload of an OpenFlow error * message (which is allowed to be truncated to 64 bytes). */ enum ofperr ofpraw_decode_partial(enum ofpraw *raw, const struct ofp_header *oh, size_t length) { struct ofphdrs hdrs; enum ofperr error; error = ofphdrs_decode(&hdrs, oh, length); if (!error) { error = ofpraw_from_ofphdrs(raw, &hdrs); } if (error) { *raw = 0; } return error; } /* Encoding messages using OFPRAW_* values. */ static void ofpraw_put__(enum ofpraw, uint8_t version, ovs_be32 xid, size_t extra_tailroom, struct ofpbuf *); /* Allocates and returns a new ofpbuf that contains an OpenFlow header for * 'raw' with OpenFlow version 'version' and a fresh OpenFlow transaction ID. * The ofpbuf has enough tailroom for the minimum body length of 'raw', plus * 'extra_tailroom' additional bytes. * * Each 'raw' value is valid only for certain OpenFlow versions. The caller * must specify a valid (raw, version) pair. * * In the returned ofpbuf, 'l2' points to the beginning of the OpenFlow header * and 'l3' points just after it, to where the message's body will start. The * caller must actually allocate the body into the space reserved for it, * e.g. with ofpbuf_put_uninit(). * * The caller owns the returned ofpbuf and must free it when it is no longer * needed, e.g. with ofpbuf_delete(). */ struct ofpbuf * ofpraw_alloc(enum ofpraw raw, uint8_t version, size_t extra_tailroom) { return ofpraw_alloc_xid(raw, version, alloc_xid(), extra_tailroom); } /* Same as ofpraw_alloc() but the caller provides the transaction ID. 
*/ struct ofpbuf * ofpraw_alloc_xid(enum ofpraw raw, uint8_t version, ovs_be32 xid, size_t extra_tailroom) { struct ofpbuf *buf = ofpbuf_new(0); ofpraw_put__(raw, version, xid, extra_tailroom, buf); return buf; } /* Same as ofpraw_alloc(), but obtains the OpenFlow version and transaction ID * from 'request->version' and 'request->xid', respectively. * * Even though the version comes from 'request->version', the caller must still * know what it is doing, by specifying a valid pairing of 'raw' and * 'request->version', just like ofpraw_alloc(). */ struct ofpbuf * ofpraw_alloc_reply(enum ofpraw raw, const struct ofp_header *request, size_t extra_tailroom) { return ofpraw_alloc_xid(raw, request->version, request->xid, extra_tailroom); } /* Allocates and returns a new ofpbuf that contains an OpenFlow header that is * a stats reply to the stats request in 'request', using the same OpenFlow * version and transaction ID as 'request'. The ofpbuf has enough tailroom for * the stats reply's minimum body length, plus 'extra_tailroom' additional * bytes. * * 'request' must be a stats request, that is, an OFPRAW_OFPST* or OFPRAW_NXST* * value. Every stats request has a corresponding reply, so the (raw, version) * pairing pitfalls of the other ofpraw_alloc_*() functions don't apply here. * * In the returned ofpbuf, 'l2' points to the beginning of the OpenFlow header * and 'l3' points just after it, to where the message's body will start. The * caller must actually allocate the body into the space reserved for it, * e.g. with ofpbuf_put_uninit(). * * The caller owns the returned ofpbuf and must free it when it is no longer * needed, e.g. with ofpbuf_delete(). 
*/ struct ofpbuf * ofpraw_alloc_stats_reply(const struct ofp_header *request, size_t extra_tailroom) { enum ofpraw request_raw; enum ofpraw reply_raw; enum ofperr error; error = ofpraw_decode_partial(&request_raw, request, ntohs(request->length)); ovs_assert(!error); reply_raw = ofpraw_stats_request_to_reply(request_raw, request->version); ovs_assert(reply_raw); return ofpraw_alloc_reply(reply_raw, request, extra_tailroom); } /* Appends to 'buf' an OpenFlow header for 'raw' with OpenFlow version * 'version' and a fresh OpenFlow transaction ID. Preallocates enough tailroom * in 'buf' for the minimum body length of 'raw', plus 'extra_tailroom' * additional bytes. * * Each 'raw' value is valid only for certain OpenFlow versions. The caller * must specify a valid (raw, version) pair. * * Upon return, 'buf->l2' points to the beginning of the OpenFlow header and * 'buf->l3' points just after it, to where the message's body will start. The * caller must actually allocating the body into the space reserved for it, * e.g. with ofpbuf_put_uninit(). */ void ofpraw_put(enum ofpraw raw, uint8_t version, struct ofpbuf *buf) { ofpraw_put__(raw, version, alloc_xid(), 0, buf); } /* Same as ofpraw_put() but the caller provides the transaction ID. */ void ofpraw_put_xid(enum ofpraw raw, uint8_t version, ovs_be32 xid, struct ofpbuf *buf) { ofpraw_put__(raw, version, xid, 0, buf); } /* Same as ofpraw_put(), but obtains the OpenFlow version and transaction ID * from 'request->version' and 'request->xid', respectively. * * Even though the version comes from 'request->version', the caller must still * know what it is doing, by specifying a valid pairing of 'raw' and * 'request->version', just like ofpraw_put(). 
*/
void
ofpraw_put_reply(enum ofpraw raw, const struct ofp_header *request,
                 struct ofpbuf *buf)
{
    /* The 0 is 'extra_tailroom': nothing is reserved beyond the header and
     * the minimum body. */
    ofpraw_put__(raw, request->version, request->xid, 0, buf);
}

/* Appends to 'buf' an OpenFlow header that is a stats reply to the stats
 * request in 'request', using the same OpenFlow version and transaction ID as
 * 'request'.  Preallocates enough tailroom in 'buf' for the stats reply's
 * minimum body length.
 *
 * 'request' must be a stats request, that is, an OFPRAW_OFPST* or OFPRAW_NXST*
 * value.  Every stats request has a corresponding reply, so the (raw, version)
 * pairing pitfalls of the other ofpraw_put*() functions don't apply here.
 *
 * Upon return, 'buf->l2' points to the beginning of the OpenFlow header and
 * 'buf->l3' points just after it, to where the message's body will start.  The
 * caller must actually allocate the body into the space reserved for it,
 * e.g. with ofpbuf_put_uninit().
*/
void
ofpraw_put_stats_reply(const struct ofp_header *request, struct ofpbuf *buf)
{
    enum ofperr error;
    enum ofpraw raw;

    /* Identify the request type; the partial decoder does not validate the
     * body length.  A request that cannot be decoded is a caller bug. */
    error = ofpraw_decode_partial(&raw, request, ntohs(request->length));
    ovs_assert(!error);

    /* 'raw' is reused: from here on it names the reply, not the request. */
    raw = ofpraw_stats_request_to_reply(raw, request->version);
    ovs_assert(raw);

    ofpraw_put__(raw, request->version, request->xid, 0, buf);
}

/* Common implementation of the ofpraw_put*() and ofpraw_alloc*() functions.
 *
 * Appends to 'buf' the OpenFlow header for 'raw' in OpenFlow version
 * 'version', using transaction ID 'xid'.  Before writing, preallocates
 * tailroom in 'buf' for the header, the minimum body of 'raw', and
 * 'extra_tailroom' additional bytes.
 *
 * On return 'buf->l2' points to the start of the new header and 'buf->l3' to
 * the tail of 'buf', i.e. where the body will go.  The header's 'length' field
 * is set to 'buf->size' as of the end of the header, so it does not account
 * for a body added afterward.
 *
 * Assert-fails if 'version' is outside the range supported by 'raw' (see
 * raw_instance_get()). */
static void
ofpraw_put__(enum ofpraw raw, uint8_t version, ovs_be32 xid,
             size_t extra_tailroom, struct ofpbuf *buf)
{
    const struct raw_info *info = raw_info_get(raw);
    const struct raw_instance *instance = raw_instance_get(info, version);
    /* Template header fields (type, stat, vendor, subtype) for this
     * (raw, version) pair. */
    const struct ofphdrs *hdrs = &instance->hdrs;
    struct ofp_header *oh;

    ofpbuf_prealloc_tailroom(buf, (instance->hdrs_len + info->min_body
                                   + extra_tailroom));
    buf->l2 = ofpbuf_put_uninit(buf, instance->hdrs_len);
    buf->l3 = ofpbuf_tail(buf);

    /* Basic OpenFlow header, present in every message. */
    oh = buf->l2;
    oh->version = version;
    oh->type = hdrs->type;
    oh->length = htons(buf->size);
    oh->xid = xid;

    /* The structs below all overlay the same header bytes at 'buf->l2'; each
     * branch fills in only the fields beyond the basic header.
     * NOTE(review): presumably each struct begins with the header it
     * extends -- confirm against the struct definitions. */
    if (hdrs->type == OFPT_VENDOR) {
        /* Vendor extension message.  Only Nicira is supported. */
        struct nicira_header *nh = buf->l2;

        ovs_assert(hdrs->vendor == NX_VENDOR_ID);
        nh->vendor = htonl(hdrs->vendor);
        nh->subtype = htonl(hdrs->subtype);
    } else if (version == OFP10_VERSION
               && (hdrs->type == OFPT10_STATS_REQUEST ||
                   hdrs->type == OFPT10_STATS_REPLY)) {
        /* OpenFlow 1.0 stats header. */
        struct ofp10_stats_msg *osm = buf->l2;

        osm->type = htons(hdrs->stat);
        osm->flags = htons(0);      /* No flags set initially. */

        if (hdrs->stat == OFPST_VENDOR) {
            struct ofp10_vendor_stats_msg *ovsm = buf->l2;

            ovsm->vendor = htonl(hdrs->vendor);
            if (hdrs->vendor == NX_VENDOR_ID) {
                struct nicira10_stats_msg *nsm = buf->l2;

                nsm->subtype = htonl(hdrs->subtype);
                memset(nsm->pad, 0, sizeof nsm->pad);
            } else {
                /* No other vendor's stats messages can be encoded. */
                NOT_REACHED();
            }
        }
    } else if (version != OFP10_VERSION
               && (hdrs->type == OFPT11_STATS_REQUEST ||
                   hdrs->type == OFPT11_STATS_REPLY)) {
        /* Stats header used by every version other than OpenFlow 1.0; unlike
         * the 1.0 form it has padding that must be zeroed. */
        struct ofp11_stats_msg *osm = buf->l2;

        osm->type = htons(hdrs->stat);
        osm->flags = htons(0);      /* No flags set initially. */
        memset(osm->pad, 0, sizeof osm->pad);

        if (hdrs->stat == OFPST_VENDOR) {
            struct ofp11_vendor_stats_msg *ovsm = buf->l2;

            ovsm->vendor = htonl(hdrs->vendor);
            if (hdrs->vendor == NX_VENDOR_ID) {
                struct nicira11_stats_msg *nsm = buf->l2;

                nsm->subtype = htonl(hdrs->subtype);
            } else {
                /* No other vendor's stats messages can be encoded. */
                NOT_REACHED();
            }
        }
    }
}

/* Returns 'raw''s name.
 *
 * The name is the name used for 'raw' in the OpenFlow specification.  For
 * example, ofpraw_get_name(OFPRAW_OFPT10_FEATURES_REPLY) is
 * "OFPT_FEATURES_REPLY".
 *
 * The caller must not modify or free the returned string. */
const char *
ofpraw_get_name(enum ofpraw raw)
{
    return raw_info_get(raw)->name;
}

/* Returns the stats reply that corresponds to 'raw' in the given OpenFlow
 * 'version'.
 *
 * 'raw' must be a stats request that is valid in 'version'; otherwise this
 * function assert-fails. */
enum ofpraw
ofpraw_stats_request_to_reply(enum ofpraw raw, uint8_t version)
{
    const struct raw_info *info = raw_info_get(raw);
    const struct raw_instance *instance = raw_instance_get(info, version);
    enum ofpraw reply_raw;
    struct ofphdrs hdrs;
    enum ofperr error;

    /* Work on a copy of the request's header template: only the message type
     * changes, from stats request to stats reply; the stat, vendor, and
     * subtype fields carry over. */
    hdrs = instance->hdrs;
    switch ((enum ofp_version)hdrs.version) {
    case OFP10_VERSION:
        ovs_assert(hdrs.type == OFPT10_STATS_REQUEST);
        hdrs.type = OFPT10_STATS_REPLY;
        break;
    case OFP11_VERSION:
    case OFP12_VERSION:
    case OFP13_VERSION:
        ovs_assert(hdrs.type == OFPT11_STATS_REQUEST);
        hdrs.type = OFPT11_STATS_REPLY;
        break;
    default:
        NOT_REACHED();
    }

    /* Look up the raw type whose headers match the modified copy. */
    error = ofpraw_from_ofphdrs(&reply_raw, &hdrs);
    ovs_assert(!error);

    return reply_raw;
}

/* Determines the OFPTYPE_* type of the OpenFlow message at 'oh', which has
 * length 'oh->length'.  (The caller must ensure that 'oh->length' bytes of
 * data are readable at 'oh'.)  On success, returns 0 and stores the type into
 * '*typep'.  On failure, returns an OFPERR_* error code and zeros '*typep'.
 *
 * This function checks that 'oh' is a valid length for its particular type of
 * message, and returns an error if not. */
enum ofperr
ofptype_decode(enum ofptype *typep, const struct ofp_header *oh)
{
    enum ofperr error;
    enum ofpraw raw;

    error = ofpraw_decode(&raw, oh);
    *typep = error ?
0 : ofptype_from_ofpraw(raw); return error; } /* Determines the OFPTYPE_* type of the OpenFlow message in 'msg', which starts * at 'msg->data' and has length 'msg->size' bytes. On success, returns 0 and * stores the type into '*typep'. On failure, returns an OFPERR_* error code * and zeros '*typep'. * * This function checks that the message has a valid length for its particular * type of message, and returns an error if not. * * In addition to setting '*typep', this function pulls off the OpenFlow header * (including the stats headers, vendor header, and any subtype header) with * ofpbuf_pull(). It also sets 'msg->l2' to the start of the OpenFlow header * and 'msg->l3' just beyond the headers (that is, to the final value of * msg->data). */ enum ofperr ofptype_pull(enum ofptype *typep, struct ofpbuf *buf) { enum ofperr error; enum ofpraw raw; error = ofpraw_pull(&raw, buf); *typep = error ? 0 : ofptype_from_ofpraw(raw); return error; } /* Returns the OFPTYPE_* type that corresponds to 'raw'. * * (This is a one-way trip, because the mapping from ofpraw to ofptype is * many-to-one.) */ enum ofptype ofptype_from_ofpraw(enum ofpraw raw) { return raw_info_get(raw)->type; } /* Updates the 'length' field of the OpenFlow message in 'buf' to * 'buf->size'. */ void ofpmsg_update_length(struct ofpbuf *buf) { struct ofp_header *oh = ofpbuf_at_assert(buf, 0, sizeof *oh); oh->length = htons(buf->size); } /* Returns just past the Openflow header (including the stats headers, vendor * header, and any subtype header) in 'oh'. */ const void * ofpmsg_body(const struct ofp_header *oh) { struct ofphdrs hdrs; ofphdrs_decode_assert(&hdrs, oh, ntohs(oh->length)); return (const uint8_t *) oh + ofphdrs_len(&hdrs); } static ovs_be16 *ofpmp_flags__(const struct ofp_header *); /* Initializes 'replies' as a new list of stats messages that reply to * 'request', which must be a stats request message. Initially the list will * consist of only a single reply part without any body. 
The caller should * use calls to the other ofpmp_*() functions to add to the body and split the * message into multiple parts, if necessary. */ void ofpmp_init(struct list *replies, const struct ofp_header *request) { struct ofpbuf *msg; list_init(replies); msg = ofpraw_alloc_stats_reply(request, 1000); list_push_back(replies, &msg->list_node); } /* Prepares to append up to 'len' bytes to the series of statistics replies in * 'replies', which should have been initialized with ofpmp_init(), if * necessary adding a new reply to the list. * * Returns an ofpbuf with at least 'len' bytes of tailroom. The 'len' bytes * have not actually been allocated, so the caller must do so with * e.g. ofpbuf_put_uninit(). */ struct ofpbuf * ofpmp_reserve(struct list *replies, size_t len) { struct ofpbuf *msg = ofpbuf_from_list(list_back(replies)); if (msg->size + len <= UINT16_MAX) { ofpbuf_prealloc_tailroom(msg, len); return msg; } else { unsigned int hdrs_len; struct ofpbuf *next; struct ofphdrs hdrs; ofphdrs_decode_assert(&hdrs, msg->data, msg->size); hdrs_len = ofphdrs_len(&hdrs); next = ofpbuf_new(MAX(1024, hdrs_len + len)); ofpbuf_put(next, msg->data, hdrs_len); next->l2 = next->data; next->l3 = ofpbuf_tail(next); list_push_back(replies, &next->list_node); *ofpmp_flags__(msg->data) |= htons(OFPSF_REPLY_MORE); return next; } } /* Appends 'len' bytes to the series of statistics replies in 'replies', and * returns the first byte. */ void * ofpmp_append(struct list *replies, size_t len) { return ofpbuf_put_uninit(ofpmp_reserve(replies, len), len); } /* Sometimes, when composing stats replies, it's difficult to predict how long * an individual reply chunk will be before actually encoding it into the reply * buffer. This function allows easy handling of this case: just encode the * reply, then use this function to break the message into two pieces if it * exceeds the OpenFlow message limit. 
* * In detail, if the final stats message in 'replies' is too long for OpenFlow, * this function breaks it into two separate stats replies, the first one with * the first 'start_ofs' bytes, the second one containing the bytes from that * offset onward. */ void ofpmp_postappend(struct list *replies, size_t start_ofs) { struct ofpbuf *msg = ofpbuf_from_list(list_back(replies)); ovs_assert(start_ofs <= UINT16_MAX); if (msg->size > UINT16_MAX) { size_t len = msg->size - start_ofs; memcpy(ofpmp_append(replies, len), (const uint8_t *) msg->data + start_ofs, len); msg->size = start_ofs; } } static ovs_be16 * ofpmp_flags__(const struct ofp_header *oh) { switch ((enum ofp_version)oh->version) { case OFP10_VERSION: return &((struct ofp10_stats_msg *) oh)->flags; case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: return &((struct ofp11_stats_msg *) oh)->flags; default: NOT_REACHED(); } } /* Returns the OFPSF_* flags found in the OpenFlow stats header of 'oh', which * must be an OpenFlow stats request or reply. * * (OFPSF_REPLY_MORE is the only defined flag.) */ uint16_t ofpmp_flags(const struct ofp_header *oh) { return ntohs(*ofpmp_flags__(oh)); } /* Returns true if the OFPSF_REPLY_MORE flag is set in the OpenFlow stats * header of 'oh', which must be an OpenFlow stats request or reply, false if * it is not set. 
*/
bool
ofpmp_more(const struct ofp_header *oh)
{
    return (ofpmp_flags(oh) & OFPSF_REPLY_MORE) != 0;
}

static void ofpmsgs_init(void);

/* Returns the raw_info entry for 'raw' from the 'raw_infos' table, calling
 * ofpmsgs_init() first so that the message tables are set up.
 *
 * Assert-fails if 'raw' is not a valid index into 'raw_infos'. */
static const struct raw_info *
raw_info_get(enum ofpraw raw)
{
    ofpmsgs_init();

    ovs_assert(raw < ARRAY_SIZE(raw_infos));
    return &raw_infos[raw];
}

/* Returns the instance of 'info' for OpenFlow version 'version'.
 *
 * Assert-fails if 'version' is outside the range
 * 'info->min_version'...'info->max_version'. */
static struct raw_instance *
raw_instance_get(const struct raw_info *info, uint8_t version)
{
    ovs_assert(version >= info->min_version && version <= info->max_version);
    /* 'instances' has one element per supported version, starting at
     * 'min_version'. */
    return &info->instances[version - info->min_version];
}

/* Looks up the OFPRAW_* type whose headers equal 'hdrs'.  On a match, stores
 * the type in '*raw' and returns 0.
 *
 * If nothing matches, logs a rate-limited warning describing 'hdrs', leaves
 * '*raw' unwritten, and returns an OFPERR_OFPBRC_* error chosen by which
 * header field was unrecognized: the subtype if 'hdrs' names a vendor,
 * otherwise the stat type if 'hdrs' is a stats message, otherwise the
 * message type. */
static enum ofperr
ofpraw_from_ofphdrs(enum ofpraw *raw, const struct ofphdrs *hdrs)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

    struct raw_instance *raw_hdrs;
    uint32_t hash;

    ofpmsgs_init();

    /* Only instances whose hash matches are visited, so confirm each
     * candidate with a full comparison. */
    hash = ofphdrs_hash(hdrs);
    HMAP_FOR_EACH_WITH_HASH (raw_hdrs, hmap_node, hash, &raw_instance_map) {
        if (ofphdrs_equal(hdrs, &raw_hdrs->hdrs)) {
            *raw = raw_hdrs->raw;
            return 0;
        }
    }

    /* Unknown message: build the description only if the rate limiter lets
     * the warning through. */
    if (!VLOG_DROP_WARN(&rl)) {
        struct ds s;

        ds_init(&s);
        ds_put_format(&s, "version %"PRIu8", type %"PRIu8,
                      hdrs->version, hdrs->type);
        if (ofphdrs_is_stat(hdrs)) {
            ds_put_format(&s, ", stat %"PRIu16, hdrs->stat);
        }
        if (hdrs->vendor) {
            ds_put_format(&s, ", vendor 0x%"PRIx32", subtype %"PRIu32,
                          hdrs->vendor, hdrs->subtype);
        }
        VLOG_WARN("unknown OpenFlow message (%s)", ds_cstr(&s));
        ds_destroy(&s);
    }

    return (hdrs->vendor ? OFPERR_OFPBRC_BAD_SUBTYPE
            : ofphdrs_is_stat(hdrs) ?
OFPERR_OFPBRC_BAD_STAT : OFPERR_OFPBRC_BAD_TYPE); } static void ofpmsgs_init(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; const struct raw_info *info; if (!ovsthread_once_start(&once)) { return; } hmap_init(&raw_instance_map); for (info = raw_infos; info < &raw_infos[ARRAY_SIZE(raw_infos)]; info++) { int n_instances = info->max_version - info->min_version + 1; struct raw_instance *inst; for (inst = info->instances; inst < &info->instances[n_instances]; inst++) { inst->hdrs_len = ofphdrs_len(&inst->hdrs); hmap_insert(&raw_instance_map, &inst->hmap_node, ofphdrs_hash(&inst->hdrs)); } } ovsthread_once_done(&once); } openvswitch-2.0.1+git20140120/lib/ofp-msgs.h000066400000000000000000000610621226605124000201310ustar00rootroot00000000000000/* * Copyright (c) 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OFP_MSGS_H #define OFP_MSGS_H 1 /* OpenFlow message headers abstraction. * * OpenFlow headers are unnecessarily complicated: * * - Some messages with the same meaning were renumbered between 1.0 and 1.1. * * - "Statistics" (aka multipart) messages have a different format from other * messages. * * - The 1.0 header for statistics messages is an odd number of 32-bit words * long, leaving 64-bit quantities in the body misaligned. The 1.1 header * for statistics added a padding word to fix this misalignment, although * many statistic message bodies did not change. 
* * - Vendor-defined messages have an additional header but no standard way to * distinguish individual types of message within a given vendor. * * This file attempts to abstract out the differences between the various forms * of headers. */ #include "openvswitch/types.h" #include "ofp-errors.h" #include "util.h" struct list; /* Raw identifiers for OpenFlow messages. * * Some OpenFlow messages with similar meanings have multiple variants across * OpenFlow versions or vendor extensions. Each variant has a different * OFPRAW_* enumeration constant. More specifically, if two messages have * different types, different numbers, or different arguments, then they must * have different OFPRAW_* values. * * The comments here must follow a stylized form because the "extract-ofp-msgs" * program parses them at build time to generate data tables. The syntax of * each comment is: * * type versions (number): arguments. * * where the syntax of each part is: * * - type: One of OFPT (standard OpenFlow message), OFPST (standard OpenFlow * statistics message), NXT (Nicira extension message), or NXST (Nicira * extension statistics message). * * As new vendors implement extensions it will make sense to expand the * dictionary of possible types. * * - versions: The OpenFlow version or versions in which this message is * supported, e.g. "1.0" or "1.1" or "1.0+". * * - number: * For OFPT, the 'type' in struct ofp_header. * For OFPST, the 'type' in struct ofp_stats_msg or ofp11_stats_msg. * For NXT, the 'subtype' in struct nicira_header. * For NXST, the 'subtype' in struct nicira10_stats_msg or * nicira11_stats_msg. * * - arguments: The types of data that follow the OpenFlow headers (the * message "body"). This can be "void" if the message has no body. * Otherwise, it should be a comma-separated sequence of C types. The * last type in the sequence can end with [] if the body ends in a * variable-length sequence. 
*
 *     The arguments are used to validate the lengths of messages when a
 *     header is parsed.  Any message whose length isn't valid as a length of
 *     the specified types will be rejected with OFPERR_OFPBRC_BAD_LEN.
 *
 *     A few OpenFlow messages, such as OFPT_PACKET_IN, intentionally end with
 *     only part of a structure, up to some specified member.  The syntax "up
 *     to <member>" indicates this, e.g. "struct ofp11_packet_in up to data".
 */
enum ofpraw {
/* Immutable standard messages.
 *
 * The OpenFlow standard promises to preserve these messages and their numbers
 * in future versions, so we mark them as <all>, which covers every OpenFlow
 * version numbered 0x01...0xff, rather than as OF1.0+, which covers only
 * OpenFlow versions that we otherwise implement.
 *
 * Without <all> here, then we would fail to decode "hello" messages that
 * announce a version higher than we understand, even though there still could
 * be a version in common with the peer that we do understand.  The <all>
 * keyword is less useful for the other messages, because our OpenFlow channels
 * accept only OpenFlow messages with a previously negotiated version. */

    /* OFPT <all> (0): uint8_t[]. */
    OFPRAW_OFPT_HELLO,

    /* OFPT <all> (1): struct ofp_error_msg, uint8_t[]. */
    OFPRAW_OFPT_ERROR,

    /* OFPT <all> (2): uint8_t[]. */
    OFPRAW_OFPT_ECHO_REQUEST,

    /* OFPT <all> (3): uint8_t[]. */
    OFPRAW_OFPT_ECHO_REPLY,

/* Other standard messages.
 *
 * The meanings of these messages can (and often do) change from one version
 * of OpenFlow to another. */

    /* OFPT 1.0+ (5): void. */
    OFPRAW_OFPT_FEATURES_REQUEST,

    /* OFPT 1.0 (6): struct ofp_switch_features, struct ofp10_phy_port[]. */
    OFPRAW_OFPT10_FEATURES_REPLY,
    /* OFPT 1.1-1.2 (6): struct ofp_switch_features, struct ofp11_port[]. */
    OFPRAW_OFPT11_FEATURES_REPLY,
    /* OFPT 1.3+ (6): struct ofp_switch_features. */
    OFPRAW_OFPT13_FEATURES_REPLY,

    /* OFPT 1.0+ (7): void. */
    OFPRAW_OFPT_GET_CONFIG_REQUEST,

    /* OFPT 1.0+ (8): struct ofp_switch_config. */
    OFPRAW_OFPT_GET_CONFIG_REPLY,

    /* OFPT 1.0+ (9): struct ofp_switch_config.
*/ OFPRAW_OFPT_SET_CONFIG, /* OFPT 1.0 (10): struct ofp10_packet_in up to data, uint8_t[]. */ OFPRAW_OFPT10_PACKET_IN, /* OFPT 1.1 (10): struct ofp11_packet_in, uint8_t[]. */ OFPRAW_OFPT11_PACKET_IN, /* OFPT 1.2 (10): struct ofp12_packet_in, uint8_t[]. */ OFPRAW_OFPT12_PACKET_IN, /* OFPT 1.3 (10): struct ofp13_packet_in, uint8_t[]. */ OFPRAW_OFPT13_PACKET_IN, /* NXT 1.0+ (17): struct nx_packet_in, uint8_t[]. */ OFPRAW_NXT_PACKET_IN, /* OFPT 1.0 (11): struct ofp10_flow_removed. */ OFPRAW_OFPT10_FLOW_REMOVED, /* OFPT 1.1+ (11): struct ofp11_flow_removed, uint8_t[8][]. */ OFPRAW_OFPT11_FLOW_REMOVED, /* NXT 1.0+ (14): struct nx_flow_removed, uint8_t[8][]. */ OFPRAW_NXT_FLOW_REMOVED, /* OFPT 1.0 (12): struct ofp_port_status, struct ofp10_phy_port. */ OFPRAW_OFPT10_PORT_STATUS, /* OFPT 1.1+ (12): struct ofp_port_status, struct ofp11_port. */ OFPRAW_OFPT11_PORT_STATUS, /* OFPT 1.0 (13): struct ofp10_packet_out, uint8_t[]. */ OFPRAW_OFPT10_PACKET_OUT, /* OFPT 1.1+ (13): struct ofp11_packet_out, uint8_t[]. */ OFPRAW_OFPT11_PACKET_OUT, /* OFPT 1.0 (14): struct ofp10_flow_mod, struct ofp_action_header[]. */ OFPRAW_OFPT10_FLOW_MOD, /* OFPT 1.1+ (14): struct ofp11_flow_mod, struct ofp11_instruction[]. */ OFPRAW_OFPT11_FLOW_MOD, /* NXT 1.0+ (13): struct nx_flow_mod, uint8_t[8][]. */ OFPRAW_NXT_FLOW_MOD, /* OFPT 1.0 (15): struct ofp10_port_mod. */ OFPRAW_OFPT10_PORT_MOD, /* OFPT 1.1+ (16): struct ofp11_port_mod. */ OFPRAW_OFPT11_PORT_MOD, /* OFPT 1.0 (18): void. */ OFPRAW_OFPT10_BARRIER_REQUEST, /* OFPT 1.1+ (20): void. */ OFPRAW_OFPT11_BARRIER_REQUEST, /* OFPT 1.0 (19): void. */ OFPRAW_OFPT10_BARRIER_REPLY, /* OFPT 1.1+ (21): void. */ OFPRAW_OFPT11_BARRIER_REPLY, /* OFPT 1.1+ (22): struct ofp11_queue_get_config_request. */ OFPRAW_OFPT11_QUEUE_GET_CONFIG_REQUEST, /* OFPT 1.1+ (23): struct ofp11_queue_get_config_reply, struct ofp_packet_queue[]. */ OFPRAW_OFPT11_QUEUE_GET_CONFIG_REPLY, /* OFPT 1.2+ (24): struct ofp12_role_request. 
*/ OFPRAW_OFPT12_ROLE_REQUEST, /* NXT 1.0+ (10): struct nx_role_request. */ OFPRAW_NXT_ROLE_REQUEST, /* OFPT 1.2+ (25): struct ofp12_role_request. */ OFPRAW_OFPT12_ROLE_REPLY, /* NXT 1.0+ (11): struct nx_role_request. */ OFPRAW_NXT_ROLE_REPLY, /* OFPT 1.3+ (26): void. */ OFPRAW_OFPT13_GET_ASYNC_REQUEST, /* OFPT 1.3+ (27): struct ofp13_async_config. */ OFPRAW_OFPT13_GET_ASYNC_REPLY, /* OFPT 1.3+ (28): struct ofp13_async_config. */ OFPRAW_OFPT13_SET_ASYNC, /* NXT 1.0+ (19): struct nx_async_config. */ OFPRAW_NXT_SET_ASYNC_CONFIG, /* OFPT 1.3+ (29): struct ofp13_meter_mod, uint8_t[8][]. */ OFPRAW_OFPT13_METER_MOD, /* Standard statistics. */ /* OFPST 1.0+ (0): void. */ OFPRAW_OFPST_DESC_REQUEST, /* OFPST 1.0+ (0): struct ofp_desc_stats. */ OFPRAW_OFPST_DESC_REPLY, /* OFPST 1.0 (1): struct ofp10_flow_stats_request. */ OFPRAW_OFPST10_FLOW_REQUEST, /* OFPST 1.1+ (1): struct ofp11_flow_stats_request, uint8_t[8][]. */ OFPRAW_OFPST11_FLOW_REQUEST, /* NXST 1.0 (0): struct nx_flow_stats_request, uint8_t[8][]. */ OFPRAW_NXST_FLOW_REQUEST, /* OFPST 1.0 (1): uint8_t[]. */ OFPRAW_OFPST10_FLOW_REPLY, /* OFPST 1.1-1.2 (1): uint8_t[]. */ OFPRAW_OFPST11_FLOW_REPLY, /* OFPST 1.3+ (1): uint8_t[]. */ OFPRAW_OFPST13_FLOW_REPLY, /* NXST 1.0 (0): uint8_t[]. */ OFPRAW_NXST_FLOW_REPLY, /* OFPST 1.0 (2): struct ofp10_flow_stats_request. */ OFPRAW_OFPST10_AGGREGATE_REQUEST, /* OFPST 1.1+ (2): struct ofp11_flow_stats_request, uint8_t[8][]. */ OFPRAW_OFPST11_AGGREGATE_REQUEST, /* NXST 1.0 (1): struct nx_flow_stats_request, uint8_t[8][]. */ OFPRAW_NXST_AGGREGATE_REQUEST, /* OFPST 1.0+ (2): struct ofp_aggregate_stats_reply. */ OFPRAW_OFPST_AGGREGATE_REPLY, /* NXST 1.0 (1): struct ofp_aggregate_stats_reply. */ OFPRAW_NXST_AGGREGATE_REPLY, /* OFPST 1.0+ (3): void. */ OFPRAW_OFPST_TABLE_REQUEST, /* OFPST 1.0 (3): struct ofp10_table_stats[]. */ OFPRAW_OFPST10_TABLE_REPLY, /* OFPST 1.1 (3): struct ofp11_table_stats[]. */ OFPRAW_OFPST11_TABLE_REPLY, /* OFPST 1.2 (3): struct ofp12_table_stats[]. 
*/ OFPRAW_OFPST12_TABLE_REPLY, /* OFPST 1.3 (3): struct ofp13_table_stats[]. */ OFPRAW_OFPST13_TABLE_REPLY, /* OFPST 1.0 (4): struct ofp10_port_stats_request. */ OFPRAW_OFPST10_PORT_REQUEST, /* OFPST 1.1+ (4): struct ofp11_port_stats_request. */ OFPRAW_OFPST11_PORT_REQUEST, /* OFPST 1.0 (4): struct ofp10_port_stats[]. */ OFPRAW_OFPST10_PORT_REPLY, /* OFPST 1.1-1.2 (4): struct ofp11_port_stats[]. */ OFPRAW_OFPST11_PORT_REPLY, /* OFPST 1.3+ (4): struct ofp13_port_stats[]. */ OFPRAW_OFPST13_PORT_REPLY, /* OFPST 1.0 (5): struct ofp10_queue_stats_request. */ OFPRAW_OFPST10_QUEUE_REQUEST, /* OFPST 1.1+ (5): struct ofp11_queue_stats_request. */ OFPRAW_OFPST11_QUEUE_REQUEST, /* OFPST 1.0 (5): struct ofp10_queue_stats[]. */ OFPRAW_OFPST10_QUEUE_REPLY, /* OFPST 1.1-1.2 (5): struct ofp11_queue_stats[]. */ OFPRAW_OFPST11_QUEUE_REPLY, /* OFPST 1.3+ (5): struct ofp13_queue_stats[]. */ OFPRAW_OFPST13_QUEUE_REPLY, /* OFPST 1.1+ (6): struct ofp11_group_stats_request. */ OFPRAW_OFPST11_GROUP_REQUEST, /* OFPST 1.1-1.2 (6): struct ofp11_group_stats[]. */ OFPRAW_OFPST11_GROUP_REPLY, /* OFPST 1.3 (6): struct ofp13_group_stats[]. */ OFPRAW_OFPST13_GROUP_REPLY, /* OFPST 1.1+ (7): void. */ OFPRAW_OFPST11_GROUP_DESC_REQUEST, /* OFPST 1.1+ (7): struct ofp11_group_desc_stats[]. */ OFPRAW_OFPST11_GROUP_DESC_REPLY, /* OFPST 1.2+ (8): void. */ OFPRAW_OFPST12_GROUP_FEATURES_REQUEST, /* OFPST 1.2+ (8): struct ofp12_group_features_stats. */ OFPRAW_OFPST12_GROUP_FEATURES_REPLY, /* OFPST 1.3+ (9): struct ofp13_meter_multipart_request. */ OFPRAW_OFPST13_METER_REQUEST, /* OFPST 1.3+ (9): uint8_t[8][]. */ OFPRAW_OFPST13_METER_REPLY, /* OFPST 1.3+ (10): struct ofp13_meter_multipart_request. */ OFPRAW_OFPST13_METER_CONFIG_REQUEST, /* OFPST 1.3+ (10): uint8_t[8][]. */ OFPRAW_OFPST13_METER_CONFIG_REPLY, /* OFPST 1.3+ (11): void. */ OFPRAW_OFPST13_METER_FEATURES_REQUEST, /* OFPST 1.3+ (11): struct ofp13_meter_features. */ OFPRAW_OFPST13_METER_FEATURES_REPLY, /* OFPST 1.3+ (12): struct ofp13_table_features[]. 
*/ OFPRAW_OFPST13_TABLE_FEATURES_REQUEST, /* OFPST 1.3+ (12): struct ofp13_table_features[]. */ OFPRAW_OFPST13_TABLE_FEATURES_REPLY, /* OFPST 1.0+ (13): void. */ OFPRAW_OFPST_PORT_DESC_REQUEST, /* OFPST 1.0 (13): struct ofp10_phy_port[]. */ OFPRAW_OFPST10_PORT_DESC_REPLY, /* OFPST 1.1+ (13): struct ofp11_port[]. */ OFPRAW_OFPST11_PORT_DESC_REPLY, /* Nicira extension messages. * * Nicira extensions that correspond to standard OpenFlow messages are listed * alongside the standard versions above. */ /* NXT 1.0 (12): struct nx_set_flow_format. */ OFPRAW_NXT_SET_FLOW_FORMAT, /* NXT 1.0+ (15): struct nx_flow_mod_table_id. */ OFPRAW_NXT_FLOW_MOD_TABLE_ID, /* NXT 1.0+ (16): struct nx_set_packet_in_format. */ OFPRAW_NXT_SET_PACKET_IN_FORMAT, /* NXT 1.0+ (18): void. */ OFPRAW_NXT_FLOW_AGE, /* NXT 1.0+ (20): struct nx_controller_id. */ OFPRAW_NXT_SET_CONTROLLER_ID, /* NXT 1.0+ (21): struct nx_flow_monitor_cancel. */ OFPRAW_NXT_FLOW_MONITOR_CANCEL, /* NXT 1.0+ (22): void. */ OFPRAW_NXT_FLOW_MONITOR_PAUSED, /* NXT 1.0+ (23): void. */ OFPRAW_NXT_FLOW_MONITOR_RESUMED, /* Nicira extension statistics. * * Nicira extension statistics that correspond to standard OpenFlow statistics * are listed alongside the standard versions above. */ /* NXST 1.0 (2): uint8_t[8][]. */ OFPRAW_NXST_FLOW_MONITOR_REQUEST, /* NXST 1.0 (2): uint8_t[8][]. */ OFPRAW_NXST_FLOW_MONITOR_REPLY, }; /* Decoding messages into OFPRAW_* values. */ enum ofperr ofpraw_decode(enum ofpraw *, const struct ofp_header *); enum ofpraw ofpraw_decode_assert(const struct ofp_header *); enum ofperr ofpraw_pull(enum ofpraw *, struct ofpbuf *); enum ofpraw ofpraw_pull_assert(struct ofpbuf *); enum ofperr ofpraw_decode_partial(enum ofpraw *, const struct ofp_header *, size_t length); /* Encoding messages using OFPRAW_* values. 
*/ struct ofpbuf *ofpraw_alloc(enum ofpraw, uint8_t ofp_version, size_t extra_tailroom); struct ofpbuf *ofpraw_alloc_xid(enum ofpraw, uint8_t ofp_version, ovs_be32 xid, size_t extra_tailroom); struct ofpbuf *ofpraw_alloc_reply(enum ofpraw, const struct ofp_header *request, size_t extra_tailroom); struct ofpbuf *ofpraw_alloc_stats_reply(const struct ofp_header *request, size_t extra_tailroom); void ofpraw_put(enum ofpraw, uint8_t ofp_version, struct ofpbuf *); void ofpraw_put_xid(enum ofpraw, uint8_t ofp_version, ovs_be32 xid, struct ofpbuf *); void ofpraw_put_reply(enum ofpraw, const struct ofp_header *request, struct ofpbuf *); void ofpraw_put_stats_reply(const struct ofp_header *request, struct ofpbuf *); /* Information about OFPRAW_* values. */ const char *ofpraw_get_name(enum ofpraw); enum ofpraw ofpraw_stats_request_to_reply(enum ofpraw, uint8_t version); /* Semantic identifiers for OpenFlow messages. * * Each OFPTYPE_* enumeration constant represents one or more concrete format * of OpenFlow message. When two variants of a message have essentially the * same meaning, they are assigned a single OFPTYPE_* value. * * The comments here must follow a stylized form because the "extract-ofp-msgs" * program parses them at build time to generate data tables. The format is * simply to list each OFPRAW_* enumeration constant for a given OFPTYPE_*, * each followed by a period. */ enum ofptype { /* Immutable messages. */ OFPTYPE_HELLO, /* OFPRAW_OFPT_HELLO. */ OFPTYPE_ERROR, /* OFPRAW_OFPT_ERROR. */ OFPTYPE_ECHO_REQUEST, /* OFPRAW_OFPT_ECHO_REQUEST. */ OFPTYPE_ECHO_REPLY, /* OFPRAW_OFPT_ECHO_REPLY. */ /* Switch configuration messages. */ OFPTYPE_FEATURES_REQUEST, /* OFPRAW_OFPT_FEATURES_REQUEST. */ OFPTYPE_FEATURES_REPLY, /* OFPRAW_OFPT10_FEATURES_REPLY. * OFPRAW_OFPT11_FEATURES_REPLY. * OFPRAW_OFPT13_FEATURES_REPLY. */ OFPTYPE_GET_CONFIG_REQUEST, /* OFPRAW_OFPT_GET_CONFIG_REQUEST. */ OFPTYPE_GET_CONFIG_REPLY, /* OFPRAW_OFPT_GET_CONFIG_REPLY. 
*/ OFPTYPE_SET_CONFIG, /* OFPRAW_OFPT_SET_CONFIG. */ /* Asynchronous messages. */ OFPTYPE_PACKET_IN, /* OFPRAW_OFPT10_PACKET_IN. * OFPRAW_OFPT11_PACKET_IN. * OFPRAW_OFPT12_PACKET_IN. * OFPRAW_OFPT13_PACKET_IN. * OFPRAW_NXT_PACKET_IN. */ OFPTYPE_FLOW_REMOVED, /* OFPRAW_OFPT10_FLOW_REMOVED. * OFPRAW_OFPT11_FLOW_REMOVED. * OFPRAW_NXT_FLOW_REMOVED. */ OFPTYPE_PORT_STATUS, /* OFPRAW_OFPT10_PORT_STATUS. * OFPRAW_OFPT11_PORT_STATUS. */ /* Controller command messages. */ OFPTYPE_PACKET_OUT, /* OFPRAW_OFPT10_PACKET_OUT. * OFPRAW_OFPT11_PACKET_OUT. */ OFPTYPE_FLOW_MOD, /* OFPRAW_OFPT10_FLOW_MOD. * OFPRAW_OFPT11_FLOW_MOD. * OFPRAW_NXT_FLOW_MOD. */ OFPTYPE_PORT_MOD, /* OFPRAW_OFPT10_PORT_MOD. * OFPRAW_OFPT11_PORT_MOD. */ /* Barrier messages. */ OFPTYPE_BARRIER_REQUEST, /* OFPRAW_OFPT10_BARRIER_REQUEST. * OFPRAW_OFPT11_BARRIER_REQUEST. */ OFPTYPE_BARRIER_REPLY, /* OFPRAW_OFPT10_BARRIER_REPLY. * OFPRAW_OFPT11_BARRIER_REPLY. */ /* Queue Configuration messages. */ OFPTYPE_QUEUE_GET_CONFIG_REQUEST, /* OFPRAW_OFPT11_QUEUE_GET_CONFIG_REQUEST. */ OFPTYPE_QUEUE_GET_CONFIG_REPLY, /* OFPRAW_OFPT11_QUEUE_GET_CONFIG_REPLY. */ /* Controller role change request messages. */ OFPTYPE_ROLE_REQUEST, /* OFPRAW_OFPT12_ROLE_REQUEST. * OFPRAW_NXT_ROLE_REQUEST. */ OFPTYPE_ROLE_REPLY, /* OFPRAW_OFPT12_ROLE_REPLY. * OFPRAW_NXT_ROLE_REPLY. */ /* Asynchronous message configuration. */ OFPTYPE_GET_ASYNC_REQUEST, /* OFPRAW_OFPT13_GET_ASYNC_REQUEST. */ OFPTYPE_GET_ASYNC_REPLY, /* OFPRAW_OFPT13_GET_ASYNC_REPLY. */ OFPTYPE_SET_ASYNC_CONFIG, /* OFPRAW_NXT_SET_ASYNC_CONFIG. * OFPRAW_OFPT13_SET_ASYNC. */ /* Meters and rate limiters configuration messages. */ OFPTYPE_METER_MOD, /* OFPRAW_OFPT13_METER_MOD. */ /* Statistics. */ OFPTYPE_DESC_STATS_REQUEST, /* OFPRAW_OFPST_DESC_REQUEST. */ OFPTYPE_DESC_STATS_REPLY, /* OFPRAW_OFPST_DESC_REPLY. */ OFPTYPE_FLOW_STATS_REQUEST, /* OFPRAW_OFPST10_FLOW_REQUEST. * OFPRAW_OFPST11_FLOW_REQUEST. * OFPRAW_NXST_FLOW_REQUEST. 
*/ OFPTYPE_FLOW_STATS_REPLY, /* OFPRAW_OFPST10_FLOW_REPLY. * OFPRAW_OFPST11_FLOW_REPLY. * OFPRAW_OFPST13_FLOW_REPLY. * OFPRAW_NXST_FLOW_REPLY. */ OFPTYPE_AGGREGATE_STATS_REQUEST, /* OFPRAW_OFPST10_AGGREGATE_REQUEST. * OFPRAW_OFPST11_AGGREGATE_REQUEST. * OFPRAW_NXST_AGGREGATE_REQUEST. */ OFPTYPE_AGGREGATE_STATS_REPLY, /* OFPRAW_OFPST_AGGREGATE_REPLY. * OFPRAW_NXST_AGGREGATE_REPLY. */ OFPTYPE_TABLE_STATS_REQUEST, /* OFPRAW_OFPST_TABLE_REQUEST. */ OFPTYPE_TABLE_STATS_REPLY, /* OFPRAW_OFPST10_TABLE_REPLY. * OFPRAW_OFPST11_TABLE_REPLY. * OFPRAW_OFPST12_TABLE_REPLY. * OFPRAW_OFPST13_TABLE_REPLY. */ OFPTYPE_PORT_STATS_REQUEST, /* OFPRAW_OFPST10_PORT_REQUEST. * OFPRAW_OFPST11_PORT_REQUEST. */ OFPTYPE_PORT_STATS_REPLY, /* OFPRAW_OFPST10_PORT_REPLY. * OFPRAW_OFPST11_PORT_REPLY. * OFPRAW_OFPST13_PORT_REPLY. */ OFPTYPE_QUEUE_STATS_REQUEST, /* OFPRAW_OFPST10_QUEUE_REQUEST. * OFPRAW_OFPST11_QUEUE_REQUEST. */ OFPTYPE_QUEUE_STATS_REPLY, /* OFPRAW_OFPST10_QUEUE_REPLY. * OFPRAW_OFPST11_QUEUE_REPLY. * OFPRAW_OFPST13_QUEUE_REPLY. */ OFPTYPE_GROUP_STATS_REQUEST, /* OFPRAW_OFPST11_GROUP_REQUEST. */ OFPTYPE_GROUP_STATS_REPLY, /* OFPRAW_OFPST11_GROUP_REPLY. * OFPRAW_OFPST13_GROUP_REPLY. */ OFPTYPE_GROUP_DESC_STATS_REQUEST, /* OFPRAW_OFPST11_GROUP_DESC_REQUEST. */ OFPTYPE_GROUP_DESC_STATS_REPLY, /* OFPRAW_OFPST11_GROUP_DESC_REPLY. */ OFPTYPE_GROUP_FEATURES_STATS_REQUEST, /* OFPRAW_OFPST12_GROUP_FEATURES_REQUEST. */ OFPTYPE_GROUP_FEATURES_STATS_REPLY, /* OFPRAW_OFPST12_GROUP_FEATURES_REPLY. */ OFPTYPE_METER_STATS_REQUEST, /* OFPRAW_OFPST13_METER_REQUEST. */ OFPTYPE_METER_STATS_REPLY, /* OFPRAW_OFPST13_METER_REPLY. */ OFPTYPE_METER_CONFIG_STATS_REQUEST, /* OFPRAW_OFPST13_METER_CONFIG_REQUEST. */ OFPTYPE_METER_CONFIG_STATS_REPLY, /* OFPRAW_OFPST13_METER_CONFIG_REPLY. */ OFPTYPE_METER_FEATURES_STATS_REQUEST, /* OFPRAW_OFPST13_METER_FEATURES_REQUEST. */ OFPTYPE_METER_FEATURES_STATS_REPLY, /* OFPRAW_OFPST13_METER_FEATURES_REPLY. 
*/ OFPTYPE_TABLE_FEATURES_STATS_REQUEST, /* OFPRAW_OFPST13_TABLE_FEATURES_REQUEST. */ OFPTYPE_TABLE_FEATURES_STATS_REPLY, /* OFPRAW_OFPST13_TABLE_FEATURES_REPLY. */ OFPTYPE_PORT_DESC_STATS_REQUEST, /* OFPRAW_OFPST_PORT_DESC_REQUEST. */ OFPTYPE_PORT_DESC_STATS_REPLY, /* OFPRAW_OFPST10_PORT_DESC_REPLY. * OFPRAW_OFPST11_PORT_DESC_REPLY. */ /* Nicira extensions. */ OFPTYPE_SET_FLOW_FORMAT, /* OFPRAW_NXT_SET_FLOW_FORMAT. */ OFPTYPE_FLOW_MOD_TABLE_ID, /* OFPRAW_NXT_FLOW_MOD_TABLE_ID. */ OFPTYPE_SET_PACKET_IN_FORMAT, /* OFPRAW_NXT_SET_PACKET_IN_FORMAT. */ OFPTYPE_FLOW_AGE, /* OFPRAW_NXT_FLOW_AGE. */ OFPTYPE_SET_CONTROLLER_ID, /* OFPRAW_NXT_SET_CONTROLLER_ID. */ /* Flow monitor extension. */ OFPTYPE_FLOW_MONITOR_STATS_REQUEST, /* OFPRAW_NXST_FLOW_MONITOR_REQUEST. */ OFPTYPE_FLOW_MONITOR_STATS_REPLY, /* OFPRAW_NXST_FLOW_MONITOR_REPLY. */ OFPTYPE_FLOW_MONITOR_CANCEL, /* OFPRAW_NXT_FLOW_MONITOR_CANCEL. */ OFPTYPE_FLOW_MONITOR_PAUSED, /* OFPRAW_NXT_FLOW_MONITOR_PAUSED. */ OFPTYPE_FLOW_MONITOR_RESUMED, /* OFPRAW_NXT_FLOW_MONITOR_RESUMED. */ }; /* Decoding messages into OFPTYPE_* values. */ enum ofperr ofptype_decode(enum ofptype *, const struct ofp_header *); enum ofperr ofptype_pull(enum ofptype *, struct ofpbuf *); enum ofptype ofptype_from_ofpraw(enum ofpraw); /* OpenFlow message properties. */ void ofpmsg_update_length(struct ofpbuf *); const void *ofpmsg_body(const struct ofp_header *); /* Multipart messages (aka "statistics"). * * Individual OpenFlow messages are limited to 64 kB in size, but some messages * need to be longer. Therefore, multipart messages allow a longer message to * be divided into multiple parts at some convenient boundary. For example, * limiting the response to a "flow dump" request to 64 kB would unreasonably * limit the maximum number of flows in an OpenFlow switch, so a "flow dump" is * expressed as a multipart request/reply pair, with the reply broken into * pieces between flows. * * Multipart messages always consist of a request/reply pair. 
* * In OpenFlow 1.0, 1.1, and 1.2, requests must always fit in a single message, * that is, only a multipart reply may have more than one part. OpenFlow 1.3 * adds one multipart request. This code does not yet support multipart * requests. */ /* Encoding multipart replies. * * These functions are useful for multipart replies that might really require * more than one message. A multipart message that is known in advance to fit * within 64 kB doesn't need any special treatment, so you might as well use * the ofpraw_alloc_*() functions. * * These functions work with a "struct list" of "struct ofpbuf"s, each of * which represents one part of a multipart message. */ void ofpmp_init(struct list *, const struct ofp_header *request); struct ofpbuf *ofpmp_reserve(struct list *, size_t len); void *ofpmp_append(struct list *, size_t len); void ofpmp_postappend(struct list *, size_t start_ofs); /* Decoding multipart replies. */ uint16_t ofpmp_flags(const struct ofp_header *); bool ofpmp_more(const struct ofp_header *); #endif /* ofp-msgs.h */ openvswitch-2.0.1+git20140120/lib/ofp-parse.c000066400000000000000000001642271226605124000202740ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ofp-parse.h" #include #include #include #include "bundle.h" #include "byte-order.h" #include "dynamic-string.h" #include "learn.h" #include "meta-flow.h" #include "multipath.h" #include "netdev.h" #include "nx-match.h" #include "ofp-actions.h" #include "ofp-util.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "ovs-thread.h" #include "packets.h" #include "socket-util.h" #include "vconn.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ofp_parse); /* Parses 'str' as an 8-bit unsigned integer into '*valuep'. * * 'name' describes the value parsed in an error message, if any. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT str_to_u8(const char *str, const char *name, uint8_t *valuep) { int value; if (!str_to_int(str, 0, &value) || value < 0 || value > 255) { return xasprintf("invalid %s \"%s\"", name, str); } *valuep = value; return NULL; } /* Parses 'str' as a 16-bit unsigned integer into '*valuep'. * * 'name' describes the value parsed in an error message, if any. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT str_to_u16(const char *str, const char *name, uint16_t *valuep) { int value; if (!str_to_int(str, 0, &value) || value < 0 || value > 65535) { return xasprintf("invalid %s \"%s\"", name, str); } *valuep = value; return NULL; } /* Parses 'str' as a 32-bit unsigned integer into '*valuep'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/ static char * WARN_UNUSED_RESULT str_to_u32(const char *str, uint32_t *valuep) { char *tail; uint32_t value; if (!str[0]) { return xstrdup("missing required numeric argument"); } errno = 0; value = strtoul(str, &tail, 0); if (errno == EINVAL || errno == ERANGE || *tail) { return xasprintf("invalid numeric format %s", str); } *valuep = value; return NULL; } /* Parses 'str' as an 64-bit unsigned integer into '*valuep'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT str_to_u64(const char *str, uint64_t *valuep) { char *tail; uint64_t value; if (!str[0]) { return xstrdup("missing required numeric argument"); } errno = 0; value = strtoull(str, &tail, 0); if (errno == EINVAL || errno == ERANGE || *tail) { return xasprintf("invalid numeric format %s", str); } *valuep = value; return NULL; } /* Parses 'str' as an 64-bit unsigned integer in network byte order into * '*valuep'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT str_to_be64(const char *str, ovs_be64 *valuep) { uint64_t value = 0; char *error; error = str_to_u64(str, &value); if (!error) { *valuep = htonll(value); } return error; } /* Parses 'str' as an Ethernet address into 'mac'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT str_to_mac(const char *str, uint8_t mac[6]) { if (sscanf(str, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(mac)) != ETH_ADDR_SCAN_COUNT) { return xasprintf("invalid mac address %s", str); } return NULL; } /* Parses 'str' as an IP address into '*ip'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/ static char * WARN_UNUSED_RESULT str_to_ip(const char *str, ovs_be32 *ip) { struct in_addr in_addr; if (lookup_ip(str, &in_addr)) { return xasprintf("%s: could not convert to IP address", str); } *ip = in_addr.s_addr; return NULL; } /* Parses 'arg' as the argument to an "enqueue" action, and appends such an * action to 'ofpacts'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT parse_enqueue(char *arg, struct ofpbuf *ofpacts) { char *sp = NULL; char *port = strtok_r(arg, ":q", &sp); char *queue = strtok_r(NULL, "", &sp); struct ofpact_enqueue *enqueue; if (port == NULL || queue == NULL) { return xstrdup("\"enqueue\" syntax is \"enqueue:PORT:QUEUE\""); } enqueue = ofpact_put_ENQUEUE(ofpacts); if (!ofputil_port_from_string(port, &enqueue->port)) { return xasprintf("%s: enqueue to unknown port", port); } return str_to_u32(queue, &enqueue->queue); } /* Parses 'arg' as the argument to an "output" action, and appends such an * action to 'ofpacts'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT parse_output(const char *arg, struct ofpbuf *ofpacts) { if (strchr(arg, '[')) { struct ofpact_output_reg *output_reg; output_reg = ofpact_put_OUTPUT_REG(ofpacts); output_reg->max_len = UINT16_MAX; return mf_parse_subfield(&output_reg->src, arg); } else { struct ofpact_output *output; output = ofpact_put_OUTPUT(ofpacts); if (!ofputil_port_from_string(arg, &output->port)) { return xasprintf("%s: output to unknown port", arg); } output->max_len = output->port == OFPP_CONTROLLER ? UINT16_MAX : 0; return NULL; } } /* Parses 'arg' as the argument to an "resubmit" action, and appends such an * action to 'ofpacts'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. 
The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT parse_resubmit(char *arg, struct ofpbuf *ofpacts) { struct ofpact_resubmit *resubmit; char *in_port_s, *table_s; resubmit = ofpact_put_RESUBMIT(ofpacts); in_port_s = strsep(&arg, ","); if (in_port_s && in_port_s[0]) { if (!ofputil_port_from_string(in_port_s, &resubmit->in_port)) { return xasprintf("%s: resubmit to unknown port", in_port_s); } } else { resubmit->in_port = OFPP_IN_PORT; } table_s = strsep(&arg, ","); if (table_s && table_s[0]) { uint32_t table_id = 0; char *error; error = str_to_u32(table_s, &table_id); if (error) { return error; } resubmit->table_id = table_id; } else { resubmit->table_id = 255; } if (resubmit->in_port == OFPP_IN_PORT && resubmit->table_id == 255) { return xstrdup("at least one \"in_port\" or \"table\" must be " "specified on resubmit"); } return NULL; } /* Parses 'arg' as the argument to a "note" action, and appends such an action * to 'ofpacts'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT parse_note(const char *arg, struct ofpbuf *ofpacts) { struct ofpact_note *note; note = ofpact_put_NOTE(ofpacts); while (*arg != '\0') { uint8_t byte; bool ok; if (*arg == '.') { arg++; } if (*arg == '\0') { break; } byte = hexits_value(arg, 2, &ok); if (!ok) { return xstrdup("bad hex digit in `note' argument"); } ofpbuf_put(ofpacts, &byte, 1); note = ofpacts->l2; note->length++; arg += 2; } ofpact_update_len(ofpacts, ¬e->ofpact); return NULL; } /* Parses 'arg' as the argument to a "fin_timeout" action, and appends such an * action to 'ofpacts'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/ static char * WARN_UNUSED_RESULT parse_fin_timeout(struct ofpbuf *b, char *arg) { struct ofpact_fin_timeout *oft = ofpact_put_FIN_TIMEOUT(b); char *key, *value; while (ofputil_parse_key_value(&arg, &key, &value)) { char *error; if (!strcmp(key, "idle_timeout")) { error = str_to_u16(value, key, &oft->fin_idle_timeout); } else if (!strcmp(key, "hard_timeout")) { error = str_to_u16(value, key, &oft->fin_hard_timeout); } else { error = xasprintf("invalid key '%s' in 'fin_timeout' argument", key); } if (error) { return error; } } return NULL; } /* Parses 'arg' as the argument to a "controller" action, and appends such an * action to 'ofpacts'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ static char * WARN_UNUSED_RESULT parse_controller(struct ofpbuf *b, char *arg) { enum ofp_packet_in_reason reason = OFPR_ACTION; uint16_t controller_id = 0; uint16_t max_len = UINT16_MAX; if (!arg[0]) { /* Use defaults. 
*/ } else if (strspn(arg, "0123456789") == strlen(arg)) { char *error = str_to_u16(arg, "max_len", &max_len); if (error) { return error; } } else { char *name, *value; while (ofputil_parse_key_value(&arg, &name, &value)) { if (!strcmp(name, "reason")) { if (!ofputil_packet_in_reason_from_string(value, &reason)) { return xasprintf("unknown reason \"%s\"", value); } } else if (!strcmp(name, "max_len")) { char *error = str_to_u16(value, "max_len", &max_len); if (error) { return error; } } else if (!strcmp(name, "id")) { char *error = str_to_u16(value, "id", &controller_id); if (error) { return error; } } else { return xasprintf("unknown key \"%s\" parsing controller " "action", name); } } } if (reason == OFPR_ACTION && controller_id == 0) { struct ofpact_output *output; output = ofpact_put_OUTPUT(b); output->port = OFPP_CONTROLLER; output->max_len = max_len; } else { struct ofpact_controller *controller; controller = ofpact_put_CONTROLLER(b); controller->max_len = max_len; controller->reason = reason; controller->controller_id = controller_id; } return NULL; } static void parse_noargs_dec_ttl(struct ofpbuf *b) { struct ofpact_cnt_ids *ids; uint16_t id = 0; ids = ofpact_put_DEC_TTL(b); ofpbuf_put(b, &id, sizeof id); ids = b->l2; ids->n_controllers++; ofpact_update_len(b, &ids->ofpact); } /* Parses 'arg' as the argument to a "dec_ttl" action, and appends such an * action to 'ofpacts'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/
static char * WARN_UNUSED_RESULT
parse_dec_ttl(struct ofpbuf *b, char *arg)
{
    if (*arg == '\0') {
        parse_noargs_dec_ttl(b);
    } else {
        struct ofpact_cnt_ids *ids;
        char *cntr;

        ids = ofpact_put_DEC_TTL(b);
        ids->ofpact.compat = OFPUTIL_NXAST_DEC_TTL_CNT_IDS;
        for (cntr = strtok_r(arg, ", ", &arg); cntr != NULL;
             cntr = strtok_r(NULL, ", ", &arg)) {
            uint16_t id = 0;
            char *error;

            /* Validate each id the same way the "controller" action parses
             * its "id" key, instead of letting atoi() silently turn garbage
             * into 0 or truncate out-of-range values. */
            error = str_to_u16(cntr, "controller id", &id);
            if (error) {
                return error;
            }

            ofpbuf_put(b, &id, sizeof id);

            /* Re-fetch the action through 'b->l2' after appending to the
             * buffer rather than reusing the previous 'ids' pointer. */
            ids = b->l2;
            ids->n_controllers++;
        }
        if (!ids->n_controllers) {
            return xstrdup("dec_ttl_cnt_ids: expected at least one controller "
                           "id.");
        }
        ofpact_update_len(b, &ids->ofpact);
    }
    return NULL;
}

/* Parses 'arg' as the argument to a "set_mpls_ttl" action, and appends such an
 * action to 'b'.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string. */
static char * WARN_UNUSED_RESULT
parse_set_mpls_ttl(struct ofpbuf *b, const char *arg)
{
    struct ofpact_mpls_ttl *mpls_ttl = ofpact_put_SET_MPLS_TTL(b);
    uint8_t ttl = 0;
    char *error;

    if (*arg == '\0') {
        return xstrdup("parse_set_mpls_ttl: expected ttl.");
    }

    /* Reject non-numeric and out-of-range TTLs rather than accepting
     * whatever atoi() happens to produce for them. */
    error = str_to_u8(arg, "MPLS TTL", &ttl);
    if (error) {
        return error;
    }

    mpls_ttl->ttl = ttl;
    return NULL;
}

/* Parses a "set_field" action with argument 'arg', appending the parsed
 * action to 'ofpacts'.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string.
*/
static char * WARN_UNUSED_RESULT
set_field_parse__(char *arg, struct ofpbuf *ofpacts,
                  enum ofputil_protocol *usable_protocols)
{
    /* The action is appended up front; the error paths below return without
     * removing it. */
    struct ofpact_reg_load *load = ofpact_put_REG_LOAD(ofpacts);
    const struct mf_field *field;
    union mf_value parsed;
    char *value_s = arg;
    char *field_s;
    char *arrow;
    char *error;

    /* 'arg' has the form VALUE->FIELD. */
    arrow = strstr(arg, "->");
    if (!arrow) {
        return xasprintf("%s: missing `->'", arg);
    }
    if (arrow[strlen("->")] == '\0') {
        return xasprintf("%s: missing field name following `->'", arg);
    }

    field_s = arrow + strlen("->");
    field = mf_from_name(field_s);
    if (!field) {
        return xasprintf("%s is not a valid OXM field name", field_s);
    }
    if (!field->writable) {
        return xasprintf("%s is read-only", field_s);
    }

    /* Cut 'arg' at the arrow so that 'value_s' holds only the value. */
    *arrow = '\0';
    error = mf_parse_value(field, value_s, &parsed);
    if (error) {
        return error;
    }

    if (!mf_is_value_valid(field, &parsed)) {
        return xasprintf("%s is not a valid value for field %s",
                         value_s, field_s);
    }

    ofpact_set_field_init(load, field, &parsed);
    *usable_protocols &= field->usable_protocols;
    return NULL;
}

/* Parses 'arg' as the argument to a "set_field" action, and appends such an
 * action to 'ofpacts'.  Works on a private copy of 'arg', since the parser
 * modifies the string it is given.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string. */
static char * WARN_UNUSED_RESULT
set_field_parse(const char *arg, struct ofpbuf *ofpacts,
                enum ofputil_protocol *usable_protocols)
{
    char *scratch = xstrdup(arg);
    char *error;

    error = set_field_parse__(scratch, ofpacts, usable_protocols);
    free(scratch);
    return error;
}

/* Parses 'arg' as the argument to a "write_metadata" instruction, and appends
 * such an action to 'ofpacts'.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string.
*/
static char * WARN_UNUSED_RESULT
parse_metadata(struct ofpbuf *b, char *arg)
{
    char *slash = strchr(arg, '/');
    struct ofpact_metadata *om = ofpact_put_WRITE_METADATA(b);

    if (!slash) {
        /* No "/MASK" suffix: use an all-ones mask. */
        om->mask = htonll(UINT64_MAX);
    } else {
        char *error;

        /* Split "VALUE/MASK" in place and parse the mask first. */
        *slash = '\0';
        error = str_to_be64(slash + 1, &om->mask);
        if (error) {
            return error;
        }
    }

    return str_to_be64(arg, &om->metadata);
}

/* Parses 'arg' as the argument to a "sample" action, and appends such an
 * action to 'b'.  Recognized keys are "probability", "collector_set_id",
 * "obs_domain_id" and "obs_point_id"; a nonzero probability is mandatory.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string. */
static char * WARN_UNUSED_RESULT
parse_sample(struct ofpbuf *b, char *arg)
{
    struct ofpact_sample *os = ofpact_put_SAMPLE(b);
    char *key, *value;

    while (ofputil_parse_key_value(&arg, &key, &value)) {
        char *error;

        if (!strcmp(key, "probability")) {
            error = str_to_u16(value, "probability", &os->probability);
            if (!error && !os->probability) {
                error = xasprintf("invalid probability value \"%s\"", value);
            }
        } else {
            uint32_t *field;

            /* The remaining keys are all plain 32-bit numbers. */
            if (!strcmp(key, "collector_set_id")) {
                field = &os->collector_set_id;
            } else if (!strcmp(key, "obs_domain_id")) {
                field = &os->obs_domain_id;
            } else if (!strcmp(key, "obs_point_id")) {
                field = &os->obs_point_id;
            } else {
                return xasprintf("invalid key \"%s\" in \"sample\" argument",
                                 key);
            }
            error = str_to_u32(value, field);
        }

        if (error) {
            return error;
        }
    }

    if (!os->probability) {
        return xstrdup("non-zero \"probability\" must be specified on sample");
    }
    return NULL;
}

/* Parses 'arg' as the argument to action 'code', and appends such an action to
 * 'ofpacts'.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string.
*/
static char * WARN_UNUSED_RESULT
parse_named_action(enum ofputil_action_code code,
                   char *arg, struct ofpbuf *ofpacts,
                   enum ofputil_protocol *usable_protocols)
{
    /* Saved so that an action that fails to parse after being appended can be
     * removed again at the bottom of the function. */
    size_t orig_size = ofpacts->size;
    struct ofpact_tunnel *tunnel;
    char *error = NULL;
    uint16_t ethertype = 0;
    uint16_t vid = 0;
    uint8_t tos = 0;
    uint8_t pcp = 0;

    switch (code) {
    case OFPUTIL_ACTION_INVALID:
        NOT_REACHED();

    case OFPUTIL_OFPAT10_OUTPUT:
    case OFPUTIL_OFPAT11_OUTPUT:
        error = parse_output(arg, ofpacts);
        break;

    case OFPUTIL_OFPAT10_SET_VLAN_VID:
    case OFPUTIL_OFPAT11_SET_VLAN_VID:
        /* Nothing has been appended yet on these early returns. */
        error = str_to_u16(arg, "VLAN VID", &vid);
        if (error) {
            return error;
        }

        if (vid & ~VLAN_VID_MASK) {
            return xasprintf("%s: not a valid VLAN VID", arg);
        }
        ofpact_put_SET_VLAN_VID(ofpacts)->vlan_vid = vid;
        break;

    case OFPUTIL_OFPAT10_SET_VLAN_PCP:
    case OFPUTIL_OFPAT11_SET_VLAN_PCP:
        error = str_to_u8(arg, "VLAN PCP", &pcp);
        if (error) {
            return error;
        }

        if (pcp & ~7) {
            return xasprintf("%s: not a valid VLAN PCP", arg);
        }
        ofpact_put_SET_VLAN_PCP(ofpacts)->vlan_pcp = pcp;
        break;

    case OFPUTIL_OFPAT12_SET_FIELD:
        /* Goes through the common exit so that a partially built action is
         * rolled back on error, like every other case that appends before
         * parsing. */
        error = set_field_parse(arg, ofpacts, usable_protocols);
        break;

    case OFPUTIL_OFPAT10_STRIP_VLAN:
    case OFPUTIL_OFPAT11_POP_VLAN:
        ofpact_put_STRIP_VLAN(ofpacts);
        break;

    case OFPUTIL_OFPAT11_PUSH_VLAN:
        *usable_protocols &= OFPUTIL_P_OF11_UP;
        error = str_to_u16(arg, "ethertype", &ethertype);
        if (error) {
            return error;
        }

        if (ethertype != ETH_TYPE_VLAN_8021Q) {
            /* XXX ETH_TYPE_VLAN_8021AD case isn't supported */
            return xasprintf("%s: not a valid VLAN ethertype", arg);
        }

        ofpact_put_PUSH_VLAN(ofpacts);
        break;

    case OFPUTIL_OFPAT11_SET_QUEUE:
        error = str_to_u32(arg, &ofpact_put_SET_QUEUE(ofpacts)->queue_id);
        break;

    case OFPUTIL_OFPAT10_SET_DL_SRC:
    case OFPUTIL_OFPAT11_SET_DL_SRC:
        error = str_to_mac(arg, ofpact_put_SET_ETH_SRC(ofpacts)->mac);
        break;

    case OFPUTIL_OFPAT10_SET_DL_DST:
    case OFPUTIL_OFPAT11_SET_DL_DST:
        error = str_to_mac(arg, ofpact_put_SET_ETH_DST(ofpacts)->mac);
        break;

    case OFPUTIL_OFPAT10_SET_NW_SRC:
    case OFPUTIL_OFPAT11_SET_NW_SRC:
        error = str_to_ip(arg, &ofpact_put_SET_IPV4_SRC(ofpacts)->ipv4);
        break;

    case OFPUTIL_OFPAT10_SET_NW_DST:
    case OFPUTIL_OFPAT11_SET_NW_DST:
        error = str_to_ip(arg, &ofpact_put_SET_IPV4_DST(ofpacts)->ipv4);
        break;

    case OFPUTIL_OFPAT10_SET_NW_TOS:
    case OFPUTIL_OFPAT11_SET_NW_TOS:
        error = str_to_u8(arg, "TOS", &tos);
        if (error) {
            return error;
        }

        if (tos & ~IP_DSCP_MASK) {
            return xasprintf("%s: not a valid TOS", arg);
        }
        ofpact_put_SET_IPV4_DSCP(ofpacts)->dscp = tos;
        break;

    case OFPUTIL_OFPAT11_DEC_NW_TTL:
        NOT_REACHED();

    case OFPUTIL_OFPAT10_SET_TP_SRC:
    case OFPUTIL_OFPAT11_SET_TP_SRC:
        error = str_to_u16(arg, "source port",
                           &ofpact_put_SET_L4_SRC_PORT(ofpacts)->port);
        break;

    case OFPUTIL_OFPAT10_SET_TP_DST:
    case OFPUTIL_OFPAT11_SET_TP_DST:
        error = str_to_u16(arg, "destination port",
                           &ofpact_put_SET_L4_DST_PORT(ofpacts)->port);
        break;

    case OFPUTIL_OFPAT10_ENQUEUE:
        error = parse_enqueue(arg, ofpacts);
        break;

    case OFPUTIL_NXAST_RESUBMIT:
        error = parse_resubmit(arg, ofpacts);
        break;

    case OFPUTIL_NXAST_SET_TUNNEL:
    case OFPUTIL_NXAST_SET_TUNNEL64:
        tunnel = ofpact_put_SET_TUNNEL(ofpacts);
        /* Record which of the two action codes the user named. */
        tunnel->ofpact.compat = code;
        error = str_to_u64(arg, &tunnel->tun_id);
        break;

    case OFPUTIL_NXAST_WRITE_METADATA:
        error = parse_metadata(ofpacts, arg);
        break;

    case OFPUTIL_NXAST_SET_QUEUE:
        error = str_to_u32(arg, &ofpact_put_SET_QUEUE(ofpacts)->queue_id);
        break;

    case OFPUTIL_NXAST_POP_QUEUE:
        ofpact_put_POP_QUEUE(ofpacts);
        break;

    case OFPUTIL_NXAST_REG_MOVE:
        error = nxm_parse_reg_move(ofpact_put_REG_MOVE(ofpacts), arg);
        break;

    case OFPUTIL_NXAST_REG_LOAD:
        error = nxm_parse_reg_load(ofpact_put_REG_LOAD(ofpacts), arg);
        break;

    case OFPUTIL_NXAST_NOTE:
        error = parse_note(arg, ofpacts);
        break;

    case OFPUTIL_NXAST_MULTIPATH:
        error = multipath_parse(ofpact_put_MULTIPATH(ofpacts), arg);
        break;

    case OFPUTIL_NXAST_BUNDLE:
        error = bundle_parse(arg, ofpacts);
        break;

    case OFPUTIL_NXAST_BUNDLE_LOAD:
        error = bundle_parse_load(arg, ofpacts);
        break;

    case OFPUTIL_NXAST_RESUBMIT_TABLE:
    case OFPUTIL_NXAST_OUTPUT_REG:
    case OFPUTIL_NXAST_DEC_TTL_CNT_IDS:
        NOT_REACHED();

    case OFPUTIL_NXAST_LEARN:
        error = learn_parse(arg, ofpacts);
        break;

    case OFPUTIL_NXAST_EXIT:
        ofpact_put_EXIT(ofpacts);
        break;

    case OFPUTIL_NXAST_DEC_TTL:
        error = parse_dec_ttl(ofpacts, arg);
        break;

    case OFPUTIL_NXAST_SET_MPLS_TTL:
    case OFPUTIL_OFPAT11_SET_MPLS_TTL:
        error = parse_set_mpls_ttl(ofpacts, arg);
        break;

    case OFPUTIL_OFPAT11_DEC_MPLS_TTL:
    case OFPUTIL_NXAST_DEC_MPLS_TTL:
        ofpact_put_DEC_MPLS_TTL(ofpacts);
        break;

    case OFPUTIL_NXAST_FIN_TIMEOUT:
        error = parse_fin_timeout(ofpacts, arg);
        break;

    case OFPUTIL_NXAST_CONTROLLER:
        error = parse_controller(ofpacts, arg);
        break;

    case OFPUTIL_OFPAT11_PUSH_MPLS:
    case OFPUTIL_NXAST_PUSH_MPLS:
        error = str_to_u16(arg, "push_mpls", &ethertype);
        if (!error) {
            ofpact_put_PUSH_MPLS(ofpacts)->ethertype = htons(ethertype);
        }
        break;

    case OFPUTIL_OFPAT11_POP_MPLS:
    case OFPUTIL_NXAST_POP_MPLS:
        error = str_to_u16(arg, "pop_mpls", &ethertype);
        if (!error) {
            ofpact_put_POP_MPLS(ofpacts)->ethertype = htons(ethertype);
        }
        break;

    case OFPUTIL_NXAST_STACK_PUSH:
        error = nxm_parse_stack_action(ofpact_put_STACK_PUSH(ofpacts), arg);
        break;

    case OFPUTIL_NXAST_STACK_POP:
        error = nxm_parse_stack_action(ofpact_put_STACK_POP(ofpacts), arg);
        break;

    case OFPUTIL_NXAST_SAMPLE:
        error = parse_sample(ofpacts, arg);
        break;
    }

    if (error) {
        /* Drop whatever the failed case appended. */
        ofpacts->size = orig_size;
    }
    return error;
}

/* Parses action 'act', with argument 'arg', and appends a parsed version to
 * 'ofpacts'.
 *
 * 'n_actions' specifies the number of actions already parsed (for proper
 * handling of "drop" actions).
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string.
*/
static char * WARN_UNUSED_RESULT
str_to_ofpact__(char *pos, char *act, char *arg,
                struct ofpbuf *ofpacts, int n_actions,
                enum ofputil_protocol *usable_protocols)
{
    int code = ofputil_action_code_from_name(act);
    ofp_port_t port;

    /* A recognized action name. */
    if (code >= 0) {
        return parse_named_action(code, arg, ofpacts, usable_protocols);
    }

    /* "drop" appends nothing, and must be the only action. */
    if (!strcasecmp(act, "drop")) {
        if (n_actions) {
            return xstrdup("Drop actions must not be preceded by other "
                           "actions");
        }
        if (ofputil_parse_key_value(&pos, &act, &arg)) {
            return xstrdup("Drop actions must not be followed by other "
                           "actions");
        }
        return NULL;
    }

    /* Otherwise 'act' must be a port, which becomes an output action. */
    if (!ofputil_port_from_string(act, &port)) {
        return xasprintf("Unknown action: %s", act);
    }
    ofpact_put_OUTPUT(ofpacts)->port = port;
    return NULL;
}

/* Parses 'str' as a series of actions, and appends them to 'ofpacts'.  On
 * failure 'ofpacts' is restored to the size it had on entry.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string. */
static char * WARN_UNUSED_RESULT
str_to_ofpacts(char *str, struct ofpbuf *ofpacts,
               enum ofputil_protocol *usable_protocols)
{
    size_t start_size = ofpacts->size;
    char *cursor = str;
    char *act, *arg;
    int n_parsed;

    for (n_parsed = 0; ofputil_parse_key_value(&cursor, &act, &arg);
         n_parsed++) {
        char *msg = str_to_ofpact__(cursor, act, arg, ofpacts, n_parsed,
                                    usable_protocols);
        if (msg) {
            ofpacts->size = start_size;
            return msg;
        }
    }

    if (ofpacts_verify(ofpacts->data, ofpacts->size)) {
        ofpacts->size = start_size;
        return xstrdup("Incorrect action ordering");
    }

    ofpact_pad(ofpacts);
    return NULL;
}

/* Parses 'arg' as the argument to instruction 'type', and appends such an
 * instruction to 'ofpacts'.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string.
*/
static char * WARN_UNUSED_RESULT
parse_named_instruction(enum ovs_instruction_type type,
                        char *arg, struct ofpbuf *ofpacts,
                        enum ofputil_protocol *usable_protocols)
{
    char *parse_error = NULL;

    *usable_protocols &= OFPUTIL_P_OF11_UP;

    switch (type) {
    case OVSINST_OFPIT11_APPLY_ACTIONS:
        NOT_REACHED();  /* This case is handled by str_to_inst_ofpacts() */
        break;

    case OVSINST_OFPIT11_WRITE_ACTIONS:
        /* XXX */
        parse_error = xstrdup("instruction write-actions is not supported yet");
        break;

    case OVSINST_OFPIT11_CLEAR_ACTIONS:
        ofpact_put_CLEAR_ACTIONS(ofpacts);
        break;

    case OVSINST_OFPIT13_METER:
        *usable_protocols &= OFPUTIL_P_OF13_UP;
        parse_error = str_to_u32(arg, &ofpact_put_METER(ofpacts)->meter_id);
        break;

    case OVSINST_OFPIT11_WRITE_METADATA:
        *usable_protocols &= OFPUTIL_P_NXM_OF11_UP;
        parse_error = parse_metadata(ofpacts, arg);
        break;

    case OVSINST_OFPIT11_GOTO_TABLE: {
        struct ofpact_goto_table *ogt = ofpact_put_GOTO_TABLE(ofpacts);
        /* Only the text before the first comma is used as the table id. */
        char *table_s = strsep(&arg, ",");

        if (table_s == NULL || table_s[0] == '\0') {
            return xstrdup("instruction goto-table needs table id");
        }
        parse_error = str_to_u8(table_s, "table", &ogt->table_id);
        break;
    }
    }

    if (parse_error) {
        return parse_error;
    }

    /* If write_metadata is specified as an action AND an instruction, ofpacts
       could be invalid. */
    if (ofpacts_verify(ofpacts->data, ofpacts->size)) {
        return xstrdup("Incorrect instruction ordering");
    }
    return NULL;
}

/* Parses 'str' as a series of instructions, and appends them to 'ofpacts'.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string.
 */
static char * WARN_UNUSED_RESULT
str_to_inst_ofpacts(char *str, struct ofpbuf *ofpacts,
                    enum ofputil_protocol *usable_protocols)
{
    /* Size of 'ofpacts' on entry; restored on every error path so that a
     * failed parse leaves no partial output behind. */
    size_t orig_size = ofpacts->size;
    char *pos, *inst, *arg;
    int type;
    const char *prev_inst = NULL;
    int prev_type = -1;
    int n_actions = 0;

    pos = str;
    while (ofputil_parse_key_value(&pos, &inst, &arg)) {
        type = ovs_instruction_type_from_name(inst);
        if (type < 0) {
            /* Not an instruction name: parse it as a plain action, which is
             * then accounted for as part of apply-actions. */
            char *error = str_to_ofpact__(pos, inst, arg, ofpacts, n_actions,
                                          usable_protocols);
            if (error) {
                ofpacts->size = orig_size;
                return error;
            }

            type = OVSINST_OFPIT11_APPLY_ACTIONS;
            if (prev_type == type) {
                /* Another action in the same run of actions: skip the
                 * ordering check below. */
                n_actions++;
                continue;
            }
        } else if (type == OVSINST_OFPIT11_APPLY_ACTIONS) {
            ofpacts->size = orig_size;
            return xasprintf("%s isn't supported. Just write actions then "
                             "it is interpreted as apply_actions", inst);
        } else {
            char *error = parse_named_instruction(type, arg, ofpacts,
                                                  usable_protocols);
            if (error) {
                ofpacts->size = orig_size;
                return error;
            }
        }

        /* Instruction types must be strictly increasing: a repeat or an
         * out-of-order instruction is rejected. */
        if (type <= prev_type) {
            ofpacts->size = orig_size;
            if (type == prev_type) {
                return xasprintf("instruction %s may be specified only once",
                                 inst);
            } else {
                return xasprintf("instruction %s must be specified before %s",
                                 inst, prev_inst);
            }
        }

        prev_inst = inst;
        prev_type = type;
        n_actions++;
    }
    ofpact_pad(ofpacts);

    return NULL;
}

/* One entry of the protocol-keyword table used by parse_protocol(): a
 * keyword plus the Ethernet type and (if nonzero) IP protocol it implies. */
struct protocol {
    const char *name;
    uint16_t dl_type;
    uint8_t nw_proto;
};

/* Looks up 'name' in the table of protocol keywords.  On a match, stores the
 * table entry in '*p_out' and returns true; otherwise stores NULL in '*p_out'
 * and returns false. */
static bool
parse_protocol(const char *name, const struct protocol **p_out)
{
    static const struct protocol protocols[] = {
        { "ip", ETH_TYPE_IP, 0 },
        { "arp", ETH_TYPE_ARP, 0 },
        { "icmp", ETH_TYPE_IP, IPPROTO_ICMP },
        { "tcp", ETH_TYPE_IP, IPPROTO_TCP },
        { "udp", ETH_TYPE_IP, IPPROTO_UDP },
        { "sctp", ETH_TYPE_IP, IPPROTO_SCTP },
        { "ipv6", ETH_TYPE_IPV6, 0 },
        { "ip6", ETH_TYPE_IPV6, 0 },
        { "icmp6", ETH_TYPE_IPV6, IPPROTO_ICMPV6 },
        { "tcp6", ETH_TYPE_IPV6, IPPROTO_TCP },
        { "udp6", ETH_TYPE_IPV6, IPPROTO_UDP },
        { "sctp6", ETH_TYPE_IPV6, IPPROTO_SCTP },
        { "rarp", ETH_TYPE_RARP, 0},
        { "mpls", ETH_TYPE_MPLS, 0 },
        { "mplsm", ETH_TYPE_MPLS_MCAST, 0 },
    };
    const struct protocol *p;

    for (p = protocols; p < &protocols[ARRAY_SIZE(protocols)]; p++) {
        if (!strcmp(p->name, name)) {
            *p_out = p;
            return true;
        }
    }
    *p_out = NULL;
    return false;
}

/* Parses 's' as the (possibly masked) value of field 'mf', and updates
 * 'match' appropriately.  Restricts the set of usable protocols to ones
 * supporting the parsed field.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string. */
static char * WARN_UNUSED_RESULT
parse_field(const struct mf_field *mf, const char *s, struct match *match,
            enum ofputil_protocol *usable_protocols)
{
    union mf_value value, mask;
    char *error;

    error = mf_parse(mf, s, &value, &mask);
    if (!error) {
        *usable_protocols &= mf_set(mf, &value, &mask, match);
    }
    return error;
}

/* Parses 'string' into 'fm' for flow_mod 'command' (an OFPFC_* value, or -1).
 * 'command' selects which optional keywords are accepted (see 'fields').
 * 'string' is modified in place: it is split at the "action" keyword and
 * tokenized with strtok_r().  '*usable_protocols' is first set to
 * OFPUTIL_P_ANY and then narrowed as fields and actions are parsed.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string. */
static char * WARN_UNUSED_RESULT
parse_ofp_str__(struct ofputil_flow_mod *fm, int command, char *string,
                enum ofputil_protocol *usable_protocols)
{
    /* Bitmap of keyword groups that 'command' accepts. */
    enum {
        F_OUT_PORT = 1 << 0,
        F_ACTIONS = 1 << 1,
        F_TIMEOUT = 1 << 3,
        F_PRIORITY = 1 << 4,
        F_FLAGS = 1 << 5,
    } fields;
    char *save_ptr = NULL;
    char *act_str = NULL;
    char *name;

    *usable_protocols = OFPUTIL_P_ANY;

    switch (command) {
    case -1:
        fields = F_OUT_PORT;
        break;

    case OFPFC_ADD:
        fields = F_ACTIONS | F_TIMEOUT | F_PRIORITY | F_FLAGS;
        break;

    case OFPFC_DELETE:
        fields = F_OUT_PORT;
        break;

    case OFPFC_DELETE_STRICT:
        fields = F_OUT_PORT | F_PRIORITY;
        break;

    case OFPFC_MODIFY:
        fields = F_ACTIONS | F_TIMEOUT | F_PRIORITY | F_FLAGS;
        break;

    case OFPFC_MODIFY_STRICT:
        fields = F_ACTIONS | F_TIMEOUT | F_PRIORITY | F_FLAGS;
        break;

    default:
        NOT_REACHED();
    }

    /* Start from defaults; the keyword loop below overrides them. */
    match_init_catchall(&fm->match);
    fm->priority = OFP_DEFAULT_PRIORITY;
    fm->cookie = htonll(0);
    fm->cookie_mask = htonll(0);
    if (command == OFPFC_MODIFY || command == OFPFC_MODIFY_STRICT) {
        /* For modify, by default, don't update the cookie. */
        fm->new_cookie = htonll(UINT64_MAX);
    } else{
        fm->new_cookie = htonll(0);
    }
    fm->modify_cookie = false;
    fm->table_id = 0xff;
    fm->command = command;
    fm->idle_timeout = OFP_FLOW_PERMANENT;
    fm->hard_timeout = OFP_FLOW_PERMANENT;
    fm->buffer_id = UINT32_MAX;
    fm->out_port = OFPP_ANY;
    fm->flags = 0;
    if (fields & F_ACTIONS) {
        /* Cut 'string' at the first occurrence of "action" so that the
         * keyword loop only sees the match part; 'act_str' ends up pointing
         * just past the '=' that follows. */
        act_str = strstr(string, "action");
        if (!act_str) {
            return xstrdup("must specify an action");
        }
        *act_str = '\0';

        act_str = strchr(act_str + 1, '=');
        if (!act_str) {
            return xstrdup("must specify an action");
        }

        act_str++;
    }
    for (name = strtok_r(string, "=, \t\r\n", &save_ptr); name;
         name = strtok_r(NULL, "=, \t\r\n", &save_ptr)) {
        const struct protocol *p;
        char *error = NULL;

        if (parse_protocol(name, &p)) {
            match_set_dl_type(&fm->match, htons(p->dl_type));
            if (p->nw_proto) {
                match_set_nw_proto(&fm->match, p->nw_proto);
            }
        } else if (fields & F_FLAGS && !strcmp(name, "send_flow_rem")) {
            fm->flags |= OFPUTIL_FF_SEND_FLOW_REM;
        } else if (fields & F_FLAGS && !strcmp(name, "check_overlap")) {
            fm->flags |= OFPUTIL_FF_CHECK_OVERLAP;
        } else if (fields & F_FLAGS && !strcmp(name, "reset_counts")) {
            fm->flags |= OFPUTIL_FF_RESET_COUNTS;
            *usable_protocols &= OFPUTIL_P_OF12_UP;
        } else if (fields & F_FLAGS && !strcmp(name, "no_packet_counts")) {
            fm->flags |= OFPUTIL_FF_NO_PKT_COUNTS;
            *usable_protocols &= OFPUTIL_P_OF13_UP;
        } else if (fields & F_FLAGS && !strcmp(name, "no_byte_counts")) {
            fm->flags |= OFPUTIL_FF_NO_BYT_COUNTS;
            *usable_protocols &= OFPUTIL_P_OF13_UP;
        } else {
            /* Everything else is KEY=VALUE.  The value token is taken with
             * a delimiter set that excludes '=', so values may contain '='. */
            char *value;

            value = strtok_r(NULL, ", \t\r\n", &save_ptr);
            if (!value) {
                return xasprintf("field %s missing value", name);
            }

            if (!strcmp(name, "table")) {
                error = str_to_u8(value, "table", &fm->table_id);
                if (fm->table_id != 0xff) {
                    *usable_protocols &= OFPUTIL_P_TID;
                }
            } else if (!strcmp(name, "out_port")) {
                if (!ofputil_port_from_string(value, &fm->out_port)) {
                    error = xasprintf("%s is not a valid OpenFlow port",
                                      value);
                }
            } else if (fields & F_PRIORITY && !strcmp(name, "priority")) {
                uint16_t priority = 0;

                error = str_to_u16(value, name, &priority);
                fm->priority = priority;
            } else if (fields & F_TIMEOUT && !strcmp(name, "idle_timeout")) {
                error = str_to_u16(value, name, &fm->idle_timeout);
            } else if (fields & F_TIMEOUT && !strcmp(name, "hard_timeout")) {
                error = str_to_u16(value, name, &fm->hard_timeout);
            } else if (!strcmp(name, "cookie")) {
                char *mask = strchr(value, '/');

                if (mask) {
                    /* A mask means we're searching for a cookie. */
                    if (command == OFPFC_ADD) {
                        return xstrdup("flow additions cannot use "
                                       "a cookie mask");
                    }
                    *mask = '\0';
                    error = str_to_be64(value, &fm->cookie);
                    if (error) {
                        return error;
                    }
                    error = str_to_be64(mask + 1, &fm->cookie_mask);

                    /* Matching of the cookie is only supported through NXM or
                     * OF1.1+. */
                    if (fm->cookie_mask != htonll(0)) {
                        *usable_protocols &= OFPUTIL_P_NXM_OF11_UP;
                    }
                } else {
                    /* No mask means that the cookie is being set. */
                    if (command != OFPFC_ADD && command != OFPFC_MODIFY
                        && command != OFPFC_MODIFY_STRICT) {
                        return xstrdup("cannot set cookie");
                    }
                    error = str_to_be64(value, &fm->new_cookie);
                    fm->modify_cookie = true;
                }
            } else if (mf_from_name(name)) {
                error = parse_field(mf_from_name(name), value, &fm->match,
                                    usable_protocols);
            } else if (!strcmp(name, "duration")
                       || !strcmp(name, "n_packets")
                       || !strcmp(name, "n_bytes")
                       || !strcmp(name, "idle_age")
                       || !strcmp(name, "hard_age")) {
                /* Ignore these, so that users can feed the output of
                 * "ovs-ofctl dump-flows" back into commands that parse
                 * flows. */
            } else {
                error = xasprintf("unknown keyword %s", name);
            }

            if (error) {
                return error;
            }
        }
    }
    /* Check for usable protocol interdependencies between match fields. */
    if (fm->match.flow.dl_type == htons(ETH_TYPE_IPV6)) {
        const struct flow_wildcards *wc = &fm->match.wc;
        /* Only NXM and OXM support matching L3 and L4 fields within IPv6.
         *
         * (IPv6 specific fields as well as arp_sha, arp_tha, nw_frag, and
         * nw_ttl are covered elsewhere so they don't need to be included in
         * this test too.)
         */
        if (wc->masks.nw_proto || wc->masks.nw_tos
            || wc->masks.tp_src || wc->masks.tp_dst) {
            *usable_protocols &= OFPUTIL_P_NXM_OXM_ANY;
        }
    }
    if (!fm->cookie_mask && fm->new_cookie == htonll(UINT64_MAX)
        && (command == OFPFC_MODIFY || command == OFPFC_MODIFY_STRICT)) {
        /* On modifies without a mask, we are supposed to add a flow if
         * one does not exist.  If a cookie wasn't specified, use a
         * default of zero. */
        fm->new_cookie = htonll(0);
    }
    if (fields & F_ACTIONS) {
        struct ofpbuf ofpacts;
        char *error;

        ofpbuf_init(&ofpacts, 32);
        error = str_to_inst_ofpacts(act_str, &ofpacts, usable_protocols);
        if (!error) {
            enum ofperr err;

            err = ofpacts_check(ofpacts.data, ofpacts.size, &fm->match.flow,
                                OFPP_MAX, 0);
            if (err) {
                error = xasprintf("actions are invalid with specified match "
                                  "(%s)", ofperr_to_string(err));
            }
        }
        if (error) {
            ofpbuf_uninit(&ofpacts);
            return error;
        }

        /* Success: 'fm' takes ownership of the parsed actions. */
        fm->ofpacts_len = ofpacts.size;
        fm->ofpacts = ofpbuf_steal_data(&ofpacts);
    } else {
        fm->ofpacts_len = 0;
        fm->ofpacts = NULL;
    }

    return NULL;
}

/* Convert 'str_' (as described in the Flow Syntax section of the ovs-ofctl man
 * page) into 'fm' for sending the specified flow_mod 'command' to a switch.
 * Returns the set of usable protocols in '*usable_protocols'.
 *
 * To parse syntax for an OFPT_FLOW_MOD (or NXT_FLOW_MOD), use an OFPFC_*
 * constant for 'command'.  To parse syntax for an OFPST_FLOW or
 * OFPST_AGGREGATE (or NXST_FLOW or NXST_AGGREGATE), use -1 for 'command'.
 *
 * Returns NULL if successful, otherwise a malloc()'d string describing the
 * error.  The caller is responsible for freeing the returned string.
*/ char * WARN_UNUSED_RESULT parse_ofp_str(struct ofputil_flow_mod *fm, int command, const char *str_, enum ofputil_protocol *usable_protocols) { char *string = xstrdup(str_); char *error; error = parse_ofp_str__(fm, command, string, usable_protocols); if (error) { fm->ofpacts = NULL; fm->ofpacts_len = 0; } free(string); return error; } static char * WARN_UNUSED_RESULT parse_ofp_meter_mod_str__(struct ofputil_meter_mod *mm, char *string, struct ofpbuf *bands, int command, enum ofputil_protocol *usable_protocols) { enum { F_METER = 1 << 0, F_FLAGS = 1 << 1, F_BANDS = 1 << 2, } fields; char *save_ptr = NULL; char *band_str = NULL; char *name; /* Meters require at least OF 1.3. */ *usable_protocols &= OFPUTIL_P_OF13_UP; switch (command) { case -1: fields = F_METER; break; case OFPMC13_ADD: fields = F_METER | F_FLAGS | F_BANDS; break; case OFPMC13_DELETE: fields = F_METER; break; case OFPMC13_MODIFY: fields = F_METER | F_FLAGS | F_BANDS; break; default: NOT_REACHED(); } mm->command = command; mm->meter.meter_id = 0; mm->meter.flags = 0; if (fields & F_BANDS) { band_str = strstr(string, "band"); if (!band_str) { return xstrdup("must specify bands"); } *band_str = '\0'; band_str = strchr(band_str + 1, '='); if (!band_str) { return xstrdup("must specify bands"); } band_str++; } for (name = strtok_r(string, "=, \t\r\n", &save_ptr); name; name = strtok_r(NULL, "=, \t\r\n", &save_ptr)) { if (fields & F_FLAGS && !strcmp(name, "kbps")) { mm->meter.flags |= OFPMF13_KBPS; } else if (fields & F_FLAGS && !strcmp(name, "pktps")) { mm->meter.flags |= OFPMF13_PKTPS; } else if (fields & F_FLAGS && !strcmp(name, "burst")) { mm->meter.flags |= OFPMF13_BURST; } else if (fields & F_FLAGS && !strcmp(name, "stats")) { mm->meter.flags |= OFPMF13_STATS; } else { char *value; value = strtok_r(NULL, ", \t\r\n", &save_ptr); if (!value) { return xasprintf("field %s missing value", name); } if (!strcmp(name, "meter")) { if (!strcmp(value, "all")) { mm->meter.meter_id = OFPM13_ALL; } else if 
(!strcmp(value, "controller")) { mm->meter.meter_id = OFPM13_CONTROLLER; } else if (!strcmp(value, "slowpath")) { mm->meter.meter_id = OFPM13_SLOWPATH; } else { char *error = str_to_u32(value, &mm->meter.meter_id); if (error) { return error; } if (mm->meter.meter_id > OFPM13_MAX) { return xasprintf("invalid value for %s", name); } } } else { return xasprintf("unknown keyword %s", name); } } } if (fields & F_METER && !mm->meter.meter_id) { return xstrdup("must specify 'meter'"); } if (fields & F_FLAGS && !mm->meter.flags) { return xstrdup("meter must specify either 'kbps' or 'pktps'"); } if (fields & F_BANDS) { uint16_t n_bands = 0; struct ofputil_meter_band *band = NULL; int i; for (name = strtok_r(band_str, "=, \t\r\n", &save_ptr); name; name = strtok_r(NULL, "=, \t\r\n", &save_ptr)) { char *value; value = strtok_r(NULL, ", \t\r\n", &save_ptr); if (!value) { return xasprintf("field %s missing value", name); } if (!strcmp(name, "type")) { /* Start a new band */ band = ofpbuf_put_zeros(bands, sizeof *band); n_bands++; if (!strcmp(value, "drop")) { band->type = OFPMBT13_DROP; } else if (!strcmp(value, "dscp_remark")) { band->type = OFPMBT13_DSCP_REMARK; } else { return xasprintf("field %s unknown value %s", name, value); } } else if (!band || !band->type) { return xstrdup("band must start with the 'type' keyword"); } else if (!strcmp(name, "rate")) { char *error = str_to_u32(value, &band->rate); if (error) { return error; } } else if (!strcmp(name, "burst_size")) { char *error = str_to_u32(value, &band->burst_size); if (error) { return error; } } else if (!strcmp(name, "prec_level")) { char *error = str_to_u8(value, name, &band->prec_level); if (error) { return error; } } else { return xasprintf("unknown keyword %s", name); } } /* validate bands */ if (!n_bands) { return xstrdup("meter must have bands"); } mm->meter.n_bands = n_bands; mm->meter.bands = ofpbuf_steal_data(bands); for (i = 0; i < n_bands; ++i) { band = &mm->meter.bands[i]; if (!band->type) { return 
xstrdup("band must have 'type'"); } if (band->type == OFPMBT13_DSCP_REMARK) { if (!band->prec_level) { return xstrdup("'dscp_remark' band must have" " 'prec_level'"); } } else { if (band->prec_level) { return xstrdup("Only 'dscp_remark' band may have" " 'prec_level'"); } } if (!band->rate) { return xstrdup("band must have 'rate'"); } if (mm->meter.flags & OFPMF13_BURST) { if (!band->burst_size) { return xstrdup("band must have 'burst_size' " "when 'burst' flag is set"); } } else { if (band->burst_size) { return xstrdup("band may have 'burst_size' only " "when 'burst' flag is set"); } } } } else { mm->meter.n_bands = 0; mm->meter.bands = NULL; } return NULL; } /* Convert 'str_' (as described in the Flow Syntax section of the ovs-ofctl man * page) into 'mm' for sending the specified meter_mod 'command' to a switch. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ char * WARN_UNUSED_RESULT parse_ofp_meter_mod_str(struct ofputil_meter_mod *mm, const char *str_, int command, enum ofputil_protocol *usable_protocols) { struct ofpbuf bands; char *string; char *error; ofpbuf_init(&bands, 64); string = xstrdup(str_); error = parse_ofp_meter_mod_str__(mm, string, &bands, command, usable_protocols); free(string); ofpbuf_uninit(&bands); return error; } static char * WARN_UNUSED_RESULT parse_flow_monitor_request__(struct ofputil_flow_monitor_request *fmr, const char *str_, char *string, enum ofputil_protocol *usable_protocols) { static atomic_uint32_t id = ATOMIC_VAR_INIT(0); char *save_ptr = NULL; char *name; atomic_add(&id, 1, &fmr->id); fmr->flags = (NXFMF_INITIAL | NXFMF_ADD | NXFMF_DELETE | NXFMF_MODIFY | NXFMF_OWN | NXFMF_ACTIONS); fmr->out_port = OFPP_NONE; fmr->table_id = 0xff; match_init_catchall(&fmr->match); for (name = strtok_r(string, "=, \t\r\n", &save_ptr); name; name = strtok_r(NULL, "=, \t\r\n", &save_ptr)) { const struct protocol *p; if (!strcmp(name, "!initial")) 
{ fmr->flags &= ~NXFMF_INITIAL; } else if (!strcmp(name, "!add")) { fmr->flags &= ~NXFMF_ADD; } else if (!strcmp(name, "!delete")) { fmr->flags &= ~NXFMF_DELETE; } else if (!strcmp(name, "!modify")) { fmr->flags &= ~NXFMF_MODIFY; } else if (!strcmp(name, "!actions")) { fmr->flags &= ~NXFMF_ACTIONS; } else if (!strcmp(name, "!own")) { fmr->flags &= ~NXFMF_OWN; } else if (parse_protocol(name, &p)) { match_set_dl_type(&fmr->match, htons(p->dl_type)); if (p->nw_proto) { match_set_nw_proto(&fmr->match, p->nw_proto); } } else { char *value; value = strtok_r(NULL, ", \t\r\n", &save_ptr); if (!value) { return xasprintf("%s: field %s missing value", str_, name); } if (!strcmp(name, "table")) { char *error = str_to_u8(value, "table", &fmr->table_id); if (error) { return error; } } else if (!strcmp(name, "out_port")) { fmr->out_port = u16_to_ofp(atoi(value)); } else if (mf_from_name(name)) { char *error; error = parse_field(mf_from_name(name), value, &fmr->match, usable_protocols); if (error) { return error; } } else { return xasprintf("%s: unknown keyword %s", str_, name); } } } return NULL; } /* Convert 'str_' (as described in the documentation for the "monitor" command * in the ovs-ofctl man page) into 'fmr'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ char * WARN_UNUSED_RESULT parse_flow_monitor_request(struct ofputil_flow_monitor_request *fmr, const char *str_, enum ofputil_protocol *usable_protocols) { char *string = xstrdup(str_); char *error = parse_flow_monitor_request__(fmr, str_, string, usable_protocols); free(string); return error; } /* Parses 's' as a set of OpenFlow actions and appends the actions to * 'actions'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. 
*/ char * WARN_UNUSED_RESULT parse_ofpacts(const char *s_, struct ofpbuf *ofpacts, enum ofputil_protocol *usable_protocols) { char *s = xstrdup(s_); char *error; *usable_protocols = OFPUTIL_P_ANY; error = str_to_ofpacts(s, ofpacts, usable_protocols); free(s); return error; } /* Parses 'string' as an OFPT_FLOW_MOD or NXT_FLOW_MOD with command 'command' * (one of OFPFC_*) into 'fm'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ char * WARN_UNUSED_RESULT parse_ofp_flow_mod_str(struct ofputil_flow_mod *fm, const char *string, uint16_t command, enum ofputil_protocol *usable_protocols) { char *error = parse_ofp_str(fm, command, string, usable_protocols); if (!error) { /* Normalize a copy of the match. This ensures that non-normalized * flows get logged but doesn't affect what gets sent to the switch, so * that the switch can do whatever it likes with the flow. */ struct match match_copy = fm->match; ofputil_normalize_match(&match_copy); } return error; } /* Opens file 'file_name' and reads each line as a flow_mod of the specified * type (one of OFPFC_*). Stores each flow_mod in '*fm', an array allocated * on the caller's behalf, and the number of flow_mods in '*n_fms'. * * Returns NULL if successful, otherwise a malloc()'d string describing the * error. The caller is responsible for freeing the returned string. */ char * WARN_UNUSED_RESULT parse_ofp_flow_mod_file(const char *file_name, uint16_t command, struct ofputil_flow_mod **fms, size_t *n_fms, enum ofputil_protocol *usable_protocols) { size_t allocated_fms; int line_number; FILE *stream; struct ds s; *usable_protocols = OFPUTIL_P_ANY; *fms = NULL; *n_fms = 0; stream = !strcmp(file_name, "-") ? 
stdin : fopen(file_name, "r"); if (stream == NULL) { return xasprintf("%s: open failed (%s)", file_name, ovs_strerror(errno)); } allocated_fms = *n_fms; ds_init(&s); line_number = 0; while (!ds_get_preprocessed_line(&s, stream, &line_number)) { char *error; enum ofputil_protocol usable; if (*n_fms >= allocated_fms) { *fms = x2nrealloc(*fms, &allocated_fms, sizeof **fms); } error = parse_ofp_flow_mod_str(&(*fms)[*n_fms], ds_cstr(&s), command, &usable); if (error) { size_t i; for (i = 0; i < *n_fms; i++) { free((*fms)[i].ofpacts); } free(*fms); *fms = NULL; *n_fms = 0; ds_destroy(&s); if (stream != stdin) { fclose(stream); } return xasprintf("%s:%d: %s", file_name, line_number, error); } *usable_protocols &= usable; /* Each line can narrow the set. */ *n_fms += 1; } ds_destroy(&s); if (stream != stdin) { fclose(stream); } return NULL; } char * WARN_UNUSED_RESULT parse_ofp_flow_stats_request_str(struct ofputil_flow_stats_request *fsr, bool aggregate, const char *string, enum ofputil_protocol *usable_protocols) { struct ofputil_flow_mod fm; char *error; error = parse_ofp_str(&fm, -1, string, usable_protocols); if (error) { return error; } /* Special table ID support not required for stats requests. */ if (*usable_protocols & OFPUTIL_P_OF10_STD_TID) { *usable_protocols |= OFPUTIL_P_OF10_STD; } if (*usable_protocols & OFPUTIL_P_OF10_NXM_TID) { *usable_protocols |= OFPUTIL_P_OF10_NXM; } fsr->aggregate = aggregate; fsr->cookie = fm.cookie; fsr->cookie_mask = fm.cookie_mask; fsr->match = fm.match; fsr->out_port = fm.out_port; fsr->table_id = fm.table_id; return NULL; } /* Parses a specification of a flow from 's' into 'flow'. 's' must take the * form FIELD=VALUE[,FIELD=VALUE]... where each FIELD is the name of a * mf_field. Fields must be specified in a natural order for satisfying * prerequisites. * * Returns NULL on success, otherwise a malloc()'d string that explains the * problem. 
 */
char *
parse_ofp_exact_flow(struct flow *flow, const char *s)
{
    char *pos, *key, *value_s;
    char *error = NULL;
    char *copy;

    memset(flow, 0, sizeof *flow);

    /* Parse a private copy of 's'; 's' itself is only used to quote the
     * original text in error messages.  'copy' is freed at 'exit'. */
    pos = copy = xstrdup(s);
    while (ofputil_parse_key_value(&pos, &key, &value_s)) {
        const struct protocol *p;
        if (parse_protocol(key, &p)) {
            /* Protocol keyword: sets the Ethernet type and, when the table
             * entry has one, the network protocol.  Each may be set once. */
            if (flow->dl_type) {
                error = xasprintf("%s: Ethernet type set multiple times", s);
                goto exit;
            }
            flow->dl_type = htons(p->dl_type);

            if (p->nw_proto) {
                if (flow->nw_proto) {
                    error = xasprintf("%s: network protocol set "
                                      "multiple times", s);
                    goto exit;
                }
                flow->nw_proto = p->nw_proto;
            }
        } else {
            const struct mf_field *mf;
            union mf_value value;
            char *field_error;

            mf = mf_from_name(key);
            if (!mf) {
                error = xasprintf("%s: unknown field %s", s, key);
                goto exit;
            }

            if (!mf_are_prereqs_ok(mf, flow)) {
                error = xasprintf("%s: prerequisites not met for setting %s",
                                  s, key);
                goto exit;
            }

            /* A field that is already nonzero is treated as having been set
             * earlier in the string. */
            if (!mf_is_zero(mf, flow)) {
                error = xasprintf("%s: field %s set multiple times", s, key);
                goto exit;
            }

            field_error = mf_parse_value(mf, value_s, &value);
            if (field_error) {
                /* Wrap the field parser's message, then release it. */
                error = xasprintf("%s: bad value for %s (%s)",
                                  s, key, field_error);
                free(field_error);
                goto exit;
            }

            mf_set_flow_value(mf, &value, flow);
        }
    }

    /* An in_port left at zero becomes OFPP_NONE. */
    if (!flow->in_port.ofp_port) {
        flow->in_port.ofp_port = OFPP_NONE;
    }

exit:
    free(copy);

    /* Never hand back a partially filled flow. */
    if (error) {
        memset(flow, 0, sizeof *flow);
    }
    return error;
}
openvswitch-2.0.1+git20140120/lib/ofp-parse.h000066400000000000000000000047171226605124000202760ustar00rootroot00000000000000/*
 * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * limitations under the License. */ /* OpenFlow protocol string to flow parser. */ #ifndef OFP_PARSE_H #define OFP_PARSE_H 1 #include #include #include #include "compiler.h" struct flow; struct ofpbuf; struct ofputil_flow_mod; struct ofputil_flow_monitor_request; struct ofputil_flow_stats_request; struct ofputil_meter_mod; enum ofputil_protocol; char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_, enum ofputil_protocol *usable_protocols) WARN_UNUSED_RESULT; char *parse_ofp_flow_mod_str(struct ofputil_flow_mod *, const char *string, uint16_t command, enum ofputil_protocol *usable_protocols) WARN_UNUSED_RESULT; char *parse_ofp_flow_mod_file(const char *file_name, uint16_t command, struct ofputil_flow_mod **fms, size_t *n_fms, enum ofputil_protocol *usable_protocols) WARN_UNUSED_RESULT; char *parse_ofp_flow_stats_request_str(struct ofputil_flow_stats_request *, bool aggregate, const char *string, enum ofputil_protocol *usable_protocols) WARN_UNUSED_RESULT; char *parse_ofpacts(const char *, struct ofpbuf *ofpacts, enum ofputil_protocol *usable_protocols) WARN_UNUSED_RESULT; char *parse_ofp_exact_flow(struct flow *, const char *); char *parse_ofp_meter_mod_str(struct ofputil_meter_mod *, const char *string, int command, enum ofputil_protocol *usable_protocols) WARN_UNUSED_RESULT; char *parse_flow_monitor_request(struct ofputil_flow_monitor_request *, const char *, enum ofputil_protocol *usable_protocols) WARN_UNUSED_RESULT; #endif /* ofp-parse.h */ openvswitch-2.0.1+git20140120/lib/ofp-print.c000066400000000000000000002142051226605124000203060ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ofp-print.h" #include #include #include #include #include #include #include #include #include "bundle.h" #include "byte-order.h" #include "compiler.h" #include "dynamic-string.h" #include "flow.h" #include "learn.h" #include "multipath.h" #include "meta-flow.h" #include "netdev.h" #include "nx-match.h" #include "ofp-actions.h" #include "ofp-errors.h" #include "ofp-msgs.h" #include "ofp-util.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "openflow/nicira-ext.h" #include "packets.h" #include "type-props.h" #include "unaligned.h" #include "util.h" static void ofp_print_queue_name(struct ds *string, uint32_t port); static void ofp_print_error(struct ds *, enum ofperr); /* Returns a string that represents the contents of the Ethernet frame in the * 'len' bytes starting at 'data'. 
The caller must free the returned string.*/ char * ofp_packet_to_string(const void *data, size_t len) { struct ds ds = DS_EMPTY_INITIALIZER; struct ofpbuf buf; struct flow flow; ofpbuf_use_const(&buf, data, len); flow_extract(&buf, 0, 0, NULL, NULL, &flow); flow_format(&ds, &flow); if (buf.l7) { if (flow.nw_proto == IPPROTO_TCP) { struct tcp_header *th = buf.l4; ds_put_format(&ds, " tcp_csum:%"PRIx16, ntohs(th->tcp_csum)); } else if (flow.nw_proto == IPPROTO_UDP) { struct udp_header *uh = buf.l4; ds_put_format(&ds, " udp_csum:%"PRIx16, ntohs(uh->udp_csum)); } else if (flow.nw_proto == IPPROTO_SCTP) { struct sctp_header *sh = buf.l4; ds_put_format(&ds, " sctp_csum:%"PRIx32, ntohl(sh->sctp_csum)); } } ds_put_char(&ds, '\n'); return ds_cstr(&ds); } static void ofp_print_packet_in(struct ds *string, const struct ofp_header *oh, int verbosity) { char reasonbuf[OFPUTIL_PACKET_IN_REASON_BUFSIZE]; struct ofputil_packet_in pin; int error; int i; error = ofputil_decode_packet_in(&pin, oh); if (error) { ofp_print_error(string, error); return; } if (pin.table_id) { ds_put_format(string, " table_id=%"PRIu8, pin.table_id); } if (pin.cookie) { ds_put_format(string, " cookie=0x%"PRIx64, ntohll(pin.cookie)); } ds_put_format(string, " total_len=%"PRIu16" in_port=", pin.total_len); ofputil_format_port(pin.fmd.in_port, string); if (pin.fmd.tun_id != htonll(0)) { ds_put_format(string, " tun_id=0x%"PRIx64, ntohll(pin.fmd.tun_id)); } if (pin.fmd.tun_src != htonl(0)) { ds_put_format(string, " tun_src="IP_FMT, IP_ARGS(pin.fmd.tun_src)); } if (pin.fmd.tun_dst != htonl(0)) { ds_put_format(string, " tun_dst="IP_FMT, IP_ARGS(pin.fmd.tun_dst)); } if (pin.fmd.metadata != htonll(0)) { ds_put_format(string, " metadata=0x%"PRIx64, ntohll(pin.fmd.metadata)); } for (i = 0; i < FLOW_N_REGS; i++) { if (pin.fmd.regs[i]) { ds_put_format(string, " reg%d=0x%"PRIx32, i, pin.fmd.regs[i]); } } if (pin.fmd.pkt_mark != 0) { ds_put_format(string, " pkt_mark=0x%"PRIx32, pin.fmd.pkt_mark); } ds_put_format(string, 
" (via %s)", ofputil_packet_in_reason_to_string(pin.reason, reasonbuf, sizeof reasonbuf)); ds_put_format(string, " data_len=%zu", pin.packet_len); if (pin.buffer_id == UINT32_MAX) { ds_put_format(string, " (unbuffered)"); if (pin.total_len != pin.packet_len) { ds_put_format(string, " (***total_len != data_len***)"); } } else { ds_put_format(string, " buffer=0x%08"PRIx32, pin.buffer_id); if (pin.total_len < pin.packet_len) { ds_put_format(string, " (***total_len < data_len***)"); } } ds_put_char(string, '\n'); if (verbosity > 0) { char *packet = ofp_packet_to_string(pin.packet, pin.packet_len); ds_put_cstr(string, packet); free(packet); } if (verbosity > 2) { ds_put_hex_dump(string, pin.packet, pin.packet_len, 0, false); } } static void ofp_print_packet_out(struct ds *string, const struct ofp_header *oh, int verbosity) { struct ofputil_packet_out po; struct ofpbuf ofpacts; enum ofperr error; ofpbuf_init(&ofpacts, 64); error = ofputil_decode_packet_out(&po, oh, &ofpacts); if (error) { ofpbuf_uninit(&ofpacts); ofp_print_error(string, error); return; } ds_put_cstr(string, " in_port="); ofputil_format_port(po.in_port, string); ds_put_char(string, ' '); ofpacts_format(po.ofpacts, po.ofpacts_len, string); if (po.buffer_id == UINT32_MAX) { ds_put_format(string, " data_len=%zu", po.packet_len); if (verbosity > 0 && po.packet_len > 0) { char *packet = ofp_packet_to_string(po.packet, po.packet_len); ds_put_char(string, '\n'); ds_put_cstr(string, packet); free(packet); } if (verbosity > 2) { ds_put_hex_dump(string, po.packet, po.packet_len, 0, false); } } else { ds_put_format(string, " buffer=0x%08"PRIx32, po.buffer_id); } ofpbuf_uninit(&ofpacts); } /* qsort comparison function. */ static int compare_ports(const void *a_, const void *b_) { const struct ofputil_phy_port *a = a_; const struct ofputil_phy_port *b = b_; uint16_t ap = ofp_to_u16(a->port_no); uint16_t bp = ofp_to_u16(b->port_no); return ap < bp ? 
-1 : ap > bp; } static void ofp_print_bit_names(struct ds *string, uint32_t bits, const char *(*bit_to_name)(uint32_t bit), char separator) { int n = 0; int i; if (!bits) { ds_put_cstr(string, "0"); return; } for (i = 0; i < 32; i++) { uint32_t bit = UINT32_C(1) << i; if (bits & bit) { const char *name = bit_to_name(bit); if (name) { if (n++) { ds_put_char(string, separator); } ds_put_cstr(string, name); bits &= ~bit; } } } if (bits) { if (n) { ds_put_char(string, separator); } ds_put_format(string, "0x%"PRIx32, bits); } } static const char * netdev_feature_to_name(uint32_t bit) { enum netdev_features f = bit; switch (f) { case NETDEV_F_10MB_HD: return "10MB-HD"; case NETDEV_F_10MB_FD: return "10MB-FD"; case NETDEV_F_100MB_HD: return "100MB-HD"; case NETDEV_F_100MB_FD: return "100MB-FD"; case NETDEV_F_1GB_HD: return "1GB-HD"; case NETDEV_F_1GB_FD: return "1GB-FD"; case NETDEV_F_10GB_FD: return "10GB-FD"; case NETDEV_F_40GB_FD: return "40GB-FD"; case NETDEV_F_100GB_FD: return "100GB-FD"; case NETDEV_F_1TB_FD: return "1TB-FD"; case NETDEV_F_OTHER: return "OTHER"; case NETDEV_F_COPPER: return "COPPER"; case NETDEV_F_FIBER: return "FIBER"; case NETDEV_F_AUTONEG: return "AUTO_NEG"; case NETDEV_F_PAUSE: return "AUTO_PAUSE"; case NETDEV_F_PAUSE_ASYM: return "AUTO_PAUSE_ASYM"; } return NULL; } static void ofp_print_port_features(struct ds *string, enum netdev_features features) { ofp_print_bit_names(string, features, netdev_feature_to_name, ' '); ds_put_char(string, '\n'); } static const char * ofputil_port_config_to_name(uint32_t bit) { enum ofputil_port_config pc = bit; switch (pc) { case OFPUTIL_PC_PORT_DOWN: return "PORT_DOWN"; case OFPUTIL_PC_NO_STP: return "NO_STP"; case OFPUTIL_PC_NO_RECV: return "NO_RECV"; case OFPUTIL_PC_NO_RECV_STP: return "NO_RECV_STP"; case OFPUTIL_PC_NO_FLOOD: return "NO_FLOOD"; case OFPUTIL_PC_NO_FWD: return "NO_FWD"; case OFPUTIL_PC_NO_PACKET_IN: return "NO_PACKET_IN"; } return NULL; } static void ofp_print_port_config(struct ds *string, 
enum ofputil_port_config config)
{
    /* Names are space-separated and the line is terminated here. */
    ofp_print_bit_names(string, config, ofputil_port_config_to_name, ' ');
    ds_put_char(string, '\n');
}

/* Returns the display name for the single OFPUTIL_PS_* port state bit 'bit'.
 * Returns NULL for the OFPUTIL_PS_STP_* values, which are not named here, and
 * for any value not listed in the switch below. */
static const char *
ofputil_port_state_to_name(uint32_t bit)
{
    enum ofputil_port_state ps = bit;

    switch (ps) {
    case OFPUTIL_PS_LINK_DOWN: return "LINK_DOWN";
    case OFPUTIL_PS_BLOCKED:   return "BLOCKED";
    case OFPUTIL_PS_LIVE:      return "LIVE";

    case OFPUTIL_PS_STP_LISTEN:
    case OFPUTIL_PS_STP_LEARN:
    case OFPUTIL_PS_STP_FORWARD:
    case OFPUTIL_PS_STP_BLOCK:
        /* Handled elsewhere. */
        return NULL;
    }

    return NULL;
}

/* Appends to 'string' a textual form of the port state 'state', followed by a
 * new-line.  The bits selected by OFPUTIL_PS_STP_MASK are treated as one
 * multi-bit field and named as a whole; the remaining bits are named
 * individually. */
static void
ofp_print_port_state(struct ds *string, enum ofputil_port_state state)
{
    enum ofputil_port_state stp_state;

    /* The STP state is a 2-bit field so it doesn't fit in with the bitmask
     * pattern.  We have to special case it.
     *
     * OVS doesn't support STP, so this field will always be 0 if we are
     * talking to OVS, so we'd always print STP_LISTEN in that case.
     * Therefore, we don't print anything at all if the value is STP_LISTEN, to
     * avoid confusing users. */
    stp_state = state & OFPUTIL_PS_STP_MASK;
    if (stp_state) {
        ds_put_cstr(string,
                    (stp_state == OFPUTIL_PS_STP_LEARN ? "STP_LEARN"
                     : stp_state == OFPUTIL_PS_STP_FORWARD ?
"STP_FORWARD" : "STP_BLOCK")); state &= ~OFPUTIL_PS_STP_MASK; if (state) { ofp_print_bit_names(string, state, ofputil_port_state_to_name, ' '); } } else { ofp_print_bit_names(string, state, ofputil_port_state_to_name, ' '); } ds_put_char(string, '\n'); } static void ofp_print_phy_port(struct ds *string, const struct ofputil_phy_port *port) { char name[sizeof port->name]; int j; memcpy(name, port->name, sizeof name); for (j = 0; j < sizeof name - 1; j++) { if (!isprint((unsigned char) name[j])) { break; } } name[j] = '\0'; ds_put_char(string, ' '); ofputil_format_port(port->port_no, string); ds_put_format(string, "(%s): addr:"ETH_ADDR_FMT"\n", name, ETH_ADDR_ARGS(port->hw_addr)); ds_put_cstr(string, " config: "); ofp_print_port_config(string, port->config); ds_put_cstr(string, " state: "); ofp_print_port_state(string, port->state); if (port->curr) { ds_put_format(string, " current: "); ofp_print_port_features(string, port->curr); } if (port->advertised) { ds_put_format(string, " advertised: "); ofp_print_port_features(string, port->advertised); } if (port->supported) { ds_put_format(string, " supported: "); ofp_print_port_features(string, port->supported); } if (port->peer) { ds_put_format(string, " peer: "); ofp_print_port_features(string, port->peer); } ds_put_format(string, " speed: %"PRIu32" Mbps now, " "%"PRIu32" Mbps max\n", port->curr_speed / UINT32_C(1000), port->max_speed / UINT32_C(1000)); } /* Given a buffer 'b' that contains an array of OpenFlow ports of type * 'ofp_version', writes a detailed description of each port into * 'string'. 
*/
static void
ofp_print_phy_ports(struct ds *string, uint8_t ofp_version,
                    struct ofpbuf *b)
{
    size_t n_ports;
    struct ofputil_phy_port *ports;
    enum ofperr error;
    size_t i;

    /* Decode every port into a temporary array first, so that the ports can
     * be sorted by port number before any of them is printed. */
    n_ports = ofputil_count_phy_ports(ofp_version, b);

    ports = xmalloc(n_ports * sizeof *ports);
    for (i = 0; i < n_ports; i++) {
        error = ofputil_pull_phy_port(ofp_version, b, &ports[i]);
        if (error) {
            /* On a decode error only the error is printed; none of the
             * ports decoded so far is. */
            ofp_print_error(string, error);
            goto exit;
        }
    }
    qsort(ports, n_ports, sizeof *ports, compare_ports);
    for (i = 0; i < n_ports; i++) {
        ofp_print_phy_port(string, &ports[i]);
    }

exit:
    free(ports);
}

/* Returns the display name for the single OFPUTIL_C_* capability bit 'bit',
 * or NULL if 'bit' is not one of the values handled in the switch below. */
static const char *
ofputil_capabilities_to_name(uint32_t bit)
{
    enum ofputil_capabilities capabilities = bit;

    switch (capabilities) {
    case OFPUTIL_C_FLOW_STATS:   return "FLOW_STATS";
    case OFPUTIL_C_TABLE_STATS:  return "TABLE_STATS";
    case OFPUTIL_C_PORT_STATS:   return "PORT_STATS";
    case OFPUTIL_C_IP_REASM:     return "IP_REASM";
    case OFPUTIL_C_QUEUE_STATS:  return "QUEUE_STATS";
    case OFPUTIL_C_ARP_MATCH_IP: return "ARP_MATCH_IP";
    case OFPUTIL_C_STP:          return "STP";
    case OFPUTIL_C_GROUP_STATS:  return "GROUP_STATS";
    case OFPUTIL_C_PORT_BLOCKED: return "PORT_BLOCKED";
    }

    return NULL;
}

/* Returns the display name for the single OFPUTIL_A_* action bit 'bit', or
 * NULL if 'bit' is not one of the values handled in the switch below. */
static const char *
ofputil_action_bitmap_to_name(uint32_t bit)
{
    enum ofputil_action_bitmap action = bit;

    switch (action) {
    case OFPUTIL_A_OUTPUT:         return "OUTPUT";
    case OFPUTIL_A_SET_VLAN_VID:   return "SET_VLAN_VID";
    case OFPUTIL_A_SET_VLAN_PCP:   return "SET_VLAN_PCP";
    case OFPUTIL_A_STRIP_VLAN:     return "STRIP_VLAN";
    case OFPUTIL_A_SET_DL_SRC:     return "SET_DL_SRC";
    case OFPUTIL_A_SET_DL_DST:     return "SET_DL_DST";
    case OFPUTIL_A_SET_NW_SRC:     return "SET_NW_SRC";
    case OFPUTIL_A_SET_NW_DST:     return "SET_NW_DST";
    case OFPUTIL_A_SET_NW_ECN:     return "SET_NW_ECN";
    case OFPUTIL_A_SET_NW_TOS:     return "SET_NW_TOS";
    case OFPUTIL_A_SET_TP_SRC:     return "SET_TP_SRC";
    case OFPUTIL_A_SET_TP_DST:     return "SET_TP_DST";
    case OFPUTIL_A_SET_FIELD:      return "SET_FIELD";
    case OFPUTIL_A_ENQUEUE:        return "ENQUEUE";
    case OFPUTIL_A_COPY_TTL_OUT:   return "COPY_TTL_OUT";
    case
OFPUTIL_A_COPY_TTL_IN:    return "COPY_TTL_IN";
    case OFPUTIL_A_SET_MPLS_LABEL: return "SET_MPLS_LABEL";
    case OFPUTIL_A_SET_MPLS_TC:    return "SET_MPLS_TC";
    case OFPUTIL_A_SET_MPLS_TTL:   return "SET_MPLS_TTL";
    case OFPUTIL_A_DEC_MPLS_TTL:   return "DEC_MPLS_TTL";
    case OFPUTIL_A_PUSH_VLAN:      return "PUSH_VLAN";
    case OFPUTIL_A_POP_VLAN:       return "POP_VLAN";
    case OFPUTIL_A_PUSH_MPLS:      return "PUSH_MPLS";
    case OFPUTIL_A_POP_MPLS:       return "POP_MPLS";
    case OFPUTIL_A_SET_QUEUE:      return "SET_QUEUE";
    case OFPUTIL_A_GROUP:          return "GROUP";
    case OFPUTIL_A_SET_NW_TTL:     return "SET_NW_TTL";
    case OFPUTIL_A_DEC_NW_TTL:     return "DEC_NW_TTL";
    }

    return NULL;
}

/* Appends to 'string' a description of the switch features message 'oh':
 * datapath id, table and buffer counts, the auxiliary id when it is nonzero,
 * and the capability names.  For OpenFlow 1.0 the supported action names are
 * added as well.  For OpenFlow 1.0 through 1.2 the ports that follow in the
 * message are then printed; for OpenFlow 1.3 the function returns before the
 * port list.
 *
 * If 'oh' cannot be decoded, only the decode error is appended. */
static void
ofp_print_switch_features(struct ds *string, const struct ofp_header *oh)
{
    struct ofputil_switch_features features;
    enum ofperr error;
    struct ofpbuf b;

    error = ofputil_decode_switch_features(oh, &features, &b);
    if (error) {
        ofp_print_error(string, error);
        return;
    }

    ds_put_format(string, " dpid:%016"PRIx64"\n", features.datapath_id);

    ds_put_format(string, "n_tables:%"PRIu8", n_buffers:%"PRIu32,
                  features.n_tables, features.n_buffers);
    if (features.auxiliary_id) {
        ds_put_format(string, ", auxiliary_id:%"PRIu8, features.auxiliary_id);
    }
    ds_put_char(string, '\n');

    ds_put_cstr(string, "capabilities: ");
    ofp_print_bit_names(string, features.capabilities,
                        ofputil_capabilities_to_name, ' ');
    ds_put_char(string, '\n');

    switch ((enum ofp_version)oh->version) {
    case OFP10_VERSION:
        /* The action bitmap is printed only for this version. */
        ds_put_cstr(string, "actions: ");
        ofp_print_bit_names(string, features.actions,
                            ofputil_action_bitmap_to_name, ' ');
        ds_put_char(string, '\n');
        break;
    case OFP11_VERSION:
    case OFP12_VERSION:
        break;
    case OFP13_VERSION:
        return; /* no ports in ofp13_switch_features */
    default:
        NOT_REACHED();
    }

    /* 'b' was set up by ofputil_decode_switch_features() above and is passed
     * on as the buffer holding the port array. */
    ofp_print_phy_ports(string, oh->version, &b);
}

/* Appends to 'string' a description of the switch configuration 'osc': the
 * fragment handling mode, the invalid_ttl_to_controller flag if set, any
 * remaining flag bits in hexadecimal, and the miss_send_len value. */
static void
ofp_print_switch_config(struct ds *string, const struct ofp_switch_config *osc)
{
    enum ofp_config_flags flags;

    flags = ntohs(osc->flags);

    /* Each part of 'flags' is cleared once it has been printed, so that
     * whatever remains at the end can be reported as unknown. */
    ds_put_format(string, " frags=%s", ofputil_frag_handling_to_string(flags));
    flags &=
~OFPC_FRAG_MASK;

    if (flags & OFPC_INVALID_TTL_TO_CONTROLLER) {
        ds_put_format(string, " invalid_ttl_to_controller");
        flags &= ~OFPC_INVALID_TTL_TO_CONTROLLER;
    }

    /* Any bit still set was not recognized above. */
    if (flags) {
        ds_put_format(string, " ***unknown flags 0x%04"PRIx16"***", flags);
    }

    ds_put_format(string, " miss_send_len=%"PRIu16"\n", ntohs(osc->miss_send_len));
}

/* Forward declaration so that the printf format attribute can be attached:
 * 'format' is argument 5 and the variable arguments start at argument 6. */
static void print_wild(struct ds *string, const char *leader, int is_wild,
                       int verbosity, const char *format, ...)
            __attribute__((format(printf, 5, 6)));

/* Appends one match field to 'string' in the form "<leader><value>,".
 *
 * If 'is_wild' is zero, the value is produced from the printf-style 'format'
 * and the arguments that follow it.  If 'is_wild' is nonzero, the value is
 * "*", and the field is omitted altogether when 'verbosity' is less than
 * 2. */
static void
print_wild(struct ds *string, const char *leader, int is_wild,
           int verbosity, const char *format, ...)
{
    if (is_wild && verbosity < 2) {
        return;
    }
    ds_put_cstr(string, leader);
    if (!is_wild) {
        va_list args;

        va_start(args, format);
        ds_put_format_valist(string, format, args);
        va_end(args);
    } else {
        ds_put_char(string, '*');
    }
    ds_put_char(string, ',');
}

/* Like print_wild(), but the value of a non-wildcarded field is the port
 * 'port' as formatted by ofputil_format_port(). */
static void
print_wild_port(struct ds *string, const char *leader, int is_wild,
                int verbosity, ofp_port_t port)
{
    if (is_wild && verbosity < 2) {
        return;
    }
    ds_put_cstr(string, leader);
    if (!is_wild) {
        ofputil_format_port(port, string);
    } else {
        ds_put_char(string, '*');
    }
    ds_put_char(string, ',');
}

/* Appends an IP address match field to 'string' in the form
 * "<leader><value>,".
 *
 * 'wild_bits' is the number of wildcarded bits.  With fewer than 32 of them,
 * the value is the address 'ip', followed by "/<32 - wild_bits>" unless
 * 'wild_bits' is 0.  With 32 or more, the value is "*", and the field is
 * omitted altogether when 'verbosity' is less than 2. */
static void
print_ip_netmask(struct ds *string, const char *leader, ovs_be32 ip,
                 uint32_t wild_bits, int verbosity)
{
    if (wild_bits >= 32 && verbosity < 2) {
        return;
    }
    ds_put_cstr(string, leader);
    if (wild_bits < 32) {
        ds_put_format(string, IP_FMT, IP_ARGS(ip));
        if (wild_bits) {
            ds_put_format(string, "/%d", 32 - wild_bits);
        }
    } else {
        ds_put_char(string, '*');
    }
    ds_put_char(string, ',');
}

/* Appends to 'f' the string form of 'om' produced by
 * ofp10_match_to_string() at the given 'verbosity'. */
void
ofp10_match_print(struct ds *f, const struct ofp10_match *om, int verbosity)
{
    char *s = ofp10_match_to_string(om, verbosity);
    ds_put_cstr(f, s);
    free(s);
}

/* Returns a comma-separated string form of the OpenFlow 1.0 match 'om'.
 * Wildcarded fields are left out unless 'verbosity' is at least 2, in which
 * case they are shown as "*".  The returned string is freed with free() by
 * ofp10_match_print(); other callers presumably must do the same. */
char *
ofp10_match_to_string(const struct ofp10_match *om, int verbosity)
{
    struct ds f = DS_EMPTY_INITIALIZER;
    uint32_t w = ntohl(om->wildcards);
    bool skip_type = false;     /* dl_type is implied by a keyword prefix. */
    bool skip_proto = false;    /* nw_proto is implied by a keyword prefix. */

    /* When the Ethernet type (and, for IP, the protocol) is matched exactly
     * and has a well-known keyword, that keyword is printed first and the
     * corresponding explicit field is suppressed further down. */
    if (!(w & OFPFW10_DL_TYPE)) {
        skip_type = true;
        if (om->dl_type == htons(ETH_TYPE_IP)) {
            if (!(w &
OFPFW10_NW_PROTO)) {
                skip_proto = true;
                if (om->nw_proto == IPPROTO_ICMP) {
                    ds_put_cstr(&f, "icmp,");
                } else if (om->nw_proto == IPPROTO_TCP) {
                    ds_put_cstr(&f, "tcp,");
                } else if (om->nw_proto == IPPROTO_UDP) {
                    ds_put_cstr(&f, "udp,");
                } else if (om->nw_proto == IPPROTO_SCTP) {
                    ds_put_cstr(&f, "sctp,");
                } else {
                    /* No keyword for this protocol: fall back to "ip" and
                     * let nw_proto be printed explicitly below. */
                    ds_put_cstr(&f, "ip,");
                    skip_proto = false;
                }
            } else {
                ds_put_cstr(&f, "ip,");
            }
        } else if (om->dl_type == htons(ETH_TYPE_ARP)) {
            ds_put_cstr(&f, "arp,");
        } else if (om->dl_type == htons(ETH_TYPE_RARP)){
            ds_put_cstr(&f, "rarp,");
        } else if (om->dl_type == htons(ETH_TYPE_MPLS)) {
            ds_put_cstr(&f, "mpls,");
        } else if (om->dl_type == htons(ETH_TYPE_MPLS_MCAST)) {
            ds_put_cstr(&f, "mplsm,");
        } else {
            /* No keyword for this Ethernet type: print dl_type explicitly. */
            skip_type = false;
        }
    }

    /* Each helper below appends "name=value," or nothing, so the fields
     * simply accumulate in this fixed order. */
    print_wild_port(&f, "in_port=", w & OFPFW10_IN_PORT, verbosity,
                    u16_to_ofp(ntohs(om->in_port)));
    print_wild(&f, "dl_vlan=", w & OFPFW10_DL_VLAN, verbosity,
               "%d", ntohs(om->dl_vlan));
    print_wild(&f, "dl_vlan_pcp=", w & OFPFW10_DL_VLAN_PCP, verbosity,
               "%d", om->dl_vlan_pcp);
    print_wild(&f, "dl_src=", w & OFPFW10_DL_SRC, verbosity,
               ETH_ADDR_FMT, ETH_ADDR_ARGS(om->dl_src));
    print_wild(&f, "dl_dst=", w & OFPFW10_DL_DST, verbosity,
               ETH_ADDR_FMT, ETH_ADDR_ARGS(om->dl_dst));
    if (!skip_type) {
        print_wild(&f, "dl_type=", w & OFPFW10_DL_TYPE, verbosity,
                   "0x%04x", ntohs(om->dl_type));
    }

    /* The shifted wildcard fields give the number of wildcarded address
     * bits that print_ip_netmask() expects. */
    print_ip_netmask(&f, "nw_src=", om->nw_src,
                     (w & OFPFW10_NW_SRC_MASK) >> OFPFW10_NW_SRC_SHIFT,
                     verbosity);
    print_ip_netmask(&f, "nw_dst=", om->nw_dst,
                     (w & OFPFW10_NW_DST_MASK) >> OFPFW10_NW_DST_SHIFT,
                     verbosity);
    if (!skip_proto) {
        /* For ARP and RARP the nw_proto member is labelled "arp_op". */
        if (om->dl_type == htons(ETH_TYPE_ARP) ||
            om->dl_type == htons(ETH_TYPE_RARP)) {
            print_wild(&f, "arp_op=", w & OFPFW10_NW_PROTO, verbosity,
                       "%u", om->nw_proto);
        } else {
            print_wild(&f, "nw_proto=", w & OFPFW10_NW_PROTO, verbosity,
                       "%u", om->nw_proto);
        }
    }
    print_wild(&f, "nw_tos=", w & OFPFW10_NW_TOS, verbosity,
               "%u", om->nw_tos);

    /* For ICMP the tp_src and tp_dst members are labelled as ICMP type and
     * code. */
    if (om->nw_proto == IPPROTO_ICMP) {
        print_wild(&f, "icmp_type=", w & OFPFW10_ICMP_TYPE, verbosity,
                   "%d",
ntohs(om->tp_src));
        print_wild(&f, "icmp_code=", w & OFPFW10_ICMP_CODE, verbosity,
                   "%d", ntohs(om->tp_dst));
    } else {
        print_wild(&f, "tp_src=", w & OFPFW10_TP_SRC, verbosity,
                   "%d", ntohs(om->tp_src));
        print_wild(&f, "tp_dst=", w & OFPFW10_TP_DST, verbosity,
                   "%d", ntohs(om->tp_dst));
    }

    /* Every field ends in a comma; drop the one after the last field. */
    if (ds_last(&f) == ',') {
        f.length--;
    }
    return ds_cstr(&f);
}

/* Appends to 's' the name of each flag that is set in 'flags', each followed
 * by a space.  Bits other than the five tested below are ignored. */
static void
ofp_print_flow_flags(struct ds *s, enum ofputil_flow_mod_flags flags)
{
    if (flags & OFPUTIL_FF_SEND_FLOW_REM) {
        ds_put_cstr(s, "send_flow_rem ");
    }
    if (flags & OFPUTIL_FF_CHECK_OVERLAP) {
        ds_put_cstr(s, "check_overlap ");
    }
    if (flags & OFPUTIL_FF_RESET_COUNTS) {
        ds_put_cstr(s, "reset_counts ");
    }
    if (flags & OFPUTIL_FF_NO_PKT_COUNTS) {
        ds_put_cstr(s, "no_packet_counts ");
    }
    if (flags & OFPUTIL_FF_NO_BYT_COUNTS) {
        ds_put_cstr(s, "no_byte_counts ");
    }
}

/* Appends to 's' a description of the flow mod message 'oh': the command,
 * the table id when it is nonzero, the match, the cookie, timeouts, priority,
 * buffer id and output port where they differ from the defaults tested
 * below, the flags, and the actions.
 *
 * At 'verbosity' 3 or higher, OpenFlow 1.0 and NXM flow mods have their match
 * printed from the raw message rather than from the decoded form.
 *
 * If 'oh' cannot be decoded, only the decode error is appended. */
static void
ofp_print_flow_mod(struct ds *s, const struct ofp_header *oh, int verbosity)
{
    struct ofputil_flow_mod fm;
    struct ofpbuf ofpacts;
    bool need_priority;
    enum ofperr error;
    enum ofpraw raw;
    enum ofputil_protocol protocol;

    protocol = ofputil_protocol_from_ofp_version(oh->version);
    protocol = ofputil_protocol_set_tid(protocol, true);

    /* 'ofpacts' is handed to the decoder and must be released on every exit
     * path. */
    ofpbuf_init(&ofpacts, 64);
    error = ofputil_decode_flow_mod(&fm, oh, protocol, &ofpacts);
    if (error) {
        ofpbuf_uninit(&ofpacts);
        ofp_print_error(s, error);
        return;
    }

    ds_put_char(s, ' ');
    switch (fm.command) {
    case OFPFC_ADD:
        ds_put_cstr(s, "ADD");
        break;
    case OFPFC_MODIFY:
        ds_put_cstr(s, "MOD");
        break;
    case OFPFC_MODIFY_STRICT:
        ds_put_cstr(s, "MOD_STRICT");
        break;
    case OFPFC_DELETE:
        ds_put_cstr(s, "DEL");
        break;
    case OFPFC_DELETE_STRICT:
        ds_put_cstr(s, "DEL_STRICT");
        break;
    default:
        /* Unrecognized commands are shown numerically. */
        ds_put_format(s, "cmd:%d", fm.command);
    }
    if (fm.table_id != 0) {
        ds_put_format(s, " table:%d", fm.table_id);
    }

    ds_put_char(s, ' ');

    /* The return value is not checked here; 'oh' was already decoded
     * successfully above. */
    ofpraw_decode(&raw, oh);
    if (verbosity >= 3 && raw == OFPRAW_OFPT10_FLOW_MOD) {
        const struct ofp10_flow_mod *ofm = ofpmsg_body(oh);
        ofp10_match_print(s, &ofm->match, verbosity);

        /* ofp10_match_print() doesn't print priority.
*/
        need_priority = true;
    } else if (verbosity >= 3 && raw == OFPRAW_NXT_FLOW_MOD) {
        const struct nx_flow_mod *nfm = ofpmsg_body(oh);
        /* The match is taken from the bytes immediately after the fixed
         * nx_flow_mod header; its length comes from 'match_len'. */
        const void *nxm = nfm + 1;
        char *nxm_s;

        nxm_s = nx_match_to_string(nxm, ntohs(nfm->match_len));
        ds_put_cstr(s, nxm_s);
        free(nxm_s);

        /* nx_match_to_string() doesn't print priority. */
        need_priority = true;
    } else {
        match_format(&fm.match, s, fm.priority);

        /* match_format() does print priority. */
        need_priority = false;
    }

    /* Make sure the match is followed by exactly one separating space. */
    if (ds_last(s) != ' ') {
        ds_put_char(s, ' ');
    }

    /* Each optional item below is printed only when it differs from the
     * value tested against, and each one ends with a space. */
    if (fm.new_cookie != htonll(0) && fm.new_cookie != htonll(UINT64_MAX)) {
        ds_put_format(s, "cookie:0x%"PRIx64" ", ntohll(fm.new_cookie));
    }
    if (fm.cookie_mask != htonll(0)) {
        ds_put_format(s, "cookie:0x%"PRIx64"/0x%"PRIx64" ",
                      ntohll(fm.cookie), ntohll(fm.cookie_mask));
    }
    if (fm.idle_timeout != OFP_FLOW_PERMANENT) {
        ds_put_format(s, "idle:%"PRIu16" ", fm.idle_timeout);
    }
    if (fm.hard_timeout != OFP_FLOW_PERMANENT) {
        ds_put_format(s, "hard:%"PRIu16" ", fm.hard_timeout);
    }
    if (fm.priority != OFP_DEFAULT_PRIORITY && need_priority) {
        ds_put_format(s, "pri:%"PRIu16" ", fm.priority);
    }
    if (fm.buffer_id != UINT32_MAX) {
        ds_put_format(s, "buf:0x%"PRIx32" ", fm.buffer_id);
    }
    if (fm.out_port != OFPP_ANY) {
        ds_put_format(s, "out_port:");
        ofputil_format_port(fm.out_port, s);
        ds_put_char(s, ' ');
    }

    if (oh->version == OFP10_VERSION || oh->version == OFP11_VERSION) {
        /* Don't print the reset_counts flag for OF1.0 and OF1.1 because those
         * versions don't really have such a flag and printing one is likely to
         * confuse people. */
        fm.flags &= ~OFPUTIL_FF_RESET_COUNTS;
    }
    ofp_print_flow_flags(s, fm.flags);

    ofpacts_format(fm.ofpacts, fm.ofpacts_len, s);
    ofpbuf_uninit(&ofpacts);
}

/* Appends to 'string' the duration 'sec' seconds plus 'nsec' nanoseconds in
 * the form "<sec>[.<fraction>]s".  The fraction is present only when 'nsec'
 * is nonzero and is printed without trailing zeros. */
static void
ofp_print_duration(struct ds *string, unsigned int sec, unsigned int nsec)
{
    ds_put_format(string, "%u", sec);
    if (nsec > 0) {
        ds_put_format(string, ".%09u", nsec);
        /* Strip trailing zeros.  'nsec' is nonzero, so at least one nonzero
         * digit was just written and the loop stops there. */
        while (string->string[string->length - 1] == '0') {
            string->length--;
        }
    }
    ds_put_char(string, 's');
}

/* Returns a string form of 'reason'.
The return value is either a statically
 * allocated constant string or the 'bufsize'-byte buffer 'reasonbuf'.
 * 'bufsize' should be at least OFP_FLOW_REMOVED_REASON_BUFSIZE. */
#define OFP_FLOW_REMOVED_REASON_BUFSIZE (INT_STRLEN(int) + 1)
static const char *
ofp_flow_removed_reason_to_string(enum ofp_flow_removed_reason reason,
                                  char *reasonbuf, size_t bufsize)
{
    switch (reason) {
    case OFPRR_IDLE_TIMEOUT:
        return "idle";
    case OFPRR_HARD_TIMEOUT:
        return "hard";
    case OFPRR_DELETE:
        return "delete";
    case OFPRR_GROUP_DELETE:
        return "group_delete";
    case OFPRR_EVICTION:
        return "eviction";
    case OFPRR_METER_DELETE:
        return "meter_delete";
    default:
        /* Unrecognized reasons are formatted as a decimal number into the
         * caller-supplied buffer. */
        snprintf(reasonbuf, bufsize, "%d", (int) reason);
        return reasonbuf;
    }
}

/* Appends to 'string' a description of the flow removed message 'oh': the
 * match, the removal reason, the table id unless it is 255, the cookie unless
 * it is zero, the duration, the idle timeout, the hard timeout when it is
 * nonzero, and the packet and byte counts.
 *
 * If 'oh' cannot be decoded, only the decode error is appended. */
static void
ofp_print_flow_removed(struct ds *string, const struct ofp_header *oh)
{
    char reasonbuf[OFP_FLOW_REMOVED_REASON_BUFSIZE];
    struct ofputil_flow_removed fr;
    enum ofperr error;

    error = ofputil_decode_flow_removed(&fr, oh);
    if (error) {
        ofp_print_error(string, error);
        return;
    }

    ds_put_char(string, ' ');
    match_format(&fr.match, string, fr.priority);

    ds_put_format(string, " reason=%s",
                  ofp_flow_removed_reason_to_string(fr.reason, reasonbuf,
                                                    sizeof reasonbuf));

    if (fr.table_id != 255) {
        ds_put_format(string, " table_id=%"PRIu8, fr.table_id);
    }

    if (fr.cookie != htonll(0)) {
        ds_put_format(string, " cookie:0x%"PRIx64, ntohll(fr.cookie));
    }
    ds_put_cstr(string, " duration");
    ofp_print_duration(string, fr.duration_sec, fr.duration_nsec);
    ds_put_format(string, " idle%"PRIu16, fr.idle_timeout);
    if (fr.hard_timeout) {
        /* The hard timeout was only added in OF1.2, so only print it if it is
         * actually in use to avoid gratuitous change to the formatting.
*/
        ds_put_format(string, " hard%"PRIu16, fr.hard_timeout);
    }
    ds_put_format(string, " pkts%"PRIu64" bytes%"PRIu64"\n",
                  fr.packet_count, fr.byte_count);
}

/* Appends to 'string' a description of the port mod message 'oh': the port
 * number and Ethernet address, the 'config' and 'mask' bitmaps, and the
 * features to advertise, or "UNCHANGED" when 'advertise' is zero.
 *
 * If 'oh' cannot be decoded, only the decode error is appended. */
static void
ofp_print_port_mod(struct ds *string, const struct ofp_header *oh)
{
    struct ofputil_port_mod pm;
    enum ofperr error;

    error = ofputil_decode_port_mod(oh, &pm);
    if (error) {
        ofp_print_error(string, error);
        return;
    }

    ds_put_cstr(string, "port: ");
    ofputil_format_port(pm.port_no, string);
    ds_put_format(string, ": addr:"ETH_ADDR_FMT"\n",
                  ETH_ADDR_ARGS(pm.hw_addr));

    /* ofp_print_port_config() and ofp_print_port_features() each terminate
     * their own line. */
    ds_put_cstr(string, " config: ");
    ofp_print_port_config(string, pm.config);

    ds_put_cstr(string, " mask: ");
    ofp_print_port_config(string, pm.mask);

    ds_put_cstr(string, " advertise: ");
    if (pm.advertise) {
        ofp_print_port_features(string, pm.advertise);
    } else {
        ds_put_cstr(string, "UNCHANGED\n");
    }
}

/* Appends to 's' the name of each known meter flag set in 'flags', each
 * followed by a space.  Any other bits that are set are appended together as
 * "flags:0x<hex> ". */
static void
ofp_print_meter_flags(struct ds *s, uint16_t flags)
{
    if (flags & OFPMF13_KBPS) {
        ds_put_cstr(s, "kbps ");
    }
    if (flags & OFPMF13_PKTPS) {
        ds_put_cstr(s, "pktps ");
    }
    if (flags & OFPMF13_BURST) {
        ds_put_cstr(s, "burst ");
    }
    if (flags & OFPMF13_STATS) {
        ds_put_cstr(s, "stats ");
    }

    /* Report whatever is left after removing the flags named above. */
    flags &= ~(OFPMF13_KBPS | OFPMF13_PKTPS
               | OFPMF13_BURST | OFPMF13_STATS);
    if (flags) {
        ds_put_format(s, "flags:0x%"PRIx16" ", flags);
    }
}

/* Appends to 's' a description of the meter band 'mb', starting on a new
 * line: its type (by name, or numerically if not recognized) and rate, its
 * burst size only when OFPMF13_BURST is set in the meter's 'flags', and its
 * prec_level only for bands of type OFPMBT13_DSCP_REMARK. */
static void
ofp_print_meter_band(struct ds *s, uint16_t flags,
                     const struct ofputil_meter_band *mb)
{
    ds_put_cstr(s, "\ntype=");
    switch (mb->type) {
    case OFPMBT13_DROP:
        ds_put_cstr(s, "drop");
        break;
    case OFPMBT13_DSCP_REMARK:
        ds_put_cstr(s, "dscp_remark");
        break;
    default:
        ds_put_format(s, "%u", mb->type);
    }

    ds_put_format(s, " rate=%"PRIu32, mb->rate);

    if (flags & OFPMF13_BURST) {
        ds_put_format(s, " burst_size=%"PRIu32, mb->burst_size);
    }
    if (mb->type == OFPMBT13_DSCP_REMARK) {
        ds_put_format(s, " prec_level=%"PRIu8, mb->prec_level);
    }
}

/* Appends to 's' the statistics in 'ms': meter id, flow count, input packet
 * and byte counts, duration, and then one line of packet and byte counts for
 * each of the 'n_bands' bands. */
static void
ofp_print_meter_stats(struct ds *s, const struct ofputil_meter_stats *ms)
{
    uint16_t i;

    ds_put_format(s, "meter:%"PRIu32" ", ms->meter_id);
    ds_put_format(s, "flow_count:%"PRIu32" ",
ms->flow_count);
    ds_put_format(s, "packet_in_count:%"PRIu64" ", ms->packet_in_count);
    ds_put_format(s, "byte_in_count:%"PRIu64" ", ms->byte_in_count);
    ds_put_cstr(s, "duration:");
    ofp_print_duration(s, ms->duration_sec, ms->duration_nsec);
    ds_put_char(s, ' ');

    /* One line per band, prefixed with the band's index. */
    ds_put_cstr(s, "bands:\n");
    for (i = 0; i < ms->n_bands; ++i) {
        ds_put_format(s, "%d: ", i);
        ds_put_format(s, "packet_count:%"PRIu64" ", ms->bands[i].packet_count);
        ds_put_format(s, "byte_count:%"PRIu64"\n", ms->bands[i].byte_count);
    }
}

/* Appends to 's' the meter configuration 'mc': meter id, flags, and then each
 * of the 'n_bands' bands, followed by a new-line. */
static void
ofp_print_meter_config(struct ds *s, const struct ofputil_meter_config *mc)
{
    uint16_t i;

    ds_put_format(s, "meter=%"PRIu32" ", mc->meter_id);

    ofp_print_meter_flags(s, mc->flags);

    /* Each band starts its own line; the meter's flags are passed along so
     * that the band printer knows whether to include the burst size. */
    ds_put_cstr(s, "bands=");
    for (i = 0; i < mc->n_bands; ++i) {
        ofp_print_meter_band(s, mc->flags, &mc->bands[i]);
    }
    ds_put_char(s, '\n');
}

/* Appends to 's' a description of the meter mod message 'oh': the command
 * (by name, or numerically if not recognized) followed by the meter
 * configuration.
 *
 * If 'oh' cannot be decoded, only the decode error is appended. */
static void
ofp_print_meter_mod(struct ds *s, const struct ofp_header *oh)
{
    struct ofputil_meter_mod mm;
    struct ofpbuf bands;
    enum ofperr error;

    /* 'bands' is handed to the decoder and must be released on every exit
     * path. */
    ofpbuf_init(&bands, 64);
    error = ofputil_decode_meter_mod(oh, &mm, &bands);
    if (error) {
        ofpbuf_uninit(&bands);
        ofp_print_error(s, error);
        return;
    }

    switch (mm.command) {
    case OFPMC13_ADD:
        ds_put_cstr(s, " ADD ");
        break;
    case OFPMC13_MODIFY:
        ds_put_cstr(s, " MOD ");
        break;
    case OFPMC13_DELETE:
        ds_put_cstr(s, " DEL ");
        break;
    default:
        ds_put_format(s, " cmd:%d ", mm.command);
    }

    ofp_print_meter_config(s, &mm.meter);
    ofpbuf_uninit(&bands);
}

/* Appends to 's' the meter id requested by the meter stats/config request
 * 'oh'. */
static void
ofp_print_meter_stats_request(struct ds *s, const struct ofp_header *oh)
{
    uint32_t meter_id;

    ofputil_decode_meter_request(oh, &meter_id);

    ds_put_format(s, " meter=%"PRIu32, meter_id);
}

/* Returns the display name for the single OFPMF13_* meter flag 'bit', or NULL
 * if 'bit' is not one of the values handled in the switch below. */
static const char *
ofputil_meter_capabilities_to_name(uint32_t bit)
{
    enum ofp13_meter_flags flag = bit;

    switch (flag) {
    case OFPMF13_KBPS:    return "kbps";
    case OFPMF13_PKTPS:   return "pktps";
    case OFPMF13_BURST:   return "burst";
    case OFPMF13_STATS:   return "stats";
    }

    return NULL;
}

/* Returns the display name for the band type whose bit in a band-types bitmap
 * is 'bit' (that is, 1 << OFPMBT13_*), or NULL if 'bit' does not correspond
 * to one of the band types handled in the switch below. */
static const char *
ofputil_meter_band_types_to_name(uint32_t bit)
{
    switch (bit) {
    case 1 <<
OFPMBT13_DROP:          return "drop";
    case 1 << OFPMBT13_DSCP_REMARK:   return "dscp_remark";
    }

    return NULL;
}

/* Appends to 's' the contents of the meter features reply 'oh': maximum
 * meter, band and color counts, then the supported band types and the
 * capabilities by name. */
static void
ofp_print_meter_features_reply(struct ds *s, const struct ofp_header *oh)
{
    struct ofputil_meter_features mf;

    ofputil_decode_meter_features(oh, &mf);

    ds_put_format(s, "\nmax_meter:%"PRIu32, mf.max_meters);
    ds_put_format(s, " max_bands:%"PRIu8, mf.max_bands);
    ds_put_format(s, " max_color:%"PRIu8"\n", mf.max_color);

    ds_put_cstr(s, "band_types: ");
    ofp_print_bit_names(s, mf.band_types,
                        ofputil_meter_band_types_to_name, ' ');
    ds_put_char(s, '\n');

    ds_put_cstr(s, "capabilities: ");
    ofp_print_bit_names(s, mf.capabilities,
                        ofputil_meter_capabilities_to_name, ' ');
    ds_put_char(s, '\n');
}

/* Appends to 's' every meter configuration contained in the meter config
 * reply 'oh', each on its own line.  Decoding stops at the first nonzero
 * return value; anything other than EOF is printed as a decode error. */
static void
ofp_print_meter_config_reply(struct ds *s, const struct ofp_header *oh)
{
    struct ofpbuf bands;
    struct ofpbuf b;

    ofpbuf_use_const(&b, oh, ntohs(oh->length));
    ofpbuf_init(&bands, 64);
    for (;;) {
        struct ofputil_meter_config mc;
        int retval;

        retval = ofputil_decode_meter_config(&b, &mc, &bands);
        if (retval) {
            if (retval != EOF) {
                ofp_print_error(s, retval);
            }
            break;
        }
        ds_put_char(s, '\n');
        ofp_print_meter_config(s, &mc);
    }
    ofpbuf_uninit(&bands);
}

/* Appends to 's' every set of meter statistics contained in the meter stats
 * reply 'oh', each on its own line.  Decoding stops at the first nonzero
 * return value; anything other than EOF is printed as a decode error. */
static void
ofp_print_meter_stats_reply(struct ds *s, const struct ofp_header *oh)
{
    struct ofpbuf bands;
    struct ofpbuf b;

    ofpbuf_use_const(&b, oh, ntohs(oh->length));
    ofpbuf_init(&bands, 64);
    for (;;) {
        struct ofputil_meter_stats ms;
        int retval;

        retval = ofputil_decode_meter_stats(&b, &ms, &bands);
        if (retval) {
            if (retval != EOF) {
                ofp_print_error(s, retval);
            }
            break;
        }
        ds_put_char(s, '\n');
        ofp_print_meter_stats(s, &ms);
    }
    ofpbuf_uninit(&bands);
}

/* Appends to 'string' a "***decode error: <name>***" line for 'error',
 * preceded by a space if 'string' is not empty. */
static void
ofp_print_error(struct ds *string, enum ofperr error)
{
    if (string->length) {
        ds_put_char(string, ' ');
    }
    ds_put_format(string, "***decode error: %s***\n", ofperr_get_name(error));
}

/* Appends to 'string' the version bitmap carried by the hello message 'oh'.
 * If ofputil_decode_hello() returns false, a hex dump of the whole message is
 * appended as well. */
static void
ofp_print_hello(struct ds *string, const struct ofp_header *oh)
{
    uint32_t allowed_versions;
    bool ok;

    ok = ofputil_decode_hello(oh, &allowed_versions);

    ds_put_cstr(string, "\n version bitmap: ");
    ofputil_format_version_bitmap(string, allowed_versions);

    if (!ok) {
        ds_put_cstr(string, "\n unknown data in hello:\n");
        ds_put_hex_dump(string, oh, ntohs(oh->length), 0, true);
    }
}

/* Appends to 'string' a description of the error message 'oh': the error's
 * name followed by its payload.  The payload is printed as printable text for
 * the two hello-failed errors tested below and otherwise formatted with
 * ofp_to_string().
 *
 * If ofperr_decode_msg() returns 0, "***decode error***" and a hex dump of
 * the message body are appended instead. */
static void
ofp_print_error_msg(struct ds *string, const struct ofp_header *oh)
{
    size_t len = ntohs(oh->length);
    struct ofpbuf payload;
    enum ofperr error;
    char *s;

    /* A zero result is treated here as failure to decode the error. */
    error = ofperr_decode_msg(oh, &payload);
    if (!error) {
        ds_put_cstr(string, "***decode error***");
        ds_put_hex_dump(string, oh + 1, len - sizeof *oh, 0, true);
        return;
    }

    ds_put_format(string, " %s\n", ofperr_get_name(error));

    if (error == OFPERR_OFPHFC_INCOMPATIBLE || error == OFPERR_OFPHFC_EPERM) {
        ds_put_printable(string, payload.data, payload.size);
    } else {
        s = ofp_to_string(payload.data, payload.size, 1);
        ds_put_cstr(string, s);
        free(s);
    }
}

/* Appends to 'string' a description of the port status message 'oh': " ADD:",
 * " DEL:" or " MOD:" according to the reason (nothing for any other reason),
 * followed by the description of the port.
 *
 * If 'oh' cannot be decoded, only the decode error is appended. */
static void
ofp_print_port_status(struct ds *string, const struct ofp_header *oh)
{
    struct ofputil_port_status ps;
    enum ofperr error;

    error = ofputil_decode_port_status(oh, &ps);
    if (error) {
        ofp_print_error(string, error);
        return;
    }

    if (ps.reason == OFPPR_ADD) {
        ds_put_format(string, " ADD:");
    } else if (ps.reason == OFPPR_DELETE) {
        ds_put_format(string, " DEL:");
    } else if (ps.reason == OFPPR_MODIFY) {
        ds_put_format(string, " MOD:");
    }

    ofp_print_phy_port(string, &ps.desc);
}

/* Appends to 'string' the five description strings carried in the body of the
 * description stats reply 'oh', one per line. */
static void
ofp_print_ofpst_desc_reply(struct ds *string, const struct ofp_header *oh)
{
    const struct ofp_desc_stats *ods = ofpmsg_body(oh);

    /* "%.*s" with the field's size as precision limits each string to its
     * fixed-size field, whether or not the field is null-terminated. */
    ds_put_char(string, '\n');
    ds_put_format(string, "Manufacturer: %.*s\n",
            (int) sizeof ods->mfr_desc, ods->mfr_desc);
    ds_put_format(string, "Hardware: %.*s\n",
            (int) sizeof ods->hw_desc, ods->hw_desc);
    ds_put_format(string, "Software: %.*s\n",
            (int) sizeof ods->sw_desc, ods->sw_desc);
    ds_put_format(string, "Serial Num: %.*s\n",
            (int) sizeof ods->serial_num, ods->serial_num);
    ds_put_format(string, "DP Description: %.*s\n",
            (int) sizeof ods->dp_desc, ods->dp_desc);
}

/* Appends to 'string' a description of the flow stats request 'oh': the table
 * id unless it is 0xff, the output port unless it is OFPP_ANY, and the match.
 *
 * If 'oh' cannot be decoded, only the decode error is appended. */
static void
ofp_print_flow_stats_request(struct ds *string, const struct ofp_header *oh)
{
struct ofputil_flow_stats_request fsr; enum ofperr error; error = ofputil_decode_flow_stats_request(&fsr, oh); if (error) { ofp_print_error(string, error); return; } if (fsr.table_id != 0xff) { ds_put_format(string, " table=%"PRIu8, fsr.table_id); } if (fsr.out_port != OFPP_ANY) { ds_put_cstr(string, " out_port="); ofputil_format_port(fsr.out_port, string); } ds_put_char(string, ' '); match_format(&fsr.match, string, OFP_DEFAULT_PRIORITY); } void ofp_print_flow_stats(struct ds *string, struct ofputil_flow_stats *fs) { ds_put_format(string, " cookie=0x%"PRIx64", duration=", ntohll(fs->cookie)); ofp_print_duration(string, fs->duration_sec, fs->duration_nsec); ds_put_format(string, ", table=%"PRIu8", ", fs->table_id); ds_put_format(string, "n_packets=%"PRIu64", ", fs->packet_count); ds_put_format(string, "n_bytes=%"PRIu64", ", fs->byte_count); if (fs->idle_timeout != OFP_FLOW_PERMANENT) { ds_put_format(string, "idle_timeout=%"PRIu16", ", fs->idle_timeout); } if (fs->hard_timeout != OFP_FLOW_PERMANENT) { ds_put_format(string, "hard_timeout=%"PRIu16", ", fs->hard_timeout); } if (fs->flags) { ofp_print_flow_flags(string, fs->flags); } if (fs->idle_age >= 0) { ds_put_format(string, "idle_age=%d, ", fs->idle_age); } if (fs->hard_age >= 0 && fs->hard_age != fs->duration_sec) { ds_put_format(string, "hard_age=%d, ", fs->hard_age); } match_format(&fs->match, string, fs->priority); if (string->string[string->length - 1] != ' ') { ds_put_char(string, ' '); } ofpacts_format(fs->ofpacts, fs->ofpacts_len, string); } static void ofp_print_flow_stats_reply(struct ds *string, const struct ofp_header *oh) { struct ofpbuf ofpacts; struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpbuf_init(&ofpacts, 64); for (;;) { struct ofputil_flow_stats fs; int retval; retval = ofputil_decode_flow_stats_reply(&fs, &b, true, &ofpacts); if (retval) { if (retval != EOF) { ds_put_cstr(string, " ***parse error***"); } break; } ds_put_char(string, '\n'); ofp_print_flow_stats(string, &fs); } 
ofpbuf_uninit(&ofpacts); } static void ofp_print_aggregate_stats_reply(struct ds *string, const struct ofp_header *oh) { struct ofputil_aggregate_stats as; enum ofperr error; error = ofputil_decode_aggregate_stats_reply(&as, oh); if (error) { ofp_print_error(string, error); return; } ds_put_format(string, " packet_count=%"PRIu64, as.packet_count); ds_put_format(string, " byte_count=%"PRIu64, as.byte_count); ds_put_format(string, " flow_count=%"PRIu32, as.flow_count); } static void print_port_stat(struct ds *string, const char *leader, uint64_t stat, int more) { ds_put_cstr(string, leader); if (stat != UINT64_MAX) { ds_put_format(string, "%"PRIu64, stat); } else { ds_put_char(string, '?'); } if (more) { ds_put_cstr(string, ", "); } else { ds_put_cstr(string, "\n"); } } static void ofp_print_ofpst_port_request(struct ds *string, const struct ofp_header *oh) { ofp_port_t ofp10_port; enum ofperr error; error = ofputil_decode_port_stats_request(oh, &ofp10_port); if (error) { ofp_print_error(string, error); return; } ds_put_cstr(string, " port_no="); ofputil_format_port(ofp10_port, string); } static void ofp_print_ofpst_port_reply(struct ds *string, const struct ofp_header *oh, int verbosity) { struct ofpbuf b; ds_put_format(string, " %zu ports\n", ofputil_count_port_stats(oh)); if (verbosity < 1) { return; } ofpbuf_use_const(&b, oh, ntohs(oh->length)); for (;;) { struct ofputil_port_stats ps; int retval; retval = ofputil_decode_port_stats(&ps, &b); if (retval) { if (retval != EOF) { ds_put_cstr(string, " ***parse error***"); } return; } ds_put_cstr(string, " port "); if (ofp_to_u16(ps.port_no) < 10) { ds_put_char(string, ' '); } ofputil_format_port(ps.port_no, string); ds_put_cstr(string, ": rx "); print_port_stat(string, "pkts=", ps.stats.rx_packets, 1); print_port_stat(string, "bytes=", ps.stats.rx_bytes, 1); print_port_stat(string, "drop=", ps.stats.rx_dropped, 1); print_port_stat(string, "errs=", ps.stats.rx_errors, 1); print_port_stat(string, "frame=", 
ps.stats.rx_frame_errors, 1); print_port_stat(string, "over=", ps.stats.rx_over_errors, 1); print_port_stat(string, "crc=", ps.stats.rx_crc_errors, 0); ds_put_cstr(string, " tx "); print_port_stat(string, "pkts=", ps.stats.tx_packets, 1); print_port_stat(string, "bytes=", ps.stats.tx_bytes, 1); print_port_stat(string, "drop=", ps.stats.tx_dropped, 1); print_port_stat(string, "errs=", ps.stats.tx_errors, 1); print_port_stat(string, "coll=", ps.stats.collisions, 0); if (ps.duration_sec != UINT32_MAX) { ds_put_cstr(string, " duration="); ofp_print_duration(string, ps.duration_sec, ps.duration_nsec); ds_put_char(string, '\n'); } } } static void ofp_print_one_ofpst_table_reply(struct ds *string, enum ofp_version ofp_version, const char *name, struct ofp12_table_stats *ts) { char name_[OFP_MAX_TABLE_NAME_LEN + 1]; /* ofp13_table_stats is different */ if (ofp_version > OFP12_VERSION) { return; } ovs_strlcpy(name_, name, sizeof name_); ds_put_format(string, " %d: %-8s: ", ts->table_id, name_); ds_put_format(string, "wild=0x%05"PRIx64", ", ntohll(ts->wildcards)); ds_put_format(string, "max=%6"PRIu32", ", ntohl(ts->max_entries)); ds_put_format(string, "active=%"PRIu32"\n", ntohl(ts->active_count)); ds_put_cstr(string, " "); ds_put_format(string, "lookup=%"PRIu64", ", ntohll(ts->lookup_count)); ds_put_format(string, "matched=%"PRIu64"\n", ntohll(ts->matched_count)); if (ofp_version < OFP11_VERSION) { return; } ds_put_cstr(string, " "); ds_put_format(string, "match=0x%08"PRIx64", ", ntohll(ts->match)); ds_put_format(string, "instructions=0x%08"PRIx32", ", ntohl(ts->instructions)); ds_put_format(string, "config=0x%08"PRIx32"\n", ntohl(ts->config)); ds_put_cstr(string, " "); ds_put_format(string, "write_actions=0x%08"PRIx32", ", ntohl(ts->write_actions)); ds_put_format(string, "apply_actions=0x%08"PRIx32"\n", ntohl(ts->apply_actions)); if (ofp_version < OFP12_VERSION) { return; } ds_put_cstr(string, " "); ds_put_format(string, "write_setfields=0x%016"PRIx64"\n", 
ntohll(ts->write_setfields)); ds_put_cstr(string, " "); ds_put_format(string, "apply_setfields=0x%016"PRIx64"\n", ntohll(ts->apply_setfields)); ds_put_cstr(string, " "); ds_put_format(string, "metadata_match=0x%016"PRIx64"\n", ntohll(ts->metadata_match)); ds_put_cstr(string, " "); ds_put_format(string, "metadata_write=0x%016"PRIx64"\n", ntohll(ts->metadata_write)); } static void ofp_print_ofpst_table_reply13(struct ds *string, const struct ofp_header *oh, int verbosity) { struct ofp13_table_stats *ts; struct ofpbuf b; size_t n; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpraw_pull_assert(&b); n = b.size / sizeof *ts; ds_put_format(string, " %zu tables\n", n); if (verbosity < 1) { return; } for (;;) { ts = ofpbuf_try_pull(&b, sizeof *ts); if (!ts) { return; } ds_put_format(string, " %d: active=%"PRIu32", lookup=%"PRIu64 \ ", matched=%"PRIu64"\n", ts->table_id, ntohl(ts->active_count), ntohll(ts->lookup_count), ntohll(ts->matched_count)); } } static void ofp_print_ofpst_table_reply12(struct ds *string, const struct ofp_header *oh, int verbosity) { struct ofp12_table_stats *ts; struct ofpbuf b; size_t n; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpraw_pull_assert(&b); n = b.size / sizeof *ts; ds_put_format(string, " %zu tables\n", n); if (verbosity < 1) { return; } for (;;) { ts = ofpbuf_try_pull(&b, sizeof *ts); if (!ts) { return; } ofp_print_one_ofpst_table_reply(string, OFP12_VERSION, ts->name, ts); } } static void ofp_print_ofpst_table_reply11(struct ds *string, const struct ofp_header *oh, int verbosity) { struct ofp11_table_stats *ts; struct ofpbuf b; size_t n; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpraw_pull_assert(&b); n = b.size / sizeof *ts; ds_put_format(string, " %zu tables\n", n); if (verbosity < 1) { return; } for (;;) { struct ofp12_table_stats ts12; ts = ofpbuf_try_pull(&b, sizeof *ts); if (!ts) { return; } ts12.table_id = ts->table_id; ts12.wildcards = htonll(ntohl(ts->wildcards)); ts12.max_entries = ts->max_entries; ts12.active_count = 
ts->active_count; ts12.lookup_count = ts->lookup_count; ts12.matched_count = ts->matched_count; ts12.match = htonll(ntohl(ts->match)); ts12.instructions = ts->instructions; ts12.config = ts->config; ts12.write_actions = ts->write_actions; ts12.apply_actions = ts->apply_actions; ofp_print_one_ofpst_table_reply(string, OFP11_VERSION, ts->name, &ts12); } } static void ofp_print_ofpst_table_reply10(struct ds *string, const struct ofp_header *oh, int verbosity) { struct ofp10_table_stats *ts; struct ofpbuf b; size_t n; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpraw_pull_assert(&b); n = b.size / sizeof *ts; ds_put_format(string, " %zu tables\n", n); if (verbosity < 1) { return; } for (;;) { struct ofp12_table_stats ts12; ts = ofpbuf_try_pull(&b, sizeof *ts); if (!ts) { return; } ts12.table_id = ts->table_id; ts12.wildcards = htonll(ntohl(ts->wildcards)); ts12.max_entries = ts->max_entries; ts12.active_count = ts->active_count; ts12.lookup_count = get_32aligned_be64(&ts->lookup_count); ts12.matched_count = get_32aligned_be64(&ts->matched_count); ofp_print_one_ofpst_table_reply(string, OFP10_VERSION, ts->name, &ts12); } } static void ofp_print_ofpst_table_reply(struct ds *string, const struct ofp_header *oh, int verbosity) { switch ((enum ofp_version)oh->version) { case OFP13_VERSION: ofp_print_ofpst_table_reply13(string, oh, verbosity); break; case OFP12_VERSION: ofp_print_ofpst_table_reply12(string, oh, verbosity); break; case OFP11_VERSION: ofp_print_ofpst_table_reply11(string, oh, verbosity); break; case OFP10_VERSION: ofp_print_ofpst_table_reply10(string, oh, verbosity); break; default: NOT_REACHED(); } } static void ofp_print_queue_name(struct ds *string, uint32_t queue_id) { if (queue_id == OFPQ_ALL) { ds_put_cstr(string, "ALL"); } else { ds_put_format(string, "%"PRIu32, queue_id); } } static void ofp_print_ofpst_queue_request(struct ds *string, const struct ofp_header *oh) { struct ofputil_queue_stats_request oqsr; enum ofperr error; error = 
ofputil_decode_queue_stats_request(oh, &oqsr); if (error) { ds_put_format(string, "***decode error: %s***\n", ofperr_get_name(error)); return; } ds_put_cstr(string, "port="); ofputil_format_port(oqsr.port_no, string); ds_put_cstr(string, " queue="); ofp_print_queue_name(string, oqsr.queue_id); } static void ofp_print_ofpst_queue_reply(struct ds *string, const struct ofp_header *oh, int verbosity) { struct ofpbuf b; ds_put_format(string, " %zu queues\n", ofputil_count_queue_stats(oh)); if (verbosity < 1) { return; } ofpbuf_use_const(&b, oh, ntohs(oh->length)); for (;;) { struct ofputil_queue_stats qs; int retval; retval = ofputil_decode_queue_stats(&qs, &b); if (retval) { if (retval != EOF) { ds_put_cstr(string, " ***parse error***"); } return; } ds_put_cstr(string, " port "); ofputil_format_port(qs.port_no, string); ds_put_cstr(string, " queue "); ofp_print_queue_name(string, qs.queue_id); ds_put_cstr(string, ": "); print_port_stat(string, "bytes=", qs.tx_bytes, 1); print_port_stat(string, "pkts=", qs.tx_packets, 1); print_port_stat(string, "errors=", qs.tx_errors, 1); ds_put_cstr(string, "duration="); if (qs.duration_sec != UINT32_MAX) { ofp_print_duration(string, qs.duration_sec, qs.duration_nsec); } else { ds_put_char(string, '?'); } ds_put_char(string, '\n'); } } static void ofp_print_ofpst_port_desc_reply(struct ds *string, const struct ofp_header *oh) { struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpraw_pull_assert(&b); ds_put_char(string, '\n'); ofp_print_phy_ports(string, oh->version, &b); } static void ofp_print_stats_request(struct ds *string, const struct ofp_header *oh) { uint16_t flags = ofpmp_flags(oh); if (flags) { ds_put_format(string, " ***unknown flags 0x%04"PRIx16"***", flags); } } static void ofp_print_stats_reply(struct ds *string, const struct ofp_header *oh) { uint16_t flags = ofpmp_flags(oh); if (flags) { ds_put_cstr(string, " flags="); if (flags & OFPSF_REPLY_MORE) { ds_put_cstr(string, "[more]"); flags &= 
~OFPSF_REPLY_MORE; } if (flags) { ds_put_format(string, "[***unknown flags 0x%04"PRIx16"***]", flags); } } } static void ofp_print_echo(struct ds *string, const struct ofp_header *oh, int verbosity) { size_t len = ntohs(oh->length); ds_put_format(string, " %zu bytes of payload\n", len - sizeof *oh); if (verbosity > 1) { ds_put_hex_dump(string, oh + 1, len - sizeof *oh, 0, true); } } static void ofp_print_role_message(struct ds *string, const struct ofp_header *oh) { struct ofputil_role_request rr; enum ofperr error; error = ofputil_decode_role_message(oh, &rr); if (error) { ofp_print_error(string, error); return; } ds_put_cstr(string, " role="); switch (rr.role) { case OFPCR12_ROLE_NOCHANGE: ds_put_cstr(string, "nochange"); break; case OFPCR12_ROLE_EQUAL: ds_put_cstr(string, "equal"); /* OF 1.2 wording */ break; case OFPCR12_ROLE_MASTER: ds_put_cstr(string, "master"); break; case OFPCR12_ROLE_SLAVE: ds_put_cstr(string, "slave"); break; default: NOT_REACHED(); } if (rr.have_generation_id) { ds_put_format(string, " generation_id=%"PRIu64, rr.generation_id); } } static void ofp_print_nxt_flow_mod_table_id(struct ds *string, const struct nx_flow_mod_table_id *nfmti) { ds_put_format(string, " %s", nfmti->set ? "enable" : "disable"); } static void ofp_print_nxt_set_flow_format(struct ds *string, const struct nx_set_flow_format *nsff) { uint32_t format = ntohl(nsff->format); ds_put_cstr(string, " format="); if (ofputil_nx_flow_format_is_valid(format)) { ds_put_cstr(string, ofputil_nx_flow_format_to_string(format)); } else { ds_put_format(string, "%"PRIu32, format); } } static void ofp_print_nxt_set_packet_in_format(struct ds *string, const struct nx_set_packet_in_format *nspf) { uint32_t format = ntohl(nspf->format); ds_put_cstr(string, " format="); if (ofputil_packet_in_format_is_valid(format)) { ds_put_cstr(string, ofputil_packet_in_format_to_string(format)); } else { ds_put_format(string, "%"PRIu32, format); } } /* Returns a string form of 'reason'. 
The return value is either a statically * allocated constant string or the 'bufsize'-byte buffer 'reasonbuf'. * 'bufsize' should be at least OFP_PORT_REASON_BUFSIZE. */ #define OFP_PORT_REASON_BUFSIZE (INT_STRLEN(int) + 1) static const char * ofp_port_reason_to_string(enum ofp_port_reason reason, char *reasonbuf, size_t bufsize) { switch (reason) { case OFPPR_ADD: return "add"; case OFPPR_DELETE: return "delete"; case OFPPR_MODIFY: return "modify"; default: snprintf(reasonbuf, bufsize, "%d", (int) reason); return reasonbuf; } } static void ofp_print_nxt_set_async_config(struct ds *string, const struct nx_async_config *nac) { int i; for (i = 0; i < 2; i++) { int j; ds_put_format(string, "\n %s:\n", i == 0 ? "master" : "slave"); ds_put_cstr(string, " PACKET_IN:"); for (j = 0; j < 32; j++) { if (nac->packet_in_mask[i] & htonl(1u << j)) { char reasonbuf[OFPUTIL_PACKET_IN_REASON_BUFSIZE]; const char *reason; reason = ofputil_packet_in_reason_to_string(j, reasonbuf, sizeof reasonbuf); ds_put_format(string, " %s", reason); } } if (!nac->packet_in_mask[i]) { ds_put_cstr(string, " (off)"); } ds_put_char(string, '\n'); ds_put_cstr(string, " PORT_STATUS:"); for (j = 0; j < 32; j++) { if (nac->port_status_mask[i] & htonl(1u << j)) { char reasonbuf[OFP_PORT_REASON_BUFSIZE]; const char *reason; reason = ofp_port_reason_to_string(j, reasonbuf, sizeof reasonbuf); ds_put_format(string, " %s", reason); } } if (!nac->port_status_mask[i]) { ds_put_cstr(string, " (off)"); } ds_put_char(string, '\n'); ds_put_cstr(string, " FLOW_REMOVED:"); for (j = 0; j < 32; j++) { if (nac->flow_removed_mask[i] & htonl(1u << j)) { char reasonbuf[OFP_FLOW_REMOVED_REASON_BUFSIZE]; const char *reason; reason = ofp_flow_removed_reason_to_string(j, reasonbuf, sizeof reasonbuf); ds_put_format(string, " %s", reason); } } if (!nac->flow_removed_mask[i]) { ds_put_cstr(string, " (off)"); } ds_put_char(string, '\n'); } } static void ofp_print_nxt_set_controller_id(struct ds *string, const struct nx_controller_id 
*nci) { ds_put_format(string, " id=%"PRIu16, ntohs(nci->controller_id)); } static void ofp_print_nxt_flow_monitor_cancel(struct ds *string, const struct ofp_header *oh) { ds_put_format(string, " id=%"PRIu32, ofputil_decode_flow_monitor_cancel(oh)); } static const char * nx_flow_monitor_flags_to_name(uint32_t bit) { enum nx_flow_monitor_flags fmf = bit; switch (fmf) { case NXFMF_INITIAL: return "initial"; case NXFMF_ADD: return "add"; case NXFMF_DELETE: return "delete"; case NXFMF_MODIFY: return "modify"; case NXFMF_ACTIONS: return "actions"; case NXFMF_OWN: return "own"; } return NULL; } static void ofp_print_nxst_flow_monitor_request(struct ds *string, const struct ofp_header *oh) { struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); for (;;) { struct ofputil_flow_monitor_request request; int retval; retval = ofputil_decode_flow_monitor_request(&request, &b); if (retval) { if (retval != EOF) { ofp_print_error(string, retval); } return; } ds_put_format(string, "\n id=%"PRIu32" flags=", request.id); ofp_print_bit_names(string, request.flags, nx_flow_monitor_flags_to_name, ','); if (request.out_port != OFPP_NONE) { ds_put_cstr(string, " out_port="); ofputil_format_port(request.out_port, string); } if (request.table_id != 0xff) { ds_put_format(string, " table=%"PRIu8, request.table_id); } ds_put_char(string, ' '); match_format(&request.match, string, OFP_DEFAULT_PRIORITY); ds_chomp(string, ' '); } } static void ofp_print_nxst_flow_monitor_reply(struct ds *string, const struct ofp_header *oh) { uint64_t ofpacts_stub[1024 / 8]; struct ofpbuf ofpacts; struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); for (;;) { char reasonbuf[OFP_FLOW_REMOVED_REASON_BUFSIZE]; struct ofputil_flow_update update; struct match match; int retval; update.match = &match; retval = ofputil_decode_flow_update(&update, &b, &ofpacts); if (retval) { if (retval != EOF) { ofp_print_error(string, retval); } 
ofpbuf_uninit(&ofpacts); return; } ds_put_cstr(string, "\n event="); switch (update.event) { case NXFME_ADDED: ds_put_cstr(string, "ADDED"); break; case NXFME_DELETED: ds_put_format(string, "DELETED reason=%s", ofp_flow_removed_reason_to_string(update.reason, reasonbuf, sizeof reasonbuf)); break; case NXFME_MODIFIED: ds_put_cstr(string, "MODIFIED"); break; case NXFME_ABBREV: ds_put_format(string, "ABBREV xid=0x%"PRIx32, ntohl(update.xid)); continue; } ds_put_format(string, " table=%"PRIu8, update.table_id); if (update.idle_timeout != OFP_FLOW_PERMANENT) { ds_put_format(string, " idle_timeout=%"PRIu16, update.idle_timeout); } if (update.hard_timeout != OFP_FLOW_PERMANENT) { ds_put_format(string, " hard_timeout=%"PRIu16, update.hard_timeout); } ds_put_format(string, " cookie=%#"PRIx64, ntohll(update.cookie)); ds_put_char(string, ' '); match_format(update.match, string, OFP_DEFAULT_PRIORITY); if (update.ofpacts_len) { if (string->string[string->length - 1] != ' ') { ds_put_char(string, ' '); } ofpacts_format(update.ofpacts, update.ofpacts_len, string); } } } void ofp_print_version(const struct ofp_header *oh, struct ds *string) { switch (oh->version) { case OFP10_VERSION: break; case OFP11_VERSION: ds_put_cstr(string, " (OF1.1)"); break; case OFP12_VERSION: ds_put_cstr(string, " (OF1.2)"); break; case OFP13_VERSION: ds_put_cstr(string, " (OF1.3)"); break; default: ds_put_format(string, " (OF 0x%02"PRIx8")", oh->version); break; } ds_put_format(string, " (xid=0x%"PRIx32"):", ntohl(oh->xid)); } static void ofp_header_to_string__(const struct ofp_header *oh, enum ofpraw raw, struct ds *string) { ds_put_cstr(string, ofpraw_get_name(raw)); ofp_print_version(oh, string); } static void ofp_print_not_implemented(struct ds *string) { ds_put_cstr(string, "NOT IMPLEMENTED YET!\n"); } static void ofp_to_string__(const struct ofp_header *oh, enum ofpraw raw, struct ds *string, int verbosity) { const void *msg = oh; ofp_header_to_string__(oh, raw, string); switch 
(ofptype_from_ofpraw(raw)) { /* FIXME: Change the following once they are implemented: */ case OFPTYPE_QUEUE_GET_CONFIG_REQUEST: case OFPTYPE_QUEUE_GET_CONFIG_REPLY: case OFPTYPE_GET_ASYNC_REQUEST: case OFPTYPE_GET_ASYNC_REPLY: case OFPTYPE_GROUP_STATS_REQUEST: case OFPTYPE_GROUP_STATS_REPLY: case OFPTYPE_GROUP_DESC_STATS_REQUEST: case OFPTYPE_GROUP_DESC_STATS_REPLY: case OFPTYPE_GROUP_FEATURES_STATS_REQUEST: case OFPTYPE_GROUP_FEATURES_STATS_REPLY: case OFPTYPE_TABLE_FEATURES_STATS_REQUEST: case OFPTYPE_TABLE_FEATURES_STATS_REPLY: ofp_print_not_implemented(string); break; case OFPTYPE_HELLO: ofp_print_hello(string, oh); break; case OFPTYPE_ERROR: ofp_print_error_msg(string, oh); break; case OFPTYPE_ECHO_REQUEST: case OFPTYPE_ECHO_REPLY: ofp_print_echo(string, oh, verbosity); break; case OFPTYPE_FEATURES_REQUEST: break; case OFPTYPE_FEATURES_REPLY: ofp_print_switch_features(string, oh); break; case OFPTYPE_GET_CONFIG_REQUEST: break; case OFPTYPE_GET_CONFIG_REPLY: case OFPTYPE_SET_CONFIG: ofp_print_switch_config(string, ofpmsg_body(oh)); break; case OFPTYPE_PACKET_IN: ofp_print_packet_in(string, oh, verbosity); break; case OFPTYPE_FLOW_REMOVED: ofp_print_flow_removed(string, oh); break; case OFPTYPE_PORT_STATUS: ofp_print_port_status(string, oh); break; case OFPTYPE_PACKET_OUT: ofp_print_packet_out(string, oh, verbosity); break; case OFPTYPE_FLOW_MOD: ofp_print_flow_mod(string, oh, verbosity); break; case OFPTYPE_PORT_MOD: ofp_print_port_mod(string, oh); break; case OFPTYPE_METER_MOD: ofp_print_meter_mod(string, oh); break; case OFPTYPE_BARRIER_REQUEST: case OFPTYPE_BARRIER_REPLY: break; case OFPTYPE_ROLE_REQUEST: case OFPTYPE_ROLE_REPLY: ofp_print_role_message(string, oh); break; case OFPTYPE_METER_STATS_REQUEST: case OFPTYPE_METER_CONFIG_STATS_REQUEST: ofp_print_stats_request(string, oh); ofp_print_meter_stats_request(string, oh); break; case OFPTYPE_METER_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_meter_stats_reply(string, oh); break; case 
OFPTYPE_METER_CONFIG_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_meter_config_reply(string, oh); break; case OFPTYPE_METER_FEATURES_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_meter_features_reply(string, oh); break; case OFPTYPE_DESC_STATS_REQUEST: case OFPTYPE_PORT_DESC_STATS_REQUEST: case OFPTYPE_METER_FEATURES_STATS_REQUEST: ofp_print_stats_request(string, oh); break; case OFPTYPE_FLOW_STATS_REQUEST: case OFPTYPE_AGGREGATE_STATS_REQUEST: ofp_print_stats_request(string, oh); ofp_print_flow_stats_request(string, oh); break; case OFPTYPE_TABLE_STATS_REQUEST: ofp_print_stats_request(string, oh); break; case OFPTYPE_PORT_STATS_REQUEST: ofp_print_stats_request(string, oh); ofp_print_ofpst_port_request(string, oh); break; case OFPTYPE_QUEUE_STATS_REQUEST: ofp_print_stats_request(string, oh); ofp_print_ofpst_queue_request(string, oh); break; case OFPTYPE_DESC_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_ofpst_desc_reply(string, oh); break; case OFPTYPE_FLOW_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_flow_stats_reply(string, oh); break; case OFPTYPE_QUEUE_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_ofpst_queue_reply(string, oh, verbosity); break; case OFPTYPE_PORT_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_ofpst_port_reply(string, oh, verbosity); break; case OFPTYPE_TABLE_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_ofpst_table_reply(string, oh, verbosity); break; case OFPTYPE_AGGREGATE_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_aggregate_stats_reply(string, oh); break; case OFPTYPE_PORT_DESC_STATS_REPLY: ofp_print_stats_reply(string, oh); ofp_print_ofpst_port_desc_reply(string, oh); break; case OFPTYPE_FLOW_MOD_TABLE_ID: ofp_print_nxt_flow_mod_table_id(string, ofpmsg_body(oh)); break; case OFPTYPE_SET_FLOW_FORMAT: ofp_print_nxt_set_flow_format(string, ofpmsg_body(oh)); break; case OFPTYPE_SET_PACKET_IN_FORMAT: ofp_print_nxt_set_packet_in_format(string, 
ofpmsg_body(oh)); break; case OFPTYPE_FLOW_AGE: break; case OFPTYPE_SET_CONTROLLER_ID: ofp_print_nxt_set_controller_id(string, ofpmsg_body(oh)); break; case OFPTYPE_SET_ASYNC_CONFIG: ofp_print_nxt_set_async_config(string, ofpmsg_body(oh)); break; case OFPTYPE_FLOW_MONITOR_CANCEL: ofp_print_nxt_flow_monitor_cancel(string, msg); break; case OFPTYPE_FLOW_MONITOR_PAUSED: case OFPTYPE_FLOW_MONITOR_RESUMED: break; case OFPTYPE_FLOW_MONITOR_STATS_REQUEST: ofp_print_nxst_flow_monitor_request(string, msg); break; case OFPTYPE_FLOW_MONITOR_STATS_REPLY: ofp_print_nxst_flow_monitor_reply(string, msg); break; } } /* Composes and returns a string representing the OpenFlow packet of 'len' * bytes at 'oh' at the given 'verbosity' level. 0 is a minimal amount of * verbosity and higher numbers increase verbosity. The caller is responsible * for freeing the string. */ char * ofp_to_string(const void *oh_, size_t len, int verbosity) { struct ds string = DS_EMPTY_INITIALIZER; const struct ofp_header *oh = oh_; if (!len) { ds_put_cstr(&string, "OpenFlow message is empty\n"); } else if (len < sizeof(struct ofp_header)) { ds_put_format(&string, "OpenFlow packet too short (only %zu bytes):\n", len); } else if (ntohs(oh->length) > len) { enum ofperr error; enum ofpraw raw; error = ofpraw_decode_partial(&raw, oh, len); if (!error) { ofp_header_to_string__(oh, raw, &string); ds_put_char(&string, '\n'); } ds_put_format(&string, "(***truncated to %zu bytes from %"PRIu16"***)\n", len, ntohs(oh->length)); } else if (ntohs(oh->length) < len) { ds_put_format(&string, "(***only uses %"PRIu16" bytes out of %zu***)\n", ntohs(oh->length), len); } else { enum ofperr error; enum ofpraw raw; error = ofpraw_decode(&raw, oh); if (!error) { ofp_to_string__(oh, raw, &string, verbosity); if (verbosity >= 5) { if (ds_last(&string) != '\n') { ds_put_char(&string, '\n'); } ds_put_hex_dump(&string, oh, len, 0, true); } if (ds_last(&string) != '\n') { ds_put_char(&string, '\n'); } return ds_steal_cstr(&string); } 
ofp_print_error(&string, error); } ds_put_hex_dump(&string, oh, len, 0, true); return ds_steal_cstr(&string); } static void print_and_free(FILE *stream, char *string) { fputs(string, stream); free(string); } /* Pretty-print the OpenFlow packet of 'len' bytes at 'oh' to 'stream' at the * given 'verbosity' level. 0 is a minimal amount of verbosity and higher * numbers increase verbosity. */ void ofp_print(FILE *stream, const void *oh, size_t len, int verbosity) { print_and_free(stream, ofp_to_string(oh, len, verbosity)); } /* Dumps the contents of the Ethernet frame in the 'len' bytes starting at * 'data' to 'stream'. */ void ofp_print_packet(FILE *stream, const void *data, size_t len) { print_and_free(stream, ofp_packet_to_string(data, len)); } openvswitch-2.0.1+git20140120/lib/ofp-print.h000066400000000000000000000027001226605124000203060ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* OpenFlow protocol pretty-printer. 
*/ #ifndef OFP_PRINT_H #define OFP_PRINT_H 1 #include #include struct ds; struct ofp10_match; struct ofp_flow_mod; struct ofp_header; struct ofputil_flow_stats; #ifdef __cplusplus extern "C" { #endif void ofp_print(FILE *, const void *, size_t, int verbosity); void ofp_print_packet(FILE *stream, const void *data, size_t len); void ofp10_match_print(struct ds *, const struct ofp10_match *, int verbosity); char *ofp_to_string(const void *, size_t, int verbosity); char *ofp10_match_to_string(const struct ofp10_match *, int verbosity); char *ofp_packet_to_string(const void *data, size_t len); void ofp_print_flow_stats(struct ds *, struct ofputil_flow_stats *); void ofp_print_version(const struct ofp_header *, struct ds *); #ifdef __cplusplus } #endif #endif /* ofp-print.h */ openvswitch-2.0.1+git20140120/lib/ofp-util.c000066400000000000000000005346701226605124000201420ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ofp-print.h" #include #include #include #include #include #include #include #include "bundle.h" #include "byte-order.h" #include "classifier.h" #include "dynamic-string.h" #include "learn.h" #include "meta-flow.h" #include "multipath.h" #include "netdev.h" #include "nx-match.h" #include "ofp-actions.h" #include "ofp-errors.h" #include "ofp-msgs.h" #include "ofp-util.h" #include "ofpbuf.h" #include "packets.h" #include "random.h" #include "unaligned.h" #include "type-props.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ofp_util); /* Rate limit for OpenFlow message parse errors. These always indicate a bug * in the peer and so there's not much point in showing a lot of them. */ static struct vlog_rate_limit bad_ofmsg_rl = VLOG_RATE_LIMIT_INIT(1, 5); /* Given the wildcard bit count in the least-significant 6 of 'wcbits', returns * an IP netmask with a 1 in each bit that must match and a 0 in each bit that * is wildcarded. * * The bits in 'wcbits' are in the format used in enum ofp_flow_wildcards: 0 * is exact match, 1 ignores the LSB, 2 ignores the 2 least-significant bits, * ..., 32 and higher wildcard the entire field. This is the *opposite* of the * usual convention where e.g. /24 indicates that 8 bits (not 24 bits) are * wildcarded. */ ovs_be32 ofputil_wcbits_to_netmask(int wcbits) { wcbits &= 0x3f; return wcbits < 32 ? htonl(~((1u << wcbits) - 1)) : 0; } /* Given the IP netmask 'netmask', returns the number of bits of the IP address * that it wildcards, that is, the number of 0-bits in 'netmask', a number * between 0 and 32 inclusive. * * If 'netmask' is not a CIDR netmask (see ip_is_cidr()), the return value will * still be in the valid range but isn't otherwise meaningful. */ int ofputil_netmask_to_wcbits(ovs_be32 netmask) { return 32 - ip_count_cidr_bits(netmask); } /* Converts the OpenFlow 1.0 wildcards in 'ofpfw' (OFPFW10_*) into a * flow_wildcards in 'wc' for use in struct match. 
It is the caller's * responsibility to handle the special case where the flow match's dl_vlan is * set to OFP_VLAN_NONE. */ void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) { BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20); /* Initialize most of wc. */ flow_wildcards_init_catchall(wc); if (!(ofpfw & OFPFW10_IN_PORT)) { wc->masks.in_port.ofp_port = u16_to_ofp(UINT16_MAX); } if (!(ofpfw & OFPFW10_NW_TOS)) { wc->masks.nw_tos |= IP_DSCP_MASK; } if (!(ofpfw & OFPFW10_NW_PROTO)) { wc->masks.nw_proto = UINT8_MAX; } wc->masks.nw_src = ofputil_wcbits_to_netmask(ofpfw >> OFPFW10_NW_SRC_SHIFT); wc->masks.nw_dst = ofputil_wcbits_to_netmask(ofpfw >> OFPFW10_NW_DST_SHIFT); if (!(ofpfw & OFPFW10_TP_SRC)) { wc->masks.tp_src = htons(UINT16_MAX); } if (!(ofpfw & OFPFW10_TP_DST)) { wc->masks.tp_dst = htons(UINT16_MAX); } if (!(ofpfw & OFPFW10_DL_SRC)) { memset(wc->masks.dl_src, 0xff, ETH_ADDR_LEN); } if (!(ofpfw & OFPFW10_DL_DST)) { memset(wc->masks.dl_dst, 0xff, ETH_ADDR_LEN); } if (!(ofpfw & OFPFW10_DL_TYPE)) { wc->masks.dl_type = htons(UINT16_MAX); } /* VLAN TCI mask. */ if (!(ofpfw & OFPFW10_DL_VLAN_PCP)) { wc->masks.vlan_tci |= htons(VLAN_PCP_MASK | VLAN_CFI); } if (!(ofpfw & OFPFW10_DL_VLAN)) { wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI); } } /* Converts the ofp10_match in 'ofmatch' into a struct match in 'match'. */ void ofputil_match_from_ofp10_match(const struct ofp10_match *ofmatch, struct match *match) { uint32_t ofpfw = ntohl(ofmatch->wildcards) & OFPFW10_ALL; /* Initialize match->wc. */ memset(&match->flow, 0, sizeof match->flow); ofputil_wildcard_from_ofpfw10(ofpfw, &match->wc); /* Initialize most of match->flow. 
*/ match->flow.nw_src = ofmatch->nw_src; match->flow.nw_dst = ofmatch->nw_dst; match->flow.in_port.ofp_port = u16_to_ofp(ntohs(ofmatch->in_port)); match->flow.dl_type = ofputil_dl_type_from_openflow(ofmatch->dl_type); match->flow.tp_src = ofmatch->tp_src; match->flow.tp_dst = ofmatch->tp_dst; memcpy(match->flow.dl_src, ofmatch->dl_src, ETH_ADDR_LEN); memcpy(match->flow.dl_dst, ofmatch->dl_dst, ETH_ADDR_LEN); match->flow.nw_tos = ofmatch->nw_tos & IP_DSCP_MASK; match->flow.nw_proto = ofmatch->nw_proto; /* Translate VLANs. */ if (!(ofpfw & OFPFW10_DL_VLAN) && ofmatch->dl_vlan == htons(OFP10_VLAN_NONE)) { /* Match only packets without 802.1Q header. * * When OFPFW10_DL_VLAN_PCP is wildcarded, this is obviously correct. * * If OFPFW10_DL_VLAN_PCP is matched, the flow match is contradictory, * because we can't have a specific PCP without an 802.1Q header. * However, older versions of OVS treated this as matching packets * without an 802.1Q header, so we do here too. */ match->flow.vlan_tci = htons(0); match->wc.masks.vlan_tci = htons(0xffff); } else { ovs_be16 vid, pcp, tci; vid = ofmatch->dl_vlan & htons(VLAN_VID_MASK); pcp = htons((ofmatch->dl_vlan_pcp << VLAN_PCP_SHIFT) & VLAN_PCP_MASK); tci = vid | pcp | htons(VLAN_CFI); match->flow.vlan_tci = tci & match->wc.masks.vlan_tci; } /* Clean up. */ match_zero_wildcarded_fields(match); } /* Convert 'match' into the OpenFlow 1.0 match structure 'ofmatch'. */ void ofputil_match_to_ofp10_match(const struct match *match, struct ofp10_match *ofmatch) { const struct flow_wildcards *wc = &match->wc; uint32_t ofpfw; /* Figure out most OpenFlow wildcards. 
*/ ofpfw = 0; if (!wc->masks.in_port.ofp_port) { ofpfw |= OFPFW10_IN_PORT; } if (!wc->masks.dl_type) { ofpfw |= OFPFW10_DL_TYPE; } if (!wc->masks.nw_proto) { ofpfw |= OFPFW10_NW_PROTO; } ofpfw |= (ofputil_netmask_to_wcbits(wc->masks.nw_src) << OFPFW10_NW_SRC_SHIFT); ofpfw |= (ofputil_netmask_to_wcbits(wc->masks.nw_dst) << OFPFW10_NW_DST_SHIFT); if (!(wc->masks.nw_tos & IP_DSCP_MASK)) { ofpfw |= OFPFW10_NW_TOS; } if (!wc->masks.tp_src) { ofpfw |= OFPFW10_TP_SRC; } if (!wc->masks.tp_dst) { ofpfw |= OFPFW10_TP_DST; } if (eth_addr_is_zero(wc->masks.dl_src)) { ofpfw |= OFPFW10_DL_SRC; } if (eth_addr_is_zero(wc->masks.dl_dst)) { ofpfw |= OFPFW10_DL_DST; } /* Translate VLANs. */ ofmatch->dl_vlan = htons(0); ofmatch->dl_vlan_pcp = 0; if (match->wc.masks.vlan_tci == htons(0)) { ofpfw |= OFPFW10_DL_VLAN | OFPFW10_DL_VLAN_PCP; } else if (match->wc.masks.vlan_tci & htons(VLAN_CFI) && !(match->flow.vlan_tci & htons(VLAN_CFI))) { ofmatch->dl_vlan = htons(OFP10_VLAN_NONE); ofpfw |= OFPFW10_DL_VLAN_PCP; } else { if (!(match->wc.masks.vlan_tci & htons(VLAN_VID_MASK))) { ofpfw |= OFPFW10_DL_VLAN; } else { ofmatch->dl_vlan = htons(vlan_tci_to_vid(match->flow.vlan_tci)); } if (!(match->wc.masks.vlan_tci & htons(VLAN_PCP_MASK))) { ofpfw |= OFPFW10_DL_VLAN_PCP; } else { ofmatch->dl_vlan_pcp = vlan_tci_to_pcp(match->flow.vlan_tci); } } /* Compose most of the match structure. 
*/ ofmatch->wildcards = htonl(ofpfw); ofmatch->in_port = htons(ofp_to_u16(match->flow.in_port.ofp_port)); memcpy(ofmatch->dl_src, match->flow.dl_src, ETH_ADDR_LEN); memcpy(ofmatch->dl_dst, match->flow.dl_dst, ETH_ADDR_LEN); ofmatch->dl_type = ofputil_dl_type_to_openflow(match->flow.dl_type); ofmatch->nw_src = match->flow.nw_src; ofmatch->nw_dst = match->flow.nw_dst; ofmatch->nw_tos = match->flow.nw_tos & IP_DSCP_MASK; ofmatch->nw_proto = match->flow.nw_proto; ofmatch->tp_src = match->flow.tp_src; ofmatch->tp_dst = match->flow.tp_dst; memset(ofmatch->pad1, '\0', sizeof ofmatch->pad1); memset(ofmatch->pad2, '\0', sizeof ofmatch->pad2); } /* Decodes the OpenFlow 1.1+ match that begins with the ofp11_match_header at * the start of 'buf' and stores the result in 'match'. * * If 'padded_match_len' is nonnull, stores in it the match length taken from * the header: unchanged for OFPMT_STANDARD, rounded up to a multiple of 8 for * OFPMT_OXM. * * Returns OFPERR_OFPBMC_BAD_LEN if 'buf' is too short for the header or if a * standard match does not have exactly the size of struct ofp11_match, * OFPERR_OFPBMC_BAD_TYPE if the match type is neither OFPMT_STANDARD nor * OFPMT_OXM, and otherwise whatever the type-specific decoder returns. */ enum ofperr ofputil_pull_ofp11_match(struct ofpbuf *buf, struct match *match, uint16_t *padded_match_len) { struct ofp11_match_header *omh = buf->data; uint16_t match_len; if (buf->size < sizeof *omh) { return OFPERR_OFPBMC_BAD_LEN; } match_len = ntohs(omh->length); switch (ntohs(omh->type)) { case OFPMT_STANDARD: { struct ofp11_match *om; if (match_len != sizeof *om || buf->size < sizeof *om) { return OFPERR_OFPBMC_BAD_LEN; } om = ofpbuf_pull(buf, sizeof *om); if (padded_match_len) { *padded_match_len = match_len; } return ofputil_match_from_ofp11_match(om, match); } case OFPMT_OXM: if (padded_match_len) { *padded_match_len = ROUND_UP(match_len, 8); } return oxm_pull_match(buf, match); default: return OFPERR_OFPBMC_BAD_TYPE; } } /* Converts the ofp11_match in 'ofmatch' into a struct match in 'match'. Returns * 0 if successful, otherwise an OFPERR_* value. 
*/ enum ofperr ofputil_match_from_ofp11_match(const struct ofp11_match *ofmatch, struct match *match) { uint16_t wc = ntohl(ofmatch->wildcards); uint8_t dl_src_mask[ETH_ADDR_LEN]; uint8_t dl_dst_mask[ETH_ADDR_LEN]; bool ipv4, arp, rarp; int i; match_init_catchall(match); if (!(wc & OFPFW11_IN_PORT)) { ofp_port_t ofp_port; enum ofperr error; error = ofputil_port_from_ofp11(ofmatch->in_port, &ofp_port); if (error) { return OFPERR_OFPBMC_BAD_VALUE; } match_set_in_port(match, ofp_port); } for (i = 0; i < ETH_ADDR_LEN; i++) { dl_src_mask[i] = ~ofmatch->dl_src_mask[i]; } match_set_dl_src_masked(match, ofmatch->dl_src, dl_src_mask); for (i = 0; i < ETH_ADDR_LEN; i++) { dl_dst_mask[i] = ~ofmatch->dl_dst_mask[i]; } match_set_dl_dst_masked(match, ofmatch->dl_dst, dl_dst_mask); if (!(wc & OFPFW11_DL_VLAN)) { if (ofmatch->dl_vlan == htons(OFPVID11_NONE)) { /* Match only packets without a VLAN tag. */ match->flow.vlan_tci = htons(0); match->wc.masks.vlan_tci = htons(UINT16_MAX); } else { if (ofmatch->dl_vlan == htons(OFPVID11_ANY)) { /* Match any packet with a VLAN tag regardless of VID. */ match->flow.vlan_tci = htons(VLAN_CFI); match->wc.masks.vlan_tci = htons(VLAN_CFI); } else if (ntohs(ofmatch->dl_vlan) < 4096) { /* Match only packets with the specified VLAN VID. */ match->flow.vlan_tci = htons(VLAN_CFI) | ofmatch->dl_vlan; match->wc.masks.vlan_tci = htons(VLAN_CFI | VLAN_VID_MASK); } else { /* Invalid VID. */ return OFPERR_OFPBMC_BAD_VALUE; } if (!(wc & OFPFW11_DL_VLAN_PCP)) { if (ofmatch->dl_vlan_pcp <= 7) { match->flow.vlan_tci |= htons(ofmatch->dl_vlan_pcp << VLAN_PCP_SHIFT); match->wc.masks.vlan_tci |= htons(VLAN_PCP_MASK); } else { /* Invalid PCP. 
*/ return OFPERR_OFPBMC_BAD_VALUE; } } } } if (!(wc & OFPFW11_DL_TYPE)) { match_set_dl_type(match, ofputil_dl_type_from_openflow(ofmatch->dl_type)); } ipv4 = match->flow.dl_type == htons(ETH_TYPE_IP); arp = match->flow.dl_type == htons(ETH_TYPE_ARP); rarp = match->flow.dl_type == htons(ETH_TYPE_RARP); if (ipv4 && !(wc & OFPFW11_NW_TOS)) { if (ofmatch->nw_tos & ~IP_DSCP_MASK) { /* Invalid TOS. */ return OFPERR_OFPBMC_BAD_VALUE; } match_set_nw_dscp(match, ofmatch->nw_tos); } if (ipv4 || arp || rarp) { if (!(wc & OFPFW11_NW_PROTO)) { match_set_nw_proto(match, ofmatch->nw_proto); } match_set_nw_src_masked(match, ofmatch->nw_src, ~ofmatch->nw_src_mask); match_set_nw_dst_masked(match, ofmatch->nw_dst, ~ofmatch->nw_dst_mask); } #define OFPFW11_TP_ALL (OFPFW11_TP_SRC | OFPFW11_TP_DST) if (ipv4 && (wc & OFPFW11_TP_ALL) != OFPFW11_TP_ALL) { switch (match->flow.nw_proto) { case IPPROTO_ICMP: /* "A.2.3 Flow Match Structures" in OF1.1 says: * * The tp_src and tp_dst fields will be ignored unless the * network protocol specified is as TCP, UDP or SCTP. * * but I'm pretty sure we should support ICMP too, otherwise * that's a regression from OF1.0. */ if (!(wc & OFPFW11_TP_SRC)) { uint16_t icmp_type = ntohs(ofmatch->tp_src); if (icmp_type < 0x100) { match_set_icmp_type(match, icmp_type); } else { return OFPERR_OFPBMC_BAD_FIELD; } } if (!(wc & OFPFW11_TP_DST)) { uint16_t icmp_code = ntohs(ofmatch->tp_dst); if (icmp_code < 0x100) { match_set_icmp_code(match, icmp_code); } else { return OFPERR_OFPBMC_BAD_FIELD; } } break; case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: if (!(wc & (OFPFW11_TP_SRC))) { match_set_tp_src(match, ofmatch->tp_src); } if (!(wc & (OFPFW11_TP_DST))) { match_set_tp_dst(match, ofmatch->tp_dst); } break; default: /* OF1.1 says explicitly to ignore this. */ break; } } if (eth_type_mpls(match->flow.dl_type)) { enum { OFPFW11_MPLS_ALL = OFPFW11_MPLS_LABEL | OFPFW11_MPLS_TC }; if ((wc & OFPFW11_MPLS_ALL) != OFPFW11_MPLS_ALL) { /* MPLS not supported. 
*/ return OFPERR_OFPBMC_BAD_TAG; } } match_set_metadata_masked(match, ofmatch->metadata, ~ofmatch->metadata_mask); return 0; } /* Convert 'match' into the OpenFlow 1.1 match structure 'ofmatch'. */ void ofputil_match_to_ofp11_match(const struct match *match, struct ofp11_match *ofmatch) { uint32_t wc = 0; int i; memset(ofmatch, 0, sizeof *ofmatch); ofmatch->omh.type = htons(OFPMT_STANDARD); ofmatch->omh.length = htons(OFPMT11_STANDARD_LENGTH); if (!match->wc.masks.in_port.ofp_port) { wc |= OFPFW11_IN_PORT; } else { ofmatch->in_port = ofputil_port_to_ofp11(match->flow.in_port.ofp_port); } memcpy(ofmatch->dl_src, match->flow.dl_src, ETH_ADDR_LEN); for (i = 0; i < ETH_ADDR_LEN; i++) { ofmatch->dl_src_mask[i] = ~match->wc.masks.dl_src[i]; } memcpy(ofmatch->dl_dst, match->flow.dl_dst, ETH_ADDR_LEN); for (i = 0; i < ETH_ADDR_LEN; i++) { ofmatch->dl_dst_mask[i] = ~match->wc.masks.dl_dst[i]; } if (match->wc.masks.vlan_tci == htons(0)) { wc |= OFPFW11_DL_VLAN | OFPFW11_DL_VLAN_PCP; } else if (match->wc.masks.vlan_tci & htons(VLAN_CFI) && !(match->flow.vlan_tci & htons(VLAN_CFI))) { ofmatch->dl_vlan = htons(OFPVID11_NONE); wc |= OFPFW11_DL_VLAN_PCP; } else { if (!(match->wc.masks.vlan_tci & htons(VLAN_VID_MASK))) { ofmatch->dl_vlan = htons(OFPVID11_ANY); } else { ofmatch->dl_vlan = htons(vlan_tci_to_vid(match->flow.vlan_tci)); } if (!(match->wc.masks.vlan_tci & htons(VLAN_PCP_MASK))) { wc |= OFPFW11_DL_VLAN_PCP; } else { ofmatch->dl_vlan_pcp = vlan_tci_to_pcp(match->flow.vlan_tci); } } if (!match->wc.masks.dl_type) { wc |= OFPFW11_DL_TYPE; } else { ofmatch->dl_type = ofputil_dl_type_to_openflow(match->flow.dl_type); } if (!(match->wc.masks.nw_tos & IP_DSCP_MASK)) { wc |= OFPFW11_NW_TOS; } else { ofmatch->nw_tos = match->flow.nw_tos & IP_DSCP_MASK; } if (!match->wc.masks.nw_proto) { wc |= OFPFW11_NW_PROTO; } else { ofmatch->nw_proto = match->flow.nw_proto; } ofmatch->nw_src = match->flow.nw_src; ofmatch->nw_src_mask = ~match->wc.masks.nw_src; ofmatch->nw_dst = 
match->flow.nw_dst; ofmatch->nw_dst_mask = ~match->wc.masks.nw_dst; if (!match->wc.masks.tp_src) { wc |= OFPFW11_TP_SRC; } else { ofmatch->tp_src = match->flow.tp_src; } if (!match->wc.masks.tp_dst) { wc |= OFPFW11_TP_DST; } else { ofmatch->tp_dst = match->flow.tp_dst; } /* MPLS not supported. */ wc |= OFPFW11_MPLS_LABEL; wc |= OFPFW11_MPLS_TC; ofmatch->metadata = match->flow.metadata; ofmatch->metadata_mask = ~match->wc.masks.metadata; ofmatch->wildcards = htonl(wc); } /* Returns the "typical" length of a match for 'protocol', for use in * estimating space to preallocate. */ int ofputil_match_typical_len(enum ofputil_protocol protocol) { switch (protocol) { case OFPUTIL_P_OF10_STD: case OFPUTIL_P_OF10_STD_TID: return sizeof(struct ofp10_match); case OFPUTIL_P_OF10_NXM: case OFPUTIL_P_OF10_NXM_TID: return NXM_TYPICAL_LEN; case OFPUTIL_P_OF11_STD: return sizeof(struct ofp11_match); case OFPUTIL_P_OF12_OXM: case OFPUTIL_P_OF13_OXM: return NXM_TYPICAL_LEN; default: NOT_REACHED(); } } /* Appends to 'b' a struct ofp11_match_header followed by a match that * expresses 'match' properly for 'protocol', plus enough zero bytes to pad the * data appended out to a multiple of 8. 'protocol' must be one that is usable * in OpenFlow 1.1 or later. * * This function can cause 'b''s data to be reallocated. * * Returns the number of bytes appended to 'b', excluding the padding. Never * returns zero. */ int ofputil_put_ofp11_match(struct ofpbuf *b, const struct match *match, enum ofputil_protocol protocol) { switch (protocol) { case OFPUTIL_P_OF10_STD: case OFPUTIL_P_OF10_STD_TID: case OFPUTIL_P_OF10_NXM: case OFPUTIL_P_OF10_NXM_TID: NOT_REACHED(); case OFPUTIL_P_OF11_STD: { struct ofp11_match *om; /* Make sure that no padding is needed. 
*/ BUILD_ASSERT_DECL(sizeof *om % 8 == 0); om = ofpbuf_put_uninit(b, sizeof *om); ofputil_match_to_ofp11_match(match, om); return sizeof *om; } case OFPUTIL_P_OF12_OXM: case OFPUTIL_P_OF13_OXM: return oxm_put_match(b, match); } NOT_REACHED(); } /* Given a 'dl_type' value in the format used in struct flow, returns the * corresponding 'dl_type' value for use in an ofp10_match or ofp11_match * structure. */ ovs_be16 ofputil_dl_type_to_openflow(ovs_be16 flow_dl_type) { return (flow_dl_type == htons(FLOW_DL_TYPE_NONE) ? htons(OFP_DL_TYPE_NOT_ETH_TYPE) : flow_dl_type); } /* Given a 'dl_type' value in the format used in an ofp10_match or ofp11_match * structure, returns the corresponding 'dl_type' value for use in struct * flow. */ ovs_be16 ofputil_dl_type_from_openflow(ovs_be16 ofp_dl_type) { return (ofp_dl_type == htons(OFP_DL_TYPE_NOT_ETH_TYPE) ? htons(FLOW_DL_TYPE_NONE) : ofp_dl_type); } /* Protocols. */ struct proto_abbrev { enum ofputil_protocol protocol; const char *name; }; /* Most users really don't care about some of the differences between * protocols. These abbreviations help with that. */ static const struct proto_abbrev proto_abbrevs[] = { { OFPUTIL_P_ANY, "any" }, { OFPUTIL_P_OF10_STD_ANY, "OpenFlow10" }, { OFPUTIL_P_OF10_NXM_ANY, "NXM" }, { OFPUTIL_P_ANY_OXM, "OXM" }, }; #define N_PROTO_ABBREVS ARRAY_SIZE(proto_abbrevs) enum ofputil_protocol ofputil_flow_dump_protocols[] = { OFPUTIL_P_OF13_OXM, OFPUTIL_P_OF12_OXM, OFPUTIL_P_OF11_STD, OFPUTIL_P_OF10_NXM, OFPUTIL_P_OF10_STD, }; size_t ofputil_n_flow_dump_protocols = ARRAY_SIZE(ofputil_flow_dump_protocols); /* Returns the set of ofputil_protocols that are supported with the given * OpenFlow 'version'. 'version' should normally be an 8-bit OpenFlow version * identifier (e.g. 0x01 for OpenFlow 1.0, 0x02 for OpenFlow 1.1). Returns 0 * if 'version' is not supported or outside the valid range. 
*/ enum ofputil_protocol ofputil_protocols_from_ofp_version(enum ofp_version version) { switch (version) { case OFP10_VERSION: return OFPUTIL_P_OF10_STD_ANY | OFPUTIL_P_OF10_NXM_ANY; case OFP11_VERSION: return OFPUTIL_P_OF11_STD; case OFP12_VERSION: return OFPUTIL_P_OF12_OXM; case OFP13_VERSION: return OFPUTIL_P_OF13_OXM; default: return 0; } } /* Returns the ofputil_protocol that is initially in effect on an OpenFlow * connection that has negotiated the given 'version'. 'version' should * normally be an 8-bit OpenFlow version identifier (e.g. 0x01 for OpenFlow * 1.0, 0x02 for OpenFlow 1.1). Returns 0 if 'version' is not supported or * outside the valid range. */ enum ofputil_protocol ofputil_protocol_from_ofp_version(enum ofp_version version) { return rightmost_1bit(ofputil_protocols_from_ofp_version(version)); } /* Returns the OpenFlow protocol version number (e.g. OFP10_VERSION, * etc.) that corresponds to 'protocol'. */ enum ofp_version ofputil_protocol_to_ofp_version(enum ofputil_protocol protocol) { switch (protocol) { case OFPUTIL_P_OF10_STD: case OFPUTIL_P_OF10_STD_TID: case OFPUTIL_P_OF10_NXM: case OFPUTIL_P_OF10_NXM_TID: return OFP10_VERSION; case OFPUTIL_P_OF11_STD: return OFP11_VERSION; case OFPUTIL_P_OF12_OXM: return OFP12_VERSION; case OFPUTIL_P_OF13_OXM: return OFP13_VERSION; } NOT_REACHED(); } /* Returns a bitmap of OpenFlow versions that are supported by at * least one of the 'protocols'. */ uint32_t ofputil_protocols_to_version_bitmap(enum ofputil_protocol protocols) { uint32_t bitmap = 0; for (; protocols; protocols = zero_rightmost_1bit(protocols)) { enum ofputil_protocol protocol = rightmost_1bit(protocols); bitmap |= 1u << ofputil_protocol_to_ofp_version(protocol); } return bitmap; } /* Returns the set of protocols that are supported on top of the * OpenFlow versions included in 'bitmap'. 
*/ enum ofputil_protocol ofputil_protocols_from_version_bitmap(uint32_t bitmap) { enum ofputil_protocol protocols = 0; for (; bitmap; bitmap = zero_rightmost_1bit(bitmap)) { enum ofp_version version = rightmost_1bit_idx(bitmap); protocols |= ofputil_protocols_from_ofp_version(version); } return protocols; } /* Returns true if 'protocol' is a single OFPUTIL_P_* value, false * otherwise. */ bool ofputil_protocol_is_valid(enum ofputil_protocol protocol) { return protocol & OFPUTIL_P_ANY && is_pow2(protocol); } /* Returns the equivalent of 'protocol' with the Nicira flow_mod_table_id * extension turned on or off if 'enable' is true or false, respectively. * * This extension is only useful for protocols whose "standard" version does * not allow specific tables to be modified. In particular, this is true of * OpenFlow 1.0. In later versions of OpenFlow, a flow_mod request always * specifies a table ID and so there is no need for such an extension. When * 'protocol' is such a protocol that doesn't need a flow_mod_table_id * extension, this function just returns its 'protocol' argument unchanged * regardless of the value of 'enable'. */ enum ofputil_protocol ofputil_protocol_set_tid(enum ofputil_protocol protocol, bool enable) { switch (protocol) { case OFPUTIL_P_OF10_STD: case OFPUTIL_P_OF10_STD_TID: return enable ? OFPUTIL_P_OF10_STD_TID : OFPUTIL_P_OF10_STD; case OFPUTIL_P_OF10_NXM: case OFPUTIL_P_OF10_NXM_TID: return enable ? OFPUTIL_P_OF10_NXM_TID : OFPUTIL_P_OF10_NXM; case OFPUTIL_P_OF11_STD: return OFPUTIL_P_OF11_STD; case OFPUTIL_P_OF12_OXM: return OFPUTIL_P_OF12_OXM; case OFPUTIL_P_OF13_OXM: return OFPUTIL_P_OF13_OXM; default: NOT_REACHED(); } } /* Returns the "base" version of 'protocol'. That is, if 'protocol' includes * some extension to a standard protocol version, the return value is the * standard version of that protocol without any extension. If 'protocol' is a * standard protocol version, returns 'protocol' unchanged. 
*/ enum ofputil_protocol ofputil_protocol_to_base(enum ofputil_protocol protocol) { return ofputil_protocol_set_tid(protocol, false); } /* Returns 'new_base' with any extensions taken from 'cur'. */ enum ofputil_protocol ofputil_protocol_set_base(enum ofputil_protocol cur, enum ofputil_protocol new_base) { bool tid = (cur & OFPUTIL_P_TID) != 0; switch (new_base) { case OFPUTIL_P_OF10_STD: case OFPUTIL_P_OF10_STD_TID: return ofputil_protocol_set_tid(OFPUTIL_P_OF10_STD, tid); case OFPUTIL_P_OF10_NXM: case OFPUTIL_P_OF10_NXM_TID: return ofputil_protocol_set_tid(OFPUTIL_P_OF10_NXM, tid); case OFPUTIL_P_OF11_STD: return ofputil_protocol_set_tid(OFPUTIL_P_OF11_STD, tid); case OFPUTIL_P_OF12_OXM: return ofputil_protocol_set_tid(OFPUTIL_P_OF12_OXM, tid); case OFPUTIL_P_OF13_OXM: return ofputil_protocol_set_tid(OFPUTIL_P_OF13_OXM, tid); default: NOT_REACHED(); } } /* Returns a string form of 'protocol', if a simple form exists (that is, if * 'protocol' is either a single protocol or it is a combination of protocols * that have a single abbreviation). Otherwise, returns NULL. */ const char * ofputil_protocol_to_string(enum ofputil_protocol protocol) { const struct proto_abbrev *p; /* Use a "switch" statement for single-bit names so that we get a compiler * warning if we forget any. */ switch (protocol) { case OFPUTIL_P_OF10_NXM: return "NXM-table_id"; case OFPUTIL_P_OF10_NXM_TID: return "NXM+table_id"; case OFPUTIL_P_OF10_STD: return "OpenFlow10-table_id"; case OFPUTIL_P_OF10_STD_TID: return "OpenFlow10+table_id"; case OFPUTIL_P_OF11_STD: return "OpenFlow11"; case OFPUTIL_P_OF12_OXM: return "OXM-OpenFlow12"; case OFPUTIL_P_OF13_OXM: return "OXM-OpenFlow13"; } /* Check abbreviations. */ for (p = proto_abbrevs; p < &proto_abbrevs[N_PROTO_ABBREVS]; p++) { if (protocol == p->protocol) { return p->name; } } return NULL; } /* Returns a string that represents 'protocols'. The return value might be a * comma-separated list if 'protocols' doesn't have a simple name. 
The return * value is "none" if 'protocols' is 0. * * The caller must free the returned string (with free()). */ char * ofputil_protocols_to_string(enum ofputil_protocol protocols) { struct ds s; ovs_assert(!(protocols & ~OFPUTIL_P_ANY)); if (protocols == 0) { return xstrdup("none"); } ds_init(&s); while (protocols) { const struct proto_abbrev *p; int i; if (s.length) { ds_put_char(&s, ','); } for (p = proto_abbrevs; p < &proto_abbrevs[N_PROTO_ABBREVS]; p++) { if ((protocols & p->protocol) == p->protocol) { ds_put_cstr(&s, p->name); protocols &= ~p->protocol; goto match; } } for (i = 0; i < CHAR_BIT * sizeof(enum ofputil_protocol); i++) { enum ofputil_protocol bit = 1u << i; if (protocols & bit) { ds_put_cstr(&s, ofputil_protocol_to_string(bit)); protocols &= ~bit; goto match; } } NOT_REACHED(); match: ; } return ds_steal_cstr(&s); } static enum ofputil_protocol ofputil_protocol_from_string__(const char *s, size_t n) { const struct proto_abbrev *p; int i; for (i = 0; i < CHAR_BIT * sizeof(enum ofputil_protocol); i++) { enum ofputil_protocol bit = 1u << i; const char *name = ofputil_protocol_to_string(bit); if (name && n == strlen(name) && !strncasecmp(s, name, n)) { return bit; } } for (p = proto_abbrevs; p < &proto_abbrevs[N_PROTO_ABBREVS]; p++) { if (n == strlen(p->name) && !strncasecmp(s, p->name, n)) { return p->protocol; } } return 0; } /* Returns the nonempty set of protocols represented by 's', which can be a * single protocol name or abbreviation or a comma-separated list of them. * * Aborts the program with an error message if 's' is invalid. 
*/ enum ofputil_protocol ofputil_protocols_from_string(const char *s) { const char *orig_s = s; enum ofputil_protocol protocols; protocols = 0; while (*s) { enum ofputil_protocol p; size_t n; n = strcspn(s, ","); if (n == 0) { s++; continue; } p = ofputil_protocol_from_string__(s, n); if (!p) { ovs_fatal(0, "%.*s: unknown flow protocol", (int) n, s); } protocols |= p; s += n; } if (!protocols) { ovs_fatal(0, "%s: no flow protocol specified", orig_s); } return protocols; } static int ofputil_version_from_string(const char *s) { if (!strcasecmp(s, "OpenFlow10")) { return OFP10_VERSION; } if (!strcasecmp(s, "OpenFlow11")) { return OFP11_VERSION; } if (!strcasecmp(s, "OpenFlow12")) { return OFP12_VERSION; } if (!strcasecmp(s, "OpenFlow13")) { return OFP13_VERSION; } return 0; } static bool is_delimiter(unsigned char c) { return isspace(c) || c == ','; } uint32_t ofputil_versions_from_string(const char *s) { size_t i = 0; uint32_t bitmap = 0; while (s[i]) { size_t j; int version; char *key; if (is_delimiter(s[i])) { i++; continue; } j = 0; while (s[i + j] && !is_delimiter(s[i + j])) { j++; } key = xmemdup0(s + i, j); version = ofputil_version_from_string(key); if (!version) { VLOG_FATAL("Unknown OpenFlow version: \"%s\"", key); } free(key); bitmap |= 1u << version; i += j; } return bitmap; } uint32_t ofputil_versions_from_strings(char ** const s, size_t count) { uint32_t bitmap = 0; while (count--) { int version = ofputil_version_from_string(s[count]); if (!version) { VLOG_WARN("Unknown OpenFlow version: \"%s\"", s[count]); } else { bitmap |= 1u << version; } } return bitmap; } const char * ofputil_version_to_string(enum ofp_version ofp_version) { switch (ofp_version) { case OFP10_VERSION: return "OpenFlow10"; case OFP11_VERSION: return "OpenFlow11"; case OFP12_VERSION: return "OpenFlow12"; case OFP13_VERSION: return "OpenFlow13"; default: NOT_REACHED(); } } bool ofputil_packet_in_format_is_valid(enum nx_packet_in_format packet_in_format) { switch (packet_in_format) { 
case NXPIF_OPENFLOW10: case NXPIF_NXM: return true; } return false; } const char * ofputil_packet_in_format_to_string(enum nx_packet_in_format packet_in_format) { switch (packet_in_format) { case NXPIF_OPENFLOW10: return "openflow10"; case NXPIF_NXM: return "nxm"; default: NOT_REACHED(); } } /* Returns NXPIF_OPENFLOW10 if 's' is "openflow10", NXPIF_NXM if 's' is "nxm" * (both compared case-sensitively), and -1 for any other string. */ int ofputil_packet_in_format_from_string(const char *s) { return (!strcmp(s, "openflow10") ? NXPIF_OPENFLOW10 : !strcmp(s, "nxm") ? NXPIF_NXM : -1); } void ofputil_format_version(struct ds *msg, enum ofp_version version) { ds_put_format(msg, "0x%02x", version); } void ofputil_format_version_name(struct ds *msg, enum ofp_version version) { ds_put_cstr(msg, ofputil_version_to_string(version)); } /* Calls 'format_version' on 'msg' once per 1-bit in 'bitmap', appending ", " * to 'msg' between consecutive entries. Bits are consumed rightmost first * (presumably raw_ctz() yields the index of the rightmost 1-bit; confirm * against its definition). */ static void ofputil_format_version_bitmap__(struct ds *msg, uint32_t bitmap, void (*format_version)(struct ds *msg, enum ofp_version)) { while (bitmap) { format_version(msg, raw_ctz(bitmap)); bitmap = zero_rightmost_1bit(bitmap); if (bitmap) { ds_put_cstr(msg, ", "); } } } void ofputil_format_version_bitmap(struct ds *msg, uint32_t bitmap) { ofputil_format_version_bitmap__(msg, bitmap, ofputil_format_version); } void ofputil_format_version_bitmap_names(struct ds *msg, uint32_t bitmap) { ofputil_format_version_bitmap__(msg, bitmap, ofputil_format_version_name); } static bool ofputil_decode_hello_bitmap(const struct ofp_hello_elem_header *oheh, uint32_t *allowed_versionsp) { uint16_t bitmap_len = ntohs(oheh->length) - sizeof *oheh; const ovs_be32 *bitmap = ALIGNED_CAST(const ovs_be32 *, oheh + 1); uint32_t allowed_versions; if (!bitmap_len || bitmap_len % sizeof *bitmap) { return false; } /* Only use the first 32-bit element of the bitmap as that is all the * current implementation supports. Subsequent elements are ignored which * should have no effect on session negotiation until Open vSwitch supports * wire-protocol versions greater than 31. */ allowed_versions = ntohl(bitmap[0]); if (allowed_versions & 1) { /* There's no OpenFlow version 0. 
*/ VLOG_WARN_RL(&bad_ofmsg_rl, "peer claims to support invalid OpenFlow " "version 0x00"); allowed_versions &= ~1u; } if (!allowed_versions) { VLOG_WARN_RL(&bad_ofmsg_rl, "peer does not support any OpenFlow " "version (between 0x01 and 0x1f)"); return false; } *allowed_versionsp = allowed_versions; return true; } static uint32_t version_bitmap_from_version(uint8_t ofp_version) { return ((ofp_version < 32 ? 1u << ofp_version : 0) - 1) << 1; } /* Decodes OpenFlow OFPT_HELLO message 'oh', storing into '*allowed_versions' * the set of OpenFlow versions for which 'oh' announces support. * * Because of how OpenFlow defines OFPT_HELLO messages, this function is always * successful, and thus '*allowed_versions' is always initialized. However, it * returns false if 'oh' contains some data that could not be fully understood, * true if 'oh' was completely parsed. */ bool ofputil_decode_hello(const struct ofp_header *oh, uint32_t *allowed_versions) { struct ofpbuf msg; bool ok = true; ofpbuf_use_const(&msg, oh, ntohs(oh->length)); ofpbuf_pull(&msg, sizeof *oh); *allowed_versions = version_bitmap_from_version(oh->version); while (msg.size) { const struct ofp_hello_elem_header *oheh; unsigned int len; if (msg.size < sizeof *oheh) { return false; } oheh = msg.data; len = ntohs(oheh->length); if (len < sizeof *oheh || !ofpbuf_try_pull(&msg, ROUND_UP(len, 8))) { return false; } if (oheh->type != htons(OFPHET_VERSIONBITMAP) || !ofputil_decode_hello_bitmap(oheh, allowed_versions)) { ok = false; } } return ok; } /* Returns true if 'allowed_versions' needs to be accompanied by a version * bitmap to be correctly expressed in an OFPT_HELLO message. */ static bool should_send_version_bitmap(uint32_t allowed_versions) { return !is_pow2((allowed_versions >> 1) + 1); } /* Create an OFPT_HELLO message that expresses support for the OpenFlow * versions in the 'allowed_versions' bitmaps and returns the message. 
*/ struct ofpbuf * ofputil_encode_hello(uint32_t allowed_versions) { enum ofp_version ofp_version; struct ofpbuf *msg; ofp_version = leftmost_1bit_idx(allowed_versions); msg = ofpraw_alloc(OFPRAW_OFPT_HELLO, ofp_version, 0); if (should_send_version_bitmap(allowed_versions)) { struct ofp_hello_elem_header *oheh; uint16_t map_len; map_len = sizeof allowed_versions; oheh = ofpbuf_put_zeros(msg, ROUND_UP(map_len + sizeof *oheh, 8)); oheh->type = htons(OFPHET_VERSIONBITMAP); oheh->length = htons(map_len + sizeof *oheh); *ALIGNED_CAST(ovs_be32 *, oheh + 1) = htonl(allowed_versions); ofpmsg_update_length(msg); } return msg; } /* Returns an OpenFlow message that, sent on an OpenFlow connection whose * protocol is 'current', at least partly transitions the protocol to 'want'. * Stores in '*next' the protocol that will be in effect on the OpenFlow * connection if the switch processes the returned message correctly. (If * '*next != want' then the caller will have to iterate.) * * If 'current == want', or if it is not possible to transition from 'current' * to 'want' (because, for example, 'current' and 'want' use different OpenFlow * protocol versions), returns NULL and stores 'current' in '*next'. 
*/ struct ofpbuf * ofputil_encode_set_protocol(enum ofputil_protocol current, enum ofputil_protocol want, enum ofputil_protocol *next) { enum ofp_version cur_version, want_version; enum ofputil_protocol cur_base, want_base; bool cur_tid, want_tid; cur_version = ofputil_protocol_to_ofp_version(current); want_version = ofputil_protocol_to_ofp_version(want); if (cur_version != want_version) { *next = current; return NULL; } cur_base = ofputil_protocol_to_base(current); want_base = ofputil_protocol_to_base(want); if (cur_base != want_base) { *next = ofputil_protocol_set_base(current, want_base); switch (want_base) { case OFPUTIL_P_OF10_NXM: return ofputil_encode_nx_set_flow_format(NXFF_NXM); case OFPUTIL_P_OF10_STD: return ofputil_encode_nx_set_flow_format(NXFF_OPENFLOW10); case OFPUTIL_P_OF11_STD: case OFPUTIL_P_OF12_OXM: case OFPUTIL_P_OF13_OXM: /* There is only one variant of each OpenFlow 1.1+ protocol, and we * verified above that we're not trying to change versions. */ NOT_REACHED(); case OFPUTIL_P_OF10_STD_TID: case OFPUTIL_P_OF10_NXM_TID: NOT_REACHED(); } } cur_tid = (current & OFPUTIL_P_TID) != 0; want_tid = (want & OFPUTIL_P_TID) != 0; if (cur_tid != want_tid) { *next = ofputil_protocol_set_tid(current, want_tid); return ofputil_make_flow_mod_table_id(want_tid); } ovs_assert(current == want); *next = current; return NULL; } /* Returns an NXT_SET_FLOW_FORMAT message that can be used to set the flow * format to 'nxff'. */ struct ofpbuf * ofputil_encode_nx_set_flow_format(enum nx_flow_format nxff) { struct nx_set_flow_format *sff; struct ofpbuf *msg; ovs_assert(ofputil_nx_flow_format_is_valid(nxff)); msg = ofpraw_alloc(OFPRAW_NXT_SET_FLOW_FORMAT, OFP10_VERSION, 0); sff = ofpbuf_put_zeros(msg, sizeof *sff); sff->format = htonl(nxff); return msg; } /* Returns the base protocol if 'flow_format' is a valid NXFF_* value, 0 * otherwise. 
*/ enum ofputil_protocol ofputil_nx_flow_format_to_protocol(enum nx_flow_format flow_format) { switch (flow_format) { case NXFF_OPENFLOW10: return OFPUTIL_P_OF10_STD; case NXFF_NXM: return OFPUTIL_P_OF10_NXM; default: return 0; } } /* Returns true if 'flow_format' is a valid NXFF_* value, false otherwise. */ bool ofputil_nx_flow_format_is_valid(enum nx_flow_format flow_format) { return ofputil_nx_flow_format_to_protocol(flow_format) != 0; } /* Returns a string version of 'flow_format', which must be a valid NXFF_* * value. */ const char * ofputil_nx_flow_format_to_string(enum nx_flow_format flow_format) { switch (flow_format) { case NXFF_OPENFLOW10: return "openflow10"; case NXFF_NXM: return "nxm"; default: NOT_REACHED(); } } struct ofpbuf * ofputil_make_set_packet_in_format(enum ofp_version ofp_version, enum nx_packet_in_format packet_in_format) { struct nx_set_packet_in_format *spif; struct ofpbuf *msg; msg = ofpraw_alloc(OFPRAW_NXT_SET_PACKET_IN_FORMAT, ofp_version, 0); spif = ofpbuf_put_zeros(msg, sizeof *spif); spif->format = htonl(packet_in_format); return msg; } /* Returns an OpenFlow message that can be used to turn the flow_mod_table_id * extension on or off (according to 'flow_mod_table_id'). 
*/ struct ofpbuf * ofputil_make_flow_mod_table_id(bool flow_mod_table_id) { struct nx_flow_mod_table_id *nfmti; struct ofpbuf *msg; msg = ofpraw_alloc(OFPRAW_NXT_FLOW_MOD_TABLE_ID, OFP10_VERSION, 0); nfmti = ofpbuf_put_zeros(msg, sizeof *nfmti); nfmti->set = flow_mod_table_id; return msg; } struct ofputil_flow_mod_flag { uint16_t raw_flag; enum ofp_version min_version, max_version; enum ofputil_flow_mod_flags flag; }; static const struct ofputil_flow_mod_flag ofputil_flow_mod_flags[] = { { OFPFF_SEND_FLOW_REM, OFP10_VERSION, 0, OFPUTIL_FF_SEND_FLOW_REM }, { OFPFF_CHECK_OVERLAP, OFP10_VERSION, 0, OFPUTIL_FF_CHECK_OVERLAP }, { OFPFF10_EMERG, OFP10_VERSION, OFP10_VERSION, OFPUTIL_FF_EMERG }, { OFPFF12_RESET_COUNTS, OFP12_VERSION, 0, OFPUTIL_FF_RESET_COUNTS }, { OFPFF13_NO_PKT_COUNTS, OFP13_VERSION, 0, OFPUTIL_FF_NO_PKT_COUNTS }, { OFPFF13_NO_BYT_COUNTS, OFP13_VERSION, 0, OFPUTIL_FF_NO_BYT_COUNTS }, { 0, 0, 0, 0 }, }; static enum ofperr ofputil_decode_flow_mod_flags(ovs_be16 raw_flags_, enum ofp_flow_mod_command command, enum ofp_version version, enum ofputil_flow_mod_flags *flagsp) { uint16_t raw_flags = ntohs(raw_flags_); const struct ofputil_flow_mod_flag *f; *flagsp = 0; for (f = ofputil_flow_mod_flags; f->raw_flag; f++) { if (raw_flags & f->raw_flag && version >= f->min_version && (!f->max_version || version <= f->max_version)) { raw_flags &= ~f->raw_flag; *flagsp |= f->flag; } } /* In OF1.0 and OF1.1, "add" always resets counters, and other commands * never do. * * In OF1.2 and later, OFPFF12_RESET_COUNTS controls whether each command * resets counters. */ if ((version == OFP10_VERSION || version == OFP11_VERSION) && command == OFPFC_ADD) { *flagsp |= OFPUTIL_FF_RESET_COUNTS; } return raw_flags ? 
OFPERR_OFPFMFC_BAD_FLAGS : 0; } static ovs_be16 ofputil_encode_flow_mod_flags(enum ofputil_flow_mod_flags flags, enum ofp_version version) { const struct ofputil_flow_mod_flag *f; uint16_t raw_flags; raw_flags = 0; for (f = ofputil_flow_mod_flags; f->raw_flag; f++) { if (f->flag & flags && version >= f->min_version && (!f->max_version || version <= f->max_version)) { raw_flags |= f->raw_flag; } } return htons(raw_flags); } /* Converts an OFPT_FLOW_MOD or NXT_FLOW_MOD message 'oh' into an abstract * flow_mod in 'fm'. Returns 0 if successful, otherwise an OpenFlow error * code. * * Uses 'ofpacts' to store the abstract OFPACT_* version of 'oh''s actions. * The caller must initialize 'ofpacts' and retains ownership of it. * 'fm->ofpacts' will point into the 'ofpacts' buffer. * * Does not validate the flow_mod actions. The caller should do that, with * ofpacts_check(). */ enum ofperr ofputil_decode_flow_mod(struct ofputil_flow_mod *fm, const struct ofp_header *oh, enum ofputil_protocol protocol, struct ofpbuf *ofpacts) { ovs_be16 raw_flags; enum ofperr error; struct ofpbuf b; enum ofpraw raw; ofpbuf_use_const(&b, oh, ntohs(oh->length)); raw = ofpraw_pull_assert(&b); if (raw == OFPRAW_OFPT11_FLOW_MOD) { /* Standard OpenFlow 1.1+ flow_mod. */ const struct ofp11_flow_mod *ofm; ofm = ofpbuf_pull(&b, sizeof *ofm); error = ofputil_pull_ofp11_match(&b, &fm->match, NULL); if (error) { return error; } error = ofpacts_pull_openflow11_instructions(&b, b.size, ofpacts); if (error) { return error; } /* Translate the message. */ fm->priority = ntohs(ofm->priority); if (ofm->command == OFPFC_ADD || (oh->version == OFP11_VERSION && (ofm->command == OFPFC_MODIFY || ofm->command == OFPFC_MODIFY_STRICT) && ofm->cookie_mask == htonll(0))) { /* In OpenFlow 1.1 only, a "modify" or "modify-strict" that does * not match on the cookie is treated as an "add" if there is no * match. 
*/ fm->cookie = htonll(0); fm->cookie_mask = htonll(0); fm->new_cookie = ofm->cookie; } else { fm->cookie = ofm->cookie; fm->cookie_mask = ofm->cookie_mask; fm->new_cookie = htonll(UINT64_MAX); } fm->modify_cookie = false; fm->command = ofm->command; fm->table_id = ofm->table_id; fm->idle_timeout = ntohs(ofm->idle_timeout); fm->hard_timeout = ntohs(ofm->hard_timeout); fm->buffer_id = ntohl(ofm->buffer_id); error = ofputil_port_from_ofp11(ofm->out_port, &fm->out_port); if (error) { return error; } if ((ofm->command == OFPFC_DELETE || ofm->command == OFPFC_DELETE_STRICT) && ofm->out_group != htonl(OFPG_ANY)) { return OFPERR_OFPFMFC_UNKNOWN; } raw_flags = ofm->flags; } else { uint16_t command; if (raw == OFPRAW_OFPT10_FLOW_MOD) { /* Standard OpenFlow 1.0 flow_mod. */ const struct ofp10_flow_mod *ofm; /* Get the ofp10_flow_mod. */ ofm = ofpbuf_pull(&b, sizeof *ofm); /* Translate the rule. */ ofputil_match_from_ofp10_match(&ofm->match, &fm->match); ofputil_normalize_match(&fm->match); /* Now get the actions. */ error = ofpacts_pull_openflow10(&b, b.size, ofpacts); if (error) { return error; } /* OpenFlow 1.0 says that exact-match rules have to have the * highest possible priority. */ fm->priority = (ofm->match.wildcards & htonl(OFPFW10_ALL) ? ntohs(ofm->priority) : UINT16_MAX); /* Translate the message. */ command = ntohs(ofm->command); fm->cookie = htonll(0); fm->cookie_mask = htonll(0); fm->new_cookie = ofm->cookie; fm->idle_timeout = ntohs(ofm->idle_timeout); fm->hard_timeout = ntohs(ofm->hard_timeout); fm->buffer_id = ntohl(ofm->buffer_id); fm->out_port = u16_to_ofp(ntohs(ofm->out_port)); raw_flags = ofm->flags; } else if (raw == OFPRAW_NXT_FLOW_MOD) { /* Nicira extended flow_mod. */ const struct nx_flow_mod *nfm; /* Dissect the message. 
*/ nfm = ofpbuf_pull(&b, sizeof *nfm); error = nx_pull_match(&b, ntohs(nfm->match_len), &fm->match, &fm->cookie, &fm->cookie_mask); if (error) { return error; } error = ofpacts_pull_openflow10(&b, b.size, ofpacts); if (error) { return error; } /* Translate the message. */ command = ntohs(nfm->command); if ((command & 0xff) == OFPFC_ADD && fm->cookie_mask) { /* Flow additions may only set a new cookie, not match an * existing cookie. */ return OFPERR_NXBRC_NXM_INVALID; } fm->priority = ntohs(nfm->priority); fm->new_cookie = nfm->cookie; fm->idle_timeout = ntohs(nfm->idle_timeout); fm->hard_timeout = ntohs(nfm->hard_timeout); fm->buffer_id = ntohl(nfm->buffer_id); fm->out_port = u16_to_ofp(ntohs(nfm->out_port)); raw_flags = nfm->flags; } else { NOT_REACHED(); } fm->modify_cookie = fm->new_cookie != htonll(UINT64_MAX); if (protocol & OFPUTIL_P_TID) { fm->command = command & 0xff; fm->table_id = command >> 8; } else { fm->command = command; fm->table_id = 0xff; } } fm->ofpacts = ofpacts->data; fm->ofpacts_len = ofpacts->size; error = ofputil_decode_flow_mod_flags(raw_flags, fm->command, oh->version, &fm->flags); if (error) { return error; } if (fm->flags & OFPUTIL_FF_EMERG) { /* We do not support the OpenFlow 1.0 emergency flow cache, which * is not required in OpenFlow 1.0.1 and removed from OpenFlow 1.1. * * OpenFlow 1.0 specifies the error code to use when idle_timeout * or hard_timeout is nonzero. Otherwise, there is no good error * code, so just state that the flow table is full. */ return (fm->hard_timeout || fm->idle_timeout ? 
OFPERR_OFPFMFC_BAD_EMERG_TIMEOUT : OFPERR_OFPFMFC_TABLE_FULL); } return 0; } static enum ofperr ofputil_pull_bands(struct ofpbuf *msg, size_t len, uint16_t *n_bands, struct ofpbuf *bands) { const struct ofp13_meter_band_header *ombh; struct ofputil_meter_band *mb; uint16_t n = 0; ombh = ofpbuf_try_pull(msg, len); if (!ombh) { return OFPERR_OFPBRC_BAD_LEN; } while (len >= sizeof (struct ofp13_meter_band_drop)) { size_t ombh_len = ntohs(ombh->len); /* All supported band types have the same length. */ if (ombh_len != sizeof (struct ofp13_meter_band_drop)) { return OFPERR_OFPBRC_BAD_LEN; } mb = ofpbuf_put_uninit(bands, sizeof *mb); mb->type = ntohs(ombh->type); mb->rate = ntohl(ombh->rate); mb->burst_size = ntohl(ombh->burst_size); mb->prec_level = (mb->type == OFPMBT13_DSCP_REMARK) ? ((struct ofp13_meter_band_dscp_remark *)ombh)->prec_level : 0; n++; len -= ombh_len; ombh = ALIGNED_CAST(struct ofp13_meter_band_header *, (char *) ombh + ombh_len); } if (len) { return OFPERR_OFPBRC_BAD_LEN; } *n_bands = n; return 0; } enum ofperr ofputil_decode_meter_mod(const struct ofp_header *oh, struct ofputil_meter_mod *mm, struct ofpbuf *bands) { const struct ofp13_meter_mod *omm; struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpraw_pull_assert(&b); omm = ofpbuf_pull(&b, sizeof *omm); /* Translate the message. 
*/ mm->command = ntohs(omm->command); mm->meter.meter_id = ntohl(omm->meter_id); if (mm->command == OFPMC13_DELETE) { mm->meter.flags = 0; mm->meter.n_bands = 0; mm->meter.bands = NULL; } else { enum ofperr error; mm->meter.flags = ntohs(omm->flags); mm->meter.bands = bands->data; error = ofputil_pull_bands(&b, b.size, &mm->meter.n_bands, bands); if (error) { return error; } } return 0; } void ofputil_decode_meter_request(const struct ofp_header *oh, uint32_t *meter_id) { const struct ofp13_meter_multipart_request *omr = ofpmsg_body(oh); *meter_id = ntohl(omr->meter_id); } struct ofpbuf * ofputil_encode_meter_request(enum ofp_version ofp_version, enum ofputil_meter_request_type type, uint32_t meter_id) { struct ofpbuf *msg; enum ofpraw raw; switch (type) { case OFPUTIL_METER_CONFIG: raw = OFPRAW_OFPST13_METER_CONFIG_REQUEST; break; case OFPUTIL_METER_STATS: raw = OFPRAW_OFPST13_METER_REQUEST; break; default: case OFPUTIL_METER_FEATURES: raw = OFPRAW_OFPST13_METER_FEATURES_REQUEST; break; } msg = ofpraw_alloc(raw, ofp_version, 0); if (type != OFPUTIL_METER_FEATURES) { struct ofp13_meter_multipart_request *omr; omr = ofpbuf_put_zeros(msg, sizeof *omr); omr->meter_id = htonl(meter_id); } return msg; } static void ofputil_put_bands(uint16_t n_bands, const struct ofputil_meter_band *mb, struct ofpbuf *msg) { uint16_t n = 0; for (n = 0; n < n_bands; ++n) { /* Currently all band types have same size. */ struct ofp13_meter_band_dscp_remark *ombh; size_t ombh_len = sizeof *ombh; ombh = ofpbuf_put_zeros(msg, ombh_len); ombh->type = htons(mb->type); ombh->len = htons(ombh_len); ombh->rate = htonl(mb->rate); ombh->burst_size = htonl(mb->burst_size); ombh->prec_level = mb->prec_level; mb++; } } /* Encode a meter stat for 'mc' and append it to 'replies'. 
*/ void ofputil_append_meter_config(struct list *replies, const struct ofputil_meter_config *mc) { struct ofpbuf *msg = ofpbuf_from_list(list_back(replies)); size_t start_ofs = msg->size; struct ofp13_meter_config *reply = ofpbuf_put_uninit(msg, sizeof *reply); reply->flags = htons(mc->flags); reply->meter_id = htonl(mc->meter_id); ofputil_put_bands(mc->n_bands, mc->bands, msg); reply->length = htons(msg->size - start_ofs); ofpmp_postappend(replies, start_ofs); } /* Encode a meter stat for 'ms' and append it to 'replies'. */ void ofputil_append_meter_stats(struct list *replies, const struct ofputil_meter_stats *ms) { struct ofp13_meter_stats *reply; uint16_t n = 0; uint16_t len; len = sizeof *reply + ms->n_bands * sizeof(struct ofp13_meter_band_stats); reply = ofpmp_append(replies, len); reply->meter_id = htonl(ms->meter_id); reply->len = htons(len); memset(reply->pad, 0, sizeof reply->pad); reply->flow_count = htonl(ms->flow_count); reply->packet_in_count = htonll(ms->packet_in_count); reply->byte_in_count = htonll(ms->byte_in_count); reply->duration_sec = htonl(ms->duration_sec); reply->duration_nsec = htonl(ms->duration_nsec); for (n = 0; n < ms->n_bands; ++n) { const struct ofputil_meter_band_stats *src = &ms->bands[n]; struct ofp13_meter_band_stats *dst = &reply->band_stats[n]; dst->packet_band_count = htonll(src->packet_count); dst->byte_band_count = htonll(src->byte_count); } } /* Converts an OFPMP_METER_CONFIG reply in 'msg' into an abstract * ofputil_meter_config in 'mc', with mc->bands pointing to bands decoded into * 'bands'. The caller must have initialized 'bands' and retains ownership of * it across the call. * * Multiple OFPST13_METER_CONFIG replies can be packed into a single OpenFlow * message. Calling this function multiple times for a single 'msg' iterates * through the replies. 'bands' is cleared for each reply. * * Returns 0 if successful, EOF if no replies were left in this 'msg', * otherwise a positive errno value. 
*/ int ofputil_decode_meter_config(struct ofpbuf *msg, struct ofputil_meter_config *mc, struct ofpbuf *bands) { const struct ofp13_meter_config *omc; enum ofperr err; /* Pull OpenFlow headers for the first call. */ if (!msg->l2) { ofpraw_pull_assert(msg); } if (!msg->size) { return EOF; } omc = ofpbuf_try_pull(msg, sizeof *omc); if (!omc) { VLOG_WARN_RL(&bad_ofmsg_rl, "OFPMP_METER_CONFIG reply has %zu leftover bytes at end", msg->size); return OFPERR_OFPBRC_BAD_LEN; } ofpbuf_clear(bands); err = ofputil_pull_bands(msg, ntohs(omc->length) - sizeof *omc, &mc->n_bands, bands); if (err) { return err; } mc->meter_id = ntohl(omc->meter_id); mc->flags = ntohs(omc->flags); mc->bands = bands->data; return 0; } static enum ofperr ofputil_pull_band_stats(struct ofpbuf *msg, size_t len, uint16_t *n_bands, struct ofpbuf *bands) { const struct ofp13_meter_band_stats *ombs; struct ofputil_meter_band_stats *mbs; uint16_t n, i; ombs = ofpbuf_try_pull(msg, len); if (!ombs) { return OFPERR_OFPBRC_BAD_LEN; } n = len / sizeof *ombs; if (len != n * sizeof *ombs) { return OFPERR_OFPBRC_BAD_LEN; } mbs = ofpbuf_put_uninit(bands, len); for (i = 0; i < n; ++i) { mbs[i].packet_count = ntohll(ombs[i].packet_band_count); mbs[i].byte_count = ntohll(ombs[i].byte_band_count); } *n_bands = n; return 0; } /* Converts an OFPMP_METER reply in 'msg' into an abstract * ofputil_meter_stats in 'ms', with ms->bands pointing to band stats * decoded into 'bands'. * * Multiple OFPMP_METER replies can be packed into a single OpenFlow * message. Calling this function multiple times for a single 'msg' iterates * through the replies. 'bands' is cleared for each reply. * * Returns 0 if successful, EOF if no replies were left in this 'msg', * otherwise a positive errno value. */ int ofputil_decode_meter_stats(struct ofpbuf *msg, struct ofputil_meter_stats *ms, struct ofpbuf *bands) { const struct ofp13_meter_stats *oms; enum ofperr err; /* Pull OpenFlow headers for the first call. 
*/ if (!msg->l2) { ofpraw_pull_assert(msg); } if (!msg->size) { return EOF; } oms = ofpbuf_try_pull(msg, sizeof *oms); if (!oms) { VLOG_WARN_RL(&bad_ofmsg_rl, "OFPMP_METER reply has %zu leftover bytes at end", msg->size); return OFPERR_OFPBRC_BAD_LEN; } ofpbuf_clear(bands); err = ofputil_pull_band_stats(msg, ntohs(oms->len) - sizeof *oms, &ms->n_bands, bands); if (err) { return err; } ms->meter_id = ntohl(oms->meter_id); ms->flow_count = ntohl(oms->flow_count); ms->packet_in_count = ntohll(oms->packet_in_count); ms->byte_in_count = ntohll(oms->byte_in_count); ms->duration_sec = ntohl(oms->duration_sec); ms->duration_nsec = ntohl(oms->duration_nsec); ms->bands = bands->data; return 0; } void ofputil_decode_meter_features(const struct ofp_header *oh, struct ofputil_meter_features *mf) { const struct ofp13_meter_features *omf = ofpmsg_body(oh); mf->max_meters = ntohl(omf->max_meter); mf->band_types = ntohl(omf->band_types); mf->capabilities = ntohl(omf->capabilities); mf->max_bands = omf->max_bands; mf->max_color = omf->max_color; } struct ofpbuf * ofputil_encode_meter_features_reply(const struct ofputil_meter_features *mf, const struct ofp_header *request) { struct ofpbuf *reply; struct ofp13_meter_features *omf; reply = ofpraw_alloc_stats_reply(request, 0); omf = ofpbuf_put_zeros(reply, sizeof *omf); omf->max_meter = htonl(mf->max_meters); omf->band_types = htonl(mf->band_types); omf->capabilities = htonl(mf->capabilities); omf->max_bands = mf->max_bands; omf->max_color = mf->max_color; return reply; } struct ofpbuf * ofputil_encode_meter_mod(enum ofp_version ofp_version, const struct ofputil_meter_mod *mm) { struct ofpbuf *msg; struct ofp13_meter_mod *omm; msg = ofpraw_alloc(OFPRAW_OFPT13_METER_MOD, ofp_version, NXM_TYPICAL_LEN + mm->meter.n_bands * 16); omm = ofpbuf_put_zeros(msg, sizeof *omm); omm->command = htons(mm->command); if (mm->command != OFPMC13_DELETE) { omm->flags = htons(mm->meter.flags); } omm->meter_id = htonl(mm->meter.meter_id); 
ofputil_put_bands(mm->meter.n_bands, mm->meter.bands, msg); ofpmsg_update_length(msg); return msg; } static ovs_be16 ofputil_tid_command(const struct ofputil_flow_mod *fm, enum ofputil_protocol protocol) { return htons(protocol & OFPUTIL_P_TID ? (fm->command & 0xff) | (fm->table_id << 8) : fm->command); } /* Converts 'fm' into an OFPT_FLOW_MOD or NXT_FLOW_MOD message according to * 'protocol' and returns the message. */ struct ofpbuf * ofputil_encode_flow_mod(const struct ofputil_flow_mod *fm, enum ofputil_protocol protocol) { enum ofp_version version = ofputil_protocol_to_ofp_version(protocol); ovs_be16 raw_flags = ofputil_encode_flow_mod_flags(fm->flags, version); struct ofpbuf *msg; switch (protocol) { case OFPUTIL_P_OF11_STD: case OFPUTIL_P_OF12_OXM: case OFPUTIL_P_OF13_OXM: { struct ofp11_flow_mod *ofm; int tailroom; tailroom = ofputil_match_typical_len(protocol) + fm->ofpacts_len; msg = ofpraw_alloc(OFPRAW_OFPT11_FLOW_MOD, version, tailroom); ofm = ofpbuf_put_zeros(msg, sizeof *ofm); if ((protocol == OFPUTIL_P_OF11_STD && (fm->command == OFPFC_MODIFY || fm->command == OFPFC_MODIFY_STRICT) && fm->cookie_mask == htonll(0)) || fm->command == OFPFC_ADD) { ofm->cookie = fm->new_cookie; } else { ofm->cookie = fm->cookie; } ofm->cookie_mask = fm->cookie_mask; ofm->table_id = fm->table_id; ofm->command = fm->command; ofm->idle_timeout = htons(fm->idle_timeout); ofm->hard_timeout = htons(fm->hard_timeout); ofm->priority = htons(fm->priority); ofm->buffer_id = htonl(fm->buffer_id); ofm->out_port = ofputil_port_to_ofp11(fm->out_port); ofm->out_group = htonl(OFPG11_ANY); ofm->flags = raw_flags; ofputil_put_ofp11_match(msg, &fm->match, protocol); ofpacts_put_openflow11_instructions(fm->ofpacts, fm->ofpacts_len, msg); break; } case OFPUTIL_P_OF10_STD: case OFPUTIL_P_OF10_STD_TID: { struct ofp10_flow_mod *ofm; msg = ofpraw_alloc(OFPRAW_OFPT10_FLOW_MOD, OFP10_VERSION, fm->ofpacts_len); ofm = ofpbuf_put_zeros(msg, sizeof *ofm); ofputil_match_to_ofp10_match(&fm->match, 
&ofm->match); ofm->cookie = fm->new_cookie; ofm->command = ofputil_tid_command(fm, protocol); ofm->idle_timeout = htons(fm->idle_timeout); ofm->hard_timeout = htons(fm->hard_timeout); ofm->priority = htons(fm->priority); ofm->buffer_id = htonl(fm->buffer_id); ofm->out_port = htons(ofp_to_u16(fm->out_port)); ofm->flags = raw_flags; ofpacts_put_openflow10(fm->ofpacts, fm->ofpacts_len, msg); break; } case OFPUTIL_P_OF10_NXM: case OFPUTIL_P_OF10_NXM_TID: { struct nx_flow_mod *nfm; int match_len; msg = ofpraw_alloc(OFPRAW_NXT_FLOW_MOD, OFP10_VERSION, NXM_TYPICAL_LEN + fm->ofpacts_len); nfm = ofpbuf_put_zeros(msg, sizeof *nfm); nfm->command = ofputil_tid_command(fm, protocol); nfm->cookie = fm->new_cookie; match_len = nx_put_match(msg, &fm->match, fm->cookie, fm->cookie_mask); nfm = msg->l3; nfm->idle_timeout = htons(fm->idle_timeout); nfm->hard_timeout = htons(fm->hard_timeout); nfm->priority = htons(fm->priority); nfm->buffer_id = htonl(fm->buffer_id); nfm->out_port = htons(ofp_to_u16(fm->out_port)); nfm->flags = raw_flags; nfm->match_len = htons(match_len); ofpacts_put_openflow10(fm->ofpacts, fm->ofpacts_len, msg); break; } default: NOT_REACHED(); } ofpmsg_update_length(msg); return msg; } static enum ofperr ofputil_decode_ofpst10_flow_request(struct ofputil_flow_stats_request *fsr, const struct ofp10_flow_stats_request *ofsr, bool aggregate) { fsr->aggregate = aggregate; ofputil_match_from_ofp10_match(&ofsr->match, &fsr->match); fsr->out_port = u16_to_ofp(ntohs(ofsr->out_port)); fsr->table_id = ofsr->table_id; fsr->cookie = fsr->cookie_mask = htonll(0); return 0; } static enum ofperr ofputil_decode_ofpst11_flow_request(struct ofputil_flow_stats_request *fsr, struct ofpbuf *b, bool aggregate) { const struct ofp11_flow_stats_request *ofsr; enum ofperr error; ofsr = ofpbuf_pull(b, sizeof *ofsr); fsr->aggregate = aggregate; fsr->table_id = ofsr->table_id; error = ofputil_port_from_ofp11(ofsr->out_port, &fsr->out_port); if (error) { return error; } if (ofsr->out_group != 
htonl(OFPG11_ANY)) { return OFPERR_OFPFMFC_UNKNOWN; } fsr->cookie = ofsr->cookie; fsr->cookie_mask = ofsr->cookie_mask; error = ofputil_pull_ofp11_match(b, &fsr->match, NULL); if (error) { return error; } return 0; } static enum ofperr ofputil_decode_nxst_flow_request(struct ofputil_flow_stats_request *fsr, struct ofpbuf *b, bool aggregate) { const struct nx_flow_stats_request *nfsr; enum ofperr error; nfsr = ofpbuf_pull(b, sizeof *nfsr); error = nx_pull_match(b, ntohs(nfsr->match_len), &fsr->match, &fsr->cookie, &fsr->cookie_mask); if (error) { return error; } if (b->size) { return OFPERR_OFPBRC_BAD_LEN; } fsr->aggregate = aggregate; fsr->out_port = u16_to_ofp(ntohs(nfsr->out_port)); fsr->table_id = nfsr->table_id; return 0; } /* Converts an OFPST_FLOW, OFPST_AGGREGATE, NXST_FLOW, or NXST_AGGREGATE * request 'oh', into an abstract flow_stats_request in 'fsr'. Returns 0 if * successful, otherwise an OpenFlow error code. */ enum ofperr ofputil_decode_flow_stats_request(struct ofputil_flow_stats_request *fsr, const struct ofp_header *oh) { enum ofpraw raw; struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); raw = ofpraw_pull_assert(&b); switch ((int) raw) { case OFPRAW_OFPST10_FLOW_REQUEST: return ofputil_decode_ofpst10_flow_request(fsr, b.data, false); case OFPRAW_OFPST10_AGGREGATE_REQUEST: return ofputil_decode_ofpst10_flow_request(fsr, b.data, true); case OFPRAW_OFPST11_FLOW_REQUEST: return ofputil_decode_ofpst11_flow_request(fsr, &b, false); case OFPRAW_OFPST11_AGGREGATE_REQUEST: return ofputil_decode_ofpst11_flow_request(fsr, &b, true); case OFPRAW_NXST_FLOW_REQUEST: return ofputil_decode_nxst_flow_request(fsr, &b, false); case OFPRAW_NXST_AGGREGATE_REQUEST: return ofputil_decode_nxst_flow_request(fsr, &b, true); default: /* Hey, the caller lied. */ NOT_REACHED(); } } /* Converts abstract flow_stats_request 'fsr' into an OFPST_FLOW, * OFPST_AGGREGATE, NXST_FLOW, or NXST_AGGREGATE request 'oh' according to * 'protocol', and returns the message. 
*/
struct ofpbuf *
ofputil_encode_flow_stats_request(const struct ofputil_flow_stats_request *fsr,
                                  enum ofputil_protocol protocol)
{
    struct ofpbuf *request;
    enum ofpraw type;

    switch (protocol) {
    case OFPUTIL_P_OF11_STD:
    case OFPUTIL_P_OF12_OXM:
    case OFPUTIL_P_OF13_OXM: {
        /* Standard OpenFlow 1.1+ request: fixed body followed by a match. */
        struct ofp11_flow_stats_request *body;

        if (fsr->aggregate) {
            type = OFPRAW_OFPST11_AGGREGATE_REQUEST;
        } else {
            type = OFPRAW_OFPST11_FLOW_REQUEST;
        }
        request = ofpraw_alloc(type, ofputil_protocol_to_ofp_version(protocol),
                               ofputil_match_typical_len(protocol));

        body = ofpbuf_put_zeros(request, sizeof *body);
        body->cookie_mask = fsr->cookie_mask;
        body->cookie = fsr->cookie;
        body->out_group = htonl(OFPG11_ANY);
        body->out_port = ofputil_port_to_ofp11(fsr->out_port);
        body->table_id = fsr->table_id;

        ofputil_put_ofp11_match(request, &fsr->match, protocol);
        break;
    }

    case OFPUTIL_P_OF10_STD:
    case OFPUTIL_P_OF10_STD_TID: {
        /* Standard OpenFlow 1.0 request: the match is part of the body. */
        struct ofp10_flow_stats_request *body;

        if (fsr->aggregate) {
            type = OFPRAW_OFPST10_AGGREGATE_REQUEST;
        } else {
            type = OFPRAW_OFPST10_FLOW_REQUEST;
        }
        request = ofpraw_alloc(type, OFP10_VERSION, 0);

        body = ofpbuf_put_zeros(request, sizeof *body);
        ofputil_match_to_ofp10_match(&fsr->match, &body->match);
        body->out_port = htons(ofp_to_u16(fsr->out_port));
        body->table_id = fsr->table_id;
        break;
    }

    case OFPUTIL_P_OF10_NXM:
    case OFPUTIL_P_OF10_NXM_TID: {
        /* Nicira extended request: fixed body followed by an NXM match. */
        struct nx_flow_stats_request *body;
        int nxm_len;

        if (fsr->aggregate) {
            type = OFPRAW_NXST_AGGREGATE_REQUEST;
        } else {
            type = OFPRAW_NXST_FLOW_REQUEST;
        }
        request = ofpraw_alloc(type, OFP10_VERSION, NXM_TYPICAL_LEN);

        ofpbuf_put_zeros(request, sizeof *body);
        nxm_len = nx_put_match(request, &fsr->match,
                               fsr->cookie, fsr->cookie_mask);

        /* Fetch the body only after the match has been appended. */
        body = request->l3;
        body->table_id = fsr->table_id;
        body->match_len = htons(nxm_len);
        body->out_port = htons(ofp_to_u16(fsr->out_port));
        break;
    }

    default:
        NOT_REACHED();
    }

    return request;
}

/* Converts an OFPST_FLOW or NXST_FLOW reply in 'msg' into an abstract
 * ofputil_flow_stats in 'fs'.
 *
 * Multiple OFPST_FLOW or NXST_FLOW replies can be packed into a single
 * OpenFlow message.
Calling this function multiple times for a single 'msg'
 * iterates through the replies.  The caller must initially leave 'msg''s layer
 * pointers null and not modify them between calls.
 *
 * Most switches don't send the values needed to populate fs->idle_age and
 * fs->hard_age, so those members will usually be set to -1.  If the switch
 * from which 'msg' originated is known to implement NXT_FLOW_AGE, then pass
 * 'flow_age_extension' as true so that the contents of 'msg' determine the
 * 'idle_age' and 'hard_age' members in 'fs'.
 *
 * Uses 'ofpacts' to store the abstract OFPACT_* version of the flow stats
 * reply's actions.  The caller must initialize 'ofpacts' and retains ownership
 * of it.  'fs->ofpacts' will point into the 'ofpacts' buffer.
 *
 * Returns 0 if successful, EOF if no replies were left in this 'msg',
 * otherwise a nonzero error value: EINVAL for a malformed reply, or the
 * OpenFlow error code reported while decoding the message header or the
 * OpenFlow 1.3 flags. */
int
ofputil_decode_flow_stats_reply(struct ofputil_flow_stats *fs,
                                struct ofpbuf *msg,
                                bool flow_age_extension,
                                struct ofpbuf *ofpacts)
{
    const struct ofp_header *oh;
    enum ofperr error;
    enum ofpraw raw;

    /* On the first call 'msg->l2' is null and the header is pulled; on later
     * calls the header is only decoded again to recover 'raw'. */
    error = (msg->l2
             ? ofpraw_decode(&raw, msg->l2)
             : ofpraw_pull(&raw, msg));
    if (error) {
        return error;
    }
    oh = msg->l2;

    if (!msg->size) {
        return EOF;
    } else if (raw == OFPRAW_OFPST11_FLOW_REPLY
               || raw == OFPRAW_OFPST13_FLOW_REPLY) {
        /* Standard OpenFlow 1.1+ entry: header, match, instructions. */
        const struct ofp11_flow_stats *ofs;
        size_t length;
        uint16_t padded_match_len;

        ofs = ofpbuf_try_pull(msg, sizeof *ofs);
        if (!ofs) {
            VLOG_WARN_RL(&bad_ofmsg_rl, "OFPST_FLOW reply has %zu leftover "
                         "bytes at end", msg->size);
            return EINVAL;
        }

        length = ntohs(ofs->length);
        if (length < sizeof *ofs) {
            VLOG_WARN_RL(&bad_ofmsg_rl, "OFPST_FLOW reply claims invalid "
                         "length %zu", length);
            return EINVAL;
        }

        if (ofputil_pull_ofp11_match(msg, &fs->match, &padded_match_len)) {
            VLOG_WARN_RL(&bad_ofmsg_rl, "OFPST_FLOW reply bad match");
            return EINVAL;
        }

        /* Whatever remains of this entry after the header and the padded
         * match is its instructions. */
        if (ofpacts_pull_openflow11_instructions(msg, length - sizeof *ofs -
                                                 padded_match_len, ofpacts)) {
            VLOG_WARN_RL(&bad_ofmsg_rl, "OFPST_FLOW reply bad instructions");
            return EINVAL;
        }

        fs->priority = ntohs(ofs->priority);
        fs->table_id = ofs->table_id;
        fs->duration_sec = ntohl(ofs->duration_sec);
        fs->duration_nsec = ntohl(ofs->duration_nsec);
        fs->idle_timeout = ntohs(ofs->idle_timeout);
        fs->hard_timeout = ntohs(ofs->hard_timeout);
        /* Only the OpenFlow 1.3 reply has its flags decoded; -1 is passed as
         * the command so that it cannot compare equal to OFPFC_ADD. */
        if (raw == OFPRAW_OFPST13_FLOW_REPLY) {
            error = ofputil_decode_flow_mod_flags(ofs->flags, -1, oh->version,
                                                  &fs->flags);
            if (error) {
                return error;
            }
        } else {
            fs->flags = 0;
        }
        fs->idle_age = -1;
        fs->hard_age = -1;
        fs->cookie = ofs->cookie;
        fs->packet_count = ntohll(ofs->packet_count);
        fs->byte_count = ntohll(ofs->byte_count);
    } else if (raw == OFPRAW_OFPST10_FLOW_REPLY) {
        /* Standard OpenFlow 1.0 entry: header (including match), actions. */
        const struct ofp10_flow_stats *ofs;
        size_t length;

        ofs = ofpbuf_try_pull(msg, sizeof *ofs);
        if (!ofs) {
            VLOG_WARN_RL(&bad_ofmsg_rl, "OFPST_FLOW reply has %zu leftover "
                         "bytes at end", msg->size);
            return EINVAL;
        }

        length = ntohs(ofs->length);
        if (length < sizeof *ofs) {
            VLOG_WARN_RL(&bad_ofmsg_rl, "OFPST_FLOW reply claims invalid "
                         "length %zu", length);
            return EINVAL;
        }

        if (ofpacts_pull_openflow10(msg, length - sizeof *ofs, ofpacts)) {
            return EINVAL;
        }

        fs->cookie = get_32aligned_be64(&ofs->cookie);
        ofputil_match_from_ofp10_match(&ofs->match, &fs->match);
        fs->priority = ntohs(ofs->priority);
        fs->table_id = ofs->table_id;
        fs->duration_sec = ntohl(ofs->duration_sec);
        fs->duration_nsec = ntohl(ofs->duration_nsec);
        fs->idle_timeout = ntohs(ofs->idle_timeout);
        fs->hard_timeout = ntohs(ofs->hard_timeout);
        fs->idle_age = -1;
        fs->hard_age = -1;
        fs->packet_count = ntohll(get_32aligned_be64(&ofs->packet_count));
        fs->byte_count = ntohll(get_32aligned_be64(&ofs->byte_count));
        fs->flags = 0;
    } else if (raw == OFPRAW_NXST_FLOW_REPLY) {
        /* Nicira extended entry: header, NXM match padded to a multiple of
         * 8 bytes, actions. */
        const struct nx_flow_stats *nfs;
        size_t match_len, actions_len, length;

        nfs = ofpbuf_try_pull(msg, sizeof *nfs);
        if (!nfs) {
            VLOG_WARN_RL(&bad_ofmsg_rl, "NXST_FLOW reply has %zu leftover "
                         "bytes at end", msg->size);
            return EINVAL;
        }

        length = ntohs(nfs->length);
        match_len = ntohs(nfs->match_len);
        if (length < sizeof *nfs + ROUND_UP(match_len, 8)) {
            VLOG_WARN_RL(&bad_ofmsg_rl, "NXST_FLOW reply with match_len=%zu "
                         "claims invalid length %zu", match_len, length);
            return EINVAL;
        }
        if (nx_pull_match(msg, match_len, &fs->match, NULL, NULL)) {
            return EINVAL;
        }

        actions_len = length - sizeof *nfs - ROUND_UP(match_len, 8);
        if (ofpacts_pull_openflow10(msg, actions_len, ofpacts)) {
            return EINVAL;
        }

        fs->cookie = nfs->cookie;
        fs->table_id = nfs->table_id;
        fs->duration_sec = ntohl(nfs->duration_sec);
        fs->duration_nsec = ntohl(nfs->duration_nsec);
        fs->priority = ntohs(nfs->priority);
        fs->idle_timeout = ntohs(nfs->idle_timeout);
        fs->hard_timeout = ntohs(nfs->hard_timeout);
        fs->idle_age = -1;
        fs->hard_age = -1;
        if (flow_age_extension) {
            /* A wire age of 0 leaves the age at -1; any other wire value is
             * the age plus 1. */
            if (nfs->idle_age) {
                fs->idle_age = ntohs(nfs->idle_age) - 1;
            }
            if (nfs->hard_age) {
                fs->hard_age = ntohs(nfs->hard_age) - 1;
            }
        }
        fs->packet_count = ntohll(nfs->packet_count);
        fs->byte_count = ntohll(nfs->byte_count);
        fs->flags = 0;
    } else {
        NOT_REACHED();
    }

    fs->ofpacts = ofpacts->data;
    fs->ofpacts_len = ofpacts->size;

    return 0;
}

/* Returns 'count'
unchanged except that UINT64_MAX becomes 0. * * We use this in situations where OVS internally uses UINT64_MAX to mean * "value unknown" but OpenFlow 1.0 does not define any unknown value. */ static uint64_t unknown_to_zero(uint64_t count) { return count != UINT64_MAX ? count : 0; } /* Appends an OFPST_FLOW or NXST_FLOW reply that contains the data in 'fs' to * those already present in the list of ofpbufs in 'replies'. 'replies' should * have been initialized with ofputil_start_stats_reply(). */ void ofputil_append_flow_stats_reply(const struct ofputil_flow_stats *fs, struct list *replies) { struct ofpbuf *reply = ofpbuf_from_list(list_back(replies)); size_t start_ofs = reply->size; enum ofpraw raw; ofpraw_decode_partial(&raw, reply->data, reply->size); if (raw == OFPRAW_OFPST11_FLOW_REPLY || raw == OFPRAW_OFPST13_FLOW_REPLY) { const struct ofp_header *oh = reply->data; struct ofp11_flow_stats *ofs; ofpbuf_put_uninit(reply, sizeof *ofs); oxm_put_match(reply, &fs->match); ofpacts_put_openflow11_instructions(fs->ofpacts, fs->ofpacts_len, reply); ofs = ofpbuf_at_assert(reply, start_ofs, sizeof *ofs); ofs->length = htons(reply->size - start_ofs); ofs->table_id = fs->table_id; ofs->pad = 0; ofs->duration_sec = htonl(fs->duration_sec); ofs->duration_nsec = htonl(fs->duration_nsec); ofs->priority = htons(fs->priority); ofs->idle_timeout = htons(fs->idle_timeout); ofs->hard_timeout = htons(fs->hard_timeout); if (raw == OFPRAW_OFPST13_FLOW_REPLY) { ofs->flags = ofputil_encode_flow_mod_flags(fs->flags, oh->version); } else { ofs->flags = 0; } memset(ofs->pad2, 0, sizeof ofs->pad2); ofs->cookie = fs->cookie; ofs->packet_count = htonll(unknown_to_zero(fs->packet_count)); ofs->byte_count = htonll(unknown_to_zero(fs->byte_count)); } else if (raw == OFPRAW_OFPST10_FLOW_REPLY) { struct ofp10_flow_stats *ofs; ofpbuf_put_uninit(reply, sizeof *ofs); ofpacts_put_openflow10(fs->ofpacts, fs->ofpacts_len, reply); ofs = ofpbuf_at_assert(reply, start_ofs, sizeof *ofs); ofs->length = 
htons(reply->size - start_ofs); ofs->table_id = fs->table_id; ofs->pad = 0; ofputil_match_to_ofp10_match(&fs->match, &ofs->match); ofs->duration_sec = htonl(fs->duration_sec); ofs->duration_nsec = htonl(fs->duration_nsec); ofs->priority = htons(fs->priority); ofs->idle_timeout = htons(fs->idle_timeout); ofs->hard_timeout = htons(fs->hard_timeout); memset(ofs->pad2, 0, sizeof ofs->pad2); put_32aligned_be64(&ofs->cookie, fs->cookie); put_32aligned_be64(&ofs->packet_count, htonll(unknown_to_zero(fs->packet_count))); put_32aligned_be64(&ofs->byte_count, htonll(unknown_to_zero(fs->byte_count))); } else if (raw == OFPRAW_NXST_FLOW_REPLY) { struct nx_flow_stats *nfs; int match_len; ofpbuf_put_uninit(reply, sizeof *nfs); match_len = nx_put_match(reply, &fs->match, 0, 0); ofpacts_put_openflow10(fs->ofpacts, fs->ofpacts_len, reply); nfs = ofpbuf_at_assert(reply, start_ofs, sizeof *nfs); nfs->length = htons(reply->size - start_ofs); nfs->table_id = fs->table_id; nfs->pad = 0; nfs->duration_sec = htonl(fs->duration_sec); nfs->duration_nsec = htonl(fs->duration_nsec); nfs->priority = htons(fs->priority); nfs->idle_timeout = htons(fs->idle_timeout); nfs->hard_timeout = htons(fs->hard_timeout); nfs->idle_age = htons(fs->idle_age < 0 ? 0 : fs->idle_age < UINT16_MAX ? fs->idle_age + 1 : UINT16_MAX); nfs->hard_age = htons(fs->hard_age < 0 ? 0 : fs->hard_age < UINT16_MAX ? fs->hard_age + 1 : UINT16_MAX); nfs->match_len = htons(match_len); nfs->cookie = fs->cookie; nfs->packet_count = htonll(fs->packet_count); nfs->byte_count = htonll(fs->byte_count); } else { NOT_REACHED(); } ofpmp_postappend(replies, start_ofs); } /* Converts abstract ofputil_aggregate_stats 'stats' into an OFPST_AGGREGATE or * NXST_AGGREGATE reply matching 'request', and returns the message. 
*/ struct ofpbuf * ofputil_encode_aggregate_stats_reply( const struct ofputil_aggregate_stats *stats, const struct ofp_header *request) { struct ofp_aggregate_stats_reply *asr; uint64_t packet_count; uint64_t byte_count; struct ofpbuf *msg; enum ofpraw raw; ofpraw_decode(&raw, request); if (raw == OFPRAW_OFPST10_AGGREGATE_REQUEST) { packet_count = unknown_to_zero(stats->packet_count); byte_count = unknown_to_zero(stats->byte_count); } else { packet_count = stats->packet_count; byte_count = stats->byte_count; } msg = ofpraw_alloc_stats_reply(request, 0); asr = ofpbuf_put_zeros(msg, sizeof *asr); put_32aligned_be64(&asr->packet_count, htonll(packet_count)); put_32aligned_be64(&asr->byte_count, htonll(byte_count)); asr->flow_count = htonl(stats->flow_count); return msg; } enum ofperr ofputil_decode_aggregate_stats_reply(struct ofputil_aggregate_stats *stats, const struct ofp_header *reply) { struct ofp_aggregate_stats_reply *asr; struct ofpbuf msg; ofpbuf_use_const(&msg, reply, ntohs(reply->length)); ofpraw_pull_assert(&msg); asr = msg.l3; stats->packet_count = ntohll(get_32aligned_be64(&asr->packet_count)); stats->byte_count = ntohll(get_32aligned_be64(&asr->byte_count)); stats->flow_count = ntohl(asr->flow_count); return 0; } /* Converts an OFPT_FLOW_REMOVED or NXT_FLOW_REMOVED message 'oh' into an * abstract ofputil_flow_removed in 'fr'. Returns 0 if successful, otherwise * an OpenFlow error code. 
*/ enum ofperr ofputil_decode_flow_removed(struct ofputil_flow_removed *fr, const struct ofp_header *oh) { enum ofpraw raw; struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); raw = ofpraw_pull_assert(&b); if (raw == OFPRAW_OFPT11_FLOW_REMOVED) { const struct ofp12_flow_removed *ofr; enum ofperr error; ofr = ofpbuf_pull(&b, sizeof *ofr); error = ofputil_pull_ofp11_match(&b, &fr->match, NULL); if (error) { return error; } fr->priority = ntohs(ofr->priority); fr->cookie = ofr->cookie; fr->reason = ofr->reason; fr->table_id = ofr->table_id; fr->duration_sec = ntohl(ofr->duration_sec); fr->duration_nsec = ntohl(ofr->duration_nsec); fr->idle_timeout = ntohs(ofr->idle_timeout); fr->hard_timeout = ntohs(ofr->hard_timeout); fr->packet_count = ntohll(ofr->packet_count); fr->byte_count = ntohll(ofr->byte_count); } else if (raw == OFPRAW_OFPT10_FLOW_REMOVED) { const struct ofp10_flow_removed *ofr; ofr = ofpbuf_pull(&b, sizeof *ofr); ofputil_match_from_ofp10_match(&ofr->match, &fr->match); fr->priority = ntohs(ofr->priority); fr->cookie = ofr->cookie; fr->reason = ofr->reason; fr->table_id = 255; fr->duration_sec = ntohl(ofr->duration_sec); fr->duration_nsec = ntohl(ofr->duration_nsec); fr->idle_timeout = ntohs(ofr->idle_timeout); fr->hard_timeout = 0; fr->packet_count = ntohll(ofr->packet_count); fr->byte_count = ntohll(ofr->byte_count); } else if (raw == OFPRAW_NXT_FLOW_REMOVED) { struct nx_flow_removed *nfr; enum ofperr error; nfr = ofpbuf_pull(&b, sizeof *nfr); error = nx_pull_match(&b, ntohs(nfr->match_len), &fr->match, NULL, NULL); if (error) { return error; } if (b.size) { return OFPERR_OFPBRC_BAD_LEN; } fr->priority = ntohs(nfr->priority); fr->cookie = nfr->cookie; fr->reason = nfr->reason; fr->table_id = nfr->table_id ? 
nfr->table_id - 1 : 255; fr->duration_sec = ntohl(nfr->duration_sec); fr->duration_nsec = ntohl(nfr->duration_nsec); fr->idle_timeout = ntohs(nfr->idle_timeout); fr->hard_timeout = 0; fr->packet_count = ntohll(nfr->packet_count); fr->byte_count = ntohll(nfr->byte_count); } else { NOT_REACHED(); } return 0; } /* Converts abstract ofputil_flow_removed 'fr' into an OFPT_FLOW_REMOVED or * NXT_FLOW_REMOVED message 'oh' according to 'protocol', and returns the * message. */ struct ofpbuf * ofputil_encode_flow_removed(const struct ofputil_flow_removed *fr, enum ofputil_protocol protocol) { struct ofpbuf *msg; switch (protocol) { case OFPUTIL_P_OF11_STD: case OFPUTIL_P_OF12_OXM: case OFPUTIL_P_OF13_OXM: { struct ofp12_flow_removed *ofr; msg = ofpraw_alloc_xid(OFPRAW_OFPT11_FLOW_REMOVED, ofputil_protocol_to_ofp_version(protocol), htonl(0), ofputil_match_typical_len(protocol)); ofr = ofpbuf_put_zeros(msg, sizeof *ofr); ofr->cookie = fr->cookie; ofr->priority = htons(fr->priority); ofr->reason = fr->reason; ofr->table_id = fr->table_id; ofr->duration_sec = htonl(fr->duration_sec); ofr->duration_nsec = htonl(fr->duration_nsec); ofr->idle_timeout = htons(fr->idle_timeout); ofr->hard_timeout = htons(fr->hard_timeout); ofr->packet_count = htonll(fr->packet_count); ofr->byte_count = htonll(fr->byte_count); ofputil_put_ofp11_match(msg, &fr->match, protocol); break; } case OFPUTIL_P_OF10_STD: case OFPUTIL_P_OF10_STD_TID: { struct ofp10_flow_removed *ofr; msg = ofpraw_alloc_xid(OFPRAW_OFPT10_FLOW_REMOVED, OFP10_VERSION, htonl(0), 0); ofr = ofpbuf_put_zeros(msg, sizeof *ofr); ofputil_match_to_ofp10_match(&fr->match, &ofr->match); ofr->cookie = fr->cookie; ofr->priority = htons(fr->priority); ofr->reason = fr->reason; ofr->duration_sec = htonl(fr->duration_sec); ofr->duration_nsec = htonl(fr->duration_nsec); ofr->idle_timeout = htons(fr->idle_timeout); ofr->packet_count = htonll(unknown_to_zero(fr->packet_count)); ofr->byte_count = htonll(unknown_to_zero(fr->byte_count)); break; } case 
OFPUTIL_P_OF10_NXM: case OFPUTIL_P_OF10_NXM_TID: { struct nx_flow_removed *nfr; int match_len; msg = ofpraw_alloc_xid(OFPRAW_NXT_FLOW_REMOVED, OFP10_VERSION, htonl(0), NXM_TYPICAL_LEN); nfr = ofpbuf_put_zeros(msg, sizeof *nfr); match_len = nx_put_match(msg, &fr->match, 0, 0); nfr = msg->l3; nfr->cookie = fr->cookie; nfr->priority = htons(fr->priority); nfr->reason = fr->reason; nfr->table_id = fr->table_id + 1; nfr->duration_sec = htonl(fr->duration_sec); nfr->duration_nsec = htonl(fr->duration_nsec); nfr->idle_timeout = htons(fr->idle_timeout); nfr->match_len = htons(match_len); nfr->packet_count = htonll(fr->packet_count); nfr->byte_count = htonll(fr->byte_count); break; } default: NOT_REACHED(); } return msg; } static void ofputil_decode_packet_in_finish(struct ofputil_packet_in *pin, struct match *match, struct ofpbuf *b) { pin->packet = b->data; pin->packet_len = b->size; pin->fmd.in_port = match->flow.in_port.ofp_port; pin->fmd.tun_id = match->flow.tunnel.tun_id; pin->fmd.tun_src = match->flow.tunnel.ip_src; pin->fmd.tun_dst = match->flow.tunnel.ip_dst; pin->fmd.metadata = match->flow.metadata; memcpy(pin->fmd.regs, match->flow.regs, sizeof pin->fmd.regs); pin->fmd.pkt_mark = match->flow.pkt_mark; } enum ofperr ofputil_decode_packet_in(struct ofputil_packet_in *pin, const struct ofp_header *oh) { enum ofpraw raw; struct ofpbuf b; memset(pin, 0, sizeof *pin); ofpbuf_use_const(&b, oh, ntohs(oh->length)); raw = ofpraw_pull_assert(&b); if (raw == OFPRAW_OFPT13_PACKET_IN || raw == OFPRAW_OFPT12_PACKET_IN) { const struct ofp13_packet_in *opi; struct match match; int error; size_t packet_in_size; if (raw == OFPRAW_OFPT12_PACKET_IN) { packet_in_size = sizeof (struct ofp12_packet_in); } else { packet_in_size = sizeof (struct ofp13_packet_in); } opi = ofpbuf_pull(&b, packet_in_size); error = oxm_pull_match_loose(&b, &match); if (error) { return error; } if (!ofpbuf_try_pull(&b, 2)) { return OFPERR_OFPBRC_BAD_LEN; } pin->reason = opi->pi.reason; pin->table_id = 
opi->pi.table_id; pin->buffer_id = ntohl(opi->pi.buffer_id); pin->total_len = ntohs(opi->pi.total_len); if (raw == OFPRAW_OFPT13_PACKET_IN) { pin->cookie = opi->cookie; } ofputil_decode_packet_in_finish(pin, &match, &b); } else if (raw == OFPRAW_OFPT10_PACKET_IN) { const struct ofp10_packet_in *opi; opi = ofpbuf_pull(&b, offsetof(struct ofp10_packet_in, data)); pin->packet = opi->data; pin->packet_len = b.size; pin->fmd.in_port = u16_to_ofp(ntohs(opi->in_port)); pin->reason = opi->reason; pin->buffer_id = ntohl(opi->buffer_id); pin->total_len = ntohs(opi->total_len); } else if (raw == OFPRAW_NXT_PACKET_IN) { const struct nx_packet_in *npi; struct match match; int error; npi = ofpbuf_pull(&b, sizeof *npi); error = nx_pull_match_loose(&b, ntohs(npi->match_len), &match, NULL, NULL); if (error) { return error; } if (!ofpbuf_try_pull(&b, 2)) { return OFPERR_OFPBRC_BAD_LEN; } pin->reason = npi->reason; pin->table_id = npi->table_id; pin->cookie = npi->cookie; pin->buffer_id = ntohl(npi->buffer_id); pin->total_len = ntohs(npi->total_len); ofputil_decode_packet_in_finish(pin, &match, &b); } else { NOT_REACHED(); } return 0; } static void ofputil_packet_in_to_match(const struct ofputil_packet_in *pin, struct match *match) { int i; match_init_catchall(match); if (pin->fmd.tun_id != htonll(0)) { match_set_tun_id(match, pin->fmd.tun_id); } if (pin->fmd.tun_src != htonl(0)) { match_set_tun_src(match, pin->fmd.tun_src); } if (pin->fmd.tun_dst != htonl(0)) { match_set_tun_dst(match, pin->fmd.tun_dst); } if (pin->fmd.metadata != htonll(0)) { match_set_metadata(match, pin->fmd.metadata); } for (i = 0; i < FLOW_N_REGS; i++) { if (pin->fmd.regs[i]) { match_set_reg(match, i, pin->fmd.regs[i]); } } if (pin->fmd.pkt_mark != 0) { match_set_pkt_mark(match, pin->fmd.pkt_mark); } match_set_in_port(match, pin->fmd.in_port); } /* Converts abstract ofputil_packet_in 'pin' into a PACKET_IN message * in the format specified by 'packet_in_format'. 
*/ struct ofpbuf * ofputil_encode_packet_in(const struct ofputil_packet_in *pin, enum ofputil_protocol protocol, enum nx_packet_in_format packet_in_format) { size_t send_len = MIN(pin->send_len, pin->packet_len); struct ofpbuf *packet; /* Add OFPT_PACKET_IN. */ if (protocol == OFPUTIL_P_OF13_OXM || protocol == OFPUTIL_P_OF12_OXM) { struct ofp13_packet_in *opi; struct match match; enum ofpraw packet_in_raw; enum ofp_version packet_in_version; size_t packet_in_size; if (protocol == OFPUTIL_P_OF12_OXM) { packet_in_raw = OFPRAW_OFPT12_PACKET_IN; packet_in_version = OFP12_VERSION; packet_in_size = sizeof (struct ofp12_packet_in); } else { packet_in_raw = OFPRAW_OFPT13_PACKET_IN; packet_in_version = OFP13_VERSION; packet_in_size = sizeof (struct ofp13_packet_in); } ofputil_packet_in_to_match(pin, &match); /* The final argument is just an estimate of the space required. */ packet = ofpraw_alloc_xid(packet_in_raw, packet_in_version, htonl(0), (sizeof(struct flow_metadata) * 2 + 2 + send_len)); ofpbuf_put_zeros(packet, packet_in_size); oxm_put_match(packet, &match); ofpbuf_put_zeros(packet, 2); ofpbuf_put(packet, pin->packet, send_len); opi = packet->l3; opi->pi.buffer_id = htonl(pin->buffer_id); opi->pi.total_len = htons(pin->total_len); opi->pi.reason = pin->reason; opi->pi.table_id = pin->table_id; if (protocol == OFPUTIL_P_OF13_OXM) { opi->cookie = pin->cookie; } } else if (packet_in_format == NXPIF_OPENFLOW10) { struct ofp10_packet_in *opi; packet = ofpraw_alloc_xid(OFPRAW_OFPT10_PACKET_IN, OFP10_VERSION, htonl(0), send_len); opi = ofpbuf_put_zeros(packet, offsetof(struct ofp10_packet_in, data)); opi->total_len = htons(pin->total_len); opi->in_port = htons(ofp_to_u16(pin->fmd.in_port)); opi->reason = pin->reason; opi->buffer_id = htonl(pin->buffer_id); ofpbuf_put(packet, pin->packet, send_len); } else if (packet_in_format == NXPIF_NXM) { struct nx_packet_in *npi; struct match match; size_t match_len; ofputil_packet_in_to_match(pin, &match); /* The final argument is 
just an estimate of the space required. */ packet = ofpraw_alloc_xid(OFPRAW_NXT_PACKET_IN, OFP10_VERSION, htonl(0), (sizeof(struct flow_metadata) * 2 + 2 + send_len)); ofpbuf_put_zeros(packet, sizeof *npi); match_len = nx_put_match(packet, &match, 0, 0); ofpbuf_put_zeros(packet, 2); ofpbuf_put(packet, pin->packet, send_len); npi = packet->l3; npi->buffer_id = htonl(pin->buffer_id); npi->total_len = htons(pin->total_len); npi->reason = pin->reason; npi->table_id = pin->table_id; npi->cookie = pin->cookie; npi->match_len = htons(match_len); } else { NOT_REACHED(); } ofpmsg_update_length(packet); return packet; } /* Returns a string form of 'reason'. The return value is either a statically * allocated constant string or the 'bufsize'-byte buffer 'reasonbuf'. * 'bufsize' should be at least OFPUTIL_PACKET_IN_REASON_BUFSIZE. */ const char * ofputil_packet_in_reason_to_string(enum ofp_packet_in_reason reason, char *reasonbuf, size_t bufsize) { switch (reason) { case OFPR_NO_MATCH: return "no_match"; case OFPR_ACTION: return "action"; case OFPR_INVALID_TTL: return "invalid_ttl"; case OFPR_N_REASONS: default: snprintf(reasonbuf, bufsize, "%d", (int) reason); return reasonbuf; } } bool ofputil_packet_in_reason_from_string(const char *s, enum ofp_packet_in_reason *reason) { int i; for (i = 0; i < OFPR_N_REASONS; i++) { char reasonbuf[OFPUTIL_PACKET_IN_REASON_BUFSIZE]; const char *reason_s; reason_s = ofputil_packet_in_reason_to_string(i, reasonbuf, sizeof reasonbuf); if (!strcasecmp(s, reason_s)) { *reason = i; return true; } } return false; } /* Converts an OFPT_PACKET_OUT in 'opo' into an abstract ofputil_packet_out in * 'po'. * * Uses 'ofpacts' to store the abstract OFPACT_* version of the packet out * message's actions. The caller must initialize 'ofpacts' and retains * ownership of it. 'po->ofpacts' will point into the 'ofpacts' buffer. * * Returns 0 if successful, otherwise an OFPERR_* value. 
*/
enum ofperr
ofputil_decode_packet_out(struct ofputil_packet_out *po,
                          const struct ofp_header *oh,
                          struct ofpbuf *ofpacts)
{
    struct ofpbuf msg;
    enum ofperr error;
    enum ofpraw raw;
    bool unbuffered;

    ofpbuf_use_const(&msg, oh, ntohs(oh->length));
    raw = ofpraw_pull_assert(&msg);

    /* Pull the fixed-size header and the action list; the two wire formats
     * differ in how the input port is encoded and in the action decoder. */
    if (raw == OFPRAW_OFPT11_PACKET_OUT) {
        const struct ofp11_packet_out *opo = ofpbuf_pull(&msg, sizeof *opo);

        po->buffer_id = ntohl(opo->buffer_id);
        error = ofputil_port_from_ofp11(opo->in_port, &po->in_port);
        if (!error) {
            error = ofpacts_pull_openflow11_actions(&msg,
                                                    ntohs(opo->actions_len),
                                                    ofpacts);
        }
    } else if (raw == OFPRAW_OFPT10_PACKET_OUT) {
        const struct ofp10_packet_out *opo = ofpbuf_pull(&msg, sizeof *opo);

        po->buffer_id = ntohl(opo->buffer_id);
        po->in_port = u16_to_ofp(ntohs(opo->in_port));
        error = ofpacts_pull_openflow10(&msg, ntohs(opo->actions_len),
                                        ofpacts);
    } else {
        NOT_REACHED();
    }
    if (error) {
        return error;
    }

    /* Ports at or above OFPP_MAX are accepted only if they are one of the
     * reserved ports LOCAL, NONE or CONTROLLER. */
    if (ofp_to_u16(po->in_port) >= ofp_to_u16(OFPP_MAX)
        && po->in_port != OFPP_LOCAL
        && po->in_port != OFPP_NONE
        && po->in_port != OFPP_CONTROLLER) {
        VLOG_WARN_RL(&bad_ofmsg_rl, "packet-out has bad input port %#"PRIx16,
                     po->in_port);
        return OFPERR_OFPBRC_BAD_PORT;
    }

    po->ofpacts = ofpacts->data;
    po->ofpacts_len = ofpacts->size;

    /* Whatever is left in the message is treated as packet data, but only
     * when buffer_id is UINT32_MAX; otherwise no packet is reported. */
    unbuffered = po->buffer_id == UINT32_MAX;
    po->packet = unbuffered ? msg.data : NULL;
    po->packet_len = unbuffered ? msg.size : 0;

    return 0;
}

/* ofputil_phy_port */

/* NETDEV_F_* to and from OFPPF_* and OFPPF10_*.
*/
BUILD_ASSERT_DECL((int) NETDEV_F_10MB_HD == OFPPF_10MB_HD); /* bit 0 */
BUILD_ASSERT_DECL((int) NETDEV_F_10MB_FD == OFPPF_10MB_FD); /* bit 1 */
BUILD_ASSERT_DECL((int) NETDEV_F_100MB_HD == OFPPF_100MB_HD); /* bit 2 */
BUILD_ASSERT_DECL((int) NETDEV_F_100MB_FD == OFPPF_100MB_FD); /* bit 3 */
BUILD_ASSERT_DECL((int) NETDEV_F_1GB_HD == OFPPF_1GB_HD); /* bit 4 */
BUILD_ASSERT_DECL((int) NETDEV_F_1GB_FD == OFPPF_1GB_FD); /* bit 5 */
BUILD_ASSERT_DECL((int) NETDEV_F_10GB_FD == OFPPF_10GB_FD); /* bit 6 */

/* NETDEV_F_ bits 11...15 are OFPPF10_ bits 7...11: */
BUILD_ASSERT_DECL((int) NETDEV_F_COPPER == (OFPPF10_COPPER << 4));
BUILD_ASSERT_DECL((int) NETDEV_F_FIBER == (OFPPF10_FIBER << 4));
BUILD_ASSERT_DECL((int) NETDEV_F_AUTONEG == (OFPPF10_AUTONEG << 4));
BUILD_ASSERT_DECL((int) NETDEV_F_PAUSE == (OFPPF10_PAUSE << 4));
BUILD_ASSERT_DECL((int) NETDEV_F_PAUSE_ASYM == (OFPPF10_PAUSE_ASYM << 4));

/* Converts the network-byte-order OpenFlow 1.0 port feature bitmap 'ofp10_'
 * into NETDEV_F_* bits.
 *
 * Bits 0...6 (mask 0x7f) are copied unchanged; bits 7...11 (mask 0xf80) are
 * shifted left by 4 so that they land on NETDEV_F_ bits 11...15, matching the
 * build-time assertions above.  Any higher input bits are discarded. */
static enum netdev_features
netdev_port_features_from_ofp10(ovs_be32 ofp10_)
{
    uint32_t ofp10 = ntohl(ofp10_);
    return (ofp10 & 0x7f) | ((ofp10 & 0xf80) << 4);
}

/* Converts NETDEV_F_* bits in 'features' into a network-byte-order OpenFlow
 * 1.0 port feature bitmap.
 *
 * This is the inverse of netdev_port_features_from_ofp10(): bits 0...6 are
 * copied and bits 11...15 (mask 0xf800) are shifted right by 4.  NETDEV_F_
 * bits 7...10 are covered by neither mask, so they are dropped. */
static ovs_be32
netdev_port_features_to_ofp10(enum netdev_features features)
{
    return htonl((features & 0x7f) | ((features & 0xf800) >> 4));
}

/* NETDEV_F_* to and from OFPPF_* and OFPPF11_*: the assertions below check
 * that bits 0...15 have the same values in both encodings, so the OpenFlow
 * 1.1 conversion needs no bit shuffling. */
BUILD_ASSERT_DECL((int) NETDEV_F_10MB_HD == OFPPF_10MB_HD); /* bit 0 */
BUILD_ASSERT_DECL((int) NETDEV_F_10MB_FD == OFPPF_10MB_FD); /* bit 1 */
BUILD_ASSERT_DECL((int) NETDEV_F_100MB_HD == OFPPF_100MB_HD); /* bit 2 */
BUILD_ASSERT_DECL((int) NETDEV_F_100MB_FD == OFPPF_100MB_FD); /* bit 3 */
BUILD_ASSERT_DECL((int) NETDEV_F_1GB_HD == OFPPF_1GB_HD); /* bit 4 */
BUILD_ASSERT_DECL((int) NETDEV_F_1GB_FD == OFPPF_1GB_FD); /* bit 5 */
BUILD_ASSERT_DECL((int) NETDEV_F_10GB_FD == OFPPF_10GB_FD); /* bit 6 */
BUILD_ASSERT_DECL((int) NETDEV_F_40GB_FD == OFPPF11_40GB_FD); /* bit 7 */
BUILD_ASSERT_DECL((int) NETDEV_F_100GB_FD == OFPPF11_100GB_FD); /* bit 8 */
BUILD_ASSERT_DECL((int) NETDEV_F_1TB_FD == OFPPF11_1TB_FD); /* bit 9 */
BUILD_ASSERT_DECL((int) NETDEV_F_OTHER == OFPPF11_OTHER); /* bit
10 */ BUILD_ASSERT_DECL((int) NETDEV_F_COPPER == OFPPF11_COPPER); /* bit 11 */ BUILD_ASSERT_DECL((int) NETDEV_F_FIBER == OFPPF11_FIBER); /* bit 12 */ BUILD_ASSERT_DECL((int) NETDEV_F_AUTONEG == OFPPF11_AUTONEG); /* bit 13 */ BUILD_ASSERT_DECL((int) NETDEV_F_PAUSE == OFPPF11_PAUSE); /* bit 14 */ BUILD_ASSERT_DECL((int) NETDEV_F_PAUSE_ASYM == OFPPF11_PAUSE_ASYM);/* bit 15 */ static enum netdev_features netdev_port_features_from_ofp11(ovs_be32 ofp11) { return ntohl(ofp11) & 0xffff; } static ovs_be32 netdev_port_features_to_ofp11(enum netdev_features features) { return htonl(features & 0xffff); } static enum ofperr ofputil_decode_ofp10_phy_port(struct ofputil_phy_port *pp, const struct ofp10_phy_port *opp) { memset(pp, 0, sizeof *pp); pp->port_no = u16_to_ofp(ntohs(opp->port_no)); memcpy(pp->hw_addr, opp->hw_addr, OFP_ETH_ALEN); ovs_strlcpy(pp->name, opp->name, OFP_MAX_PORT_NAME_LEN); pp->config = ntohl(opp->config) & OFPPC10_ALL; pp->state = ntohl(opp->state) & OFPPS10_ALL; pp->curr = netdev_port_features_from_ofp10(opp->curr); pp->advertised = netdev_port_features_from_ofp10(opp->advertised); pp->supported = netdev_port_features_from_ofp10(opp->supported); pp->peer = netdev_port_features_from_ofp10(opp->peer); pp->curr_speed = netdev_features_to_bps(pp->curr, 0) / 1000; pp->max_speed = netdev_features_to_bps(pp->supported, 0) / 1000; return 0; } static enum ofperr ofputil_decode_ofp11_port(struct ofputil_phy_port *pp, const struct ofp11_port *op) { enum ofperr error; memset(pp, 0, sizeof *pp); error = ofputil_port_from_ofp11(op->port_no, &pp->port_no); if (error) { return error; } memcpy(pp->hw_addr, op->hw_addr, OFP_ETH_ALEN); ovs_strlcpy(pp->name, op->name, OFP_MAX_PORT_NAME_LEN); pp->config = ntohl(op->config) & OFPPC11_ALL; pp->state = ntohl(op->state) & OFPPC11_ALL; pp->curr = netdev_port_features_from_ofp11(op->curr); pp->advertised = netdev_port_features_from_ofp11(op->advertised); pp->supported = netdev_port_features_from_ofp11(op->supported); pp->peer = 
netdev_port_features_from_ofp11(op->peer); pp->curr_speed = ntohl(op->curr_speed); pp->max_speed = ntohl(op->max_speed); return 0; } static size_t ofputil_get_phy_port_size(enum ofp_version ofp_version) { switch (ofp_version) { case OFP10_VERSION: return sizeof(struct ofp10_phy_port); case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: return sizeof(struct ofp11_port); default: NOT_REACHED(); } } static void ofputil_encode_ofp10_phy_port(const struct ofputil_phy_port *pp, struct ofp10_phy_port *opp) { memset(opp, 0, sizeof *opp); opp->port_no = htons(ofp_to_u16(pp->port_no)); memcpy(opp->hw_addr, pp->hw_addr, ETH_ADDR_LEN); ovs_strlcpy(opp->name, pp->name, OFP_MAX_PORT_NAME_LEN); opp->config = htonl(pp->config & OFPPC10_ALL); opp->state = htonl(pp->state & OFPPS10_ALL); opp->curr = netdev_port_features_to_ofp10(pp->curr); opp->advertised = netdev_port_features_to_ofp10(pp->advertised); opp->supported = netdev_port_features_to_ofp10(pp->supported); opp->peer = netdev_port_features_to_ofp10(pp->peer); } static void ofputil_encode_ofp11_port(const struct ofputil_phy_port *pp, struct ofp11_port *op) { memset(op, 0, sizeof *op); op->port_no = ofputil_port_to_ofp11(pp->port_no); memcpy(op->hw_addr, pp->hw_addr, ETH_ADDR_LEN); ovs_strlcpy(op->name, pp->name, OFP_MAX_PORT_NAME_LEN); op->config = htonl(pp->config & OFPPC11_ALL); op->state = htonl(pp->state & OFPPS11_ALL); op->curr = netdev_port_features_to_ofp11(pp->curr); op->advertised = netdev_port_features_to_ofp11(pp->advertised); op->supported = netdev_port_features_to_ofp11(pp->supported); op->peer = netdev_port_features_to_ofp11(pp->peer); op->curr_speed = htonl(pp->curr_speed); op->max_speed = htonl(pp->max_speed); } static void ofputil_put_phy_port(enum ofp_version ofp_version, const struct ofputil_phy_port *pp, struct ofpbuf *b) { switch (ofp_version) { case OFP10_VERSION: { struct ofp10_phy_port *opp; if (b->size + sizeof *opp <= UINT16_MAX) { opp = ofpbuf_put_uninit(b, sizeof *opp); 
ofputil_encode_ofp10_phy_port(pp, opp); } break; } case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: { struct ofp11_port *op; if (b->size + sizeof *op <= UINT16_MAX) { op = ofpbuf_put_uninit(b, sizeof *op); ofputil_encode_ofp11_port(pp, op); } break; } default: NOT_REACHED(); } } void ofputil_append_port_desc_stats_reply(enum ofp_version ofp_version, const struct ofputil_phy_port *pp, struct list *replies) { switch (ofp_version) { case OFP10_VERSION: { struct ofp10_phy_port *opp; opp = ofpmp_append(replies, sizeof *opp); ofputil_encode_ofp10_phy_port(pp, opp); break; } case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: { struct ofp11_port *op; op = ofpmp_append(replies, sizeof *op); ofputil_encode_ofp11_port(pp, op); break; } default: NOT_REACHED(); } } /* ofputil_switch_features */ #define OFPC_COMMON (OFPC_FLOW_STATS | OFPC_TABLE_STATS | OFPC_PORT_STATS | \ OFPC_IP_REASM | OFPC_QUEUE_STATS) BUILD_ASSERT_DECL((int) OFPUTIL_C_FLOW_STATS == OFPC_FLOW_STATS); BUILD_ASSERT_DECL((int) OFPUTIL_C_TABLE_STATS == OFPC_TABLE_STATS); BUILD_ASSERT_DECL((int) OFPUTIL_C_PORT_STATS == OFPC_PORT_STATS); BUILD_ASSERT_DECL((int) OFPUTIL_C_IP_REASM == OFPC_IP_REASM); BUILD_ASSERT_DECL((int) OFPUTIL_C_QUEUE_STATS == OFPC_QUEUE_STATS); BUILD_ASSERT_DECL((int) OFPUTIL_C_ARP_MATCH_IP == OFPC_ARP_MATCH_IP); struct ofputil_action_bit_translation { enum ofputil_action_bitmap ofputil_bit; int of_bit; }; static const struct ofputil_action_bit_translation of10_action_bits[] = { { OFPUTIL_A_OUTPUT, OFPAT10_OUTPUT }, { OFPUTIL_A_SET_VLAN_VID, OFPAT10_SET_VLAN_VID }, { OFPUTIL_A_SET_VLAN_PCP, OFPAT10_SET_VLAN_PCP }, { OFPUTIL_A_STRIP_VLAN, OFPAT10_STRIP_VLAN }, { OFPUTIL_A_SET_DL_SRC, OFPAT10_SET_DL_SRC }, { OFPUTIL_A_SET_DL_DST, OFPAT10_SET_DL_DST }, { OFPUTIL_A_SET_NW_SRC, OFPAT10_SET_NW_SRC }, { OFPUTIL_A_SET_NW_DST, OFPAT10_SET_NW_DST }, { OFPUTIL_A_SET_NW_TOS, OFPAT10_SET_NW_TOS }, { OFPUTIL_A_SET_TP_SRC, OFPAT10_SET_TP_SRC }, { OFPUTIL_A_SET_TP_DST, OFPAT10_SET_TP_DST }, 
{ OFPUTIL_A_ENQUEUE, OFPAT10_ENQUEUE }, { 0, 0 }, }; static enum ofputil_action_bitmap decode_action_bits(ovs_be32 of_actions, const struct ofputil_action_bit_translation *x) { enum ofputil_action_bitmap ofputil_actions; ofputil_actions = 0; for (; x->ofputil_bit; x++) { if (of_actions & htonl(1u << x->of_bit)) { ofputil_actions |= x->ofputil_bit; } } return ofputil_actions; } static uint32_t ofputil_capabilities_mask(enum ofp_version ofp_version) { /* Handle capabilities whose bit is unique for all Open Flow versions */ switch (ofp_version) { case OFP10_VERSION: case OFP11_VERSION: return OFPC_COMMON | OFPC_ARP_MATCH_IP; case OFP12_VERSION: case OFP13_VERSION: return OFPC_COMMON | OFPC12_PORT_BLOCKED; default: /* Caller needs to check osf->header.version itself */ return 0; } } /* Decodes an OpenFlow 1.0 or 1.1 "switch_features" structure 'osf' into an * abstract representation in '*features'. Initializes '*b' to iterate over * the OpenFlow port structures following 'osf' with later calls to * ofputil_pull_phy_port(). Returns 0 if successful, otherwise an * OFPERR_* value. 
*/
enum ofperr
ofputil_decode_switch_features(const struct ofp_header *oh,
                               struct ofputil_switch_features *features,
                               struct ofpbuf *b)
{
    const struct ofp_switch_features *osf;
    enum ofpraw raw;

    ofpbuf_use_const(b, oh, ntohs(oh->length));
    raw = ofpraw_pull_assert(b);

    osf = ofpbuf_pull(b, sizeof *osf);
    features->datapath_id = ntohll(osf->datapath_id);
    features->n_buffers = ntohl(osf->n_buffers);
    features->n_tables = osf->n_tables;
    features->auxiliary_id = 0;

    /* Keep only the capability bits whose position is valid for this
     * message's version; version-specific bits are added further down. */
    features->capabilities = ntohl(osf->capabilities) &
        ofputil_capabilities_mask(oh->version);

    /* Whatever follows the fixed header must be a whole number of port
     * structures of the size used by this version. */
    if (b->size % ofputil_get_phy_port_size(oh->version)) {
        return OFPERR_OFPBRC_BAD_LEN;
    }

    if (raw == OFPRAW_OFPT10_FEATURES_REPLY) {
        if (osf->capabilities & htonl(OFPC10_STP)) {
            features->capabilities |= OFPUTIL_C_STP;
        }
        features->actions = decode_action_bits(osf->actions, of10_action_bits);
    } else if (raw == OFPRAW_OFPT11_FEATURES_REPLY
               || raw == OFPRAW_OFPT13_FEATURES_REPLY) {
        if (osf->capabilities & htonl(OFPC11_GROUP_STATS)) {
            features->capabilities |= OFPUTIL_C_GROUP_STATS;
        }
        /* No action bitmap is decoded for these message formats. */
        features->actions = 0;
        if (raw == OFPRAW_OFPT13_FEATURES_REPLY) {
            features->auxiliary_id = osf->auxiliary_id;
        }
    } else {
        return OFPERR_OFPBRC_BAD_VERSION;
    }

    return 0;
}

/* Returns true if the maximum number of ports are in 'oh', that is, if one
 * more port structure of this version's size would push the message length
 * past UINT16_MAX. */
static bool
max_ports_in_features(const struct ofp_header *oh)
{
    size_t pp_size = ofputil_get_phy_port_size(oh->version);
    return ntohs(oh->length) + pp_size > UINT16_MAX;
}

/* Given a buffer 'b' that contains a Features Reply message, checks if
 * it contains the maximum number of ports that will fit.  If so, it
 * returns true and removes the ports from the message.  The caller
 * should then send an OFPST_PORT_DESC stats request to get the ports,
 * since the switch may have more ports than could be represented in the
 * Features Reply.  Otherwise, returns false. */
bool
ofputil_switch_features_ports_trunc(struct ofpbuf *b)
{
    struct ofp_header *oh = b->data;

    if (max_ports_in_features(oh)) {
        /* Remove all the ports.
*/ b->size = (sizeof(struct ofp_header) + sizeof(struct ofp_switch_features)); ofpmsg_update_length(b); return true; } return false; } static ovs_be32 encode_action_bits(enum ofputil_action_bitmap ofputil_actions, const struct ofputil_action_bit_translation *x) { uint32_t of_actions; of_actions = 0; for (; x->ofputil_bit; x++) { if (ofputil_actions & x->ofputil_bit) { of_actions |= 1 << x->of_bit; } } return htonl(of_actions); } /* Returns a buffer owned by the caller that encodes 'features' in the format * required by 'protocol' with the given 'xid'. The caller should append port * information to the buffer with subsequent calls to * ofputil_put_switch_features_port(). */ struct ofpbuf * ofputil_encode_switch_features(const struct ofputil_switch_features *features, enum ofputil_protocol protocol, ovs_be32 xid) { struct ofp_switch_features *osf; struct ofpbuf *b; enum ofp_version version; enum ofpraw raw; version = ofputil_protocol_to_ofp_version(protocol); switch (version) { case OFP10_VERSION: raw = OFPRAW_OFPT10_FEATURES_REPLY; break; case OFP11_VERSION: case OFP12_VERSION: raw = OFPRAW_OFPT11_FEATURES_REPLY; break; case OFP13_VERSION: raw = OFPRAW_OFPT13_FEATURES_REPLY; break; default: NOT_REACHED(); } b = ofpraw_alloc_xid(raw, version, xid, 0); osf = ofpbuf_put_zeros(b, sizeof *osf); osf->datapath_id = htonll(features->datapath_id); osf->n_buffers = htonl(features->n_buffers); osf->n_tables = features->n_tables; osf->capabilities = htonl(features->capabilities & OFPC_COMMON); osf->capabilities = htonl(features->capabilities & ofputil_capabilities_mask(version)); switch (version) { case OFP10_VERSION: if (features->capabilities & OFPUTIL_C_STP) { osf->capabilities |= htonl(OFPC10_STP); } osf->actions = encode_action_bits(features->actions, of10_action_bits); break; case OFP13_VERSION: osf->auxiliary_id = features->auxiliary_id; /* fall through */ case OFP11_VERSION: case OFP12_VERSION: if (features->capabilities & OFPUTIL_C_GROUP_STATS) { osf->capabilities |= 
htonl(OFPC11_GROUP_STATS);
        }
        break;
    default:
        NOT_REACHED();
    }

    return b;
}

/* Encodes 'pp' into the format required by the switch_features message already
 * in 'b', which should have been returned by ofputil_encode_switch_features(),
 * and appends the encoded version to 'b'.
 *
 * Nothing is appended when the message in 'b' is OpenFlow 1.3 or later. */
void
ofputil_put_switch_features_port(const struct ofputil_phy_port *pp,
                                 struct ofpbuf *b)
{
    const struct ofp_header *oh = b->data;

    if (oh->version < OFP13_VERSION) {
        ofputil_put_phy_port(oh->version, pp, b);
    }
}

/* ofputil_port_status */

/* Decodes the OpenFlow "port status" message in '*oh' into an abstract form
 * in '*ps'.  Returns 0 if successful, otherwise an OFPERR_* value.
 *
 * A reason other than OFPPR_ADD, OFPPR_DELETE or OFPPR_MODIFY yields
 * OFPERR_NXBRC_BAD_REASON. */
enum ofperr
ofputil_decode_port_status(const struct ofp_header *oh,
                           struct ofputil_port_status *ps)
{
    const struct ofp_port_status *ops;
    struct ofpbuf b;
    int retval;

    ofpbuf_use_const(&b, oh, ntohs(oh->length));
    ofpraw_pull_assert(&b);
    ops = ofpbuf_pull(&b, sizeof *ops);

    if (ops->reason != OFPPR_ADD &&
        ops->reason != OFPPR_DELETE &&
        ops->reason != OFPPR_MODIFY) {
        return OFPERR_NXBRC_BAD_REASON;
    }
    ps->reason = ops->reason;

    /* The port description follows the fixed part of the message.  An EOF
     * result from the pull is treated as a programming error (asserted)
     * rather than reported to the caller. */
    retval = ofputil_pull_phy_port(oh->version, &b, &ps->desc);
    ovs_assert(retval != EOF);
    return retval;
}

/* Converts the abstract form of a "port status" message in '*ps' into an
 * OpenFlow message suitable for 'protocol', and returns that encoded form in
 * a buffer owned by the caller.
*/ struct ofpbuf * ofputil_encode_port_status(const struct ofputil_port_status *ps, enum ofputil_protocol protocol) { struct ofp_port_status *ops; struct ofpbuf *b; enum ofp_version version; enum ofpraw raw; version = ofputil_protocol_to_ofp_version(protocol); switch (version) { case OFP10_VERSION: raw = OFPRAW_OFPT10_PORT_STATUS; break; case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: raw = OFPRAW_OFPT11_PORT_STATUS; break; default: NOT_REACHED(); } b = ofpraw_alloc_xid(raw, version, htonl(0), 0); ops = ofpbuf_put_zeros(b, sizeof *ops); ops->reason = ps->reason; ofputil_put_phy_port(version, &ps->desc, b); ofpmsg_update_length(b); return b; } /* ofputil_port_mod */ /* Decodes the OpenFlow "port mod" message in '*oh' into an abstract form in * '*pm'. Returns 0 if successful, otherwise an OFPERR_* value. */ enum ofperr ofputil_decode_port_mod(const struct ofp_header *oh, struct ofputil_port_mod *pm) { enum ofpraw raw; struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); raw = ofpraw_pull_assert(&b); if (raw == OFPRAW_OFPT10_PORT_MOD) { const struct ofp10_port_mod *opm = b.data; pm->port_no = u16_to_ofp(ntohs(opm->port_no)); memcpy(pm->hw_addr, opm->hw_addr, ETH_ADDR_LEN); pm->config = ntohl(opm->config) & OFPPC10_ALL; pm->mask = ntohl(opm->mask) & OFPPC10_ALL; pm->advertise = netdev_port_features_from_ofp10(opm->advertise); } else if (raw == OFPRAW_OFPT11_PORT_MOD) { const struct ofp11_port_mod *opm = b.data; enum ofperr error; error = ofputil_port_from_ofp11(opm->port_no, &pm->port_no); if (error) { return error; } memcpy(pm->hw_addr, opm->hw_addr, ETH_ADDR_LEN); pm->config = ntohl(opm->config) & OFPPC11_ALL; pm->mask = ntohl(opm->mask) & OFPPC11_ALL; pm->advertise = netdev_port_features_from_ofp11(opm->advertise); } else { return OFPERR_OFPBRC_BAD_TYPE; } pm->config &= pm->mask; return 0; } /* Converts the abstract form of a "port mod" message in '*pm' into an OpenFlow * message suitable for 'protocol', and returns that encoded form in a buffer 
* owned by the caller. */ struct ofpbuf * ofputil_encode_port_mod(const struct ofputil_port_mod *pm, enum ofputil_protocol protocol) { enum ofp_version ofp_version = ofputil_protocol_to_ofp_version(protocol); struct ofpbuf *b; switch (ofp_version) { case OFP10_VERSION: { struct ofp10_port_mod *opm; b = ofpraw_alloc(OFPRAW_OFPT10_PORT_MOD, ofp_version, 0); opm = ofpbuf_put_zeros(b, sizeof *opm); opm->port_no = htons(ofp_to_u16(pm->port_no)); memcpy(opm->hw_addr, pm->hw_addr, ETH_ADDR_LEN); opm->config = htonl(pm->config & OFPPC10_ALL); opm->mask = htonl(pm->mask & OFPPC10_ALL); opm->advertise = netdev_port_features_to_ofp10(pm->advertise); break; } case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: { struct ofp11_port_mod *opm; b = ofpraw_alloc(OFPRAW_OFPT11_PORT_MOD, ofp_version, 0); opm = ofpbuf_put_zeros(b, sizeof *opm); opm->port_no = ofputil_port_to_ofp11(pm->port_no); memcpy(opm->hw_addr, pm->hw_addr, ETH_ADDR_LEN); opm->config = htonl(pm->config & OFPPC11_ALL); opm->mask = htonl(pm->mask & OFPPC11_ALL); opm->advertise = netdev_port_features_to_ofp11(pm->advertise); break; } default: NOT_REACHED(); } return b; } /* ofputil_role_request */ /* Decodes the OpenFlow "role request" or "role reply" message in '*oh' into * an abstract form in '*rr'. Returns 0 if successful, otherwise an * OFPERR_* value. */ enum ofperr ofputil_decode_role_message(const struct ofp_header *oh, struct ofputil_role_request *rr) { struct ofpbuf b; enum ofpraw raw; ofpbuf_use_const(&b, oh, ntohs(oh->length)); raw = ofpraw_pull_assert(&b); if (raw == OFPRAW_OFPT12_ROLE_REQUEST || raw == OFPRAW_OFPT12_ROLE_REPLY) { const struct ofp12_role_request *orr = b.l3; if (orr->role != htonl(OFPCR12_ROLE_NOCHANGE) && orr->role != htonl(OFPCR12_ROLE_EQUAL) && orr->role != htonl(OFPCR12_ROLE_MASTER) && orr->role != htonl(OFPCR12_ROLE_SLAVE)) { return OFPERR_OFPRRFC_BAD_ROLE; } rr->role = ntohl(orr->role); if (raw == OFPRAW_OFPT12_ROLE_REQUEST ? 
orr->role == htonl(OFPCR12_ROLE_NOCHANGE)
            : orr->generation_id == htonll(UINT64_MAX)) {
            /* No generation id: a request that asks for "no change", or a
             * reply whose generation_id field is all-ones. */
            rr->have_generation_id = false;
            rr->generation_id = 0;
        } else {
            rr->have_generation_id = true;
            rr->generation_id = ntohll(orr->generation_id);
        }
    } else if (raw == OFPRAW_NXT_ROLE_REQUEST ||
               raw == OFPRAW_NXT_ROLE_REPLY) {
        const struct nx_role_request *nrr = b.l3;

        /* Each Nicira role value is exactly one less than the corresponding
         * OpenFlow 1.2 value, which is what makes the "+ 1" below valid. */
        BUILD_ASSERT(NX_ROLE_OTHER + 1 == OFPCR12_ROLE_EQUAL);
        BUILD_ASSERT(NX_ROLE_MASTER + 1 == OFPCR12_ROLE_MASTER);
        BUILD_ASSERT(NX_ROLE_SLAVE + 1 == OFPCR12_ROLE_SLAVE);

        if (nrr->role != htonl(NX_ROLE_OTHER)
            && nrr->role != htonl(NX_ROLE_MASTER)
            && nrr->role != htonl(NX_ROLE_SLAVE)) {
            return OFPERR_OFPRRFC_BAD_ROLE;
        }

        rr->role = ntohl(nrr->role) + 1;
        /* The Nicira message is never decoded with a generation id. */
        rr->have_generation_id = false;
        rr->generation_id = 0;
    } else {
        NOT_REACHED();
    }

    return 0;
}

/* Returns an encoded form of a role reply suitable for the "request" in a
 * buffer owned by the caller.
 *
 * The reply uses the same family as 'request': an OpenFlow 1.2 role request
 * gets an OFPT12 role reply, a Nicira role request gets an NXT role reply. */
struct ofpbuf *
ofputil_encode_role_reply(const struct ofp_header *request,
                          const struct ofputil_role_request *rr)
{
    struct ofpbuf *buf;
    enum ofpraw raw;

    raw = ofpraw_decode_assert(request);
    if (raw == OFPRAW_OFPT12_ROLE_REQUEST) {
        struct ofp12_role_request *orr;

        buf = ofpraw_alloc_reply(OFPRAW_OFPT12_ROLE_REPLY, request, 0);
        orr = ofpbuf_put_zeros(buf, sizeof *orr);

        orr->role = htonl(rr->role);
        /* An absent generation id is sent as all-ones, the same value the
         * decoder above treats as "no generation id" in a reply. */
        orr->generation_id = htonll(rr->have_generation_id
                                    ? rr->generation_id
                                    : UINT64_MAX);
    } else if (raw == OFPRAW_NXT_ROLE_REQUEST) {
        struct nx_role_request *nrr;

        BUILD_ASSERT(NX_ROLE_OTHER == OFPCR12_ROLE_EQUAL - 1);
        BUILD_ASSERT(NX_ROLE_MASTER == OFPCR12_ROLE_MASTER - 1);
        BUILD_ASSERT(NX_ROLE_SLAVE == OFPCR12_ROLE_SLAVE - 1);

        buf = ofpraw_alloc_reply(OFPRAW_NXT_ROLE_REPLY, request, 0);
        nrr = ofpbuf_put_zeros(buf, sizeof *nrr);
        /* Convert back from the OpenFlow 1.2 numbering to the Nicira one. */
        nrr->role = htonl(rr->role - 1);
    } else {
        NOT_REACHED();
    }

    return buf;
}

/* Table stats.
*/ static void ofputil_put_ofp10_table_stats(const struct ofp12_table_stats *in, struct ofpbuf *buf) { struct wc_map { enum ofp10_flow_wildcards wc10; enum oxm12_ofb_match_fields mf12; }; static const struct wc_map wc_map[] = { { OFPFW10_IN_PORT, OFPXMT12_OFB_IN_PORT }, { OFPFW10_DL_VLAN, OFPXMT12_OFB_VLAN_VID }, { OFPFW10_DL_SRC, OFPXMT12_OFB_ETH_SRC }, { OFPFW10_DL_DST, OFPXMT12_OFB_ETH_DST}, { OFPFW10_DL_TYPE, OFPXMT12_OFB_ETH_TYPE }, { OFPFW10_NW_PROTO, OFPXMT12_OFB_IP_PROTO }, { OFPFW10_TP_SRC, OFPXMT12_OFB_TCP_SRC }, { OFPFW10_TP_DST, OFPXMT12_OFB_TCP_DST }, { OFPFW10_NW_SRC_MASK, OFPXMT12_OFB_IPV4_SRC }, { OFPFW10_NW_DST_MASK, OFPXMT12_OFB_IPV4_DST }, { OFPFW10_DL_VLAN_PCP, OFPXMT12_OFB_VLAN_PCP }, { OFPFW10_NW_TOS, OFPXMT12_OFB_IP_DSCP }, }; struct ofp10_table_stats *out; const struct wc_map *p; out = ofpbuf_put_zeros(buf, sizeof *out); out->table_id = in->table_id; ovs_strlcpy(out->name, in->name, sizeof out->name); out->wildcards = 0; for (p = wc_map; p < &wc_map[ARRAY_SIZE(wc_map)]; p++) { if (in->wildcards & htonll(1ULL << p->mf12)) { out->wildcards |= htonl(p->wc10); } } out->max_entries = in->max_entries; out->active_count = in->active_count; put_32aligned_be64(&out->lookup_count, in->lookup_count); put_32aligned_be64(&out->matched_count, in->matched_count); } static ovs_be32 oxm12_to_ofp11_flow_match_fields(ovs_be64 oxm12) { struct map { enum ofp11_flow_match_fields fmf11; enum oxm12_ofb_match_fields mf12; }; static const struct map map[] = { { OFPFMF11_IN_PORT, OFPXMT12_OFB_IN_PORT }, { OFPFMF11_DL_VLAN, OFPXMT12_OFB_VLAN_VID }, { OFPFMF11_DL_VLAN_PCP, OFPXMT12_OFB_VLAN_PCP }, { OFPFMF11_DL_TYPE, OFPXMT12_OFB_ETH_TYPE }, { OFPFMF11_NW_TOS, OFPXMT12_OFB_IP_DSCP }, { OFPFMF11_NW_PROTO, OFPXMT12_OFB_IP_PROTO }, { OFPFMF11_TP_SRC, OFPXMT12_OFB_TCP_SRC }, { OFPFMF11_TP_DST, OFPXMT12_OFB_TCP_DST }, { OFPFMF11_MPLS_LABEL, OFPXMT12_OFB_MPLS_LABEL }, { OFPFMF11_MPLS_TC, OFPXMT12_OFB_MPLS_TC }, /* I don't know what OFPFMF11_TYPE means. 
*/ { OFPFMF11_DL_SRC, OFPXMT12_OFB_ETH_SRC }, { OFPFMF11_DL_DST, OFPXMT12_OFB_ETH_DST }, { OFPFMF11_NW_SRC, OFPXMT12_OFB_IPV4_SRC }, { OFPFMF11_NW_DST, OFPXMT12_OFB_IPV4_DST }, { OFPFMF11_METADATA, OFPXMT12_OFB_METADATA }, }; const struct map *p; uint32_t fmf11; fmf11 = 0; for (p = map; p < &map[ARRAY_SIZE(map)]; p++) { if (oxm12 & htonll(1ULL << p->mf12)) { fmf11 |= p->fmf11; } } return htonl(fmf11); } static void ofputil_put_ofp11_table_stats(const struct ofp12_table_stats *in, struct ofpbuf *buf) { struct ofp11_table_stats *out; out = ofpbuf_put_zeros(buf, sizeof *out); out->table_id = in->table_id; ovs_strlcpy(out->name, in->name, sizeof out->name); out->wildcards = oxm12_to_ofp11_flow_match_fields(in->wildcards); out->match = oxm12_to_ofp11_flow_match_fields(in->match); out->instructions = in->instructions; out->write_actions = in->write_actions; out->apply_actions = in->apply_actions; out->config = in->config; out->max_entries = in->max_entries; out->active_count = in->active_count; out->lookup_count = in->lookup_count; out->matched_count = in->matched_count; } static void ofputil_put_ofp13_table_stats(const struct ofp12_table_stats *in, struct ofpbuf *buf) { struct ofp13_table_stats *out; /* OF 1.3 splits table features off the ofp_table_stats, * so there is not much here. 
*/ out = ofpbuf_put_uninit(buf, sizeof *out); out->table_id = in->table_id; out->active_count = in->active_count; out->lookup_count = in->lookup_count; out->matched_count = in->matched_count; } struct ofpbuf * ofputil_encode_table_stats_reply(const struct ofp12_table_stats stats[], int n, const struct ofp_header *request) { struct ofpbuf *reply; int i; reply = ofpraw_alloc_stats_reply(request, n * sizeof *stats); switch ((enum ofp_version) request->version) { case OFP10_VERSION: for (i = 0; i < n; i++) { ofputil_put_ofp10_table_stats(&stats[i], reply); } break; case OFP11_VERSION: for (i = 0; i < n; i++) { ofputil_put_ofp11_table_stats(&stats[i], reply); } break; case OFP12_VERSION: ofpbuf_put(reply, stats, n * sizeof *stats); break; case OFP13_VERSION: for (i = 0; i < n; i++) { ofputil_put_ofp13_table_stats(&stats[i], reply); } break; default: NOT_REACHED(); } return reply; } /* ofputil_flow_monitor_request */ /* Converts an NXST_FLOW_MONITOR request in 'msg' into an abstract * ofputil_flow_monitor_request in 'rq'. * * Multiple NXST_FLOW_MONITOR requests can be packed into a single OpenFlow * message. Calling this function multiple times for a single 'msg' iterates * through the requests. The caller must initially leave 'msg''s layer * pointers null and not modify them between calls. * * Returns 0 if successful, EOF if no requests were left in this 'msg', * otherwise an OFPERR_* value. 
*/ int ofputil_decode_flow_monitor_request(struct ofputil_flow_monitor_request *rq, struct ofpbuf *msg) { struct nx_flow_monitor_request *nfmr; uint16_t flags; if (!msg->l2) { msg->l2 = msg->data; ofpraw_pull_assert(msg); } if (!msg->size) { return EOF; } nfmr = ofpbuf_try_pull(msg, sizeof *nfmr); if (!nfmr) { VLOG_WARN_RL(&bad_ofmsg_rl, "NXST_FLOW_MONITOR request has %zu " "leftover bytes at end", msg->size); return OFPERR_OFPBRC_BAD_LEN; } flags = ntohs(nfmr->flags); if (!(flags & (NXFMF_ADD | NXFMF_DELETE | NXFMF_MODIFY)) || flags & ~(NXFMF_INITIAL | NXFMF_ADD | NXFMF_DELETE | NXFMF_MODIFY | NXFMF_ACTIONS | NXFMF_OWN)) { VLOG_WARN_RL(&bad_ofmsg_rl, "NXST_FLOW_MONITOR has bad flags %#"PRIx16, flags); return OFPERR_NXBRC_FM_BAD_FLAGS; } if (!is_all_zeros(nfmr->zeros, sizeof nfmr->zeros)) { return OFPERR_NXBRC_MUST_BE_ZERO; } rq->id = ntohl(nfmr->id); rq->flags = flags; rq->out_port = u16_to_ofp(ntohs(nfmr->out_port)); rq->table_id = nfmr->table_id; return nx_pull_match(msg, ntohs(nfmr->match_len), &rq->match, NULL, NULL); } void ofputil_append_flow_monitor_request( const struct ofputil_flow_monitor_request *rq, struct ofpbuf *msg) { struct nx_flow_monitor_request *nfmr; size_t start_ofs; int match_len; if (!msg->size) { ofpraw_put(OFPRAW_NXST_FLOW_MONITOR_REQUEST, OFP10_VERSION, msg); } start_ofs = msg->size; ofpbuf_put_zeros(msg, sizeof *nfmr); match_len = nx_put_match(msg, &rq->match, htonll(0), htonll(0)); nfmr = ofpbuf_at_assert(msg, start_ofs, sizeof *nfmr); nfmr->id = htonl(rq->id); nfmr->flags = htons(rq->flags); nfmr->out_port = htons(ofp_to_u16(rq->out_port)); nfmr->match_len = htons(match_len); nfmr->table_id = rq->table_id; } /* Converts an NXST_FLOW_MONITOR reply (also known as a flow update) in 'msg' * into an abstract ofputil_flow_update in 'update'. The caller must have * initialized update->match to point to space allocated for a match. 
*
 * Uses 'ofpacts' to store the abstract OFPACT_* version of the update's
 * actions (except for NXFME_ABBREV, which never includes actions).  The caller
 * must initialize 'ofpacts' and retains ownership of it.  'update->ofpacts'
 * will point into the 'ofpacts' buffer.
 *
 * Multiple flow updates can be packed into a single OpenFlow message.  Calling
 * this function multiple times for a single 'msg' iterates through the
 * updates.  The caller must initially leave 'msg''s layer pointers null and
 * not modify them between calls.
 *
 * Returns 0 if successful, EOF if no updates were left in this 'msg',
 * otherwise an OFPERR_* value. */
int
ofputil_decode_flow_update(struct ofputil_flow_update *update,
                           struct ofpbuf *msg, struct ofpbuf *ofpacts)
{
    struct nx_flow_update_header *nfuh;
    unsigned int length;

    /* First call for this 'msg': remember the start of the message and strip
     * the OpenFlow header. */
    if (!msg->l2) {
        msg->l2 = msg->data;
        ofpraw_pull_assert(msg);
    }

    if (!msg->size) {
        return EOF;
    }

    if (msg->size < sizeof(struct nx_flow_update_header)) {
        goto bad_len;
    }

    /* Peek at the common header without consuming it: the event-specific
     * structures pulled below start at this same position. */
    nfuh = msg->data;
    update->event = ntohs(nfuh->event);
    length = ntohs(nfuh->length);
    if (length > msg->size || length % 8) {
        goto bad_len;
    }

    if (update->event == NXFME_ABBREV) {
        struct nx_flow_update_abbrev *nfua;

        /* An abbreviated update is exactly one fixed-size structure. */
        if (length != sizeof *nfua) {
            goto bad_len;
        }

        nfua = ofpbuf_pull(msg, sizeof *nfua);
        update->xid = nfua->xid;
        return 0;
    } else if (update->event == NXFME_ADDED
               || update->event == NXFME_DELETED
               || update->event == NXFME_MODIFIED) {
        struct nx_flow_update_full *nfuf;
        unsigned int actions_len;
        unsigned int match_len;
        enum ofperr error;

        if (length < sizeof *nfuf) {
            goto bad_len;
        }

        nfuf = ofpbuf_pull(msg, sizeof *nfuf);
        match_len = ntohs(nfuf->match_len);
        if (sizeof *nfuf + match_len > length) {
            goto bad_len;
        }

        update->reason = ntohs(nfuf->reason);
        update->idle_timeout = ntohs(nfuf->idle_timeout);
        update->hard_timeout = ntohs(nfuf->hard_timeout);
        update->table_id = nfuf->table_id;
        update->cookie = nfuf->cookie;
        update->priority = ntohs(nfuf->priority);

        error = nx_pull_match(msg, match_len, update->match, NULL, NULL);
        if (error) {
            return error;
        }

        /* Whatever remains of this update after the fixed part and the match
         * (rounded up to a multiple of 8 bytes) is the actions. */
        actions_len = length - sizeof *nfuf - ROUND_UP(match_len, 8);
        error = ofpacts_pull_openflow10(msg, actions_len, ofpacts);
        if (error) {
            return error;
        }

        update->ofpacts = ofpacts->data;
        update->ofpacts_len = ofpacts->size;
        return 0;
    } else {
        VLOG_WARN_RL(&bad_ofmsg_rl,
                     "NXST_FLOW_MONITOR reply has bad event %"PRIu16,
                     ntohs(nfuh->event));
        return OFPERR_NXBRC_FM_BAD_EVENT;
    }

bad_len:
    VLOG_WARN_RL(&bad_ofmsg_rl, "NXST_FLOW_MONITOR reply has %zu "
                 "leftover bytes at end", msg->size);
    return OFPERR_OFPBRC_BAD_LEN;
}

/* Returns, in host byte order, the 'id' carried in the body of the flow
 * monitor cancel message 'oh'. */
uint32_t
ofputil_decode_flow_monitor_cancel(const struct ofp_header *oh)
{
    const struct nx_flow_monitor_cancel *cancel = ofpmsg_body(oh);

    return ntohl(cancel->id);
}

/* Returns a new NXT_FLOW_MONITOR_CANCEL message (OpenFlow 1.0) whose 'id'
 * field is 'id'. */
struct ofpbuf *
ofputil_encode_flow_monitor_cancel(uint32_t id)
{
    struct nx_flow_monitor_cancel *nfmc;
    struct ofpbuf *msg;

    msg = ofpraw_alloc(OFPRAW_NXT_FLOW_MONITOR_CANCEL, OFP10_VERSION, 0);
    nfmc = ofpbuf_put_uninit(msg, sizeof *nfmc);
    nfmc->id = htonl(id);
    return msg;
}

/* Initializes 'replies' as a list holding a single, empty
 * NXST_FLOW_MONITOR reply message (OpenFlow 1.0, xid 0), to which
 * ofputil_append_flow_update() can then add updates. */
void
ofputil_start_flow_update(struct list *replies)
{
    struct ofpbuf *msg;

    msg = ofpraw_alloc_xid(OFPRAW_NXST_FLOW_MONITOR_REPLY, OFP10_VERSION,
                           htonl(0), 1024);

    list_init(replies);
    list_push_back(replies, &msg->list_node);
}

/* Appends the wire form of 'update' to the last message in 'replies', then
 * calls ofpmp_postappend() on 'replies' with the offset at which the update
 * starts. */
void
ofputil_append_flow_update(const struct ofputil_flow_update *update,
                           struct list *replies)
{
    struct nx_flow_update_header *nfuh;
    struct ofpbuf *msg;
    size_t start_ofs;

    msg = ofpbuf_from_list(list_back(replies));
    start_ofs = msg->size;

    if (update->event == NXFME_ABBREV) {
        struct nx_flow_update_abbrev *nfua;

        nfua = ofpbuf_put_zeros(msg, sizeof *nfua);
        nfua->xid = update->xid;
    } else {
        struct nx_flow_update_full *nfuf;
        int match_len;

        ofpbuf_put_zeros(msg, sizeof *nfuf);
        match_len = nx_put_match(msg, update->match, htonll(0), htonll(0));
        ofpacts_put_openflow10(update->ofpacts, update->ofpacts_len, msg);

        /* The fixed part is looked up by offset only after the match and
         * actions have been appended (presumably because appending can move
         * the buffer's data -- confirm against ofpbuf). */
        nfuf = ofpbuf_at_assert(msg, start_ofs, sizeof *nfuf);
        nfuf->reason = htons(update->reason);
        nfuf->priority = htons(update->priority);
        nfuf->idle_timeout = htons(update->idle_timeout);
        nfuf->hard_timeout = htons(update->hard_timeout);
        nfuf->match_len = htons(match_len);
        nfuf->table_id = update->table_id;
        nfuf->cookie = update->cookie;
    }

    /* The common header's length covers everything appended above. */
    nfuh = ofpbuf_at_assert(msg, start_ofs, sizeof *nfuh);
    nfuh->length = htons(msg->size - start_ofs);
    nfuh->event = htons(update->event);

    ofpmp_postappend(replies, start_ofs);
}

/* Returns a new "packet out" message for 'po', encoded for the OpenFlow
 * version that corresponds to 'protocol'.  The packet data in 'po' is
 * included in the message only when 'po->buffer_id' is UINT32_MAX. */
struct ofpbuf *
ofputil_encode_packet_out(const struct ofputil_packet_out *po,
                          enum ofputil_protocol protocol)
{
    enum ofp_version ofp_version = ofputil_protocol_to_ofp_version(protocol);
    struct ofpbuf *msg;
    size_t size;

    /* Size passed to ofpraw_alloc(): the actions, plus the packet when it is
     * sent inline. */
    size = po->ofpacts_len;
    if (po->buffer_id == UINT32_MAX) {
        size += po->packet_len;
    }

    switch (ofp_version) {
    case OFP10_VERSION: {
        struct ofp10_packet_out *opo;
        size_t actions_ofs;

        msg = ofpraw_alloc(OFPRAW_OFPT10_PACKET_OUT, OFP10_VERSION, size);
        ofpbuf_put_zeros(msg, sizeof *opo);
        actions_ofs = msg->size;
        ofpacts_put_openflow10(po->ofpacts, po->ofpacts_len, msg);

        /* Fetch the fixed part only after the actions are in place. */
        opo = msg->l3;
        opo->buffer_id = htonl(po->buffer_id);
        opo->in_port = htons(ofp_to_u16(po->in_port));
        opo->actions_len = htons(msg->size - actions_ofs);
        break;
    }

    case OFP11_VERSION:
    case OFP12_VERSION:
    case OFP13_VERSION: {
        struct ofp11_packet_out *opo;
        size_t len;

        msg = ofpraw_alloc(OFPRAW_OFPT11_PACKET_OUT, ofp_version, size);
        ofpbuf_put_zeros(msg, sizeof *opo);
        len = ofpacts_put_openflow11_actions(po->ofpacts, po->ofpacts_len, msg);

        opo = msg->l3;
        opo->buffer_id = htonl(po->buffer_id);
        opo->in_port = ofputil_port_to_ofp11(po->in_port);
        opo->actions_len = htons(len);
        break;
    }

    default:
        NOT_REACHED();
    }

    if (po->buffer_id == UINT32_MAX) {
        ofpbuf_put(msg, po->packet, po->packet_len);
    }

    ofpmsg_update_length(msg);

    return msg;
}

/* Creates and returns an OFPT_ECHO_REQUEST message with an empty payload.
*/ struct ofpbuf * make_echo_request(enum ofp_version ofp_version) { return ofpraw_alloc_xid(OFPRAW_OFPT_ECHO_REQUEST, ofp_version, htonl(0), 0); } /* Creates and returns an OFPT_ECHO_REPLY message matching the * OFPT_ECHO_REQUEST message in 'rq'. */ struct ofpbuf * make_echo_reply(const struct ofp_header *rq) { struct ofpbuf rq_buf; struct ofpbuf *reply; ofpbuf_use_const(&rq_buf, rq, ntohs(rq->length)); ofpraw_pull_assert(&rq_buf); reply = ofpraw_alloc_reply(OFPRAW_OFPT_ECHO_REPLY, rq, rq_buf.size); ofpbuf_put(reply, rq_buf.data, rq_buf.size); return reply; } struct ofpbuf * ofputil_encode_barrier_request(enum ofp_version ofp_version) { enum ofpraw type; switch (ofp_version) { case OFP13_VERSION: case OFP12_VERSION: case OFP11_VERSION: type = OFPRAW_OFPT11_BARRIER_REQUEST; break; case OFP10_VERSION: type = OFPRAW_OFPT10_BARRIER_REQUEST; break; default: NOT_REACHED(); } return ofpraw_alloc(type, ofp_version, 0); } const char * ofputil_frag_handling_to_string(enum ofp_config_flags flags) { switch (flags & OFPC_FRAG_MASK) { case OFPC_FRAG_NORMAL: return "normal"; case OFPC_FRAG_DROP: return "drop"; case OFPC_FRAG_REASM: return "reassemble"; case OFPC_FRAG_NX_MATCH: return "nx-match"; } NOT_REACHED(); } bool ofputil_frag_handling_from_string(const char *s, enum ofp_config_flags *flags) { if (!strcasecmp(s, "normal")) { *flags = OFPC_FRAG_NORMAL; } else if (!strcasecmp(s, "drop")) { *flags = OFPC_FRAG_DROP; } else if (!strcasecmp(s, "reassemble")) { *flags = OFPC_FRAG_REASM; } else if (!strcasecmp(s, "nx-match")) { *flags = OFPC_FRAG_NX_MATCH; } else { return false; } return true; } /* Converts the OpenFlow 1.1+ port number 'ofp11_port' into an OpenFlow 1.0 * port number and stores the latter in '*ofp10_port', for the purpose of * decoding OpenFlow 1.1+ protocol messages. Returns 0 if successful, * otherwise an OFPERR_* number. On error, stores OFPP_NONE in '*ofp10_port'. * * See the definition of OFP11_MAX for an explanation of the mapping. 
*/
enum ofperr
ofputil_port_from_ofp11(ovs_be32 ofp11_port, ofp_port_t *ofp10_port)
{
    const uint32_t port11 = ntohl(ofp11_port);
    const uint16_t ofp10_limit = ofp_to_u16(OFPP_MAX);
    const uint32_t ofp11_resv_start = ofp11_to_u32(OFPP11_MAX);

    /* Ordinary ports keep their number. */
    if (port11 < ofp10_limit) {
        *ofp10_port = u16_to_ofp(port11);
        return 0;
    }

    /* The top of the 32-bit range is shifted down into the 16-bit range. */
    if (port11 >= ofp11_resv_start) {
        *ofp10_port = u16_to_ofp(port11 - OFPP11_OFFSET);
        return 0;
    }

    /* Everything in between has no 16-bit equivalent. */
    *ofp10_port = OFPP_NONE;
    VLOG_WARN_RL(&bad_ofmsg_rl, "port %"PRIu32" is outside the supported "
                 "range 0 through %d or 0x%"PRIx32" through 0x%"PRIx32,
                 port11, ofp10_limit - 1, ofp11_resv_start, UINT32_MAX);
    return OFPERR_OFPBAC_BAD_OUT_PORT;
}

/* Maps the OpenFlow 1.0 port number 'ofp10_port' to the OpenFlow 1.1+ port
 * number used when encoding OpenFlow 1.1+ protocol messages, and returns it
 * in network byte order.  Port numbers below OFPP_MAX are unchanged; the
 * others are shifted up by OFPP11_OFFSET.
 *
 * See the definition of OFP11_MAX for an explanation of the mapping. */
ovs_be32
ofputil_port_to_ofp11(ofp_port_t ofp10_port)
{
    uint32_t port11 = ofp_to_u16(ofp10_port);

    if (port11 >= ofp_to_u16(OFPP_MAX)) {
        port11 += OFPP11_OFFSET;
    }
    return htonl(port11);
}

/* Checks that 'port' is a valid output port for the OFPAT10_OUTPUT action, given
 * that the switch will never have more than 'max_ports' ports.  Returns 0 if
 * 'port' is valid, otherwise an OpenFlow return code.
*/ enum ofperr ofputil_check_output_port(ofp_port_t port, ofp_port_t max_ports) { switch (port) { case OFPP_IN_PORT: case OFPP_TABLE: case OFPP_NORMAL: case OFPP_FLOOD: case OFPP_ALL: case OFPP_CONTROLLER: case OFPP_NONE: case OFPP_LOCAL: return 0; default: if (ofp_to_u16(port) < ofp_to_u16(max_ports)) { return 0; } return OFPERR_OFPBAC_BAD_OUT_PORT; } } #define OFPUTIL_NAMED_PORTS \ OFPUTIL_NAMED_PORT(IN_PORT) \ OFPUTIL_NAMED_PORT(TABLE) \ OFPUTIL_NAMED_PORT(NORMAL) \ OFPUTIL_NAMED_PORT(FLOOD) \ OFPUTIL_NAMED_PORT(ALL) \ OFPUTIL_NAMED_PORT(CONTROLLER) \ OFPUTIL_NAMED_PORT(LOCAL) \ OFPUTIL_NAMED_PORT(ANY) /* For backwards compatibility, so that "none" is recognized as OFPP_ANY */ #define OFPUTIL_NAMED_PORTS_WITH_NONE \ OFPUTIL_NAMED_PORTS \ OFPUTIL_NAMED_PORT(NONE) /* Stores the port number represented by 's' into '*portp'. 's' may be an * integer or, for reserved ports, the standard OpenFlow name for the port * (e.g. "LOCAL"). * * Returns true if successful, false if 's' is not a valid OpenFlow port number * or name. The caller should issue an error message in this case, because * this function usually does not. (This gives the caller an opportunity to * look up the port name another way, e.g. by contacting the switch and listing * the names of all its ports). * * This function accepts OpenFlow 1.0 port numbers. It also accepts a subset * of OpenFlow 1.1+ port numbers, mapping those port numbers into the 16-bit * range as described in include/openflow/openflow-1.1.h. */ bool ofputil_port_from_string(const char *s, ofp_port_t *portp) { uint32_t port32; *portp = 0; if (str_to_uint(s, 10, &port32)) { if (port32 < ofp_to_u16(OFPP_MAX)) { /* Pass. 
*/ } else if (port32 < ofp_to_u16(OFPP_FIRST_RESV)) { VLOG_WARN("port %u is a reserved OF1.0 port number that will " "be translated to %u when talking to an OF1.1 or " "later controller", port32, port32 + OFPP11_OFFSET); } else if (port32 <= ofp_to_u16(OFPP_LAST_RESV)) { char name[OFP_MAX_PORT_NAME_LEN]; ofputil_port_to_string(u16_to_ofp(port32), name, sizeof name); VLOG_WARN_ONCE("referring to port %s as %"PRIu32" is deprecated " "for compatibility with OpenFlow 1.1 and later", name, port32); } else if (port32 < ofp11_to_u32(OFPP11_MAX)) { VLOG_WARN("port %u is outside the supported range 0 through " "%"PRIx16" or 0x%x through 0x%"PRIx32, port32, UINT16_MAX, ofp11_to_u32(OFPP11_MAX), UINT32_MAX); return false; } else { port32 -= OFPP11_OFFSET; } *portp = u16_to_ofp(port32); return true; } else { struct pair { const char *name; ofp_port_t value; }; static const struct pair pairs[] = { #define OFPUTIL_NAMED_PORT(NAME) {#NAME, OFPP_##NAME}, OFPUTIL_NAMED_PORTS_WITH_NONE #undef OFPUTIL_NAMED_PORT }; const struct pair *p; for (p = pairs; p < &pairs[ARRAY_SIZE(pairs)]; p++) { if (!strcasecmp(s, p->name)) { *portp = p->value; return true; } } return false; } } /* Appends to 's' a string representation of the OpenFlow port number 'port'. * Most ports' string representation is just the port number, but for special * ports, e.g. OFPP_LOCAL, it is the name, e.g. "LOCAL". */ void ofputil_format_port(ofp_port_t port, struct ds *s) { char name[OFP_MAX_PORT_NAME_LEN]; ofputil_port_to_string(port, name, sizeof name); ds_put_cstr(s, name); } /* Puts in the 'bufsize' byte in 'namebuf' a null-terminated string * representation of OpenFlow port number 'port'. Most ports are represented * as just the port number, but special ports, e.g. OFPP_LOCAL, are represented * by name, e.g. "LOCAL". 
*/ void ofputil_port_to_string(ofp_port_t port, char namebuf[OFP_MAX_PORT_NAME_LEN], size_t bufsize) { switch (port) { #define OFPUTIL_NAMED_PORT(NAME) \ case OFPP_##NAME: \ ovs_strlcpy(namebuf, #NAME, bufsize); \ break; OFPUTIL_NAMED_PORTS #undef OFPUTIL_NAMED_PORT default: snprintf(namebuf, bufsize, "%"PRIu16, port); break; } } /* Given a buffer 'b' that contains an array of OpenFlow ports of type * 'ofp_version', tries to pull the first element from the array. If * successful, initializes '*pp' with an abstract representation of the * port and returns 0. If no ports remain to be decoded, returns EOF. * On an error, returns a positive OFPERR_* value. */ int ofputil_pull_phy_port(enum ofp_version ofp_version, struct ofpbuf *b, struct ofputil_phy_port *pp) { switch (ofp_version) { case OFP10_VERSION: { const struct ofp10_phy_port *opp = ofpbuf_try_pull(b, sizeof *opp); return opp ? ofputil_decode_ofp10_phy_port(pp, opp) : EOF; } case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: { const struct ofp11_port *op = ofpbuf_try_pull(b, sizeof *op); return op ? ofputil_decode_ofp11_port(pp, op) : EOF; } default: NOT_REACHED(); } } /* Given a buffer 'b' that contains an array of OpenFlow ports of type * 'ofp_version', returns the number of elements. */ size_t ofputil_count_phy_ports(uint8_t ofp_version, struct ofpbuf *b) { return b->size / ofputil_get_phy_port_size(ofp_version); } /* Returns the 'enum ofputil_action_code' corresponding to 'name' (e.g. if * 'name' is "output" then the return value is OFPUTIL_OFPAT10_OUTPUT), or -1 if * 'name' is not the name of any action. * * ofp-util.def lists the mapping from names to action. 
*/
int
ofputil_action_code_from_name(const char *name)
{
    /* Table of action names built by expanding ofp-util.def.  Slot 0 is
     * NULL, so index 0 never matches a name. */
    static const char *const names[OFPUTIL_N_ACTIONS] = {
        NULL,
#define OFPAT10_ACTION(ENUM, STRUCT, NAME)             NAME,
#define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) NAME,
#define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME)   NAME,
#include "ofp-util.def"
    };

    const char *const *p;

    /* Case-insensitive linear search; the index of the match is returned as
     * the action code. */
    for (p = names; p < &names[ARRAY_SIZE(names)]; p++) {
        if (*p && !strcasecmp(name, *p)) {
            return p - names;
        }
    }
    return -1;
}

/* Appends an action of the type specified by 'code' to 'buf' and returns the
 * action.  Initializes the parts of 'action' that identify it as having type
 * <code> and length 'sizeof *action' and zeros the rest.  For actions that
 * have variable length, the length used and cleared is that of the action's
 * 'struct STRUCT' as listed in ofp-util.def. */
void *
ofputil_put_action(enum ofputil_action_code code, struct ofpbuf *buf)
{
    /* Each entry of ofp-util.def expands to a 'case' that dispatches to the
     * matching generated ofputil_put_<ENUM>() function. */
    switch (code) {
    case OFPUTIL_ACTION_INVALID:
        NOT_REACHED();

#define OFPAT10_ACTION(ENUM, STRUCT, NAME)                    \
    case OFPUTIL_##ENUM: return ofputil_put_##ENUM(buf);
#define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME)      \
    case OFPUTIL_##ENUM: return ofputil_put_##ENUM(buf);
#define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME)        \
    case OFPUTIL_##ENUM: return ofputil_put_##ENUM(buf);
#include "ofp-util.def"
    }
    NOT_REACHED();
}

/* The macros below, expanded through ofp-util.def, define for every action
 * ENUM a pair of functions:
 *
 *   - ofputil_init_<ENUM>(s): zeros '*s', then fills in its type and length
 *     (and, for Nicira extension actions, the vendor ID and subtype).
 *
 *   - ofputil_put_<ENUM>(buf): appends 'sizeof *s' bytes to 'buf',
 *     initializes them with ofputil_init_<ENUM>(), and returns the new
 *     action. */
#define OFPAT10_ACTION(ENUM, STRUCT, NAME)                        \
    void                                                        \
    ofputil_init_##ENUM(struct STRUCT *s)                       \
    {                                                           \
        memset(s, 0, sizeof *s);                                \
        s->type = htons(ENUM);                                  \
        s->len = htons(sizeof *s);                              \
    }                                                           \
                                                                \
    struct STRUCT *                                             \
    ofputil_put_##ENUM(struct ofpbuf *buf)                      \
    {                                                           \
        struct STRUCT *s = ofpbuf_put_uninit(buf, sizeof *s);   \
        ofputil_init_##ENUM(s);                                 \
        return s;                                               \
    }
#define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME)            \
    OFPAT10_ACTION(ENUM, STRUCT, NAME)
#define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME)            \
    void                                                        \
    ofputil_init_##ENUM(struct STRUCT *s)                       \
    {                                                           \
        memset(s, 0, sizeof *s);                                \
        s->type = htons(OFPAT10_VENDOR);                        \
        s->len = htons(sizeof *s);                              \
        s->vendor = htonl(NX_VENDOR_ID);                        \
        s->subtype = htons(ENUM);                               \
    }                                                           \
                                                                \
    struct STRUCT *                                             \
    ofputil_put_##ENUM(struct ofpbuf *buf)                      \
    {                                                           \
        struct STRUCT *s = ofpbuf_put_uninit(buf, sizeof *s);   \
        ofputil_init_##ENUM(s);                                 \
        return s;                                               \
    }
#include "ofp-util.def"

/* Implementation of ofputil_normalize_match() and
 * ofputil_normalize_match_quiet().
 *
 * Works out, from the Ethernet type and IP protocol in 'match->flow', which
 * groups of fields can be meaningfully matched, and wildcards every group
 * that cannot.  If that changes the wildcards, the new wildcards are stored
 * in 'match', the newly wildcarded fields are zeroed, and, if 'may_log' is
 * true, the match before and after is logged (subject to rate limiting). */
static void
ofputil_normalize_match__(struct match *match, bool may_log)
{
    enum {
        MAY_NW_ADDR     = 1 << 0, /* nw_src, nw_dst */
        MAY_TP_ADDR     = 1 << 1, /* tp_src, tp_dst */
        MAY_NW_PROTO    = 1 << 2, /* nw_proto */
        MAY_IPVx        = 1 << 3, /* nw_tos, nw_ttl */
        MAY_ARP_SHA     = 1 << 4, /* arp_sha */
        MAY_ARP_THA     = 1 << 5, /* arp_tha */
        MAY_IPV6        = 1 << 6, /* ipv6_src, ipv6_dst, ipv6_label */
        MAY_ND_TARGET   = 1 << 7, /* nd_target */
        MAY_MPLS        = 1 << 8, /* mpls label and tc */
    } may_match;

    struct flow_wildcards wc;

    /* Figure out what fields may be matched. */
    if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
        may_match = MAY_NW_PROTO | MAY_IPVx | MAY_NW_ADDR;
        if (match->flow.nw_proto == IPPROTO_TCP ||
            match->flow.nw_proto == IPPROTO_UDP ||
            match->flow.nw_proto == IPPROTO_SCTP ||
            match->flow.nw_proto == IPPROTO_ICMP) {
            may_match |= MAY_TP_ADDR;
        }
    } else if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) {
        may_match = MAY_NW_PROTO | MAY_IPVx | MAY_IPV6;
        if (match->flow.nw_proto == IPPROTO_TCP ||
            match->flow.nw_proto == IPPROTO_UDP ||
            match->flow.nw_proto == IPPROTO_SCTP) {
            may_match |= MAY_TP_ADDR;
        } else if (match->flow.nw_proto == IPPROTO_ICMPV6) {
            may_match |= MAY_TP_ADDR;
            /* For ICMPv6, tp_src is compared against the neighbor discovery
             * message types: solicitations may also match the ND target and
             * arp_sha, advertisements the ND target and arp_tha. */
            if (match->flow.tp_src == htons(ND_NEIGHBOR_SOLICIT)) {
                may_match |= MAY_ND_TARGET | MAY_ARP_SHA;
            } else if (match->flow.tp_src == htons(ND_NEIGHBOR_ADVERT)) {
                may_match |= MAY_ND_TARGET | MAY_ARP_THA;
            }
        }
    } else if (match->flow.dl_type == htons(ETH_TYPE_ARP) ||
               match->flow.dl_type == htons(ETH_TYPE_RARP)) {
        may_match = MAY_NW_PROTO | MAY_NW_ADDR | MAY_ARP_SHA | MAY_ARP_THA;
    } else if (eth_type_mpls(match->flow.dl_type)) {
        may_match = MAY_MPLS;
    } else {
        may_match = 0;
    }

    /* Clear the fields that may not be matched. */
    wc = match->wc;
    if (!(may_match & MAY_NW_ADDR)) {
        wc.masks.nw_src = wc.masks.nw_dst = htonl(0);
    }
    if (!(may_match & MAY_TP_ADDR)) {
        wc.masks.tp_src = wc.masks.tp_dst = htons(0);
    }
    if (!(may_match & MAY_NW_PROTO)) {
        wc.masks.nw_proto = 0;
    }
    if (!(may_match & MAY_IPVx)) {
        wc.masks.nw_tos = 0;
        wc.masks.nw_ttl = 0;
    }
    if (!(may_match & MAY_ARP_SHA)) {
        memset(wc.masks.arp_sha, 0, ETH_ADDR_LEN);
    }
    if (!(may_match & MAY_ARP_THA)) {
        memset(wc.masks.arp_tha, 0, ETH_ADDR_LEN);
    }
    if (!(may_match & MAY_IPV6)) {
        wc.masks.ipv6_src = wc.masks.ipv6_dst = in6addr_any;
        wc.masks.ipv6_label = htonl(0);
    }
    if (!(may_match & MAY_ND_TARGET)) {
        wc.masks.nd_target = in6addr_any;
    }
    if (!(may_match & MAY_MPLS)) {
        wc.masks.mpls_lse = htonl(0);
        wc.masks.mpls_depth = 0;
    }

    /* Log any changes. */
    if (!flow_wildcards_equal(&wc, &match->wc)) {
        bool log = may_log && !VLOG_DROP_INFO(&bad_ofmsg_rl);
        /* The "before" string must be captured before 'match' is updated. */
        char *pre = log ? match_to_string(match, OFP_DEFAULT_PRIORITY) : NULL;

        match->wc = wc;
        match_zero_wildcarded_fields(match);

        if (log) {
            char *post = match_to_string(match, OFP_DEFAULT_PRIORITY);
            VLOG_INFO("normalization changed ofp_match, details:");
            VLOG_INFO(" pre: %s", pre);
            VLOG_INFO("post: %s", post);
            free(pre);
            free(post);
        }
    }
}

/* "Normalizes" the wildcards in 'match'.  That means:
 *
 *    1. If the type of level N is known, then only the valid fields for that
 *       level may be specified.  For example, ARP does not have a TOS field,
 *       so nw_tos must be wildcarded if 'match' specifies an ARP flow.
 *       Similarly, IPv4 does not have any IPv6 addresses, so ipv6_src and
 *       ipv6_dst (and other fields) must be wildcarded if 'match' specifies an
 *       IPv4 flow.
 *
 *    2. If the type of level N is not known (or not understood by Open
 *       vSwitch), then no fields at all for that level may be specified.  For
 *       example, for IPv4 the only L4 protocols recognized here are TCP, UDP,
 *       SCTP, and ICMP, so the L4 fields tp_src and tp_dst must be wildcarded
 *       if 'match' specifies an IPv4 flow with any other IP protocol.
* * If this function changes 'match', it logs a rate-limited informational * message. */ void ofputil_normalize_match(struct match *match) { ofputil_normalize_match__(match, true); } /* Same as ofputil_normalize_match() without the logging. Thus, this function * is suitable for a program's internal use, whereas ofputil_normalize_match() * sense for use on flows received from elsewhere (so that a bug in the program * that sent them can be reported and corrected). */ void ofputil_normalize_match_quiet(struct match *match) { ofputil_normalize_match__(match, false); } /* Parses a key or a key-value pair from '*stringp'. * * On success: Stores the key into '*keyp'. Stores the value, if present, into * '*valuep', otherwise an empty string. Advances '*stringp' past the end of * the key-value pair, preparing it for another call. '*keyp' and '*valuep' * are substrings of '*stringp' created by replacing some of its bytes by null * terminators. Returns true. * * If '*stringp' is just white space or commas, sets '*keyp' and '*valuep' to * NULL and returns false. */ bool ofputil_parse_key_value(char **stringp, char **keyp, char **valuep) { char *pos, *key, *value; size_t key_len; pos = *stringp; pos += strspn(pos, ", \t\r\n"); if (*pos == '\0') { *keyp = *valuep = NULL; return false; } key = pos; key_len = strcspn(pos, ":=(, \t\r\n"); if (key[key_len] == ':' || key[key_len] == '=') { /* The value can be separated by a colon. */ size_t value_len; value = key + key_len + 1; value_len = strcspn(value, ", \t\r\n"); pos = value + value_len + (value[value_len] != '\0'); value[value_len] = '\0'; } else if (key[key_len] == '(') { /* The value can be surrounded by balanced parentheses. The outermost * set of parentheses is removed. 
*/ int level = 1; size_t value_len; value = key + key_len + 1; for (value_len = 0; level > 0; value_len++) { switch (value[value_len]) { case '\0': level = 0; break; case '(': level++; break; case ')': level--; break; } } value[value_len - 1] = '\0'; pos = value + value_len; } else { /* There might be no value at all. */ value = key + key_len; /* Will become the empty string below. */ pos = key + key_len + (key[key_len] != '\0'); } key[key_len] = '\0'; *stringp = pos; *keyp = key; *valuep = value; return true; } /* Encode a dump ports request for 'port', the encoded message * will be for Open Flow version 'ofp_version'. Returns message * as a struct ofpbuf. Returns encoded message on success, NULL on error */ struct ofpbuf * ofputil_encode_dump_ports_request(enum ofp_version ofp_version, ofp_port_t port) { struct ofpbuf *request; switch (ofp_version) { case OFP10_VERSION: { struct ofp10_port_stats_request *req; request = ofpraw_alloc(OFPRAW_OFPST10_PORT_REQUEST, ofp_version, 0); req = ofpbuf_put_zeros(request, sizeof *req); req->port_no = htons(ofp_to_u16(port)); break; } case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: { struct ofp11_port_stats_request *req; request = ofpraw_alloc(OFPRAW_OFPST11_PORT_REQUEST, ofp_version, 0); req = ofpbuf_put_zeros(request, sizeof *req); req->port_no = ofputil_port_to_ofp11(port); break; } default: NOT_REACHED(); } return request; } static void ofputil_port_stats_to_ofp10(const struct ofputil_port_stats *ops, struct ofp10_port_stats *ps10) { ps10->port_no = htons(ofp_to_u16(ops->port_no)); memset(ps10->pad, 0, sizeof ps10->pad); put_32aligned_be64(&ps10->rx_packets, htonll(ops->stats.rx_packets)); put_32aligned_be64(&ps10->tx_packets, htonll(ops->stats.tx_packets)); put_32aligned_be64(&ps10->rx_bytes, htonll(ops->stats.rx_bytes)); put_32aligned_be64(&ps10->tx_bytes, htonll(ops->stats.tx_bytes)); put_32aligned_be64(&ps10->rx_dropped, htonll(ops->stats.rx_dropped)); put_32aligned_be64(&ps10->tx_dropped, 
htonll(ops->stats.tx_dropped)); put_32aligned_be64(&ps10->rx_errors, htonll(ops->stats.rx_errors)); put_32aligned_be64(&ps10->tx_errors, htonll(ops->stats.tx_errors)); put_32aligned_be64(&ps10->rx_frame_err, htonll(ops->stats.rx_frame_errors)); put_32aligned_be64(&ps10->rx_over_err, htonll(ops->stats.rx_over_errors)); put_32aligned_be64(&ps10->rx_crc_err, htonll(ops->stats.rx_crc_errors)); put_32aligned_be64(&ps10->collisions, htonll(ops->stats.collisions)); } static void ofputil_port_stats_to_ofp11(const struct ofputil_port_stats *ops, struct ofp11_port_stats *ps11) { ps11->port_no = ofputil_port_to_ofp11(ops->port_no); memset(ps11->pad, 0, sizeof ps11->pad); ps11->rx_packets = htonll(ops->stats.rx_packets); ps11->tx_packets = htonll(ops->stats.tx_packets); ps11->rx_bytes = htonll(ops->stats.rx_bytes); ps11->tx_bytes = htonll(ops->stats.tx_bytes); ps11->rx_dropped = htonll(ops->stats.rx_dropped); ps11->tx_dropped = htonll(ops->stats.tx_dropped); ps11->rx_errors = htonll(ops->stats.rx_errors); ps11->tx_errors = htonll(ops->stats.tx_errors); ps11->rx_frame_err = htonll(ops->stats.rx_frame_errors); ps11->rx_over_err = htonll(ops->stats.rx_over_errors); ps11->rx_crc_err = htonll(ops->stats.rx_crc_errors); ps11->collisions = htonll(ops->stats.collisions); } static void ofputil_port_stats_to_ofp13(const struct ofputil_port_stats *ops, struct ofp13_port_stats *ps13) { ofputil_port_stats_to_ofp11(ops, &ps13->ps); ps13->duration_sec = htonl(ops->duration_sec); ps13->duration_nsec = htonl(ops->duration_nsec); } /* Encode a ports stat for 'ops' and append it to 'replies'. 
*/ void ofputil_append_port_stat(struct list *replies, const struct ofputil_port_stats *ops) { struct ofpbuf *msg = ofpbuf_from_list(list_back(replies)); struct ofp_header *oh = msg->data; switch ((enum ofp_version)oh->version) { case OFP13_VERSION: { struct ofp13_port_stats *reply = ofpmp_append(replies, sizeof *reply); ofputil_port_stats_to_ofp13(ops, reply); break; } case OFP12_VERSION: case OFP11_VERSION: { struct ofp11_port_stats *reply = ofpmp_append(replies, sizeof *reply); ofputil_port_stats_to_ofp11(ops, reply); break; } case OFP10_VERSION: { struct ofp10_port_stats *reply = ofpmp_append(replies, sizeof *reply); ofputil_port_stats_to_ofp10(ops, reply); break; } default: NOT_REACHED(); } } static enum ofperr ofputil_port_stats_from_ofp10(struct ofputil_port_stats *ops, const struct ofp10_port_stats *ps10) { memset(ops, 0, sizeof *ops); ops->port_no = u16_to_ofp(ntohs(ps10->port_no)); ops->stats.rx_packets = ntohll(get_32aligned_be64(&ps10->rx_packets)); ops->stats.tx_packets = ntohll(get_32aligned_be64(&ps10->tx_packets)); ops->stats.rx_bytes = ntohll(get_32aligned_be64(&ps10->rx_bytes)); ops->stats.tx_bytes = ntohll(get_32aligned_be64(&ps10->tx_bytes)); ops->stats.rx_dropped = ntohll(get_32aligned_be64(&ps10->rx_dropped)); ops->stats.tx_dropped = ntohll(get_32aligned_be64(&ps10->tx_dropped)); ops->stats.rx_errors = ntohll(get_32aligned_be64(&ps10->rx_errors)); ops->stats.tx_errors = ntohll(get_32aligned_be64(&ps10->tx_errors)); ops->stats.rx_frame_errors = ntohll(get_32aligned_be64(&ps10->rx_frame_err)); ops->stats.rx_over_errors = ntohll(get_32aligned_be64(&ps10->rx_over_err)); ops->stats.rx_crc_errors = ntohll(get_32aligned_be64(&ps10->rx_crc_err)); ops->stats.collisions = ntohll(get_32aligned_be64(&ps10->collisions)); ops->duration_sec = ops->duration_nsec = UINT32_MAX; return 0; } static enum ofperr ofputil_port_stats_from_ofp11(struct ofputil_port_stats *ops, const struct ofp11_port_stats *ps11) { enum ofperr error; memset(ops, 0, sizeof *ops); error 
= ofputil_port_from_ofp11(ps11->port_no, &ops->port_no); if (error) { return error; } ops->stats.rx_packets = ntohll(ps11->rx_packets); ops->stats.tx_packets = ntohll(ps11->tx_packets); ops->stats.rx_bytes = ntohll(ps11->rx_bytes); ops->stats.tx_bytes = ntohll(ps11->tx_bytes); ops->stats.rx_dropped = ntohll(ps11->rx_dropped); ops->stats.tx_dropped = ntohll(ps11->tx_dropped); ops->stats.rx_errors = ntohll(ps11->rx_errors); ops->stats.tx_errors = ntohll(ps11->tx_errors); ops->stats.rx_frame_errors = ntohll(ps11->rx_frame_err); ops->stats.rx_over_errors = ntohll(ps11->rx_over_err); ops->stats.rx_crc_errors = ntohll(ps11->rx_crc_err); ops->stats.collisions = ntohll(ps11->collisions); ops->duration_sec = ops->duration_nsec = UINT32_MAX; return 0; } static enum ofperr ofputil_port_stats_from_ofp13(struct ofputil_port_stats *ops, const struct ofp13_port_stats *ps13) { enum ofperr error = ofputil_port_stats_from_ofp11(ops, &ps13->ps); if (!error) { ops->duration_sec = ntohl(ps13->duration_sec); ops->duration_nsec = ntohl(ps13->duration_nsec); } return error; } static size_t ofputil_get_port_stats_size(enum ofp_version ofp_version) { switch (ofp_version) { case OFP10_VERSION: return sizeof(struct ofp10_port_stats); case OFP11_VERSION: case OFP12_VERSION: return sizeof(struct ofp11_port_stats); case OFP13_VERSION: return sizeof(struct ofp13_port_stats); default: NOT_REACHED(); } } /* Returns the number of port stats elements in OFPTYPE_PORT_STATS_REPLY * message 'oh'. */ size_t ofputil_count_port_stats(const struct ofp_header *oh) { struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpraw_pull_assert(&b); return b.size / ofputil_get_port_stats_size(oh->version); } /* Converts an OFPST_PORT_STATS reply in 'msg' into an abstract * ofputil_port_stats in 'ps'. * * Multiple OFPST_PORT_STATS replies can be packed into a single OpenFlow * message. Calling this function multiple times for a single 'msg' iterates * through the replies. 
The caller must initially leave 'msg''s layer pointers * null and not modify them between calls. * * Returns 0 if successful, EOF if no replies were left in this 'msg', * otherwise a positive errno value. */ int ofputil_decode_port_stats(struct ofputil_port_stats *ps, struct ofpbuf *msg) { enum ofperr error; enum ofpraw raw; error = (msg->l2 ? ofpraw_decode(&raw, msg->l2) : ofpraw_pull(&raw, msg)); if (error) { return error; } if (!msg->size) { return EOF; } else if (raw == OFPRAW_OFPST13_PORT_REPLY) { const struct ofp13_port_stats *ps13; ps13 = ofpbuf_try_pull(msg, sizeof *ps13); if (!ps13) { goto bad_len; } return ofputil_port_stats_from_ofp13(ps, ps13); } else if (raw == OFPRAW_OFPST11_PORT_REPLY) { const struct ofp11_port_stats *ps11; ps11 = ofpbuf_try_pull(msg, sizeof *ps11); if (!ps11) { goto bad_len; } return ofputil_port_stats_from_ofp11(ps, ps11); } else if (raw == OFPRAW_OFPST10_PORT_REPLY) { const struct ofp10_port_stats *ps10; ps10 = ofpbuf_try_pull(msg, sizeof *ps10); if (!ps10) { goto bad_len; } return ofputil_port_stats_from_ofp10(ps, ps10); } else { NOT_REACHED(); } bad_len: VLOG_WARN_RL(&bad_ofmsg_rl, "OFPST_PORT reply has %zu leftover " "bytes at end", msg->size); return OFPERR_OFPBRC_BAD_LEN; } /* Parse a port status request message into a 16 bit OpenFlow 1.0 * port number and stores the latter in '*ofp10_port'. * Returns 0 if successful, otherwise an OFPERR_* number. */ enum ofperr ofputil_decode_port_stats_request(const struct ofp_header *request, ofp_port_t *ofp10_port) { switch ((enum ofp_version)request->version) { case OFP13_VERSION: case OFP12_VERSION: case OFP11_VERSION: { const struct ofp11_port_stats_request *psr11 = ofpmsg_body(request); return ofputil_port_from_ofp11(psr11->port_no, ofp10_port); } case OFP10_VERSION: { const struct ofp10_port_stats_request *psr10 = ofpmsg_body(request); *ofp10_port = u16_to_ofp(ntohs(psr10->port_no)); return 0; } default: NOT_REACHED(); } } /* Parse a queue status request message into 'oqsr'. 
* Returns 0 if successful, otherwise an OFPERR_* number. */ enum ofperr ofputil_decode_queue_stats_request(const struct ofp_header *request, struct ofputil_queue_stats_request *oqsr) { switch ((enum ofp_version)request->version) { case OFP13_VERSION: case OFP12_VERSION: case OFP11_VERSION: { const struct ofp11_queue_stats_request *qsr11 = ofpmsg_body(request); oqsr->queue_id = ntohl(qsr11->queue_id); return ofputil_port_from_ofp11(qsr11->port_no, &oqsr->port_no); } case OFP10_VERSION: { const struct ofp10_queue_stats_request *qsr10 = ofpmsg_body(request); oqsr->queue_id = ntohl(qsr10->queue_id); oqsr->port_no = u16_to_ofp(ntohs(qsr10->port_no)); /* OF 1.0 uses OFPP_ALL for OFPP_ANY */ if (oqsr->port_no == OFPP_ALL) { oqsr->port_no = OFPP_ANY; } return 0; } default: NOT_REACHED(); } } /* Encode a queue statsrequest for 'oqsr', the encoded message * will be fore Open Flow version 'ofp_version'. Returns message * as a struct ofpbuf. Returns encoded message on success, NULL on error */ struct ofpbuf * ofputil_encode_queue_stats_request(enum ofp_version ofp_version, const struct ofputil_queue_stats_request *oqsr) { struct ofpbuf *request; switch (ofp_version) { case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: { struct ofp11_queue_stats_request *req; request = ofpraw_alloc(OFPRAW_OFPST11_QUEUE_REQUEST, ofp_version, 0); req = ofpbuf_put_zeros(request, sizeof *req); req->port_no = ofputil_port_to_ofp11(oqsr->port_no); req->queue_id = htonl(oqsr->queue_id); break; } case OFP10_VERSION: { struct ofp10_queue_stats_request *req; request = ofpraw_alloc(OFPRAW_OFPST10_QUEUE_REQUEST, ofp_version, 0); req = ofpbuf_put_zeros(request, sizeof *req); /* OpenFlow 1.0 needs OFPP_ALL instead of OFPP_ANY */ req->port_no = htons(ofp_to_u16(oqsr->port_no == OFPP_ANY ? 
OFPP_ALL : oqsr->port_no)); req->queue_id = htonl(oqsr->queue_id); break; } default: NOT_REACHED(); } return request; } static size_t ofputil_get_queue_stats_size(enum ofp_version ofp_version) { switch (ofp_version) { case OFP10_VERSION: return sizeof(struct ofp10_queue_stats); case OFP11_VERSION: case OFP12_VERSION: return sizeof(struct ofp11_queue_stats); case OFP13_VERSION: return sizeof(struct ofp13_queue_stats); default: NOT_REACHED(); } } /* Returns the number of queue stats elements in OFPTYPE_QUEUE_STATS_REPLY * message 'oh'. */ size_t ofputil_count_queue_stats(const struct ofp_header *oh) { struct ofpbuf b; ofpbuf_use_const(&b, oh, ntohs(oh->length)); ofpraw_pull_assert(&b); return b.size / ofputil_get_queue_stats_size(oh->version); } static enum ofperr ofputil_queue_stats_from_ofp10(struct ofputil_queue_stats *oqs, const struct ofp10_queue_stats *qs10) { oqs->port_no = u16_to_ofp(ntohs(qs10->port_no)); oqs->queue_id = ntohl(qs10->queue_id); oqs->tx_bytes = ntohll(get_32aligned_be64(&qs10->tx_bytes)); oqs->tx_packets = ntohll(get_32aligned_be64(&qs10->tx_packets)); oqs->tx_errors = ntohll(get_32aligned_be64(&qs10->tx_errors)); oqs->duration_sec = oqs->duration_nsec = UINT32_MAX; return 0; } static enum ofperr ofputil_queue_stats_from_ofp11(struct ofputil_queue_stats *oqs, const struct ofp11_queue_stats *qs11) { enum ofperr error; error = ofputil_port_from_ofp11(qs11->port_no, &oqs->port_no); if (error) { return error; } oqs->queue_id = ntohl(qs11->queue_id); oqs->tx_bytes = ntohll(qs11->tx_bytes); oqs->tx_packets = ntohll(qs11->tx_packets); oqs->tx_errors = ntohll(qs11->tx_errors); oqs->duration_sec = oqs->duration_nsec = UINT32_MAX; return 0; } static enum ofperr ofputil_queue_stats_from_ofp13(struct ofputil_queue_stats *oqs, const struct ofp13_queue_stats *qs13) { enum ofperr error = ofputil_queue_stats_from_ofp11(oqs, &qs13->qs); if (!error) { oqs->duration_sec = ntohl(qs13->duration_sec); oqs->duration_nsec = ntohl(qs13->duration_nsec); } return error; 
} /* Converts an OFPST_QUEUE_STATS reply in 'msg' into an abstract * ofputil_queue_stats in 'qs'. * * Multiple OFPST_QUEUE_STATS replies can be packed into a single OpenFlow * message. Calling this function multiple times for a single 'msg' iterates * through the replies. The caller must initially leave 'msg''s layer pointers * null and not modify them between calls. * * Returns 0 if successful, EOF if no replies were left in this 'msg', * otherwise a positive errno value. */ int ofputil_decode_queue_stats(struct ofputil_queue_stats *qs, struct ofpbuf *msg) { enum ofperr error; enum ofpraw raw; error = (msg->l2 ? ofpraw_decode(&raw, msg->l2) : ofpraw_pull(&raw, msg)); if (error) { return error; } if (!msg->size) { return EOF; } else if (raw == OFPRAW_OFPST13_QUEUE_REPLY) { const struct ofp13_queue_stats *qs13; qs13 = ofpbuf_try_pull(msg, sizeof *qs13); if (!qs13) { goto bad_len; } return ofputil_queue_stats_from_ofp13(qs, qs13); } else if (raw == OFPRAW_OFPST11_QUEUE_REPLY) { const struct ofp11_queue_stats *qs11; qs11 = ofpbuf_try_pull(msg, sizeof *qs11); if (!qs11) { goto bad_len; } return ofputil_queue_stats_from_ofp11(qs, qs11); } else if (raw == OFPRAW_OFPST10_QUEUE_REPLY) { const struct ofp10_queue_stats *qs10; qs10 = ofpbuf_try_pull(msg, sizeof *qs10); if (!qs10) { goto bad_len; } return ofputil_queue_stats_from_ofp10(qs, qs10); } else { NOT_REACHED(); } bad_len: VLOG_WARN_RL(&bad_ofmsg_rl, "OFPST_QUEUE reply has %zu leftover " "bytes at end", msg->size); return OFPERR_OFPBRC_BAD_LEN; } static void ofputil_queue_stats_to_ofp10(const struct ofputil_queue_stats *oqs, struct ofp10_queue_stats *qs10) { qs10->port_no = htons(ofp_to_u16(oqs->port_no)); memset(qs10->pad, 0, sizeof qs10->pad); qs10->queue_id = htonl(oqs->queue_id); put_32aligned_be64(&qs10->tx_bytes, htonll(oqs->tx_bytes)); put_32aligned_be64(&qs10->tx_packets, htonll(oqs->tx_packets)); put_32aligned_be64(&qs10->tx_errors, htonll(oqs->tx_errors)); } static void ofputil_queue_stats_to_ofp11(const 
struct ofputil_queue_stats *oqs, struct ofp11_queue_stats *qs11) { qs11->port_no = ofputil_port_to_ofp11(oqs->port_no); qs11->queue_id = htonl(oqs->queue_id); qs11->tx_bytes = htonll(oqs->tx_bytes); qs11->tx_packets = htonll(oqs->tx_packets); qs11->tx_errors = htonll(oqs->tx_errors); } static void ofputil_queue_stats_to_ofp13(const struct ofputil_queue_stats *oqs, struct ofp13_queue_stats *qs13) { ofputil_queue_stats_to_ofp11(oqs, &qs13->qs); if (oqs->duration_sec != UINT32_MAX) { qs13->duration_sec = htonl(oqs->duration_sec); qs13->duration_nsec = htonl(oqs->duration_nsec); } else { qs13->duration_sec = htonl(UINT32_MAX); qs13->duration_nsec = htonl(UINT32_MAX); } } /* Encode a queue stat for 'oqs' and append it to 'replies'. */ void ofputil_append_queue_stat(struct list *replies, const struct ofputil_queue_stats *oqs) { struct ofpbuf *msg = ofpbuf_from_list(list_back(replies)); struct ofp_header *oh = msg->data; switch ((enum ofp_version)oh->version) { case OFP13_VERSION: { struct ofp13_queue_stats *reply = ofpmp_append(replies, sizeof *reply); ofputil_queue_stats_to_ofp13(oqs, reply); break; } case OFP12_VERSION: case OFP11_VERSION: { struct ofp11_queue_stats *reply = ofpmp_append(replies, sizeof *reply); ofputil_queue_stats_to_ofp11(oqs, reply); break; } case OFP10_VERSION: { struct ofp10_queue_stats *reply = ofpmp_append(replies, sizeof *reply); ofputil_queue_stats_to_ofp10(oqs, reply); break; } default: NOT_REACHED(); } } openvswitch-2.0.1+git20140120/lib/ofp-util.def000066400000000000000000000112601226605124000204370ustar00rootroot00000000000000/* -*- c -*- */ #ifndef OFPAT10_ACTION #define OFPAT10_ACTION(ENUM, STRUCT, NAME) #endif OFPAT10_ACTION(OFPAT10_OUTPUT, ofp10_action_output, "output") OFPAT10_ACTION(OFPAT10_SET_VLAN_VID, ofp_action_vlan_vid, "mod_vlan_vid") OFPAT10_ACTION(OFPAT10_SET_VLAN_PCP, ofp_action_vlan_pcp, "mod_vlan_pcp") OFPAT10_ACTION(OFPAT10_STRIP_VLAN, ofp_action_header, "strip_vlan") OFPAT10_ACTION(OFPAT10_SET_DL_SRC, ofp_action_dl_addr, 
"mod_dl_src") OFPAT10_ACTION(OFPAT10_SET_DL_DST, ofp_action_dl_addr, "mod_dl_dst") OFPAT10_ACTION(OFPAT10_SET_NW_SRC, ofp_action_nw_addr, "mod_nw_src") OFPAT10_ACTION(OFPAT10_SET_NW_DST, ofp_action_nw_addr, "mod_nw_dst") OFPAT10_ACTION(OFPAT10_SET_NW_TOS, ofp_action_nw_tos, "mod_nw_tos") OFPAT10_ACTION(OFPAT10_SET_TP_SRC, ofp_action_tp_port, "mod_tp_src") OFPAT10_ACTION(OFPAT10_SET_TP_DST, ofp_action_tp_port, "mod_tp_dst") OFPAT10_ACTION(OFPAT10_ENQUEUE, ofp10_action_enqueue, "enqueue") #ifndef OFPAT11_ACTION #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) #endif OFPAT11_ACTION(OFPAT11_OUTPUT, ofp11_action_output, 0, "output") OFPAT11_ACTION(OFPAT11_SET_VLAN_VID, ofp_action_vlan_vid, 0, "mod_vlan_vid") OFPAT11_ACTION(OFPAT11_SET_VLAN_PCP, ofp_action_vlan_pcp, 0, "mod_vlan_pcp") OFPAT11_ACTION(OFPAT11_SET_DL_SRC, ofp_action_dl_addr, 0, "mod_dl_src") OFPAT11_ACTION(OFPAT11_SET_DL_DST, ofp_action_dl_addr, 0, "mod_dl_dst") OFPAT11_ACTION(OFPAT11_SET_NW_SRC, ofp_action_nw_addr, 0, "mod_nw_src") OFPAT11_ACTION(OFPAT11_SET_NW_DST, ofp_action_nw_addr, 0, "mod_nw_dst") OFPAT11_ACTION(OFPAT11_SET_NW_TOS, ofp_action_nw_tos, 0, "mod_nw_tos") //OFPAT11_ACTION(OFPAT11_SET_NW_ECN, ofp11_action_nw_ecn, "0, mod_nw_ecn") OFPAT11_ACTION(OFPAT11_SET_TP_SRC, ofp_action_tp_port, 0, "mod_tp_src") OFPAT11_ACTION(OFPAT11_SET_TP_DST, ofp_action_tp_port, 0, "mod_tp_dst") OFPAT11_ACTION(OFPAT11_SET_MPLS_TTL, ofp11_action_mpls_ttl, 0, "set_mpls_ttl") OFPAT11_ACTION(OFPAT11_DEC_MPLS_TTL, ofp_action_header, 0, "dec_mpls_ttl") OFPAT11_ACTION(OFPAT11_PUSH_VLAN, ofp11_action_push, 0, "push_vlan") OFPAT11_ACTION(OFPAT11_POP_VLAN, ofp_action_header, 0, "pop_vlan") OFPAT11_ACTION(OFPAT11_PUSH_MPLS, ofp11_action_push, 0, "push_mpls") OFPAT11_ACTION(OFPAT11_POP_MPLS, ofp11_action_pop_mpls, 0, "pop_mpls") OFPAT11_ACTION(OFPAT11_SET_QUEUE, ofp11_action_set_queue, 0, "set_queue") //OFPAT11_ACTION(OFPAT11_SET_NW_TTL, ofp11_action_nw_ttl, 0, "set_nw_ttl") OFPAT11_ACTION(OFPAT11_DEC_NW_TTL, 
ofp_action_header, 0, NULL) OFPAT11_ACTION(OFPAT12_SET_FIELD, ofp12_action_set_field, 1, "set_field") #ifndef NXAST_ACTION #define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) #endif NXAST_ACTION(NXAST_RESUBMIT, nx_action_resubmit, 0, "resubmit") NXAST_ACTION(NXAST_SET_TUNNEL, nx_action_set_tunnel, 0, "set_tunnel") NXAST_ACTION(NXAST_SET_QUEUE, nx_action_set_queue, 0, "set_queue") NXAST_ACTION(NXAST_POP_QUEUE, nx_action_pop_queue, 0, "pop_queue") NXAST_ACTION(NXAST_REG_MOVE, nx_action_reg_move, 0, "move") NXAST_ACTION(NXAST_REG_LOAD, nx_action_reg_load, 0, "load") NXAST_ACTION(NXAST_STACK_PUSH, nx_action_stack, 0, "push") NXAST_ACTION(NXAST_STACK_POP, nx_action_stack, 0, "pop") NXAST_ACTION(NXAST_NOTE, nx_action_note, 1, "note") NXAST_ACTION(NXAST_SET_TUNNEL64, nx_action_set_tunnel64, 0, "set_tunnel64") NXAST_ACTION(NXAST_MULTIPATH, nx_action_multipath, 0, "multipath") NXAST_ACTION(NXAST_BUNDLE, nx_action_bundle, 1, "bundle") NXAST_ACTION(NXAST_BUNDLE_LOAD, nx_action_bundle, 1, "bundle_load") NXAST_ACTION(NXAST_RESUBMIT_TABLE, nx_action_resubmit, 0, NULL) NXAST_ACTION(NXAST_OUTPUT_REG, nx_action_output_reg, 0, NULL) NXAST_ACTION(NXAST_LEARN, nx_action_learn, 1, "learn") NXAST_ACTION(NXAST_EXIT, nx_action_header, 0, "exit") NXAST_ACTION(NXAST_DEC_TTL, nx_action_header, 0, "dec_ttl") NXAST_ACTION(NXAST_FIN_TIMEOUT, nx_action_fin_timeout, 0, "fin_timeout") NXAST_ACTION(NXAST_CONTROLLER, nx_action_controller, 0, "controller") NXAST_ACTION(NXAST_DEC_TTL_CNT_IDS, nx_action_cnt_ids, 1, NULL) NXAST_ACTION(NXAST_WRITE_METADATA, nx_action_write_metadata, 0, "write_metadata") NXAST_ACTION(NXAST_SET_MPLS_TTL, nx_action_mpls_ttl, 0, "set_mpls_ttl") NXAST_ACTION(NXAST_DEC_MPLS_TTL, nx_action_header, 0, "dec_mpls_ttl") NXAST_ACTION(NXAST_PUSH_MPLS, nx_action_push_mpls, 0, "push_mpls") NXAST_ACTION(NXAST_POP_MPLS, nx_action_pop_mpls, 0, "pop_mpls") NXAST_ACTION(NXAST_SAMPLE, nx_action_sample, 0, "sample") #undef OFPAT10_ACTION #undef OFPAT11_ACTION #undef NXAST_ACTION 
openvswitch-2.0.1+git20140120/lib/ofp-util.h000066400000000000000000001032171226605124000201340ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OFP_UTIL_H #define OFP_UTIL_H 1 #include #include #include #include "classifier.h" #include "compiler.h" #include "flow.h" #include "match.h" #include "netdev.h" #include "openflow/nicira-ext.h" #include "openvswitch/types.h" #include "type-props.h" struct ofpbuf; /* Port numbers. */ enum ofperr ofputil_port_from_ofp11(ovs_be32 ofp11_port, ofp_port_t *ofp10_port); ovs_be32 ofputil_port_to_ofp11(ofp_port_t ofp10_port); enum ofperr ofputil_check_output_port(ofp_port_t ofp_port, ofp_port_t max_ports); bool ofputil_port_from_string(const char *, ofp_port_t *portp); void ofputil_format_port(ofp_port_t port, struct ds *); void ofputil_port_to_string(ofp_port_t, char namebuf[OFP_MAX_PORT_NAME_LEN], size_t bufsize); /* Converting OFPFW10_NW_SRC_MASK and OFPFW10_NW_DST_MASK wildcard bit counts * to and from IP bitmasks. */ ovs_be32 ofputil_wcbits_to_netmask(int wcbits); int ofputil_netmask_to_wcbits(ovs_be32 netmask); /* Protocols. * * A "protocol" is an OpenFlow version plus, for some OpenFlow versions, * a bit extra about the flow match format in use. * * These are arranged from most portable to least portable, or alternatively * from least powerful to most powerful. 
 Protocols earlier on the list are
 * more likely to be understood for the purpose of making requests, but
 * protocols later on the list are more likely to accurately describe a flow
 * within a switch.
 *
 * On any given OpenFlow connection, a single protocol is in effect at any
 * given time.  These values use separate bits only because that makes it easy
 * to test whether a particular protocol is within a given set of protocols and
 * to implement set union and intersection. */
enum ofputil_protocol {
    /* OpenFlow 1.0 protocols.
     *
     * The "STD" protocols use the standard OpenFlow 1.0 flow format.
     * The "NXM" protocols use the Nicira Extensible Match (NXM) flow format.
     *
     * The protocols with "TID" mean that the nx_flow_mod_table_id Nicira
     * extension has been enabled.  The other protocols have it disabled. */
#define OFPUTIL_P_NONE 0
    OFPUTIL_P_OF10_STD = 1 << 0,
    OFPUTIL_P_OF10_STD_TID = 1 << 1,
    OFPUTIL_P_OF10_NXM = 1 << 2,
    OFPUTIL_P_OF10_NXM_TID = 1 << 3,
#define OFPUTIL_P_OF10_STD_ANY (OFPUTIL_P_OF10_STD | OFPUTIL_P_OF10_STD_TID)
#define OFPUTIL_P_OF10_NXM_ANY (OFPUTIL_P_OF10_NXM | OFPUTIL_P_OF10_NXM_TID)

    /* OpenFlow 1.1 protocol.
     *
     * We only support the standard OpenFlow 1.1 flow format.
     *
     * OpenFlow 1.1 always operates with an equivalent of the
     * nx_flow_mod_table_id Nicira extension enabled, so there is no "TID"
     * variant. */
    OFPUTIL_P_OF11_STD = 1 << 4,

    /* OpenFlow 1.2+ protocols (only one variant each).
     *
     * These use the standard OpenFlow Extensible Match (OXM) flow format.
     *
     * OpenFlow 1.2+ always operates with an equivalent of the
     * nx_flow_mod_table_id Nicira extension enabled, so there is no "TID"
     * variant. */
    OFPUTIL_P_OF12_OXM = 1 << 5,
    OFPUTIL_P_OF13_OXM = 1 << 6,
    /* Convenience unions of the protocol bits defined above. */
#define OFPUTIL_P_ANY_OXM (OFPUTIL_P_OF12_OXM | OFPUTIL_P_OF13_OXM)

#define OFPUTIL_P_NXM_OF11_UP (OFPUTIL_P_OF10_NXM_ANY | OFPUTIL_P_OF11_STD | \
                               OFPUTIL_P_ANY_OXM)

#define OFPUTIL_P_NXM_OXM_ANY (OFPUTIL_P_OF10_NXM_ANY | OFPUTIL_P_ANY_OXM)

#define OFPUTIL_P_OF11_UP (OFPUTIL_P_OF11_STD | OFPUTIL_P_ANY_OXM)

#define OFPUTIL_P_OF12_UP (OFPUTIL_P_ANY_OXM)

#define OFPUTIL_P_OF13_UP (OFPUTIL_P_OF13_OXM)

    /* All protocols.  The mask covers bits 0 through 6, i.e. every protocol
     * bit defined in this enum. */
#define OFPUTIL_P_ANY ((1 << 7) - 1)

    /* Protocols in which a specific table may be specified in flow_mods. */
#define OFPUTIL_P_TID (OFPUTIL_P_OF10_STD_TID |       \
                       OFPUTIL_P_OF10_NXM_TID |       \
                       OFPUTIL_P_OF11_STD |           \
                       OFPUTIL_P_ANY_OXM)
};

/* Protocols to use for flow dumps, from most to least preferred. */
extern enum ofputil_protocol ofputil_flow_dump_protocols[];
extern size_t ofputil_n_flow_dump_protocols;

enum ofputil_protocol ofputil_protocol_from_ofp_version(enum ofp_version);
enum ofputil_protocol ofputil_protocols_from_ofp_version(enum ofp_version);
enum ofp_version ofputil_protocol_to_ofp_version(enum ofputil_protocol);

bool ofputil_protocol_is_valid(enum ofputil_protocol);
enum ofputil_protocol ofputil_protocol_set_tid(enum ofputil_protocol,
                                               bool enable);
enum ofputil_protocol ofputil_protocol_to_base(enum ofputil_protocol);
enum ofputil_protocol ofputil_protocol_set_base(
    enum ofputil_protocol cur, enum ofputil_protocol new_base);

const char *ofputil_protocol_to_string(enum ofputil_protocol);
char *ofputil_protocols_to_string(enum ofputil_protocol);
enum ofputil_protocol ofputil_protocols_from_string(const char *);

void ofputil_format_version(struct ds *, enum ofp_version);
void ofputil_format_version_name(struct ds *, enum ofp_version);

/* A bitmap of version numbers
 *
 * Bit offsets correspond to ofp_version numbers which in turn correspond to
 * wire-protocol numbers for OpenFlow versions.  E.g. (1u << OFP11_VERSION)
 * is the mask for OpenFlow 1.1.
If the bit for a version is set then it is * allowed, otherwise it is disallowed. */ void ofputil_format_version_bitmap(struct ds *msg, uint32_t bitmap); void ofputil_format_version_bitmap_names(struct ds *msg, uint32_t bitmap); uint32_t ofputil_protocols_to_version_bitmap(enum ofputil_protocol); enum ofputil_protocol ofputil_protocols_from_version_bitmap(uint32_t bitmap); /* Bitmap of OpenFlow versions that Open vSwitch supports. */ #define OFPUTIL_SUPPORTED_VERSIONS \ ((1u << OFP10_VERSION) | (1u << OFP12_VERSION) | (1u << OFP13_VERSION)) /* Bitmap of OpenFlow versions to enable by default (a subset of * OFPUTIL_SUPPORTED_VERSIONS). */ #define OFPUTIL_DEFAULT_VERSIONS (1u << OFP10_VERSION) enum ofputil_protocol ofputil_protocols_from_string(const char *s); const char *ofputil_version_to_string(enum ofp_version ofp_version); uint32_t ofputil_versions_from_string(const char *s); uint32_t ofputil_versions_from_strings(char ** const s, size_t count); bool ofputil_decode_hello(const struct ofp_header *, uint32_t *allowed_versions); struct ofpbuf *ofputil_encode_hello(uint32_t version_bitmap); struct ofpbuf *ofputil_encode_set_protocol(enum ofputil_protocol current, enum ofputil_protocol want, enum ofputil_protocol *next); /* nx_flow_format */ struct ofpbuf *ofputil_encode_nx_set_flow_format(enum nx_flow_format); enum ofputil_protocol ofputil_nx_flow_format_to_protocol(enum nx_flow_format); bool ofputil_nx_flow_format_is_valid(enum nx_flow_format); const char *ofputil_nx_flow_format_to_string(enum nx_flow_format); /* Work with ofp10_match. */ void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *); void ofputil_match_from_ofp10_match(const struct ofp10_match *, struct match *); void ofputil_normalize_match(struct match *); void ofputil_normalize_match_quiet(struct match *); void ofputil_match_to_ofp10_match(const struct match *, struct ofp10_match *); /* Work with ofp11_match. 
*/
enum ofperr ofputil_pull_ofp11_match(struct ofpbuf *, struct match *,
                                     uint16_t *padded_match_len);
enum ofperr ofputil_match_from_ofp11_match(const struct ofp11_match *,
                                           struct match *);
int ofputil_put_ofp11_match(struct ofpbuf *, const struct match *,
                            enum ofputil_protocol);
void ofputil_match_to_ofp11_match(const struct match *, struct ofp11_match *);
int ofputil_match_typical_len(enum ofputil_protocol);

/* dl_type translation between OpenFlow and 'struct flow' format. */
ovs_be16 ofputil_dl_type_to_openflow(ovs_be16 flow_dl_type);
ovs_be16 ofputil_dl_type_from_openflow(ovs_be16 ofp_dl_type);

/* PACKET_IN. */
bool ofputil_packet_in_format_is_valid(enum nx_packet_in_format);
int ofputil_packet_in_format_from_string(const char *);
const char *ofputil_packet_in_format_to_string(enum nx_packet_in_format);
struct ofpbuf *ofputil_make_set_packet_in_format(enum ofp_version,
                                                 enum nx_packet_in_format);

/* NXT_FLOW_MOD_TABLE_ID extension. */
struct ofpbuf *ofputil_make_flow_mod_table_id(bool flow_mod_table_id);

/* Protocol-independent flow_mod flags.
 *
 * Each flag is a distinct bit, so values may be OR'd together; the comment
 * on each flag names the OpenFlow versions that define it. */
enum ofputil_flow_mod_flags {
    OFPUTIL_FF_SEND_FLOW_REM = 1 << 0, /* All versions. */
    OFPUTIL_FF_CHECK_OVERLAP = 1 << 1, /* All versions. */
    OFPUTIL_FF_EMERG = 1 << 2, /* OpenFlow 1.0 only. */
    OFPUTIL_FF_RESET_COUNTS = 1 << 3, /* OpenFlow 1.2+. */
    OFPUTIL_FF_NO_PKT_COUNTS = 1 << 4, /* OpenFlow 1.3+. */
    OFPUTIL_FF_NO_BYT_COUNTS = 1 << 5 /* OpenFlow 1.3+. */
};

/* Protocol-independent flow_mod.
 *
 * The handling of cookies across multiple versions of OpenFlow is a bit
 * confusing.  See DESIGN for the details. */
struct ofputil_flow_mod {
    struct list list_node; /* For queuing flow_mods. */

    struct match match;
    unsigned int priority;

    /* Cookie matching.  The flow_mod affects only flows that have cookies that
     * bitwise match 'cookie' bits in positions where 'cookie_mask' has 1-bits.
     *
     * 'cookie_mask' should be zero for OFPFC_ADD flow_mods. */
    ovs_be64 cookie; /* Cookie bits to match. */
    ovs_be64 cookie_mask; /* 1-bit in each 'cookie' bit to match. */

    /* Cookie changes.
     *
     * OFPFC_ADD uses 'new_cookie' as the new flow's cookie.  'new_cookie'
     * should not be UINT64_MAX.
     *
     * OFPFC_MODIFY and OFPFC_MODIFY_STRICT have two cases:
     *
     *   - If one or more matching flows exist and 'modify_cookie' is true,
     *     then the flow_mod changes the existing flows' cookies to
     *     'new_cookie'.  'new_cookie' should not be UINT64_MAX.
     *
     *   - If no matching flow exists, 'new_cookie' is not UINT64_MAX, and
     *     'cookie_mask' is 0, then the flow_mod adds a new flow with
     *     'new_cookie' as its cookie.
     */
    ovs_be64 new_cookie; /* New cookie to install or UINT64_MAX. */
    bool modify_cookie; /* Set cookie of existing flow to 'new_cookie'? */

    uint8_t table_id;
    uint16_t command;
    uint16_t idle_timeout;
    uint16_t hard_timeout;
    uint32_t buffer_id;
    ofp_port_t out_port;
    enum ofputil_flow_mod_flags flags;
    struct ofpact *ofpacts; /* Series of "struct ofpact"s. */
    size_t ofpacts_len; /* Length of ofpacts, in bytes. */
};

enum ofperr ofputil_decode_flow_mod(struct ofputil_flow_mod *,
                                    const struct ofp_header *,
                                    enum ofputil_protocol,
                                    struct ofpbuf *ofpacts);
struct ofpbuf *ofputil_encode_flow_mod(const struct ofputil_flow_mod *,
                                       enum ofputil_protocol);

/* Flow stats or aggregate stats request, independent of protocol. */
struct ofputil_flow_stats_request {
    bool aggregate; /* Aggregate results? */
    struct match match;
    ovs_be64 cookie;
    ovs_be64 cookie_mask;
    ofp_port_t out_port;
    uint8_t table_id;
};

enum ofperr ofputil_decode_flow_stats_request(
    struct ofputil_flow_stats_request *, const struct ofp_header *);
struct ofpbuf *ofputil_encode_flow_stats_request(
    const struct ofputil_flow_stats_request *, enum ofputil_protocol);

/* Flow stats reply, independent of protocol.
*/ struct ofputil_flow_stats { struct match match; ovs_be64 cookie; uint8_t table_id; uint32_t duration_sec; uint32_t duration_nsec; uint16_t priority; uint16_t idle_timeout; uint16_t hard_timeout; int idle_age; /* Seconds since last packet, -1 if unknown. */ int hard_age; /* Seconds since last change, -1 if unknown. */ uint64_t packet_count; /* Packet count, UINT64_MAX if unknown. */ uint64_t byte_count; /* Byte count, UINT64_MAX if unknown. */ struct ofpact *ofpacts; size_t ofpacts_len; enum ofputil_flow_mod_flags flags; }; int ofputil_decode_flow_stats_reply(struct ofputil_flow_stats *, struct ofpbuf *msg, bool flow_age_extension, struct ofpbuf *ofpacts); void ofputil_append_flow_stats_reply(const struct ofputil_flow_stats *, struct list *replies); /* Aggregate stats reply, independent of protocol. */ struct ofputil_aggregate_stats { uint64_t packet_count; /* Packet count, UINT64_MAX if unknown. */ uint64_t byte_count; /* Byte count, UINT64_MAX if unknown. */ uint32_t flow_count; }; struct ofpbuf *ofputil_encode_aggregate_stats_reply( const struct ofputil_aggregate_stats *stats, const struct ofp_header *request); enum ofperr ofputil_decode_aggregate_stats_reply( struct ofputil_aggregate_stats *, const struct ofp_header *reply); /* Flow removed message, independent of protocol. */ struct ofputil_flow_removed { struct match match; uint16_t priority; ovs_be64 cookie; uint8_t reason; /* One of OFPRR_*. */ uint8_t table_id; /* 255 if message didn't include table ID. */ uint32_t duration_sec; uint32_t duration_nsec; uint16_t idle_timeout; uint16_t hard_timeout; uint64_t packet_count; /* Packet count, UINT64_MAX if unknown. */ uint64_t byte_count; /* Byte count, UINT64_MAX if unknown. */ }; enum ofperr ofputil_decode_flow_removed(struct ofputil_flow_removed *, const struct ofp_header *); struct ofpbuf *ofputil_encode_flow_removed(const struct ofputil_flow_removed *, enum ofputil_protocol); /* Abstract packet-in message. 
*/ struct ofputil_packet_in { struct list list_node; /* For queueing packet_ins. */ const void *packet; size_t packet_len; enum ofp_packet_in_reason reason; /* One of OFPR_*. */ uint16_t controller_id; /* Controller ID to send to. */ uint8_t table_id; ovs_be64 cookie; uint32_t buffer_id; int send_len; uint16_t total_len; /* Full length of frame. */ struct flow_metadata fmd; /* Metadata at creation time. */ }; enum ofperr ofputil_decode_packet_in(struct ofputil_packet_in *, const struct ofp_header *); struct ofpbuf *ofputil_encode_packet_in(const struct ofputil_packet_in *, enum ofputil_protocol protocol, enum nx_packet_in_format); enum { OFPUTIL_PACKET_IN_REASON_BUFSIZE = INT_STRLEN(int) + 1 }; const char *ofputil_packet_in_reason_to_string(enum ofp_packet_in_reason, char *reasonbuf, size_t bufsize); bool ofputil_packet_in_reason_from_string(const char *, enum ofp_packet_in_reason *); /* Abstract packet-out message. * * ofputil_decode_packet_out() will ensure that 'in_port' is a physical port * (OFPP_MAX or less) or one of OFPP_LOCAL, OFPP_NONE, or OFPP_CONTROLLER. */ struct ofputil_packet_out { const void *packet; /* Packet data, if buffer_id == UINT32_MAX. */ size_t packet_len; /* Length of packet data in bytes. */ uint32_t buffer_id; /* Buffer id or UINT32_MAX if no buffer. */ ofp_port_t in_port; /* Packet's input port. */ struct ofpact *ofpacts; /* Actions. */ size_t ofpacts_len; /* Size of ofpacts in bytes. */ }; enum ofperr ofputil_decode_packet_out(struct ofputil_packet_out *, const struct ofp_header *, struct ofpbuf *ofpacts); struct ofpbuf *ofputil_encode_packet_out(const struct ofputil_packet_out *, enum ofputil_protocol protocol); enum ofputil_port_config { /* OpenFlow 1.0 and 1.1 share these values for these port config bits. */ OFPUTIL_PC_PORT_DOWN = 1 << 0, /* Port is administratively down. */ OFPUTIL_PC_NO_RECV = 1 << 2, /* Drop all packets received by port. */ OFPUTIL_PC_NO_FWD = 1 << 5, /* Drop packets forwarded to port. 
*/
    OFPUTIL_PC_NO_PACKET_IN = 1 << 6, /* No send packet-in msgs for port. */

    /* OpenFlow 1.0 only. */
    OFPUTIL_PC_NO_STP = 1 << 1, /* No 802.1D spanning tree for port. */
    OFPUTIL_PC_NO_RECV_STP = 1 << 3, /* Drop received 802.1D STP packets. */
    OFPUTIL_PC_NO_FLOOD = 1 << 4, /* Do not include port when flooding. */

    /* There are no OpenFlow 1.1-only bits. */
};

/* Protocol-independent port state bits. */
enum ofputil_port_state {
    /* OpenFlow 1.0 and 1.1 share these values for these port state bits. */
    OFPUTIL_PS_LINK_DOWN = 1 << 0, /* No physical link present. */

    /* OpenFlow 1.1 only. */
    OFPUTIL_PS_BLOCKED = 1 << 1, /* Port is blocked */
    OFPUTIL_PS_LIVE = 1 << 2, /* Live for Fast Failover Group. */

    /* OpenFlow 1.0 only.
     *
     * These are not independent flags: they are the values 0 through 3 of a
     * two-bit field at bits 8-9, to be extracted with OFPUTIL_PS_STP_MASK. */
    OFPUTIL_PS_STP_LISTEN = 0 << 8, /* Not learning or relaying frames. */
    OFPUTIL_PS_STP_LEARN = 1 << 8, /* Learning but not relaying frames. */
    OFPUTIL_PS_STP_FORWARD = 2 << 8, /* Learning and relaying frames. */
    OFPUTIL_PS_STP_BLOCK = 3 << 8, /* Not part of spanning tree. */
    OFPUTIL_PS_STP_MASK = 3 << 8 /* Bit mask for OFPPS10_STP_* values. */
};

/* Abstract ofp10_phy_port or ofp11_port. */
struct ofputil_phy_port {
    ofp_port_t port_no;
    uint8_t hw_addr[OFP_ETH_ALEN];
    char name[OFP_MAX_PORT_NAME_LEN];
    enum ofputil_port_config config;
    enum ofputil_port_state state;

    /* NETDEV_F_* feature bitmasks. */
    enum netdev_features curr; /* Current features. */
    enum netdev_features advertised; /* Features advertised by the port. */
    enum netdev_features supported; /* Features supported by the port. */
    enum netdev_features peer; /* Features advertised by peer. */

    /* Speed. */
    uint32_t curr_speed; /* Current speed, in kbps. */
    uint32_t max_speed; /* Maximum supported speed, in kbps. */
};

enum ofputil_capabilities {
    /* OpenFlow 1.0, 1.1, 1.2, and 1.3 share these capability values. */
    OFPUTIL_C_FLOW_STATS = 1 << 0, /* Flow statistics. */
    OFPUTIL_C_TABLE_STATS = 1 << 1, /* Table statistics. */
    OFPUTIL_C_PORT_STATS = 1 << 2, /* Port statistics. */
    OFPUTIL_C_IP_REASM = 1 << 5, /* Can reassemble IP fragments.
*/ OFPUTIL_C_QUEUE_STATS = 1 << 6, /* Queue statistics. */ /* OpenFlow 1.0 and 1.1 share this capability. */ OFPUTIL_C_ARP_MATCH_IP = 1 << 7, /* Match IP addresses in ARP pkts. */ /* OpenFlow 1.0 only. */ OFPUTIL_C_STP = 1 << 3, /* 802.1d spanning tree. */ /* OpenFlow 1.1, 1.2, and 1.3 share this capability. */ OFPUTIL_C_GROUP_STATS = 1 << 4, /* Group statistics. */ /* OpenFlow 1.2 and 1.3 share this capability */ OFPUTIL_C_PORT_BLOCKED = 1 << 8, /* Switch will block looping ports */ }; enum ofputil_action_bitmap { OFPUTIL_A_OUTPUT = 1 << 0, OFPUTIL_A_SET_VLAN_VID = 1 << 1, OFPUTIL_A_SET_VLAN_PCP = 1 << 2, OFPUTIL_A_STRIP_VLAN = 1 << 3, OFPUTIL_A_SET_DL_SRC = 1 << 4, OFPUTIL_A_SET_DL_DST = 1 << 5, OFPUTIL_A_SET_NW_SRC = 1 << 6, OFPUTIL_A_SET_NW_DST = 1 << 7, OFPUTIL_A_SET_NW_ECN = 1 << 8, OFPUTIL_A_SET_NW_TOS = 1 << 9, OFPUTIL_A_SET_TP_SRC = 1 << 10, OFPUTIL_A_SET_TP_DST = 1 << 11, OFPUTIL_A_ENQUEUE = 1 << 12, OFPUTIL_A_COPY_TTL_OUT = 1 << 13, OFPUTIL_A_COPY_TTL_IN = 1 << 14, OFPUTIL_A_SET_MPLS_LABEL = 1 << 15, OFPUTIL_A_SET_MPLS_TC = 1 << 16, OFPUTIL_A_SET_MPLS_TTL = 1 << 17, OFPUTIL_A_DEC_MPLS_TTL = 1 << 18, OFPUTIL_A_PUSH_VLAN = 1 << 19, OFPUTIL_A_POP_VLAN = 1 << 20, OFPUTIL_A_PUSH_MPLS = 1 << 21, OFPUTIL_A_POP_MPLS = 1 << 22, OFPUTIL_A_SET_QUEUE = 1 << 23, OFPUTIL_A_GROUP = 1 << 24, OFPUTIL_A_SET_NW_TTL = 1 << 25, OFPUTIL_A_DEC_NW_TTL = 1 << 26, OFPUTIL_A_SET_FIELD = 1 << 27, }; /* Abstract ofp_switch_features. */ struct ofputil_switch_features { uint64_t datapath_id; /* Datapath unique ID. */ uint32_t n_buffers; /* Max packets buffered at once. */ uint8_t n_tables; /* Number of tables supported by datapath. 
*/ uint8_t auxiliary_id; /* Identify auxiliary connections */ enum ofputil_capabilities capabilities; enum ofputil_action_bitmap actions; }; enum ofperr ofputil_decode_switch_features(const struct ofp_header *, struct ofputil_switch_features *, struct ofpbuf *); struct ofpbuf *ofputil_encode_switch_features( const struct ofputil_switch_features *, enum ofputil_protocol, ovs_be32 xid); void ofputil_put_switch_features_port(const struct ofputil_phy_port *, struct ofpbuf *); bool ofputil_switch_features_ports_trunc(struct ofpbuf *b); /* phy_port helper functions. */ int ofputil_pull_phy_port(enum ofp_version ofp_version, struct ofpbuf *, struct ofputil_phy_port *); size_t ofputil_count_phy_ports(uint8_t ofp_version, struct ofpbuf *); /* Abstract ofp_port_status. */ struct ofputil_port_status { enum ofp_port_reason reason; struct ofputil_phy_port desc; }; enum ofperr ofputil_decode_port_status(const struct ofp_header *, struct ofputil_port_status *); struct ofpbuf *ofputil_encode_port_status(const struct ofputil_port_status *, enum ofputil_protocol); /* Abstract ofp_port_mod. */ struct ofputil_port_mod { ofp_port_t port_no; uint8_t hw_addr[OFP_ETH_ALEN]; enum ofputil_port_config config; enum ofputil_port_config mask; enum netdev_features advertise; }; enum ofperr ofputil_decode_port_mod(const struct ofp_header *, struct ofputil_port_mod *); struct ofpbuf *ofputil_encode_port_mod(const struct ofputil_port_mod *, enum ofputil_protocol); /* Meter band configuration for all supported band types. */ struct ofputil_meter_band { uint16_t type; uint8_t prec_level; /* Non-zero if type == OFPMBT_DSCP_REMARK. */ uint32_t rate; uint32_t burst_size; }; struct ofputil_meter_band_stats { uint64_t packet_count; uint64_t byte_count; }; struct ofputil_meter_config { uint32_t meter_id; uint16_t flags; uint16_t n_bands; struct ofputil_meter_band *bands; }; /* Abstract ofp_meter_mod. 
*/ struct ofputil_meter_mod { uint16_t command; struct ofputil_meter_config meter; }; struct ofputil_meter_stats { uint32_t meter_id; uint32_t flow_count; uint64_t packet_in_count; uint64_t byte_in_count; uint32_t duration_sec; uint32_t duration_nsec; uint16_t n_bands; struct ofputil_meter_band_stats *bands; }; struct ofputil_meter_features { uint32_t max_meters; /* Maximum number of meters. */ uint32_t band_types; /* Can support max 32 band types. */ uint32_t capabilities; /* Supported flags. */ uint8_t max_bands; uint8_t max_color; }; enum ofperr ofputil_decode_meter_mod(const struct ofp_header *, struct ofputil_meter_mod *, struct ofpbuf *bands); struct ofpbuf *ofputil_encode_meter_mod(enum ofp_version, const struct ofputil_meter_mod *); void ofputil_decode_meter_features(const struct ofp_header *, struct ofputil_meter_features *); struct ofpbuf *ofputil_encode_meter_features_reply(const struct ofputil_meter_features *, const struct ofp_header * request); void ofputil_decode_meter_request(const struct ofp_header *, uint32_t *meter_id); void ofputil_append_meter_config(struct list *replies, const struct ofputil_meter_config *); void ofputil_append_meter_stats(struct list *replies, const struct ofputil_meter_stats *); enum ofputil_meter_request_type { OFPUTIL_METER_FEATURES, OFPUTIL_METER_CONFIG, OFPUTIL_METER_STATS }; struct ofpbuf *ofputil_encode_meter_request(enum ofp_version, enum ofputil_meter_request_type, uint32_t meter_id); int ofputil_decode_meter_stats(struct ofpbuf *, struct ofputil_meter_stats *, struct ofpbuf *bands); int ofputil_decode_meter_config(struct ofpbuf *, struct ofputil_meter_config *, struct ofpbuf *bands); /* Type for meter_id in ofproto provider interface, UINT32_MAX if invalid. */ typedef struct { uint32_t uint32; } ofproto_meter_id; /* Abstract ofp_role_request and reply. 
*/ struct ofputil_role_request { enum ofp12_controller_role role; bool have_generation_id; uint64_t generation_id; }; enum ofperr ofputil_decode_role_message(const struct ofp_header *, struct ofputil_role_request *); struct ofpbuf *ofputil_encode_role_reply(const struct ofp_header *, const struct ofputil_role_request *); /* Abstract table stats. * * For now we use ofp12_table_stats as a superset of the other protocol * versions' table stats. */ struct ofpbuf *ofputil_encode_table_stats_reply( const struct ofp12_table_stats[], int n, const struct ofp_header *request); /* Abstract nx_flow_monitor_request. */ struct ofputil_flow_monitor_request { uint32_t id; enum nx_flow_monitor_flags flags; ofp_port_t out_port; uint8_t table_id; struct match match; }; int ofputil_decode_flow_monitor_request(struct ofputil_flow_monitor_request *, struct ofpbuf *msg); void ofputil_append_flow_monitor_request( const struct ofputil_flow_monitor_request *, struct ofpbuf *msg); /* Abstract nx_flow_update. */ struct ofputil_flow_update { enum nx_flow_update_event event; /* Used only for NXFME_ADDED, NXFME_DELETED, NXFME_MODIFIED. */ enum ofp_flow_removed_reason reason; uint16_t idle_timeout; uint16_t hard_timeout; uint8_t table_id; ovs_be64 cookie; struct match *match; uint16_t priority; struct ofpact *ofpacts; size_t ofpacts_len; /* Used only for NXFME_ABBREV. */ ovs_be32 xid; }; int ofputil_decode_flow_update(struct ofputil_flow_update *, struct ofpbuf *msg, struct ofpbuf *ofpacts); void ofputil_start_flow_update(struct list *replies); void ofputil_append_flow_update(const struct ofputil_flow_update *, struct list *replies); /* Abstract nx_flow_monitor_cancel. */ uint32_t ofputil_decode_flow_monitor_cancel(const struct ofp_header *); struct ofpbuf *ofputil_encode_flow_monitor_cancel(uint32_t id); /* Encoding OpenFlow stats messages. 
*/ void ofputil_append_port_desc_stats_reply(enum ofp_version ofp_version, const struct ofputil_phy_port *pp, struct list *replies); /* Encoding simple OpenFlow messages. */ struct ofpbuf *make_echo_request(enum ofp_version); struct ofpbuf *make_echo_reply(const struct ofp_header *rq); struct ofpbuf *ofputil_encode_barrier_request(enum ofp_version); const char *ofputil_frag_handling_to_string(enum ofp_config_flags); bool ofputil_frag_handling_from_string(const char *, enum ofp_config_flags *); /* Actions. */ /* The type of an action. * * For each implemented OFPAT10_* and NXAST_* action type, there is a * corresponding constant prefixed with OFPUTIL_, e.g.: * * OFPUTIL_OFPAT10_OUTPUT * OFPUTIL_OFPAT10_SET_VLAN_VID * OFPUTIL_OFPAT10_SET_VLAN_PCP * OFPUTIL_OFPAT10_STRIP_VLAN * OFPUTIL_OFPAT10_SET_DL_SRC * OFPUTIL_OFPAT10_SET_DL_DST * OFPUTIL_OFPAT10_SET_NW_SRC * OFPUTIL_OFPAT10_SET_NW_DST * OFPUTIL_OFPAT10_SET_NW_TOS * OFPUTIL_OFPAT10_SET_TP_SRC * OFPUTIL_OFPAT10_SET_TP_DST * OFPUTIL_OFPAT10_ENQUEUE * OFPUTIL_NXAST_RESUBMIT * OFPUTIL_NXAST_SET_TUNNEL * OFPUTIL_NXAST_SET_METADATA * OFPUTIL_NXAST_SET_QUEUE * OFPUTIL_NXAST_POP_QUEUE * OFPUTIL_NXAST_REG_MOVE * OFPUTIL_NXAST_REG_LOAD * OFPUTIL_NXAST_NOTE * OFPUTIL_NXAST_SET_TUNNEL64 * OFPUTIL_NXAST_MULTIPATH * OFPUTIL_NXAST_BUNDLE * OFPUTIL_NXAST_BUNDLE_LOAD * OFPUTIL_NXAST_RESUBMIT_TABLE * OFPUTIL_NXAST_OUTPUT_REG * OFPUTIL_NXAST_LEARN * OFPUTIL_NXAST_DEC_TTL * OFPUTIL_NXAST_FIN_TIMEOUT * * (The above list helps developers who want to "grep" for these definitions.) */ enum OVS_PACKED_ENUM ofputil_action_code { OFPUTIL_ACTION_INVALID, #define OFPAT10_ACTION(ENUM, STRUCT, NAME) OFPUTIL_##ENUM, #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) OFPUTIL_##ENUM, #define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) OFPUTIL_##ENUM, #include "ofp-util.def" }; /* The number of values of "enum ofputil_action_code". 
*/ enum { #define OFPAT10_ACTION(ENUM, STRUCT, NAME) + 1 #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) + 1 #define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) + 1 OFPUTIL_N_ACTIONS = 1 #include "ofp-util.def" }; int ofputil_action_code_from_name(const char *); void *ofputil_put_action(enum ofputil_action_code, struct ofpbuf *buf); /* For each OpenFlow action ENUM that has a corresponding action structure * struct STRUCT, this defines two functions: * * void ofputil_init_ENUM(struct STRUCT *action); * * Initializes the parts of 'action' that identify it as having type ENUM * and length 'sizeof *action' and zeros the rest. For actions that have * variable length, the length used and cleared is that of struct STRUCT. * * struct STRUCT *ofputil_put_ENUM(struct ofpbuf *buf); * * Appends a new 'action', of length 'sizeof(struct STRUCT)', to 'buf', * initializes it with ofputil_init_ENUM(), and returns it. * * (ENUM and STRUCT stand for the like-named arguments of the OFPAT10_ACTION, * OFPAT11_ACTION and NXAST_ACTION macros defined just below.) */ #define OFPAT10_ACTION(ENUM, STRUCT, NAME) \ void ofputil_init_##ENUM(struct STRUCT *); \ struct STRUCT *ofputil_put_##ENUM(struct ofpbuf *); #define OFPAT11_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) \ void ofputil_init_##ENUM(struct STRUCT *); \ struct STRUCT *ofputil_put_##ENUM(struct ofpbuf *); #define NXAST_ACTION(ENUM, STRUCT, EXTENSIBLE, NAME) \ void ofputil_init_##ENUM(struct STRUCT *); \ struct STRUCT *ofputil_put_##ENUM(struct ofpbuf *); #include "ofp-util.def" #define OFP_ACTION_ALIGN 8 /* Alignment of ofp_actions. */ bool action_outputs_to_port(const union ofp_action *, ovs_be16 port); enum ofperr ofputil_pull_actions(struct ofpbuf *, unsigned int actions_len, union ofp_action **, size_t *); bool ofputil_actions_equal(const union ofp_action *a, size_t n_a, const union ofp_action *b, size_t n_b); union ofp_action *ofputil_actions_clone(const union ofp_action *, size_t n); /* Handy utility for parsing flows and actions. 
*/ bool ofputil_parse_key_value(char **stringp, char **keyp, char **valuep); struct ofputil_port_stats { ofp_port_t port_no; struct netdev_stats stats; uint32_t duration_sec; /* UINT32_MAX if unknown. */ uint32_t duration_nsec; }; struct ofpbuf *ofputil_encode_dump_ports_request(enum ofp_version ofp_version, ofp_port_t port); void ofputil_append_port_stat(struct list *replies, const struct ofputil_port_stats *ops); size_t ofputil_count_port_stats(const struct ofp_header *); int ofputil_decode_port_stats(struct ofputil_port_stats *, struct ofpbuf *msg); enum ofperr ofputil_decode_port_stats_request(const struct ofp_header *request, ofp_port_t *ofp10_port); struct ofputil_queue_stats_request { ofp_port_t port_no; /* OFPP_ANY means "all ports". */ uint32_t queue_id; }; enum ofperr ofputil_decode_queue_stats_request(const struct ofp_header *request, struct ofputil_queue_stats_request *oqsr); struct ofpbuf * ofputil_encode_queue_stats_request(enum ofp_version ofp_version, const struct ofputil_queue_stats_request *oqsr); struct ofputil_queue_stats { ofp_port_t port_no; uint32_t queue_id; /* Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */ uint64_t tx_bytes; uint64_t tx_packets; uint64_t tx_errors; /* UINT32_MAX if unknown. */ uint32_t duration_sec; uint32_t duration_nsec; }; size_t ofputil_count_queue_stats(const struct ofp_header *); int ofputil_decode_queue_stats(struct ofputil_queue_stats *qs, struct ofpbuf *msg); void ofputil_append_queue_stat(struct list *replies, const struct ofputil_queue_stats *oqs); #endif /* ofp-util.h */ openvswitch-2.0.1+git20140120/lib/ofp-version-opt.c000066400000000000000000000020151226605124000214310ustar00rootroot00000000000000#include #include "ofp-util.h" #include "ofp-version-opt.h" #include "ovs-thread.h" #include "vlog.h" #include "dynamic-string.h" VLOG_DEFINE_THIS_MODULE(ofp_version); static uint32_t allowed_versions = 0; uint32_t get_allowed_ofp_versions(void) { return allowed_versions ? 
allowed_versions : OFPUTIL_DEFAULT_VERSIONS; } void set_allowed_ofp_versions(const char *string) { assert_single_threaded(); allowed_versions = ofputil_versions_from_string(string); } void mask_allowed_ofp_versions(uint32_t bitmap) { assert_single_threaded(); allowed_versions &= bitmap; } void ofp_version_usage(void) { struct ds msg = DS_EMPTY_INITIALIZER; ofputil_format_version_bitmap_names(&msg, OFPUTIL_DEFAULT_VERSIONS); printf( "\nOpen Flow Version options:\n" " -V, --version display version information\n" " -O, --protocols set allowed Open Flow versions\n" " (default: %s)\n", ds_cstr(&msg)); ds_destroy(&msg); } openvswitch-2.0.1+git20140120/lib/ofp-version-opt.h000066400000000000000000000017051226605124000214430ustar00rootroot00000000000000#ifndef OFP_VERSION_H #define OFP_VERSION_H 1 #include #include "util.h" #include "ofp-util.h" #define OFP_VERSION_LONG_OPTIONS \ {"version", no_argument, NULL, 'V'}, \ {"protocols", required_argument, NULL, 'O'} #define OFP_VERSION_OPTION_HANDLERS \ case 'V': \ ovs_print_version(OFP10_VERSION, OFP13_VERSION); \ exit(EXIT_SUCCESS); \ \ case 'O': \ set_allowed_ofp_versions(optarg); \ break; uint32_t get_allowed_ofp_versions(void); void set_allowed_ofp_versions(const char *string); void mask_allowed_ofp_versions(uint32_t); void ofp_version_usage(void); #endif openvswitch-2.0.1+git20140120/lib/ofp-version.man000066400000000000000000000013061226605124000211640ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .IP "\fB\-O \fR[\fIversion\fR[\fB,\fIversion\fR]...]\fR" .IQ "\fB\-\-protocols=\fR[\fIversion\fR[\fB,\fIversion\fR]...]\fR" Sets the OpenFlow protocol versions that are allowed when establishing an OpenFlow session. . .IP The following versions are considered to be ready for general use. These protocol versions are enabled by default: . .RS .IP \(bu \fBOpenFlow10\fR, for OpenFlow 1.0. .RE . .IP Support for the following protocol versions is provided for testing and development purposes. 
They are not enabled by default: . .RS .IP \(bu \fBOpenFlow11\fR, for OpenFlow 1.1. . .IP \(bu \fBOpenFlow12\fR, for OpenFlow 1.2. . .IP \(bu \fBOpenFlow13\fR, for OpenFlow 1.3. .RE openvswitch-2.0.1+git20140120/lib/ofpbuf.c000066400000000000000000000421371226605124000176540ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ofpbuf.h" #include #include #include "dynamic-string.h" #include "util.h" static void ofpbuf_use__(struct ofpbuf *b, void *base, size_t allocated, enum ofpbuf_source source) { b->base = b->data = base; b->allocated = allocated; b->source = source; b->size = 0; b->l2 = b->l2_5 = b->l3 = b->l4 = b->l7 = NULL; list_poison(&b->list_node); b->private_p = NULL; } /* Initializes 'b' as an empty ofpbuf that contains the 'allocated' bytes of * memory starting at 'base'. 'base' should be the first byte of a region * obtained from malloc(). It will be freed (with free()) if 'b' is resized or * freed. */ void ofpbuf_use(struct ofpbuf *b, void *base, size_t allocated) { ofpbuf_use__(b, base, allocated, OFPBUF_MALLOC); } /* Initializes 'b' as an empty ofpbuf that contains the 'allocated' bytes of * memory starting at 'base'. 'base' should point to a buffer on the stack. * (Nothing actually relies on 'base' being allocated on the stack. It could * be static or malloc()'d memory. But stack space is the most common use * case.) 
* * 'base' should be appropriately aligned. Using an array of uint32_t or * uint64_t for the buffer is a reasonable way to ensure appropriate alignment * for 32- or 64-bit data. * * An ofpbuf operation that requires reallocating data will assert-fail if this * function was used to initialize it. Thus, one need not call ofpbuf_uninit() * on an ofpbuf initialized by this function (though doing so is harmless), * because it is guaranteed that 'b' does not own any heap-allocated memory. * * (Concretely: ofpbuf_resize__() reaches NOT_REACHED() for an OFPBUF_STACK * buffer, and ofpbuf_uninit() frees only OFPBUF_MALLOC buffers.) */ void ofpbuf_use_stack(struct ofpbuf *b, void *base, size_t allocated) { ofpbuf_use__(b, base, allocated, OFPBUF_STACK); } /* Initializes 'b' as an empty ofpbuf that contains the 'allocated' bytes of * memory starting at 'base'. 'base' should point to a buffer on the stack. * (Nothing actually relies on 'base' being allocated on the stack. It could * be static or malloc()'d memory. But stack space is the most common use * case.) * * 'base' should be appropriately aligned. Using an array of uint32_t or * uint64_t for the buffer is a reasonable way to ensure appropriate alignment * for 32- or 64-bit data. * * An ofpbuf operation that requires reallocating data will copy the provided * buffer into a malloc()'d buffer. Thus, it is wise to call ofpbuf_uninit() * on an ofpbuf initialized by this function, so that if it expanded into the * heap, that memory is freed. * * The stub at 'base' itself is never freed by the ofpbuf code: when the buffer * expands, ofpbuf_resize__() copies the contents into a new malloc()'d block * and switches 'b' to OFPBUF_MALLOC, leaving 'base' to its owner. */ void ofpbuf_use_stub(struct ofpbuf *b, void *base, size_t allocated) { ofpbuf_use__(b, base, allocated, OFPBUF_STUB); } /* Initializes 'b' as an ofpbuf whose data starts at 'data' and continues for * 'size' bytes. This is appropriate for an ofpbuf that will be used to * inspect existing data, without moving it around or reallocating it, and * generally without modifying it at all. * * An ofpbuf operation that requires reallocating data will assert-fail if this * function was used to initialize it. 
*/ void ofpbuf_use_const(struct ofpbuf *b, const void *data, size_t size) { ofpbuf_use__(b, CONST_CAST(void *, data), size, OFPBUF_STACK); b->size = size; } /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size' * bytes. */ void ofpbuf_init(struct ofpbuf *b, size_t size) { ofpbuf_use(b, size ? xmalloc(size) : NULL, size); } /* Frees memory that 'b' points to. */ void ofpbuf_uninit(struct ofpbuf *b) { if (b && b->source == OFPBUF_MALLOC) { free(b->base); } } /* Returns a pointer that may be passed to free() to accomplish the same thing * as ofpbuf_uninit(b). The return value is a null pointer if ofpbuf_uninit() * would not free any memory. */ void * ofpbuf_get_uninit_pointer(struct ofpbuf *b) { return b && b->source == OFPBUF_MALLOC ? b->base : NULL; } /* Frees memory that 'b' points to and allocates a new ofpbuf */ void ofpbuf_reinit(struct ofpbuf *b, size_t size) { ofpbuf_uninit(b); ofpbuf_init(b, size); } /* Creates and returns a new ofpbuf with an initial capacity of 'size' * bytes. */ struct ofpbuf * ofpbuf_new(size_t size) { struct ofpbuf *b = xmalloc(sizeof *b); ofpbuf_init(b, size); return b; } /* Creates and returns a new ofpbuf with an initial capacity of 'size + * headroom' bytes, reserving the first 'headroom' bytes as headroom. */ struct ofpbuf * ofpbuf_new_with_headroom(size_t size, size_t headroom) { struct ofpbuf *b = ofpbuf_new(size + headroom); ofpbuf_reserve(b, headroom); return b; } /* Creates and returns a new ofpbuf that initially contains a copy of the * 'buffer->size' bytes of data starting at 'buffer->data' with no headroom or * tailroom. */ struct ofpbuf * ofpbuf_clone(const struct ofpbuf *buffer) { return ofpbuf_clone_with_headroom(buffer, 0); } /* Creates and returns a new ofpbuf whose data are copied from 'buffer'. The * returned ofpbuf will additionally have 'headroom' bytes of headroom. 
*/ struct ofpbuf * ofpbuf_clone_with_headroom(const struct ofpbuf *buffer, size_t headroom) { struct ofpbuf *new_buffer; uintptr_t data_delta; new_buffer = ofpbuf_clone_data_with_headroom(buffer->data, buffer->size, headroom); data_delta = (char *) new_buffer->data - (char *) buffer->data; if (buffer->l2) { new_buffer->l2 = (char *) buffer->l2 + data_delta; } if (buffer->l2_5) { new_buffer->l2_5 = (char *) buffer->l2_5 + data_delta; } if (buffer->l3) { new_buffer->l3 = (char *) buffer->l3 + data_delta; } if (buffer->l4) { new_buffer->l4 = (char *) buffer->l4 + data_delta; } if (buffer->l7) { new_buffer->l7 = (char *) buffer->l7 + data_delta; } return new_buffer; } /* Creates and returns a new ofpbuf that initially contains a copy of the * 'size' bytes of data starting at 'data' with no headroom or tailroom. */ struct ofpbuf * ofpbuf_clone_data(const void *data, size_t size) { return ofpbuf_clone_data_with_headroom(data, size, 0); } /* Creates and returns a new ofpbuf that initially contains 'headroom' bytes of * headroom followed by a copy of the 'size' bytes of data starting at * 'data'. */ struct ofpbuf * ofpbuf_clone_data_with_headroom(const void *data, size_t size, size_t headroom) { struct ofpbuf *b = ofpbuf_new_with_headroom(size, headroom); ofpbuf_put(b, data, size); return b; } /* Frees memory that 'b' points to, as well as 'b' itself. */ void ofpbuf_delete(struct ofpbuf *b) { if (b) { ofpbuf_uninit(b); free(b); } } /* Returns the number of bytes of headroom in 'b', that is, the number of bytes * of unused space in ofpbuf 'b' before the data that is in use. (Most * commonly, the data in a ofpbuf is at its beginning, and thus the ofpbuf's * headroom is 0.) */ size_t ofpbuf_headroom(const struct ofpbuf *b) { return (char*)b->data - (char*)b->base; } /* Returns the number of bytes that may be appended to the tail end of ofpbuf * 'b' before the ofpbuf must be reallocated. 
*/ size_t ofpbuf_tailroom(const struct ofpbuf *b) { return (char*)ofpbuf_end(b) - (char*)ofpbuf_tail(b); } static void ofpbuf_copy__(struct ofpbuf *b, uint8_t *new_base, size_t new_headroom, size_t new_tailroom) { const uint8_t *old_base = b->base; size_t old_headroom = ofpbuf_headroom(b); size_t old_tailroom = ofpbuf_tailroom(b); size_t copy_headroom = MIN(old_headroom, new_headroom); size_t copy_tailroom = MIN(old_tailroom, new_tailroom); memcpy(&new_base[new_headroom - copy_headroom], &old_base[old_headroom - copy_headroom], copy_headroom + b->size + copy_tailroom); } /* Reallocates 'b' so that it has exactly 'new_headroom' and 'new_tailroom' * bytes of headroom and tailroom, respectively. */ static void ofpbuf_resize__(struct ofpbuf *b, size_t new_headroom, size_t new_tailroom) { void *new_base, *new_data; size_t new_allocated; new_allocated = new_headroom + b->size + new_tailroom; switch (b->source) { case OFPBUF_MALLOC: if (new_headroom == ofpbuf_headroom(b)) { new_base = xrealloc(b->base, new_allocated); } else { new_base = xmalloc(new_allocated); ofpbuf_copy__(b, new_base, new_headroom, new_tailroom); free(b->base); } break; case OFPBUF_STACK: NOT_REACHED(); case OFPBUF_STUB: b->source = OFPBUF_MALLOC; new_base = xmalloc(new_allocated); ofpbuf_copy__(b, new_base, new_headroom, new_tailroom); break; default: NOT_REACHED(); } b->allocated = new_allocated; b->base = new_base; new_data = (char *) new_base + new_headroom; if (b->data != new_data) { uintptr_t data_delta = (char *) new_data - (char *) b->data; b->data = new_data; if (b->l2) { b->l2 = (char *) b->l2 + data_delta; } if (b->l2_5) { b->l2_5 = (char *) b->l2_5 + data_delta; } if (b->l3) { b->l3 = (char *) b->l3 + data_delta; } if (b->l4) { b->l4 = (char *) b->l4 + data_delta; } if (b->l7) { b->l7 = (char *) b->l7 + data_delta; } } } /* Ensures that 'b' has room for at least 'size' bytes at its tail end, * reallocating and copying its data if necessary. Its headroom, if any, is * preserved. 
*/ void ofpbuf_prealloc_tailroom(struct ofpbuf *b, size_t size) { if (size > ofpbuf_tailroom(b)) { ofpbuf_resize__(b, ofpbuf_headroom(b), MAX(size, 64)); } } /* Ensures that 'b' has room for at least 'size' bytes at its head, * reallocating and copying its data if necessary. Its tailroom, if any, is * preserved. */ void ofpbuf_prealloc_headroom(struct ofpbuf *b, size_t size) { if (size > ofpbuf_headroom(b)) { ofpbuf_resize__(b, MAX(size, 64), ofpbuf_tailroom(b)); } } /* Trims the size of 'b' to fit its actual content, reducing its tailroom to * 0. Its headroom, if any, is preserved. * * Buffers not obtained from malloc() are not resized, since that wouldn't save * any memory. */ void ofpbuf_trim(struct ofpbuf *b) { if (b->source == OFPBUF_MALLOC && (ofpbuf_headroom(b) || ofpbuf_tailroom(b))) { ofpbuf_resize__(b, 0, 0); } } /* If 'b' is shorter than 'length' bytes, pads its tail out with zeros to that * length. */ void ofpbuf_padto(struct ofpbuf *b, size_t length) { if (b->size < length) { ofpbuf_put_zeros(b, length - b->size); } } /* Appends 'size' bytes of data to the tail end of 'b', reallocating and * copying its data if necessary. Returns a pointer to the first byte of the * new data, which is left uninitialized. */ void * ofpbuf_put_uninit(struct ofpbuf *b, size_t size) { void *p; ofpbuf_prealloc_tailroom(b, size); p = ofpbuf_tail(b); b->size += size; return p; } /* Appends 'size' zeroed bytes to the tail end of 'b'. Data in 'b' is * reallocated and copied if necessary. Returns a pointer to the first byte of * the data's location in the ofpbuf. */ void * ofpbuf_put_zeros(struct ofpbuf *b, size_t size) { void *dst = ofpbuf_put_uninit(b, size); memset(dst, 0, size); return dst; } /* Appends the 'size' bytes of data in 'p' to the tail end of 'b'. Data in 'b' * is reallocated and copied if necessary. Returns a pointer to the first * byte of the data's location in the ofpbuf. 
*/ void * ofpbuf_put(struct ofpbuf *b, const void *p, size_t size) { void *dst = ofpbuf_put_uninit(b, size); memcpy(dst, p, size); return dst; } /* Parses as many pairs of hex digits as possible (possibly separated by * spaces) from the beginning of 's', appending bytes for their values to 'b'. * Returns the first character of 's' that is not the first of a pair of hex * digits. If 'n' is nonnull, stores the number of bytes added to 'b' in * '*n'. */ char * ofpbuf_put_hex(struct ofpbuf *b, const char *s, size_t *n) { size_t initial_size = b->size; for (;;) { uint8_t byte; bool ok; s += strspn(s, " "); byte = hexits_value(s, 2, &ok); if (!ok) { if (n) { *n = b->size - initial_size; } return CONST_CAST(char *, s); } ofpbuf_put(b, &byte, 1); s += 2; } } /* Reserves 'size' bytes of headroom so that they can be later allocated with * ofpbuf_push_uninit() without reallocating the ofpbuf. */ void ofpbuf_reserve(struct ofpbuf *b, size_t size) { ovs_assert(!b->size); ofpbuf_prealloc_tailroom(b, size); b->data = (char*)b->data + size; } /* Prefixes 'size' bytes to the head end of 'b', reallocating and copying its * data if necessary. Returns a pointer to the first byte of the data's * location in the ofpbuf. The new data is left uninitialized. */ void * ofpbuf_push_uninit(struct ofpbuf *b, size_t size) { ofpbuf_prealloc_headroom(b, size); b->data = (char*)b->data - size; b->size += size; return b->data; } /* Prefixes 'size' zeroed bytes to the head end of 'b', reallocating and * copying its data if necessary. Returns a pointer to the first byte of the * data's location in the ofpbuf. */ void * ofpbuf_push_zeros(struct ofpbuf *b, size_t size) { void *dst = ofpbuf_push_uninit(b, size); memset(dst, 0, size); return dst; } /* Copies the 'size' bytes starting at 'p' to the head end of 'b', reallocating * and copying its data if necessary. Returns a pointer to the first byte of * the data's location in the ofpbuf. 
*/ void * ofpbuf_push(struct ofpbuf *b, const void *p, size_t size) { void *dst = ofpbuf_push_uninit(b, size); memcpy(dst, p, size); return dst; } /* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to * byte 'offset'. Otherwise, returns a null pointer. */ void * ofpbuf_at(const struct ofpbuf *b, size_t offset, size_t size) { return offset + size <= b->size ? (char *) b->data + offset : NULL; } /* Returns a pointer to byte 'offset' in 'b', which must contain at least * 'offset + size' bytes of data. */ void * ofpbuf_at_assert(const struct ofpbuf *b, size_t offset, size_t size) { ovs_assert(offset + size <= b->size); return ((char *) b->data) + offset; } /* Returns the byte following the last byte of data in use in 'b'. */ void * ofpbuf_tail(const struct ofpbuf *b) { return (char *) b->data + b->size; } /* Returns the byte following the last byte allocated for use (but not * necessarily in use) by 'b'. */ void * ofpbuf_end(const struct ofpbuf *b) { return (char *) b->base + b->allocated; } /* Clears any data from 'b'. */ void ofpbuf_clear(struct ofpbuf *b) { b->data = b->base; b->size = 0; } /* Removes 'size' bytes from the head end of 'b', which must contain at least * 'size' bytes of data. Returns the first byte of data removed. */ void * ofpbuf_pull(struct ofpbuf *b, size_t size) { void *data = b->data; ovs_assert(b->size >= size); b->data = (char*)b->data + size; b->size -= size; return data; } /* If 'b' has at least 'size' bytes of data, removes that many bytes from the * head end of 'b' and returns the first byte removed. Otherwise, returns a * null pointer without modifying 'b'. */ void * ofpbuf_try_pull(struct ofpbuf *b, size_t size) { return b->size >= size ? ofpbuf_pull(b, size) : NULL; } /* Returns the data in 'b' as a block of malloc()'d memory and frees the buffer * within 'b'. (If 'b' itself was dynamically allocated, e.g. with * ofpbuf_new(), then it should still be freed with, e.g., ofpbuf_delete().) 
*/ void * ofpbuf_steal_data(struct ofpbuf *b) { void *p; if (b->source == OFPBUF_MALLOC && b->data == b->base) { p = b->data; } else { p = xmemdup(b->data, b->size); if (b->source == OFPBUF_MALLOC) { free(b->base); } } b->base = b->data = NULL; return p; } /* Returns a string that describes some of 'b''s metadata plus a hex dump of up * to 'maxbytes' from the start of the buffer. */ char * ofpbuf_to_string(const struct ofpbuf *b, size_t maxbytes) { struct ds s; ds_init(&s); ds_put_format(&s, "size=%zu, allocated=%zu, head=%zu, tail=%zu\n", b->size, b->allocated, ofpbuf_headroom(b), ofpbuf_tailroom(b)); ds_put_hex_dump(&s, b->data, MIN(b->size, maxbytes), 0, false); return ds_cstr(&s); } /* Removes each of the "struct ofpbuf"s on 'list' from the list and frees * them. */ void ofpbuf_list_delete(struct list *list) { struct ofpbuf *b, *next; LIST_FOR_EACH_SAFE (b, next, list_node, list) { list_remove(&b->list_node); ofpbuf_delete(b); } } openvswitch-2.0.1+git20140120/lib/ofpbuf.h000066400000000000000000000105321226605124000176530ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OFPBUF_H #define OFPBUF_H 1 #include #include #include "list.h" #include "util.h" #ifdef __cplusplus extern "C" { #endif enum ofpbuf_source { OFPBUF_MALLOC, /* Obtained via malloc(). */ OFPBUF_STACK, /* Un-movable stack space or static buffer. */ OFPBUF_STUB /* Starts on stack, may expand into heap. 
*/ }; /* Buffer for holding arbitrary data. An ofpbuf is automatically reallocated * as necessary if it grows too large for the available memory. */ struct ofpbuf { void *base; /* First byte of allocated space. */ size_t allocated; /* Number of bytes allocated. */ enum ofpbuf_source source; /* Source of memory allocated as 'base'. */ void *data; /* First byte actually in use. */ size_t size; /* Number of bytes in use. */ void *l2; /* Link-level header. */ void *l2_5; /* MPLS label stack */ void *l3; /* Network-level header. */ void *l4; /* Transport-level header. */ void *l7; /* Application data. */ struct list list_node; /* Private list element for use by owner. */ void *private_p; /* Private pointer for use by owner. */ }; void ofpbuf_use(struct ofpbuf *, void *, size_t); void ofpbuf_use_stack(struct ofpbuf *, void *, size_t); void ofpbuf_use_stub(struct ofpbuf *, void *, size_t); void ofpbuf_use_const(struct ofpbuf *, const void *, size_t); void ofpbuf_init(struct ofpbuf *, size_t); void ofpbuf_uninit(struct ofpbuf *); void *ofpbuf_get_uninit_pointer(struct ofpbuf *); void ofpbuf_reinit(struct ofpbuf *, size_t); struct ofpbuf *ofpbuf_new(size_t); struct ofpbuf *ofpbuf_new_with_headroom(size_t, size_t headroom); struct ofpbuf *ofpbuf_clone(const struct ofpbuf *); struct ofpbuf *ofpbuf_clone_with_headroom(const struct ofpbuf *, size_t headroom); struct ofpbuf *ofpbuf_clone_data(const void *, size_t); struct ofpbuf *ofpbuf_clone_data_with_headroom(const void *, size_t, size_t headroom); void ofpbuf_delete(struct ofpbuf *); void *ofpbuf_at(const struct ofpbuf *, size_t offset, size_t size); void *ofpbuf_at_assert(const struct ofpbuf *, size_t offset, size_t size); void *ofpbuf_tail(const struct ofpbuf *); void *ofpbuf_end(const struct ofpbuf *); void *ofpbuf_put_uninit(struct ofpbuf *, size_t); void *ofpbuf_put_zeros(struct ofpbuf *, size_t); void *ofpbuf_put(struct ofpbuf *, const void *, size_t); char *ofpbuf_put_hex(struct ofpbuf *, const char *s, size_t *n); 
void ofpbuf_reserve(struct ofpbuf *, size_t); void *ofpbuf_push_uninit(struct ofpbuf *b, size_t); void *ofpbuf_push_zeros(struct ofpbuf *, size_t); void *ofpbuf_push(struct ofpbuf *b, const void *, size_t); size_t ofpbuf_headroom(const struct ofpbuf *); size_t ofpbuf_tailroom(const struct ofpbuf *); void ofpbuf_prealloc_headroom(struct ofpbuf *, size_t); void ofpbuf_prealloc_tailroom(struct ofpbuf *, size_t); void ofpbuf_trim(struct ofpbuf *); void ofpbuf_padto(struct ofpbuf *, size_t); void ofpbuf_clear(struct ofpbuf *); void *ofpbuf_pull(struct ofpbuf *, size_t); void *ofpbuf_try_pull(struct ofpbuf *, size_t); void *ofpbuf_steal_data(struct ofpbuf *); char *ofpbuf_to_string(const struct ofpbuf *, size_t maxbytes); static inline struct ofpbuf *ofpbuf_from_list(const struct list *list) { return CONTAINER_OF(list, struct ofpbuf, list_node); } void ofpbuf_list_delete(struct list *); static inline bool ofpbuf_equal(const struct ofpbuf *a, const struct ofpbuf *b) { return a->size == b->size && memcmp(a->data, b->data, a->size) == 0; } #ifdef __cplusplus } #endif #endif /* ofpbuf.h */ openvswitch-2.0.1+git20140120/lib/ovs-atomic-c11.h000066400000000000000000000047561226605124000210500ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ /* This header implements atomic operation primitives on compilers that * have built-in support for C11 */ #ifndef IN_OVS_ATOMIC_H #error "This header should only be included indirectly via ovs-atomic.h." #endif #include /* Nonstandard atomic types. */ typedef _Atomic(uint8_t) atomic_uint8_t; typedef _Atomic(uint16_t) atomic_uint16_t; typedef _Atomic(uint32_t) atomic_uint32_t; typedef _Atomic(uint64_t) atomic_uint64_t; typedef _Atomic(int8_t) atomic_int8_t; typedef _Atomic(int16_t) atomic_int16_t; typedef _Atomic(int32_t) atomic_int32_t; typedef _Atomic(int64_t) atomic_int64_t; #define atomic_read(SRC, DST) \ atomic_read_explicit(SRC, DST, memory_order_seq_cst) #define atomic_read_explicit(SRC, DST, ORDER) \ (*(DST) = atomic_load_explicit(SRC, ORDER), \ (void) 0) #define atomic_add(RMW, ARG, ORIG) \ atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_sub(RMW, ARG, ORIG) \ atomic_sub_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_or(RMW, ARG, ORIG) \ atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_xor(RMW, ARG, ORIG) \ atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_and(RMW, ARG, ORIG) \ atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_add_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = atomic_fetch_add_explicit(RMW, ARG, ORDER), (void) 0) #define atomic_sub_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = atomic_fetch_sub_explicit(RMW, ARG, ORDER), (void) 0) #define atomic_or_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = atomic_fetch_or_explicit(RMW, ARG, ORDER), (void) 0) #define atomic_xor_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = atomic_fetch_xor_explicit(RMW, ARG, ORDER), (void) 0) #define atomic_and_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = atomic_fetch_and_explicit(RMW, ARG, ORDER), (void) 0) openvswitch-2.0.1+git20140120/lib/ovs-atomic-clang.h000066400000000000000000000077211226605124000215430ustar00rootroot00000000000000/* * Copyright (c) 2013 
Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* This header implements atomic operation primitives on Clang. */ #ifndef IN_OVS_ATOMIC_H #error "This header should only be included indirectly via ovs-atomic.h." #endif #define OVS_ATOMIC_CLANG_IMPL 1 /* Standard atomic types. */ typedef _Atomic(_Bool) atomic_bool; typedef _Atomic(char) atomic_char; typedef _Atomic(signed char) atomic_schar; typedef _Atomic(unsigned char) atomic_uchar; typedef _Atomic(short) atomic_short; typedef _Atomic(unsigned short) atomic_ushort; typedef _Atomic(int) atomic_int; typedef _Atomic(unsigned int) atomic_uint; typedef _Atomic(long) atomic_long; typedef _Atomic(unsigned long) atomic_ulong; typedef _Atomic(long long) atomic_llong; typedef _Atomic(unsigned long long) atomic_ullong; typedef _Atomic(size_t) atomic_size_t; typedef _Atomic(ptrdiff_t) atomic_ptrdiff_t; typedef _Atomic(intmax_t) atomic_intmax_t; typedef _Atomic(uintmax_t) atomic_uintmax_t; typedef _Atomic(intptr_t) atomic_intptr_t; typedef _Atomic(uintptr_t) atomic_uintptr_t; /* Nonstandard atomic types. 
*/ typedef _Atomic(uint8_t) atomic_uint8_t; typedef _Atomic(uint16_t) atomic_uint16_t; typedef _Atomic(uint32_t) atomic_uint32_t; typedef _Atomic(uint64_t) atomic_uint64_t; typedef _Atomic(int8_t) atomic_int8_t; typedef _Atomic(int16_t) atomic_int16_t; typedef _Atomic(int32_t) atomic_int32_t; typedef _Atomic(int64_t) atomic_int64_t; #define ATOMIC_VAR_INIT(VALUE) (VALUE) #define atomic_init(OBJECT, VALUE) __c11_atomic_init(OBJECT, VALUE) /* Clang hard-codes these exact values internally but does not appear to * export any names for them. */ typedef enum { memory_order_relaxed = 0, memory_order_consume = 1, memory_order_acquire = 2, memory_order_release = 3, memory_order_acq_rel = 4, memory_order_seq_cst = 5 } memory_order; #define atomic_thread_fence(ORDER) __c11_atomic_thread_fence(ORDER) #define atomic_signal_fence(ORDER) __c11_atomic_signal_fence(ORDER) #define atomic_store(DST, SRC) \ atomic_store_explicit(DST, SRC, memory_order_seq_cst) #define atomic_store_explicit(DST, SRC, ORDER) \ __c11_atomic_store(DST, SRC, ORDER) #define atomic_read(SRC, DST) \ atomic_read_explicit(SRC, DST, memory_order_seq_cst) #define atomic_read_explicit(SRC, DST, ORDER) \ (*(DST) = __c11_atomic_load(SRC, ORDER), \ (void) 0) #define atomic_add(RMW, ARG, ORIG) \ atomic_add_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_sub(RMW, ARG, ORIG) \ atomic_sub_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_or(RMW, ARG, ORIG) \ atomic_or_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_xor(RMW, ARG, ORIG) \ atomic_xor_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_and(RMW, ARG, ORIG) \ atomic_and_explicit(RMW, ARG, ORIG, memory_order_seq_cst) #define atomic_add_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = __c11_atomic_fetch_add(RMW, ARG, ORDER), (void) 0) #define atomic_sub_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = __c11_atomic_fetch_sub(RMW, ARG, ORDER), (void) 0) #define atomic_or_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = 
__c11_atomic_fetch_or(RMW, ARG, ORDER), (void) 0) #define atomic_xor_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = __c11_atomic_fetch_xor(RMW, ARG, ORDER), (void) 0) #define atomic_and_explicit(RMW, ARG, ORIG, ORDER) \ (*(ORIG) = __c11_atomic_fetch_and(RMW, ARG, ORDER), (void) 0) #include "ovs-atomic-flag-gcc4.7+.h" openvswitch-2.0.1+git20140120/lib/ovs-atomic-flag-gcc4.7+.h000066400000000000000000000027541226605124000224270ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* This header implements atomic_flag on Clang and on GCC 4.7 and later. */ #ifndef IN_OVS_ATOMIC_H #error "This header should only be included indirectly via ovs-atomic.h." 
#endif /* atomic_flag */ typedef struct { unsigned char b; } atomic_flag; #define ATOMIC_FLAG_INIT { .b = false } static inline bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order) { return __atomic_test_and_set(&object->b, order); } static inline bool atomic_flag_test_and_set(volatile atomic_flag *object) { return atomic_flag_test_and_set_explicit(object, memory_order_seq_cst); } static inline void atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order) { __atomic_clear(object, order); } static inline void atomic_flag_clear(volatile atomic_flag *object) { atomic_flag_clear_explicit(object, memory_order_seq_cst); } openvswitch-2.0.1+git20140120/lib/ovs-atomic-gcc4+.c000066400000000000000000000067421226605124000213470ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ovs-atomic.h" #include "ovs-thread.h" #if OVS_ATOMIC_GCC4P_IMPL static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; #define DEFINE_LOCKED_OP(TYPE, NAME, OPERATOR) \ TYPE##_t \ locked_##TYPE##_##NAME(struct locked_##TYPE *u, TYPE##_t arg) \ { \ TYPE##_t old_value; \ \ ovs_mutex_lock(&mutex); \ old_value = u->value; \ u->value OPERATOR arg; \ ovs_mutex_unlock(&mutex); \ \ return old_value; \ } #define DEFINE_LOCKED_TYPE(TYPE) \ TYPE##_t \ locked_##TYPE##_load(const struct locked_##TYPE *u) \ { \ TYPE##_t value; \ \ ovs_mutex_lock(&mutex); \ value = u->value; \ ovs_mutex_unlock(&mutex); \ \ return value; \ } \ \ void \ locked_##TYPE##_store(struct locked_##TYPE *u, TYPE##_t value) \ { \ ovs_mutex_lock(&mutex); \ u->value = value; \ ovs_mutex_unlock(&mutex); \ } \ DEFINE_LOCKED_OP(TYPE, add, +=); \ DEFINE_LOCKED_OP(TYPE, sub, -=); \ DEFINE_LOCKED_OP(TYPE, or, |=); \ DEFINE_LOCKED_OP(TYPE, xor, ^=); \ DEFINE_LOCKED_OP(TYPE, and, &=) DEFINE_LOCKED_TYPE(uint64); DEFINE_LOCKED_TYPE(int64); #endif /* OVS_ATOMIC_GCC4P_IMPL */ openvswitch-2.0.1+git20140120/lib/ovs-atomic-gcc4+.h000066400000000000000000000230041226605124000213420ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* This header implements atomic operation primitives on GCC 4.x. */ #ifndef IN_OVS_ATOMIC_H #error "This header should only be included indirectly via ovs-atomic.h." 
#endif #define OVS_ATOMIC_GCC4P_IMPL 1 #define DEFINE_LOCKLESS_ATOMIC(TYPE, NAME) typedef struct { TYPE value; } NAME #define ATOMIC_BOOL_LOCK_FREE 2 DEFINE_LOCKLESS_ATOMIC(bool, atomic_bool); #define ATOMIC_CHAR_LOCK_FREE 2 DEFINE_LOCKLESS_ATOMIC(char, atomic_char); DEFINE_LOCKLESS_ATOMIC(signed char, atomic_schar); DEFINE_LOCKLESS_ATOMIC(unsigned char, atomic_uchar); #define ATOMIC_SHORT_LOCK_FREE 2 DEFINE_LOCKLESS_ATOMIC(short, atomic_short); DEFINE_LOCKLESS_ATOMIC(unsigned short, atomic_ushort); #define ATOMIC_INT_LOCK_FREE 2 DEFINE_LOCKLESS_ATOMIC(int, atomic_int); DEFINE_LOCKLESS_ATOMIC(unsigned int, atomic_uint); #if ULONG_MAX <= UINTPTR_MAX #define ATOMIC_LONG_LOCK_FREE 2 DEFINE_LOCKLESS_ATOMIC(long, atomic_long); DEFINE_LOCKLESS_ATOMIC(unsigned long, atomic_ulong); #elif ULONG_MAX == UINT64_MAX #define ATOMIC_LONG_LOCK_FREE 0 typedef struct locked_int64 atomic_long; typedef struct locked_uint64 atomic_ulong; #else #error "not implemented" #endif #if ULLONG_MAX <= UINTPTR_MAX #define ATOMIC_LLONG_LOCK_FREE 2 DEFINE_LOCKLESS_ATOMIC(long long, atomic_llong); DEFINE_LOCKLESS_ATOMIC(unsigned long long, atomic_ullong); #elif ULLONG_MAX == UINT64_MAX #define ATOMIC_LLONG_LOCK_FREE 0 typedef struct locked_int64 atomic_llong; typedef struct locked_uint64 atomic_ullong; #else #error "not implemented" #endif #if SIZE_MAX <= UINTPTR_MAX DEFINE_LOCKLESS_ATOMIC(size_t, atomic_size_t); DEFINE_LOCKLESS_ATOMIC(ptrdiff_t, atomic_ptrdiff_t); #elif SIZE_MAX == UINT64_MAX typedef struct locked_uint64 atomic_size_t; typedef struct locked_int64 atomic_ptrdiff_t; #else #error "not implemented" #endif #if UINTMAX_MAX <= UINTPTR_MAX DEFINE_LOCKLESS_ATOMIC(intmax_t, atomic_intmax_t); DEFINE_LOCKLESS_ATOMIC(uintmax_t, atomic_uintmax_t); #elif UINTMAX_MAX == UINT64_MAX typedef struct locked_int64 atomic_intmax_t; typedef struct locked_uint64 atomic_uintmax_t; #else #error "not implemented" #endif #define ATOMIC_POINTER_LOCK_FREE 2 DEFINE_LOCKLESS_ATOMIC(intptr_t, atomic_intptr_t); 
DEFINE_LOCKLESS_ATOMIC(uintptr_t, atomic_uintptr_t); /* Nonstandard atomic types. */ DEFINE_LOCKLESS_ATOMIC(uint8_t, atomic_uint8_t); DEFINE_LOCKLESS_ATOMIC(uint16_t, atomic_uint16_t); DEFINE_LOCKLESS_ATOMIC(uint32_t, atomic_uint32_t); DEFINE_LOCKLESS_ATOMIC(int8_t, atomic_int8_t); DEFINE_LOCKLESS_ATOMIC(int16_t, atomic_int16_t); DEFINE_LOCKLESS_ATOMIC(int32_t, atomic_int32_t); #if UINT64_MAX <= UINTPTR_MAX DEFINE_LOCKLESS_ATOMIC(uint64_t, atomic_uint64_t); DEFINE_LOCKLESS_ATOMIC(int64_t, atomic_int64_t); #else typedef struct locked_uint64 atomic_uint64_t; typedef struct locked_int64 atomic_int64_t; #endif typedef enum { memory_order_relaxed, memory_order_consume, memory_order_acquire, memory_order_release, memory_order_acq_rel, memory_order_seq_cst } memory_order; /* locked_uint64. */ #define IF_LOCKED_UINT64(OBJECT, THEN, ELSE) \ __builtin_choose_expr( \ __builtin_types_compatible_p(typeof(OBJECT), struct locked_uint64), \ (THEN), (ELSE)) #define AS_LOCKED_UINT64(OBJECT) ((struct locked_uint64 *) (void *) (OBJECT)) #define AS_UINT64(OBJECT) ((uint64_t *) (OBJECT)) struct locked_uint64 { uint64_t value; }; uint64_t locked_uint64_load(const struct locked_uint64 *); void locked_uint64_store(struct locked_uint64 *, uint64_t); uint64_t locked_uint64_add(struct locked_uint64 *, uint64_t arg); uint64_t locked_uint64_sub(struct locked_uint64 *, uint64_t arg); uint64_t locked_uint64_or(struct locked_uint64 *, uint64_t arg); uint64_t locked_uint64_xor(struct locked_uint64 *, uint64_t arg); uint64_t locked_uint64_and(struct locked_uint64 *, uint64_t arg); #define IF_LOCKED_INT64(OBJECT, THEN, ELSE) \ __builtin_choose_expr( \ __builtin_types_compatible_p(typeof(OBJECT), struct locked_int64), \ (THEN), (ELSE)) #define AS_LOCKED_INT64(OBJECT) ((struct locked_int64 *) (void *) (OBJECT)) #define AS_INT64(OBJECT) ((int64_t *) (OBJECT)) struct locked_int64 { int64_t value; }; int64_t locked_int64_load(const struct locked_int64 *); void locked_int64_store(struct locked_int64 *, 
int64_t); int64_t locked_int64_add(struct locked_int64 *, int64_t arg); int64_t locked_int64_sub(struct locked_int64 *, int64_t arg); int64_t locked_int64_or(struct locked_int64 *, int64_t arg); int64_t locked_int64_xor(struct locked_int64 *, int64_t arg); int64_t locked_int64_and(struct locked_int64 *, int64_t arg); #define ATOMIC_VAR_INIT(VALUE) { .value = (VALUE) } #define atomic_init(OBJECT, VALUE) ((OBJECT)->value = (VALUE), (void) 0) static inline void atomic_thread_fence(memory_order order) { if (order != memory_order_relaxed) { __sync_synchronize(); } } static inline void atomic_thread_fence_if_seq_cst(memory_order order) { if (order == memory_order_seq_cst) { __sync_synchronize(); } } static inline void atomic_signal_fence(memory_order order OVS_UNUSED) { if (order != memory_order_relaxed) { asm volatile("" : : : "memory"); } } #define ATOMIC_SWITCH(OBJECT, LOCKLESS_CASE, \ LOCKED_UINT64_CASE, LOCKED_INT64_CASE) \ IF_LOCKED_UINT64(OBJECT, LOCKED_UINT64_CASE, \ IF_LOCKED_INT64(OBJECT, LOCKED_INT64_CASE, \ LOCKLESS_CASE)) #define atomic_is_lock_free(OBJ) \ ((void) (OBJ)->value, \ ATOMIC_SWITCH(OBJ, true, false, false)) #define atomic_store(DST, SRC) \ atomic_store_explicit(DST, SRC, memory_order_seq_cst) #define atomic_store_explicit(DST, SRC, ORDER) \ (ATOMIC_SWITCH(DST, \ (atomic_thread_fence(ORDER), \ (DST)->value = (SRC), \ atomic_thread_fence_if_seq_cst(ORDER)), \ locked_uint64_store(AS_LOCKED_UINT64(DST), SRC), \ locked_int64_store(AS_LOCKED_INT64(DST), SRC)), \ (void) 0) #define atomic_read(SRC, DST) \ atomic_read_explicit(SRC, DST, memory_order_seq_cst) #define atomic_read_explicit(SRC, DST, ORDER) \ (ATOMIC_SWITCH(SRC, \ (atomic_thread_fence_if_seq_cst(ORDER), \ (*DST) = (SRC)->value, \ atomic_thread_fence(ORDER)), \ *(DST) = locked_uint64_load(AS_LOCKED_UINT64(SRC)), \ *(DST) = locked_int64_load(AS_LOCKED_INT64(SRC))), \ (void) 0) #define atomic_op__(RMW, OP, ARG, ORIG) \ (ATOMIC_SWITCH(RMW, \ *(ORIG) = __sync_fetch_and_##OP(&(RMW)->value, ARG), \ 
*(ORIG) = locked_uint64_##OP(AS_LOCKED_UINT64(RMW), ARG), \ *(ORIG) = locked_int64_##OP(AS_LOCKED_INT64(RMW), ARG)), \ (void) 0) #define atomic_add(RMW, ARG, ORIG) atomic_op__(RMW, add, ARG, ORIG) #define atomic_sub(RMW, ARG, ORIG) atomic_op__(RMW, sub, ARG, ORIG) #define atomic_or( RMW, ARG, ORIG) atomic_op__(RMW, or, ARG, ORIG) #define atomic_xor(RMW, ARG, ORIG) atomic_op__(RMW, xor, ARG, ORIG) #define atomic_and(RMW, ARG, ORIG) atomic_op__(RMW, and, ARG, ORIG) #define atomic_add_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_add(RMW, OPERAND, ORIG)) #define atomic_sub_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_sub(RMW, OPERAND, ORIG)) #define atomic_or_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_or(RMW, OPERAND, ORIG)) #define atomic_xor_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_xor(RMW, OPERAND, ORIG)) #define atomic_and_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_and(RMW, OPERAND, ORIG)) /* atomic_flag */ typedef struct { int b; } atomic_flag; #define ATOMIC_FLAG_INIT { false } static inline bool atomic_flag_test_and_set(volatile atomic_flag *object) { return __sync_lock_test_and_set(&object->b, 1); } static inline bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object, memory_order order OVS_UNUSED) { return atomic_flag_test_and_set(object); } static inline void atomic_flag_clear(volatile atomic_flag *object) { __sync_lock_release(&object->b); } static inline void atomic_flag_clear_explicit(volatile atomic_flag *object, memory_order order OVS_UNUSED) { atomic_flag_clear(object); } openvswitch-2.0.1+git20140120/lib/ovs-atomic-gcc4.7+.h000066400000000000000000000077601226605124000215220ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* This header implements atomic operation primitives on GCC 4.7 and later. */ #ifndef IN_OVS_ATOMIC_H #error "This header should only be included indirectly via ovs-atomic.h." #endif /* C11 standardized atomic type. */ typedef bool atomic_bool; typedef char atomic_char; typedef signed char atomic_schar; typedef unsigned char atomic_uchar; typedef short atomic_short; typedef unsigned short atomic_ushort; typedef int atomic_int; typedef unsigned int atomic_uint; typedef long atomic_long; typedef unsigned long atomic_ulong; typedef long long atomic_llong; typedef unsigned long long atomic_ullong; typedef size_t atomic_size_t; typedef ptrdiff_t atomic_ptrdiff_t; typedef intmax_t atomic_intmax_t; typedef uintmax_t atomic_uintmax_t; typedef intptr_t atomic_intptr_t; typedef uintptr_t atomic_uintptr_t; /* Nonstandard atomic types. 
*/ typedef int8_t atomic_int8_t; typedef uint8_t atomic_uint8_t; typedef int16_t atomic_int16_t; typedef uint16_t atomic_uint16_t; typedef int32_t atomic_int32_t; typedef uint32_t atomic_uint32_t; typedef int64_t atomic_int64_t; typedef uint64_t atomic_uint64_t; typedef enum { memory_order_relaxed = __ATOMIC_RELAXED, memory_order_consume = __ATOMIC_CONSUME, memory_order_acquire = __ATOMIC_ACQUIRE, memory_order_release = __ATOMIC_RELEASE, memory_order_acq_rel = __ATOMIC_ACQ_REL, memory_order_seq_cst = __ATOMIC_SEQ_CST } memory_order; #define ATOMIC_VAR_INIT(VALUE) (VALUE) #define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0) #define atomic_thread_fence __atomic_thread_fence #define atomic_signal_fence __atomic_signal_fence #define atomic_is_lock_free __atomic_is_lock_free #define atomic_store(DST, SRC) \ atomic_store_explicit(DST, SRC, memory_order_seq_cst) #define atomic_store_explicit __atomic_store_n #define atomic_read(SRC, DST) \ atomic_read_explicit(SRC, DST, memory_order_seq_cst) #define atomic_read_explicit(SRC, DST, ORDER) \ (*(DST) = __atomic_load_n(SRC, ORDER), \ (void) 0) #define atomic_add(RMW, OPERAND, ORIG) \ atomic_add_explicit(RMW, OPERAND, ORIG, memory_order_seq_cst) #define atomic_sub(RMW, OPERAND, ORIG) \ atomic_sub_explicit(RMW, OPERAND, ORIG, memory_order_seq_cst) #define atomic_or(RMW, OPERAND, ORIG) \ atomic_or_explicit(RMW, OPERAND, ORIG, memory_order_seq_cst) #define atomic_xor(RMW, OPERAND, ORIG) \ atomic_xor_explicit(RMW, OPERAND, ORIG, memory_order_seq_cst) #define atomic_and(RMW, OPERAND, ORIG) \ atomic_and_explicit(RMW, OPERAND, ORIG, memory_order_seq_cst) #define atomic_add_explicit(RMW, OPERAND, ORIG, ORDER) \ (*(ORIG) = __atomic_fetch_add(RMW, OPERAND, ORDER), (void) 0) #define atomic_sub_explicit(RMW, OPERAND, ORIG, ORDER) \ (*(ORIG) = __atomic_fetch_sub(RMW, OPERAND, ORDER), (void) 0) #define atomic_or_explicit(RMW, OPERAND, ORIG, ORDER) \ (*(ORIG) = __atomic_fetch_or(RMW, OPERAND, ORDER), (void) 0) #define 
atomic_xor_explicit(RMW, OPERAND, ORIG, ORDER) \ (*(ORIG) = __atomic_fetch_xor(RMW, OPERAND, ORDER), (void) 0) #define atomic_and_explicit(RMW, OPERAND, ORIG, ORDER) \ (*(ORIG) = __atomic_fetch_and(RMW, OPERAND, ORDER), (void) 0) #include "ovs-atomic-flag-gcc4.7+.h" openvswitch-2.0.1+git20140120/lib/ovs-atomic-pthreads.c000066400000000000000000000030631226605124000222570ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ovs-atomic.h" #include "ovs-thread.h" #if OVS_ATOMIC_PTHREADS_IMPL bool atomic_flag_test_and_set(volatile atomic_flag *flag_) { atomic_flag *flag = CONST_CAST(atomic_flag *, flag_); bool old_value; xpthread_mutex_lock(&flag->mutex); old_value = flag->b; flag->b = true; xpthread_mutex_unlock(&flag->mutex); return old_value; } bool atomic_flag_test_and_set_explicit(volatile atomic_flag *flag, memory_order order OVS_UNUSED) { return atomic_flag_test_and_set(flag); } void atomic_flag_clear(volatile atomic_flag *flag_) { atomic_flag *flag = CONST_CAST(atomic_flag *, flag_); xpthread_mutex_lock(&flag->mutex); flag->b = false; xpthread_mutex_unlock(&flag->mutex); } void atomic_flag_clear_explicit(volatile atomic_flag *flag, memory_order order OVS_UNUSED) { return atomic_flag_clear(flag); } #endif /* OVS_ATOMIC_PTHREADS_IMPL */ openvswitch-2.0.1+git20140120/lib/ovs-atomic-pthreads.h000066400000000000000000000127601226605124000222700ustar00rootroot00000000000000/* * Copyright (c) 2013 
Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* This header implements atomic operation primitives using pthreads. */ #ifndef IN_OVS_ATOMIC_H #error "This header should only be included indirectly via ovs-atomic.h." #endif #define OVS_ATOMIC_PTHREADS_IMPL 1 #define DEFINE_PTHREAD_ATOMIC(TYPE, NAME) \ typedef struct { \ TYPE value; \ pthread_mutex_t mutex; \ } NAME; #define ATOMIC_BOOL_LOCK_FREE 0 DEFINE_PTHREAD_ATOMIC(bool, atomic_bool); #define ATOMIC_CHAR_LOCK_FREE 0 DEFINE_PTHREAD_ATOMIC(char, atomic_char); DEFINE_PTHREAD_ATOMIC(signed char, atomic_schar); DEFINE_PTHREAD_ATOMIC(unsigned char, atomic_uchar); #define ATOMIC_SHORT_LOCK_FREE 0 DEFINE_PTHREAD_ATOMIC(short, atomic_short); DEFINE_PTHREAD_ATOMIC(unsigned short, atomic_ushort); #define ATOMIC_INT_LOCK_FREE 0 DEFINE_PTHREAD_ATOMIC(int, atomic_int); DEFINE_PTHREAD_ATOMIC(unsigned int, atomic_uint); #define ATOMIC_LONG_LOCK_FREE 0 DEFINE_PTHREAD_ATOMIC(long, atomic_long); DEFINE_PTHREAD_ATOMIC(unsigned long, atomic_ulong); #define ATOMIC_LLONG_LOCK_FREE 0 DEFINE_PTHREAD_ATOMIC(long long, atomic_llong); DEFINE_PTHREAD_ATOMIC(unsigned long long, atomic_ullong); DEFINE_PTHREAD_ATOMIC(size_t, atomic_size_t); DEFINE_PTHREAD_ATOMIC(ptrdiff_t, atomic_ptrdiff_t); DEFINE_PTHREAD_ATOMIC(intmax_t, atomic_intmax_t); DEFINE_PTHREAD_ATOMIC(uintmax_t, atomic_uintmax_t); #define ATOMIC_POINTER_LOCK_FREE 0 DEFINE_PTHREAD_ATOMIC(intptr_t, atomic_intptr_t); DEFINE_PTHREAD_ATOMIC(uintptr_t, atomic_uintptr_t); /* 
Nonstandard atomic types. */ DEFINE_PTHREAD_ATOMIC(uint8_t, atomic_uint8_t); DEFINE_PTHREAD_ATOMIC(uint16_t, atomic_uint16_t); DEFINE_PTHREAD_ATOMIC(uint32_t, atomic_uint32_t); DEFINE_PTHREAD_ATOMIC(int8_t, atomic_int8_t); DEFINE_PTHREAD_ATOMIC(int16_t, atomic_int16_t); DEFINE_PTHREAD_ATOMIC(int32_t, atomic_int32_t); DEFINE_PTHREAD_ATOMIC(uint64_t, atomic_uint64_t); DEFINE_PTHREAD_ATOMIC(int64_t, atomic_int64_t); typedef enum { memory_order_relaxed, memory_order_consume, memory_order_acquire, memory_order_release, memory_order_acq_rel, memory_order_seq_cst } memory_order; #define ATOMIC_VAR_INIT(VALUE) { VALUE, PTHREAD_MUTEX_INITIALIZER } #define atomic_init(OBJECT, VALUE) \ ((OBJECT)->value = (VALUE), \ pthread_mutex_init(&(OBJECT)->mutex, NULL), \ (void) 0) static inline void atomic_thread_fence(memory_order order OVS_UNUSED) { /* Nothing to do. */ } static inline void atomic_signal_fence(memory_order order OVS_UNUSED) { /* Nothing to do. */ } #define atomic_is_lock_free(OBJ) false #define atomic_store(DST, SRC) \ (pthread_mutex_lock(&(DST)->mutex), \ (DST)->value = (SRC), \ pthread_mutex_unlock(&(DST)->mutex), \ (void) 0) #define atomic_store_explicit(DST, SRC, ORDER) \ ((void) (ORDER), atomic_store(DST, SRC)) #define atomic_read(SRC, DST) \ (pthread_mutex_lock(CONST_CAST(pthread_mutex_t *, &(SRC)->mutex)), \ *(DST) = (SRC)->value, \ pthread_mutex_unlock(CONST_CAST(pthread_mutex_t *, &(SRC)->mutex)), \ (void) 0) #define atomic_read_explicit(SRC, DST, ORDER) \ ((void) (ORDER), atomic_read(SRC, DST)) #define atomic_op__(RMW, OPERATOR, OPERAND, ORIG) \ (pthread_mutex_lock(&(RMW)->mutex), \ *(ORIG) = (RMW)->value, \ (RMW)->value OPERATOR (OPERAND), \ pthread_mutex_unlock(&(RMW)->mutex), \ (void) 0) #define atomic_add(RMW, OPERAND, ORIG) atomic_op__(RMW, +=, OPERAND, ORIG) #define atomic_sub(RMW, OPERAND, ORIG) atomic_op__(RMW, -=, OPERAND, ORIG) #define atomic_or( RMW, OPERAND, ORIG) atomic_op__(RMW, |=, OPERAND, ORIG) #define atomic_xor(RMW, OPERAND, ORIG) 
atomic_op__(RMW, ^=, OPERAND, ORIG) #define atomic_and(RMW, OPERAND, ORIG) atomic_op__(RMW, &=, OPERAND, ORIG) #define atomic_add_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_add(RMW, OPERAND, ORIG)) #define atomic_sub_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_sub(RMW, OPERAND, ORIG)) #define atomic_or_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_or(RMW, OPERAND, ORIG)) #define atomic_xor_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_xor(RMW, OPERAND, ORIG)) #define atomic_and_explicit(RMW, OPERAND, ORIG, ORDER) \ ((void) (ORDER), atomic_and(RMW, OPERAND, ORIG)) /* atomic_flag */ typedef struct { bool b; pthread_mutex_t mutex; } atomic_flag; #define ATOMIC_FLAG_INIT { false, PTHREAD_MUTEX_INITIALIZER } bool atomic_flag_test_and_set(volatile atomic_flag *); bool atomic_flag_test_and_set_explicit(volatile atomic_flag *, memory_order); void atomic_flag_clear(volatile atomic_flag *); void atomic_flag_clear_explicit(volatile atomic_flag *, memory_order); openvswitch-2.0.1+git20140120/lib/ovs-atomic.h000066400000000000000000000211071226605124000204530ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVS_ATOMIC_H #define OVS_ATOMIC_H 1 /* Atomic operations. * * This library implements atomic operations with an API based on the one * defined in C11. 
It includes multiple implementations for compilers and * libraries with varying degrees of built-in support for C11, including a * fallback implementation for systems that have pthreads but no other support * for atomics. * * This comment describes the common features of all the implementations. * * * Types * ===== * * The following atomic types are supported as typedefs for atomic versions of * the listed ordinary types: * * ordinary type atomic version * ------------------- ---------------------- * bool atomic_bool * * char atomic_char * signed char atomic_schar * unsigned char atomic_uchar * * short atomic_short * unsigned short atomic_ushort * * int atomic_int * unsigned int atomic_uint * * long atomic_long * unsigned long atomic_ulong * * long long atomic_llong * unsigned long long atomic_ullong * * size_t atomic_size_t * ptrdiff_t atomic_ptrdiff_t * * intmax_t atomic_intmax_t * uintmax_t atomic_uintmax_t * * intptr_t atomic_intptr_t * uintptr_t atomic_uintptr_t * * uint8_t atomic_uint8_t (*) * uint16_t atomic_uint16_t (*) * uint32_t atomic_uint32_t (*) * int8_t atomic_int8_t (*) * int16_t atomic_int16_t (*) * int32_t atomic_int32_t (*) * uint64_t atomic_uint64_t (*) * int64_t atomic_int64_t (*) * * (*) Not specified by C11. * * The atomic version of a type doesn't necessarily have the same size or * representation as the ordinary version; for example, atomic_int might be a * typedef for a struct that also includes a mutex. The range of an atomic * type does match the range of the corresponding ordinary type. * * C11 says that one may use the _Atomic keyword in place of the typedef name, * e.g. "_Atomic int" instead of "atomic_int". This library doesn't support * that. * * * Initialization * ============== * * To initialize an atomic variable at its point of definition, use * ATOMIC_VAR_INIT: * * static atomic_int ai = ATOMIC_VAR_INIT(123); * * To initialize an atomic variable in code, use atomic_init(): * * static atomic_int ai; * ... 
* atomic_init(&ai, 123); * * * Barriers * ======== * * enum memory_order specifies the strictness of a memory barrier. It has the * following values: * * memory_order_relaxed: * * Compiler barrier only. Does not imply any CPU memory ordering. * * memory_order_acquire: * * Memory accesses after an acquire barrier cannot be moved before the * barrier. Memory accesses before an acquire barrier *can* be moved * after it. * * memory_order_release: * * Memory accesses before a release barrier cannot be moved after the * barrier. Memory accesses after a release barrier *can* be moved * before it. * * memory_order_acq_rel: * * Memory accesses cannot be moved across an acquire-release barrier in * either direction. * * memory_order_seq_cst: * * Prevents movement of memory accesses like an acquire-release barrier, * but whereas acquire-release synchronizes cooperating threads, * sequential-consistency synchronizes the whole system. * * memory_order_consume: * * A slight relaxation of memory_order_acquire. * * The following functions insert explicit barriers. Most of the other atomic * functions also include barriers. * * void atomic_thread_fence(memory_order order); * * Inserts a barrier of the specified type. * * For memory_order_relaxed, this is a no-op. * * void atomic_signal_fence(memory_order order); * * Inserts a barrier of the specified type, but only with respect to * signal handlers in the same thread as the barrier. This is * basically a compiler optimization barrier, except for * memory_order_relaxed, which is a no-op. * * * Atomic Operations * ================= * * In this section, A is an atomic type and C is the corresponding non-atomic * type. * * The "store" primitives match C11: * * void atomic_store(A *object, C value); * void atomic_store_explicit(A *object, C value, memory_order); * * Atomically stores 'value' into '*object', respecting the given * memory order (or memory_order_seq_cst for atomic_store()). 
* * The following primitives differ from the C11 ones (and have different names) * because there does not appear to be a way to implement the standard * primitives in standard C: * * void atomic_read(A *src, C *dst); * void atomic_read_explicit(A *src, C *dst, memory_order); * * Atomically loads a value from 'src', writing the value read into * '*dst', respecting the given memory order (or memory_order_seq_cst * for atomic_read()). * * void atomic_add(A *rmw, C arg, C *orig); * void atomic_sub(A *rmw, C arg, C *orig); * void atomic_or(A *rmw, C arg, C *orig); * void atomic_xor(A *rmw, C arg, C *orig); * void atomic_and(A *rmw, C arg, C *orig); * void atomic_add_explicit(A *rmw, C arg, C *orig, memory_order); * void atomic_sub_explicit(A *rmw, C arg, C *orig, memory_order); * void atomic_or_explicit(A *rmw, C arg, C *orig, memory_order); * void atomic_xor_explicit(A *rmw, C arg, C *orig, memory_order); * void atomic_and_explicit(A *rmw, C arg, C *orig, memory_order); * * Atomically applies the given operation, with 'arg' as the second * operand, to '*rmw', and stores the original value of '*rmw' into * '*orig', respecting the given memory order (or memory_order_seq_cst * if none is specified). * * The results are similar to those that would be obtained with +=, -=, * |=, ^=, or &= on non-atomic types. * * * atomic_flag * =========== * * atomic_flag is a typedef for a type with two states, set and clear, that * provides atomic test-and-set functionality. * * ATOMIC_FLAG_INIT is an initializer for atomic_flag. The initial state is * "clear". * * The following functions are available. * * bool atomic_flag_test_and_set(atomic_flag *object); * bool atomic_flag_test_and_set_explicit(atomic_flag *object, * memory_order); * * Atomically sets '*object', respecting the given memory order (or * memory_order_seq_cst for atomic_flag_test_and_set()). Returns the * previous value of the flag (false for clear, true for set). 
* * void atomic_flag_clear(atomic_flag *object); * void atomic_flag_clear_explicit(atomic_flag *object, memory_order); * * Atomically clears '*object', respecting the given memory order (or * memory_order_seq_cst for atomic_flag_clear()). */ #include #include #include #include #include #include "compiler.h" #include "util.h" /* Select exactly one implementation header. The branches are tried in order, so the first one whose condition holds wins. */ #define IN_OVS_ATOMIC_H #if __CHECKER__ /* sparse doesn't understand some GCC extensions we use. */ #include "ovs-atomic-pthreads.h" #elif HAVE_STDATOMIC_H #include "ovs-atomic-c11.h" #elif __has_extension(c_atomic) #include "ovs-atomic-clang.h" #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7) /* GCC 4.7 or later. The major version is compared separately so that GCC 5.0 and later, whose minor number restarts at 0, also match. */ #include "ovs-atomic-gcc4.7+.h" #elif HAVE_GCC4_ATOMICS #include "ovs-atomic-gcc4+.h" #else #include "ovs-atomic-pthreads.h" #endif #undef IN_OVS_ATOMIC_H #endif /* ovs-atomic.h */ openvswitch-2.0.1+git20140120/lib/ovs-thread.c000066400000000000000000000221571226605124000204470ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ovs-thread.h" #include #include #include #include #include "compiler.h" #include "poll-loop.h" #include "socket-util.h" #include "util.h" #ifdef __CHECKER__ /* Omit the definitions in this file because they are somewhat difficult to * write without prompting "sparse" complaints, without ugliness or * cut-and-paste. Since "sparse" is just a checker, not a compiler, it * doesn't matter that we don't define them. 
*/ #else #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ovs_thread); /* If there is a reason that we cannot fork anymore (unless the fork will be * immediately followed by an exec), then this points to a string that * explains why. */ static const char *must_not_fork; /* True if we created any threads beyond the main initial thread. */ static bool multithreaded; #define LOCK_FUNCTION(TYPE, FUN) \ void \ ovs_##TYPE##_##FUN##_at(const struct ovs_##TYPE *l_, \ const char *where) \ OVS_NO_THREAD_SAFETY_ANALYSIS \ { \ struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \ int error = pthread_##TYPE##_##FUN(&l->lock); \ if (OVS_UNLIKELY(error)) { \ ovs_abort(error, "pthread_%s_%s failed", #TYPE, #FUN); \ } \ l->where = where; \ } LOCK_FUNCTION(mutex, lock); LOCK_FUNCTION(rwlock, rdlock); LOCK_FUNCTION(rwlock, wrlock); #define TRY_LOCK_FUNCTION(TYPE, FUN) \ int \ ovs_##TYPE##_##FUN##_at(const struct ovs_##TYPE *l_, \ const char *where) \ OVS_NO_THREAD_SAFETY_ANALYSIS \ { \ struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \ int error = pthread_##TYPE##_##FUN(&l->lock); \ if (OVS_UNLIKELY(error) && error != EBUSY) { \ ovs_abort(error, "pthread_%s_%s failed", #TYPE, #FUN); \ } \ if (!error) { \ l->where = where; \ } \ return error; \ } TRY_LOCK_FUNCTION(mutex, trylock); TRY_LOCK_FUNCTION(rwlock, tryrdlock); TRY_LOCK_FUNCTION(rwlock, trywrlock); #define UNLOCK_FUNCTION(TYPE, FUN) \ void \ ovs_##TYPE##_##FUN(const struct ovs_##TYPE *l_) \ OVS_NO_THREAD_SAFETY_ANALYSIS \ { \ struct ovs_##TYPE *l = CONST_CAST(struct ovs_##TYPE *, l_); \ int error; \ l->where = NULL; \ error = pthread_##TYPE##_##FUN(&l->lock); \ if (OVS_UNLIKELY(error)) { \ ovs_abort(error, "pthread_%s_%sfailed", #TYPE, #FUN); \ } \ } UNLOCK_FUNCTION(mutex, unlock); UNLOCK_FUNCTION(mutex, destroy); UNLOCK_FUNCTION(rwlock, unlock); UNLOCK_FUNCTION(rwlock, destroy); #define XPTHREAD_FUNC1(FUNCTION, PARAM1) \ void \ x##FUNCTION(PARAM1 arg1) \ { \ int error = FUNCTION(arg1); \ if (OVS_UNLIKELY(error)) { 
\ ovs_abort(error, "%s failed", #FUNCTION); \ } \ } #define XPTHREAD_FUNC2(FUNCTION, PARAM1, PARAM2) \ void \ x##FUNCTION(PARAM1 arg1, PARAM2 arg2) \ { \ int error = FUNCTION(arg1, arg2); \ if (OVS_UNLIKELY(error)) { \ ovs_abort(error, "%s failed", #FUNCTION); \ } \ } XPTHREAD_FUNC1(pthread_mutex_lock, pthread_mutex_t *); XPTHREAD_FUNC1(pthread_mutex_unlock, pthread_mutex_t *); XPTHREAD_FUNC1(pthread_mutexattr_init, pthread_mutexattr_t *); XPTHREAD_FUNC1(pthread_mutexattr_destroy, pthread_mutexattr_t *); XPTHREAD_FUNC2(pthread_mutexattr_settype, pthread_mutexattr_t *, int); XPTHREAD_FUNC2(pthread_mutexattr_gettype, pthread_mutexattr_t *, int *); XPTHREAD_FUNC2(pthread_cond_init, pthread_cond_t *, pthread_condattr_t *); XPTHREAD_FUNC1(pthread_cond_destroy, pthread_cond_t *); XPTHREAD_FUNC1(pthread_cond_signal, pthread_cond_t *); XPTHREAD_FUNC1(pthread_cond_broadcast, pthread_cond_t *); XPTHREAD_FUNC2(pthread_join, pthread_t, void **); typedef void destructor_func(void *); XPTHREAD_FUNC2(pthread_key_create, pthread_key_t *, destructor_func *); XPTHREAD_FUNC2(pthread_setspecific, pthread_key_t, const void *); static void ovs_mutex_init__(const struct ovs_mutex *l_, int type) { struct ovs_mutex *l = CONST_CAST(struct ovs_mutex *, l_); pthread_mutexattr_t attr; int error; l->where = NULL; xpthread_mutexattr_init(&attr); xpthread_mutexattr_settype(&attr, type); error = pthread_mutex_init(&l->lock, &attr); if (OVS_UNLIKELY(error)) { ovs_abort(error, "pthread_mutex_init failed"); } xpthread_mutexattr_destroy(&attr); } /* Initializes 'mutex' as a normal (non-recursive) mutex. */ void ovs_mutex_init(const struct ovs_mutex *mutex) { ovs_mutex_init__(mutex, PTHREAD_MUTEX_ERRORCHECK); } /* Initializes 'mutex' as a recursive mutex. 
*/ void ovs_mutex_init_recursive(const struct ovs_mutex *mutex) { ovs_mutex_init__(mutex, PTHREAD_MUTEX_RECURSIVE); } void ovs_rwlock_init(const struct ovs_rwlock *l_) { struct ovs_rwlock *l = CONST_CAST(struct ovs_rwlock *, l_); int error; l->where = NULL; error = pthread_rwlock_init(&l->lock, NULL); if (OVS_UNLIKELY(error)) { ovs_abort(error, "pthread_rwlock_init failed"); } } void ovs_mutex_cond_wait(pthread_cond_t *cond, const struct ovs_mutex *mutex_) { struct ovs_mutex *mutex = CONST_CAST(struct ovs_mutex *, mutex_); int error = pthread_cond_wait(cond, &mutex->lock); if (OVS_UNLIKELY(error)) { ovs_abort(error, "pthread_cond_wait failed"); } } DEFINE_EXTERN_PER_THREAD_DATA(ovsthread_id, 0); struct ovsthread_aux { void *(*start)(void *); void *arg; }; static void * ovsthread_wrapper(void *aux_) { static atomic_uint next_id = ATOMIC_VAR_INIT(1); struct ovsthread_aux *auxp = aux_; struct ovsthread_aux aux; unsigned int id; atomic_add(&next_id, 1, &id); *ovsthread_id_get() = id; aux = *auxp; free(auxp); return aux.start(aux.arg); } void xpthread_create(pthread_t *threadp, pthread_attr_t *attr, void *(*start)(void *), void *arg) { struct ovsthread_aux *aux; pthread_t thread; int error; forbid_forking("multiple threads exist"); multithreaded = true; aux = xmalloc(sizeof *aux); aux->start = start; aux->arg = arg; error = pthread_create(threadp ? threadp : &thread, attr, ovsthread_wrapper, aux); if (error) { ovs_abort(error, "pthread_create failed"); } } bool ovsthread_once_start__(struct ovsthread_once *once) { ovs_mutex_lock(&once->mutex); if (!ovsthread_once_is_done__(once)) { return false; } ovs_mutex_unlock(&once->mutex); return true; } void ovsthread_once_done(struct ovsthread_once *once) { atomic_store(&once->done, true); ovs_mutex_unlock(&once->mutex); } /* Asserts that the process has not yet created any threads (beyond the initial * thread). * * ('where' is used in logging. 
Commonly one would use * assert_single_threaded() to automatically provide the caller's source file * and line number for 'where'.) */ void assert_single_threaded_at(const char *where) { if (multithreaded) { VLOG_FATAL("%s: attempted operation not allowed when multithreaded", where); } } /* Forks the current process (checking that this is allowed). Aborts with * VLOG_FATAL if fork() returns an error, and otherwise returns the value * returned by fork(). * * ('where' is used in logging. Commonly one would use xfork() to * automatically provide the caller's source file and line number for * 'where'.) */ pid_t xfork_at(const char *where) { pid_t pid; if (must_not_fork) { VLOG_FATAL("%s: attempted to fork but forking not allowed (%s)", where, must_not_fork); } pid = fork(); if (pid < 0) { VLOG_FATAL("%s: fork failed (%s)", where, ovs_strerror(errno)); } return pid; } /* Notes that the process must not call fork() from now on, for the specified * 'reason'. (The process may still fork() if it execs itself immediately * afterward.) */ void forbid_forking(const char *reason) { ovs_assert(reason != NULL); must_not_fork = reason; } /* Returns true if the process is allowed to fork, false otherwise. */ bool may_fork(void) { return !must_not_fork; } #endif openvswitch-2.0.1+git20140120/lib/ovs-thread.h000066400000000000000000000547661226605124000204670ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef OVS_THREAD_H #define OVS_THREAD_H 1 #include #include #include #include "ovs-atomic.h" #include "util.h" /* Mutex. */ struct OVS_LOCKABLE ovs_mutex { pthread_mutex_t lock; const char *where; }; /* "struct ovs_mutex" initializer. */ #ifdef PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP #define OVS_MUTEX_INITIALIZER { PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP, NULL } #else #define OVS_MUTEX_INITIALIZER { PTHREAD_MUTEX_INITIALIZER, NULL } #endif /* ovs_mutex functions analogous to pthread_mutex_*() functions. * * Most of these functions abort the process with an error message on any * error. ovs_mutex_trylock() is an exception: it passes through a 0 or EBUSY * return value to the caller and aborts on any other error. */ void ovs_mutex_init(const struct ovs_mutex *); void ovs_mutex_init_recursive(const struct ovs_mutex *); void ovs_mutex_destroy(const struct ovs_mutex *); void ovs_mutex_unlock(const struct ovs_mutex *mutex) OVS_RELEASES(mutex); void ovs_mutex_lock_at(const struct ovs_mutex *mutex, const char *where) OVS_ACQUIRES(mutex); #define ovs_mutex_lock(mutex) \ ovs_mutex_lock_at(mutex, SOURCE_LOCATOR) int ovs_mutex_trylock_at(const struct ovs_mutex *mutex, const char *where) OVS_TRY_LOCK(0, mutex); #define ovs_mutex_trylock(mutex) \ ovs_mutex_trylock_at(mutex, SOURCE_LOCATOR) void ovs_mutex_cond_wait(pthread_cond_t *, const struct ovs_mutex *); /* Wrappers for pthread_mutex_*() that abort the process on any error. * This is still needed when ovs-atomic-pthreads.h is used. */ void xpthread_mutex_lock(pthread_mutex_t *mutex); void xpthread_mutex_unlock(pthread_mutex_t *mutex); /* Wrappers for pthread_mutexattr_*() that abort the process on any error. */ void xpthread_mutexattr_init(pthread_mutexattr_t *); void xpthread_mutexattr_destroy(pthread_mutexattr_t *); void xpthread_mutexattr_settype(pthread_mutexattr_t *, int type); void xpthread_mutexattr_gettype(pthread_mutexattr_t *, int *typep); /* Read-write lock. 
*/ struct OVS_LOCKABLE ovs_rwlock { pthread_rwlock_t lock; const char *where; /* Location passed to the last successful lock call; NULL after init or unlock. */ }; /* "struct ovs_rwlock" initializer. */ #define OVS_RWLOCK_INITIALIZER { PTHREAD_RWLOCK_INITIALIZER, NULL } /* ovs_rwlock functions analogous to pthread_rwlock_*() functions. * * Most of these functions abort the process with an error message on any * error. The "trylock" functions are exceptions: they pass through a 0 or * EBUSY return value to the caller and abort on any other error. */ void ovs_rwlock_init(const struct ovs_rwlock *); void ovs_rwlock_destroy(const struct ovs_rwlock *); void ovs_rwlock_unlock(const struct ovs_rwlock *rwlock) OVS_RELEASES(rwlock); void ovs_rwlock_wrlock_at(const struct ovs_rwlock *rwlock, const char *where) OVS_ACQ_WRLOCK(rwlock); #define ovs_rwlock_wrlock(rwlock) \ ovs_rwlock_wrlock_at(rwlock, SOURCE_LOCATOR) int ovs_rwlock_trywrlock_at(const struct ovs_rwlock *rwlock, const char *where) OVS_TRY_WRLOCK(0, rwlock); #define ovs_rwlock_trywrlock(rwlock) \ ovs_rwlock_trywrlock_at(rwlock, SOURCE_LOCATOR) void ovs_rwlock_rdlock_at(const struct ovs_rwlock *rwlock, const char *where) OVS_ACQ_RDLOCK(rwlock); #define ovs_rwlock_rdlock(rwlock) \ ovs_rwlock_rdlock_at(rwlock, SOURCE_LOCATOR) int ovs_rwlock_tryrdlock_at(const struct ovs_rwlock *rwlock, const char *where) OVS_TRY_RDLOCK(0, rwlock); #define ovs_rwlock_tryrdlock(rwlock) \ ovs_rwlock_tryrdlock_at(rwlock, SOURCE_LOCATOR) /* Wrappers for pthread_cond_*() that abort the process on any error. * * Use ovs_mutex_cond_wait() to wait for a condition. */ void xpthread_cond_init(pthread_cond_t *, pthread_condattr_t *); void xpthread_cond_destroy(pthread_cond_t *); void xpthread_cond_signal(pthread_cond_t *); void xpthread_cond_broadcast(pthread_cond_t *); #ifdef __CHECKER__ /* Replace these functions by the macros already defined in the * annotations, because the macro definitions have correct semantics for the * conditional acquisition that can't be captured in a function annotation. 
* The difference in semantics from pthread_*() to xpthread_*() does not matter * because sparse is not a compiler. */ #define xpthread_mutex_trylock pthread_mutex_trylock #define xpthread_rwlock_tryrdlock pthread_rwlock_tryrdlock #define xpthread_rwlock_trywrlock pthread_rwlock_trywrlock #endif void xpthread_key_create(pthread_key_t *, void (*destructor)(void *)); void xpthread_setspecific(pthread_key_t, const void *); void xpthread_create(pthread_t *, pthread_attr_t *, void *(*)(void *), void *); void xpthread_join(pthread_t, void **); /* Per-thread data. * * Multiple forms of per-thread data exist, each with its own pluses and * minuses: * * - POSIX per-thread data via pthread_key_t is portable to any pthreads * implementation, and allows a destructor function to be defined. It * only (directly) supports per-thread pointers, which are always * initialized to NULL. It requires once-only allocation of a * pthread_key_t value. It is relatively slow. * * - The thread_local feature newly defined in C11 works with * any data type and initializer, and it is fast. thread_local does not * require once-only initialization like pthread_key_t. C11 does not * define what happens if one attempts to access a thread_local object * from a thread other than the one to which that object belongs. There * is no provision to call a user-specified destructor when a thread * ends. * * - The __thread keyword is a GCC extension similar to thread_local but * with a longer history. __thread is not portable to every GCC version * or environment. __thread does not restrict the use of a thread-local * object outside its own thread. * * Here's a handy summary: * * pthread_key_t thread_local __thread * ------------- ------------ ------------- * portability high low medium * speed low high high * supports destructors? yes no no * needs key allocation? yes no no * arbitrary initializer? no yes yes * cross-thread access? 
yes no yes */ /* For static data, use this macro in a source file: * * DEFINE_STATIC_PER_THREAD_DATA(TYPE, NAME, INITIALIZER). * * For global data, "declare" the data in the header and "define" it in * the source file, with: * * DECLARE_EXTERN_PER_THREAD_DATA(TYPE, NAME). * DEFINE_EXTERN_PER_THREAD_DATA(NAME, INITIALIZER). * * One should prefer to use POSIX per-thread data, via pthread_key_t, when its * performance is acceptable, because of its portability (see the table above). * These macros are an alternative that takes advantage of thread_local (and * __thread), for its performance, when it is available, and falls back to * POSIX per-thread data otherwise. * * Defines per-thread variable NAME with the given TYPE, initialized to * INITIALIZER (which must be valid as an initializer for a variable with * static lifetime). * * The public interface to the variable is: * * TYPE *NAME_get(void) * TYPE *NAME_get_unsafe(void) * * Returns the address of this thread's instance of NAME. * * Use NAME_get() in a context where this might be the first use of the * per-thread variable in the program. Use NAME_get_unsafe(), which * avoids a conditional test and is thus slightly faster, in a context * where one knows that NAME_get() has already been called previously. * * There is no "NAME_set()" (or "NAME_set_unsafe()") function. To set the * value of the per-thread variable, dereference the pointer returned by * NAME_get() or NAME_get_unsafe(), e.g. *NAME_get() = 0. */ #if HAVE_THREAD_LOCAL || HAVE___THREAD #if HAVE_THREAD_LOCAL #include #elif HAVE___THREAD #define thread_local __thread #else #error #endif #define DEFINE_STATIC_PER_THREAD_DATA(TYPE, NAME, ...) 
\ typedef TYPE NAME##_type; \ \ static NAME##_type * \ NAME##_get_unsafe(void) \ { \ static thread_local NAME##_type var = __VA_ARGS__; \ return &var; \ } \ \ static NAME##_type * \ NAME##_get(void) \ { \ return NAME##_get_unsafe(); \ } #define DECLARE_EXTERN_PER_THREAD_DATA(TYPE, NAME) \ typedef TYPE NAME##_type; \ extern thread_local NAME##_type NAME##_var; \ \ static inline NAME##_type * \ NAME##_get_unsafe(void) \ { \ return &NAME##_var; \ } \ \ static inline NAME##_type * \ NAME##_get(void) \ { \ return NAME##_get_unsafe(); \ } #define DEFINE_EXTERN_PER_THREAD_DATA(NAME, ...) \ thread_local NAME##_type NAME##_var = __VA_ARGS__; #else /* no C implementation support for thread-local storage */ #define DEFINE_STATIC_PER_THREAD_DATA(TYPE, NAME, ...) \ typedef TYPE NAME##_type; \ static pthread_key_t NAME##_key; \ \ static NAME##_type * \ NAME##_get_unsafe(void) \ { \ return pthread_getspecific(NAME##_key); \ } \ \ static void \ NAME##_once_init(void) \ { \ if (pthread_key_create(&NAME##_key, free)) { \ abort(); \ } \ } \ \ static NAME##_type * \ NAME##_get(void) \ { \ static pthread_once_t once = PTHREAD_ONCE_INIT; \ NAME##_type *value; \ \ pthread_once(&once, NAME##_once_init); \ value = NAME##_get_unsafe(); \ if (!value) { \ static const NAME##_type initial_value = __VA_ARGS__; \ \ value = xmalloc(sizeof *value); \ *value = initial_value; \ xpthread_setspecific(NAME##_key, value); \ } \ return value; \ } /* The key is only declared here ('extern'); DEFINE_EXTERN_PER_THREAD_DATA * provides its single definition. A 'static' key would give every file * that includes the declaration a private key that is never created, so * NAME_get_unsafe() there would not read the key that NAME_get() creates. */ #define DECLARE_EXTERN_PER_THREAD_DATA(TYPE, NAME) \ typedef TYPE NAME##_type; \ extern pthread_key_t NAME##_key; \ \ static inline NAME##_type * \ NAME##_get_unsafe(void) \ { \ return pthread_getspecific(NAME##_key); \ } \ \ NAME##_type *NAME##_get(void); /* Defines the key declared by DECLARE_EXTERN_PER_THREAD_DATA, plus * NAME_get(), which creates the key once and, on a thread's first call, * allocates that thread's instance and sets it to the initializer. */ #define DEFINE_EXTERN_PER_THREAD_DATA(NAME, ...) 
\ pthread_key_t NAME##_key; \ \ static void \ NAME##_once_init(void) \ { \ if (pthread_key_create(&NAME##_key, free)) { \ abort(); \ } \ } \ \ NAME##_type * \ NAME##_get(void) \ { \ static pthread_once_t once = PTHREAD_ONCE_INIT; \ NAME##_type *value; \ \ pthread_once(&once, NAME##_once_init); \ value = NAME##_get_unsafe(); \ if (!value) { \ static const NAME##_type initial_value = __VA_ARGS__; \ \ value = xmalloc(sizeof *value); \ *value = initial_value; \ xpthread_setspecific(NAME##_key, value); \ } \ return value; \ } #endif /* DEFINE_PER_THREAD_MALLOCED_DATA(TYPE, NAME). * * This is a simple wrapper around POSIX per-thread data primitives. It * defines per-thread variable NAME with the given TYPE, which must be a * pointer type. In each thread, the per-thread variable is initialized to * NULL. When a thread terminates, the variable is freed with free(). * * The public interface to the variable is: * * TYPE NAME_get(void) * TYPE NAME_get_unsafe(void) * * Returns the value of per-thread variable NAME in this thread. * * Use NAME_get() in a context where this might be the first use of the * per-thread variable in the program. Use NAME_get_unsafe(), which * avoids a conditional test and is thus slightly faster, in a context * where one knows that NAME_get() has already been called previously. * * TYPE NAME_set(TYPE new_value) * TYPE NAME_set_unsafe(TYPE new_value) * * Sets the value of per-thread variable NAME to 'new_value' in this * thread, and returns its previous value. * * Use NAME_set() in a context where this might be the first use of the * per-thread variable in the program. Use NAME_set_unsafe(), which * avoids a conditional test and is thus slightly faster, in a context * where one knows that NAME_set() has already been called previously. 
*/ #define DEFINE_PER_THREAD_MALLOCED_DATA(TYPE, NAME) \ static pthread_key_t NAME##_key; \ \ static void \ NAME##_once_init(void) \ { \ if (pthread_key_create(&NAME##_key, free)) { \ abort(); \ } \ } \ \ static void \ NAME##_init(void) \ { \ static pthread_once_t once = PTHREAD_ONCE_INIT; \ pthread_once(&once, NAME##_once_init); \ } \ \ static TYPE \ NAME##_get_unsafe(void) \ { \ return pthread_getspecific(NAME##_key); \ } \ \ static OVS_UNUSED TYPE \ NAME##_get(void) \ { \ NAME##_init(); \ return NAME##_get_unsafe(); \ } \ \ static TYPE \ NAME##_set_unsafe(TYPE value) \ { \ TYPE old_value = NAME##_get_unsafe(); \ xpthread_setspecific(NAME##_key, value); \ return old_value; \ } \ \ static OVS_UNUSED TYPE \ NAME##_set(TYPE value) \ { \ NAME##_init(); \ return NAME##_set_unsafe(value); \ } /* Convenient once-only execution. * * * Problem * ======= * * POSIX provides pthread_once_t and pthread_once() as primitives for running a * set of code only once per process execution. They are used like this: * * static void run_once(void) { ...initialization... } * static pthread_once_t once = PTHREAD_ONCE_INIT; * ... * pthread_once(&once, run_once); * * pthread_once() does not allow passing any parameters to the initialization * function, which is often inconvenient, because it means that the function * can only access data declared at file scope. * * * Solution * ======== * * Use ovsthread_once, like this, instead: * * static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; * * if (ovsthread_once_start(&once)) { * ...initialization... 
* ovsthread_once_done(&once); * } */ struct ovsthread_once { atomic_bool done; struct ovs_mutex mutex; }; #define OVSTHREAD_ONCE_INITIALIZER \ { \ ATOMIC_VAR_INIT(false), \ OVS_MUTEX_INITIALIZER, \ } static inline bool ovsthread_once_start(struct ovsthread_once *once) OVS_TRY_LOCK(true, once->mutex); void ovsthread_once_done(struct ovsthread_once *once) OVS_RELEASES(once->mutex); bool ovsthread_once_start__(struct ovsthread_once *once) OVS_TRY_LOCK(false, once->mutex); static inline bool ovsthread_once_is_done__(struct ovsthread_once *once) { bool done; atomic_read_explicit(&once->done, &done, memory_order_relaxed); return done; } /* Returns true if this is the first call to ovsthread_once_start() for * 'once'. In this case, the caller should perform whatever initialization * actions it needs to do, then call ovsthread_once_done() for 'once'. * * Returns false if this is not the first call to ovsthread_once_start() for * 'once'. In this case, the call will not return until after * ovsthread_once_done() has been called. */ static inline bool ovsthread_once_start(struct ovsthread_once *once) { return OVS_UNLIKELY(!ovsthread_once_is_done__(once) && !ovsthread_once_start__(once)); } /* Thread ID. * * pthread_t isn't so nice for some purposes. Its size and representation are * implementation dependent, which means that there is no way to hash it. * This thread ID avoids the problem. */ DECLARE_EXTERN_PER_THREAD_DATA(unsigned int, ovsthread_id); /* Returns a per-thread identifier unique within the lifetime of the * process. 
*/ static inline unsigned int ovsthread_id_self(void) { return *ovsthread_id_get(); } void assert_single_threaded_at(const char *where); #define assert_single_threaded() assert_single_threaded_at(SOURCE_LOCATOR) pid_t xfork_at(const char *where); #define xfork() xfork_at(SOURCE_LOCATOR) void forbid_forking(const char *reason); bool may_fork(void); #endif /* ovs-thread.h */ openvswitch-2.0.1+git20140120/lib/ovs.tmac000066400000000000000000000063771226605124000177120ustar00rootroot00000000000000.\" -*- nroff -*- .\" ovs.tmac .\" .\" Open vSwitch troff macro library . . .\" Continuation line for .IP. .de IQ . br . ns . IP "\\$1" .. . .\" Introduces a sub-subsection .de ST . PP . RS -0.15in . I "\\$1" . RE .. . .\" The content between the lines below is from an-ext.tmac in groff .\" 1.21, with some modifications. .\" ---------------------------------------------------------------------- .\" an-ext.tmac .\" .\" Written by Eric S. Raymond .\" Werner Lemberg .\" .\" Version 2007-Feb-02 .\" .\" Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc. .\" You may freely use, modify and/or distribute this file. .\" .\" .\" The code below provides extension macros for the `man' macro package. .\" Care has been taken to make the code portable; groff extensions are .\" properly hidden so that all troff implementations can use it without .\" changes. .\" .\" With groff, this file is sourced by the `man' macro package itself. .\" Man page authors who are concerned about portability might add the .\" used macros directly to the prologue of the man page(s). . . .\" Convention: Auxiliary macros and registers start with `m' followed .\" by an uppercase letter or digit. . . .\" Declare start of command synopsis. Sets up hanging indentation. .de SY . ie !\\n(mS \{\ . nh . nr mS 1 . nr mA \\n(.j . ad l . nr mI \\n(.i . \} . el \{\ . br . ns . \} . . HP \w'\fB\\$1\fP\ 'u . B "\\$1" .. . . .\" End of command synopsis. Restores adjustment. .de YS . in \\n(mIu . ad \\n(mA . hy \\n(HY . 
nr mS 0 .. . . .\" Declare optional option. .de OP . ie \\n(.$-1 \ . RI "[\fB\\$1\fP" "\ \\$2" "]" . el \ . RB "[" "\\$1" "]" .. . . .\" Start URL. .de UR . ds m1 \\$1\" . nh . if \\n(mH \{\ . \" Start diversion in a new environment. . do ev URL-div . do di URL-div . \} .. . . .\" End URL. .de UE . ie \\n(mH \{\ . br . di . ev . . \" Has there been one or more input lines for the link text? . ie \\n(dn \{\ . do HTML-NS "" . \" Yes, strip off final newline of diversion and emit it. . do chop URL-div . do URL-div \c . do HTML-NS . \} . el \ . do HTML-NS "\\*(m1" \&\\$*\" . \} . el \ \\*(la\\*(m1\\*(ra\\$*\" . . hy \\n(HY .. . . .\" Start email address. .de MT . ds m1 \\$1\" . nh . if \\n(mH \{\ . \" Start diversion in a new environment. . do ev URL-div . do di URL-div . \} .. . . .\" End email address. .de ME . ie \\n(mH \{\ . br . di . ev . . \" Has there been one or more input lines for the link text? . ie \\n(dn \{\ . do HTML-NS "" . \" Yes, strip off final newline of diversion and emit it. . do chop URL-div . do URL-div \c . do HTML-NS . \} . el \ . do HTML-NS "\\*(m1" \&\\$*\" . \} . el \ \\*(la\\*(m1\\*(ra\\$*\" . . hy \\n(HY .. . . .\" Continuation line for .TP header. .de TQ . br . ns . TP \\$1\" no doublequotes around argument! .. . . .\" Start example. .de EX . nr mE \\n(.f . nf . nh . ft CW .. . . .\" End example. .de EE . ft \\n(mE . fi . hy \\n(HY .. . .\" EOF .\" ---------------------------------------------------------------------- openvswitch-2.0.1+git20140120/lib/ovsdb-data.c000066400000000000000000001701321226605124000204140ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ovsdb-data.h" #include #include #include #include #include "dynamic-string.h" #include "hash.h" #include "ovs-thread.h" #include "ovsdb-error.h" #include "ovsdb-parser.h" #include "json.h" #include "shash.h" #include "smap.h" #include "sort.h" #include "unicode.h" static struct json * wrap_json(const char *name, struct json *wrapped) { return json_array_create_2(json_string_create(name), wrapped); } /* Initializes 'atom' with the default value of the given 'type'. * * The default value for an atom is as defined in ovsdb/SPECS: * * - "integer" or "real": 0 * * - "boolean": false * * - "string": "" (the empty string) * * - "uuid": 00000000-0000-0000-0000-000000000000 * * The caller must eventually arrange for 'atom' to be destroyed (with * ovsdb_atom_destroy()). */ void ovsdb_atom_init_default(union ovsdb_atom *atom, enum ovsdb_atomic_type type) { switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: atom->integer = 0; break; case OVSDB_TYPE_REAL: atom->real = 0.0; break; case OVSDB_TYPE_BOOLEAN: atom->boolean = false; break; case OVSDB_TYPE_STRING: atom->string = xmemdup("", 1); break; case OVSDB_TYPE_UUID: uuid_zero(&atom->uuid); break; case OVSDB_N_TYPES: default: NOT_REACHED(); } } /* Returns a read-only atom of the given 'type' that has the default value for * 'type'. The caller must not modify or free the returned atom. * * See ovsdb_atom_init_default() for an explanation of the default value of an * atom. 
*/ const union ovsdb_atom * ovsdb_atom_default(enum ovsdb_atomic_type type) { static union ovsdb_atom default_atoms[OVSDB_N_TYPES]; static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { int i; for (i = 0; i < OVSDB_N_TYPES; i++) { if (i != OVSDB_TYPE_VOID) { ovsdb_atom_init_default(&default_atoms[i], i); } } ovsthread_once_done(&once); } ovs_assert(ovsdb_atomic_type_is_valid(type)); return &default_atoms[type]; } /* Returns true if 'atom', which must have the given 'type', has the default * value for that type. * * See ovsdb_atom_init_default() for an explanation of the default value of an * atom. */ bool ovsdb_atom_is_default(const union ovsdb_atom *atom, enum ovsdb_atomic_type type) { switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: return atom->integer == 0; case OVSDB_TYPE_REAL: return atom->real == 0.0; case OVSDB_TYPE_BOOLEAN: return atom->boolean == false; case OVSDB_TYPE_STRING: return atom->string[0] == '\0'; case OVSDB_TYPE_UUID: return uuid_is_zero(&atom->uuid); case OVSDB_N_TYPES: default: NOT_REACHED(); } } /* Initializes 'new' as a copy of 'old', with the given 'type'. * * The caller must eventually arrange for 'new' to be destroyed (with * ovsdb_atom_destroy()). */ void ovsdb_atom_clone(union ovsdb_atom *new, const union ovsdb_atom *old, enum ovsdb_atomic_type type) { switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: new->integer = old->integer; break; case OVSDB_TYPE_REAL: new->real = old->real; break; case OVSDB_TYPE_BOOLEAN: new->boolean = old->boolean; break; case OVSDB_TYPE_STRING: new->string = xstrdup(old->string); break; case OVSDB_TYPE_UUID: new->uuid = old->uuid; break; case OVSDB_N_TYPES: default: NOT_REACHED(); } } /* Swaps the contents of 'a' and 'b', which need not have the same type. 
*/ void ovsdb_atom_swap(union ovsdb_atom *a, union ovsdb_atom *b) { union ovsdb_atom tmp = *a; *a = *b; *b = tmp; } /* Returns a hash value for 'atom', which has the specified 'type', folding * 'basis' into the calculation. */ uint32_t ovsdb_atom_hash(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, uint32_t basis) { switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: return hash_int(atom->integer, basis); case OVSDB_TYPE_REAL: return hash_double(atom->real, basis); case OVSDB_TYPE_BOOLEAN: return hash_boolean(atom->boolean, basis); case OVSDB_TYPE_STRING: return hash_string(atom->string, basis); case OVSDB_TYPE_UUID: return hash_int(uuid_hash(&atom->uuid), basis); case OVSDB_N_TYPES: default: NOT_REACHED(); } } /* Compares 'a' and 'b', which both have type 'type', and returns a * strcmp()-like result. */ int ovsdb_atom_compare_3way(const union ovsdb_atom *a, const union ovsdb_atom *b, enum ovsdb_atomic_type type) { switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: return a->integer < b->integer ? -1 : a->integer > b->integer; case OVSDB_TYPE_REAL: return a->real < b->real ? 
-1 : a->real > b->real; case OVSDB_TYPE_BOOLEAN: return a->boolean - b->boolean; case OVSDB_TYPE_STRING: return strcmp(a->string, b->string); case OVSDB_TYPE_UUID: return uuid_compare_3way(&a->uuid, &b->uuid); case OVSDB_N_TYPES: default: NOT_REACHED(); } } static struct ovsdb_error * unwrap_json(const struct json *json, const char *name, enum json_type value_type, const struct json **value) { if (json->type != JSON_ARRAY || json->u.array.n != 2 || json->u.array.elems[0]->type != JSON_STRING || (name && strcmp(json->u.array.elems[0]->u.string, name)) || json->u.array.elems[1]->type != value_type) { *value = NULL; return ovsdb_syntax_error(json, NULL, "expected [\"%s\", <%s>]", name, json_type_to_string(value_type)); } *value = json->u.array.elems[1]; return NULL; } static struct ovsdb_error * parse_json_pair(const struct json *json, const struct json **elem0, const struct json **elem1) { if (json->type != JSON_ARRAY || json->u.array.n != 2) { return ovsdb_syntax_error(json, NULL, "expected 2-element array"); } *elem0 = json->u.array.elems[0]; *elem1 = json->u.array.elems[1]; return NULL; } static void ovsdb_symbol_referenced(struct ovsdb_symbol *symbol, const struct ovsdb_base_type *base) { ovs_assert(base->type == OVSDB_TYPE_UUID); if (base->u.uuid.refTableName) { switch (base->u.uuid.refType) { case OVSDB_REF_STRONG: symbol->strong_ref = true; break; case OVSDB_REF_WEAK: symbol->weak_ref = true; break; } } } static struct ovsdb_error * WARN_UNUSED_RESULT ovsdb_atom_parse_uuid(struct uuid *uuid, const struct json *json, struct ovsdb_symbol_table *symtab, const struct ovsdb_base_type *base) { struct ovsdb_error *error0; const struct json *value; error0 = unwrap_json(json, "uuid", JSON_STRING, &value); if (!error0) { const char *uuid_string = json_string(value); if (!uuid_from_string(uuid, uuid_string)) { return ovsdb_syntax_error(json, NULL, "\"%s\" is not a valid UUID", uuid_string); } } else if (symtab) { struct ovsdb_error *error1; error1 = unwrap_json(json, 
"named-uuid", JSON_STRING, &value); if (!error1) { struct ovsdb_symbol *symbol; ovsdb_error_destroy(error0); if (!ovsdb_parser_is_id(json_string(value))) { return ovsdb_syntax_error(json, NULL, "named-uuid string is " "not a valid "); } symbol = ovsdb_symbol_table_insert(symtab, json_string(value)); *uuid = symbol->uuid; ovsdb_symbol_referenced(symbol, base); return NULL; } ovsdb_error_destroy(error1); } return error0; } static struct ovsdb_error * WARN_UNUSED_RESULT ovsdb_atom_from_json__(union ovsdb_atom *atom, const struct ovsdb_base_type *base, const struct json *json, struct ovsdb_symbol_table *symtab) { enum ovsdb_atomic_type type = base->type; switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: if (json->type == JSON_INTEGER) { atom->integer = json->u.integer; return NULL; } break; case OVSDB_TYPE_REAL: if (json->type == JSON_INTEGER) { atom->real = json->u.integer; return NULL; } else if (json->type == JSON_REAL) { atom->real = json->u.real; return NULL; } break; case OVSDB_TYPE_BOOLEAN: if (json->type == JSON_TRUE) { atom->boolean = true; return NULL; } else if (json->type == JSON_FALSE) { atom->boolean = false; return NULL; } break; case OVSDB_TYPE_STRING: if (json->type == JSON_STRING) { atom->string = xstrdup(json->u.string); return NULL; } break; case OVSDB_TYPE_UUID: return ovsdb_atom_parse_uuid(&atom->uuid, json, symtab, base); case OVSDB_N_TYPES: default: NOT_REACHED(); } return ovsdb_syntax_error(json, NULL, "expected %s", ovsdb_atomic_type_to_string(type)); } /* Parses 'json' as an atom of the type described by 'base'. If successful, * returns NULL and initializes 'atom' with the parsed atom. On failure, * returns an error and the contents of 'atom' are indeterminate. The caller * is responsible for freeing the error or the atom that is returned. * * Violations of constraints expressed by 'base' are treated as errors. * * If 'symtab' is nonnull, then named UUIDs in 'symtab' are accepted. 
Refer to * ovsdb/SPECS for information about this, and for the syntax that this * function accepts. If 'base' is a reference and a symbol is parsed, then the * symbol's 'strong_ref' or 'weak_ref' member is set to true, as * appropriate. */ struct ovsdb_error * ovsdb_atom_from_json(union ovsdb_atom *atom, const struct ovsdb_base_type *base, const struct json *json, struct ovsdb_symbol_table *symtab) { struct ovsdb_error *error; error = ovsdb_atom_from_json__(atom, base, json, symtab); if (error) { return error; } error = ovsdb_atom_check_constraints(atom, base); if (error) { ovsdb_atom_destroy(atom, base->type); } return error; } /* Converts 'atom', of the specified 'type', to JSON format, and returns the * JSON. The caller is responsible for freeing the returned JSON. * * Refer to ovsdb/SPECS for the format of the JSON that this function * produces. */ struct json * ovsdb_atom_to_json(const union ovsdb_atom *atom, enum ovsdb_atomic_type type) { switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: return json_integer_create(atom->integer); case OVSDB_TYPE_REAL: return json_real_create(atom->real); case OVSDB_TYPE_BOOLEAN: return json_boolean_create(atom->boolean); case OVSDB_TYPE_STRING: return json_string_create(atom->string); case OVSDB_TYPE_UUID: return wrap_json("uuid", json_string_create_nocopy( xasprintf(UUID_FMT, UUID_ARGS(&atom->uuid)))); case OVSDB_N_TYPES: default: NOT_REACHED(); } } /* Returns strlen(json_to_string(ovsdb_atom_to_json(atom, type), 0)). */ size_t ovsdb_atom_json_length(const union ovsdb_atom *atom, enum ovsdb_atomic_type type) { struct json json; switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: json.type = JSON_INTEGER; json.u.integer = atom->integer; break; case OVSDB_TYPE_REAL: json.type = JSON_REAL; json.u.real = atom->real; break; case OVSDB_TYPE_BOOLEAN: json.type = atom->boolean ? 
JSON_TRUE : JSON_FALSE; break; case OVSDB_TYPE_STRING: json.type = JSON_STRING; json.u.string = atom->string; break; case OVSDB_TYPE_UUID: return strlen("[\"uuid\",\"00000000-0000-0000-0000-000000000000\"]"); case OVSDB_N_TYPES: default: NOT_REACHED(); } return json_serialized_length(&json); } static char * ovsdb_atom_from_string__(union ovsdb_atom *atom, const struct ovsdb_base_type *base, const char *s, struct ovsdb_symbol_table *symtab) { enum ovsdb_atomic_type type = base->type; switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: { long long int integer; if (!str_to_llong(s, 10, &integer)) { return xasprintf("\"%s\" is not a valid integer", s); } atom->integer = integer; } break; case OVSDB_TYPE_REAL: if (!str_to_double(s, &atom->real)) { return xasprintf("\"%s\" is not a valid real number", s); } /* Our JSON input routines map negative zero to zero, so do that here * too for consistency. */ if (atom->real == 0.0) { atom->real = 0.0; } break; case OVSDB_TYPE_BOOLEAN: if (!strcmp(s, "true") || !strcmp(s, "yes") || !strcmp(s, "on") || !strcmp(s, "1")) { atom->boolean = true; } else if (!strcmp(s, "false") || !strcmp(s, "no") || !strcmp(s, "off") || !strcmp(s, "0")) { atom->boolean = false; } else { return xasprintf("\"%s\" is not a valid boolean " "(use \"true\" or \"false\")", s); } break; case OVSDB_TYPE_STRING: if (*s == '\0') { return xstrdup("An empty string is not valid as input; " "use \"\" to represent the empty string"); } else if (*s == '"') { size_t s_len = strlen(s); if (s_len < 2 || s[s_len - 1] != '"') { return xasprintf("%s: missing quote at end of " "quoted string", s); } else if (!json_string_unescape(s + 1, s_len - 2, &atom->string)) { char *error = xasprintf("%s: %s", s, atom->string); free(atom->string); return error; } } else { atom->string = xstrdup(s); } break; case OVSDB_TYPE_UUID: if (*s == '@') { struct ovsdb_symbol *symbol = ovsdb_symbol_table_insert(symtab, s); atom->uuid = symbol->uuid; 
ovsdb_symbol_referenced(symbol, base); } else if (!uuid_from_string(&atom->uuid, s)) { return xasprintf("\"%s\" is not a valid UUID", s); } break; case OVSDB_N_TYPES: default: NOT_REACHED(); } return NULL; } /* Initializes 'atom' to a value of type 'base' parsed from 's', which takes * one of the following forms: * * - OVSDB_TYPE_INTEGER: A decimal integer optionally preceded by a sign. * * - OVSDB_TYPE_REAL: A floating-point number in the format accepted by * strtod(). * * - OVSDB_TYPE_BOOLEAN: "true", "yes", "on", "1" for true, or "false", * "no", "off", or "0" for false. * * - OVSDB_TYPE_STRING: A JSON string if it begins with a quote, otherwise * an arbitrary string. * * - OVSDB_TYPE_UUID: A UUID in RFC 4122 format. If 'symtab' is nonnull, * then an identifier beginning with '@' is also acceptable. If the * named identifier is already in 'symtab', then the associated UUID is * used; otherwise, a new, random UUID is used and added to the symbol * table. If 'base' is a reference and a symbol is parsed, then the * symbol's 'strong_ref' or 'weak_ref' member is set to true, as * appropriate. * * Returns a null pointer if successful, otherwise an error message describing * the problem. On failure, the contents of 'atom' are indeterminate. The * caller is responsible for freeing the atom or the error. 
*/ char * ovsdb_atom_from_string(union ovsdb_atom *atom, const struct ovsdb_base_type *base, const char *s, struct ovsdb_symbol_table *symtab) { struct ovsdb_error *error; char *msg; msg = ovsdb_atom_from_string__(atom, base, s, symtab); if (msg) { return msg; } error = ovsdb_atom_check_constraints(atom, base); if (error) { ovsdb_atom_destroy(atom, base->type); msg = ovsdb_error_to_string(error); ovsdb_error_destroy(error); } return msg; } static bool string_needs_quotes(const char *s) { const char *p = s; unsigned char c; c = *p++; if (!isalpha(c) && c != '_') { return true; } while ((c = *p++) != '\0') { if (!isalpha(c) && c != '_' && c != '-' && c != '.') { return true; } } if (!strcmp(s, "true") || !strcmp(s, "false")) { return true; } return false; } /* Appends 'atom' (which has the given 'type') to 'out', in a format acceptable * to ovsdb_atom_from_string(). */ void ovsdb_atom_to_string(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, struct ds *out) { switch (type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: ds_put_format(out, "%"PRId64, atom->integer); break; case OVSDB_TYPE_REAL: ds_put_format(out, "%.*g", DBL_DIG, atom->real); break; case OVSDB_TYPE_BOOLEAN: ds_put_cstr(out, atom->boolean ? "true" : "false"); break; case OVSDB_TYPE_STRING: if (string_needs_quotes(atom->string)) { struct json json; json.type = JSON_STRING; json.u.string = atom->string; json_to_ds(&json, 0, out); } else { ds_put_cstr(out, atom->string); } break; case OVSDB_TYPE_UUID: ds_put_format(out, UUID_FMT, UUID_ARGS(&atom->uuid)); break; case OVSDB_N_TYPES: default: NOT_REACHED(); } } /* Appends 'atom' (which has the given 'type') to 'out', in a bare string * format that cannot be parsed uniformly back into a datum but is easier for * shell scripts, etc., to deal with. 
*/ void ovsdb_atom_to_bare(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, struct ds *out) { if (type == OVSDB_TYPE_STRING) { ds_put_cstr(out, atom->string); } else { ovsdb_atom_to_string(atom, type, out); } } static struct ovsdb_error * check_string_constraints(const char *s, const struct ovsdb_string_constraints *c) { size_t n_chars; char *msg; msg = utf8_validate(s, &n_chars); if (msg) { struct ovsdb_error *error; error = ovsdb_error("constraint violation", "not a valid UTF-8 string: %s", msg); free(msg); return error; } if (n_chars < c->minLen) { return ovsdb_error( "constraint violation", "\"%s\" length %zu is less than minimum allowed " "length %u", s, n_chars, c->minLen); } else if (n_chars > c->maxLen) { return ovsdb_error( "constraint violation", "\"%s\" length %zu is greater than maximum allowed " "length %u", s, n_chars, c->maxLen); } return NULL; } /* Checks whether 'atom' meets the constraints (if any) defined in 'base'. * (base->type must specify 'atom''s type.) Returns a null pointer if the * constraints are met, otherwise an error that explains the violation. * * Checking UUID constraints is deferred to transaction commit time, so this * function does nothing for UUID constraints. 
*/ struct ovsdb_error * ovsdb_atom_check_constraints(const union ovsdb_atom *atom, const struct ovsdb_base_type *base) { if (base->enum_ && ovsdb_datum_find_key(base->enum_, atom, base->type) == UINT_MAX) { struct ovsdb_error *error; struct ds actual = DS_EMPTY_INITIALIZER; struct ds valid = DS_EMPTY_INITIALIZER; ovsdb_atom_to_string(atom, base->type, &actual); ovsdb_datum_to_string(base->enum_, ovsdb_base_type_get_enum_type(base->type), &valid); error = ovsdb_error("constraint violation", "%s is not one of the allowed values (%s)", ds_cstr(&actual), ds_cstr(&valid)); ds_destroy(&actual); ds_destroy(&valid); return error; } switch (base->type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: if (atom->integer >= base->u.integer.min && atom->integer <= base->u.integer.max) { return NULL; } else if (base->u.integer.min != INT64_MIN) { if (base->u.integer.max != INT64_MAX) { return ovsdb_error("constraint violation", "%"PRId64" is not in the valid range " "%"PRId64" to %"PRId64" (inclusive)", atom->integer, base->u.integer.min, base->u.integer.max); } else { return ovsdb_error("constraint violation", "%"PRId64" is less than minimum allowed " "value %"PRId64, atom->integer, base->u.integer.min); } } else { return ovsdb_error("constraint violation", "%"PRId64" is greater than maximum allowed " "value %"PRId64, atom->integer, base->u.integer.max); } NOT_REACHED(); case OVSDB_TYPE_REAL: if (atom->real >= base->u.real.min && atom->real <= base->u.real.max) { return NULL; } else if (base->u.real.min != -DBL_MAX) { if (base->u.real.max != DBL_MAX) { return ovsdb_error("constraint violation", "%.*g is not in the valid range " "%.*g to %.*g (inclusive)", DBL_DIG, atom->real, DBL_DIG, base->u.real.min, DBL_DIG, base->u.real.max); } else { return ovsdb_error("constraint violation", "%.*g is less than minimum allowed " "value %.*g", DBL_DIG, atom->real, DBL_DIG, base->u.real.min); } } else { return ovsdb_error("constraint violation", "%.*g is greater than maximum 
allowed " "value %.*g", DBL_DIG, atom->real, DBL_DIG, base->u.real.max); } NOT_REACHED(); case OVSDB_TYPE_BOOLEAN: return NULL; case OVSDB_TYPE_STRING: return check_string_constraints(atom->string, &base->u.string); case OVSDB_TYPE_UUID: return NULL; case OVSDB_N_TYPES: default: NOT_REACHED(); } } static union ovsdb_atom * alloc_default_atoms(enum ovsdb_atomic_type type, size_t n) { if (type != OVSDB_TYPE_VOID && n) { union ovsdb_atom *atoms; unsigned int i; atoms = xmalloc(n * sizeof *atoms); for (i = 0; i < n; i++) { ovsdb_atom_init_default(&atoms[i], type); } return atoms; } else { /* Avoid wasting memory in the n == 0 case, because xmalloc(0) is * treated as xmalloc(1). */ return NULL; } } /* Initializes 'datum' as an empty datum. (An empty datum can be treated as * any type.) */ void ovsdb_datum_init_empty(struct ovsdb_datum *datum) { datum->n = 0; datum->keys = NULL; datum->values = NULL; } /* Initializes 'datum' as a datum that has the default value for 'type'. * * The default value for a particular type is as defined in ovsdb/SPECS: * * - If n_min is 0, then the default value is the empty set (or map). * * - If n_min is 1, the default value is a single value or a single * key-value pair, whose key and value are the defaults for their * atomic types. (See ovsdb_atom_init_default() for details.) * * - n_min > 1 is invalid. See ovsdb_type_is_valid(). */ void ovsdb_datum_init_default(struct ovsdb_datum *datum, const struct ovsdb_type *type) { datum->n = type->n_min; datum->keys = alloc_default_atoms(type->key.type, datum->n); datum->values = alloc_default_atoms(type->value.type, datum->n); } /* Returns a read-only datum of the given 'type' that has the default value for * 'type'. The caller must not modify or free the returned datum. * * See ovsdb_datum_init_default() for an explanation of the default value of a * datum. 
*/ const struct ovsdb_datum * ovsdb_datum_default(const struct ovsdb_type *type) { if (type->n_min == 0) { static const struct ovsdb_datum empty; return ∅ } else if (type->n_min == 1) { static struct ovsdb_datum default_data[OVSDB_N_TYPES][OVSDB_N_TYPES]; struct ovsdb_datum *d; int kt = type->key.type; int vt = type->value.type; ovs_assert(ovsdb_type_is_valid(type)); d = &default_data[kt][vt]; if (!d->n) { d->n = 1; d->keys = CONST_CAST(union ovsdb_atom *, ovsdb_atom_default(kt)); if (vt != OVSDB_TYPE_VOID) { d->values = CONST_CAST(union ovsdb_atom *, ovsdb_atom_default(vt)); } } return d; } else { NOT_REACHED(); } } /* Returns true if 'datum', which must have the given 'type', has the default * value for that type. * * See ovsdb_datum_init_default() for an explanation of the default value of a * datum. */ bool ovsdb_datum_is_default(const struct ovsdb_datum *datum, const struct ovsdb_type *type) { size_t i; if (datum->n != type->n_min) { return false; } for (i = 0; i < datum->n; i++) { if (!ovsdb_atom_is_default(&datum->keys[i], type->key.type)) { return false; } if (type->value.type != OVSDB_TYPE_VOID && !ovsdb_atom_is_default(&datum->values[i], type->value.type)) { return false; } } return true; } static union ovsdb_atom * clone_atoms(const union ovsdb_atom *old, enum ovsdb_atomic_type type, size_t n) { if (type != OVSDB_TYPE_VOID && n) { union ovsdb_atom *new; unsigned int i; new = xmalloc(n * sizeof *new); for (i = 0; i < n; i++) { ovsdb_atom_clone(&new[i], &old[i], type); } return new; } else { /* Avoid wasting memory in the n == 0 case, because xmalloc(0) is * treated as xmalloc(1). */ return NULL; } } /* Initializes 'new' as a copy of 'old', with the given 'type'. * * The caller must eventually arrange for 'new' to be destroyed (with * ovsdb_datum_destroy()). 
*/ void ovsdb_datum_clone(struct ovsdb_datum *new, const struct ovsdb_datum *old, const struct ovsdb_type *type) { unsigned int n = old->n; new->n = n; new->keys = clone_atoms(old->keys, type->key.type, n); new->values = clone_atoms(old->values, type->value.type, n); } static void free_data(enum ovsdb_atomic_type type, union ovsdb_atom *atoms, size_t n_atoms) { if (ovsdb_atom_needs_destruction(type)) { unsigned int i; for (i = 0; i < n_atoms; i++) { ovsdb_atom_destroy(&atoms[i], type); } } free(atoms); } /* Frees the data owned by 'datum', which must have the given 'type'. * * This does not actually call free(datum). If necessary, the caller must be * responsible for that. */ void ovsdb_datum_destroy(struct ovsdb_datum *datum, const struct ovsdb_type *type) { free_data(type->key.type, datum->keys, datum->n); free_data(type->value.type, datum->values, datum->n); } /* Swaps the contents of 'a' and 'b', which need not have the same type. */ void ovsdb_datum_swap(struct ovsdb_datum *a, struct ovsdb_datum *b) { struct ovsdb_datum tmp = *a; *a = *b; *b = tmp; } struct ovsdb_datum_sort_cbdata { enum ovsdb_atomic_type key_type; enum ovsdb_atomic_type value_type; struct ovsdb_datum *datum; }; static int ovsdb_datum_sort_compare_cb(size_t a, size_t b, void *cbdata_) { struct ovsdb_datum_sort_cbdata *cbdata = cbdata_; int retval; retval = ovsdb_atom_compare_3way(&cbdata->datum->keys[a], &cbdata->datum->keys[b], cbdata->key_type); if (retval || cbdata->value_type == OVSDB_TYPE_VOID) { return retval; } return ovsdb_atom_compare_3way(&cbdata->datum->values[a], &cbdata->datum->values[b], cbdata->value_type); } static void ovsdb_datum_sort_swap_cb(size_t a, size_t b, void *cbdata_) { struct ovsdb_datum_sort_cbdata *cbdata = cbdata_; ovsdb_atom_swap(&cbdata->datum->keys[a], &cbdata->datum->keys[b]); if (cbdata->datum->values) { ovsdb_atom_swap(&cbdata->datum->values[a], &cbdata->datum->values[b]); } } static void ovsdb_datum_sort__(struct ovsdb_datum *datum, enum ovsdb_atomic_type 
key_type, enum ovsdb_atomic_type value_type) { struct ovsdb_datum_sort_cbdata cbdata; cbdata.key_type = key_type; cbdata.value_type = value_type; cbdata.datum = datum; sort(datum->n, ovsdb_datum_sort_compare_cb, ovsdb_datum_sort_swap_cb, &cbdata); } /* The keys in an ovsdb_datum must be unique and in sorted order. Most * functions that modify an ovsdb_datum maintain these invariants. For those * that don't, this function checks and restores these invariants for 'datum', * whose keys are of type 'key_type'. * * This function returns NULL if successful, otherwise an error message. The * caller must free the returned error when it is no longer needed. On error, * 'datum' is sorted but not unique. */ struct ovsdb_error * ovsdb_datum_sort(struct ovsdb_datum *datum, enum ovsdb_atomic_type key_type) { size_t i; if (datum->n < 2) { return NULL; } ovsdb_datum_sort__(datum, key_type, OVSDB_TYPE_VOID); for (i = 0; i < datum->n - 1; i++) { if (ovsdb_atom_equals(&datum->keys[i], &datum->keys[i + 1], key_type)) { if (datum->values) { return ovsdb_error(NULL, "map contains duplicate key"); } else { return ovsdb_error(NULL, "set contains duplicate"); } } } return NULL; } /* This function is the same as ovsdb_datum_sort(), except that the caller * knows that 'datum' is unique. The operation therefore "cannot fail", so * this function assert-fails if it actually does. */ void ovsdb_datum_sort_assert(struct ovsdb_datum *datum, enum ovsdb_atomic_type key_type) { struct ovsdb_error *error = ovsdb_datum_sort(datum, key_type); if (error) { NOT_REACHED(); } } /* This is similar to ovsdb_datum_sort(), except that it drops duplicate keys * instead of reporting an error. In a map type, the smallest value among a * group of duplicate pairs is retained and the others are dropped. * * Returns the number of keys (or pairs) that were dropped. 
*/
size_t
ovsdb_datum_sort_unique(struct ovsdb_datum *datum,
                        enum ovsdb_atomic_type key_type,
                        enum ovsdb_atomic_type value_type)
{
    size_t src, dst;

    /* Zero or one element cannot contain a duplicate. */
    if (datum->n < 2) {
        return 0;
    }

    /* Sort by key and then by value, so that duplicates of a key are adjacent
     * and the first of each group carries the smallest value. */
    ovsdb_datum_sort__(datum, key_type, value_type);

    /* Compact in place: 'src' scans every element, 'dst' is the number of
     * elements kept so far (element 0 is always kept). */
    dst = 1;
    for (src = 1; src < datum->n; src++) {
        if (ovsdb_atom_equals(&datum->keys[src], &datum->keys[dst - 1],
                              key_type)) {
            /* Same key as the last element kept: drop this one. */
            ovsdb_atom_destroy(&datum->keys[src], key_type);
            if (value_type != OVSDB_TYPE_VOID) {
                ovsdb_atom_destroy(&datum->values[src], value_type);
            }
        } else {
            if (src != dst) {
                datum->keys[dst] = datum->keys[src];
                if (value_type != OVSDB_TYPE_VOID) {
                    datum->values[dst] = datum->values[src];
                }
            }
            dst++;
        }
    }
    datum->n = dst;

    /* The loop ends with 'src' equal to the original element count, so
     * 'src - dst' is how many elements were dropped.  (The reverse
     * subtraction would wrap around, size_t being unsigned.) */
    return src - dst;
}

/* Checks that each of the atoms in 'datum' conforms to the constraints
 * specified by its 'type'.  Returns an error if a constraint is violated,
 * otherwise a null pointer.
 *
 * This function is not commonly useful because the most ordinary way to obtain
 * a datum is ultimately via ovsdb_atom_from_string() or
 * ovsdb_atom_from_json(), which check constraints themselves.
*/
struct ovsdb_error *
ovsdb_datum_check_constraints(const struct ovsdb_datum *datum,
                              const struct ovsdb_type *type)
{
    struct ovsdb_error *error;
    unsigned int i;

    /* All keys are checked first; the first violation found is returned. */
    for (i = 0; i < datum->n; i++) {
        error = ovsdb_atom_check_constraints(&datum->keys[i], &type->key);
        if (error) {
            return error;
        }
    }

    /* Values exist only when the value type is not void. */
    if (type->value.type != OVSDB_TYPE_VOID) {
        for (i = 0; i < datum->n; i++) {
            error = ovsdb_atom_check_constraints(&datum->values[i],
                                                 &type->value);
            if (error) {
                return error;
            }
        }
    }

    return NULL;
}

/* Parses 'json' into 'datum' according to 'type', without sorting the result
 * or checking it for duplicate keys.
 *
 * A map type requires the ["map", [...]] form.  For other types, a
 * ["set", [...]] array is parsed as a set and anything else is parsed as a
 * single atom.
 *
 * Returns NULL on success.  On failure, returns an error that the caller must
 * free; whatever was allocated for 'datum' has already been released. */
static struct ovsdb_error *
ovsdb_datum_from_json__(struct ovsdb_datum *datum,
                        const struct ovsdb_type *type,
                        const struct json *json,
                        struct ovsdb_symbol_table *symtab)
{
    struct ovsdb_error *error;

    if (ovsdb_type_is_map(type)
        || (json->type == JSON_ARRAY
            && json->u.array.n > 0
            && json->u.array.elems[0]->type == JSON_STRING
            && !strcmp(json->u.array.elems[0]->u.string, "set"))) {
        bool is_map = ovsdb_type_is_map(type);
        const char *class = is_map ? "map" : "set";
        const struct json *inner;
        unsigned int i;
        size_t n;

        error = unwrap_json(json, class, JSON_ARRAY, &inner);
        if (error) {
            return error;
        }

        n = inner->u.array.n;
        if (n < type->n_min || n > type->n_max) {
            return ovsdb_syntax_error(json, NULL, "%s must have %u to "
                                      "%u members but %zu are present",
                                      class, type->n_min, type->n_max, n);
        }

        /* datum->n counts only fully parsed elements, so that the cleanup at
         * 'error' destroys exactly the atoms that were initialized. */
        datum->n = 0;
        datum->keys = xmalloc(n * sizeof *datum->keys);
        datum->values = is_map ? xmalloc(n * sizeof *datum->values) : NULL;
        for (i = 0; i < n; i++) {
            const struct json *element = inner->u.array.elems[i];
            const struct json *key = NULL;
            const struct json *value = NULL;

            if (!is_map) {
                key = element;
            } else {
                error = parse_json_pair(element, &key, &value);
                if (error) {
                    goto error;
                }
            }

            error = ovsdb_atom_from_json(&datum->keys[i], &type->key,
                                         key, symtab);
            if (error) {
                goto error;
            }

            if (is_map) {
                error = ovsdb_atom_from_json(&datum->values[i],
                                             &type->value, value, symtab);
                if (error) {
                    /* The key for this element parsed but is not yet counted
                     * in datum->n, so it must be destroyed here. */
                    ovsdb_atom_destroy(&datum->keys[i], type->key.type);
                    goto error;
                }
            }

            datum->n++;
        }
        return NULL;

    error:
        ovsdb_datum_destroy(datum, type);
        return error;
    } else {
        /* A bare atom stands for a one-element set. */
        datum->n = 1;
        datum->keys = xmalloc(sizeof *datum->keys);
        datum->values = NULL;

        error = ovsdb_atom_from_json(&datum->keys[0], &type->key,
                                     json, symtab);
        if (error) {
            free(datum->keys);
        }
        return error;
    }
}

/* Parses 'json' as a datum of the type described by 'type'.  If successful,
 * returns NULL and initializes 'datum' with the parsed datum.  On failure,
 * returns an error and the contents of 'datum' are indeterminate.  The caller
 * is responsible for freeing the error or the datum that is returned.
 *
 * Violations of constraints expressed by 'type' are treated as errors.
 *
 * If 'symtab' is nonnull, then named UUIDs in 'symtab' are accepted.  Refer to
 * ovsdb/SPECS for information about this, and for the syntax that this
 * function accepts. */
struct ovsdb_error *
ovsdb_datum_from_json(struct ovsdb_datum *datum,
                      const struct ovsdb_type *type,
                      const struct json *json,
                      struct ovsdb_symbol_table *symtab)
{
    struct ovsdb_error *error;

    error = ovsdb_datum_from_json__(datum, type, json, symtab);
    if (error) {
        return error;
    }

    /* Sort the parsed elements by key; duplicate keys are an error. */
    error = ovsdb_datum_sort(datum, type->key.type);
    if (error) {
        ovsdb_datum_destroy(datum, type);
    }
    return error;
}

/* Converts 'datum', of the specified 'type', to JSON format, and returns the
 * JSON.  The caller is responsible for freeing the returned JSON.
 *
 * 'type' constraints on datum->n are ignored.
* * Refer to ovsdb/SPECS for the format of the JSON that this function * produces. */ struct json * ovsdb_datum_to_json(const struct ovsdb_datum *datum, const struct ovsdb_type *type) { if (ovsdb_type_is_map(type)) { struct json **elems; size_t i; elems = xmalloc(datum->n * sizeof *elems); for (i = 0; i < datum->n; i++) { elems[i] = json_array_create_2( ovsdb_atom_to_json(&datum->keys[i], type->key.type), ovsdb_atom_to_json(&datum->values[i], type->value.type)); } return wrap_json("map", json_array_create(elems, datum->n)); } else if (datum->n == 1) { return ovsdb_atom_to_json(&datum->keys[0], type->key.type); } else { struct json **elems; size_t i; elems = xmalloc(datum->n * sizeof *elems); for (i = 0; i < datum->n; i++) { elems[i] = ovsdb_atom_to_json(&datum->keys[i], type->key.type); } return wrap_json("set", json_array_create(elems, datum->n)); } } /* Returns strlen(json_to_string(ovsdb_datum_to_json(datum, type), 0)). */ size_t ovsdb_datum_json_length(const struct ovsdb_datum *datum, const struct ovsdb_type *type) { if (ovsdb_type_is_map(type)) { size_t length; /* ["map",[...]]. */ length = 10; if (datum->n > 0) { size_t i; /* Commas between pairs in the inner [...] */ length += datum->n - 1; /* [,] in each pair. */ length += datum->n * 3; /* Data. */ for (i = 0; i < datum->n; i++) { length += ovsdb_atom_json_length(&datum->keys[i], type->key.type); length += ovsdb_atom_json_length(&datum->values[i], type->value.type); } } return length; } else if (datum->n == 1) { return ovsdb_atom_json_length(&datum->keys[0], type->key.type); } else { size_t length; size_t i; /* ["set",[...]]. */ length = 10; if (datum->n > 0) { /* Commas between elements in the inner [...]. */ length += datum->n - 1; /* Data. 
/* Initializes 'datum' as a datum of the given 'type', parsing its contents
 * from 's'.  The format of 's' is a series of space or comma separated atoms
 * or, for a map, '='-delimited pairs of atoms.  Each atom must be in a format
 * acceptable to ovsdb_atom_from_string().  Optionally, a set may be enclosed
 * in "[]" or a map in "{}"; for an empty set or map these punctuators are
 * required.
 *
 * Optionally, a symbol table may be supplied as 'symtab'.  It is passed to
 * ovsdb_atom_from_string().
 *
 * Returns NULL if successful.  On failure, returns an error message string
 * (built with xasprintf() or xstrdup(); presumably the caller must free() it)
 * and leaves 'datum' initialized as an empty datum. */
char *
ovsdb_datum_from_string(struct ovsdb_datum *datum,
                        const struct ovsdb_type *type, const char *s,
                        struct ovsdb_symbol_table *symtab)
{
    bool is_map = ovsdb_type_is_map(type);
    struct ovsdb_error *dberror;
    const char *p;
    int end_delim;              /* Expected closing punctuator, 0 if none. */
    char *error;

    ovsdb_datum_init_empty(datum);

    /* Swallow a leading delimiter if there is one. */
    p = skip_spaces(s);
    if (*p == (is_map ? '{' : '[')) {
        end_delim = is_map ? '}' : ']';
        p = skip_spaces(p + 1);
    } else if (!*p) {
        /* Blank input is rejected: an empty datum must be spelled with
         * explicit punctuators. */
        if (is_map) {
            return xstrdup("use \"{}\" to specify the empty map");
        } else {
            return xstrdup("use \"[]\" to specify the empty set");
        }
    } else {
        end_delim = 0;
    }

    /* With 'end_delim' of 0 this loop simply runs to the end of the string. */
    while (*p && *p != end_delim) {
        union ovsdb_atom key, value;

        /* Every element must start with a token, not with a delimiter. */
        if (ovsdb_token_is_delim(*p)) {
            char *type_str = ovsdb_type_to_english(type);
            error = xasprintf("%s: unexpected \"%c\" parsing %s",
                              s, *p, type_str);
            free(type_str);
            goto error;
        }

        /* Add to datum. */
        error = parse_key_value(&p, type, &key, &value, symtab);
        if (error) {
            goto error;
        }
        /* ovsdb_datum_add_unsafe() clones the atoms, so the parsed originals
         * are released right away. */
        ovsdb_datum_add_unsafe(datum, &key, &value, type);
        free_key_value(type, &key, &value);

        /* Skip optional white space and comma. */
        p = skip_spaces(p);
        if (*p == ',') {
            p = skip_spaces(p + 1);
        }
    }

    /* Reaching here with a mismatch means the string ended before the
     * expected closing punctuator. */
    if (*p != end_delim) {
        error = xasprintf("%s: missing \"%c\" at end of data", s, end_delim);
        goto error;
    }
    if (end_delim) {
        p = skip_spaces(p + 1);
        if (*p) {
            error = xasprintf("%s: trailing garbage after \"%c\"",
                              s, end_delim);
            goto error;
        }
    }

    /* Enforce the element-count limits of 'type'. */
    if (datum->n < type->n_min) {
        error = xasprintf("%s: %u %s specified but the minimum number is %u",
                          s, datum->n, is_map ? "pair(s)" : "value(s)",
                          type->n_min);
        goto error;
    } else if (datum->n > type->n_max) {
        error = xasprintf("%s: %u %s specified but the maximum number is %u",
                          s, datum->n, is_map ? "pair(s)" : "value(s)",
                          type->n_max);
        goto error;
    }

    /* The elements were appended with the "_unsafe" adder, so sort them now;
     * a sort failure is reported to the user as a duplicate key or value. */
    dberror = ovsdb_datum_sort(datum, type->key.type);
    if (dberror) {
        ovsdb_error_destroy(dberror);
        if (ovsdb_type_is_map(type)) {
            error = xasprintf("%s: map contains duplicate key", s);
        } else {
            error = xasprintf("%s: set contains duplicate value", s);
        }
        goto error;
    }

    return NULL;

error:
    /* Discard any partially built contents and hand back an empty datum. */
    ovsdb_datum_destroy(datum, type);
    ovsdb_datum_init_empty(datum);
    return error;
}
*/ void ovsdb_datum_from_smap(struct ovsdb_datum *datum, struct smap *smap) { struct smap_node *node, *next; size_t i; datum->n = smap_count(smap); datum->keys = xmalloc(datum->n * sizeof *datum->keys); datum->values = xmalloc(datum->n * sizeof *datum->values); i = 0; SMAP_FOR_EACH_SAFE (node, next, smap) { smap_steal(smap, node, &datum->keys[i].string, &datum->values[i].string); i++; } ovs_assert(i == datum->n); smap_destroy(smap); ovsdb_datum_sort_unique(datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING); } static uint32_t hash_atoms(enum ovsdb_atomic_type type, const union ovsdb_atom *atoms, unsigned int n, uint32_t basis) { if (type != OVSDB_TYPE_VOID) { unsigned int i; for (i = 0; i < n; i++) { basis = ovsdb_atom_hash(&atoms[i], type, basis); } } return basis; } uint32_t ovsdb_datum_hash(const struct ovsdb_datum *datum, const struct ovsdb_type *type, uint32_t basis) { basis = hash_atoms(type->key.type, datum->keys, datum->n, basis); basis ^= (type->key.type << 24) | (type->value.type << 16) | datum->n; basis = hash_atoms(type->value.type, datum->values, datum->n, basis); return basis; } static int atom_arrays_compare_3way(const union ovsdb_atom *a, const union ovsdb_atom *b, enum ovsdb_atomic_type type, size_t n) { unsigned int i; for (i = 0; i < n; i++) { int cmp = ovsdb_atom_compare_3way(&a[i], &b[i], type); if (cmp) { return cmp; } } return 0; } bool ovsdb_datum_equals(const struct ovsdb_datum *a, const struct ovsdb_datum *b, const struct ovsdb_type *type) { return !ovsdb_datum_compare_3way(a, b, type); } int ovsdb_datum_compare_3way(const struct ovsdb_datum *a, const struct ovsdb_datum *b, const struct ovsdb_type *type) { int cmp; if (a->n != b->n) { return a->n < b->n ? -1 : 1; } cmp = atom_arrays_compare_3way(a->keys, b->keys, type->key.type, a->n); if (cmp) { return cmp; } return (type->value.type == OVSDB_TYPE_VOID ? 
0 : atom_arrays_compare_3way(a->values, b->values, type->value.type, a->n)); } /* If 'key' is one of the keys in 'datum', returns its index within 'datum', * otherwise UINT_MAX. 'key.type' must be the type of the atoms stored in the * 'keys' array in 'datum'. */ unsigned int ovsdb_datum_find_key(const struct ovsdb_datum *datum, const union ovsdb_atom *key, enum ovsdb_atomic_type key_type) { unsigned int low = 0; unsigned int high = datum->n; while (low < high) { unsigned int idx = (low + high) / 2; int cmp = ovsdb_atom_compare_3way(key, &datum->keys[idx], key_type); if (cmp < 0) { high = idx; } else if (cmp > 0) { low = idx + 1; } else { return idx; } } return UINT_MAX; } /* If 'key' and 'value' is one of the key-value pairs in 'datum', returns its * index within 'datum', otherwise UINT_MAX. 'key.type' must be the type of * the atoms stored in the 'keys' array in 'datum'. 'value_type' may be the * type of the 'values' atoms or OVSDB_TYPE_VOID to compare only keys. */ unsigned int ovsdb_datum_find_key_value(const struct ovsdb_datum *datum, const union ovsdb_atom *key, enum ovsdb_atomic_type key_type, const union ovsdb_atom *value, enum ovsdb_atomic_type value_type) { unsigned int idx = ovsdb_datum_find_key(datum, key, key_type); if (idx != UINT_MAX && value_type != OVSDB_TYPE_VOID && !ovsdb_atom_equals(&datum->values[idx], value, value_type)) { idx = UINT_MAX; } return idx; } /* If atom 'i' in 'a' is also in 'b', returns its index in 'b', otherwise * UINT_MAX. 'type' must be the type of 'a' and 'b', except that * type->value.type may be set to OVSDB_TYPE_VOID to compare keys but not * values. */ static unsigned int ovsdb_datum_find(const struct ovsdb_datum *a, int i, const struct ovsdb_datum *b, const struct ovsdb_type *type) { return ovsdb_datum_find_key_value(b, &a->keys[i], type->key.type, a->values ? &a->values[i] : NULL, type->value.type); } /* Returns true if every element in 'a' is also in 'b', false otherwise. 
*/ bool ovsdb_datum_includes_all(const struct ovsdb_datum *a, const struct ovsdb_datum *b, const struct ovsdb_type *type) { size_t i; if (a->n > b->n) { return false; } for (i = 0; i < a->n; i++) { if (ovsdb_datum_find(a, i, b, type) == UINT_MAX) { return false; } } return true; } /* Returns true if no element in 'a' is also in 'b', false otherwise. */ bool ovsdb_datum_excludes_all(const struct ovsdb_datum *a, const struct ovsdb_datum *b, const struct ovsdb_type *type) { size_t i; for (i = 0; i < a->n; i++) { if (ovsdb_datum_find(a, i, b, type) != UINT_MAX) { return false; } } return true; } static void ovsdb_datum_reallocate(struct ovsdb_datum *a, const struct ovsdb_type *type, unsigned int capacity) { a->keys = xrealloc(a->keys, capacity * sizeof *a->keys); if (type->value.type != OVSDB_TYPE_VOID) { a->values = xrealloc(a->values, capacity * sizeof *a->values); } } /* Removes the element with index 'idx' from 'datum', which has type 'type'. * If 'idx' is not the last element in 'datum', then the removed element is * replaced by the (former) last element. * * This function does not maintain ovsdb_datum invariants. Use * ovsdb_datum_sort() to check and restore these invariants. */ void ovsdb_datum_remove_unsafe(struct ovsdb_datum *datum, size_t idx, const struct ovsdb_type *type) { ovsdb_atom_destroy(&datum->keys[idx], type->key.type); datum->keys[idx] = datum->keys[datum->n - 1]; if (type->value.type != OVSDB_TYPE_VOID) { ovsdb_atom_destroy(&datum->values[idx], type->value.type); datum->values[idx] = datum->values[datum->n - 1]; } datum->n--; } /* Adds the element with the given 'key' and 'value' to 'datum', which must * have the specified 'type'. * * This function always allocates memory, so it is not an efficient way to add * a number of elements to a datum. * * This function does not maintain ovsdb_datum invariants. Use * ovsdb_datum_sort() to check and restore these invariants. (But a datum with * 0 or 1 elements cannot violate the invariants anyhow.) 
/* Merges the elements of 'b' into 'a'; both have the given 'type'.
 *
 * An element of 'b' whose key is absent from 'a' is cloned and added to 'a'.
 * When a key is already present in 'a' and 'type' has values, the value in
 * 'a' is overwritten with a clone of the one from 'b' only if 'replace' is
 * true; otherwise the existing element is left alone.
 *
 * If anything was added, 'a' is re-sorted before returning. */
void
ovsdb_datum_union(struct ovsdb_datum *a, const struct ovsdb_datum *b,
                  const struct ovsdb_type *type, bool replace)
{
    unsigned int n;             /* Element count of 'a' including additions. */
    size_t bi;

    /* 'a->n' is deliberately left untouched until after the loop, so that
     * ovsdb_datum_find_key() keeps searching only the original, sorted
     * elements and never sees the unsorted ones appended after them. */
    n = a->n;
    for (bi = 0; bi < b->n; bi++) {
        unsigned int ai;

        ai = ovsdb_datum_find_key(a, &b->keys[bi], type->key.type);
        if (ai == UINT_MAX) {
            /* First addition only: grow 'a' once, with room for the case
             * where every remaining element of 'b' turns out to be new. */
            if (n == a->n) {
                ovsdb_datum_reallocate(a, type, a->n + (b->n - bi));
            }
            ovsdb_atom_clone(&a->keys[n], &b->keys[bi], type->key.type);
            if (type->value.type != OVSDB_TYPE_VOID) {
                ovsdb_atom_clone(&a->values[n], &b->values[bi],
                                 type->value.type);
            }
            n++;
        } else if (replace && type->value.type != OVSDB_TYPE_VOID) {
            /* Key already present: swap in the value from 'b'. */
            ovsdb_atom_destroy(&a->values[ai], type->value.type);
            ovsdb_atom_clone(&a->values[ai], &b->values[bi],
                             type->value.type);
        }
    }
    if (n != a->n) {
        struct ovsdb_error *error;
        a->n = n;
        /* Only keys that were not found in 'a' were appended, so the sort is
         * asserted to succeed. */
        error = ovsdb_datum_sort(a, type->key.type);
        ovs_assert(!error);
    }
}
*/ for (i = 0; i < a->n; ) { unsigned int idx = ovsdb_datum_find(a, i, b, b_type); if (idx != UINT_MAX) { changed = true; ovsdb_datum_remove_unsafe(a, i, a_type); } else { i++; } } if (changed) { ovsdb_datum_sort_assert(a, a_type->key.type); } } struct ovsdb_symbol_table * ovsdb_symbol_table_create(void) { struct ovsdb_symbol_table *symtab = xmalloc(sizeof *symtab); shash_init(&symtab->sh); return symtab; } void ovsdb_symbol_table_destroy(struct ovsdb_symbol_table *symtab) { if (symtab) { shash_destroy_free_data(&symtab->sh); free(symtab); } } struct ovsdb_symbol * ovsdb_symbol_table_get(const struct ovsdb_symbol_table *symtab, const char *name) { return shash_find_data(&symtab->sh, name); } struct ovsdb_symbol * ovsdb_symbol_table_put(struct ovsdb_symbol_table *symtab, const char *name, const struct uuid *uuid, bool created) { struct ovsdb_symbol *symbol; ovs_assert(!ovsdb_symbol_table_get(symtab, name)); symbol = xmalloc(sizeof *symbol); symbol->uuid = *uuid; symbol->created = created; symbol->strong_ref = false; symbol->weak_ref = false; shash_add(&symtab->sh, name, symbol); return symbol; } struct ovsdb_symbol * ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab, const char *name) { struct ovsdb_symbol *symbol; symbol = ovsdb_symbol_table_get(symtab, name); if (!symbol) { struct uuid uuid; uuid_generate(&uuid); symbol = ovsdb_symbol_table_put(symtab, name, &uuid, false); } return symbol; } /* Extracts a token from the beginning of 's' and returns a pointer just after * the token. Stores the token itself into '*outp', which the caller is * responsible for freeing (with free()). * * If 's[0]' is a delimiter, the returned token is the empty string. * * A token extends from 's' to the first delimiter, as defined by * ovsdb_token_is_delim(), or until the end of the string. A delimiter can be * escaped with a backslash, in which case the backslash does not appear in the * output. 
/* Reports whether 'c' ends a token: true for any of the characters
 * : = , space [ ] { } ! < > and also for the null terminator, false for
 * everything else. */
bool
ovsdb_token_is_delim(unsigned char c)
{
    switch (c) {
    case '\0':
    case ':':
    case '=':
    case ',':
    case ' ':
    case '[':
    case ']':
    case '{':
    case '}':
    case '!':
    case '<':
    case '>':
        return true;

    default:
        return false;
    }
}
/* One value of an atomic type (given by enum ovsdb_atomic_type).
 *
 * The union does not record which member is in use, so the functions that
 * operate on an atom take its type as a separate argument. */
union ovsdb_atom {
    int64_t integer;
    double real;
    bool boolean;
    char *string;               /* For OVSDB_TYPE_STRING; released by
                                 * ovsdb_atom_destroy(). */
    struct uuid uuid;
};
/* An instance of an OVSDB type (given by struct ovsdb_type).
 *
 *    - The 'keys' must be unique and in sorted order.  Most functions that
 *      modify an ovsdb_datum maintain these invariants.  Functions that don't
 *      maintain the invariants have names that end in "_unsafe".  Use
 *      ovsdb_datum_sort() to check and restore these invariants.
 *
 *    - 'n' is constrained by the ovsdb_type's 'n_min' and 'n_max'.
 *
 *      If 'n' is nonzero, then 'keys' points to an array of 'n' atoms of the
 *      type specified by the ovsdb_type's 'key.type'.  (Otherwise, 'keys'
 *      should be null.)
 *
 *      If 'n' is nonzero and the ovsdb_type's 'value.type' is not
 *      OVSDB_TYPE_VOID, then 'values' points to an array of 'n' atoms of the
 *      type specified by 'value.type'.  (Otherwise, 'values' should be
 *      null.)
 *
 *      Thus, for 'n' > 0, 'keys' will always be nonnull and 'values' will be
 *      nonnull only for "map" types.
 */
struct ovsdb_datum {
    unsigned int n;             /* Number of 'keys' and 'values'. */
    union ovsdb_atom *keys;     /* Each of the ovsdb_type's 'key.type'. */
    union ovsdb_atom *values;   /* Each of the ovsdb_type's 'value.type'. */
};
*/ uint32_t ovsdb_datum_hash(const struct ovsdb_datum *, const struct ovsdb_type *, uint32_t basis); int ovsdb_datum_compare_3way(const struct ovsdb_datum *, const struct ovsdb_datum *, const struct ovsdb_type *); bool ovsdb_datum_equals(const struct ovsdb_datum *, const struct ovsdb_datum *, const struct ovsdb_type *); /* Search. */ unsigned int ovsdb_datum_find_key(const struct ovsdb_datum *, const union ovsdb_atom *key, enum ovsdb_atomic_type key_type); unsigned int ovsdb_datum_find_key_value(const struct ovsdb_datum *, const union ovsdb_atom *key, enum ovsdb_atomic_type key_type, const union ovsdb_atom *value, enum ovsdb_atomic_type value_type); /* Set operations. */ bool ovsdb_datum_includes_all(const struct ovsdb_datum *, const struct ovsdb_datum *, const struct ovsdb_type *); bool ovsdb_datum_excludes_all(const struct ovsdb_datum *, const struct ovsdb_datum *, const struct ovsdb_type *); void ovsdb_datum_union(struct ovsdb_datum *, const struct ovsdb_datum *, const struct ovsdb_type *, bool replace); void ovsdb_datum_subtract(struct ovsdb_datum *a, const struct ovsdb_type *a_type, const struct ovsdb_datum *b, const struct ovsdb_type *b_type); /* Raw operations that may not maintain the invariants. */ void ovsdb_datum_remove_unsafe(struct ovsdb_datum *, size_t idx, const struct ovsdb_type *); void ovsdb_datum_add_unsafe(struct ovsdb_datum *, const union ovsdb_atom *key, const union ovsdb_atom *value, const struct ovsdb_type *); /* Type checking. */ static inline bool ovsdb_datum_conforms_to_type(const struct ovsdb_datum *datum, const struct ovsdb_type *type) { return datum->n >= type->n_min && datum->n <= type->n_max; } /* A table mapping from names to data items. Currently the data items are * always UUIDs; perhaps this will be expanded in the future. */ struct ovsdb_symbol_table { struct shash sh; /* Maps from name to struct ovsdb_symbol *. */ }; struct ovsdb_symbol { struct uuid uuid; /* The UUID that the symbol represents. 
*/ bool created; /* Already used to create row? */ bool strong_ref; /* Parsed a strong reference to this row? */ bool weak_ref; /* Parsed a weak reference to this row? */ }; struct ovsdb_symbol_table *ovsdb_symbol_table_create(void); void ovsdb_symbol_table_destroy(struct ovsdb_symbol_table *); struct ovsdb_symbol *ovsdb_symbol_table_get(const struct ovsdb_symbol_table *, const char *name); struct ovsdb_symbol *ovsdb_symbol_table_put(struct ovsdb_symbol_table *, const char *name, const struct uuid *, bool used); struct ovsdb_symbol *ovsdb_symbol_table_insert(struct ovsdb_symbol_table *, const char *name); /* Tokenization * * Used by ovsdb_atom_from_string() and ovsdb_datum_from_string(). */ char *ovsdb_token_parse(const char **, char **outp) WARN_UNUSED_RESULT; bool ovsdb_token_is_delim(unsigned char); #endif /* ovsdb-data.h */ openvswitch-2.0.1+git20140120/lib/ovsdb-error.c000066400000000000000000000146371226605124000206430ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ovsdb-error.h" #include #include "backtrace.h" #include "dynamic-string.h" #include "json.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ovsdb_error); struct ovsdb_error { const char *tag; /* String for "error" member. */ char *details; /* String for "details" member. */ char *syntax; /* String for "syntax" member. */ int errno_; /* Unix errno value, 0 if none. 
*/ }; static struct ovsdb_error * ovsdb_error_valist(const char *tag, const char *details, va_list args) { struct ovsdb_error *error = xmalloc(sizeof *error); error->tag = tag ? tag : "ovsdb error"; error->details = details ? xvasprintf(details, args) : NULL; error->syntax = NULL; error->errno_ = 0; return error; } struct ovsdb_error * ovsdb_error(const char *tag, const char *details, ...) { struct ovsdb_error *error; va_list args; va_start(args, details); error = ovsdb_error_valist(tag, details, args); va_end(args); return error; } struct ovsdb_error * ovsdb_io_error(int errno_, const char *details, ...) { struct ovsdb_error *error; va_list args; va_start(args, details); error = ovsdb_error_valist("I/O error", details, args); va_end(args); error->errno_ = errno_; return error; } struct ovsdb_error * ovsdb_syntax_error(const struct json *json, const char *tag, const char *details, ...) { struct ovsdb_error *error; va_list args; va_start(args, details); error = ovsdb_error_valist(tag ? tag : "syntax error", details, args); va_end(args); if (json) { /* XXX this is much too much information in some cases */ error->syntax = json_to_string(json, JSSF_SORT); } return error; } struct ovsdb_error * ovsdb_wrap_error(struct ovsdb_error *error, const char *details, ...) { va_list args; char *msg; va_start(args, details); msg = xvasprintf(details, args); va_end(args); if (error->details) { char *new = xasprintf("%s: %s", msg, error->details); free(error->details); error->details = new; free(msg); } else { error->details = msg; } return error; } /* Returns an ovsdb_error that represents an internal error for file name * 'file' and line number 'line', with 'details' (formatted as with printf()) * as the associated message. The caller is responsible for freeing the * returned error. * * If 'inner_error' is nonnull then the returned error is wrapped around * 'inner_error'. Takes ownership of 'inner_error'. 
*/ struct ovsdb_error * ovsdb_internal_error(struct ovsdb_error *inner_error, const char *file, int line, const char *details, ...) { struct ds ds = DS_EMPTY_INITIALIZER; struct backtrace backtrace; struct ovsdb_error *error; va_list args; ds_put_format(&ds, "%s:%d:", file, line); if (details) { ds_put_char(&ds, ' '); va_start(args, details); ds_put_format_valist(&ds, details, args); va_end(args); } backtrace_capture(&backtrace); if (backtrace.n_frames) { int i; ds_put_cstr(&ds, " (backtrace:"); for (i = 0; i < backtrace.n_frames; i++) { ds_put_format(&ds, " 0x%08"PRIxPTR, backtrace.frames[i]); } ds_put_char(&ds, ')'); } ds_put_format(&ds, " (%s %s)", program_name, VERSION); if (inner_error) { char *s = ovsdb_error_to_string(inner_error); ds_put_format(&ds, " (generated from: %s)", s); free(s); ovsdb_error_destroy(inner_error); } error = ovsdb_error("internal error", "%s", ds_cstr(&ds)); ds_destroy(&ds); return error; } void ovsdb_error_destroy(struct ovsdb_error *error) { if (error) { free(error->details); free(error->syntax); free(error); } } struct ovsdb_error * ovsdb_error_clone(const struct ovsdb_error *old) { if (old) { struct ovsdb_error *new = xmalloc(sizeof *new); new->tag = old->tag; new->details = old->details ? xstrdup(old->details) : NULL; new->syntax = old->syntax ? 
xstrdup(old->syntax) : NULL; new->errno_ = old->errno_; return new; } else { return NULL; } } struct json * ovsdb_error_to_json(const struct ovsdb_error *error) { struct json *json = json_object_create(); json_object_put_string(json, "error", error->tag); if (error->details) { json_object_put_string(json, "details", error->details); } if (error->syntax) { json_object_put_string(json, "syntax", error->syntax); } if (error->errno_) { json_object_put_string(json, "io-error", ovs_retval_to_string(error->errno_)); } return json; } char * ovsdb_error_to_string(const struct ovsdb_error *error) { struct ds ds = DS_EMPTY_INITIALIZER; if (error->syntax) { ds_put_format(&ds, "syntax \"%s\": ", error->syntax); } ds_put_cstr(&ds, error->tag); if (error->details) { ds_put_format(&ds, ": %s", error->details); } if (error->errno_) { ds_put_format(&ds, " (%s)", ovs_retval_to_string(error->errno_)); } return ds_steal_cstr(&ds); } const char * ovsdb_error_get_tag(const struct ovsdb_error *error) { return error->tag; } /* If 'error' is nonnull, logs it as an error and frees it. To be used in * situations where an error should never occur, but an 'ovsdb_error *' gets * passed back anyhow. */ void ovsdb_error_assert(struct ovsdb_error *error) { if (error) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); char *s = ovsdb_error_to_string(error); VLOG_ERR_RL(&rl, "unexpected ovsdb error: %s", s); free(s); ovsdb_error_destroy(error); } } openvswitch-2.0.1+git20140120/lib/ovsdb-error.h000066400000000000000000000052421226605124000206400ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVSDB_ERROR_H #define OVSDB_ERROR_H 1 #include "compiler.h" struct json; struct ovsdb_error *ovsdb_error(const char *tag, const char *details, ...) PRINTF_FORMAT(2, 3) WARN_UNUSED_RESULT; struct ovsdb_error *ovsdb_io_error(int error, const char *details, ...) PRINTF_FORMAT(2, 3) WARN_UNUSED_RESULT; struct ovsdb_error *ovsdb_syntax_error(const struct json *, const char *tag, const char *details, ...) PRINTF_FORMAT(3, 4) WARN_UNUSED_RESULT; struct ovsdb_error *ovsdb_wrap_error(struct ovsdb_error *error, const char *details, ...) PRINTF_FORMAT(2, 3); struct ovsdb_error *ovsdb_internal_error(struct ovsdb_error *error, const char *file, int line, const char *details, ...) PRINTF_FORMAT(4, 5) WARN_UNUSED_RESULT; /* Returns a pointer to an ovsdb_error that represents an internal error for * the current file name and line number with MSG as the associated message. * The caller is responsible for freeing the internal error. */ #define OVSDB_BUG(MSG) \ ovsdb_internal_error(NULL, __FILE__, __LINE__, "%s", MSG) /* Returns a pointer to an ovsdb_error that represents an internal error for * the current file name and line number, with MSG as the associated message. * If ERROR is nonnull then the internal error is wrapped around ERROR. Takes * ownership of ERROR. The caller is responsible for freeing the returned * error. 
*/
#define OVSDB_WRAP_BUG(MSG, ERROR)                                   \
    ovsdb_internal_error(ERROR, __FILE__, __LINE__, "%s", MSG)

void ovsdb_error_destroy(struct ovsdb_error *);
struct ovsdb_error *ovsdb_error_clone(const struct ovsdb_error *)
    WARN_UNUSED_RESULT;

/* The string returned here is heap-allocated: callers in ovsdb-idl.c pass it
 * to free() after logging it. */
char *ovsdb_error_to_string(const struct ovsdb_error *);
struct json *ovsdb_error_to_json(const struct ovsdb_error *);

const char *ovsdb_error_get_tag(const struct ovsdb_error *);

void ovsdb_error_assert(struct ovsdb_error *);

#endif /* ovsdb-error.h */
openvswitch-2.0.1+git20140120/lib/ovsdb-idl-provider.h000066400000000000000000000056111226605124000221070ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVSDB_IDL_PROVIDER_H
#define OVSDB_IDL_PROVIDER_H 1

#include "hmap.h"
#include "list.h"
#include "ovsdb-idl.h"
#include "ovsdb-types.h"
#include "shash.h"
#include "uuid.h"

/* One replicated database row.
 *
 * 'old' and 'new' point to arrays of datums indexed by column.  Outside a
 * transaction they are the same pointer; a row for which both are null is an
 * "orphan" (see ovsdb_idl_row_is_orphan() in ovsdb-idl.c). */
struct ovsdb_idl_row {
    struct hmap_node hmap_node; /* In struct ovsdb_idl_table's 'rows'. */
    struct uuid uuid;           /* Row "_uuid" field. */
    struct list src_arcs;       /* Forward arcs (ovsdb_idl_arc.src_node). */
    struct list dst_arcs;       /* Backward arcs (ovsdb_idl_arc.dst_node). */
    struct ovsdb_idl_table *table; /* Containing table. */
    struct ovsdb_datum *old;    /* Committed data (null if orphaned). */

    /* Transactional data. */
    struct ovsdb_datum *new;    /* Modified data (null to delete row). */
    unsigned long int *prereqs; /* Bitmap of columns to verify in "old". */
    unsigned long int *written; /* Bitmap of columns from "new" to write. */
    struct hmap_node txn_node;  /* Node in an hmap owned by ovsdb_idl_txn
                                 * (presumably 'txn_rows' -- confirm); null
                                 * node when the row is in no transaction. */
};

/* Static description of one column of a table. */
struct ovsdb_idl_column {
    char *name;                 /* Column name; key in the owning table's
                                 * 'columns' shash. */
    struct ovsdb_type type;     /* Type used to parse, compare and destroy
                                 * this column's datums. */
    bool mutable;

    /* Called by ovsdb_idl_row_parse() with the row and the column's datum
     * from the row's 'old' array. */
    void (*parse)(struct ovsdb_idl_row *, const struct ovsdb_datum *);

    /* Called by ovsdb_idl_row_unparse() with the row. */
    void (*unparse)(struct ovsdb_idl_row *);
};

/* Static description of one table. */
struct ovsdb_idl_table_class {
    char *name;                 /* Table name; key in the IDL's
                                 * 'table_by_name' shash. */
    bool is_root;
    const struct ovsdb_idl_column *columns; /* Array of 'n_columns' columns. */
    size_t n_columns;
    size_t allocation_size;     /* Bytes allocated (zeroed) for each row by
                                 * ovsdb_idl_row_create__(). */
    void (*row_init)(struct ovsdb_idl_row *); /* Run on each new row right
                                               * after allocation. */
};

/* Per-IDL state for one table. */
struct ovsdb_idl_table {
    const struct ovsdb_idl_table_class *class;
    unsigned char *modes;    /* OVSDB_IDL_* bitmasks, indexed by column. */
    bool need_table;         /* Monitor table even if no columns? */
    struct shash columns;    /* Contains "const struct ovsdb_idl_column *"s. */
    struct hmap rows;        /* Contains "struct ovsdb_idl_row"s. */
    struct ovsdb_idl *idl;   /* Containing idl. */
};

/* Static description of a whole database schema. */
struct ovsdb_idl_class {
    const char *database;       /* <db-name> for this database. */
    const struct ovsdb_idl_table_class *tables; /* Array of 'n_tables'. */
    size_t n_tables;
};

struct ovsdb_idl_row *ovsdb_idl_get_row_arc(
    struct ovsdb_idl_row *src,
    struct ovsdb_idl_table_class *dst_table,
    const struct uuid *dst_uuid);

void ovsdb_idl_txn_verify(const struct ovsdb_idl_row *,
                          const struct ovsdb_idl_column *);

struct ovsdb_idl_txn *ovsdb_idl_txn_get(const struct ovsdb_idl_row *);

#endif /* ovsdb-idl-provider.h */
openvswitch-2.0.1+git20140120/lib/ovsdb-idl.c000066400000000000000000002461561226605124000202650ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): several #include directives below have no header name.  The
 * names look like they were lost when this text was extracted (presumably
 * angle-bracket system headers) -- restore them from the original file. */
#include

#include "ovsdb-idl.h"

#include
#include
#include
#include

#include "bitmap.h"
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "json.h"
#include "jsonrpc.h"
#include "ovsdb-data.h"
#include "ovsdb-error.h"
#include "ovsdb-idl-provider.h"
#include "poll-loop.h"
#include "shash.h"
#include "util.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ovsdb_idl);

/* An arc from one idl_row to another.  When row A contains a UUID that
 * references row B, this is represented by an arc from A (the source) to B
 * (the destination).
 *
 * Arcs from a row to itself are omitted, that is, src and dst are always
 * different.
 *
 * Arcs are never duplicated, that is, even if there are multiple references
 * from A to B, there is only a single arc from A to B.
 *
 * Arcs are directed: an arc from A to B is the converse of an arc from B to
 * A.  Both an arc and its converse may both be present, if each row refers
 * to the other circularly.
 *
 * The source and destination row may be in the same table or in different
 * tables.
 */
struct ovsdb_idl_arc {
    struct list src_node;       /* In src->src_arcs list. */
    struct list dst_node;       /* In dst->dst_arcs list. */
    struct ovsdb_idl_row *src;  /* Source row. */
    struct ovsdb_idl_row *dst;  /* Destination row. */
};

/* Client-side replica of (part of) a remote database. */
struct ovsdb_idl {
    const struct ovsdb_idl_class *class;
    struct jsonrpc_session *session;
    struct shash table_by_name; /* Maps a table name to its entry in
                                 * 'tables'. */
    struct ovsdb_idl_table *tables; /* Array of class->n_tables tables. */
    struct json *monitor_request_id; /* ID of the in-flight "monitor" request,
                                      * null once its reply has arrived. */
    unsigned int last_monitor_request_seqno; /* Session seqno at the time the
                                              * last "monitor" request was
                                              * sent. */
    unsigned int change_seqno;  /* See ovsdb_idl_get_seqno(). */
    bool verify_write_only;     /* Set by ovsdb_idl_verify_write_only(). */

    /* Database locking. */
    char *lock_name;            /* Name of lock we need, NULL if none. */
    bool has_lock;              /* Has db server told us we have the lock? */
    bool is_lock_contended;     /* Has db server told us we can't get lock? */
    struct json *lock_request_id; /* JSON-RPC ID of in-flight lock request. */

    /* Transaction support. */
    struct ovsdb_idl_txn *txn;
    struct hmap outstanding_txns;
};

struct ovsdb_idl_txn {
    struct hmap_node hmap_node;
    struct json *request_id;
    struct ovsdb_idl *idl;
    struct hmap txn_rows;
    enum ovsdb_idl_txn_status status;
    char *error;
    bool dry_run;
    struct ds comment;
    unsigned int commit_seqno;

    /* Increments. */
    const char *inc_table;
    const char *inc_column;
    struct uuid inc_row;
    unsigned int inc_index;
    int64_t inc_new_value;

    /* Inserted rows. */
    struct hmap inserted_rows;  /* Contains "struct ovsdb_idl_txn_insert"s. */
};

struct ovsdb_idl_txn_insert {
    struct hmap_node hmap_node; /* In struct ovsdb_idl_txn's inserted_rows. */
    struct uuid dummy;          /* Dummy UUID used locally. */
    int op_index;               /* Index into transaction's operation array. */
    struct uuid real;           /* Real UUID used by database server. */
};

/* Rate limiters: 'syntax_rl' for malformed messages from the server,
 * 'semantic_rl' for well-formed updates that contradict the replica. */
static struct vlog_rate_limit syntax_rl = VLOG_RATE_LIMIT_INIT(1, 5);
static struct vlog_rate_limit semantic_rl = VLOG_RATE_LIMIT_INIT(1, 5);

static void ovsdb_idl_clear(struct ovsdb_idl *);
static void ovsdb_idl_send_monitor_request(struct ovsdb_idl *);
static void ovsdb_idl_parse_update(struct ovsdb_idl *, const struct json *);
static struct ovsdb_error *ovsdb_idl_parse_update__(struct ovsdb_idl *,
                                                    const struct json *);
static bool ovsdb_idl_process_update(struct ovsdb_idl_table *,
                                     const struct uuid *,
                                     const struct json *old,
                                     const struct json *new);
static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct json *);
static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct json *);

static bool ovsdb_idl_row_is_orphan(const struct ovsdb_idl_row *);
static struct ovsdb_idl_row *ovsdb_idl_row_create__(
    const struct ovsdb_idl_table_class *);
static struct ovsdb_idl_row *ovsdb_idl_row_create(struct ovsdb_idl_table *,
                                                  const struct uuid *);
static void ovsdb_idl_row_destroy(struct ovsdb_idl_row *);

static void ovsdb_idl_row_parse(struct ovsdb_idl_row *);
static void ovsdb_idl_row_unparse(struct ovsdb_idl_row *);
static void ovsdb_idl_row_clear_old(struct ovsdb_idl_row *);
static void ovsdb_idl_row_clear_new(struct ovsdb_idl_row *);

static void ovsdb_idl_txn_abort_all(struct ovsdb_idl *);
static bool ovsdb_idl_txn_process_reply(struct ovsdb_idl *,
                                        const struct jsonrpc_msg *msg);

static void ovsdb_idl_send_lock_request(struct ovsdb_idl *);
static void ovsdb_idl_send_unlock_request(struct ovsdb_idl *);
static void ovsdb_idl_parse_lock_reply(struct ovsdb_idl *,
                                       const struct json *);
static void ovsdb_idl_parse_lock_notify(struct ovsdb_idl *,
                                        const struct json *params,
                                        bool new_has_lock);

/* Creates and returns a connection to database 'remote', which should be in a
 * form acceptable to jsonrpc_session_open().  The connection will maintain an
 * in-memory replica of the remote database whose schema is described by
 * 'class'.  (Ordinarily 'class' is compiled from an OVSDB schema automatically
 * by ovsdb-idlc.)
 *
 * Passes 'retry' to jsonrpc_session_open().  See that function for
 * documentation.
 *
 * If 'monitor_everything_by_default' is true, then everything in the remote
 * database will be replicated by default.  ovsdb_idl_omit() and
 * ovsdb_idl_omit_alert() may be used to selectively drop some columns from
 * monitoring.
 *
 * If 'monitor_everything_by_default' is false, then no columns or tables will
 * be replicated by default.  ovsdb_idl_add_column() and ovsdb_idl_add_table()
 * must be used to choose some columns or tables to replicate.
 */
struct ovsdb_idl *
ovsdb_idl_create(const char *remote, const struct ovsdb_idl_class *class,
                 bool monitor_everything_by_default, bool retry)
{
    struct ovsdb_idl *idl;
    uint8_t default_mode;
    size_t i;

    default_mode = (monitor_everything_by_default
                    ? OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT
                    : 0);

    idl = xzalloc(sizeof *idl);
    idl->class = class;
    idl->session = jsonrpc_session_open(remote, retry);
    shash_init(&idl->table_by_name);
    idl->tables = xmalloc(class->n_tables * sizeof *idl->tables);
    for (i = 0; i < class->n_tables; i++) {
        const struct ovsdb_idl_table_class *tc = &class->tables[i];
        struct ovsdb_idl_table *table = &idl->tables[i];
        size_t j;

        shash_add_assert(&idl->table_by_name, tc->name, table);
        table->class = tc;
        /* One mode byte per column, all starting at 'default_mode'. */
        table->modes = xmalloc(tc->n_columns);
        memset(table->modes, default_mode, tc->n_columns);
        table->need_table = false;
        shash_init(&table->columns);
        for (j = 0; j < tc->n_columns; j++) {
            const struct ovsdb_idl_column *column = &tc->columns[j];

            shash_add_assert(&table->columns, column->name, column);
        }
        hmap_init(&table->rows);
        table->idl = idl;
    }
    /* Start with a seqno that ovsdb_idl_run() compares against the session's
     * seqno; a mismatch makes it send the "monitor" request. */
    idl->last_monitor_request_seqno = UINT_MAX;
    hmap_init(&idl->outstanding_txns);

    return idl;
}

/* Destroys 'idl' and all of the data structures that it manages.  Does
 * nothing if 'idl' is null.  There must be no current transaction
 * ('idl->txn' must be null). */
void
ovsdb_idl_destroy(struct ovsdb_idl *idl)
{
    if (idl) {
        size_t i;

        ovs_assert(!idl->txn);
        ovsdb_idl_clear(idl);
        jsonrpc_session_close(idl->session);

        for (i = 0; i < idl->class->n_tables; i++) {
            struct ovsdb_idl_table *table = &idl->tables[i];
            shash_destroy(&table->columns);
            hmap_destroy(&table->rows);
            free(table->modes);
        }
        shash_destroy(&idl->table_by_name);
        free(idl->tables);
        json_destroy(idl->monitor_request_id);
        free(idl->lock_name);
        json_destroy(idl->lock_request_id);
        hmap_destroy(&idl->outstanding_txns);
        free(idl);
    }
}

/* Removes every row (and every arc) from every table in 'idl'.  Increments
 * the change sequence number if at least one table was nonempty. */
static void
ovsdb_idl_clear(struct ovsdb_idl *idl)
{
    bool changed = false;
    size_t i;

    for (i = 0; i < idl->class->n_tables; i++) {
        struct ovsdb_idl_table *table = &idl->tables[i];
        struct ovsdb_idl_row *row, *next_row;

        if (hmap_is_empty(&table->rows)) {
            continue;
        }

        changed = true;
        HMAP_FOR_EACH_SAFE (row, next_row, hmap_node, &table->rows) {
            struct ovsdb_idl_arc *arc, *next_arc;

            if (!ovsdb_idl_row_is_orphan(row)) {
                ovsdb_idl_row_unparse(row);
            }
            LIST_FOR_EACH_SAFE (arc, next_arc, src_node, &row->src_arcs) {
                free(arc);
            }
            /* No need to do anything with dst_arcs: some node has those arcs
             * as forward arcs and will destroy them itself. */

            ovsdb_idl_row_destroy(row);
        }
    }

    if (changed) {
        idl->change_seqno++;
    }
}

/* Processes a batch of messages from the database server on 'idl'.  This may
 * cause the IDL's contents to change.  The client may check for that with
 * ovsdb_idl_get_seqno().
 *
 * At most 50 messages are handled per call.  There must be no current
 * transaction ('idl->txn' must be null). */
void
ovsdb_idl_run(struct ovsdb_idl *idl)
{
    int i;

    ovs_assert(!idl->txn);
    jsonrpc_session_run(idl->session);
    for (i = 0; jsonrpc_session_is_connected(idl->session) && i < 50; i++) {
        struct jsonrpc_msg *msg;
        unsigned int seqno;

        /* A session seqno different from the one recorded for our last
         * "monitor" request (presumably a new connection -- confirm against
         * jsonrpc_session_get_seqno()) means we must start over: abort
         * outstanding transactions and request monitoring (and the lock)
         * again. */
        seqno = jsonrpc_session_get_seqno(idl->session);
        if (idl->last_monitor_request_seqno != seqno) {
            idl->last_monitor_request_seqno = seqno;
            ovsdb_idl_txn_abort_all(idl);
            ovsdb_idl_send_monitor_request(idl);
            if (idl->lock_name) {
                ovsdb_idl_send_lock_request(idl);
            }
            break;
        }

        msg = jsonrpc_session_recv(idl->session);
        if (!msg) {
            break;
        }

        /* Dispatch on message type.  The order of the tests matters: the
         * transaction-reply test comes last among the replies because it
         * accepts any reply or error whose ID it recognizes. */
        if (msg->type == JSONRPC_NOTIFY
            && !strcmp(msg->method, "update")
            && msg->params->type == JSON_ARRAY
            && msg->params->u.array.n == 2
            && msg->params->u.array.elems[0]->type == JSON_NULL) {
            /* Database contents changed. */
            ovsdb_idl_parse_update(idl, msg->params->u.array.elems[1]);
        } else if (msg->type == JSONRPC_REPLY
                   && idl->monitor_request_id
                   && json_equal(idl->monitor_request_id, msg->id)) {
            /* Reply to our "monitor" request. */
            idl->change_seqno++;
            json_destroy(idl->monitor_request_id);
            idl->monitor_request_id = NULL;
            ovsdb_idl_clear(idl);
            ovsdb_idl_parse_update(idl, msg->result);
        } else if (msg->type == JSONRPC_REPLY
                   && idl->lock_request_id
                   && json_equal(idl->lock_request_id, msg->id)) {
            /* Reply to our "lock" request. */
            ovsdb_idl_parse_lock_reply(idl, msg->result);
        } else if (msg->type == JSONRPC_NOTIFY
                   && !strcmp(msg->method, "locked")) {
            /* We got our lock. */
            ovsdb_idl_parse_lock_notify(idl, msg->params, true);
        } else if (msg->type == JSONRPC_NOTIFY
                   && !strcmp(msg->method, "stolen")) {
            /* Someone else stole our lock. */
            ovsdb_idl_parse_lock_notify(idl, msg->params, false);
        } else if (msg->type == JSONRPC_REPLY && msg->id->type == JSON_STRING
                   && !strcmp(msg->id->u.string, "echo")) {
            /* Reply to our echo request.  Ignore it. */
        } else if ((msg->type == JSONRPC_ERROR
                    || msg->type == JSONRPC_REPLY)
                   && ovsdb_idl_txn_process_reply(idl, msg)) {
            /* ovsdb_idl_txn_process_reply() did everything needful. */
        } else {
            /* This can happen if ovsdb_idl_txn_destroy() is called to destroy
             * a transaction before we receive the reply, so keep the log level
             * low. */
            VLOG_DBG("%s: received unexpected %s message",
                     jsonrpc_session_get_name(idl->session),
                     jsonrpc_msg_type_to_string(msg->type));
        }
        jsonrpc_msg_destroy(msg);
    }
}

/* Arranges for poll_block() to wake up when ovsdb_idl_run() has something to
 * do or when activity occurs on a transaction on 'idl'. */
void
ovsdb_idl_wait(struct ovsdb_idl *idl)
{
    jsonrpc_session_wait(idl->session);
    jsonrpc_session_recv_wait(idl->session);
}

/* Returns a "sequence number" that represents the state of 'idl'.  When
 * ovsdb_idl_run() changes the database, the sequence number changes.  The
 * initial fetch of the entire contents of the remote database is considered to
 * be one kind of change.  Successfully acquiring a lock, if one has been
 * configured with ovsdb_idl_set_lock(), is also considered to be a change.
 *
 * As long as the sequence number does not change, the client may continue to
 * use any data structures it obtains from 'idl'.  But when it changes, the
 * client must not access any of these data structures again, because they
 * could have been freed or reused for other purposes.
 *
 * The sequence number can occasionally change even if the database does not.
 * This happens if the connection to the database drops and reconnects, which
 * causes the database contents to be reloaded even if they didn't change.
(It * could also happen if the database server sends out a "change" that reflects * what the IDL already thought was in the database. The database server is * not supposed to do that, but bugs could in theory cause it to do so.) */ unsigned int ovsdb_idl_get_seqno(const struct ovsdb_idl *idl) { return idl->change_seqno; } /* Returns true if 'idl' successfully connected to the remote database and * retrieved its contents (even if the connection subsequently dropped and is * in the process of reconnecting). If so, then 'idl' contains an atomic * snapshot of the database's contents (but it might be arbitrarily old if the * connection dropped). * * Returns false if 'idl' has never connected or retrieved the database's * contents. If so, 'idl' is empty. */ bool ovsdb_idl_has_ever_connected(const struct ovsdb_idl *idl) { return ovsdb_idl_get_seqno(idl) != 0; } /* Forces 'idl' to drop its connection to the database and reconnect. In the * meantime, the contents of 'idl' will not change. */ void ovsdb_idl_force_reconnect(struct ovsdb_idl *idl) { jsonrpc_session_force_reconnect(idl->session); } /* Some IDL users should only write to write-only columns. Furthermore, * writing to a column which is not write-only can cause serious performance * degradations for these users. This function causes 'idl' to reject writes * to columns which are not marked write only using ovsdb_idl_omit_alert(). 
*/ void ovsdb_idl_verify_write_only(struct ovsdb_idl *idl) { idl->verify_write_only = true; } bool ovsdb_idl_is_alive(const struct ovsdb_idl *idl) { return jsonrpc_session_is_alive(idl->session); } int ovsdb_idl_get_last_error(const struct ovsdb_idl *idl) { return jsonrpc_session_get_last_error(idl->session); } static unsigned char * ovsdb_idl_get_mode(struct ovsdb_idl *idl, const struct ovsdb_idl_column *column) { size_t i; ovs_assert(!idl->change_seqno); for (i = 0; i < idl->class->n_tables; i++) { const struct ovsdb_idl_table *table = &idl->tables[i]; const struct ovsdb_idl_table_class *tc = table->class; if (column >= tc->columns && column < &tc->columns[tc->n_columns]) { return &table->modes[column - tc->columns]; } } NOT_REACHED(); } static void add_ref_table(struct ovsdb_idl *idl, const struct ovsdb_base_type *base) { if (base->type == OVSDB_TYPE_UUID && base->u.uuid.refTableName) { struct ovsdb_idl_table *table; table = shash_find_data(&idl->table_by_name, base->u.uuid.refTableName); if (table) { table->need_table = true; } else { VLOG_WARN("%s IDL class missing referenced table %s", idl->class->database, base->u.uuid.refTableName); } } } /* Turns on OVSDB_IDL_MONITOR and OVSDB_IDL_ALERT for 'column' in 'idl'. Also * ensures that any tables referenced by 'column' will be replicated, even if * no columns in that table are selected for replication (see * ovsdb_idl_add_table() for more information). * * This function is only useful if 'monitor_everything_by_default' was false in * the call to ovsdb_idl_create(). This function should be called between * ovsdb_idl_create() and the first call to ovsdb_idl_run(). 
*/ void ovsdb_idl_add_column(struct ovsdb_idl *idl, const struct ovsdb_idl_column *column) { *ovsdb_idl_get_mode(idl, column) = OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT; add_ref_table(idl, &column->type.key); add_ref_table(idl, &column->type.value); } /* Ensures that the table with class 'tc' will be replicated on 'idl' even if * no columns are selected for replication. This can be useful because it * allows 'idl' to keep track of what rows in the table actually exist, which * in turn allows columns that reference the table to have accurate contents. * (The IDL presents the database with references to rows that do not exist * removed.) * * This function is only useful if 'monitor_everything_by_default' was false in * the call to ovsdb_idl_create(). This function should be called between * ovsdb_idl_create() and the first call to ovsdb_idl_run(). */ void ovsdb_idl_add_table(struct ovsdb_idl *idl, const struct ovsdb_idl_table_class *tc) { size_t i; for (i = 0; i < idl->class->n_tables; i++) { struct ovsdb_idl_table *table = &idl->tables[i]; if (table->class == tc) { table->need_table = true; return; } } NOT_REACHED(); } /* Turns off OVSDB_IDL_ALERT for 'column' in 'idl'. * * This function should be called between ovsdb_idl_create() and the first call * to ovsdb_idl_run(). */ void ovsdb_idl_omit_alert(struct ovsdb_idl *idl, const struct ovsdb_idl_column *column) { *ovsdb_idl_get_mode(idl, column) &= ~OVSDB_IDL_ALERT; } /* Sets the mode for 'column' in 'idl' to 0. See the big comment above * OVSDB_IDL_MONITOR for details. * * This function should be called between ovsdb_idl_create() and the first call * to ovsdb_idl_run(). 
*/
void
ovsdb_idl_omit(struct ovsdb_idl *idl, const struct ovsdb_idl_column *column)
{
    *ovsdb_idl_get_mode(idl, column) = 0;
}

/* Builds and sends a "monitor" request covering, for each table, every column
 * whose mode includes OVSDB_IDL_MONITOR.  A table with no such column is
 * still included (with an empty column list) if its 'need_table' flag is set;
 * otherwise it is left out of the request.
 *
 * The ID of the new request replaces 'idl->monitor_request_id'. */
static void
ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl)
{
    struct json *monitor_requests;
    struct jsonrpc_msg *msg;
    size_t i;

    monitor_requests = json_object_create();
    for (i = 0; i < idl->class->n_tables; i++) {
        const struct ovsdb_idl_table *table = &idl->tables[i];
        const struct ovsdb_idl_table_class *tc = table->class;
        struct json *monitor_request, *columns;
        size_t j;

        /* A null 'columns' means "nothing to monitor in this table yet". */
        columns = table->need_table ? json_array_create_empty() : NULL;
        for (j = 0; j < tc->n_columns; j++) {
            const struct ovsdb_idl_column *column = &tc->columns[j];
            if (table->modes[j] & OVSDB_IDL_MONITOR) {
                if (!columns) {
                    columns = json_array_create_empty();
                }
                json_array_add(columns, json_string_create(column->name));
            }
        }

        if (columns) {
            monitor_request = json_object_create();
            json_object_put(monitor_request, "columns", columns);
            json_object_put(monitor_requests, tc->name, monitor_request);
        }
    }

    /* Drop the ID of any earlier request before recording the new one. */
    json_destroy(idl->monitor_request_id);
    msg = jsonrpc_create_request(
        "monitor",
        json_array_create_3(json_string_create(idl->class->database),
                            json_null_create(), monitor_requests),
        &idl->monitor_request_id);
    jsonrpc_session_send(idl->session, msg);
}

/* Applies 'table_updates' to 'idl'.  A parse error is logged (rate-limited)
 * and then discarded; it is not reported to the caller. */
static void
ovsdb_idl_parse_update(struct ovsdb_idl *idl, const struct json *table_updates)
{
    struct ovsdb_error *error = ovsdb_idl_parse_update__(idl, table_updates);
    if (error) {
        /* Check the rate limit first so that the error string is only built
         * when it will be logged.
         * NOTE(review): 'syntax_rl' is consulted here and again inside
         * VLOG_WARN_RL -- confirm the double check is intended. */
        if (!VLOG_DROP_WARN(&syntax_rl)) {
            char *s = ovsdb_error_to_string(error);
            VLOG_WARN_RL(&syntax_rl, "%s", s);
            free(s);
        }
        ovsdb_error_destroy(error);
    }
}

/* Validates 'table_updates' and applies each row update it contains to
 * 'idl', incrementing the change sequence number for every row update that
 * ovsdb_idl_process_update() reports as a change.
 *
 * Returns null on success.  On the first malformed element, returns an error
 * that the caller must destroy; row updates applied before that point remain
 * applied.
 *
 * NOTE(review): several messages below begin with a space, as if a leading
 * "<...>" placeholder had been lost when this text was extracted -- compare
 * with the original file before relying on the exact wording. */
static struct ovsdb_error *
ovsdb_idl_parse_update__(struct ovsdb_idl *idl,
                         const struct json *table_updates)
{
    const struct shash_node *tables_node;

    if (table_updates->type != JSON_OBJECT) {
        return ovsdb_syntax_error(table_updates, NULL,
                                  " is not an object");
    }
    /* Outer object: table name -> table update. */
    SHASH_FOR_EACH (tables_node, json_object(table_updates)) {
        const struct json *table_update = tables_node->data;
        const struct shash_node *table_node;
        struct ovsdb_idl_table *table;

        table = shash_find_data(&idl->table_by_name, tables_node->name);
        if (!table) {
            return ovsdb_syntax_error(
                table_updates, NULL,
                " includes unknown table \"%s\"",
                tables_node->name);
        }

        if (table_update->type != JSON_OBJECT) {
            return ovsdb_syntax_error(table_update, NULL,
                                      " for table \"%s\" is "
                                      "not an object",
                                      table->class->name);
        }
        /* Inner object: row UUID -> row update with "old" and/or "new". */
        SHASH_FOR_EACH (table_node, json_object(table_update)) {
            const struct json *row_update = table_node->data;
            const struct json *old_json, *new_json;
            struct uuid uuid;

            if (!uuid_from_string(&uuid, table_node->name)) {
                return ovsdb_syntax_error(table_update, NULL,
                                          " for table \"%s\" "
                                          "contains bad UUID "
                                          "\"%s\" as member name",
                                          table->class->name,
                                          table_node->name);
            }
            if (row_update->type != JSON_OBJECT) {
                return ovsdb_syntax_error(row_update, NULL,
                                          " for table \"%s\" "
                                          "contains  for %s that "
                                          "is not an object",
                                          table->class->name,
                                          table_node->name);
            }

            old_json = shash_find_data(json_object(row_update), "old");
            new_json = shash_find_data(json_object(row_update), "new");
            if (old_json && old_json->type != JSON_OBJECT) {
                return ovsdb_syntax_error(old_json, NULL,
                                          "\"old\"  is not object");
            } else if (new_json && new_json->type != JSON_OBJECT) {
                return ovsdb_syntax_error(new_json, NULL,
                                          "\"new\"  is not object");
            } else if ((old_json != NULL) + (new_json != NULL)
                       != shash_count(json_object(row_update))) {
                /* The member count must equal the number of "old"/"new"
                 * members found, i.e. there are no other members. */
                return ovsdb_syntax_error(row_update, NULL,
                                          " contains unexpected "
                                          "member");
            } else if (!old_json && !new_json) {
                return ovsdb_syntax_error(row_update, NULL,
                                          " missing \"old\" "
                                          "and \"new\" members");
            }

            if (ovsdb_idl_process_update(table, &uuid, old_json, new_json)) {
                idl->change_seqno++;
            }
        }
    }

    return NULL;
}

/* Returns the row in 'table' whose UUID is 'uuid', or a null pointer if there
 * is none.  The result may be an orphan row. */
static struct ovsdb_idl_row *
ovsdb_idl_get_row(struct ovsdb_idl_table *table, const struct uuid *uuid)
{
    struct ovsdb_idl_row *row;

    HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &table->rows) {
        if (uuid_equals(&row->uuid, uuid)) {
            return row;
        }
    }
    return NULL;
}

/* Returns true if a column with
mode OVSDB_IDL_MODE_RW changed, false
 * otherwise. */
/* Applies one row update for the row identified by 'uuid' in 'table'.
 *
 * A null 'new' is a deletion, a null 'old' is an insertion, and an update
 * with both is a modification.  When the update contradicts the replica (for
 * example, modifying a row that is not there) a warning is logged and the
 * closest sensible action is taken instead.
 *
 * Deletions and insertions that are carried out always count as a change.  A
 * modification of an existing row yields whatever ovsdb_idl_modify_row()
 * reports.  A deletion of a missing row yields false. */
static bool
ovsdb_idl_process_update(struct ovsdb_idl_table *table,
                         const struct uuid *uuid, const struct json *old,
                         const struct json *new)
{
    struct ovsdb_idl_row *row = ovsdb_idl_get_row(table, uuid);

    if (!new) {
        /* Delete row. */
        if (!row || ovsdb_idl_row_is_orphan(row)) {
            VLOG_WARN_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" "
                         "from table %s",
                         UUID_ARGS(uuid), table->class->name);
            return false;
        }

        /* XXX perhaps we should check the 'old' values? */
        ovsdb_idl_delete_row(row);
        return true;
    }

    if (!old) {
        /* Insert row. */
        if (!row) {
            ovsdb_idl_insert_row(ovsdb_idl_row_create(table, uuid), new);
            return true;
        }
        if (ovsdb_idl_row_is_orphan(row)) {
            /* The row was known only as the target of a reference. */
            ovsdb_idl_insert_row(row, new);
            return true;
        }

        VLOG_WARN_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to "
                     "table %s", UUID_ARGS(uuid), table->class->name);
        return ovsdb_idl_modify_row(row, new);
    }

    /* Modify row. */
    if (!row) {
        VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
                     "in table %s", UUID_ARGS(uuid), table->class->name);
        ovsdb_idl_insert_row(ovsdb_idl_row_create(table, uuid), new);
        return true;
    }
    if (ovsdb_idl_row_is_orphan(row)) {
        VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
                     "referenced row "UUID_FMT" in table %s",
                     UUID_ARGS(uuid), table->class->name);
        ovsdb_idl_insert_row(row, new);
        return true;
    }

    /* XXX perhaps we should check the 'old' values? */
    return ovsdb_idl_modify_row(row, new);
}

/* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
 * otherwise.
*/
/* Stores each column value found in 'row_json' into the matching slot of
 * 'row->old'.  Unknown column names and values that fail to parse are logged
 * (rate-limited) and skipped.  The result is true only if at least one stored
 * value differed from the previous one in a column whose mode includes
 * OVSDB_IDL_ALERT. */
static bool
ovsdb_idl_row_update(struct ovsdb_idl_row *row, const struct json *row_json)
{
    struct ovsdb_idl_table *table = row->table;
    struct shash_node *member;
    bool alert = false;

    SHASH_FOR_EACH (member, json_object(row_json)) {
        const char *column_name = member->name;
        const struct ovsdb_idl_column *column;
        struct ovsdb_datum parsed;
        struct ovsdb_datum *current;
        struct ovsdb_error *error;
        unsigned int column_idx;

        column = shash_find_data(&table->columns, column_name);
        if (!column) {
            VLOG_WARN_RL(&syntax_rl, "unknown column %s updating row "UUID_FMT,
                         column_name, UUID_ARGS(&row->uuid));
            continue;
        }

        error = ovsdb_datum_from_json(&parsed, &column->type,
                                      member->data, NULL);
        if (error) {
            /* Report the bad value and leave the stored datum untouched. */
            char *s = ovsdb_error_to_string(error);
            VLOG_WARN_RL(&syntax_rl, "error parsing column %s in row "UUID_FMT
                         " in table %s: %s", column_name,
                         UUID_ARGS(&row->uuid), table->class->name, s);
            free(s);
            ovsdb_error_destroy(error);
            continue;
        }

        column_idx = column - table->class->columns;
        current = &row->old[column_idx];
        if (!ovsdb_datum_equals(current, &parsed, &column->type)) {
            /* After the swap 'parsed' holds the previous value, which is
             * destroyed below. */
            ovsdb_datum_swap(current, &parsed);
            if (table->modes[column_idx] & OVSDB_IDL_ALERT) {
                alert = true;
            }
        }
        /* An unchanged value needs no action: the OVSDB monitor protocol
         * always includes every value in a row. */

        ovsdb_datum_destroy(&parsed, &column->type);
    }
    return alert;
}

/* When a row A refers to row B through a column with a "refTable" constraint,
 * but row B does not exist, row B is called an "orphan row".  Orphan rows
 * should not persist, because the database enforces referential integrity, but
 * they can appear transiently as changes from the database are received (the
 * database doesn't try to topologically sort them and circular references mean
 * it isn't always possible anyhow).
 *
 * This function returns true if 'row' is an orphan row, otherwise false.
*/
static bool
ovsdb_idl_row_is_orphan(const struct ovsdb_idl_row *row)
{
    return !row->old && !row->new;
}

/* Returns true if 'row' is conceptually part of the database as modified by
 * the current transaction (if any), false otherwise.
 *
 * This function will return true if 'row' is not an orphan (see the comment on
 * ovsdb_idl_row_is_orphan()) and:
 *
 *   - 'row' exists in the database and has not been deleted within the
 *     current transaction (if any).
 *
 *   - 'row' was inserted within the current transaction and has not been
 *     deleted.  (In the latter case you should not have passed 'row' in at
 *     all, because ovsdb_idl_txn_delete() freed it.)
 *
 * This function will return false if 'row' is an orphan or if 'row' was
 * deleted within the current transaction.
 */
static bool
ovsdb_idl_row_exists(const struct ovsdb_idl_row *row)
{
    return row->new != NULL;
}

/* Calls the 'parse' callback of every column in 'row''s table, passing the
 * column's datum from 'row->old'.  'row->old' must therefore be nonnull. */
static void
ovsdb_idl_row_parse(struct ovsdb_idl_row *row)
{
    const struct ovsdb_idl_table_class *class = row->table->class;
    size_t i;

    for (i = 0; i < class->n_columns; i++) {
        const struct ovsdb_idl_column *c = &class->columns[i];
        (c->parse)(row, &row->old[i]);
    }
}

/* Calls the 'unparse' callback of every column in 'row''s table. */
static void
ovsdb_idl_row_unparse(struct ovsdb_idl_row *row)
{
    const struct ovsdb_idl_table_class *class = row->table->class;
    size_t i;

    for (i = 0; i < class->n_columns; i++) {
        const struct ovsdb_idl_column *c = &class->columns[i];
        (c->unparse)(row);
    }
}

/* Destroys and frees 'row''s committed data, turning 'row' into an orphan.
 * Does nothing if 'row' is already an orphan.  'row->old' and 'row->new' must
 * be the same pointer, i.e. 'row' must have no separate transactional
 * data. */
static void
ovsdb_idl_row_clear_old(struct ovsdb_idl_row *row)
{
    ovs_assert(row->old == row->new);
    if (!ovsdb_idl_row_is_orphan(row)) {
        const struct ovsdb_idl_table_class *class = row->table->class;
        size_t i;

        for (i = 0; i < class->n_columns; i++) {
            ovsdb_datum_destroy(&row->old[i], &class->columns[i].type);
        }
        free(row->old);
        row->old = row->new = NULL;
    }
}

/* Discards 'row''s transactional data, if it has any that is distinct from
 * its committed data, and makes 'row->new' point to 'row->old' again. */
static void
ovsdb_idl_row_clear_new(struct ovsdb_idl_row *row)
{
    if (row->old != row->new) {
        if (row->new) {
            const struct ovsdb_idl_table_class *class = row->table->class;
            size_t i;

            /* Only the datums whose bit is set in 'written' are destroyed
             * here. */
            if (row->written) {
                BITMAP_FOR_EACH_1 (i, class->n_columns, row->written) {
                    ovsdb_datum_destroy(&row->new[i], &class->columns[i].type);
                }
            }
            free(row->new);
            free(row->written);
            row->written = NULL;
        }
        row->new = row->old;
    }
}

/* Removes and frees every forward arc of 'row'. */
static void
ovsdb_idl_row_clear_arcs(struct ovsdb_idl_row *row, bool destroy_dsts)
{
    struct ovsdb_idl_arc *arc, *next;

    /* Delete all forward arcs.  If 'destroy_dsts', destroy any orphaned rows
     * that this causes to be unreferenced. */
    LIST_FOR_EACH_SAFE (arc, next, src_node, &row->src_arcs) {
        list_remove(&arc->dst_node);
        if (destroy_dsts
            && ovsdb_idl_row_is_orphan(arc->dst)
            && list_is_empty(&arc->dst->dst_arcs)) {
            ovsdb_idl_row_destroy(arc->dst);
        }
        free(arc);
    }
    /* The arcs were freed without being unlinked from 'src_arcs', so reset
     * the list head. */
    list_init(&row->src_arcs);
}

/* Force nodes that reference 'row' to reparse. */
static void
ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *row)
{
    struct ovsdb_idl_arc *arc, *next;

    /* This is trickier than it looks.  ovsdb_idl_row_clear_arcs() will destroy
     * 'arc', so we need to use the "safe" variant of list traversal.  However,
     * calling an ovsdb_idl_column's 'parse' function will add an arc
     * equivalent to 'arc' to row->dst_arcs.  That could be a problem for
     * traversal, but it adds it at the beginning of the list to prevent us
     * from stumbling upon it again.
     *
     * (If duplicate arcs were possible then we would need to make sure that
     * 'next' didn't also point into 'arc''s destination, but we forbid
     * duplicate arcs.) */
    LIST_FOR_EACH_SAFE (arc, next, dst_node, &row->dst_arcs) {
        struct ovsdb_idl_row *ref = arc->src;

        ovsdb_idl_row_unparse(ref);
        ovsdb_idl_row_clear_arcs(ref, false);
        ovsdb_idl_row_parse(ref);
    }
}

/* Allocates a zeroed row of 'class->allocation_size' bytes, runs the class's
 * 'row_init' callback on it, and initializes its arc lists and transaction
 * node.  The row is not added to any table. */
static struct ovsdb_idl_row *
ovsdb_idl_row_create__(const struct ovsdb_idl_table_class *class)
{
    struct ovsdb_idl_row *row = xzalloc(class->allocation_size);
    class->row_init(row);
    list_init(&row->src_arcs);
    list_init(&row->dst_arcs);
    hmap_node_nullify(&row->txn_node);
    return row;
}

/* Creates a new row with UUID 'uuid' and adds it to 'table'.  The new row has
 * no data, so it starts out as an orphan. */
static struct ovsdb_idl_row *
ovsdb_idl_row_create(struct ovsdb_idl_table *table, const struct uuid *uuid)
{
    struct ovsdb_idl_row *row = ovsdb_idl_row_create__(table->class);
    hmap_insert(&table->rows, &row->hmap_node, uuid_hash(uuid));
    row->uuid = *uuid;
    row->table = table;
    return row;
}

/* Frees 'row''s committed data, removes 'row' from its table and frees it.
 * Does nothing if 'row' is null.  Does not touch 'row''s arcs. */
static void
ovsdb_idl_row_destroy(struct ovsdb_idl_row *row)
{
    if (row) {
        ovsdb_idl_row_clear_old(row);
        hmap_remove(&row->table->rows, &row->hmap_node);
        free(row);
    }
}

/* Gives the orphan 'row' its initial contents: every column starts at its
 * type's default value and is then overwritten by whatever 'row_json'
 * supplies.  Afterward parses 'row' and reparses every row that refers to
 * it. */
static void
ovsdb_idl_insert_row(struct ovsdb_idl_row *row, const struct json *row_json)
{
    const struct ovsdb_idl_table_class *class = row->table->class;
    size_t i;

    ovs_assert(!row->old && !row->new);
    row->old = row->new = xmalloc(class->n_columns * sizeof *row->old);
    for (i = 0; i < class->n_columns; i++) {
        ovsdb_datum_init_default(&row->old[i], &class->columns[i].type);
    }
    ovsdb_idl_row_update(row, row_json);
    ovsdb_idl_row_parse(row);

    ovsdb_idl_row_reparse_backrefs(row);
}

/* Removes 'row''s contents.  If no other row refers to 'row' it is destroyed
 * outright; otherwise it is kept as an orphan and the rows that refer to it
 * are reparsed. */
static void
ovsdb_idl_delete_row(struct ovsdb_idl_row *row)
{
    ovsdb_idl_row_unparse(row);
    ovsdb_idl_row_clear_arcs(row, true);
    ovsdb_idl_row_clear_old(row);
    if (list_is_empty(&row->dst_arcs)) {
        ovsdb_idl_row_destroy(row);
    } else {
        ovsdb_idl_row_reparse_backrefs(row);
    }
}

/* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
 * otherwise.
*/
static bool
ovsdb_idl_modify_row(struct ovsdb_idl_row *row, const struct json *row_json)
{
    bool changed;

    /* Drop the parsed form and the forward arcs before touching the data;
     * reparsing afterward rebuilds both from the updated columns. */
    ovsdb_idl_row_unparse(row);
    ovsdb_idl_row_clear_arcs(row, true);
    changed = ovsdb_idl_row_update(row, row_json);
    ovsdb_idl_row_parse(row);

    return changed;
}

/* Returns true if an arc from 'src' to 'dst' may be added, that is, if it
 * would be neither a self-arc nor a duplicate of an existing arc. */
static bool
may_add_arc(const struct ovsdb_idl_row *src, const struct ovsdb_idl_row *dst)
{
    const struct ovsdb_idl_arc *arc;

    /* No self-arcs. */
    if (src == dst) {
        return false;
    }

    /* No duplicate arcs.
     *
     * We only need to test whether the first arc in dst->dst_arcs originates
     * at 'src', since we add all of the arcs from a given source in a clump
     * (in a single call to ovsdb_idl_row_parse()) and new arcs are always
     * added at the front of the dst_arcs list. */
    if (list_is_empty(&dst->dst_arcs)) {
        return true;
    }
    arc = CONTAINER_OF(dst->dst_arcs.next, struct ovsdb_idl_arc, dst_node);
    return arc->src != src;
}

/* Returns the table in 'idl' that corresponds to 'table_class', which must be
 * an element of 'idl->class->tables' (the lookup is by array index). */
static struct ovsdb_idl_table *
ovsdb_idl_table_from_class(const struct ovsdb_idl *idl,
                           const struct ovsdb_idl_table_class *table_class)
{
    return &idl->tables[table_class - idl->class->tables];
}

/* Called by ovsdb-idlc generated code.
 *
 * Looks up the row with UUID 'dst_uuid' in the table for 'dst_table_class'
 * on behalf of 'src', which refers to it.  Outside a transaction this also
 * records the reference as an arc from 'src' to the destination row, creating
 * the destination as an orphan if it does not exist yet.
 *
 * Returns the destination row, or a null pointer if it does not exist (or,
 * within a transaction, has been deleted). */
struct ovsdb_idl_row *
ovsdb_idl_get_row_arc(struct ovsdb_idl_row *src,
                      struct ovsdb_idl_table_class *dst_table_class,
                      const struct uuid *dst_uuid)
{
    struct ovsdb_idl *idl = src->table->idl;
    struct ovsdb_idl_table *dst_table;
    struct ovsdb_idl_arc *arc;
    struct ovsdb_idl_row *dst;

    dst_table = ovsdb_idl_table_from_class(idl, dst_table_class);
    dst = ovsdb_idl_get_row(dst_table, dst_uuid);
    if (idl->txn) {
        /* We're being called from ovsdb_idl_txn_write().  We must not update
         * any arcs, because the transaction will be backed out at commit or
         * abort time and we don't want our graph screwed up.
         *
         * Just return the destination row, if there is one and it has not been
         * deleted. */
        if (dst && (hmap_node_is_null(&dst->txn_node) || dst->new)) {
            return dst;
        }
        return NULL;
    } else {
        /* We're being called from some other context.  Update the graph. */
        if (!dst) {
            dst = ovsdb_idl_row_create(dst_table, dst_uuid);
        }

        /* Add a new arc, if it wouldn't be a self-arc or a duplicate arc. */
        if (may_add_arc(src, dst)) {
            /* The arc *must* be added at the front of the dst_arcs list.  See
             * ovsdb_idl_row_reparse_backrefs() for details. */
            arc = xmalloc(sizeof *arc);
            list_push_front(&src->src_arcs, &arc->src_node);
            list_push_front(&dst->dst_arcs, &arc->dst_node);
            arc->src = src;
            arc->dst = dst;
        }

        return !ovsdb_idl_row_is_orphan(dst) ? dst : NULL;
    }
}

/* Searches 'tc''s table in 'idl' for a row with UUID 'uuid'.  Returns a
 * pointer to the row if there is one, otherwise a null pointer.  */
const struct ovsdb_idl_row *
ovsdb_idl_get_row_for_uuid(const struct ovsdb_idl *idl,
                           const struct ovsdb_idl_table_class *tc,
                           const struct uuid *uuid)
{
    return ovsdb_idl_get_row(ovsdb_idl_table_from_class(idl, tc), uuid);
}

/* Starting at 'node' (which may be null), returns the first row in 'table'
 * for which ovsdb_idl_row_exists() is true, or a null pointer if no such row
 * remains. */
static struct ovsdb_idl_row *
next_real_row(struct ovsdb_idl_table *table, struct hmap_node *node)
{
    for (; node; node = hmap_next(&table->rows, node)) {
        struct ovsdb_idl_row *row;

        row = CONTAINER_OF(node, struct ovsdb_idl_row, hmap_node);
        if (ovsdb_idl_row_exists(row)) {
            return row;
        }
    }
    return NULL;
}

/* Returns a row in 'table_class''s table in 'idl', or a null pointer if that
 * table is empty.
 *
 * Database tables are internally maintained as hash tables, so adding or
 * removing rows while traversing the same table can cause some rows to be
 * visited twice or not at all. */
const struct ovsdb_idl_row *
ovsdb_idl_first_row(const struct ovsdb_idl *idl,
                    const struct ovsdb_idl_table_class *table_class)
{
    struct ovsdb_idl_table *table
        = ovsdb_idl_table_from_class(idl, table_class);
    return next_real_row(table, hmap_first(&table->rows));
}

/* Returns a row following 'row' within its table, or a null pointer if 'row'
 * is the last row in its table.
*/ const struct ovsdb_idl_row * ovsdb_idl_next_row(const struct ovsdb_idl_row *row) { struct ovsdb_idl_table *table = row->table; return next_real_row(table, hmap_next(&table->rows, &row->hmap_node)); } /* Reads and returns the value of 'column' within 'row'. If an ongoing * transaction has changed 'column''s value, the modified value is returned. * * The caller must not modify or free the returned value. * * Various kinds of changes can invalidate the returned value: writing to the * same 'column' in 'row' (e.g. with ovsdb_idl_txn_write()), deleting 'row' * (e.g. with ovsdb_idl_txn_delete()), or completing an ongoing transaction * (e.g. with ovsdb_idl_txn_commit() or ovsdb_idl_txn_abort()). If the * returned value is needed for a long time, it is best to make a copy of it * with ovsdb_datum_clone(). */ const struct ovsdb_datum * ovsdb_idl_read(const struct ovsdb_idl_row *row, const struct ovsdb_idl_column *column) { const struct ovsdb_idl_table_class *class; size_t column_idx; ovs_assert(!ovsdb_idl_row_is_synthetic(row)); class = row->table->class; column_idx = column - class->columns; ovs_assert(row->new != NULL); ovs_assert(column_idx < class->n_columns); if (row->written && bitmap_is_set(row->written, column_idx)) { return &row->new[column_idx]; } else if (row->old) { return &row->old[column_idx]; } else { return ovsdb_datum_default(&column->type); } } /* Same as ovsdb_idl_read(), except that it also asserts that 'column' has key * type 'key_type' and value type 'value_type'. (Scalar and set types will * have a value type of OVSDB_TYPE_VOID.) * * This is useful in code that "knows" that a particular column has a given * type, so that it will abort if someone changes the column's type without * updating the code that uses it. 
*/ const struct ovsdb_datum * ovsdb_idl_get(const struct ovsdb_idl_row *row, const struct ovsdb_idl_column *column, enum ovsdb_atomic_type key_type OVS_UNUSED, enum ovsdb_atomic_type value_type OVS_UNUSED) { ovs_assert(column->type.key.type == key_type); ovs_assert(column->type.value.type == value_type); return ovsdb_idl_read(row, column); } /* Returns false if 'row' was obtained from the IDL, true if it was initialized * to all-zero-bits by some other entity. If 'row' was set up some other way * then the return value is indeterminate. */ bool ovsdb_idl_row_is_synthetic(const struct ovsdb_idl_row *row) { return row->table == NULL; } /* Transactions. */ static void ovsdb_idl_txn_complete(struct ovsdb_idl_txn *txn, enum ovsdb_idl_txn_status); /* Returns a string representation of 'status'. The caller must not modify or * free the returned string. * * The return value is probably useful only for debug log messages and unit * tests. */ const char * ovsdb_idl_txn_status_to_string(enum ovsdb_idl_txn_status status) { switch (status) { case TXN_UNCOMMITTED: return "uncommitted"; case TXN_UNCHANGED: return "unchanged"; case TXN_INCOMPLETE: return "incomplete"; case TXN_ABORTED: return "aborted"; case TXN_SUCCESS: return "success"; case TXN_TRY_AGAIN: return "try again"; case TXN_NOT_LOCKED: return "not locked"; case TXN_ERROR: return "error"; } return ""; } /* Starts a new transaction on 'idl'. A given ovsdb_idl may only have a single * active transaction at a time. See the large comment in ovsdb-idl.h for * general information on transactions. 
*/ struct ovsdb_idl_txn * ovsdb_idl_txn_create(struct ovsdb_idl *idl) { struct ovsdb_idl_txn *txn; ovs_assert(!idl->txn); idl->txn = txn = xmalloc(sizeof *txn); txn->request_id = NULL; txn->idl = idl; hmap_init(&txn->txn_rows); txn->status = TXN_UNCOMMITTED; txn->error = NULL; txn->dry_run = false; ds_init(&txn->comment); txn->commit_seqno = txn->idl->change_seqno; txn->inc_table = NULL; txn->inc_column = NULL; hmap_init(&txn->inserted_rows); return txn; } /* Appends 's', which is treated as a printf()-type format string, to the * comments that will be passed to the OVSDB server when 'txn' is committed. * (The comment will be committed to the OVSDB log, which "ovsdb-tool * show-log" can print in a relatively human-readable form.) */ void ovsdb_idl_txn_add_comment(struct ovsdb_idl_txn *txn, const char *s, ...) { va_list args; if (txn->comment.length) { ds_put_char(&txn->comment, '\n'); } va_start(args, s); ds_put_format_valist(&txn->comment, s, args); va_end(args); } /* Marks 'txn' as a transaction that will not actually modify the database. In * almost every way, the transaction is treated like other transactions. It * must be committed or aborted like other transactions, it will be sent to the * database server like other transactions, and so on. The only difference is * that the operations sent to the database server will include, as the last * step, an "abort" operation, so that any changes made by the transaction will * not actually take effect. */ void ovsdb_idl_txn_set_dry_run(struct ovsdb_idl_txn *txn) { txn->dry_run = true; } /* Causes 'txn', when committed, to increment the value of 'column' within * 'row' by 1. 'column' must have an integer type. After 'txn' commits * successfully, the client may retrieve the final (incremented) value of * 'column' with ovsdb_idl_txn_get_increment_new_value(). 
* * The client could accomplish something similar with ovsdb_idl_read(), * ovsdb_idl_txn_verify() and ovsdb_idl_txn_write(), or with ovsdb-idlc * generated wrappers for these functions. However, ovsdb_idl_txn_increment() * will never (by itself) fail because of a verify error. * * The intended use is for incrementing the "next_cfg" column in the * Open_vSwitch table. */ void ovsdb_idl_txn_increment(struct ovsdb_idl_txn *txn, const struct ovsdb_idl_row *row, const struct ovsdb_idl_column *column) { ovs_assert(!txn->inc_table); ovs_assert(column->type.key.type == OVSDB_TYPE_INTEGER); ovs_assert(column->type.value.type == OVSDB_TYPE_VOID); txn->inc_table = row->table->class->name; txn->inc_column = column->name; txn->inc_row = row->uuid; } /* Destroys 'txn' and frees all associated memory. If ovsdb_idl_txn_commit() * has been called for 'txn' but the commit is still incomplete (that is, the * last call returned TXN_INCOMPLETE) then the transaction may or may not still * end up committing at the database server, but the client will not be able to * get any further status information back. */ void ovsdb_idl_txn_destroy(struct ovsdb_idl_txn *txn) { struct ovsdb_idl_txn_insert *insert, *next; json_destroy(txn->request_id); if (txn->status == TXN_INCOMPLETE) { hmap_remove(&txn->idl->outstanding_txns, &txn->hmap_node); } ovsdb_idl_txn_abort(txn); ds_destroy(&txn->comment); free(txn->error); HMAP_FOR_EACH_SAFE (insert, next, hmap_node, &txn->inserted_rows) { free(insert); } hmap_destroy(&txn->inserted_rows); free(txn); } /* Causes poll_block() to wake up if 'txn' has completed committing. 
*/
void
ovsdb_idl_txn_wait(const struct ovsdb_idl_txn *txn)
{
    if (txn->status != TXN_UNCOMMITTED && txn->status != TXN_INCOMPLETE) {
        poll_immediate_wake();
    }
}

/* Returns a new JSON array of the form [["_uuid", "==", ["uuid", <uuid>]]],
 * where <uuid> is 'uuid' formatted as a string.  The commit code uses it as
 * the "where" member of operations. */
static struct json *
where_uuid_equals(const struct uuid *uuid)
{
    return
        json_array_create_1(
            json_array_create_3(
                json_string_create("_uuid"),
                json_string_create("=="),
                json_array_create_2(
                    json_string_create("uuid"),
                    json_string_create_nocopy(
                        xasprintf(UUID_FMT, UUID_ARGS(uuid))))));
}

/* Returns a newly allocated string consisting of "row" followed by 'uuid'
 * formatted as a string, with every '-' replaced by '_'.  The commit code
 * uses it as the "uuid-name" of an inserted row. */
static char *
uuid_name_from_uuid(const struct uuid *uuid)
{
    char *name;
    char *p;

    name = xasprintf("row"UUID_FMT, UUID_ARGS(uuid));
    for (p = name; *p != '\0'; p++) {
        if (*p == '-') {
            *p = '_';
        }
    }

    return name;
}

/* Returns the row with the given 'uuid' among the rows that 'txn' has
 * touched (its 'txn_rows'), or a null pointer if there is none. */
static const struct ovsdb_idl_row *
ovsdb_idl_txn_get_row(const struct ovsdb_idl_txn *txn, const struct uuid *uuid)
{
    const struct ovsdb_idl_row *row;

    HMAP_FOR_EACH_WITH_HASH (row, txn_node, uuid_hash(uuid), &txn->txn_rows) {
        if (uuid_equals(&row->uuid, uuid)) {
            return row;
        }
    }
    return NULL;
}

/* Recursively walks 'json' and replaces every ["uuid", <uuid>] pair whose
 * UUID belongs to a row inserted by 'txn' (a row with new data but no old
 * data) by ["named-uuid", <name>], where <name> comes from
 * uuid_name_from_uuid().  Returns the possibly replaced JSON; a replaced
 * 'json' is destroyed.
 *
 * XXX there must be a cleaner way to do this */
static struct json *
substitute_uuids(struct json *json, const struct ovsdb_idl_txn *txn)
{
    if (json->type == JSON_ARRAY) {
        struct uuid uuid;
        size_t i;

        if (json->u.array.n == 2
            && json->u.array.elems[0]->type == JSON_STRING
            && json->u.array.elems[1]->type == JSON_STRING
            && !strcmp(json->u.array.elems[0]->u.string, "uuid")
            && uuid_from_string(&uuid, json->u.array.elems[1]->u.string)) {
            const struct ovsdb_idl_row *row;

            row = ovsdb_idl_txn_get_row(txn, &uuid);
            if (row && !row->old && row->new) {
                json_destroy(json);

                return json_array_create_2(
                    json_string_create("named-uuid"),
                    json_string_create_nocopy(uuid_name_from_uuid(&uuid)));
            }
        }

        for (i = 0; i < json->u.array.n; i++) {
            json->u.array.elems[i] = substitute_uuids(json->u.array.elems[i],
                                                      txn);
        }
    } else if (json->type == JSON_OBJECT) {
        struct shash_node *node;

        SHASH_FOR_EACH (node, json_object(json)) {
            node->data = substitute_uuids(node->data, txn);
        }
    }
    return json;
}

/* Detaches 'txn' from its IDL and backs out every change it made to the
 * IDL's rows: written rows are reparsed from their old data, rows inserted
 * by the transaction are removed and freed, and 'txn_rows' ends up empty. */
static void
ovsdb_idl_txn_disassemble(struct ovsdb_idl_txn *txn)
{
    struct ovsdb_idl_row *row, *next;

    /* This must happen early.  Otherwise, ovsdb_idl_row_parse() will call an
     * ovsdb_idl_column's 'parse' function, which will call
     * ovsdb_idl_get_row_arc(), which will see that the IDL is in a
     * transaction and fail to update the graph.  */
    txn->idl->txn = NULL;

    HMAP_FOR_EACH_SAFE (row, next, txn_node, &txn->txn_rows) {
        if (row->old) {
            if (row->written) {
                ovsdb_idl_row_unparse(row);
                ovsdb_idl_row_clear_arcs(row, false);
                ovsdb_idl_row_parse(row);
            }
        } else {
            ovsdb_idl_row_unparse(row);
        }
        ovsdb_idl_row_clear_new(row);

        free(row->prereqs);
        row->prereqs = NULL;

        free(row->written);
        row->written = NULL;

        hmap_remove(&txn->txn_rows, &row->txn_node);
        hmap_node_nullify(&row->txn_node);
        if (!row->old) {
            /* The row was inserted by this transaction, so it goes away. */
            hmap_remove(&row->table->rows, &row->hmap_node);
            free(row);
        }
    }
    hmap_destroy(&txn->txn_rows);
    hmap_init(&txn->txn_rows);
}

/* Attempts to commit 'txn'.  Returns the status of the commit operation, one
 * of the following TXN_* constants:
 *
 *   TXN_INCOMPLETE:
 *
 *       The transaction is in progress, but not yet complete.  The caller
 *       should call again later, after calling ovsdb_idl_run() to let the IDL
 *       do OVSDB protocol processing.
 *
 *   TXN_UNCHANGED:
 *
 *       The transaction is complete.  (It didn't actually change the database,
 *       so the IDL didn't send any request to the database server.)
 *
 *   TXN_ABORTED:
 *
 *       The caller previously called ovsdb_idl_txn_abort().
 *
 *   TXN_SUCCESS:
 *
 *       The transaction was successful.  The update made by the transaction
 *       (and possibly other changes made by other database clients) should
 *       already be visible in the IDL.
 *
 *   TXN_TRY_AGAIN:
 *
 *       The transaction failed for some transient reason, e.g. because a
 *       "verify" operation reported an inconsistency or due to a network
 *       problem.  The caller should wait for a change to the database, then
 *       compose a new transaction, and commit the new transaction.
 *
 *       Use the return value of ovsdb_idl_get_seqno() to wait for a change in
 *       the database.  It is important to use its return value *before* the
 *       initial call to ovsdb_idl_txn_commit() as the baseline for this
 *       purpose, because the change that one should wait for can happen after
 *       the initial call but before the call that returns TXN_TRY_AGAIN, and
 *       using some other baseline value in that situation could cause an
 *       indefinite wait if the database rarely changes.
 *
 *   TXN_NOT_LOCKED:
 *
 *       The transaction failed because the IDL has been configured to require
 *       a database lock (with ovsdb_idl_set_lock()) but didn't get it yet or
 *       has already lost it.
 *
 * Committing a transaction rolls back all of the changes that it made to the
 * IDL's copy of the database.  If the transaction commits successfully, then
 * the database server will send an update and, thus, the IDL will be updated
 * with the committed changes. */
enum ovsdb_idl_txn_status
ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn)
{
    struct ovsdb_idl_row *row;
    struct json *operations;
    bool any_updates;

    /* 'txn' is no longer the IDL's active transaction (it was already
     * committed or aborted), so just report its current status. */
    if (txn != txn->idl->txn) {
        return txn->status;
    }

    /* If we need a lock but don't have it, give up quickly. */
    if (txn->idl->lock_name && !ovsdb_idl_has_lock(txn->idl)) {
        txn->status = TXN_NOT_LOCKED;
        ovsdb_idl_txn_disassemble(txn);
        return txn->status;
    }

    /* Element 0 of 'operations' is the database name, so the operation stored
     * at array index N is operation number N - 1. */
    operations = json_array_create_1(
        json_string_create(txn->idl->class->database));

    /* Assert that we have the required lock (avoiding a race). */
    if (txn->idl->lock_name) {
        struct json *op = json_object_create();
        json_array_add(operations, op);
        json_object_put_string(op, "op", "assert");
        json_object_put_string(op, "lock", txn->idl->lock_name);
    }

    /* Add prerequisites and declarations of new rows. */
    HMAP_FOR_EACH (row, txn_node, &txn->txn_rows) {
        /* XXX check that deleted rows exist even if no prereqs? */
        if (row->prereqs) {
            const struct ovsdb_idl_table_class *class = row->table->class;
            size_t n_columns = class->n_columns;
            struct json *op, *columns, *row_json;
            size_t idx;

            op = json_object_create();
            json_array_add(operations, op);
            json_object_put_string(op, "op", "wait");
            json_object_put_string(op, "table", class->name);
            json_object_put(op, "timeout", json_integer_create(0));
            json_object_put(op, "where", where_uuid_equals(&row->uuid));
            json_object_put_string(op, "until", "==");
            columns = json_array_create_empty();
            json_object_put(op, "columns", columns);
            row_json = json_object_create();
            json_object_put(op, "rows", json_array_create_1(row_json));

            BITMAP_FOR_EACH_1 (idx, n_columns, row->prereqs) {
                const struct ovsdb_idl_column *column = &class->columns[idx];
                json_array_add(columns, json_string_create(column->name));
                json_object_put(row_json, column->name,
                                ovsdb_datum_to_json(&row->old[idx],
                                                    &column->type));
            }
        }
    }

    /* Add updates. */
    any_updates = false;
    HMAP_FOR_EACH (row, txn_node, &txn->txn_rows) {
        const struct ovsdb_idl_table_class *class = row->table->class;

        if (!row->new) {
            if (class->is_root) {
                struct json *op = json_object_create();
                json_object_put_string(op, "op", "delete");
                json_object_put_string(op, "table", class->name);
                json_object_put(op, "where", where_uuid_equals(&row->uuid));
                json_array_add(operations, op);
                any_updates = true;
            } else {
                /* Let ovsdb-server decide whether to really delete it. */
            }
        } else if (row->old != row->new) {
            struct json *row_json;
            struct json *op;
            size_t idx;

            op = json_object_create();
            json_object_put_string(op, "op", row->old ? "update" : "insert");
            json_object_put_string(op, "table", class->name);
            if (row->old) {
                json_object_put(op, "where", where_uuid_equals(&row->uuid));
            } else {
                struct ovsdb_idl_txn_insert *insert;

                any_updates = true;

                json_object_put(op, "uuid-name",
                                json_string_create_nocopy(
                                    uuid_name_from_uuid(&row->uuid)));

                /* Remember which operation inserts this row, so that its
                 * reply can be matched up later. */
                insert = xmalloc(sizeof *insert);
                insert->dummy = row->uuid;
                insert->op_index = operations->u.array.n - 1;
                uuid_zero(&insert->real);
                hmap_insert(&txn->inserted_rows, &insert->hmap_node,
                            uuid_hash(&insert->dummy));
            }
            row_json = json_object_create();
            json_object_put(op, "row", row_json);

            if (row->written) {
                BITMAP_FOR_EACH_1 (idx, class->n_columns, row->written) {
                    const struct ovsdb_idl_column *column =
                                                        &class->columns[idx];

                    if (row->old
                        || !ovsdb_datum_is_default(&row->new[idx],
                                                  &column->type)) {
                        json_object_put(row_json, column->name,
                                        substitute_uuids(
                                            ovsdb_datum_to_json(&row->new[idx],
                                                                &column->type),
                                            txn));

                        /* If anything really changed, consider it an update.
                         * We can't suppress not-really-changed values earlier
                         * or transactions would become nonatomic (see the big
                         * comment inside ovsdb_idl_txn_write()). */
                        if (!any_updates && row->old &&
                            !ovsdb_datum_equals(&row->old[idx], &row->new[idx],
                                                &column->type)) {
                            any_updates = true;
                        }
                    }
                }
            }

            /* An "update" that sets no columns is dropped. */
            if (!row->old || !shash_is_empty(json_object(row_json))) {
                json_array_add(operations, op);
            } else {
                json_destroy(op);
            }
        }
    }

    /* Add increment. */
    if (txn->inc_table && any_updates) {
        struct json *op;

        txn->inc_index = operations->u.array.n - 1;

        op = json_object_create();
        json_object_put_string(op, "op", "mutate");
        json_object_put_string(op, "table", txn->inc_table);
        json_object_put(op, "where",
                        substitute_uuids(where_uuid_equals(&txn->inc_row),
                                         txn));
        json_object_put(op, "mutations",
                        json_array_create_1(
                            json_array_create_3(
                                json_string_create(txn->inc_column),
                                json_string_create("+="),
                                json_integer_create(1))));
        json_array_add(operations, op);

        op = json_object_create();
        json_object_put_string(op, "op", "select");
        json_object_put_string(op, "table", txn->inc_table);
        json_object_put(op, "where",
                        substitute_uuids(where_uuid_equals(&txn->inc_row),
                                         txn));
        json_object_put(op, "columns",
                        json_array_create_1(json_string_create(
                                                txn->inc_column)));
        json_array_add(operations, op);
    }

    if (txn->comment.length) {
        struct json *op = json_object_create();
        json_object_put_string(op, "op", "comment");
        json_object_put_string(op, "comment", ds_cstr(&txn->comment));
        json_array_add(operations, op);
    }

    if (txn->dry_run) {
        struct json *op = json_object_create();
        json_object_put_string(op, "op", "abort");
        json_array_add(operations, op);
    }

    if (!any_updates) {
        /* Nothing really changed, so don't bother the server. */
        txn->status = TXN_UNCHANGED;
        json_destroy(operations);
    } else if (!jsonrpc_session_send(
                   txn->idl->session,
                   jsonrpc_create_request(
                       "transact", operations, &txn->request_id))) {
        hmap_insert(&txn->idl->outstanding_txns, &txn->hmap_node,
                    json_hash(txn->request_id, 0));
        txn->status = TXN_INCOMPLETE;
    } else {
        txn->status = TXN_TRY_AGAIN;
    }

    ovsdb_idl_txn_disassemble(txn);
    return txn->status;
}

/* Attempts to commit 'txn', blocking until the commit either succeeds or
 * fails.  Returns the final commit status, which may be any TXN_* value other
 * than TXN_INCOMPLETE.
 *
 * This function calls ovsdb_idl_run() on 'txn''s IDL, so it may cause the
 * return value of ovsdb_idl_get_seqno() to change.
*/ enum ovsdb_idl_txn_status ovsdb_idl_txn_commit_block(struct ovsdb_idl_txn *txn) { enum ovsdb_idl_txn_status status; fatal_signal_run(); while ((status = ovsdb_idl_txn_commit(txn)) == TXN_INCOMPLETE) { ovsdb_idl_run(txn->idl); ovsdb_idl_wait(txn->idl); ovsdb_idl_txn_wait(txn); poll_block(); } return status; } /* Returns the final (incremented) value of the column in 'txn' that was set to * be incremented by ovsdb_idl_txn_increment(). 'txn' must have committed * successfully. */ int64_t ovsdb_idl_txn_get_increment_new_value(const struct ovsdb_idl_txn *txn) { ovs_assert(txn->status == TXN_SUCCESS); return txn->inc_new_value; } /* Aborts 'txn' without sending it to the database server. This is effective * only if ovsdb_idl_txn_commit() has not yet been called for 'txn'. * Otherwise, it has no effect. * * Aborting a transaction doesn't free its memory. Use * ovsdb_idl_txn_destroy() to do that. */ void ovsdb_idl_txn_abort(struct ovsdb_idl_txn *txn) { ovsdb_idl_txn_disassemble(txn); if (txn->status == TXN_UNCOMMITTED || txn->status == TXN_INCOMPLETE) { txn->status = TXN_ABORTED; } } /* Returns a string that reports the error status for 'txn'. The caller must * not modify or free the returned string. A call to ovsdb_idl_txn_destroy() * for 'txn' may free the returned string. * * The return value is ordinarily one of the strings that * ovsdb_idl_txn_status_to_string() would return, but if the transaction failed * due to an error reported by the database server, the return value is that * error. 
*/ const char * ovsdb_idl_txn_get_error(const struct ovsdb_idl_txn *txn) { if (txn->status != TXN_ERROR) { return ovsdb_idl_txn_status_to_string(txn->status); } else if (txn->error) { return txn->error; } else { return "no error details available"; } } static void ovsdb_idl_txn_set_error_json(struct ovsdb_idl_txn *txn, const struct json *json) { if (txn->error == NULL) { txn->error = json_to_string(json, JSSF_SORT); } } /* For transaction 'txn' that completed successfully, finds and returns the * permanent UUID that the database assigned to a newly inserted row, given the * 'uuid' that ovsdb_idl_txn_insert() assigned locally to that row. * * Returns NULL if 'uuid' is not a UUID assigned by ovsdb_idl_txn_insert() or * if it was assigned by that function and then deleted by * ovsdb_idl_txn_delete() within the same transaction. (Rows that are inserted * and then deleted within a single transaction are never sent to the database * server, so it never assigns them a permanent UUID.) */ const struct uuid * ovsdb_idl_txn_get_insert_uuid(const struct ovsdb_idl_txn *txn, const struct uuid *uuid) { const struct ovsdb_idl_txn_insert *insert; ovs_assert(txn->status == TXN_SUCCESS || txn->status == TXN_UNCHANGED); HMAP_FOR_EACH_IN_BUCKET (insert, hmap_node, uuid_hash(uuid), &txn->inserted_rows) { if (uuid_equals(uuid, &insert->dummy)) { return &insert->real; } } return NULL; } static void ovsdb_idl_txn_complete(struct ovsdb_idl_txn *txn, enum ovsdb_idl_txn_status status) { txn->status = status; hmap_remove(&txn->idl->outstanding_txns, &txn->hmap_node); } /* Writes 'datum' to the specified 'column' in 'row_'. Updates both 'row_' * itself and the structs derived from it (e.g. the "struct ovsrec_*", for * ovs-vswitchd). * * 'datum' must have the correct type for its column. The IDL does not check * that it meets schema constraints, but ovsdb-server will do so at commit time * so it had better be correct. * * A transaction must be in progress. 
Replication of 'column' must not have * been disabled (by calling ovsdb_idl_omit()). * * Usually this function is used indirectly through one of the "set" functions * generated by ovsdb-idlc. * * Takes ownership of what 'datum' points to (and in some cases destroys that * data before returning) but makes a copy of 'datum' itself. (Commonly * 'datum' is on the caller's stack.) */ static void ovsdb_idl_txn_write__(const struct ovsdb_idl_row *row_, const struct ovsdb_idl_column *column, struct ovsdb_datum *datum, bool owns_datum) { struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_); const struct ovsdb_idl_table_class *class; size_t column_idx; bool write_only; if (ovsdb_idl_row_is_synthetic(row)) { goto discard_datum; } class = row->table->class; column_idx = column - class->columns; write_only = row->table->modes[column_idx] == OVSDB_IDL_MONITOR; ovs_assert(row->new != NULL); ovs_assert(column_idx < class->n_columns); ovs_assert(row->old == NULL || row->table->modes[column_idx] & OVSDB_IDL_MONITOR); if (row->table->idl->verify_write_only && !write_only) { VLOG_ERR("Bug: Attempt to write to a read/write column (%s:%s) when" " explicitly configured not to.", class->name, column->name); goto discard_datum; } /* If this is a write-only column and the datum being written is the same * as the one already there, just skip the update entirely. This is worth * optimizing because we have a lot of columns that get periodically * refreshed into the database but don't actually change that often. * * We don't do this for read/write columns because that would break * atomicity of transactions--some other client might have written a * different value in that column since we read it. (But if a whole * transaction only does writes of existing values, without making any real * changes, we will drop the whole transaction later in * ovsdb_idl_txn_commit().) 
*/ if (write_only && ovsdb_datum_equals(ovsdb_idl_read(row, column), datum, &column->type)) { goto discard_datum; } if (hmap_node_is_null(&row->txn_node)) { hmap_insert(&row->table->idl->txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid)); } if (row->old == row->new) { row->new = xmalloc(class->n_columns * sizeof *row->new); } if (!row->written) { row->written = bitmap_allocate(class->n_columns); } if (bitmap_is_set(row->written, column_idx)) { ovsdb_datum_destroy(&row->new[column_idx], &column->type); } else { bitmap_set1(row->written, column_idx); } if (owns_datum) { row->new[column_idx] = *datum; } else { ovsdb_datum_clone(&row->new[column_idx], datum, &column->type); } (column->unparse)(row); (column->parse)(row, &row->new[column_idx]); return; discard_datum: if (owns_datum) { ovsdb_datum_destroy(datum, &column->type); } } void ovsdb_idl_txn_write(const struct ovsdb_idl_row *row, const struct ovsdb_idl_column *column, struct ovsdb_datum *datum) { ovsdb_idl_txn_write__(row, column, datum, true); } void ovsdb_idl_txn_write_clone(const struct ovsdb_idl_row *row, const struct ovsdb_idl_column *column, const struct ovsdb_datum *datum) { ovsdb_idl_txn_write__(row, column, CONST_CAST(struct ovsdb_datum *, datum), false); } /* Causes the original contents of 'column' in 'row_' to be verified as a * prerequisite to completing the transaction. That is, if 'column' in 'row_' * changed (or if 'row_' was deleted) between the time that the IDL originally * read its contents and the time that the transaction commits, then the * transaction aborts and ovsdb_idl_txn_commit() returns TXN_AGAIN_WAIT or * TXN_AGAIN_NOW (depending on whether the database change has already been * received). * * The intention is that, to ensure that no transaction commits based on dirty * reads, an application should call ovsdb_idl_txn_verify() on each data item * read as part of a read-modify-write operation. 
*
 * In some cases ovsdb_idl_txn_verify() reduces to a no-op, because the current
 * value of 'column' is already known:
 *
 *   - If 'row_' is a row created by the current transaction (returned by
 *     ovsdb_idl_txn_insert()).
 *
 *   - If 'column' has already been modified (with ovsdb_idl_txn_write())
 *     within the current transaction.
 *
 * Because of the latter property, always call ovsdb_idl_txn_verify() *before*
 * ovsdb_idl_txn_write() for a given read-modify-write.
 *
 * A transaction must be in progress.
 *
 * Usually this function is used indirectly through one of the "verify"
 * functions generated by ovsdb-idlc. */
void
ovsdb_idl_txn_verify(const struct ovsdb_idl_row *row_,
                     const struct ovsdb_idl_column *column)
{
    struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_);
    const struct ovsdb_idl_table_class *class;
    size_t column_idx;

    if (ovsdb_idl_row_is_synthetic(row)) {
        return;
    }

    class = row->table->class;
    column_idx = column - class->columns;

    /* Validate 'column_idx' before it indexes 'modes' and the bitmaps, as
     * ovsdb_idl_read() does. */
    ovs_assert(row->new != NULL);
    ovs_assert(column_idx < class->n_columns);
    ovs_assert(row->old == NULL ||
               row->table->modes[column_idx] & OVSDB_IDL_MONITOR);

    /* Nothing to verify for a row inserted by this transaction or for a column
     * that this transaction has already written. */
    if (!row->old
        || (row->written && bitmap_is_set(row->written, column_idx))) {
        return;
    }

    /* Register the row with the transaction on first use. */
    if (hmap_node_is_null(&row->txn_node)) {
        hmap_insert(&row->table->idl->txn->txn_rows, &row->txn_node,
                    uuid_hash(&row->uuid));
    }
    if (!row->prereqs) {
        row->prereqs = bitmap_allocate(class->n_columns);
    }
    bitmap_set1(row->prereqs, column_idx);
}

/* Deletes 'row_' from its table.  May free 'row_', so it must not be
 * accessed afterward.
 *
 * A transaction must be in progress.
 *
 * Usually this function is used indirectly through one of the "delete"
 * functions generated by ovsdb-idlc.
*/
void
ovsdb_idl_txn_delete(const struct ovsdb_idl_row *row_)
{
    struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_);
    struct ovsdb_idl_table *table;

    if (ovsdb_idl_row_is_synthetic(row)) {
        return;
    }

    ovs_assert(row->new != NULL);
    table = row->table;

    if (row->old) {
        /* The row has committed data: record the deletion in the transaction
         * by leaving the row without any new data. */
        if (hmap_node_is_null(&row->txn_node)) {
            hmap_insert(&table->idl->txn->txn_rows, &row->txn_node,
                        uuid_hash(&row->uuid));
        }
        ovsdb_idl_row_clear_new(row);
        row->new = NULL;
    } else {
        /* The row was inserted by this very transaction, so simply forget
         * that it ever existed. */
        ovsdb_idl_row_unparse(row);
        ovsdb_idl_row_clear_new(row);
        ovs_assert(!row->prereqs);
        hmap_remove(&table->idl->txn->txn_rows, &row->txn_node);
        hmap_remove(&table->rows, &row->hmap_node);
        free(row);
    }
}

/* Inserts and returns a new row in the table with the specified 'class' in the
 * database with open transaction 'txn'.
 *
 * The new row is assigned a provisional UUID.  If 'uuid' is null then one is
 * randomly generated; otherwise 'uuid' should specify a randomly generated
 * UUID not otherwise in use.  ovsdb-server will assign a different UUID when
 * 'txn' is committed, but the IDL will replace any uses of the provisional
 * UUID in the data to be committed by the UUID assigned by ovsdb-server.
 *
 * Usually this function is used indirectly through one of the "insert"
 * functions generated by ovsdb-idlc.
*/
const struct ovsdb_idl_row *
ovsdb_idl_txn_insert(struct ovsdb_idl_txn *txn,
                     const struct ovsdb_idl_table_class *class,
                     const struct uuid *uuid)
{
    struct ovsdb_idl_row *row = ovsdb_idl_row_create__(class);

    if (uuid) {
        /* A caller-supplied UUID must not already name a row that 'txn'
         * knows about. */
        ovs_assert(!ovsdb_idl_txn_get_row(txn, uuid));
        row->uuid = *uuid;
    } else {
        uuid_generate(&row->uuid);
    }

    row->table = ovsdb_idl_table_from_class(txn->idl, class);
    /* One element of 'new' per column in 'class'. */
    row->new = xmalloc(class->n_columns * sizeof *row->new);
    /* Index the row by UUID hash in both its table and the transaction. */
    hmap_insert(&row->table->rows, &row->hmap_node, uuid_hash(&row->uuid));
    hmap_insert(&txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid));
    return row;
}

/* Completes every transaction in 'idl->outstanding_txns' with status
 * TXN_TRY_AGAIN.
 *
 * NOTE(review): ovsdb_idl_txn_complete() is defined outside this view.  If it
 * unlinks 'txn' from 'outstanding_txns', confirm that HMAP_FOR_EACH tolerates
 * removal of the node currently being visited. */
static void
ovsdb_idl_txn_abort_all(struct ovsdb_idl *idl)
{
    struct ovsdb_idl_txn *txn;

    HMAP_FOR_EACH (txn, hmap_node, &idl->outstanding_txns) {
        ovsdb_idl_txn_complete(txn, TXN_TRY_AGAIN);
    }
}

/* Returns the transaction in 'idl->outstanding_txns' whose 'request_id' is
 * equal to 'id' according to json_equal(), or NULL if there is none.  The
 * lookup is keyed on json_hash(id, 0). */
static struct ovsdb_idl_txn *
ovsdb_idl_txn_find(struct ovsdb_idl *idl, const struct json *id)
{
    struct ovsdb_idl_txn *txn;

    HMAP_FOR_EACH_WITH_HASH (txn, hmap_node,
                             json_hash(id, 0), &idl->outstanding_txns) {
        if (json_equal(id, txn->request_id)) {
            return txn;
        }
    }
    return NULL;
}

/* Returns true if 'json' is nonnull and has type 'type'.  Otherwise logs a
 * warning through VLOG_WARN_RL(), describing the value as 'name', and returns
 * false. */
static bool
check_json_type(const struct json *json, enum json_type type, const char *name)
{
    if (!json) {
        VLOG_WARN_RL(&syntax_rl, "%s is missing", name);
        return false;
    } else if (json->type != type) {
        VLOG_WARN_RL(&syntax_rl, "%s is %s instead of %s",
                     name, json_type_to_string(json->type),
                     json_type_to_string(type));
        return false;
    } else {
        return true;
    }
}

/* Extracts the result of 'txn''s increment from 'results', the array of
 * per-operation replies.
 *
 * The reply at index 'txn->inc_index' is read as the "mutate" reply and must
 * have an integer "count" equal to 1.  The reply that follows it is read as
 * the "select" reply and must have a "rows" array with exactly one element,
 * an object that has an integer member named 'txn->inc_column'.
 *
 * On success, stores that integer in 'txn->inc_new_value' and returns true.
 * Otherwise logs a warning and returns false. */
static bool
ovsdb_idl_txn_process_inc_reply(struct ovsdb_idl_txn *txn,
                                const struct json_array *results)
{
    struct json *count, *rows, *row, *column;
    struct shash *mutate, *select;

    /* Both the "mutate" and the "select" reply must be present. */
    if (txn->inc_index + 2 > results->n) {
        VLOG_WARN_RL(&syntax_rl, "reply does not contain enough operations "
                     "for increment (has %zu, needs %u)",
                     results->n, txn->inc_index + 2);
        return false;
    }

    /* We know that this is a JSON object because the loop in
     * ovsdb_idl_txn_process_reply() checked. */
    mutate = json_object(results->elems[txn->inc_index]);
    count = shash_find_data(mutate, "count");
    if (!check_json_type(count, JSON_INTEGER, "\"mutate\" reply \"count\"")) {
        return false;
    }
    if (count->u.integer != 1) {
        VLOG_WARN_RL(&syntax_rl,
                     "\"mutate\" reply \"count\" is %lld instead of 1",
                     count->u.integer);
        return false;
    }

    select = json_object(results->elems[txn->inc_index + 1]);
    rows = shash_find_data(select, "rows");
    if (!check_json_type(rows, JSON_ARRAY, "\"select\" reply \"rows\"")) {
        return false;
    }
    if (rows->u.array.n != 1) {
        VLOG_WARN_RL(&syntax_rl, "\"select\" reply \"rows\" has %zu elements "
                     "instead of 1",
                     rows->u.array.n);
        return false;
    }
    row = rows->u.array.elems[0];
    if (!check_json_type(row, JSON_OBJECT, "\"select\" reply row")) {
        return false;
    }
    column = shash_find_data(json_object(row), txn->inc_column);
    if (!check_json_type(column, JSON_INTEGER,
                         "\"select\" reply inc column")) {
        return false;
    }
    txn->inc_new_value = column->u.integer;
    return true;
}

/* Extracts the UUID that the server assigned to the row inserted by 'insert'
 * from 'results', the array of per-operation replies.
 *
 * The reply at index 'insert->op_index' must have a "uuid" member that is a
 * JSON array and that ovsdb_atom_from_json() accepts as a UUID atom.
 *
 * On success, stores the UUID in 'insert->real' and returns true.  Otherwise
 * logs a warning and returns false. */
static bool
ovsdb_idl_txn_process_insert_reply(struct ovsdb_idl_txn_insert *insert,
                                   const struct json_array *results)
{
    static const struct ovsdb_base_type uuid_type = OVSDB_BASE_UUID_INIT;
    struct ovsdb_error *error;
    struct json *json_uuid;
    union ovsdb_atom uuid;
    struct shash *reply;

    if (insert->op_index >= results->n) {
        VLOG_WARN_RL(&syntax_rl, "reply does not contain enough operations "
                     "for insert (has %zu, needs %u)",
                     results->n, insert->op_index);
        return false;
    }

    /* We know that this is a JSON object because the loop in
     * ovsdb_idl_txn_process_reply() checked. */
    reply = json_object(results->elems[insert->op_index]);
    json_uuid = shash_find_data(reply, "uuid");
    if (!check_json_type(json_uuid, JSON_ARRAY,
                         "\"insert\" reply \"uuid\"")) {
        return false;
    }

    error = ovsdb_atom_from_json(&uuid, &uuid_type, json_uuid, NULL);
    if (error) {
        /* The error is only logged; both the string and the error object are
         * released here. */
        char *s = ovsdb_error_to_string(error);
        VLOG_WARN_RL(&syntax_rl, "\"insert\" reply \"uuid\" is not a JSON "
                     "UUID: %s", s);
        free(s);
        ovsdb_error_destroy(error);
        return false;
    }

    insert->real = uuid.uuid;
    return true;
}

/* Handles 'msg' as a possible reply to one of 'idl''s outstanding
 * transactions.
 *
 * Returns false, without doing anything else, if 'msg->id' matches no
 * outstanding transaction.  Otherwise classifies the reply, completes the
 * transaction with the resulting status via ovsdb_idl_txn_complete(), and
 * returns true.
 *
 * A JSONRPC_ERROR message, or a result that is not a JSON array, yields
 * TXN_ERROR.  Otherwise each per-operation reply is counted as a hard, lock,
 * or soft error, and the status is chosen with this precedence: TXN_ERROR,
 * TXN_NOT_LOCKED, TXN_TRY_AGAIN, TXN_SUCCESS. */
static bool
ovsdb_idl_txn_process_reply(struct ovsdb_idl *idl,
                            const struct jsonrpc_msg *msg)
{
    struct ovsdb_idl_txn *txn;
    enum ovsdb_idl_txn_status status;

    txn = ovsdb_idl_txn_find(idl, msg->id);
    if (!txn) {
        return false;
    }

    if (msg->type == JSONRPC_ERROR) {
        status = TXN_ERROR;
    } else if (msg->result->type != JSON_ARRAY) {
        VLOG_WARN_RL(&syntax_rl, "reply to \"transact\" is not JSON array");
        status = TXN_ERROR;
    } else {
        struct json_array *ops = &msg->result->u.array;
        int hard_errors = 0;
        int soft_errors = 0;
        int lock_errors = 0;
        size_t i;

        for (i = 0; i < ops->n; i++) {
            struct json *op = ops->elems[i];

            if (op->type == JSON_NULL) {
                /* This isn't an error in itself but indicates that some prior
                 * operation failed, so make sure that we know about it. */
                soft_errors++;
            } else if (op->type == JSON_OBJECT) {
                struct json *error;

                error = shash_find_data(json_object(op), "error");
                if (error) {
                    if (error->type == JSON_STRING) {
                        if (!strcmp(error->u.string, "timed out")) {
                            soft_errors++;
                        } else if (!strcmp(error->u.string, "not owner")) {
                            lock_errors++;
                        } else if (strcmp(error->u.string, "aborted")) {
                            /* Any other error string except "aborted" is a
                             * hard error.  "aborted" itself is not counted in
                             * any category. */
                            hard_errors++;
                            ovsdb_idl_txn_set_error_json(txn, op);
                        }
                    } else {
                        hard_errors++;
                        ovsdb_idl_txn_set_error_json(txn, op);
                        VLOG_WARN_RL(&syntax_rl,
                                     "\"error\" in reply is not JSON string");
                    }
                }
            } else {
                hard_errors++;
                ovsdb_idl_txn_set_error_json(txn, op);
                VLOG_WARN_RL(&syntax_rl,
                             "operation reply is not JSON null or object");
            }
        }

        /* Only an error-free reply is mined for increment and insert
         * results; a malformed result is then itself a hard error. */
        if (!soft_errors && !hard_errors && !lock_errors) {
            struct ovsdb_idl_txn_insert *insert;

            if (txn->inc_table && !ovsdb_idl_txn_process_inc_reply(txn, ops)) {
                hard_errors++;
            }

            HMAP_FOR_EACH (insert, hmap_node, &txn->inserted_rows) {
                if (!ovsdb_idl_txn_process_insert_reply(insert, ops)) {
                    hard_errors++;
                }
            }
        }

        status = (hard_errors ? TXN_ERROR
                  : lock_errors ? TXN_NOT_LOCKED
                  : soft_errors ? TXN_TRY_AGAIN
                  : TXN_SUCCESS);
    }

    ovsdb_idl_txn_complete(txn, status);
    return true;
}

/* Returns the transaction currently active for 'row''s IDL.  A transaction
 * must currently be active. */
struct ovsdb_idl_txn *
ovsdb_idl_txn_get(const struct ovsdb_idl_row *row)
{
    struct ovsdb_idl_txn *txn = row->table->idl->txn;
    ovs_assert(txn != NULL);
    return txn;
}

/* Returns the IDL on which 'txn' acts. */
struct ovsdb_idl *
ovsdb_idl_txn_get_idl (struct ovsdb_idl_txn *txn)
{
    return txn->idl;
}

/* If 'lock_name' is nonnull, configures 'idl' to obtain the named lock from
 * the database server and to avoid modifying the database when the lock cannot
 * be acquired (that is, when another client has the same lock).
 *
 * If 'lock_name' is NULL, drops the locking requirement and releases the
 * lock.
*/
void
ovsdb_idl_set_lock(struct ovsdb_idl *idl, const char *lock_name)
{
    /* Changing the lock is only allowed while no transaction is being built
     * and none is outstanding. */
    ovs_assert(!idl->txn);
    ovs_assert(hmap_is_empty(&idl->outstanding_txns));

    if (idl->lock_name && (!lock_name || strcmp(lock_name, idl->lock_name))) {
        /* Release previous lock. */
        ovsdb_idl_send_unlock_request(idl);
        free(idl->lock_name);
        idl->lock_name = NULL;
        idl->is_lock_contended = false;
    }

    /* Reached with 'idl->lock_name' still set only when the requested name
     * equals the current one, in which case nothing is sent. */
    if (lock_name && !idl->lock_name) {
        /* Acquire new lock. */
        idl->lock_name = xstrdup(lock_name);
        ovsdb_idl_send_lock_request(idl);
    }
}

/* Returns true if 'idl' is configured to obtain a lock and owns that lock.
 *
 * Locking and unlocking happens asynchronously from the database client's
 * point of view, so the information is only useful for optimization (e.g. if
 * the client doesn't have the lock then there's no point in trying to write to
 * the database). */
bool
ovsdb_idl_has_lock(const struct ovsdb_idl *idl)
{
    return idl->has_lock;
}

/* Returns true if 'idl' is configured to obtain a lock but the database server
 * has indicated that some other client already owns the requested lock. */
bool
ovsdb_idl_is_lock_contended(const struct ovsdb_idl *idl)
{
    return idl->is_lock_contended;
}

/* Records 'new_has_lock' as 'idl->has_lock'.
 *
 * On a transition from "not held" to "held", also clears
 * 'idl->is_lock_contended' and, unless a monitor request is outstanding
 * ('idl->monitor_request_id' is nonnull), increments 'idl->change_seqno'. */
static void
ovsdb_idl_update_has_lock(struct ovsdb_idl *idl, bool new_has_lock)
{
    if (new_has_lock && !idl->has_lock) {
        if (!idl->monitor_request_id) {
            idl->change_seqno++;
        } else {
            /* We're waiting for a monitor reply, so don't signal that the
             * database changed.  The monitor reply will increment change_seqno
             * anyhow. */
        }
        idl->is_lock_contended = false;
    }
    idl->has_lock = new_has_lock;
}

/* Common code for sending a "lock" or "unlock" request.
 *
 * Marks the lock as not held and discards any saved lock request ID.  Then,
 * if the session is connected, sends a 'method' request whose only parameter
 * is 'idl->lock_name'.  'idp' is passed through to jsonrpc_create_request();
 * the callers in this file pass either '&idl->lock_request_id' or NULL. */
static void
ovsdb_idl_send_lock_request__(struct ovsdb_idl *idl, const char *method,
                              struct json **idp)
{
    ovsdb_idl_update_has_lock(idl, false);

    json_destroy(idl->lock_request_id);
    idl->lock_request_id = NULL;

    if (jsonrpc_session_is_connected(idl->session)) {
        struct json *params;

        params = json_array_create_1(json_string_create(idl->lock_name));
        jsonrpc_session_send(idl->session,
                             jsonrpc_create_request(method, params, idp));
    }
}

/* Sends a "lock" request for 'idl->lock_name', passing
 * '&idl->lock_request_id' as the request ID output argument. */
static void
ovsdb_idl_send_lock_request(struct ovsdb_idl *idl)
{
    ovsdb_idl_send_lock_request__(idl, "lock", &idl->lock_request_id);
}

/* Sends an "unlock" request for 'idl->lock_name'.  No request ID is saved. */
static void
ovsdb_idl_send_unlock_request(struct ovsdb_idl *idl)
{
    ovsdb_idl_send_lock_request__(idl, "unlock", NULL);
}

/* Processes 'result', the result of a "lock" request.
 *
 * Discards the saved lock request ID.  The lock counts as acquired only if
 * 'result' is a JSON object whose "locked" member is JSON true; in every other
 * case the lock is recorded as not held and as contended. */
static void
ovsdb_idl_parse_lock_reply(struct ovsdb_idl *idl, const struct json *result)
{
    bool got_lock;

    json_destroy(idl->lock_request_id);
    idl->lock_request_id = NULL;

    if (result->type == JSON_OBJECT) {
        const struct json *locked;

        locked = shash_find_data(json_object(result), "locked");
        got_lock = locked && locked->type == JSON_TRUE;
    } else {
        got_lock = false;
    }

    ovsdb_idl_update_has_lock(idl, got_lock);
    if (!got_lock) {
        idl->is_lock_contended = true;
    }
}

/* Processes 'params', the parameters of a lock notification that reports the
 * lock state 'new_has_lock'.
 *
 * The notification is applied only if 'idl' has a lock name configured and
 * 'params' is a nonempty JSON array whose first element is a string equal to
 * that name; anything else is silently ignored.  When 'new_has_lock' is false
 * the lock is also marked as contended. */
static void
ovsdb_idl_parse_lock_notify(struct ovsdb_idl *idl, const struct json *params,
                            bool new_has_lock)
{
    if (idl->lock_name
        && params->type == JSON_ARRAY
        && json_array(params)->n > 0
        && json_array(params)->elems[0]->type == JSON_STRING) {
        const char *lock_name = json_string(json_array(params)->elems[0]);

        if (!strcmp(idl->lock_name, lock_name)) {
            ovsdb_idl_update_has_lock(idl, new_has_lock);
            if (!new_has_lock) {
                idl->is_lock_contended = true;
            }
        }
    }
}
openvswitch-2.0.1+git20140120/lib/ovsdb-idl.h000066400000000000000000000235221226605124000202600ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVSDB_IDL_H #define OVSDB_IDL_H 1 /* Open vSwitch Database Interface Definition Language (OVSDB IDL). * * The OVSDB IDL maintains an in-memory replica of a database. It issues RPC * requests to an OVSDB database server and parses the responses, converting * raw JSON into data structures that are easier for clients to digest. Most * notably, references to rows via UUID become C pointers. * * The IDL also assists with issuing database transactions. The client creates * a transaction, manipulates the IDL data structures, and commits or aborts * the transaction. The IDL then composes and issues the necessary JSON-RPC * requests and reports to the client whether the transaction completed * successfully. 
*/ #include #include #include "compiler.h" #include "ovsdb-types.h" struct json; struct ovsdb_datum; struct ovsdb_idl_class; struct ovsdb_idl_column; struct ovsdb_idl_table_class; struct uuid; struct ovsdb_idl *ovsdb_idl_create(const char *remote, const struct ovsdb_idl_class *, bool monitor_everything_by_default, bool retry); void ovsdb_idl_destroy(struct ovsdb_idl *); void ovsdb_idl_run(struct ovsdb_idl *); void ovsdb_idl_wait(struct ovsdb_idl *); void ovsdb_idl_set_lock(struct ovsdb_idl *, const char *lock_name); bool ovsdb_idl_has_lock(const struct ovsdb_idl *); bool ovsdb_idl_is_lock_contended(const struct ovsdb_idl *); unsigned int ovsdb_idl_get_seqno(const struct ovsdb_idl *); bool ovsdb_idl_has_ever_connected(const struct ovsdb_idl *); void ovsdb_idl_force_reconnect(struct ovsdb_idl *); void ovsdb_idl_verify_write_only(struct ovsdb_idl *); bool ovsdb_idl_is_alive(const struct ovsdb_idl *); int ovsdb_idl_get_last_error(const struct ovsdb_idl *); /* Choosing columns and tables to replicate. */ /* Modes with which the IDL can monitor a column. * * If no bits are set, the column is not monitored at all. Its value will * always appear to the client to be the default value for its type. * * If OVSDB_IDL_MONITOR is set, then the column is replicated. Its value will * reflect the value in the database. If OVSDB_IDL_ALERT is also set, then the * value returned by ovsdb_idl_get_seqno() will change when the column's value * changes. * * The possible mode combinations are: * * - 0, for a column that a client doesn't care about. * * - (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT), for a column that a client wants * to track and possibly update. * * - OVSDB_IDL_MONITOR, for columns that a client treats as "write-only", * that is, it updates them but doesn't want to get alerted about its own * updates. It also won't be alerted about other clients' updates, so this * is suitable only for use by a client that "owns" a particular column. 
* * - OVSDB_IDL_ALERT without OVSDB_IDL_MONITOR is not valid. */ #define OVSDB_IDL_MONITOR (1 << 0) /* Monitor this column? */ #define OVSDB_IDL_ALERT (1 << 1) /* Alert client when column updated? */ void ovsdb_idl_add_column(struct ovsdb_idl *, const struct ovsdb_idl_column *); void ovsdb_idl_add_table(struct ovsdb_idl *, const struct ovsdb_idl_table_class *); void ovsdb_idl_omit(struct ovsdb_idl *, const struct ovsdb_idl_column *); void ovsdb_idl_omit_alert(struct ovsdb_idl *, const struct ovsdb_idl_column *); /* Reading the database replica. */ const struct ovsdb_idl_row *ovsdb_idl_get_row_for_uuid( const struct ovsdb_idl *, const struct ovsdb_idl_table_class *, const struct uuid *); const struct ovsdb_idl_row *ovsdb_idl_first_row( const struct ovsdb_idl *, const struct ovsdb_idl_table_class *); const struct ovsdb_idl_row *ovsdb_idl_next_row(const struct ovsdb_idl_row *); const struct ovsdb_datum *ovsdb_idl_read(const struct ovsdb_idl_row *, const struct ovsdb_idl_column *); const struct ovsdb_datum *ovsdb_idl_get(const struct ovsdb_idl_row *, const struct ovsdb_idl_column *, enum ovsdb_atomic_type key_type, enum ovsdb_atomic_type value_type); bool ovsdb_idl_row_is_synthetic(const struct ovsdb_idl_row *); /* Transactions. * * A transaction may modify the contents of a database by modifying the values * of columns, deleting rows, inserting rows, or adding checks that columns in * the database have not changed ("verify" operations), through * ovsdb_idl_txn_*() functions. (The OVSDB IDL code generator produces helper * functions that internally call the ovsdb_idl_txn_*() functions. These are * likely to be more convenient.) * * Reading and writing columns and inserting and deleting rows are all * straightforward. The reasons to verify columns are less obvious. * Verification is the key to maintaining transactional integrity.
Because * OVSDB handles multiple clients, it can happen that between the time that * OVSDB client A reads a column and writes a new value, OVSDB client B has * written that column. Client A's write should not ordinarily overwrite * client B's, especially if the column in question is a "map" column that * contains several more or less independent data items. If client A adds a * "verify" operation before it writes the column, then the transaction fails * in case client B modifies it first. Client A will then see the new value of * the column and compose a new transaction based on the new contents written * by client B. * * When a transaction is complete, which must be before the next call to * ovsdb_idl_run() on 'idl', call ovsdb_idl_txn_commit() or * ovsdb_idl_txn_abort(). * * The life-cycle of a transaction looks like this: * * 1. Create the transaction and record the initial sequence number: * * seqno = ovsdb_idl_get_seqno(idl); * txn = ovsdb_idl_txn_create(idl); * * 2. Modify the database with ovsdb_idl_txn_*() functions directly or * indirectly. * * 3. Commit the transaction by calling ovsdb_idl_txn_commit(). The first call * to this function probably returns TXN_INCOMPLETE. The client must keep * calling again as long as this remains true, calling ovsdb_idl_run() in * between to let the IDL do protocol processing. (If the client doesn't * have anything else to do in the meantime, it can use * ovsdb_idl_txn_commit_block() to avoid having to loop itself.) * * 4. If the final status is TXN_TRY_AGAIN, wait for ovsdb_idl_get_seqno() to * change from the saved 'seqno' (it's possible that it's already changed, * in which case the client should not wait at all), then start over from * step 1. Only a call to ovsdb_idl_run() will change the return value of * ovsdb_idl_get_seqno(). (ovsdb_idl_txn_commit_block() calls * ovsdb_idl_run().) */ enum ovsdb_idl_txn_status { TXN_UNCOMMITTED, /* Not yet committed or aborted.
*/ TXN_UNCHANGED, /* Transaction didn't include any changes. */ TXN_INCOMPLETE, /* Commit in progress, please wait. */ TXN_ABORTED, /* ovsdb_idl_txn_abort() called. */ TXN_SUCCESS, /* Commit successful. */ TXN_TRY_AGAIN, /* Commit failed because a "verify" operation * reported an inconsistency, due to a network * problem, or other transient failure. Wait * for a change, then try again. */ TXN_NOT_LOCKED, /* Server hasn't given us the lock yet. */ TXN_ERROR /* Commit failed due to a hard error. */ }; const char *ovsdb_idl_txn_status_to_string(enum ovsdb_idl_txn_status); struct ovsdb_idl_txn *ovsdb_idl_txn_create(struct ovsdb_idl *); void ovsdb_idl_txn_add_comment(struct ovsdb_idl_txn *, const char *, ...) PRINTF_FORMAT (2, 3); void ovsdb_idl_txn_set_dry_run(struct ovsdb_idl_txn *); void ovsdb_idl_txn_increment(struct ovsdb_idl_txn *, const struct ovsdb_idl_row *, const struct ovsdb_idl_column *); void ovsdb_idl_txn_destroy(struct ovsdb_idl_txn *); void ovsdb_idl_txn_wait(const struct ovsdb_idl_txn *); enum ovsdb_idl_txn_status ovsdb_idl_txn_commit(struct ovsdb_idl_txn *); enum ovsdb_idl_txn_status ovsdb_idl_txn_commit_block(struct ovsdb_idl_txn *); void ovsdb_idl_txn_abort(struct ovsdb_idl_txn *); const char *ovsdb_idl_txn_get_error(const struct ovsdb_idl_txn *); int64_t ovsdb_idl_txn_get_increment_new_value(const struct ovsdb_idl_txn *); const struct uuid *ovsdb_idl_txn_get_insert_uuid(const struct ovsdb_idl_txn *, const struct uuid *); void ovsdb_idl_txn_write(const struct ovsdb_idl_row *, const struct ovsdb_idl_column *, struct ovsdb_datum *); void ovsdb_idl_txn_write_clone(const struct ovsdb_idl_row *, const struct ovsdb_idl_column *, const struct ovsdb_datum *); void ovsdb_idl_txn_delete(const struct ovsdb_idl_row *); const struct ovsdb_idl_row *ovsdb_idl_txn_insert( struct ovsdb_idl_txn *, const struct ovsdb_idl_table_class *, const struct uuid *); struct ovsdb_idl *ovsdb_idl_txn_get_idl (struct ovsdb_idl_txn *); #endif /* ovsdb-idl.h */ 
openvswitch-2.0.1+git20140120/lib/ovsdb-parser.c000066400000000000000000000104311226605124000207720ustar00rootroot00000000000000/* Copyright (c) 2009, 2011 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): the bare "#include" directives below carry no header name in
 * this copy of the file; confirm the intended headers against the original. */
#include

#include "ovsdb-parser.h"

#include
#include

#include "ovsdb-error.h"

/* Initializes 'parser' for parsing the members of 'json'.
 *
 * 'name' is a printf-style format string, with its arguments following; the
 * formatted result is stored in 'parser->name' and later appears in error
 * messages.
 *
 * If 'json' is null or is not a JSON object, 'parser->json' is set to NULL and
 * an "Object expected." error is recorded in 'parser'. */
void
ovsdb_parser_init(struct ovsdb_parser *parser, const struct json *json,
                  const char *name, ...)
{
    va_list args;

    va_start(args, name);
    parser->name = xvasprintf(name, args);
    va_end(args);

    sset_init(&parser->used);
    parser->error = NULL;

    parser->json = (json && json->type == JSON_OBJECT ? json : NULL);
    if (!parser->json) {
        ovsdb_parser_raise_error(parser, "Object expected.");
    }
}

/* Returns true if 'string' is an identifier: a first character that is '_' or
 * satisfies isalpha(), followed by zero or more characters that are '_' or
 * satisfy isalpha() or isdigit().  Returns false otherwise, including for the
 * empty string. */
bool
ovsdb_parser_is_id(const char *string)
{
    /* 'unsigned char' so that the value handed to the <ctype.h> functions is
     * never negative. */
    unsigned char c;

    c = *string;
    if (!isalpha(c) && c != '_') {
        return false;
    }

    for (;;) {
        c = *++string;
        if (c == '\0') {
            return true;
        } else if (!isalpha(c) && !isdigit(c) && c != '_') {
            return false;
        }
    }
}

/* Looks up the member named 'name' in the object being parsed by 'parser'.
 *
 * 'types' is a bitwise OR of OP_* values that says which JSON types are
 * acceptable.  OP_ID additionally accepts a string for which
 * ovsdb_parser_is_id() returns true, and OP_OPTIONAL makes a missing member
 * acceptable.
 *
 * Returns the member's value and records 'name' as used if the member exists
 * and has an acceptable type.  Otherwise returns NULL; a missing member that
 * is not optional, or a type mismatch, also records an error in 'parser'.
 * Returns NULL immediately if 'parser' has no object to parse. */
const struct json *
ovsdb_parser_member(struct ovsdb_parser *parser, const char *name,
                    enum ovsdb_parser_types types)
{
    struct json *value;

    if (!parser->json) {
        return NULL;
    }

    value = shash_find_data(json_object(parser->json), name);
    if (!value) {
        if (!(types & OP_OPTIONAL)) {
            ovsdb_parser_raise_error(parser,
                                     "Required '%s' member is missing.", name);
        }
        return NULL;
    }

    /* The bit for JSON type T in 'types' is (1u << T); the range check keeps
     * the shift count within the known JSON types. */
    if (((int) value->type >= 0 && value->type < JSON_N_TYPES
         && types & (1u << value->type))
        || (types & OP_ID && value->type == JSON_STRING
            && ovsdb_parser_is_id(value->u.string)))
    {
        sset_add(&parser->used, name);
        return value;
    } else {
        ovsdb_parser_raise_error(parser, "Type mismatch for member '%s'.",
                                 name);
        return NULL;
    }
}

/* Records a syntax error in 'parser', with a message built from the
 * printf-style 'format' and its arguments and prefixed with
 * "Parsing <parser->name> failed: ".
 *
 * Only the first error is kept: if 'parser' already has an error, this
 * function does nothing. */
void
ovsdb_parser_raise_error(struct ovsdb_parser *parser, const char *format, ...)
{
    if (!parser->error) {
        struct ovsdb_error *error;
        va_list args;
        char *message;

        va_start(args, format);
        message = xvasprintf(format, args);
        va_end(args);

        error = ovsdb_syntax_error(parser->json, NULL, "Parsing %s failed: %s",
                                   parser->name, message);
        free(message);

        parser->error = error;
    }
}

/* Returns a clone of the error recorded in 'parser', or NULL if there is
 * none. */
struct ovsdb_error *
ovsdb_parser_get_error(const struct ovsdb_parser *parser)
{
    return parser->error ? ovsdb_error_clone(parser->error) : NULL;
}

/* Returns true if an error has been recorded in 'parser'. */
bool
ovsdb_parser_has_error(const struct ovsdb_parser *parser)
{
    return parser->error != NULL;
}

/* Finishes parsing with 'parser'.
 *
 * If no error has been recorded so far, checks that every member of the
 * object was retrieved with ovsdb_parser_member(); if some were not, records
 * an error that names the first unused member found and gives the number of
 * others.
 *
 * Then frees 'parser->name' and destroys the set of used names.  Returns
 * 'parser->error' itself (not a clone), which is NULL if no error was
 * recorded. */
struct ovsdb_error *
ovsdb_parser_finish(struct ovsdb_parser *parser)
{
    if (!parser->error) {
        const struct shash *object = json_object(parser->json);
        size_t n_unused;

        n_unused = shash_count(object) - sset_count(&parser->used);
        if (n_unused) {
            struct shash_node *node;

            SHASH_FOR_EACH (node, object) {
                if (!sset_contains(&parser->used, node->name)) {
                    if (n_unused > 1) {
                        /* "member" is pluralized when two or more members
                         * besides the named one are unused. */
                        ovsdb_parser_raise_error(
                            parser,
                            "Member '%s' and %zu other member%s "
                            "are present but not allowed here.",
                            node->name, n_unused - 1, n_unused > 2 ? "s" : "");
                    } else {
                        ovsdb_parser_raise_error(
                            parser,
                            "Member '%s' is present but not allowed here.",
                            node->name);
                    }
                    /* Only the first unused member is named. */
                    break;
                }
            }
        }
    }

    free(parser->name);
    sset_destroy(&parser->used);

    return parser->error;
}
openvswitch-2.0.1+git20140120/lib/ovsdb-parser.h000066400000000000000000000061771226605124000210130ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVSDB_PARSER_H
#define OVSDB_PARSER_H 1

/* NOTE(review): the bare "#include" below carries no header name in this copy
 * of the file; confirm the intended header against the original. */
#include
#include "compiler.h"
#include "json.h"
#include "sset.h"
#include "util.h"

/* State for parsing the members of one JSON object. */
struct ovsdb_parser {
    char *name;                 /* Used only in error messages. */
    struct sset used;           /* Already-parsed names from 'object'. */
    const struct json *json;    /* JSON object being parsed. */
    struct ovsdb_error *error;  /* Error signaled, if any. */
};

/* Check that the JSON types make the bitwise tricks below work OK. */
BUILD_ASSERT_DECL(JSON_NULL >= 0 && JSON_NULL < 10);
BUILD_ASSERT_DECL(JSON_FALSE >= 0 && JSON_FALSE < 10);
BUILD_ASSERT_DECL(JSON_TRUE >= 0 && JSON_TRUE < 10);
BUILD_ASSERT_DECL(JSON_OBJECT >= 0 && JSON_OBJECT < 10);
BUILD_ASSERT_DECL(JSON_ARRAY >= 0 && JSON_ARRAY < 10);
BUILD_ASSERT_DECL(JSON_INTEGER >= 0 && JSON_INTEGER < 10);
BUILD_ASSERT_DECL(JSON_REAL >= 0 && JSON_REAL < 10);
BUILD_ASSERT_DECL(JSON_STRING >= 0 && JSON_STRING < 10);
BUILD_ASSERT_DECL(JSON_N_TYPES == 8);

/* Bitmap of acceptable member types for ovsdb_parser_member().  The bit for
 * JSON type T is (1 << T); OP_ID and OP_OPTIONAL use the next two bits. */
enum ovsdb_parser_types {
    OP_NULL = 1 << JSON_NULL,             /* null */
    OP_FALSE = 1 << JSON_FALSE,           /* false */
    OP_TRUE = 1 << JSON_TRUE,             /* true */
    OP_OBJECT = 1 << JSON_OBJECT,         /* {"a": b, "c": d, ...} */
    OP_ARRAY = 1 << JSON_ARRAY,           /* [1, 2, 3, ...] */
    OP_INTEGER = 1 << JSON_INTEGER,       /* 123. */
    OP_NONINTEGER = 1 << JSON_REAL,       /* 123.456. */
    OP_STRING = 1 << JSON_STRING,         /* "..." */
    OP_ANY = (OP_NULL | OP_FALSE | OP_TRUE | OP_OBJECT | OP_ARRAY
              | OP_INTEGER | OP_NONINTEGER | OP_STRING),

    OP_BOOLEAN = OP_FALSE | OP_TRUE,
    OP_NUMBER = OP_INTEGER | OP_NONINTEGER,

    OP_ID = 1 << JSON_N_TYPES,            /* "[_a-zA-Z][_a-zA-Z0-9]*" */
    OP_OPTIONAL = 1 << (JSON_N_TYPES + 1) /* no value at all */
};

void ovsdb_parser_init(struct ovsdb_parser *, const struct json *,
                       const char *name, ...)
    PRINTF_FORMAT(3, 4);
const struct json *ovsdb_parser_member(struct ovsdb_parser *, const char *name,
                                       enum ovsdb_parser_types);

void ovsdb_parser_raise_error(struct ovsdb_parser *parser,
                              const char *format, ...)
PRINTF_FORMAT(2, 3); bool ovsdb_parser_has_error(const struct ovsdb_parser *); struct ovsdb_error *ovsdb_parser_get_error(const struct ovsdb_parser *); struct ovsdb_error *ovsdb_parser_finish(struct ovsdb_parser *) WARN_UNUSED_RESULT; void ovsdb_parser_destroy(struct ovsdb_parser *); bool ovsdb_parser_is_id(const char *string); #endif /* ovsdb-parser.h */ openvswitch-2.0.1+git20140120/lib/ovsdb-types.c000066400000000000000000000474601226605124000206560ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

/* NOTE(review): the bare "#include" directives below carry no header name in
 * this copy of the file; confirm the intended headers against the original. */
#include

#include "ovsdb-types.h"

#include
#include

#include "dynamic-string.h"
#include "json.h"
#include "ovs-thread.h"
#include "ovsdb-data.h"
#include "ovsdb-error.h"
#include "ovsdb-parser.h"

/* Scalar types, one for each atomic type other than void. */
const struct ovsdb_type ovsdb_type_integer =
        OVSDB_TYPE_SCALAR_INITIALIZER(OVSDB_BASE_INTEGER_INIT);
const struct ovsdb_type ovsdb_type_real =
        OVSDB_TYPE_SCALAR_INITIALIZER(OVSDB_BASE_REAL_INIT);
const struct ovsdb_type ovsdb_type_boolean =
        OVSDB_TYPE_SCALAR_INITIALIZER(OVSDB_BASE_BOOLEAN_INIT);
const struct ovsdb_type ovsdb_type_string =
        OVSDB_TYPE_SCALAR_INITIALIZER(OVSDB_BASE_STRING_INIT);
const struct ovsdb_type ovsdb_type_uuid =
        OVSDB_TYPE_SCALAR_INITIALIZER(OVSDB_BASE_UUID_INIT);

/* ovsdb_atomic_type */

/* Returns the name of atomic 'type' as a static string. */
const char *
ovsdb_atomic_type_to_string(enum ovsdb_atomic_type type)
{
    switch (type) {
    case OVSDB_TYPE_VOID:
        return "void";
    case OVSDB_TYPE_INTEGER:
        return "integer";
    case OVSDB_TYPE_REAL:
        return "real";
    case OVSDB_TYPE_BOOLEAN:
        return "boolean";
    case OVSDB_TYPE_STRING:
        return "string";
    case OVSDB_TYPE_UUID:
        return "uuid";
    case OVSDB_N_TYPES:
    default:
        /* NOTE(review): an empty string is returned for invalid values here;
         * this copy of the file may have lost an angle-bracketed placeholder
         * -- confirm against the original. */
        return "";
    }
}

/* Returns a new JSON string that holds the name of atomic 'type'. */
struct json *
ovsdb_atomic_type_to_json(enum ovsdb_atomic_type type)
{
    return json_string_create(ovsdb_atomic_type_to_string(type));
}

/* If 'string' is the name of an atomic type other than "void", stores the
 * type in '*type' and returns true.  Otherwise returns false and leaves
 * '*type' unmodified. */
bool
ovsdb_atomic_type_from_string(const char *string, enum ovsdb_atomic_type *type)
{
    if (!strcmp(string, "integer")) {
        *type = OVSDB_TYPE_INTEGER;
    } else if (!strcmp(string, "real")) {
        *type = OVSDB_TYPE_REAL;
    } else if (!strcmp(string, "boolean")) {
        *type = OVSDB_TYPE_BOOLEAN;
    } else if (!strcmp(string, "string")) {
        *type = OVSDB_TYPE_STRING;
    } else if (!strcmp(string, "uuid")) {
        *type = OVSDB_TYPE_UUID;
    } else {
        return false;
    }
    return true;
}

/* Parses 'json' as the name of an atomic type.  On success stores the type in
 * '*type' and returns NULL.  On failure, that is, if 'json' is not a string
 * or does not name an atomic type, stores OVSDB_TYPE_VOID in '*type' and
 * returns a syntax error. */
struct ovsdb_error *
ovsdb_atomic_type_from_json(enum ovsdb_atomic_type *type,
                            const struct json *json)
{
    if (json->type == JSON_STRING) {
        if (ovsdb_atomic_type_from_string(json_string(json), type)) {
            return NULL;
        } else {
            *type = OVSDB_TYPE_VOID;
            return ovsdb_syntax_error(json, NULL,
                                      "\"%s\" is not an atomic-type",
                                      json_string(json));
        }
    } else {
        *type = OVSDB_TYPE_VOID;
        return ovsdb_syntax_error(json, NULL, "atomic-type expected");
    }
}

/* ovsdb_base_type */

/* Initializes 'base' as an unconstrained base type with atomic type 'type':
 * no enum, the widest integer, real, and string-length ranges, and no
 * referenced table. */
void
ovsdb_base_type_init(struct ovsdb_base_type *base, enum ovsdb_atomic_type type)
{
    base->type = type;
    base->enum_ = NULL;

    switch (base->type) {
    case OVSDB_TYPE_VOID:
        break;

    case OVSDB_TYPE_INTEGER:
        base->u.integer.min = INT64_MIN;
        base->u.integer.max = INT64_MAX;
        break;

    case OVSDB_TYPE_REAL:
        base->u.real.min = -DBL_MAX;
        base->u.real.max = DBL_MAX;
        break;

    case OVSDB_TYPE_BOOLEAN:
        break;

    case OVSDB_TYPE_STRING:
        base->u.string.minLen = 0;
        base->u.string.maxLen = UINT_MAX;
        break;

    case OVSDB_TYPE_UUID:
        base->u.uuid.refTableName = NULL;
        base->u.uuid.refTable = NULL;
        break;

    case OVSDB_N_TYPES:
        NOT_REACHED();

    default:
        NOT_REACHED();
    }
}

/* Returns the type of the 'enum_' member for an ovsdb_base_type whose 'type'
 * is 'atomic_type'. */
const struct ovsdb_type *
ovsdb_base_type_get_enum_type(enum ovsdb_atomic_type atomic_type)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    static struct ovsdb_type *types[OVSDB_N_TYPES];

    /* The table is built inside the ovsthread_once section and then kept for
     * later calls.  Each entry has key type 'i', a void value type, and
     * between 1 and UINT_MAX elements. */
    if (ovsthread_once_start(&once)) {
        enum ovsdb_atomic_type i;

        for (i = 0; i < OVSDB_N_TYPES; i++) {
            struct ovsdb_type *type;

            types[i] = type = xmalloc(sizeof *type);
            ovsdb_base_type_init(&type->key, i);
            ovsdb_base_type_init(&type->value, OVSDB_TYPE_VOID);
            type->n_min = 1;
            type->n_max = UINT_MAX;
        }

        ovsthread_once_done(&once);
    }
    return types[atomic_type];
}

/* Makes 'dst' a copy of 'src', duplicating the data that 'src' points to (the
 * enum datum and the referenced table name) so that the two can be destroyed
 * independently. */
void
ovsdb_base_type_clone(struct ovsdb_base_type *dst,
                      const struct ovsdb_base_type *src)
{
    *dst = *src;

    if (src->enum_) {
        dst->enum_ = xmalloc(sizeof *dst->enum_);
        ovsdb_datum_clone(dst->enum_, src->enum_,
                          ovsdb_base_type_get_enum_type(dst->type));
    }

    switch (dst->type) {
    case OVSDB_TYPE_VOID:
    case OVSDB_TYPE_INTEGER:
    case OVSDB_TYPE_REAL:
    case OVSDB_TYPE_BOOLEAN:
        break;

    case OVSDB_TYPE_STRING:
        break;

    case OVSDB_TYPE_UUID:
        /* After the structure copy above, 'dst' still points at 'src''s
         * string; replace it with a private copy. */
        if (dst->u.uuid.refTableName) {
            dst->u.uuid.refTableName = xstrdup(dst->u.uuid.refTableName);
        }
        break;

    case OVSDB_N_TYPES:
    default:
        NOT_REACHED();
    }
}

/* Frees the data owned by 'base' (the enum datum and the referenced table
 * name) but not 'base' itself.  Does nothing if 'base' is null. */
void
ovsdb_base_type_destroy(struct ovsdb_base_type *base)
{
    if (base) {
        if (base->enum_) {
            ovsdb_datum_destroy(base->enum_,
                                ovsdb_base_type_get_enum_type(base->type));
            free(base->enum_);
        }

        switch (base->type) {
        case OVSDB_TYPE_VOID:
        case OVSDB_TYPE_INTEGER:
        case OVSDB_TYPE_REAL:
        case OVSDB_TYPE_BOOLEAN:
            break;

        case OVSDB_TYPE_STRING:
            break;

        case OVSDB_TYPE_UUID:
            free(base->u.uuid.refTableName);
            break;

        case OVSDB_N_TYPES:
            NOT_REACHED();

        default:
            NOT_REACHED();
        }
    }
}

/* Returns true if 'base' has a known atomic type and, for integer, real, and
 * string types, its minimum does not exceed its maximum. */
bool
ovsdb_base_type_is_valid(const struct ovsdb_base_type *base)
{
    switch (base->type) {
    case OVSDB_TYPE_VOID:
        return true;

    case OVSDB_TYPE_INTEGER:
        return base->u.integer.min <= base->u.integer.max;

    case OVSDB_TYPE_REAL:
        return base->u.real.min <= base->u.real.max;

    case OVSDB_TYPE_BOOLEAN:
        return true;

    case OVSDB_TYPE_STRING:
        return base->u.string.minLen <= base->u.string.maxLen;

    case OVSDB_TYPE_UUID:
        return true;

    case OVSDB_N_TYPES:
    default:
        return false;
    }
}

/* Returns true if 'base' differs from what ovsdb_base_type_init() produces
 * for its atomic type: it has an enum, a narrowed integer, real, or
 * string-length range, or a referenced table.  'base' must not have type
 * OVSDB_TYPE_VOID unless it has an enum. */
bool
ovsdb_base_type_has_constraints(const struct ovsdb_base_type *base)
{
    if (base->enum_) {
        return true;
    }

    switch (base->type) {
    case OVSDB_TYPE_VOID:
        NOT_REACHED();

    case OVSDB_TYPE_INTEGER:
        return (base->u.integer.min != INT64_MIN
                || base->u.integer.max != INT64_MAX);

    case OVSDB_TYPE_REAL:
        return (base->u.real.min != -DBL_MAX
                || base->u.real.max != DBL_MAX);

    case OVSDB_TYPE_BOOLEAN:
        return false;

    case OVSDB_TYPE_STRING:
        return base->u.string.minLen != 0 || base->u.string.maxLen != UINT_MAX;

    case OVSDB_TYPE_UUID:
        return base->u.uuid.refTableName != NULL;

    case OVSDB_N_TYPES:
        NOT_REACHED();

    default:
        NOT_REACHED();
    }
}

/* Removes all constraints from 'base' while keeping its atomic type, by
 * destroying it and initializing it afresh. */
void
ovsdb_base_type_clear_constraints(struct ovsdb_base_type *base)
{
    enum ovsdb_atomic_type type = base->type;
    ovsdb_base_type_destroy(base);
    ovsdb_base_type_init(base, type);
}

static struct ovsdb_error *
parse_optional_uint(struct ovsdb_parser *parser, const char *member,
                    unsigned int *uint)
{
    const struct json *json;

    json = ovsdb_parser_member(parser, member, OP_INTEGER |
OP_OPTIONAL); if (json) { if (json->u.integer < 0 || json->u.integer > UINT_MAX) { return ovsdb_syntax_error(json, NULL, "%s out of valid range 0 to %u", member, UINT_MAX); } *uint = json->u.integer; } return NULL; } struct ovsdb_error * ovsdb_base_type_from_json(struct ovsdb_base_type *base, const struct json *json) { struct ovsdb_parser parser; struct ovsdb_error *error; const struct json *type, *enum_; if (json->type == JSON_STRING) { error = ovsdb_atomic_type_from_json(&base->type, json); if (error) { return error; } ovsdb_base_type_init(base, base->type); return NULL; } ovsdb_parser_init(&parser, json, "ovsdb type"); type = ovsdb_parser_member(&parser, "type", OP_STRING); if (ovsdb_parser_has_error(&parser)) { base->type = OVSDB_TYPE_VOID; return ovsdb_parser_finish(&parser); } error = ovsdb_atomic_type_from_json(&base->type, type); if (error) { return error; } ovsdb_base_type_init(base, base->type); enum_ = ovsdb_parser_member(&parser, "enum", OP_ANY | OP_OPTIONAL); if (enum_) { base->enum_ = xmalloc(sizeof *base->enum_); error = ovsdb_datum_from_json( base->enum_, ovsdb_base_type_get_enum_type(base->type), enum_, NULL); if (error) { free(base->enum_); base->enum_ = NULL; } } else if (base->type == OVSDB_TYPE_INTEGER) { const struct json *min, *max; min = ovsdb_parser_member(&parser, "minInteger", OP_INTEGER | OP_OPTIONAL); max = ovsdb_parser_member(&parser, "maxInteger", OP_INTEGER | OP_OPTIONAL); base->u.integer.min = min ? min->u.integer : INT64_MIN; base->u.integer.max = max ? max->u.integer : INT64_MAX; if (base->u.integer.min > base->u.integer.max) { error = ovsdb_syntax_error(json, NULL, "minInteger exceeds maxInteger"); } } else if (base->type == OVSDB_TYPE_REAL) { const struct json *min, *max; min = ovsdb_parser_member(&parser, "minReal", OP_NUMBER | OP_OPTIONAL); max = ovsdb_parser_member(&parser, "maxReal", OP_NUMBER | OP_OPTIONAL); base->u.real.min = min ? json_real(min) : -DBL_MAX; base->u.real.max = max ? 
json_real(max) : DBL_MAX; if (base->u.real.min > base->u.real.max) { error = ovsdb_syntax_error(json, NULL, "minReal exceeds maxReal"); } } else if (base->type == OVSDB_TYPE_STRING) { if (!error) { error = parse_optional_uint(&parser, "minLength", &base->u.string.minLen); } if (!error) { error = parse_optional_uint(&parser, "maxLength", &base->u.string.maxLen); } if (!error && base->u.string.minLen > base->u.string.maxLen) { error = ovsdb_syntax_error(json, NULL, "minLength exceeds maxLength"); } } else if (base->type == OVSDB_TYPE_UUID) { const struct json *refTable; refTable = ovsdb_parser_member(&parser, "refTable", OP_ID | OP_OPTIONAL); if (refTable) { const struct json *refType; base->u.uuid.refTableName = xstrdup(refTable->u.string); /* We can't set base->u.uuid.refTable here because we don't have * enough context (we might not even be running in ovsdb-server). * ovsdb_create() will set refTable later. */ refType = ovsdb_parser_member(&parser, "refType", OP_ID | OP_OPTIONAL); if (refType) { const char *refType_s = json_string(refType); if (!strcmp(refType_s, "strong")) { base->u.uuid.refType = OVSDB_REF_STRONG; } else if (!strcmp(refType_s, "weak")) { base->u.uuid.refType = OVSDB_REF_WEAK; } else { error = ovsdb_syntax_error(json, NULL, "refType must be " "\"strong\" or \"weak\" (not " "\"%s\")", refType_s); } } else { base->u.uuid.refType = OVSDB_REF_STRONG; } } } if (error) { ovsdb_error_destroy(ovsdb_parser_finish(&parser)); } else { error = ovsdb_parser_finish(&parser); } if (error) { ovsdb_base_type_destroy(base); base->type = OVSDB_TYPE_VOID; } return error; } struct json * ovsdb_base_type_to_json(const struct ovsdb_base_type *base) { struct json *json; if (!ovsdb_base_type_has_constraints(base)) { return json_string_create(ovsdb_atomic_type_to_string(base->type)); } json = json_object_create(); json_object_put_string(json, "type", ovsdb_atomic_type_to_string(base->type)); if (base->enum_) { const struct ovsdb_type *type; type = 
ovsdb_base_type_get_enum_type(base->type); json_object_put(json, "enum", ovsdb_datum_to_json(base->enum_, type)); } switch (base->type) { case OVSDB_TYPE_VOID: NOT_REACHED(); case OVSDB_TYPE_INTEGER: if (base->u.integer.min != INT64_MIN) { json_object_put(json, "minInteger", json_integer_create(base->u.integer.min)); } if (base->u.integer.max != INT64_MAX) { json_object_put(json, "maxInteger", json_integer_create(base->u.integer.max)); } break; case OVSDB_TYPE_REAL: if (base->u.real.min != -DBL_MAX) { json_object_put(json, "minReal", json_real_create(base->u.real.min)); } if (base->u.real.max != DBL_MAX) { json_object_put(json, "maxReal", json_real_create(base->u.real.max)); } break; case OVSDB_TYPE_BOOLEAN: break; case OVSDB_TYPE_STRING: if (base->u.string.minLen != 0) { json_object_put(json, "minLength", json_integer_create(base->u.string.minLen)); } if (base->u.string.maxLen != UINT_MAX) { json_object_put(json, "maxLength", json_integer_create(base->u.string.maxLen)); } break; case OVSDB_TYPE_UUID: if (base->u.uuid.refTableName) { json_object_put_string(json, "refTable", base->u.uuid.refTableName); if (base->u.uuid.refType == OVSDB_REF_WEAK) { json_object_put_string(json, "refType", "weak"); } } break; case OVSDB_N_TYPES: NOT_REACHED(); default: NOT_REACHED(); } return json; } /* ovsdb_type */ void ovsdb_type_clone(struct ovsdb_type *dst, const struct ovsdb_type *src) { ovsdb_base_type_clone(&dst->key, &src->key); ovsdb_base_type_clone(&dst->value, &src->value); dst->n_min = src->n_min; dst->n_max = src->n_max; } void ovsdb_type_destroy(struct ovsdb_type *type) { ovsdb_base_type_destroy(&type->key); ovsdb_base_type_destroy(&type->value); } bool ovsdb_type_is_valid(const struct ovsdb_type *type) { return (type->key.type != OVSDB_TYPE_VOID && ovsdb_base_type_is_valid(&type->key) && ovsdb_base_type_is_valid(&type->value) && type->n_min <= 1 && type->n_max >= 1); } static struct ovsdb_error * n_from_json(const struct json *json, unsigned int *n) { if (!json) { 
return NULL; } else if (json->type == JSON_INTEGER && json->u.integer >= 0 && json->u.integer < UINT_MAX) { *n = json->u.integer; return NULL; } else { return ovsdb_syntax_error(json, NULL, "bad min or max value"); } } char * ovsdb_type_to_english(const struct ovsdb_type *type) { const char *key = ovsdb_atomic_type_to_string(type->key.type); const char *value = ovsdb_atomic_type_to_string(type->value.type); if (ovsdb_type_is_scalar(type)) { return xstrdup(key); } else { struct ds s = DS_EMPTY_INITIALIZER; ds_put_cstr(&s, ovsdb_type_is_set(type) ? "set" : "map"); if (type->n_max == UINT_MAX) { if (type->n_min) { ds_put_format(&s, " of %u or more", type->n_min); } else { ds_put_cstr(&s, " of"); } } else if (type->n_min) { ds_put_format(&s, " of %u to %u", type->n_min, type->n_max); } else { ds_put_format(&s, " of up to %u", type->n_max); } if (ovsdb_type_is_set(type)) { ds_put_format(&s, " %ss", key); } else { ds_put_format(&s, " (%s, %s) pairs", key, value); } return ds_cstr(&s); } } struct ovsdb_error * ovsdb_type_from_json(struct ovsdb_type *type, const struct json *json) { ovsdb_base_type_init(&type->value, OVSDB_TYPE_VOID); type->n_min = 1; type->n_max = 1; if (json->type == JSON_STRING) { return ovsdb_base_type_from_json(&type->key, json); } else if (json->type == JSON_OBJECT) { const struct json *key, *value, *min, *max; struct ovsdb_error *error; struct ovsdb_parser parser; ovsdb_parser_init(&parser, json, "ovsdb type"); key = ovsdb_parser_member(&parser, "key", OP_STRING | OP_OBJECT); value = ovsdb_parser_member(&parser, "value", OP_STRING | OP_OBJECT | OP_OPTIONAL); min = ovsdb_parser_member(&parser, "min", OP_INTEGER | OP_OPTIONAL); max = ovsdb_parser_member(&parser, "max", OP_INTEGER | OP_STRING | OP_OPTIONAL); error = ovsdb_parser_finish(&parser); if (error) { return error; } error = ovsdb_base_type_from_json(&type->key, key); if (error) { return error; } if (value) { error = ovsdb_base_type_from_json(&type->value, value); if (error) { return error; } } 
error = n_from_json(min, &type->n_min); if (error) { return error; } if (max && max->type == JSON_STRING && !strcmp(max->u.string, "unlimited")) { type->n_max = UINT_MAX; } else { error = n_from_json(max, &type->n_max); if (error) { return error; } } if (!ovsdb_type_is_valid(type)) { return ovsdb_syntax_error(json, NULL, "ovsdb type fails constraint checks"); } return NULL; } else { return ovsdb_syntax_error(json, NULL, "ovsdb type expected"); } } struct json * ovsdb_type_to_json(const struct ovsdb_type *type) { if (ovsdb_type_is_scalar(type) && !ovsdb_base_type_has_constraints(&type->key)) { return ovsdb_base_type_to_json(&type->key); } else { struct json *json = json_object_create(); json_object_put(json, "key", ovsdb_base_type_to_json(&type->key)); if (type->value.type != OVSDB_TYPE_VOID) { json_object_put(json, "value", ovsdb_base_type_to_json(&type->value)); } if (type->n_min != 1) { json_object_put(json, "min", json_integer_create(type->n_min)); } if (type->n_max == UINT_MAX) { json_object_put_string(json, "max", "unlimited"); } else if (type->n_max != 1) { json_object_put(json, "max", json_integer_create(type->n_max)); } return json; } } openvswitch-2.0.1+git20140120/lib/ovsdb-types.h000066400000000000000000000176741226605124000206670ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef OVSDB_TYPES_H #define OVSDB_TYPES_H 1 #include #include #include #include "compiler.h" #include "uuid.h" struct json; /* An atomic type: one that OVSDB regards as a single unit of data. */ enum ovsdb_atomic_type { OVSDB_TYPE_VOID, /* No value. */ OVSDB_TYPE_INTEGER, /* Signed 64-bit integer. */ OVSDB_TYPE_REAL, /* IEEE 754 double-precision floating point. */ OVSDB_TYPE_BOOLEAN, /* True or false. */ OVSDB_TYPE_STRING, /* UTF-8 string. */ OVSDB_TYPE_UUID, /* RFC 4122 UUID referencing a table row. */ OVSDB_N_TYPES }; static inline bool ovsdb_atomic_type_is_valid(enum ovsdb_atomic_type); bool ovsdb_atomic_type_from_string(const char *, enum ovsdb_atomic_type *); struct ovsdb_error *ovsdb_atomic_type_from_json(enum ovsdb_atomic_type *, const struct json *); const char *ovsdb_atomic_type_to_string(enum ovsdb_atomic_type); struct json *ovsdb_atomic_type_to_json(enum ovsdb_atomic_type); /* An atomic type plus optional constraints. */ enum ovsdb_ref_type { OVSDB_REF_STRONG, /* Target must exist. */ OVSDB_REF_WEAK /* Delete reference if target disappears. */ }; struct ovsdb_base_type { enum ovsdb_atomic_type type; /* If nonnull, a datum with keys of type 'type' that expresses all the * valid values for this base_type. */ struct ovsdb_datum *enum_; union { struct ovsdb_integer_constraints { int64_t min; /* minInteger or INT64_MIN. */ int64_t max; /* maxInteger or INT64_MAX. */ } integer; struct ovsdb_real_constraints { double min; /* minReal or -DBL_MAX. */ double max; /* minReal or DBL_MAX. */ } real; /* No constraints for Boolean types. */ struct ovsdb_string_constraints { unsigned int minLen; /* minLength or 0. */ unsigned int maxLen; /* maxLength or UINT_MAX. */ } string; struct ovsdb_uuid_constraints { char *refTableName; /* Name of referenced table, or NULL. */ struct ovsdb_table *refTable; /* Referenced table, if available. */ enum ovsdb_ref_type refType; /* Reference type. 
*/ } uuid; } u; }; #define OVSDB_BASE_VOID_INIT { .type = OVSDB_TYPE_VOID } #define OVSDB_BASE_INTEGER_INIT { .type = OVSDB_TYPE_INTEGER, \ .u.integer = { INT64_MIN, INT64_MAX } } #define OVSDB_BASE_REAL_INIT { .type = OVSDB_TYPE_REAL, \ .u.real = { -DBL_MAX, DBL_MAX } } #define OVSDB_BASE_BOOLEAN_INIT { .type = OVSDB_TYPE_BOOLEAN } #define OVSDB_BASE_STRING_INIT { .type = OVSDB_TYPE_STRING, \ .u.string = { 0, UINT_MAX } } #define OVSDB_BASE_UUID_INIT { .type = OVSDB_TYPE_UUID, \ .u.uuid = { NULL, NULL, 0 } } void ovsdb_base_type_init(struct ovsdb_base_type *, enum ovsdb_atomic_type); void ovsdb_base_type_clone(struct ovsdb_base_type *, const struct ovsdb_base_type *); void ovsdb_base_type_destroy(struct ovsdb_base_type *); bool ovsdb_base_type_is_valid(const struct ovsdb_base_type *); bool ovsdb_base_type_has_constraints(const struct ovsdb_base_type *); void ovsdb_base_type_clear_constraints(struct ovsdb_base_type *); const struct ovsdb_type *ovsdb_base_type_get_enum_type(enum ovsdb_atomic_type); struct ovsdb_error *ovsdb_base_type_from_json(struct ovsdb_base_type *, const struct json *) WARN_UNUSED_RESULT; struct json *ovsdb_base_type_to_json(const struct ovsdb_base_type *); static inline bool ovsdb_base_type_is_ref(const struct ovsdb_base_type *); static inline bool ovsdb_base_type_is_strong_ref( const struct ovsdb_base_type *); static inline bool ovsdb_base_type_is_weak_ref(const struct ovsdb_base_type *); /* An OVSDB type. * * Several rules constrain the valid types. See ovsdb_type_is_valid() (in * ovsdb-types.c) for details. * * If 'value_type' is OVSDB_TYPE_VOID, 'n_min' is 1, and 'n_max' is 1, then the * type is a single atomic 'key_type'. * * If 'value_type' is OVSDB_TYPE_VOID and 'n_min' or 'n_max' (or both) has a * value other than 1, then the type is a set of 'key_type'. If 'n_min' is 0 * and 'n_max' is 1, then the type can also be considered an optional * 'key_type'. 
*
 * If 'value_type' is not OVSDB_TYPE_VOID, then the type is a map from
 * 'key_type' to 'value_type'.  If 'n_min' is 0 and 'n_max' is 1, then the type
 * can also be considered an optional pair of 'key_type' and 'value_type'.
 *
 * ('key_type' and 'value_type' above denote the 'type' members of the 'key'
 * and 'value' base types below.) */
struct ovsdb_type {
    struct ovsdb_base_type key;     /* Key (or sole element) base type. */
    struct ovsdb_base_type value;   /* Value base type; void unless a map. */
    unsigned int n_min;             /* Minimum number of elements. */
    unsigned int n_max;         /* UINT_MAX stands in for "unlimited". */
};

/* Static initializer for a scalar type (exactly one element, no value type)
 * whose key base type is initialized by 'KEY'. */
#define OVSDB_TYPE_SCALAR_INITIALIZER(KEY) { KEY, OVSDB_BASE_VOID_INIT, 1, 1 }

/* Predefined types, one per non-void atomic type. */
extern const struct ovsdb_type ovsdb_type_integer;
extern const struct ovsdb_type ovsdb_type_real;
extern const struct ovsdb_type ovsdb_type_boolean;
extern const struct ovsdb_type ovsdb_type_string;
extern const struct ovsdb_type ovsdb_type_uuid;

void ovsdb_type_clone(struct ovsdb_type *, const struct ovsdb_type *);
void ovsdb_type_destroy(struct ovsdb_type *);

bool ovsdb_type_is_valid(const struct ovsdb_type *);

static inline bool ovsdb_type_is_scalar(const struct ovsdb_type *);
static inline bool ovsdb_type_is_optional(const struct ovsdb_type *);
static inline bool ovsdb_type_is_composite(const struct ovsdb_type *);
static inline bool ovsdb_type_is_set(const struct ovsdb_type *);
static inline bool ovsdb_type_is_map(const struct ovsdb_type *);

char *ovsdb_type_to_english(const struct ovsdb_type *);

struct ovsdb_error *ovsdb_type_from_json(struct ovsdb_type *,
                                         const struct json *)
    WARN_UNUSED_RESULT;
struct json *ovsdb_type_to_json(const struct ovsdb_type *);

/* Inline function implementations.
*/ static inline bool ovsdb_atomic_type_is_valid(enum ovsdb_atomic_type atomic_type) { return (int) atomic_type >= 0 && atomic_type < OVSDB_N_TYPES; } static inline bool ovsdb_base_type_is_ref(const struct ovsdb_base_type *base) { return base->type == OVSDB_TYPE_UUID && base->u.uuid.refTableName; } static inline bool ovsdb_base_type_is_strong_ref(const struct ovsdb_base_type *base) { return (ovsdb_base_type_is_ref(base) && base->u.uuid.refType == OVSDB_REF_STRONG); } static inline bool ovsdb_base_type_is_weak_ref(const struct ovsdb_base_type *base) { return (ovsdb_base_type_is_ref(base) && base->u.uuid.refType == OVSDB_REF_WEAK); } static inline bool ovsdb_type_is_scalar(const struct ovsdb_type *type) { return (type->value.type == OVSDB_TYPE_VOID && type->n_min == 1 && type->n_max == 1); } static inline bool ovsdb_type_is_optional(const struct ovsdb_type *type) { return type->n_min == 0; } static inline bool ovsdb_type_is_composite(const struct ovsdb_type *type) { return type->n_max > 1; } static inline bool ovsdb_type_is_set(const struct ovsdb_type *type) { return (type->value.type == OVSDB_TYPE_VOID && (type->n_min != 1 || type->n_max != 1)); } static inline bool ovsdb_type_is_map(const struct ovsdb_type *type) { return type->value.type != OVSDB_TYPE_VOID; } #endif /* ovsdb-types.h */ openvswitch-2.0.1+git20140120/lib/packets.c000066400000000000000000000723031226605124000200230ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include "packets.h" #include #include #include #include #include #include "byte-order.h" #include "csum.h" #include "crc32c.h" #include "flow.h" #include "hmap.h" #include "dynamic-string.h" #include "ofpbuf.h" #include "ovs-thread.h" #include "unaligned.h" const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT; /* Parses 's' as a 16-digit hexadecimal number representing a datapath ID. On * success stores the dpid into '*dpidp' and returns true, on failure stores 0 * into '*dpidp' and returns false. * * Rejects an all-zeros dpid as invalid. */ bool dpid_from_string(const char *s, uint64_t *dpidp) { *dpidp = (strlen(s) == 16 && strspn(s, "0123456789abcdefABCDEF") == 16 ? strtoull(s, NULL, 16) : 0); return *dpidp != 0; } /* Returns true if 'ea' is a reserved address, that a bridge must never * forward, false otherwise. * * If you change this function's behavior, please update corresponding * documentation in vswitch.xml at the same time. */ bool eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]) { struct eth_addr_node { struct hmap_node hmap_node; const uint64_t ea64; }; static struct eth_addr_node nodes[] = { /* STP, IEEE pause frames, and other reserved protocols. 
*/ { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000000ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000001ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000002ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000003ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000004ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000005ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000006ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000007ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000008ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000009ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000aULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000bULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000cULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000dULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000eULL }, { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000fULL }, /* Extreme protocols. */ { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000000ULL }, /* EDP. */ { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000004ULL }, /* EAPS. */ { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000006ULL }, /* EAPS. */ /* Cisco protocols. */ { HMAP_NODE_NULL_INITIALIZER, 0x01000c000000ULL }, /* ISL. */ { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccccULL }, /* PAgP, UDLD, CDP, * DTP, VTP. */ { HMAP_NODE_NULL_INITIALIZER, 0x01000ccccccdULL }, /* PVST+. */ { HMAP_NODE_NULL_INITIALIZER, 0x01000ccdcdcdULL }, /* STP Uplink Fast, * FlexLink. */ /* Cisco CFM. 
*/ { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc0ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc1ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc2ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc3ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc4ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc5ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc6ULL }, { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc7ULL }, }; static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; struct eth_addr_node *node; static struct hmap addrs; uint64_t ea64; if (ovsthread_once_start(&once)) { hmap_init(&addrs); for (node = nodes; node < &nodes[ARRAY_SIZE(nodes)]; node++) { hmap_insert(&addrs, &node->hmap_node, hash_2words(node->ea64, node->ea64 >> 32)); } ovsthread_once_done(&once); } ea64 = eth_addr_to_uint64(ea); HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_2words(ea64, ea64 >> 32), &addrs) { if (node->ea64 == ea64) { return true; } } return false; } bool eth_addr_from_string(const char *s, uint8_t ea[ETH_ADDR_LEN]) { if (sscanf(s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea)) == ETH_ADDR_SCAN_COUNT) { return true; } else { memset(ea, 0, ETH_ADDR_LEN); return false; } } /* Fills 'b' with a Reverse ARP packet with Ethernet source address 'eth_src'. * This function is used by Open vSwitch to compose packets in cases where * context is important but content doesn't (or shouldn't) matter. * * The returned packet has enough headroom to insert an 802.1Q VLAN header if * desired. 
*/
void
compose_rarp(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN])
{
    struct eth_header *eh;
    struct arp_eth_header *rarp;

    /* Start from an empty buffer with room for both headers, keeping
     * 2 + VLAN_HEADER_LEN bytes of headroom in front of the Ethernet
     * header. */
    ofpbuf_clear(b);
    ofpbuf_prealloc_tailroom(b, 2 + ETH_HEADER_LEN + VLAN_HEADER_LEN
                             + ARP_ETH_HEADER_LEN);
    ofpbuf_reserve(b, 2 + VLAN_HEADER_LEN);

    /* Ethernet header: broadcast from 'eth_src', RARP Ethertype. */
    eh = ofpbuf_put_uninit(b, sizeof *eh);
    eh->eth_type = htons(ETH_TYPE_RARP);
    memcpy(eh->eth_src, eth_src, ETH_ADDR_LEN);
    memcpy(eh->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);

    /* RARP body: sender and target hardware addresses are both 'eth_src',
     * and both protocol addresses are zero. */
    rarp = ofpbuf_put_uninit(b, sizeof *rarp);
    memcpy(rarp->ar_sha, eth_src, ETH_ADDR_LEN);
    memcpy(rarp->ar_tha, eth_src, ETH_ADDR_LEN);
    put_16aligned_be32(&rarp->ar_spa, htonl(0));
    put_16aligned_be32(&rarp->ar_tpa, htonl(0));
    rarp->ar_op = htons(ARP_OP_RARP);
    rarp->ar_pln = sizeof rarp->ar_spa;
    rarp->ar_hln = sizeof rarp->ar_sha;
    rarp->ar_pro = htons(ARP_PRO_IP);
    rarp->ar_hrd = htons(ARP_HRD_ETHERNET);
}

/* Insert VLAN header according to given TCI. Packet passed must be Ethernet
 * packet.  Ignores the CFI bit of 'tci' using 0 instead.
 *
 * Also sets 'packet->l2' to point to the new Ethernet header. */
void
eth_push_vlan(struct ofpbuf *packet, ovs_be16 tci)
{
    const struct eth_header *old_eh = packet->data;
    struct vlan_eth_header tagged;
    void *new_start;

    /* Assemble the tagged header in a scratch copy first: the header is
     * written starting VLAN_HEADER_LEN bytes in front of the old one, so
     * writing it in place would clobber fields that are still to be read. */
    tagged.veth_type = htons(ETH_TYPE_VLAN);
    tagged.veth_tci = tci & htons(~VLAN_CFI);
    tagged.veth_next_type = old_eh->eth_type;
    memcpy(tagged.veth_src, old_eh->eth_src, ETH_ADDR_LEN);
    memcpy(tagged.veth_dst, old_eh->eth_dst, ETH_ADDR_LEN);

    new_start = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN);
    memcpy(new_start, &tagged, sizeof tagged);

    packet->l2 = packet->data;
}

/* Removes outermost VLAN header (if any is present) from 'packet'.
 *
 * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header
 * or may be NULL if there are no MPLS headers.
*/ void eth_pop_vlan(struct ofpbuf *packet) { struct vlan_eth_header *veh = packet->l2; if (packet->size >= sizeof *veh && veh->veth_type == htons(ETH_TYPE_VLAN)) { struct eth_header tmp; memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); tmp.eth_type = veh->veth_next_type; ofpbuf_pull(packet, VLAN_HEADER_LEN); packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN; memcpy(packet->data, &tmp, sizeof tmp); } } /* Return depth of mpls stack. * * 'packet->l2_5' should initially point to 'packet''s outer-most MPLS header * or may be NULL if there are no MPLS headers. */ uint16_t eth_mpls_depth(const struct ofpbuf *packet) { struct mpls_hdr *mh = packet->l2_5; uint16_t depth; if (!mh) { return 0; } depth = 0; while (packet->size >= ((char *)mh - (char *)packet->data) + sizeof *mh) { depth++; if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) { break; } mh++; } return depth; } /* Set ethertype of the packet. */ void set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type) { struct eth_header *eh = packet->data; if (eh->eth_type == htons(ETH_TYPE_VLAN)) { ovs_be16 *p; p = ALIGNED_CAST(ovs_be16 *, (char *)(packet->l2_5 ? packet->l2_5 : packet->l3) - 2); *p = eth_type; } else { eh->eth_type = eth_type; } } static bool is_mpls(struct ofpbuf *packet) { return packet->l2_5 != NULL; } /* Set time to live (TTL) of an MPLS label stack entry (LSE). */ void set_mpls_lse_ttl(ovs_be32 *lse, uint8_t ttl) { *lse &= ~htonl(MPLS_TTL_MASK); *lse |= htonl((ttl << MPLS_TTL_SHIFT) & MPLS_TTL_MASK); } /* Set traffic class (TC) of an MPLS label stack entry (LSE). */ void set_mpls_lse_tc(ovs_be32 *lse, uint8_t tc) { *lse &= ~htonl(MPLS_TC_MASK); *lse |= htonl((tc << MPLS_TC_SHIFT) & MPLS_TC_MASK); } /* Set label of an MPLS label stack entry (LSE). 
*/ void set_mpls_lse_label(ovs_be32 *lse, ovs_be32 label) { *lse &= ~htonl(MPLS_LABEL_MASK); *lse |= htonl((ntohl(label) << MPLS_LABEL_SHIFT) & MPLS_LABEL_MASK); } /* Set bottom of stack (BoS) bit of an MPLS label stack entry (LSE). */ void set_mpls_lse_bos(ovs_be32 *lse, uint8_t bos) { *lse &= ~htonl(MPLS_BOS_MASK); *lse |= htonl((bos << MPLS_BOS_SHIFT) & MPLS_BOS_MASK); } /* Compose an MPLS label stack entry (LSE) from its components: * label, traffic class (TC), time to live (TTL) and * bottom of stack (BoS) bit. */ ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos, ovs_be32 label) { ovs_be32 lse = htonl(0); set_mpls_lse_ttl(&lse, ttl); set_mpls_lse_tc(&lse, tc); set_mpls_lse_bos(&lse, bos); set_mpls_lse_label(&lse, label); return lse; } /* Push an new MPLS stack entry onto the MPLS stack and adjust 'packet->l2' and * 'packet->l2_5' accordingly. The new entry will be the outermost entry on * the stack. * * Previous to calling this function, 'packet->l2_5' must be set; if the MPLS * label to be pushed will be the first label in 'packet', then it should be * the same as 'packet->l3'. */ static void push_mpls_lse(struct ofpbuf *packet, struct mpls_hdr *mh) { char * header; size_t len; header = ofpbuf_push_uninit(packet, MPLS_HLEN); len = (char *)packet->l2_5 - (char *)packet->l2; memmove(header, packet->l2, len); memcpy(header + len, mh, sizeof *mh); packet->l2 = (char*)packet->l2 - MPLS_HLEN; packet->l2_5 = (char*)packet->l2_5 - MPLS_HLEN; } /* Set MPLS label stack entry to outermost MPLS header.*/ void set_mpls_lse(struct ofpbuf *packet, ovs_be32 mpls_lse) { struct mpls_hdr *mh = packet->l2_5; /* Packet type should be MPLS to set label stack entry. */ if (is_mpls(packet)) { /* Update mpls label stack entry. */ mh->mpls_lse = mpls_lse; } } /* Push MPLS label stack entry 'lse' onto 'packet' as the the outermost MPLS * header. 
If 'packet' does not already have any MPLS labels, then its * Ethertype is changed to 'ethtype' (which must be an MPLS Ethertype). */ void push_mpls(struct ofpbuf *packet, ovs_be16 ethtype, ovs_be32 lse) { struct mpls_hdr mh; if (!eth_type_mpls(ethtype)) { return; } if (!is_mpls(packet)) { /* Set ethtype and MPLS label stack entry. */ set_ethertype(packet, ethtype); packet->l2_5 = packet->l3; } /* Push new MPLS shim header onto packet. */ mh.mpls_lse = lse; push_mpls_lse(packet, &mh); } /* If 'packet' is an MPLS packet, removes its outermost MPLS label stack entry. * If the label that was removed was the only MPLS label, changes 'packet''s * Ethertype to 'ethtype' (which ordinarily should not be an MPLS * Ethertype). */ void pop_mpls(struct ofpbuf *packet, ovs_be16 ethtype) { struct mpls_hdr *mh = NULL; if (is_mpls(packet)) { size_t len; mh = packet->l2_5; len = (char*)packet->l2_5 - (char*)packet->l2; set_ethertype(packet, ethtype); if (mh->mpls_lse & htonl(MPLS_BOS_MASK)) { packet->l2_5 = NULL; } else { packet->l2_5 = (char*)packet->l2_5 + MPLS_HLEN; } /* Shift the l2 header forward. */ memmove((char*)packet->data + MPLS_HLEN, packet->data, len); packet->size -= MPLS_HLEN; packet->data = (char*)packet->data + MPLS_HLEN; packet->l2 = (char*)packet->l2 + MPLS_HLEN; } } /* Converts hex digits in 'hex' to an Ethernet packet in '*packetp'. The * caller must free '*packetp'. On success, returns NULL. On failure, returns * an error message and stores NULL in '*packetp'. * * Aligns the L3 header of '*packetp' on a 32-bit boundary. */ const char * eth_from_hex(const char *hex, struct ofpbuf **packetp) { struct ofpbuf *packet; /* Use 2 bytes of headroom to 32-bit align the L3 header. 
*/ packet = *packetp = ofpbuf_new_with_headroom(strlen(hex) / 2, 2); if (ofpbuf_put_hex(packet, hex, NULL)[0] != '\0') { ofpbuf_delete(packet); *packetp = NULL; return "Trailing garbage in packet data"; } if (packet->size < ETH_HEADER_LEN) { ofpbuf_delete(packet); *packetp = NULL; return "Packet data too short for Ethernet"; } return NULL; } void eth_format_masked(const uint8_t eth[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN], struct ds *s) { ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(eth)); if (mask && !eth_mask_is_exact(mask)) { ds_put_format(s, "/"ETH_ADDR_FMT, ETH_ADDR_ARGS(mask)); } } void eth_addr_bitand(const uint8_t src[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN], uint8_t dst[ETH_ADDR_LEN]) { int i; for (i = 0; i < ETH_ADDR_LEN; i++) { dst[i] = src[i] & mask[i]; } } /* Given the IP netmask 'netmask', returns the number of bits of the IP address * that it specifies, that is, the number of 1-bits in 'netmask'. * * If 'netmask' is not a CIDR netmask (see ip_is_cidr()), the return value will * still be in the valid range but isn't otherwise meaningful. */ int ip_count_cidr_bits(ovs_be32 netmask) { return 32 - ctz(ntohl(netmask)); } void ip_format_masked(ovs_be32 ip, ovs_be32 mask, struct ds *s) { ds_put_format(s, IP_FMT, IP_ARGS(ip)); if (mask != htonl(UINT32_MAX)) { if (ip_is_cidr(mask)) { ds_put_format(s, "/%d", ip_count_cidr_bits(mask)); } else { ds_put_format(s, "/"IP_FMT, IP_ARGS(mask)); } } } /* Stores the string representation of the IPv6 address 'addr' into the * character array 'addr_str', which must be at least INET6_ADDRSTRLEN * bytes long. 
*/ void format_ipv6_addr(char *addr_str, const struct in6_addr *addr) { inet_ntop(AF_INET6, addr, addr_str, INET6_ADDRSTRLEN); } void print_ipv6_addr(struct ds *string, const struct in6_addr *addr) { char *dst; ds_reserve(string, string->length + INET6_ADDRSTRLEN); dst = string->string + string->length; format_ipv6_addr(dst, addr); string->length += strlen(dst); } void print_ipv6_masked(struct ds *s, const struct in6_addr *addr, const struct in6_addr *mask) { print_ipv6_addr(s, addr); if (mask && !ipv6_mask_is_exact(mask)) { if (ipv6_is_cidr(mask)) { int cidr_bits = ipv6_count_cidr_bits(mask); ds_put_format(s, "/%d", cidr_bits); } else { ds_put_char(s, '/'); print_ipv6_addr(s, mask); } } } struct in6_addr ipv6_addr_bitand(const struct in6_addr *a, const struct in6_addr *b) { int i; struct in6_addr dst; #ifdef s6_addr32 for (i=0; i<4; i++) { dst.s6_addr32[i] = a->s6_addr32[i] & b->s6_addr32[i]; } #else for (i=0; i<16; i++) { dst.s6_addr[i] = a->s6_addr[i] & b->s6_addr[i]; } #endif return dst; } /* Returns an in6_addr consisting of 'mask' high-order 1-bits and 128-N * low-order 0-bits. */ struct in6_addr ipv6_create_mask(int mask) { struct in6_addr netmask; uint8_t *netmaskp = &netmask.s6_addr[0]; memset(&netmask, 0, sizeof netmask); while (mask > 8) { *netmaskp = 0xff; netmaskp++; mask -= 8; } if (mask) { *netmaskp = 0xff << (8 - mask); } return netmask; } /* Given the IPv6 netmask 'netmask', returns the number of bits of the IPv6 * address that it specifies, that is, the number of 1-bits in 'netmask'. * 'netmask' must be a CIDR netmask (see ipv6_is_cidr()). * * If 'netmask' is not a CIDR netmask (see ipv6_is_cidr()), the return value * will still be in the valid range but isn't otherwise meaningful. 
*/ int ipv6_count_cidr_bits(const struct in6_addr *netmask) { int i; int count = 0; const uint8_t *netmaskp = &netmask->s6_addr[0]; for (i=0; i<16; i++) { if (netmaskp[i] == 0xff) { count += 8; } else { uint8_t nm; for(nm = netmaskp[i]; nm; nm <<= 1) { count++; } break; } } return count; } /* Returns true if 'netmask' is a CIDR netmask, that is, if it consists of N * high-order 1-bits and 128-N low-order 0-bits. */ bool ipv6_is_cidr(const struct in6_addr *netmask) { const uint8_t *netmaskp = &netmask->s6_addr[0]; int i; for (i=0; i<16; i++) { if (netmaskp[i] != 0xff) { uint8_t x = ~netmaskp[i]; if (x & (x + 1)) { return false; } while (++i < 16) { if (netmaskp[i]) { return false; } } } } return true; } /* Populates 'b' with an Ethernet II packet headed with the given 'eth_dst', * 'eth_src' and 'eth_type' parameters. A payload of 'size' bytes is allocated * in 'b' and returned. This payload may be populated with appropriate * information by the caller. Sets 'b''s 'l2' and 'l3' pointers to the * Ethernet header and payload respectively. Aligns b->l3 on a 32-bit * boundary. * * The returned packet has enough headroom to insert an 802.1Q VLAN header if * desired. */ void * eth_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], const uint8_t eth_src[ETH_ADDR_LEN], uint16_t eth_type, size_t size) { void *data; struct eth_header *eth; ofpbuf_clear(b); /* The magic 2 here ensures that the L3 header (when it is added later) * will be 32-bit aligned. 
*/ ofpbuf_prealloc_tailroom(b, 2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + size); ofpbuf_reserve(b, 2 + VLAN_HEADER_LEN); eth = ofpbuf_put_uninit(b, ETH_HEADER_LEN); data = ofpbuf_put_uninit(b, size); memcpy(eth->eth_dst, eth_dst, ETH_ADDR_LEN); memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); eth->eth_type = htons(eth_type); b->l2 = eth; b->l3 = data; return data; } static void packet_set_ipv4_addr(struct ofpbuf *packet, ovs_16aligned_be32 *addr, ovs_be32 new_addr) { struct ip_header *nh = packet->l3; ovs_be32 old_addr = get_16aligned_be32(addr); if (nh->ip_proto == IPPROTO_TCP && packet->l7) { struct tcp_header *th = packet->l4; th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); } else if (nh->ip_proto == IPPROTO_UDP && packet->l7) { struct udp_header *uh = packet->l4; if (uh->udp_csum) { uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); if (!uh->udp_csum) { uh->udp_csum = htons(0xffff); } } } nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr); put_16aligned_be32(addr, new_addr); } /* Returns true, if packet contains at least one routing header where * segements_left > 0. * * This function assumes that L3 and L4 markers are set in the packet. */ static bool packet_rh_present(struct ofpbuf *packet) { const struct ovs_16aligned_ip6_hdr *nh; int nexthdr; size_t len; size_t remaining; uint8_t *data = packet->l3; remaining = (uint8_t *)packet->l4 - (uint8_t *)packet->l3; if (remaining < sizeof *nh) { return false; } nh = ALIGNED_CAST(struct ovs_16aligned_ip6_hdr *, data); data += sizeof *nh; remaining -= sizeof *nh; nexthdr = nh->ip6_nxt; while (1) { if ((nexthdr != IPPROTO_HOPOPTS) && (nexthdr != IPPROTO_ROUTING) && (nexthdr != IPPROTO_DSTOPTS) && (nexthdr != IPPROTO_AH) && (nexthdr != IPPROTO_FRAGMENT)) { /* It's either a terminal header (e.g., TCP, UDP) or one we * don't understand. In either case, we're done with the * packet, so use it to fill in 'nw_proto'. 
*/ break; } /* We only verify that at least 8 bytes of the next header are * available, but many of these headers are longer. Ensure that * accesses within the extension header are within those first 8 * bytes. All extension headers are required to be at least 8 * bytes. */ if (remaining < 8) { return false; } if (nexthdr == IPPROTO_AH) { /* A standard AH definition isn't available, but the fields * we care about are in the same location as the generic * option header--only the header length is calculated * differently. */ const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; nexthdr = ext_hdr->ip6e_nxt; len = (ext_hdr->ip6e_len + 2) * 4; } else if (nexthdr == IPPROTO_FRAGMENT) { const struct ovs_16aligned_ip6_frag *frag_hdr = ALIGNED_CAST(struct ovs_16aligned_ip6_frag *, data); nexthdr = frag_hdr->ip6f_nxt; len = sizeof *frag_hdr; } else if (nexthdr == IPPROTO_ROUTING) { const struct ip6_rthdr *rh = (struct ip6_rthdr *)data; if (rh->ip6r_segleft > 0) { return true; } nexthdr = rh->ip6r_nxt; len = (rh->ip6r_len + 1) * 8; } else { const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; nexthdr = ext_hdr->ip6e_nxt; len = (ext_hdr->ip6e_len + 1) * 8; } if (remaining < len) { return false; } remaining -= len; data += len; } return false; } static void packet_update_csum128(struct ofpbuf *packet, uint8_t proto, ovs_16aligned_be32 addr[4], const ovs_be32 new_addr[4]) { if (proto == IPPROTO_TCP && packet->l7) { struct tcp_header *th = packet->l4; th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); } else if (proto == IPPROTO_UDP && packet->l7) { struct udp_header *uh = packet->l4; if (uh->udp_csum) { uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); if (!uh->udp_csum) { uh->udp_csum = htons(0xffff); } } } } static void packet_set_ipv6_addr(struct ofpbuf *packet, uint8_t proto, ovs_16aligned_be32 *addr, const ovs_be32 new_addr[4], bool recalculate_csum) { if (recalculate_csum) { packet_update_csum128(packet, proto, addr, new_addr); } memcpy(addr, 
new_addr, sizeof(*addr)); } static void packet_set_ipv6_flow_label(ovs_16aligned_be32 *flow_label, ovs_be32 flow_key) { ovs_be32 old_label = get_16aligned_be32(flow_label); ovs_be32 new_label = (old_label & htonl(~IPV6_LABEL_MASK)) | flow_key; put_16aligned_be32(flow_label, new_label); } static void packet_set_ipv6_tc(ovs_16aligned_be32 *flow_label, uint8_t tc) { ovs_be32 old_label = get_16aligned_be32(flow_label); ovs_be32 new_label = (old_label & htonl(0xF00FFFFF)) | htonl(tc << 20); put_16aligned_be32(flow_label, new_label); } /* Modifies the IPv4 header fields of 'packet' to be consistent with 'src', * 'dst', 'tos', and 'ttl'. Updates 'packet''s L4 checksums as appropriate. * 'packet' must contain a valid IPv4 packet with correctly populated l[347] * markers. */ void packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst, uint8_t tos, uint8_t ttl) { struct ip_header *nh = packet->l3; if (get_16aligned_be32(&nh->ip_src) != src) { packet_set_ipv4_addr(packet, &nh->ip_src, src); } if (get_16aligned_be32(&nh->ip_dst) != dst) { packet_set_ipv4_addr(packet, &nh->ip_dst, dst); } if (nh->ip_tos != tos) { uint8_t *field = &nh->ip_tos; nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field), htons((uint16_t) tos)); *field = tos; } if (nh->ip_ttl != ttl) { uint8_t *field = &nh->ip_ttl; nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8), htons(ttl << 8)); *field = ttl; } } /* Modifies the IPv6 header fields of 'packet' to be consistent with 'src', * 'dst', 'traffic class', and 'next hop'. Updates 'packet''s L4 checksums as * appropriate. 'packet' must contain a valid IPv6 packet with correctly * populated l[347] markers. 
*/ void packet_set_ipv6(struct ofpbuf *packet, uint8_t proto, const ovs_be32 src[4], const ovs_be32 dst[4], uint8_t key_tc, ovs_be32 key_fl, uint8_t key_hl) { struct ovs_16aligned_ip6_hdr *nh = packet->l3; if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) { packet_set_ipv6_addr(packet, proto, nh->ip6_src.be32, src, true); } if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) { packet_set_ipv6_addr(packet, proto, nh->ip6_dst.be32, dst, !packet_rh_present(packet)); } packet_set_ipv6_tc(&nh->ip6_flow, key_tc); packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl); nh->ip6_hlim = key_hl; } static void packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) { if (*port != new_port) { *csum = recalc_csum16(*csum, *port, new_port); *port = new_port; } } /* Sets the TCP source and destination port ('src' and 'dst' respectively) of * the TCP header contained in 'packet'. 'packet' must be a valid TCP packet * with its l4 marker properly populated. */ void packet_set_tcp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) { struct tcp_header *th = packet->l4; packet_set_port(&th->tcp_src, src, &th->tcp_csum); packet_set_port(&th->tcp_dst, dst, &th->tcp_csum); } /* Sets the UDP source and destination port ('src' and 'dst' respectively) of * the UDP header contained in 'packet'. 'packet' must be a valid UDP packet * with its l4 marker properly populated. */ void packet_set_udp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) { struct udp_header *uh = packet->l4; if (uh->udp_csum) { packet_set_port(&uh->udp_src, src, &uh->udp_csum); packet_set_port(&uh->udp_dst, dst, &uh->udp_csum); if (!uh->udp_csum) { uh->udp_csum = htons(0xffff); } } else { uh->udp_src = src; uh->udp_dst = dst; } } /* Sets the SCTP source and destination port ('src' and 'dst' respectively) of * the SCTP header contained in 'packet'. 'packet' must be a valid SCTP packet * with its l4 marker properly populated. 
*/ void packet_set_sctp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) { struct sctp_header *sh = packet->l4; ovs_be32 old_csum, old_correct_csum, new_csum; uint16_t tp_len = packet->size - ((uint8_t*)sh - (uint8_t*)packet->data); old_csum = sh->sctp_csum; sh->sctp_csum = 0; old_correct_csum = crc32c(packet->l4, tp_len); sh->sctp_src = src; sh->sctp_dst = dst; new_csum = crc32c(packet->l4, tp_len); sh->sctp_csum = old_csum ^ old_correct_csum ^ new_csum; } /* If 'packet' is a TCP packet, returns the TCP flags. Otherwise, returns 0. * * 'flow' must be the flow corresponding to 'packet' and 'packet''s header * pointers must be properly initialized (e.g. with flow_extract()). */ uint8_t packet_get_tcp_flags(const struct ofpbuf *packet, const struct flow *flow) { if (dl_type_is_ip_any(flow->dl_type) && flow->nw_proto == IPPROTO_TCP && packet->l7) { const struct tcp_header *tcp = packet->l4; return TCP_FLAGS(tcp->tcp_ctl); } else { return 0; } } /* Appends a string representation of the TCP flags value 'tcp_flags' * (e.g. obtained via packet_get_tcp_flags() or TCP_FLAGS) to 's', in the * format used by tcpdump. */ void packet_format_tcp_flags(struct ds *s, uint8_t tcp_flags) { if (!tcp_flags) { ds_put_cstr(s, "none"); return; } if (tcp_flags & TCP_SYN) { ds_put_char(s, 'S'); } if (tcp_flags & TCP_FIN) { ds_put_char(s, 'F'); } if (tcp_flags & TCP_PSH) { ds_put_char(s, 'P'); } if (tcp_flags & TCP_RST) { ds_put_char(s, 'R'); } if (tcp_flags & TCP_URG) { ds_put_char(s, 'U'); } if (tcp_flags & TCP_ACK) { ds_put_char(s, '.'); } if (tcp_flags & 0x40) { ds_put_cstr(s, "[40]"); } if (tcp_flags & 0x80) { ds_put_cstr(s, "[80]"); } } openvswitch-2.0.1+git20140120/lib/packets.h000066400000000000000000000465171226605124000200400ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PACKETS_H #define PACKETS_H 1 #include #include #include #include #include #include "compiler.h" #include "flow.h" #include "openvswitch/types.h" #include "random.h" #include "util.h" struct ofpbuf; struct ds; bool dpid_from_string(const char *s, uint64_t *dpidp); #define ETH_ADDR_LEN 6 static const uint8_t eth_addr_broadcast[ETH_ADDR_LEN] OVS_UNUSED = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static const uint8_t eth_addr_stp[ETH_ADDR_LEN] OVS_UNUSED = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x00 }; static const uint8_t eth_addr_lacp[ETH_ADDR_LEN] OVS_UNUSED = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }; static const uint8_t eth_addr_bfd[ETH_ADDR_LEN] OVS_UNUSED = { 0x00, 0x23, 0x20, 0x00, 0x00, 0x01 }; static inline bool eth_addr_is_broadcast(const uint8_t ea[6]) { return (ea[0] & ea[1] & ea[2] & ea[3] & ea[4] & ea[5]) == 0xff; } static inline bool eth_addr_is_multicast(const uint8_t ea[6]) { return ea[0] & 1; } static inline bool eth_addr_is_local(const uint8_t ea[6]) { /* Local if it is either a locally administered address or a Nicira random * address. 
*/ return ea[0] & 2 || (ea[0] == 0x00 && ea[1] == 0x23 && ea[2] == 0x20 && ea[3] & 0x80); } static inline bool eth_addr_is_zero(const uint8_t ea[6]) { return !(ea[0] | ea[1] | ea[2] | ea[3] | ea[4] | ea[5]); } static inline int eth_mask_is_exact(const uint8_t ea[ETH_ADDR_LEN]) { return (ea[0] & ea[1] & ea[2] & ea[3] & ea[4] & ea[5]) == 0xff; } static inline int eth_addr_compare_3way(const uint8_t a[ETH_ADDR_LEN], const uint8_t b[ETH_ADDR_LEN]) { return memcmp(a, b, ETH_ADDR_LEN); } static inline bool eth_addr_equals(const uint8_t a[ETH_ADDR_LEN], const uint8_t b[ETH_ADDR_LEN]) { return !eth_addr_compare_3way(a, b); } static inline bool eth_addr_equal_except(const uint8_t a[ETH_ADDR_LEN], const uint8_t b[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN]) { return !(((a[0] ^ b[0]) & mask[0]) || ((a[1] ^ b[1]) & mask[1]) || ((a[2] ^ b[2]) & mask[2]) || ((a[3] ^ b[3]) & mask[3]) || ((a[4] ^ b[4]) & mask[4]) || ((a[5] ^ b[5]) & mask[5])); } static inline uint64_t eth_addr_to_uint64(const uint8_t ea[ETH_ADDR_LEN]) { return (((uint64_t) ea[0] << 40) | ((uint64_t) ea[1] << 32) | ((uint64_t) ea[2] << 24) | ((uint64_t) ea[3] << 16) | ((uint64_t) ea[4] << 8) | ea[5]); } static inline void eth_addr_from_uint64(uint64_t x, uint8_t ea[ETH_ADDR_LEN]) { ea[0] = x >> 40; ea[1] = x >> 32; ea[2] = x >> 24; ea[3] = x >> 16; ea[4] = x >> 8; ea[5] = x; } static inline void eth_addr_mark_random(uint8_t ea[ETH_ADDR_LEN]) { ea[0] &= ~1; /* Unicast. */ ea[0] |= 2; /* Private. */ } static inline void eth_addr_random(uint8_t ea[ETH_ADDR_LEN]) { random_bytes(ea, ETH_ADDR_LEN); eth_addr_mark_random(ea); } static inline void eth_addr_nicira_random(uint8_t ea[ETH_ADDR_LEN]) { eth_addr_random(ea); /* Set the OUI to the Nicira one. */ ea[0] = 0x00; ea[1] = 0x23; ea[2] = 0x20; /* Set the top bit to indicate random Nicira address. 
*/ ea[3] |= 0x80; } bool eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]); bool eth_addr_from_string(const char *, uint8_t ea[ETH_ADDR_LEN]); void compose_rarp(struct ofpbuf *, const uint8_t eth_src[ETH_ADDR_LEN]); void eth_push_vlan(struct ofpbuf *, ovs_be16 tci); void eth_pop_vlan(struct ofpbuf *); uint16_t eth_mpls_depth(const struct ofpbuf *packet); void set_ethertype(struct ofpbuf *packet, ovs_be16 eth_type); const char *eth_from_hex(const char *hex, struct ofpbuf **packetp); void eth_format_masked(const uint8_t eth[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN], struct ds *s); void eth_addr_bitand(const uint8_t src[ETH_ADDR_LEN], const uint8_t mask[ETH_ADDR_LEN], uint8_t dst[ETH_ADDR_LEN]); void set_mpls_lse(struct ofpbuf *, ovs_be32 label); void push_mpls(struct ofpbuf *packet, ovs_be16 ethtype, ovs_be32 lse); void pop_mpls(struct ofpbuf *, ovs_be16 ethtype); void set_mpls_lse_ttl(ovs_be32 *lse, uint8_t ttl); void set_mpls_lse_tc(ovs_be32 *lse, uint8_t tc); void set_mpls_lse_label(ovs_be32 *lse, ovs_be32 label); void set_mpls_lse_bos(ovs_be32 *lse, uint8_t bos); ovs_be32 set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos, ovs_be32 label); /* Example: * * uint8_t mac[ETH_ADDR_LEN]; * [...] * printf("The Ethernet address is "ETH_ADDR_FMT"\n", ETH_ADDR_ARGS(mac)); * */ #define ETH_ADDR_FMT \ "%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8":%02"PRIx8 #define ETH_ADDR_ARGS(ea) \ (ea)[0], (ea)[1], (ea)[2], (ea)[3], (ea)[4], (ea)[5] /* Example: * * char *string = "1 00:11:22:33:44:55 2"; * uint8_t mac[ETH_ADDR_LEN]; * int a, b; * * if (sscanf(string, "%d"ETH_ADDR_SCAN_FMT"%d", * &a, ETH_ADDR_SCAN_ARGS(mac), &b) == 1 + ETH_ADDR_SCAN_COUNT + 1) { * ... 
* } */ #define ETH_ADDR_SCAN_FMT "%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8":%"SCNx8 #define ETH_ADDR_SCAN_ARGS(ea) \ &(ea)[0], &(ea)[1], &(ea)[2], &(ea)[3], &(ea)[4], &(ea)[5] #define ETH_ADDR_SCAN_COUNT 6 #define ETH_TYPE_IP 0x0800 #define ETH_TYPE_ARP 0x0806 #define ETH_TYPE_VLAN_8021Q 0x8100 #define ETH_TYPE_VLAN ETH_TYPE_VLAN_8021Q #define ETH_TYPE_VLAN_8021AD 0x88a8 #define ETH_TYPE_IPV6 0x86dd #define ETH_TYPE_LACP 0x8809 #define ETH_TYPE_RARP 0x8035 #define ETH_TYPE_MPLS 0x8847 #define ETH_TYPE_MPLS_MCAST 0x8848 static inline bool eth_type_mpls(ovs_be16 eth_type) { return eth_type == htons(ETH_TYPE_MPLS) || eth_type == htons(ETH_TYPE_MPLS_MCAST); } /* Minimum value for an Ethernet type. Values below this are IEEE 802.2 frame * lengths. */ #define ETH_TYPE_MIN 0x600 #define ETH_HEADER_LEN 14 #define ETH_PAYLOAD_MIN 46 #define ETH_PAYLOAD_MAX 1500 #define ETH_TOTAL_MIN (ETH_HEADER_LEN + ETH_PAYLOAD_MIN) #define ETH_TOTAL_MAX (ETH_HEADER_LEN + ETH_PAYLOAD_MAX) #define ETH_VLAN_TOTAL_MAX (ETH_HEADER_LEN + VLAN_HEADER_LEN + ETH_PAYLOAD_MAX) OVS_PACKED( struct eth_header { uint8_t eth_dst[ETH_ADDR_LEN]; uint8_t eth_src[ETH_ADDR_LEN]; ovs_be16 eth_type; }); BUILD_ASSERT_DECL(ETH_HEADER_LEN == sizeof(struct eth_header)); #define LLC_DSAP_SNAP 0xaa #define LLC_SSAP_SNAP 0xaa #define LLC_CNTL_SNAP 3 #define LLC_HEADER_LEN 3 OVS_PACKED( struct llc_header { uint8_t llc_dsap; uint8_t llc_ssap; uint8_t llc_cntl; }); BUILD_ASSERT_DECL(LLC_HEADER_LEN == sizeof(struct llc_header)); #define SNAP_ORG_ETHERNET "\0\0" /* The compiler adds a null byte, so sizeof(SNAP_ORG_ETHERNET) == 3. 
*/ #define SNAP_HEADER_LEN 5 OVS_PACKED( struct snap_header { uint8_t snap_org[3]; ovs_be16 snap_type; }); BUILD_ASSERT_DECL(SNAP_HEADER_LEN == sizeof(struct snap_header)); #define LLC_SNAP_HEADER_LEN (LLC_HEADER_LEN + SNAP_HEADER_LEN) OVS_PACKED( struct llc_snap_header { struct llc_header llc; struct snap_header snap; }); BUILD_ASSERT_DECL(LLC_SNAP_HEADER_LEN == sizeof(struct llc_snap_header)); #define VLAN_VID_MASK 0x0fff #define VLAN_VID_SHIFT 0 #define VLAN_PCP_MASK 0xe000 #define VLAN_PCP_SHIFT 13 #define VLAN_CFI 0x1000 #define VLAN_CFI_SHIFT 12 /* Given the vlan_tci field from an 802.1Q header, in network byte order, * returns the VLAN ID in host byte order. */ static inline uint16_t vlan_tci_to_vid(ovs_be16 vlan_tci) { return (ntohs(vlan_tci) & VLAN_VID_MASK) >> VLAN_VID_SHIFT; } /* Given the vlan_tci field from an 802.1Q header, in network byte order, * returns the priority code point (PCP) in host byte order. */ static inline int vlan_tci_to_pcp(ovs_be16 vlan_tci) { return (ntohs(vlan_tci) & VLAN_PCP_MASK) >> VLAN_PCP_SHIFT; } /* Given the vlan_tci field from an 802.1Q header, in network byte order, * returns the Canonical Format Indicator (CFI). */ static inline int vlan_tci_to_cfi(ovs_be16 vlan_tci) { return (vlan_tci & htons(VLAN_CFI)) != 0; } #define VLAN_HEADER_LEN 4 struct vlan_header { ovs_be16 vlan_tci; /* Lowest 12 bits are VLAN ID. */ ovs_be16 vlan_next_type; }; BUILD_ASSERT_DECL(VLAN_HEADER_LEN == sizeof(struct vlan_header)); #define VLAN_ETH_HEADER_LEN (ETH_HEADER_LEN + VLAN_HEADER_LEN) OVS_PACKED( struct vlan_eth_header { uint8_t veth_dst[ETH_ADDR_LEN]; uint8_t veth_src[ETH_ADDR_LEN]; ovs_be16 veth_type; /* Always htons(ETH_TYPE_VLAN). */ ovs_be16 veth_tci; /* Lowest 12 bits are VLAN ID. 
*/ ovs_be16 veth_next_type; }); BUILD_ASSERT_DECL(VLAN_ETH_HEADER_LEN == sizeof(struct vlan_eth_header)); /* MPLS related definitions */ #define MPLS_TTL_MASK 0x000000ff #define MPLS_TTL_SHIFT 0 #define MPLS_BOS_MASK 0x00000100 #define MPLS_BOS_SHIFT 8 #define MPLS_TC_MASK 0x00000e00 #define MPLS_TC_SHIFT 9 #define MPLS_LABEL_MASK 0xfffff000 #define MPLS_LABEL_SHIFT 12 #define MPLS_HLEN 4 struct mpls_hdr { ovs_be32 mpls_lse; }; BUILD_ASSERT_DECL(MPLS_HLEN == sizeof(struct mpls_hdr)); /* Given a mpls label stack entry in network byte order * return mpls label in host byte order */ static inline uint32_t mpls_lse_to_label(ovs_be32 mpls_lse) { return (ntohl(mpls_lse) & MPLS_LABEL_MASK) >> MPLS_LABEL_SHIFT; } /* Given a mpls label stack entry in network byte order * return mpls tc */ static inline uint8_t mpls_lse_to_tc(ovs_be32 mpls_lse) { return (ntohl(mpls_lse) & MPLS_TC_MASK) >> MPLS_TC_SHIFT; } /* Given a mpls label stack entry in network byte order * return mpls ttl */ static inline uint8_t mpls_lse_to_ttl(ovs_be32 mpls_lse) { return (ntohl(mpls_lse) & MPLS_TTL_MASK) >> MPLS_TTL_SHIFT; } /* Set TTL in mpls lse. */ static inline void flow_set_mpls_lse_ttl(ovs_be32 *mpls_lse, uint8_t ttl) { *mpls_lse &= ~htonl(MPLS_TTL_MASK); *mpls_lse |= htonl(ttl << MPLS_TTL_SHIFT); } /* Given a mpls label stack entry in network byte order * return mpls BoS bit */ static inline uint8_t mpls_lse_to_bos(ovs_be32 mpls_lse) { return (mpls_lse & htonl(MPLS_BOS_MASK)) != 0; } #define IP_FMT "%"PRIu32".%"PRIu32".%"PRIu32".%"PRIu32 #define IP_ARGS(ip) \ ntohl(ip) >> 24, \ (ntohl(ip) >> 16) & 0xff, \ (ntohl(ip) >> 8) & 0xff, \ ntohl(ip) & 0xff /* Example: * * char *string = "1 33.44.55.66 2"; * ovs_be32 ip; * int a, b; * * if (sscanf(string, "%d"IP_SCAN_FMT"%d", * &a, IP_SCAN_ARGS(&ip), &b) == 1 + IP_SCAN_COUNT + 1) { * ... 
* } */ #define IP_SCAN_FMT "%"SCNu8".%"SCNu8".%"SCNu8".%"SCNu8 #define IP_SCAN_ARGS(ip) \ ((void) (ovs_be32) *(ip), &((uint8_t *) ip)[0]), \ &((uint8_t *) ip)[1], \ &((uint8_t *) ip)[2], \ &((uint8_t *) ip)[3] #define IP_SCAN_COUNT 4 /* Returns true if 'netmask' is a CIDR netmask, that is, if it consists of N * high-order 1-bits and 32-N low-order 0-bits. */ static inline bool ip_is_cidr(ovs_be32 netmask) { uint32_t x = ~ntohl(netmask); return !(x & (x + 1)); } static inline bool ip_is_multicast(ovs_be32 ip) { return (ip & htonl(0xf0000000)) == htonl(0xe0000000); } int ip_count_cidr_bits(ovs_be32 netmask); void ip_format_masked(ovs_be32 ip, ovs_be32 mask, struct ds *); #define IP_VER(ip_ihl_ver) ((ip_ihl_ver) >> 4) #define IP_IHL(ip_ihl_ver) ((ip_ihl_ver) & 15) #define IP_IHL_VER(ihl, ver) (((ver) << 4) | (ihl)) #ifndef IPPROTO_SCTP #define IPPROTO_SCTP 132 #endif /* TOS fields. */ #define IP_ECN_NOT_ECT 0x0 #define IP_ECN_ECT_1 0x01 #define IP_ECN_ECT_0 0x02 #define IP_ECN_CE 0x03 #define IP_ECN_MASK 0x03 #define IP_DSCP_MASK 0xfc #define IP_VERSION 4 #define IP_DONT_FRAGMENT 0x4000 /* Don't fragment. */ #define IP_MORE_FRAGMENTS 0x2000 /* More fragments. */ #define IP_FRAG_OFF_MASK 0x1fff /* Fragment offset. 
*/ #define IP_IS_FRAGMENT(ip_frag_off) \ ((ip_frag_off) & htons(IP_MORE_FRAGMENTS | IP_FRAG_OFF_MASK)) #define IP_HEADER_LEN 20 struct ip_header { uint8_t ip_ihl_ver; uint8_t ip_tos; ovs_be16 ip_tot_len; ovs_be16 ip_id; ovs_be16 ip_frag_off; uint8_t ip_ttl; uint8_t ip_proto; ovs_be16 ip_csum; ovs_16aligned_be32 ip_src; ovs_16aligned_be32 ip_dst; }; BUILD_ASSERT_DECL(IP_HEADER_LEN == sizeof(struct ip_header)); #define ICMP_HEADER_LEN 8 struct icmp_header { uint8_t icmp_type; uint8_t icmp_code; ovs_be16 icmp_csum; union { struct { ovs_be16 id; ovs_be16 seq; } echo; struct { ovs_be16 empty; ovs_be16 mtu; } frag; ovs_16aligned_be32 gateway; } icmp_fields; uint8_t icmp_data[0]; }; BUILD_ASSERT_DECL(ICMP_HEADER_LEN == sizeof(struct icmp_header)); #define SCTP_HEADER_LEN 12 struct sctp_header { ovs_be16 sctp_src; ovs_be16 sctp_dst; ovs_be32 sctp_vtag; ovs_be32 sctp_csum; }; BUILD_ASSERT_DECL(SCTP_HEADER_LEN == sizeof(struct sctp_header)); #define UDP_HEADER_LEN 8 struct udp_header { ovs_be16 udp_src; ovs_be16 udp_dst; ovs_be16 udp_len; ovs_be16 udp_csum; }; BUILD_ASSERT_DECL(UDP_HEADER_LEN == sizeof(struct udp_header)); #define TCP_FIN 0x01 #define TCP_SYN 0x02 #define TCP_RST 0x04 #define TCP_PSH 0x08 #define TCP_ACK 0x10 #define TCP_URG 0x20 #define TCP_CTL(flags, offset) (htons((flags) | ((offset) << 12))) #define TCP_FLAGS(tcp_ctl) (ntohs(tcp_ctl) & 0x003f) #define TCP_OFFSET(tcp_ctl) (ntohs(tcp_ctl) >> 12) #define TCP_HEADER_LEN 20 struct tcp_header { ovs_be16 tcp_src; ovs_be16 tcp_dst; ovs_16aligned_be32 tcp_seq; ovs_16aligned_be32 tcp_ack; ovs_be16 tcp_ctl; ovs_be16 tcp_winsz; ovs_be16 tcp_csum; ovs_be16 tcp_urg; }; BUILD_ASSERT_DECL(TCP_HEADER_LEN == sizeof(struct tcp_header)); #define ARP_HRD_ETHERNET 1 #define ARP_PRO_IP 0x0800 #define ARP_OP_REQUEST 1 #define ARP_OP_REPLY 2 #define ARP_OP_RARP 3 #define ARP_ETH_HEADER_LEN 28 struct arp_eth_header { /* Generic members. */ ovs_be16 ar_hrd; /* Hardware type. */ ovs_be16 ar_pro; /* Protocol type. 
*/ uint8_t ar_hln; /* Hardware address length. */ uint8_t ar_pln; /* Protocol address length. */ ovs_be16 ar_op; /* Opcode. */ /* Ethernet+IPv4 specific members. */ uint8_t ar_sha[ETH_ADDR_LEN]; /* Sender hardware address. */ ovs_16aligned_be32 ar_spa; /* Sender protocol address. */ uint8_t ar_tha[ETH_ADDR_LEN]; /* Target hardware address. */ ovs_16aligned_be32 ar_tpa; /* Target protocol address. */ }; BUILD_ASSERT_DECL(ARP_ETH_HEADER_LEN == sizeof(struct arp_eth_header)); /* Like struct in6_addr, but whereas that struct requires 32-bit alignment on * most implementations, this one only requires 16-bit alignment. */ union ovs_16aligned_in6_addr { ovs_be16 be16[8]; ovs_16aligned_be32 be32[4]; }; /* Like struct in6_hdr, but whereas that struct requires 32-bit alignment, this * one only requires 16-bit alignment. */ struct ovs_16aligned_ip6_hdr { union { struct ovs_16aligned_ip6_hdrctl { ovs_16aligned_be32 ip6_un1_flow; ovs_be16 ip6_un1_plen; uint8_t ip6_un1_nxt; uint8_t ip6_un1_hlim; } ip6_un1; uint8_t ip6_un2_vfc; } ip6_ctlun; union ovs_16aligned_in6_addr ip6_src; union ovs_16aligned_in6_addr ip6_dst; }; /* Like struct in6_frag, but whereas that struct requires 32-bit alignment, * this one only requires 16-bit alignment. */ struct ovs_16aligned_ip6_frag { uint8_t ip6f_nxt; uint8_t ip6f_reserved; ovs_be16 ip6f_offlg; ovs_16aligned_be32 ip6f_ident; }; /* The IPv6 flow label is in the lower 20 bits of the first 32-bit word. */ #define IPV6_LABEL_MASK 0x000fffff /* Example: * * char *string = "1 ::1 2"; * char ipv6_s[IPV6_SCAN_LEN + 1]; * struct in6_addr ipv6; * * if (sscanf(string, "%d"IPV6_SCAN_FMT"%d", &a, ipv6_s, &b) == 3 * && inet_pton(AF_INET6, ipv6_s, &ipv6) == 1) { * ... 
* } */ #define IPV6_SCAN_FMT "%46[0123456789abcdefABCDEF:.]" #define IPV6_SCAN_LEN 46 extern const struct in6_addr in6addr_exact; #define IN6ADDR_EXACT_INIT { { { 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, \ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff } } } static inline bool ipv6_addr_equals(const struct in6_addr *a, const struct in6_addr *b) { #ifdef IN6_ARE_ADDR_EQUAL return IN6_ARE_ADDR_EQUAL(a, b); #else return !memcmp(a, b, sizeof(*a)); #endif } static inline bool ipv6_mask_is_any(const struct in6_addr *mask) { return ipv6_addr_equals(mask, &in6addr_any); } static inline bool ipv6_mask_is_exact(const struct in6_addr *mask) { return ipv6_addr_equals(mask, &in6addr_exact); } static inline bool dl_type_is_ip_any(ovs_be16 dl_type) { return dl_type == htons(ETH_TYPE_IP) || dl_type == htons(ETH_TYPE_IPV6); } static inline bool is_ip_any(const struct flow *flow) { return dl_type_is_ip_any(flow->dl_type); } void format_ipv6_addr(char *addr_str, const struct in6_addr *addr); void print_ipv6_addr(struct ds *string, const struct in6_addr *addr); void print_ipv6_masked(struct ds *string, const struct in6_addr *addr, const struct in6_addr *mask); struct in6_addr ipv6_addr_bitand(const struct in6_addr *src, const struct in6_addr *mask); struct in6_addr ipv6_create_mask(int mask); int ipv6_count_cidr_bits(const struct in6_addr *netmask); bool ipv6_is_cidr(const struct in6_addr *netmask); void *eth_compose(struct ofpbuf *, const uint8_t eth_dst[ETH_ADDR_LEN], const uint8_t eth_src[ETH_ADDR_LEN], uint16_t eth_type, size_t size); void *snap_compose(struct ofpbuf *, const uint8_t eth_dst[ETH_ADDR_LEN], const uint8_t eth_src[ETH_ADDR_LEN], unsigned int oui, uint16_t snap_type, size_t size); void packet_set_ipv4(struct ofpbuf *, ovs_be32 src, ovs_be32 dst, uint8_t tos, uint8_t ttl); void packet_set_ipv6(struct ofpbuf *, uint8_t proto, const ovs_be32 src[4], const ovs_be32 dst[4], uint8_t tc, ovs_be32 fl, uint8_t hlmit); void packet_set_tcp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 
dst); void packet_set_udp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst); void packet_set_sctp_port(struct ofpbuf *, ovs_be16 src, ovs_be16 dst); uint8_t packet_get_tcp_flags(const struct ofpbuf *, const struct flow *); void packet_format_tcp_flags(struct ds *, uint8_t); #endif /* packets.h */ openvswitch-2.0.1+git20140120/lib/pcap-file.c000066400000000000000000000112271226605124000202270ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "pcap-file.h" #include #include #include #include "compiler.h" #include "ofpbuf.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(pcap); struct pcap_hdr { uint32_t magic_number; /* magic number */ uint16_t version_major; /* major version number */ uint16_t version_minor; /* minor version number */ int32_t thiszone; /* GMT to local correction */ uint32_t sigfigs; /* accuracy of timestamps */ uint32_t snaplen; /* max length of captured packets */ uint32_t network; /* data link type */ }; BUILD_ASSERT_DECL(sizeof(struct pcap_hdr) == 24); struct pcaprec_hdr { uint32_t ts_sec; /* timestamp seconds */ uint32_t ts_usec; /* timestamp microseconds */ uint32_t incl_len; /* number of octets of packet saved in file */ uint32_t orig_len; /* actual length of packet */ }; BUILD_ASSERT_DECL(sizeof(struct pcaprec_hdr) == 16); FILE * pcap_open(const char *file_name, const char *mode) { FILE *file; ovs_assert(!strcmp(mode, "rb") || !strcmp(mode, "wb")); file = fopen(file_name, mode); if (file == NULL) { VLOG_WARN("%s: failed to open pcap file for %s", file_name, mode[0] == 'r' ? "reading" : "writing"); return NULL; } if (mode[0] == 'r') { if (!pcap_read_header(file)) { fclose(file); return NULL; } } else { pcap_write_header(file); } return file; } int pcap_read_header(FILE *file) { struct pcap_hdr ph; if (fread(&ph, sizeof ph, 1, file) != 1) { int error = ferror(file) ? errno : EOF; VLOG_WARN("failed to read pcap header: %s", ovs_retval_to_string(error)); return error; } if (ph.magic_number != 0xa1b2c3d4 && ph.magic_number != 0xd4c3b2a1) { VLOG_WARN("bad magic 0x%08"PRIx32" reading pcap file " "(expected 0xa1b2c3d4 or 0xd4c3b2a1)", ph.magic_number); return EPROTO; } return 0; } void pcap_write_header(FILE *file) { /* The pcap reader is responsible for figuring out endianness based on the * magic number, so the lack of htonX calls here is intentional. 
*/ struct pcap_hdr ph; ph.magic_number = 0xa1b2c3d4; ph.version_major = 2; ph.version_minor = 4; ph.thiszone = 0; ph.sigfigs = 0; ph.snaplen = 1518; ph.network = 1; /* Ethernet */ ignore(fwrite(&ph, sizeof ph, 1, file)); } int pcap_read(FILE *file, struct ofpbuf **bufp) { struct pcaprec_hdr prh; struct ofpbuf *buf; void *data; size_t len; *bufp = NULL; /* Read header. */ if (fread(&prh, sizeof prh, 1, file) != 1) { int error = ferror(file) ? errno : EOF; VLOG_WARN("failed to read pcap record header: %s", ovs_retval_to_string(error)); return error; } /* Calculate length. */ len = prh.incl_len; if (len > 0xffff) { uint32_t swapped_len = (((len & 0xff000000) >> 24) | ((len & 0x00ff0000) >> 8) | ((len & 0x0000ff00) << 8) | ((len & 0x000000ff) << 24)); if (swapped_len > 0xffff) { VLOG_WARN("bad packet length %zu or %"PRIu32" " "reading pcap file", len, swapped_len); return EPROTO; } len = swapped_len; } /* Read packet. */ buf = ofpbuf_new(len); data = ofpbuf_put_uninit(buf, len); if (fread(data, len, 1, file) != 1) { int error = ferror(file) ? errno : EOF; VLOG_WARN("failed to read pcap packet: %s", ovs_retval_to_string(error)); ofpbuf_delete(buf); return error; } *bufp = buf; return 0; } void pcap_write(FILE *file, struct ofpbuf *buf) { struct pcaprec_hdr prh; prh.ts_sec = 0; prh.ts_usec = 0; prh.incl_len = buf->size; prh.orig_len = buf->size; ignore(fwrite(&prh, sizeof prh, 1, file)); ignore(fwrite(buf->data, buf->size, 1, file)); } openvswitch-2.0.1+git20140120/lib/pcap-file.h000066400000000000000000000016161226605124000202350ustar00rootroot00000000000000/* * Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PCAP_FILE_H #define PCAP_FILE_H 1 #include struct ofpbuf; FILE *pcap_open(const char *file_name, const char *mode); int pcap_read_header(FILE *); void pcap_write_header(FILE *); int pcap_read(FILE *, struct ofpbuf **); void pcap_write(FILE *, struct ofpbuf *); #endif /* pcap-file.h */ openvswitch-2.0.1+git20140120/lib/poll-loop.c000066400000000000000000000215061226605124000203050ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "poll-loop.h" #include #include #include #include #include #include "coverage.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "list.h" #include "ovs-thread.h" #include "seq.h" #include "socket-util.h" #include "timeval.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(poll_loop); COVERAGE_DEFINE(poll_fd_wait); COVERAGE_DEFINE(poll_zero_timeout); struct poll_loop { /* All active poll waiters. */ struct pollfd *pollfds; /* Events to pass to poll(). */ const char **where; /* Where each pollfd was created. 
*/ size_t n_waiters; /* Number of elems in 'where' and 'pollfds'. */ size_t allocated_waiters; /* Allocated elems in 'where' and 'pollfds'. */ /* Time at which to wake up the next call to poll_block(), LLONG_MIN to * wake up immediately, or LLONG_MAX to wait forever. */ long long int timeout_when; /* In msecs as returned by time_msec(). */ const char *timeout_where; /* Where 'timeout_when' was set. */ }; static struct poll_loop *poll_loop(void); /* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN * or POLLOUT or POLLIN | POLLOUT). The following call to poll_block() will * wake up when 'fd' becomes ready for one or more of the requested events. * * The event registration is one-shot: only the following call to poll_block() * is affected. The event will need to be re-registered after poll_block() is * called if it is to persist. * * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to * automatically provide the caller's source file and line number for * 'where'.) */ void poll_fd_wait_at(int fd, short int events, const char *where) { struct poll_loop *loop = poll_loop(); COVERAGE_INC(poll_fd_wait); if (loop->n_waiters >= loop->allocated_waiters) { loop->where = x2nrealloc(loop->where, &loop->allocated_waiters, sizeof *loop->where); loop->pollfds = xrealloc(loop->pollfds, (loop->allocated_waiters * sizeof *loop->pollfds)); } loop->where[loop->n_waiters] = where; loop->pollfds[loop->n_waiters].fd = fd; loop->pollfds[loop->n_waiters].events = events; loop->n_waiters++; } /* Causes the following call to poll_block() to block for no more than 'msec' * milliseconds. If 'msec' is nonpositive, the following call to poll_block() * will not block at all. * * The timer registration is one-shot: only the following call to poll_block() * is affected. The timer will need to be re-registered after poll_block() is * called if it is to persist. * * ('where' is used in debug logging. 
Commonly one would use poll_timer_wait() * to automatically provide the caller's source file and line number for * 'where'.) */ void poll_timer_wait_at(long long int msec, const char *where) { long long int now = time_msec(); long long int when; if (msec <= 0) { /* Wake up immediately. */ when = LLONG_MIN; } else if ((unsigned long long int) now + msec <= LLONG_MAX) { /* Normal case. */ when = now + msec; } else { /* now + msec would overflow. */ when = LLONG_MAX; } poll_timer_wait_until_at(when, where); } /* Causes the following call to poll_block() to wake up when the current time, * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier * than the current time, the following call to poll_block() will not block at * all. * * The timer registration is one-shot: only the following call to poll_block() * is affected. The timer will need to be re-registered after poll_block() is * called if it is to persist. * * ('where' is used in debug logging. Commonly one would use * poll_timer_wait_until() to automatically provide the caller's source file * and line number for 'where'.) */ void poll_timer_wait_until_at(long long int when, const char *where) { struct poll_loop *loop = poll_loop(); if (when < loop->timeout_when) { loop->timeout_when = when; loop->timeout_where = where; } } /* Causes the following call to poll_block() to wake up immediately, without * blocking. * * ('where' is used in debug logging. Commonly one would use * poll_immediate_wake() to automatically provide the caller's source file and * line number for 'where'.) */ void poll_immediate_wake_at(const char *where) { poll_timer_wait_at(0, where); } /* Logs, if appropriate, that the poll loop was awakened by an event * registered at 'where' (typically a source file and line number). The other * arguments have two possible interpretations: * * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused * the wakeup. 'timeout' is ignored. 
* * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after * which the poll loop woke up. */ static void log_wakeup(const char *where, const struct pollfd *pollfd, int timeout) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); enum vlog_level level; int cpu_usage; struct ds s; cpu_usage = get_cpu_usage(); if (VLOG_IS_DBG_ENABLED()) { level = VLL_DBG; } else if (cpu_usage > 50 && !VLOG_DROP_INFO(&rl)) { level = VLL_INFO; } else { return; } ds_init(&s); ds_put_cstr(&s, "wakeup due to "); if (pollfd) { char *description = describe_fd(pollfd->fd); if (pollfd->revents & POLLIN) { ds_put_cstr(&s, "[POLLIN]"); } if (pollfd->revents & POLLOUT) { ds_put_cstr(&s, "[POLLOUT]"); } if (pollfd->revents & POLLERR) { ds_put_cstr(&s, "[POLLERR]"); } if (pollfd->revents & POLLHUP) { ds_put_cstr(&s, "[POLLHUP]"); } if (pollfd->revents & POLLNVAL) { ds_put_cstr(&s, "[POLLNVAL]"); } ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description); free(description); } else { ds_put_format(&s, "%d-ms timeout", timeout); } if (where) { ds_put_format(&s, " at %s", where); } if (cpu_usage >= 0) { ds_put_format(&s, " (%d%% CPU usage)", cpu_usage); } VLOG(level, "%s", ds_cstr(&s)); ds_destroy(&s); } /* Blocks until one or more of the events registered with poll_fd_wait() * occurs, or until the minimum duration registered with poll_timer_wait() * elapses, or not at all if poll_immediate_wake() has been called. */ void poll_block(void) { struct poll_loop *loop = poll_loop(); int elapsed; int retval; /* Register fatal signal events before actually doing any real work for * poll_block. 
*/ fatal_signal_wait(); if (loop->timeout_when == LLONG_MIN) { COVERAGE_INC(poll_zero_timeout); } retval = time_poll(loop->pollfds, loop->n_waiters, loop->timeout_when, &elapsed); if (retval < 0) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval)); } else if (!retval) { log_wakeup(loop->timeout_where, NULL, elapsed); } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) { size_t i; for (i = 0; i < loop->n_waiters; i++) { if (loop->pollfds[i].revents) { log_wakeup(loop->where[i], &loop->pollfds[i], 0); } } } loop->timeout_when = LLONG_MAX; loop->timeout_where = NULL; loop->n_waiters = 0; /* Handle any pending signals before doing anything else. */ fatal_signal_run(); seq_woke(); } static void free_poll_loop(void *loop_) { struct poll_loop *loop = loop_; free(loop->pollfds); free(loop->where); free(loop); } static struct poll_loop * poll_loop(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static pthread_key_t key; struct poll_loop *loop; if (ovsthread_once_start(&once)) { xpthread_key_create(&key, free_poll_loop); ovsthread_once_done(&once); } loop = pthread_getspecific(key); if (!loop) { loop = xzalloc(sizeof *loop); xpthread_setspecific(key, loop); } return loop; } openvswitch-2.0.1+git20140120/lib/poll-loop.h000066400000000000000000000051371226605124000203140ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ /* High-level wrapper around the "poll" system call. * * The intended usage is for each thread's main loop to go about its business * servicing whatever events it needs to. Then, when it runs out of immediate * tasks, it calls each subordinate module's "wait" function, which in turn * calls one (or more) of the functions poll_fd_wait(), poll_immediate_wake(), * and poll_timer_wait() to register to be awakened when the appropriate event * occurs. Then the main loop calls poll_block(), which blocks until one of * the registered events happens. * * * Thread-safety * ============= * * The poll set is per-thread, so all functions in this module are thread-safe. */ #ifndef POLL_LOOP_H #define POLL_LOOP_H 1 #include #include "util.h" #ifdef __cplusplus extern "C" { #endif /* Schedule events to wake up the following poll_block(). * * The poll_loop logs the 'where' argument to each function at "debug" level * when an event causes a wakeup. Each of these ways to schedule an event has * a function and a macro wrapper. The macro version automatically supplies * the source code location of the caller. The function version allows the * caller to supply a location explicitly, which is useful if the caller's own * caller would be more useful in log output. See timer_wait_at() for an * example. */ void poll_fd_wait_at(int fd, short int events, const char *where); #define poll_fd_wait(fd, events) poll_fd_wait_at(fd, events, SOURCE_LOCATOR) void poll_timer_wait_at(long long int msec, const char *where); #define poll_timer_wait(msec) poll_timer_wait_at(msec, SOURCE_LOCATOR) void poll_timer_wait_until_at(long long int msec, const char *where); #define poll_timer_wait_until(msec) \ poll_timer_wait_until_at(msec, SOURCE_LOCATOR) void poll_immediate_wake_at(const char *where); #define poll_immediate_wake() poll_immediate_wake_at(SOURCE_LOCATOR) /* Wait until an event occurs. 
*/ void poll_block(void); #ifdef __cplusplus } #endif #endif /* poll-loop.h */ openvswitch-2.0.1+git20140120/lib/process.c000066400000000000000000000225761226605124000200560ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "process.h" #include #include #include #include #include #include #include #include #include "coverage.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "list.h" #include "ovs-thread.h" #include "poll-loop.h" #include "signals.h" #include "socket-util.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(process); COVERAGE_DEFINE(process_sigchld); COVERAGE_DEFINE(process_start); struct process { struct list node; char *name; pid_t pid; /* State. */ bool exited; int status; }; /* Pipe used to signal child termination. */ static int fds[2]; /* All processes. */ static struct list all_processes = LIST_INITIALIZER(&all_processes); static void sigchld_handler(int signr OVS_UNUSED); /* Initializes the process subsystem (if it is not already initialized). Calls * exit() if initialization fails. * * This function may not be called after creating any additional threads. * * Calling this function is optional; it will be called automatically by * process_start() if necessary. Calling it explicitly allows the client to * prevent the process from exiting at an unexpected time. 
*/
void
process_init(void)
{
    static bool inited;
    struct sigaction sa;

    assert_single_threaded();
    if (inited) {
        return;
    }
    inited = true;

    /* Create notification pipe.  This happens before the handler below is
     * installed because sigchld_handler() writes to fds[1]. */
    xpipe_nonblocking(fds);

    /* Set up child termination signal handler. */
    memset(&sa, 0, sizeof sa);
    sa.sa_handler = sigchld_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_NOCLDSTOP | SA_RESTART;
    xsigaction(SIGCHLD, &sa, NULL);
}

/* Joins the null-terminated 'argv' array into a single string for display,
 * with one space between arguments.
 *
 * An argument that contains a space, tab, carriage return, new-line, vertical
 * tab, backslash, or single or double quote is wrapped in double quotes, and
 * each backslash or double quote inside it is preceded by a backslash.  Other
 * arguments are copied unchanged.
 *
 * Returns the string built in a "struct ds"; process_prestart() releases it
 * with free(). */
char *
process_escape_args(char **argv)
{
    struct ds ds = DS_EMPTY_INITIALIZER;
    char **argp;
    for (argp = argv; *argp; argp++) {
        const char *arg = *argp;
        const char *p;
        if (argp != argv) {
            /* Separator before every argument except the first. */
            ds_put_char(&ds, ' ');
        }
        /* strcspn() stops at the first special character, so a nonzero byte
         * at that offset means 'arg' needs quoting. */
        if (arg[strcspn(arg, " \t\r\n\v\\\'\"")]) {
            ds_put_char(&ds, '"');
            for (p = arg; *p; p++) {
                if (*p == '\\' || *p == '\"') {
                    ds_put_char(&ds, '\\');
                }
                ds_put_char(&ds, *p);
            }
            ds_put_char(&ds, '"');
        } else {
            ds_put_cstr(&ds, arg);
        }
    }
    return ds_cstr(&ds);
}

/* Prepare to start a process whose command-line arguments are given by the
 * null-terminated 'argv' array.  Returns 0 if successful, otherwise a
 * positive errno value (ENOENT if argv[0] cannot be found). */
static int
process_prestart(char **argv)
{
    char *binary;

    process_init();

    /* Log the process to be started. */
    if (VLOG_IS_DBG_ENABLED()) {
        char *args = process_escape_args(argv);
        VLOG_DBG("starting subprocess: %s", args);
        free(args);
    }

    /* execvp() will search PATH too, but the error in that case is more
     * obscure, since it is only reported post-fork. */
    binary = process_search_path(argv[0]);
    if (!binary) {
        VLOG_ERR("%s not found in PATH", argv[0]);
        return ENOENT;
    }
    /* Only the existence check was needed; the resolved path is not used. */
    free(binary);

    return 0;
}

/* Creates and returns a new struct process with the specified 'name' and
 * 'pid'.  Only the part of 'name' after its last '/' is stored.  The new
 * process is appended to 'all_processes'. */
static struct process *
process_register(const char *name, pid_t pid)
{
    struct process *p;
    const char *slash;

    p = xzalloc(sizeof *p);
    p->pid = pid;
    slash = strrchr(name, '/');
    p->name = xstrdup(slash ? slash + 1 : name);
    p->exited = false;

    list_push_back(&all_processes, &p->node);

    return p;
}

/* Starts a subprocess with the arguments in the null-terminated argv[] array.
* argv[0] is used as the name of the process. Searches the PATH environment * variable to find the program to execute. * * This function may not be called after creating any additional threads. * * All file descriptors are closed before executing the subprocess, except for * fds 0, 1, and 2. * * Returns 0 if successful, otherwise a positive errno value indicating the * error. If successful, '*pp' is assigned a new struct process that may be * used to query the process's status. On failure, '*pp' is set to NULL. */ int process_start(char **argv, struct process **pp) { pid_t pid; int error; assert_single_threaded(); *pp = NULL; COVERAGE_INC(process_start); error = process_prestart(argv); if (error) { return error; } pid = fork(); if (pid < 0) { VLOG_WARN("fork failed: %s", ovs_strerror(errno)); return errno; } else if (pid) { /* Running in parent process. */ *pp = process_register(argv[0], pid); return 0; } else { /* Running in child process. */ int fd_max = get_max_fds(); int fd; fatal_signal_fork(); for (fd = 3; fd < fd_max; fd++) { close(fd); } execvp(argv[0], argv); fprintf(stderr, "execvp(\"%s\") failed: %s\n", argv[0], ovs_strerror(errno)); _exit(1); } } /* Destroys process 'p'. */ void process_destroy(struct process *p) { if (p) { list_remove(&p->node); free(p->name); free(p); } } /* Sends signal 'signr' to process 'p'. Returns 0 if successful, otherwise a * positive errno value. */ int process_kill(const struct process *p, int signr) { return (p->exited ? ESRCH : !kill(p->pid, signr) ? 0 : errno); } /* Returns the pid of process 'p'. */ pid_t process_pid(const struct process *p) { return p->pid; } /* Returns the name of process 'p' (the name passed to process_start() with any * leading directories stripped). */ const char * process_name(const struct process *p) { return p->name; } /* Returns true if process 'p' has exited, false otherwise. 
*/ bool process_exited(struct process *p) { return p->exited; } /* Returns process 'p''s exit status, as reported by waitpid(2). * process_status(p) may be called only after process_exited(p) has returned * true. */ int process_status(const struct process *p) { ovs_assert(p->exited); return p->status; } /* Given 'status', which is a process status in the form reported by waitpid(2) * and returned by process_status(), returns a string describing how the * process terminated. The caller is responsible for freeing the string when * it is no longer needed. */ char * process_status_msg(int status) { struct ds ds = DS_EMPTY_INITIALIZER; if (WIFEXITED(status)) { ds_put_format(&ds, "exit status %d", WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { char namebuf[SIGNAL_NAME_BUFSIZE]; ds_put_format(&ds, "killed (%s)", signal_name(WTERMSIG(status), namebuf, sizeof namebuf)); } else if (WIFSTOPPED(status)) { char namebuf[SIGNAL_NAME_BUFSIZE]; ds_put_format(&ds, "stopped (%s)", signal_name(WSTOPSIG(status), namebuf, sizeof namebuf)); } else { ds_put_format(&ds, "terminated abnormally (%x)", status); } if (WCOREDUMP(status)) { ds_put_cstr(&ds, ", core dumped"); } return ds_cstr(&ds); } /* Executes periodic maintenance activities required by the process module. */ void process_run(void) { char buf[_POSIX_PIPE_BUF]; if (!list_is_empty(&all_processes) && read(fds[0], buf, sizeof buf) > 0) { struct process *p; LIST_FOR_EACH (p, node, &all_processes) { if (!p->exited) { int retval, status; do { retval = waitpid(p->pid, &status, WNOHANG); } while (retval == -1 && errno == EINTR); if (retval == p->pid) { p->exited = true; p->status = status; } else if (retval < 0) { VLOG_WARN("waitpid: %s", ovs_strerror(errno)); p->exited = true; p->status = -1; } } } } } /* Causes the next call to poll_block() to wake up when process 'p' has * exited. 
*/ void process_wait(struct process *p) { if (p->exited) { poll_immediate_wake(); } else { poll_fd_wait(fds[0], POLLIN); } } char * process_search_path(const char *name) { char *save_ptr = NULL; char *path, *dir; struct stat s; if (strchr(name, '/') || !getenv("PATH")) { return stat(name, &s) == 0 ? xstrdup(name) : NULL; } path = xstrdup(getenv("PATH")); for (dir = strtok_r(path, ":", &save_ptr); dir; dir = strtok_r(NULL, ":", &save_ptr)) { char *file = xasprintf("%s/%s", dir, name); if (stat(file, &s) == 0) { free(path); return file; } free(file); } free(path); return NULL; } static void sigchld_handler(int signr OVS_UNUSED) { ignore(write(fds[1], "", 1)); } openvswitch-2.0.1+git20140120/lib/process.h000066400000000000000000000031431226605124000200500ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PROCESS_H #define PROCESS_H 1 #include #include struct process; /* Starting and monitoring subprocesses. * * process_init() and process_start() may safely be called only from a * single-threaded parent process. The parent process may safely create * additional threads afterward, as long as the remaining functions in this * group are called only from a single thread at any given time. 
*/ void process_init(void); int process_start(char **argv, struct process **); void process_destroy(struct process *); int process_kill(const struct process *, int signr); pid_t process_pid(const struct process *); const char *process_name(const struct process *); bool process_exited(struct process *); int process_status(const struct process *); void process_run(void); void process_wait(struct process *); /* These functions are thread-safe. */ char *process_status_msg(int); char *process_escape_args(char **argv); char *process_search_path(const char *); #endif /* process.h */ openvswitch-2.0.1+git20140120/lib/random.c000066400000000000000000000054551226605124000176550ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "random.h" #include #include #include #include "entropy.h" #include "hash.h" #include "ovs-thread.h" #include "timeval.h" #include "util.h" /* This is the 32-bit PRNG recommended in G. Marsaglia, "Xorshift RNGs", * _Journal of Statistical Software_ 8:14 (July 2003). According to the paper, * it has a period of 2**32 - 1 and passes almost all tests of randomness. * * We use this PRNG instead of libc's rand() because rand() varies in quality * and because its maximum value also varies between 32767 and INT_MAX, whereas * we often want random numbers in the full range of uint32_t. 
* * This random number generator is intended for purposes that do not require * cryptographic-quality randomness. */ /* Current random state. */ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, seed, 0); static uint32_t random_next(void); void random_init(void) { uint32_t *seedp = seed_get(); while (!*seedp) { struct timeval tv; uint32_t entropy; pthread_t self; xgettimeofday(&tv); get_entropy_or_die(&entropy, 4); self = pthread_self(); *seedp = (tv.tv_sec ^ tv.tv_usec ^ entropy ^ hash_bytes(&self, sizeof self, 0)); } } void random_set_seed(uint32_t seed_) { ovs_assert(seed_); *seed_get() = seed_; } void random_bytes(void *p_, size_t n) { uint8_t *p = p_; random_init(); for (; n > 4; p += 4, n -= 4) { uint32_t x = random_next(); memcpy(p, &x, 4); } if (n) { uint32_t x = random_next(); memcpy(p, &x, n); } } uint8_t random_uint8(void) { return random_uint32(); } uint16_t random_uint16(void) { return random_uint32(); } uint32_t random_uint32(void) { random_init(); return random_next(); } uint64_t random_uint64(void) { uint64_t x; random_init(); x = random_next(); x |= (uint64_t) random_next() << 32; return x; } int random_range(int max) { return random_uint32() % max; } static uint32_t random_next(void) { uint32_t *seedp = seed_get_unsafe(); *seedp ^= *seedp << 13; *seedp ^= *seedp >> 17; *seedp ^= *seedp << 5; return *seedp; } openvswitch-2.0.1+git20140120/lib/random.h000066400000000000000000000016761226605124000176630ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef RANDOM_H #define RANDOM_H 1 #include #include void random_init(void); void random_set_seed(uint32_t); void random_bytes(void *, size_t); uint8_t random_uint8(void); uint16_t random_uint16(void); uint32_t random_uint32(void); uint64_t random_uint64(void); int random_range(int max); #endif /* random.h */ openvswitch-2.0.1+git20140120/lib/rconn.c000066400000000000000000001227371226605124000175170ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "rconn.h" #include #include #include #include #include "coverage.h" #include "ofp-msgs.h" #include "ofp-util.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "poll-loop.h" #include "sat-math.h" #include "timeval.h" #include "util.h" #include "vconn.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(rconn); COVERAGE_DEFINE(rconn_discarded); COVERAGE_DEFINE(rconn_overflow); COVERAGE_DEFINE(rconn_queued); COVERAGE_DEFINE(rconn_sent); #define STATES \ STATE(VOID, 1 << 0) \ STATE(BACKOFF, 1 << 1) \ STATE(CONNECTING, 1 << 2) \ STATE(ACTIVE, 1 << 3) \ STATE(IDLE, 1 << 4) enum state { #define STATE(NAME, VALUE) S_##NAME = VALUE, STATES #undef STATE }; static const char * state_name(enum state state) { switch (state) { #define STATE(NAME, VALUE) case S_##NAME: return #NAME; STATES #undef STATE } return "***ERROR***"; } /* A reliable connection to an OpenFlow switch or controller. * * See the large comment in rconn.h for more information. */ struct rconn { struct ovs_mutex mutex; enum state state; time_t state_entered; struct vconn *vconn; char *name; /* Human-readable descriptive name. */ char *target; /* vconn name, passed to vconn_open(). */ bool reliable; struct list txq; /* Contains "struct ofpbuf"s. */ int backoff; int max_backoff; time_t backoff_deadline; time_t last_connected; time_t last_disconnected; unsigned int packets_sent; unsigned int seqno; int last_error; /* In S_ACTIVE and S_IDLE, probably_admitted reports whether we believe * that the peer has made a (positive) admission control decision on our * connection. If we have not yet been (probably) admitted, then the * connection does not reset the timer used for deciding whether the switch * should go into fail-open mode. * * last_admitted reports the last time we believe such a positive admission * control decision was made. 
*/ bool probably_admitted; time_t last_admitted; /* These values are simply for statistics reporting, not used directly by * anything internal to the rconn (or ofproto for that matter). */ unsigned int packets_received; unsigned int n_attempted_connections, n_successful_connections; time_t creation_time; unsigned long int total_time_connected; /* Throughout this file, "probe" is shorthand for "inactivity probe". When * no activity has been observed from the peer for a while, we send out an * echo request as an inactivity probe packet. We should receive back a * response. * * "Activity" is defined as either receiving an OpenFlow message from the * peer or successfully sending a message that had been in 'txq'. */ int probe_interval; /* Secs of inactivity before sending probe. */ time_t last_activity; /* Last time we saw some activity. */ /* When we create a vconn we obtain these values, to save them past the end * of the vconn's lifetime. Otherwise, in-band control will only allow * traffic when a vconn is actually open, but it is nice to allow ARP to * complete even between connection attempts, and it is also polite to * allow traffic from other switches to go through to the controller * whether or not we are connected. * * We don't cache the local port, because that changes from one connection * attempt to the next. */ ovs_be32 local_ip, remote_ip; ovs_be16 remote_port; uint8_t dscp; /* Messages sent or received are copied to the monitor connections. 
*/ #define MAX_MONITORS 8 struct vconn *monitors[8]; size_t n_monitors; uint32_t allowed_versions; }; uint32_t rconn_get_allowed_versions(const struct rconn *rconn) { return rconn->allowed_versions; } static unsigned int elapsed_in_this_state(const struct rconn *rc) OVS_REQUIRES(rc->mutex); static unsigned int timeout(const struct rconn *rc) OVS_REQUIRES(rc->mutex); static bool timed_out(const struct rconn *rc) OVS_REQUIRES(rc->mutex); static void state_transition(struct rconn *rc, enum state) OVS_REQUIRES(rc->mutex); static void rconn_set_target__(struct rconn *rc, const char *target, const char *name) OVS_REQUIRES(rc->mutex); static int rconn_send__(struct rconn *rc, struct ofpbuf *, struct rconn_packet_counter *) OVS_REQUIRES(rc->mutex); static int try_send(struct rconn *rc) OVS_REQUIRES(rc->mutex); static void reconnect(struct rconn *rc) OVS_REQUIRES(rc->mutex); static void report_error(struct rconn *rc, int error) OVS_REQUIRES(rc->mutex); static void rconn_disconnect__(struct rconn *rc) OVS_REQUIRES(rc->mutex); static void disconnect(struct rconn *rc, int error) OVS_REQUIRES(rc->mutex); static void flush_queue(struct rconn *rc) OVS_REQUIRES(rc->mutex); static void close_monitor(struct rconn *rc, size_t idx, int retval) OVS_REQUIRES(rc->mutex); static void copy_to_monitor(struct rconn *, const struct ofpbuf *); static bool is_connected_state(enum state); static bool is_admitted_msg(const struct ofpbuf *); static bool rconn_logging_connection_attempts__(const struct rconn *rc) OVS_REQUIRES(rc->mutex); static int rconn_get_version__(const struct rconn *rconn) OVS_REQUIRES(rconn->mutex); /* The following prototypes duplicate those in rconn.h, but there we weren't * able to add the OVS_EXCLUDED annotations because the definition of struct * rconn was not visible. 
*/ void rconn_set_max_backoff(struct rconn *rc, int max_backoff) OVS_EXCLUDED(rc->mutex); void rconn_connect(struct rconn *rc, const char *target, const char *name) OVS_EXCLUDED(rc->mutex); void rconn_connect_unreliably(struct rconn *rc, struct vconn *vconn, const char *name) OVS_EXCLUDED(rc->mutex); void rconn_reconnect(struct rconn *rc) OVS_EXCLUDED(rc->mutex); void rconn_disconnect(struct rconn *rc) OVS_EXCLUDED(rc->mutex); void rconn_run(struct rconn *rc) OVS_EXCLUDED(rc->mutex); void rconn_run_wait(struct rconn *rc) OVS_EXCLUDED(rc->mutex); struct ofpbuf *rconn_recv(struct rconn *rc) OVS_EXCLUDED(rc->mutex); void rconn_recv_wait(struct rconn *rc) OVS_EXCLUDED(rc->mutex); int rconn_send(struct rconn *rc, struct ofpbuf *b, struct rconn_packet_counter *counter) OVS_EXCLUDED(rc->mutex); int rconn_send_with_limit(struct rconn *rc, struct ofpbuf *b, struct rconn_packet_counter *counter, int queue_limit) OVS_EXCLUDED(rc->mutex); void rconn_add_monitor(struct rconn *rc, struct vconn *vconn) OVS_EXCLUDED(rc->mutex); void rconn_set_name(struct rconn *rc, const char *new_name) OVS_EXCLUDED(rc->mutex); bool rconn_is_admitted(const struct rconn *rconn) OVS_EXCLUDED(rconn->mutex); int rconn_failure_duration(const struct rconn *rconn) OVS_EXCLUDED(rconn->mutex); ovs_be16 rconn_get_local_port(const struct rconn *rconn) OVS_EXCLUDED(rconn->mutex); int rconn_get_version(const struct rconn *rconn) OVS_EXCLUDED(rconn->mutex); unsigned int rconn_count_txqlen(const struct rconn *rc) OVS_EXCLUDED(rc->mutex); /* Creates and returns a new rconn. * * 'probe_interval' is a number of seconds. If the interval passes once * without an OpenFlow message being received from the peer, the rconn sends * out an "echo request" message. If the interval passes again without a * message being received, the rconn disconnects and re-connects to the peer. * Setting 'probe_interval' to 0 disables this behavior. * * 'max_backoff' is the maximum number of seconds between attempts to connect * to the peer. 
The actual interval starts at 1 second and doubles on each * failure until it reaches 'max_backoff'. If 0 is specified, the default of * 8 seconds is used. * * The new rconn is initially unconnected. Use rconn_connect() or * rconn_connect_unreliably() to connect it. * * Connections made by the rconn will automatically negotiate an OpenFlow * protocol version acceptable to both peers on the connection. The version * negotiated will be one of those in the 'allowed_versions' bitmap: version * 'x' is allowed if allowed_versions & (1 << x) is nonzero. (The underlying * vconn will treat an 'allowed_versions' of 0 as OFPUTIL_DEFAULT_VERSIONS.) */ struct rconn * rconn_create(int probe_interval, int max_backoff, uint8_t dscp, uint32_t allowed_versions) { struct rconn *rc = xzalloc(sizeof *rc); ovs_mutex_init(&rc->mutex); rc->state = S_VOID; rc->state_entered = time_now(); rc->vconn = NULL; rc->name = xstrdup("void"); rc->target = xstrdup("void"); rc->reliable = false; list_init(&rc->txq); rc->backoff = 0; rc->max_backoff = max_backoff ? 
max_backoff : 8; rc->backoff_deadline = TIME_MIN; rc->last_connected = TIME_MIN; rc->last_disconnected = TIME_MIN; rc->seqno = 0; rc->packets_sent = 0; rc->probably_admitted = false; rc->last_admitted = time_now(); rc->packets_received = 0; rc->n_attempted_connections = 0; rc->n_successful_connections = 0; rc->creation_time = time_now(); rc->total_time_connected = 0; rc->last_activity = time_now(); rconn_set_probe_interval(rc, probe_interval); rconn_set_dscp(rc, dscp); rc->n_monitors = 0; rc->allowed_versions = allowed_versions; return rc; } void rconn_set_max_backoff(struct rconn *rc, int max_backoff) OVS_EXCLUDED(rc->mutex) { ovs_mutex_lock(&rc->mutex); rc->max_backoff = MAX(1, max_backoff); if (rc->state == S_BACKOFF && rc->backoff > max_backoff) { rc->backoff = max_backoff; if (rc->backoff_deadline > time_now() + max_backoff) { rc->backoff_deadline = time_now() + max_backoff; } } ovs_mutex_unlock(&rc->mutex); } int rconn_get_max_backoff(const struct rconn *rc) { return rc->max_backoff; } void rconn_set_dscp(struct rconn *rc, uint8_t dscp) { rc->dscp = dscp; } uint8_t rconn_get_dscp(const struct rconn *rc) { return rc->dscp; } void rconn_set_probe_interval(struct rconn *rc, int probe_interval) { rc->probe_interval = probe_interval ? MAX(5, probe_interval) : 0; } int rconn_get_probe_interval(const struct rconn *rc) { return rc->probe_interval; } /* Drops any existing connection on 'rc', then sets up 'rc' to connect to * 'target' and reconnect as needed. 'target' should be a remote OpenFlow * target in a form acceptable to vconn_open(). * * If 'name' is nonnull, then it is used in log messages in place of 'target'. * It should presumably give more information to a human reader than 'target', * but it need not be acceptable to vconn_open(). 
*/
void
rconn_connect(struct rconn *rc, const char *target, const char *name)
    OVS_EXCLUDED(rc->mutex)
{
    ovs_mutex_lock(&rc->mutex);
    rconn_disconnect__(rc);
    rconn_set_target__(rc, target, name);
    rc->reliable = true;
    /* Start the first connection attempt right away. */
    reconnect(rc);
    ovs_mutex_unlock(&rc->mutex);
}

/* Drops any existing connection on 'rc', then configures 'rc' to use
 * 'vconn'.  If the connection on 'vconn' drops, 'rc' will not reconnect on
 * its own.
 *
 * By default, the target obtained from vconn_get_name(vconn) is used in log
 * messages.  If 'name' is nonnull, then it is used instead.  It should
 * presumably give more information to a human reader than the target, but it
 * need not be acceptable to vconn_open(). */
void
rconn_connect_unreliably(struct rconn *rc,
                         struct vconn *vconn, const char *name)
    OVS_EXCLUDED(rc->mutex)
{
    ovs_assert(vconn != NULL);

    ovs_mutex_lock(&rc->mutex);
    rconn_disconnect__(rc);
    rconn_set_target__(rc, vconn_get_name(vconn), name);
    rc->reliable = false;
    rc->vconn = vconn;
    rc->last_connected = time_now();
    /* 'vconn' is handed over by the caller, so 'rc' goes straight to
     * S_ACTIVE instead of passing through S_CONNECTING. */
    state_transition(rc, S_ACTIVE);
    ovs_mutex_unlock(&rc->mutex);
}

/* If 'rc' is connected, forces it to drop the connection and reconnect. */
void
rconn_reconnect(struct rconn *rc)
    OVS_EXCLUDED(rc->mutex)
{
    ovs_mutex_lock(&rc->mutex);
    /* Only S_ACTIVE and S_IDLE count as "connected" here. */
    if (rc->state & (S_ACTIVE | S_IDLE)) {
        VLOG_INFO("%s: disconnecting", rc->name);
        disconnect(rc, 0);
    }
    ovs_mutex_unlock(&rc->mutex);
}

/* Puts 'rc' back into S_VOID: closes its vconn, if any, resets the target to
 * "void", clears the 'reliable' flag and the backoff state.  Does nothing if
 * 'rc' is already in S_VOID.  The caller must hold 'rc''s mutex. */
static void
rconn_disconnect__(struct rconn *rc)
    OVS_REQUIRES(rc->mutex)
{
    if (rc->state != S_VOID) {
        if (rc->vconn) {
            vconn_close(rc->vconn);
            rc->vconn = NULL;
        }
        rconn_set_target__(rc, "void", NULL);
        rc->reliable = false;

        rc->backoff = 0;
        rc->backoff_deadline = TIME_MIN;

        state_transition(rc, S_VOID);
    }
}

/* Locking wrapper around rconn_disconnect__(): drops any connection on 'rc'
 * and leaves it in S_VOID. */
void
rconn_disconnect(struct rconn *rc)
    OVS_EXCLUDED(rc->mutex)
{
    ovs_mutex_lock(&rc->mutex);
    rconn_disconnect__(rc);
    ovs_mutex_unlock(&rc->mutex);
}

/* Disconnects 'rc' and frees the underlying storage.
*/ void rconn_destroy(struct rconn *rc) { if (rc) { size_t i; ovs_mutex_lock(&rc->mutex); free(rc->name); free(rc->target); vconn_close(rc->vconn); flush_queue(rc); ofpbuf_list_delete(&rc->txq); for (i = 0; i < rc->n_monitors; i++) { vconn_close(rc->monitors[i]); } ovs_mutex_unlock(&rc->mutex); ovs_mutex_destroy(&rc->mutex); free(rc); } } static unsigned int timeout_VOID(const struct rconn *rc OVS_UNUSED) OVS_REQUIRES(rc->mutex) { return UINT_MAX; } static void run_VOID(struct rconn *rc OVS_UNUSED) OVS_REQUIRES(rc->mutex) { /* Nothing to do. */ } static void reconnect(struct rconn *rc) OVS_REQUIRES(rc->mutex) { int retval; if (rconn_logging_connection_attempts__(rc)) { VLOG_INFO("%s: connecting...", rc->name); } rc->n_attempted_connections++; retval = vconn_open(rc->target, rc->allowed_versions, rc->dscp, &rc->vconn); if (!retval) { rc->remote_ip = vconn_get_remote_ip(rc->vconn); rc->local_ip = vconn_get_local_ip(rc->vconn); rc->remote_port = vconn_get_remote_port(rc->vconn); rc->backoff_deadline = time_now() + rc->backoff; state_transition(rc, S_CONNECTING); } else { VLOG_WARN("%s: connection failed (%s)", rc->name, ovs_strerror(retval)); rc->backoff_deadline = TIME_MAX; /* Prevent resetting backoff. 
*/ disconnect(rc, retval); } } static unsigned int timeout_BACKOFF(const struct rconn *rc) OVS_REQUIRES(rc->mutex) { return rc->backoff; } static void run_BACKOFF(struct rconn *rc) OVS_REQUIRES(rc->mutex) { if (timed_out(rc)) { reconnect(rc); } } static unsigned int timeout_CONNECTING(const struct rconn *rc) OVS_REQUIRES(rc->mutex) { return MAX(1, rc->backoff); } static void run_CONNECTING(struct rconn *rc) OVS_REQUIRES(rc->mutex) { int retval = vconn_connect(rc->vconn); if (!retval) { VLOG_INFO("%s: connected", rc->name); rc->n_successful_connections++; state_transition(rc, S_ACTIVE); rc->last_connected = rc->state_entered; } else if (retval != EAGAIN) { if (rconn_logging_connection_attempts__(rc)) { VLOG_INFO("%s: connection failed (%s)", rc->name, ovs_strerror(retval)); } disconnect(rc, retval); } else if (timed_out(rc)) { if (rconn_logging_connection_attempts__(rc)) { VLOG_INFO("%s: connection timed out", rc->name); } rc->backoff_deadline = TIME_MAX; /* Prevent resetting backoff. */ disconnect(rc, ETIMEDOUT); } } static void do_tx_work(struct rconn *rc) OVS_REQUIRES(rc->mutex) { if (list_is_empty(&rc->txq)) { return; } while (!list_is_empty(&rc->txq)) { int error = try_send(rc); if (error) { break; } rc->last_activity = time_now(); } if (list_is_empty(&rc->txq)) { poll_immediate_wake(); } } static unsigned int timeout_ACTIVE(const struct rconn *rc) OVS_REQUIRES(rc->mutex) { if (rc->probe_interval) { unsigned int base = MAX(rc->last_activity, rc->state_entered); unsigned int arg = base + rc->probe_interval - rc->state_entered; return arg; } return UINT_MAX; } static void run_ACTIVE(struct rconn *rc) OVS_REQUIRES(rc->mutex) { if (timed_out(rc)) { unsigned int base = MAX(rc->last_activity, rc->state_entered); int version; VLOG_DBG("%s: idle %u seconds, sending inactivity probe", rc->name, (unsigned int) (time_now() - base)); version = rconn_get_version__(rc); ovs_assert(version >= 0 && version <= 0xff); /* Ordering is important here: rconn_send() can transition to 
BACKOFF, * and we don't want to transition back to IDLE if so, because then we * can end up queuing a packet with vconn == NULL and then *boom*. */ state_transition(rc, S_IDLE); rconn_send__(rc, make_echo_request(version), NULL); return; } do_tx_work(rc); } static unsigned int timeout_IDLE(const struct rconn *rc) OVS_REQUIRES(rc->mutex) { return rc->probe_interval; } static void run_IDLE(struct rconn *rc) OVS_REQUIRES(rc->mutex) { if (timed_out(rc)) { VLOG_ERR("%s: no response to inactivity probe after %u " "seconds, disconnecting", rc->name, elapsed_in_this_state(rc)); disconnect(rc, ETIMEDOUT); } else { do_tx_work(rc); } } /* Performs whatever activities are necessary to maintain 'rc': if 'rc' is * disconnected, attempts to (re)connect, backing off as necessary; if 'rc' is * connected, attempts to send packets in the send queue, if any. */ void rconn_run(struct rconn *rc) OVS_EXCLUDED(rc->mutex) { int old_state; size_t i; ovs_mutex_lock(&rc->mutex); if (rc->vconn) { vconn_run(rc->vconn); } for (i = 0; i < rc->n_monitors; ) { struct ofpbuf *msg; int retval; vconn_run(rc->monitors[i]); /* Drain any stray message that came in on the monitor connection. */ retval = vconn_recv(rc->monitors[i], &msg); if (!retval) { ofpbuf_delete(msg); } else if (retval != EAGAIN) { close_monitor(rc, i, retval); continue; } i++; } do { old_state = rc->state; switch (rc->state) { #define STATE(NAME, VALUE) case S_##NAME: run_##NAME(rc); break; STATES #undef STATE default: NOT_REACHED(); } } while (rc->state != old_state); ovs_mutex_unlock(&rc->mutex); } /* Causes the next call to poll_block() to wake up when rconn_run() should be * called on 'rc'. 
*/
void
rconn_run_wait(struct rconn *rc)
    OVS_EXCLUDED(rc->mutex)
{
    unsigned int timeo;
    size_t i;

    ovs_mutex_lock(&rc->mutex);
    if (rc->vconn) {
        vconn_run_wait(rc->vconn);
        /* Wait for send readiness only while connected with a backlog. */
        if ((rc->state & (S_ACTIVE | S_IDLE)) && !list_is_empty(&rc->txq)) {
            vconn_wait(rc->vconn, WAIT_SEND);
        }
    }
    for (i = 0; i < rc->n_monitors; i++) {
        vconn_run_wait(rc->monitors[i]);
        vconn_recv_wait(rc->monitors[i]);
    }

    /* UINT_MAX from timeout() means the current state never times out, so
     * no timer is armed in that case. */
    timeo = timeout(rc);
    if (timeo != UINT_MAX) {
        long long int expires = sat_add(rc->state_entered, timeo);
        /* NOTE(review): 'expires' is in seconds; the factor of 1000
         * presumably converts to the milliseconds that
         * poll_timer_wait_until() expects -- confirm. */
        poll_timer_wait_until(expires * 1000);
    }
    ovs_mutex_unlock(&rc->mutex);
}

/* Attempts to receive a packet from 'rc'. If successful, returns the packet;
 * otherwise, returns a null pointer. The caller is responsible for freeing
 * the packet (with ofpbuf_delete()).
 *
 * Nothing is received unless 'rc' is in S_ACTIVE or S_IDLE. A receive error
 * other than EAGAIN is reported and disconnects 'rc'. */
struct ofpbuf *
rconn_recv(struct rconn *rc)
    OVS_EXCLUDED(rc->mutex)
{
    struct ofpbuf *buffer = NULL;

    ovs_mutex_lock(&rc->mutex);
    if (rc->state & (S_ACTIVE | S_IDLE)) {
        int error = vconn_recv(rc->vconn, &buffer);
        if (!error) {
            copy_to_monitor(rc, buffer);
            /* The connection counts as admitted once it has been marked so,
             * once a message that is_admitted_msg() accepts arrives, or once
             * it has stayed up for at least 30 seconds. */
            if (rc->probably_admitted || is_admitted_msg(buffer)
                || time_now() - rc->last_connected >= 30) {
                rc->probably_admitted = true;
                rc->last_admitted = time_now();
            }
            rc->last_activity = time_now();
            rc->packets_received++;
            /* Any received message ends the wait for an inactivity-probe
             * reply. */
            if (rc->state == S_IDLE) {
                state_transition(rc, S_ACTIVE);
            }
        } else if (error != EAGAIN) {
            report_error(rc, error);
            disconnect(rc, error);
        }
    }
    ovs_mutex_unlock(&rc->mutex);

    return buffer;
}

/* Causes the next call to poll_block() to wake up when a packet may be ready
 * to be received by vconn_recv() on 'rc'.
*/ void rconn_recv_wait(struct rconn *rc) OVS_EXCLUDED(rc->mutex) { ovs_mutex_lock(&rc->mutex); if (rc->vconn) { vconn_wait(rc->vconn, WAIT_RECV); } ovs_mutex_unlock(&rc->mutex); } static int rconn_send__(struct rconn *rc, struct ofpbuf *b, struct rconn_packet_counter *counter) OVS_REQUIRES(rc->mutex) { if (rconn_is_connected(rc)) { COVERAGE_INC(rconn_queued); copy_to_monitor(rc, b); b->private_p = counter; if (counter) { rconn_packet_counter_inc(counter, b->size); } list_push_back(&rc->txq, &b->list_node); /* If the queue was empty before we added 'b', try to send some * packets. (But if the queue had packets in it, it's because the * vconn is backlogged and there's no point in stuffing more into it * now. We'll get back to that in rconn_run().) */ if (rc->txq.next == &b->list_node) { try_send(rc); } return 0; } else { ofpbuf_delete(b); return ENOTCONN; } } /* Sends 'b' on 'rc'. Returns 0 if successful, or ENOTCONN if 'rc' is not * currently connected. Takes ownership of 'b'. * * If 'counter' is non-null, then 'counter' will be incremented while the * packet is in flight, then decremented when it has been sent (or discarded * due to disconnection). Because 'b' may be sent (or discarded) before this * function returns, the caller may not be able to observe any change in * 'counter'. * * There is no rconn_send_wait() function: an rconn has a send queue that it * takes care of sending if you call rconn_run(), which will have the side * effect of waking up poll_block(). */ int rconn_send(struct rconn *rc, struct ofpbuf *b, struct rconn_packet_counter *counter) OVS_EXCLUDED(rc->mutex) { int error; ovs_mutex_lock(&rc->mutex); error = rconn_send__(rc, b, counter); ovs_mutex_unlock(&rc->mutex); return error; } /* Sends 'b' on 'rc'. Increments 'counter' while the packet is in flight; it * will be decremented when it has been sent (or discarded due to * disconnection). 
Returns 0 if successful, EAGAIN if 'counter->n_packets' is already
 * at least as large as 'queue_limit', or ENOTCONN if 'rc' is not currently
 * connected. Regardless of return value, 'b' is destroyed.
 *
 * Because 'b' may be sent (or discarded) before this function returns, the
 * caller may not be able to observe any change in 'counter'.
 *
 * There is no rconn_send_wait() function: an rconn has a send queue that it
 * takes care of sending if you call rconn_run(), which will have the side
 * effect of waking up poll_block(). */
int
rconn_send_with_limit(struct rconn *rc, struct ofpbuf *b,
                      struct rconn_packet_counter *counter, int queue_limit)
    OVS_EXCLUDED(rc->mutex)
{
    int error;

    ovs_mutex_lock(&rc->mutex);
    /* 'counter' is dereferenced unconditionally here, so unlike rconn_send()
     * it must be nonnull. */
    if (rconn_packet_counter_n_packets(counter) < queue_limit) {
        error = rconn_send__(rc, b, counter);
    } else {
        /* Over the limit: drop 'b' rather than queue it. */
        COVERAGE_INC(rconn_overflow);
        ofpbuf_delete(b);
        error = EAGAIN;
    }
    ovs_mutex_unlock(&rc->mutex);

    return error;
}

/* Returns the total number of packets successfully sent on the underlying
 * vconn. A packet is not counted as sent while it is still queued in the
 * rconn, only when it has been successfully passed to the vconn. */
unsigned int
rconn_packets_sent(const struct rconn *rc)
{
    return rc->packets_sent;
}

/* Adds 'vconn' to 'rc' as a monitoring connection, to which all messages sent
 * and received on 'rconn' will be copied. 'rc' takes ownership of 'vconn'.
 *
 * If 'rc' already has as many monitors as its 'monitors' array can hold,
 * 'vconn' is closed instead of being added. */
void
rconn_add_monitor(struct rconn *rc, struct vconn *vconn)
    OVS_EXCLUDED(rc->mutex)
{
    ovs_mutex_lock(&rc->mutex);
    if (rc->n_monitors < ARRAY_SIZE(rc->monitors)) {
        VLOG_INFO("new monitor connection from %s", vconn_get_name(vconn));
        rc->monitors[rc->n_monitors++] = vconn;
    } else {
        VLOG_DBG("too many monitor connections, discarding %s",
                 vconn_get_name(vconn));
        vconn_close(vconn);
    }
    ovs_mutex_unlock(&rc->mutex);
}

/* Returns 'rc''s name. This is a name for human consumption, appropriate for
 * use in log messages. It is not necessarily a name that may be passed
 * directly to, e.g., vconn_open().
*/ const char * rconn_get_name(const struct rconn *rc) { return rc->name; } /* Sets 'rc''s name to 'new_name'. */ void rconn_set_name(struct rconn *rc, const char *new_name) OVS_EXCLUDED(rc->mutex) { ovs_mutex_lock(&rc->mutex); free(rc->name); rc->name = xstrdup(new_name); ovs_mutex_unlock(&rc->mutex); } /* Returns 'rc''s target. This is intended to be a string that may be passed * directly to, e.g., vconn_open(). */ const char * rconn_get_target(const struct rconn *rc) { return rc->target; } /* Returns true if 'rconn' is connected or in the process of reconnecting, * false if 'rconn' is disconnected and will not reconnect on its own. */ bool rconn_is_alive(const struct rconn *rconn) { return rconn->state != S_VOID; } /* Returns true if 'rconn' is connected, false otherwise. */ bool rconn_is_connected(const struct rconn *rconn) { return is_connected_state(rconn->state); } static bool rconn_is_admitted__(const struct rconn *rconn) OVS_REQUIRES(rconn->mutex) { return (rconn_is_connected(rconn) && rconn->last_admitted >= rconn->last_connected); } /* Returns true if 'rconn' is connected and thought to have been accepted by * the peer's admission-control policy. */ bool rconn_is_admitted(const struct rconn *rconn) OVS_EXCLUDED(rconn->mutex) { bool admitted; ovs_mutex_lock(&rconn->mutex); admitted = rconn_is_admitted__(rconn); ovs_mutex_unlock(&rconn->mutex); return admitted; } /* Returns 0 if 'rconn' is currently connected and considered to have been * accepted by the peer's admission-control policy, otherwise the number of * seconds since 'rconn' was last in such a state. */ int rconn_failure_duration(const struct rconn *rconn) OVS_EXCLUDED(rconn->mutex) { int duration; ovs_mutex_lock(&rconn->mutex); duration = (rconn_is_admitted__(rconn) ? 0 : time_now() - rconn->last_admitted); ovs_mutex_unlock(&rconn->mutex); return duration; } /* Returns the IP address of the peer, or 0 if the peer's IP address is not * known. 
*/ ovs_be32 rconn_get_remote_ip(const struct rconn *rconn) { return rconn->remote_ip; } /* Returns the transport port of the peer, or 0 if the peer's port is not * known. */ ovs_be16 rconn_get_remote_port(const struct rconn *rconn) { return rconn->remote_port; } /* Returns the IP address used to connect to the peer, or 0 if the * connection is not an IP-based protocol or if its IP address is not * known. */ ovs_be32 rconn_get_local_ip(const struct rconn *rconn) { return rconn->local_ip; } /* Returns the transport port used to connect to the peer, or 0 if the * connection does not contain a port or if the port is not known. */ ovs_be16 rconn_get_local_port(const struct rconn *rconn) OVS_EXCLUDED(rconn->mutex) { ovs_be16 port; ovs_mutex_lock(&rconn->mutex); port = rconn->vconn ? vconn_get_local_port(rconn->vconn) : 0; ovs_mutex_unlock(&rconn->mutex); return port; } static int rconn_get_version__(const struct rconn *rconn) OVS_REQUIRES(rconn->mutex) { return rconn->vconn ? vconn_get_version(rconn->vconn) : -1; } /* Returns the OpenFlow version negotiated with the peer, or -1 if there is * currently no connection or if version negotiation is not yet complete. */ int rconn_get_version(const struct rconn *rconn) OVS_EXCLUDED(rconn->mutex) { int version; ovs_mutex_lock(&rconn->mutex); version = rconn_get_version__(rconn); ovs_mutex_unlock(&rconn->mutex); return version; } /* Returns the total number of packets successfully received by the underlying * vconn. */ unsigned int rconn_packets_received(const struct rconn *rc) { return rc->packets_received; } /* Returns a string representing the internal state of 'rc'. The caller must * not modify or free the string. */ const char * rconn_get_state(const struct rconn *rc) { return state_name(rc->state); } /* Returns the time at which the last successful connection was made by * 'rc'. Returns TIME_MIN if never connected. 
*/ time_t rconn_get_last_connection(const struct rconn *rc) { return rc->last_connected; } /* Returns the time at which 'rc' was last disconnected. Returns TIME_MIN * if never disconnected. */ time_t rconn_get_last_disconnect(const struct rconn *rc) { return rc->last_disconnected; } /* Returns 'rc''s current connection sequence number, a number that changes * every time that 'rconn' connects or disconnects. */ unsigned int rconn_get_connection_seqno(const struct rconn *rc) { return rc->seqno; } /* Returns a value that explains why 'rc' last disconnected: * * - 0 means that the last disconnection was caused by a call to * rconn_disconnect(), or that 'rc' is new and has not yet completed its * initial connection or connection attempt. * * - EOF means that the connection was closed in the normal way by the peer. * * - A positive integer is an errno value that represents the error. */ int rconn_get_last_error(const struct rconn *rc) { return rc->last_error; } /* Returns the number of messages queued for transmission on 'rc'. 
*/ unsigned int rconn_count_txqlen(const struct rconn *rc) OVS_EXCLUDED(rc->mutex) { unsigned int len; ovs_mutex_lock(&rc->mutex); len = list_size(&rc->txq); ovs_mutex_unlock(&rc->mutex); return len; } struct rconn_packet_counter * rconn_packet_counter_create(void) { struct rconn_packet_counter *c = xzalloc(sizeof *c); ovs_mutex_init(&c->mutex); ovs_mutex_lock(&c->mutex); c->ref_cnt = 1; ovs_mutex_unlock(&c->mutex); return c; } void rconn_packet_counter_destroy(struct rconn_packet_counter *c) { if (c) { bool dead; ovs_mutex_lock(&c->mutex); ovs_assert(c->ref_cnt > 0); dead = !--c->ref_cnt && !c->n_packets; ovs_mutex_unlock(&c->mutex); if (dead) { ovs_mutex_destroy(&c->mutex); free(c); } } } void rconn_packet_counter_inc(struct rconn_packet_counter *c, unsigned int n_bytes) { ovs_mutex_lock(&c->mutex); c->n_packets++; c->n_bytes += n_bytes; ovs_mutex_unlock(&c->mutex); } void rconn_packet_counter_dec(struct rconn_packet_counter *c, unsigned int n_bytes) { bool dead = false; ovs_mutex_lock(&c->mutex); ovs_assert(c->n_packets > 0); ovs_assert(c->n_packets == 1 ? c->n_bytes == n_bytes : c->n_bytes > n_bytes); c->n_packets--; c->n_bytes -= n_bytes; dead = !c->n_packets && !c->ref_cnt; ovs_mutex_unlock(&c->mutex); if (dead) { ovs_mutex_destroy(&c->mutex); free(c); } } unsigned int rconn_packet_counter_n_packets(const struct rconn_packet_counter *c) { unsigned int n; ovs_mutex_lock(&c->mutex); n = c->n_packets; ovs_mutex_unlock(&c->mutex); return n; } unsigned int rconn_packet_counter_n_bytes(const struct rconn_packet_counter *c) { unsigned int n; ovs_mutex_lock(&c->mutex); n = c->n_bytes; ovs_mutex_unlock(&c->mutex); return n; } /* Set rc->target and rc->name to 'target' and 'name', respectively. If 'name' * is null, 'target' is used. * * Also, clear out the cached IP address and port information, since changing * the target also likely changes these values. 
*/ static void rconn_set_target__(struct rconn *rc, const char *target, const char *name) OVS_REQUIRES(rc->mutex) { free(rc->name); rc->name = xstrdup(name ? name : target); free(rc->target); rc->target = xstrdup(target); rc->local_ip = 0; rc->remote_ip = 0; rc->remote_port = 0; } /* Tries to send a packet from 'rc''s send buffer. Returns 0 if successful, * otherwise a positive errno value. */ static int try_send(struct rconn *rc) OVS_REQUIRES(rc->mutex) { struct ofpbuf *msg = ofpbuf_from_list(rc->txq.next); unsigned int n_bytes = msg->size; struct rconn_packet_counter *counter = msg->private_p; int retval; /* Eagerly remove 'msg' from the txq. We can't remove it from the list * after sending, if sending is successful, because it is then owned by the * vconn, which might have freed it already. */ list_remove(&msg->list_node); retval = vconn_send(rc->vconn, msg); if (retval) { list_push_front(&rc->txq, &msg->list_node); if (retval != EAGAIN) { report_error(rc, retval); disconnect(rc, retval); } return retval; } COVERAGE_INC(rconn_sent); rc->packets_sent++; if (counter) { rconn_packet_counter_dec(counter, n_bytes); } return 0; } /* Reports that 'error' caused 'rc' to disconnect. 'error' may be a positive * errno value, or it may be EOF to indicate that the connection was closed * normally. */ static void report_error(struct rconn *rc, int error) OVS_REQUIRES(rc->mutex) { if (error == EOF) { /* If 'rc' isn't reliable, then we don't really expect this connection * to last forever anyway (probably it's a connection that we received * via accept()), so use DBG level to avoid cluttering the logs. */ enum vlog_level level = rc->reliable ? 
VLL_INFO : VLL_DBG; VLOG(level, "%s: connection closed by peer", rc->name); } else { VLOG_WARN("%s: connection dropped (%s)", rc->name, ovs_strerror(error)); } } /* Disconnects 'rc' and records 'error' as the error that caused 'rc''s last * disconnection: * * - 0 means that this disconnection is due to a request by 'rc''s client, * not due to any kind of network error. * * - EOF means that the connection was closed in the normal way by the peer. * * - A positive integer is an errno value that represents the error. */ static void disconnect(struct rconn *rc, int error) OVS_REQUIRES(rc->mutex) { rc->last_error = error; if (rc->reliable) { time_t now = time_now(); if (rc->state & (S_CONNECTING | S_ACTIVE | S_IDLE)) { rc->last_disconnected = now; vconn_close(rc->vconn); rc->vconn = NULL; flush_queue(rc); } if (now >= rc->backoff_deadline) { rc->backoff = 1; } else if (rc->backoff < rc->max_backoff / 2) { rc->backoff = MAX(1, 2 * rc->backoff); VLOG_INFO("%s: waiting %d seconds before reconnect", rc->name, rc->backoff); } else { if (rconn_logging_connection_attempts__(rc)) { VLOG_INFO("%s: continuing to retry connections in the " "background but suppressing further logging", rc->name); } rc->backoff = rc->max_backoff; } rc->backoff_deadline = now + rc->backoff; state_transition(rc, S_BACKOFF); } else { rc->last_disconnected = time_now(); rconn_disconnect__(rc); } } /* Drops all the packets from 'rc''s send queue and decrements their queue * counts. 
*/ static void flush_queue(struct rconn *rc) OVS_REQUIRES(rc->mutex) { if (list_is_empty(&rc->txq)) { return; } while (!list_is_empty(&rc->txq)) { struct ofpbuf *b = ofpbuf_from_list(list_pop_front(&rc->txq)); struct rconn_packet_counter *counter = b->private_p; if (counter) { rconn_packet_counter_dec(counter, b->size); } COVERAGE_INC(rconn_discarded); ofpbuf_delete(b); } poll_immediate_wake(); } static unsigned int elapsed_in_this_state(const struct rconn *rc) OVS_REQUIRES(rc->mutex) { return time_now() - rc->state_entered; } static unsigned int timeout(const struct rconn *rc) OVS_REQUIRES(rc->mutex) { switch (rc->state) { #define STATE(NAME, VALUE) case S_##NAME: return timeout_##NAME(rc); STATES #undef STATE default: NOT_REACHED(); } } static bool timed_out(const struct rconn *rc) OVS_REQUIRES(rc->mutex) { return time_now() >= sat_add(rc->state_entered, timeout(rc)); } static void state_transition(struct rconn *rc, enum state state) OVS_REQUIRES(rc->mutex) { rc->seqno += (rc->state == S_ACTIVE) != (state == S_ACTIVE); if (is_connected_state(state) && !is_connected_state(rc->state)) { rc->probably_admitted = false; } if (rconn_is_connected(rc)) { rc->total_time_connected += elapsed_in_this_state(rc); } VLOG_DBG("%s: entering %s", rc->name, state_name(state)); rc->state = state; rc->state_entered = time_now(); } static void close_monitor(struct rconn *rc, size_t idx, int retval) OVS_REQUIRES(rc->mutex) { VLOG_DBG("%s: closing monitor connection to %s: %s", rconn_get_name(rc), vconn_get_name(rc->monitors[idx]), ovs_retval_to_string(retval)); rc->monitors[idx] = rc->monitors[--rc->n_monitors]; } static void copy_to_monitor(struct rconn *rc, const struct ofpbuf *b) OVS_REQUIRES(rc->mutex) { struct ofpbuf *clone = NULL; int retval; size_t i; for (i = 0; i < rc->n_monitors; ) { struct vconn *vconn = rc->monitors[i]; if (!clone) { clone = ofpbuf_clone(b); } retval = vconn_send(vconn, clone); if (!retval) { clone = NULL; } else if (retval != EAGAIN) { close_monitor(rc, 
i, retval); continue; } i++; } ofpbuf_delete(clone); } static bool is_connected_state(enum state state) { return (state & (S_ACTIVE | S_IDLE)) != 0; } static bool is_admitted_msg(const struct ofpbuf *b) { enum ofptype type; enum ofperr error; error = ofptype_decode(&type, b->data); if (error) { return false; } switch (type) { case OFPTYPE_HELLO: case OFPTYPE_ERROR: case OFPTYPE_ECHO_REQUEST: case OFPTYPE_ECHO_REPLY: case OFPTYPE_FEATURES_REQUEST: case OFPTYPE_FEATURES_REPLY: case OFPTYPE_GET_CONFIG_REQUEST: case OFPTYPE_GET_CONFIG_REPLY: case OFPTYPE_SET_CONFIG: /* FIXME: Change the following once they are implemented: */ case OFPTYPE_QUEUE_GET_CONFIG_REQUEST: case OFPTYPE_QUEUE_GET_CONFIG_REPLY: case OFPTYPE_GET_ASYNC_REQUEST: case OFPTYPE_GET_ASYNC_REPLY: case OFPTYPE_GROUP_STATS_REQUEST: case OFPTYPE_GROUP_STATS_REPLY: case OFPTYPE_GROUP_DESC_STATS_REQUEST: case OFPTYPE_GROUP_DESC_STATS_REPLY: case OFPTYPE_GROUP_FEATURES_STATS_REQUEST: case OFPTYPE_GROUP_FEATURES_STATS_REPLY: case OFPTYPE_TABLE_FEATURES_STATS_REQUEST: case OFPTYPE_TABLE_FEATURES_STATS_REPLY: return false; case OFPTYPE_PACKET_IN: case OFPTYPE_FLOW_REMOVED: case OFPTYPE_PORT_STATUS: case OFPTYPE_PACKET_OUT: case OFPTYPE_FLOW_MOD: case OFPTYPE_PORT_MOD: case OFPTYPE_METER_MOD: case OFPTYPE_BARRIER_REQUEST: case OFPTYPE_BARRIER_REPLY: case OFPTYPE_DESC_STATS_REQUEST: case OFPTYPE_DESC_STATS_REPLY: case OFPTYPE_FLOW_STATS_REQUEST: case OFPTYPE_FLOW_STATS_REPLY: case OFPTYPE_AGGREGATE_STATS_REQUEST: case OFPTYPE_AGGREGATE_STATS_REPLY: case OFPTYPE_TABLE_STATS_REQUEST: case OFPTYPE_TABLE_STATS_REPLY: case OFPTYPE_PORT_STATS_REQUEST: case OFPTYPE_PORT_STATS_REPLY: case OFPTYPE_QUEUE_STATS_REQUEST: case OFPTYPE_QUEUE_STATS_REPLY: case OFPTYPE_PORT_DESC_STATS_REQUEST: case OFPTYPE_PORT_DESC_STATS_REPLY: case OFPTYPE_METER_STATS_REQUEST: case OFPTYPE_METER_STATS_REPLY: case OFPTYPE_METER_CONFIG_STATS_REQUEST: case OFPTYPE_METER_CONFIG_STATS_REPLY: case OFPTYPE_METER_FEATURES_STATS_REQUEST: case 
OFPTYPE_METER_FEATURES_STATS_REPLY: case OFPTYPE_ROLE_REQUEST: case OFPTYPE_ROLE_REPLY: case OFPTYPE_SET_FLOW_FORMAT: case OFPTYPE_FLOW_MOD_TABLE_ID: case OFPTYPE_SET_PACKET_IN_FORMAT: case OFPTYPE_FLOW_AGE: case OFPTYPE_SET_ASYNC_CONFIG: case OFPTYPE_SET_CONTROLLER_ID: case OFPTYPE_FLOW_MONITOR_STATS_REQUEST: case OFPTYPE_FLOW_MONITOR_STATS_REPLY: case OFPTYPE_FLOW_MONITOR_CANCEL: case OFPTYPE_FLOW_MONITOR_PAUSED: case OFPTYPE_FLOW_MONITOR_RESUMED: default: return true; } } /* Returns true if 'rc' is currently logging information about connection * attempts, false if logging should be suppressed because 'rc' hasn't * successuflly connected in too long. */ static bool rconn_logging_connection_attempts__(const struct rconn *rc) OVS_REQUIRES(rc->mutex) { return rc->backoff < rc->max_backoff; } openvswitch-2.0.1+git20140120/lib/rconn.h000066400000000000000000000106321226605124000175120ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef RCONN_H #define RCONN_H 1 #include #include #include #include "openvswitch/types.h" #include "ovs-thread.h" /* A wrapper around vconn that provides queuing and optionally reliability. * * An rconn maintains a message transmission queue of bounded length specified * by the caller. The rconn does not guarantee reliable delivery of * queued messages: all queued messages are dropped when reconnection becomes * necessary. 
* * An rconn optionally provides reliable communication, in this sense: the * rconn will re-connect, with exponential backoff, when the underlying vconn * disconnects. * * * Thread-safety * ============= * * Fully thread-safe. */ struct vconn; struct rconn_packet_counter; struct rconn *rconn_create(int inactivity_probe_interval, int max_backoff, uint8_t dscp, uint32_t allowed_versions); void rconn_set_dscp(struct rconn *rc, uint8_t dscp); uint32_t rconn_get_allowed_versions(const struct rconn *); uint8_t rconn_get_dscp(const struct rconn *rc); void rconn_set_max_backoff(struct rconn *, int max_backoff); int rconn_get_max_backoff(const struct rconn *); void rconn_set_probe_interval(struct rconn *, int inactivity_probe_interval); int rconn_get_probe_interval(const struct rconn *); void rconn_connect(struct rconn *, const char *target, const char *name); void rconn_connect_unreliably(struct rconn *, struct vconn *, const char *name); void rconn_reconnect(struct rconn *); void rconn_disconnect(struct rconn *); void rconn_destroy(struct rconn *); void rconn_run(struct rconn *); void rconn_run_wait(struct rconn *); struct ofpbuf *rconn_recv(struct rconn *); void rconn_recv_wait(struct rconn *); int rconn_send(struct rconn *, struct ofpbuf *, struct rconn_packet_counter *); int rconn_send_with_limit(struct rconn *, struct ofpbuf *, struct rconn_packet_counter *, int queue_limit); unsigned int rconn_packets_sent(const struct rconn *); unsigned int rconn_packets_received(const struct rconn *); void rconn_add_monitor(struct rconn *, struct vconn *); const char *rconn_get_name(const struct rconn *); void rconn_set_name(struct rconn *, const char *new_name); const char *rconn_get_target(const struct rconn *); bool rconn_is_alive(const struct rconn *); bool rconn_is_connected(const struct rconn *); bool rconn_is_admitted(const struct rconn *); int rconn_failure_duration(const struct rconn *); ovs_be32 rconn_get_remote_ip(const struct rconn *); ovs_be16 
rconn_get_remote_port(const struct rconn *); ovs_be32 rconn_get_local_ip(const struct rconn *); ovs_be16 rconn_get_local_port(const struct rconn *); int rconn_get_version(const struct rconn *); const char *rconn_get_state(const struct rconn *); time_t rconn_get_last_connection(const struct rconn *); time_t rconn_get_last_disconnect(const struct rconn *); unsigned int rconn_get_connection_seqno(const struct rconn *); int rconn_get_last_error(const struct rconn *); unsigned int rconn_count_txqlen(const struct rconn *); /* Counts packets and bytes queued into an rconn by a given source. */ struct rconn_packet_counter { struct ovs_mutex mutex; unsigned int n_packets OVS_GUARDED; /* Number of packets queued. */ unsigned int n_bytes OVS_GUARDED; /* Number of bytes queued. */ int ref_cnt OVS_GUARDED; /* Number of owners. */ }; struct rconn_packet_counter *rconn_packet_counter_create(void); void rconn_packet_counter_destroy(struct rconn_packet_counter *); void rconn_packet_counter_inc(struct rconn_packet_counter *, unsigned n_bytes); void rconn_packet_counter_dec(struct rconn_packet_counter *, unsigned n_bytes); unsigned int rconn_packet_counter_n_packets( const struct rconn_packet_counter *); unsigned int rconn_packet_counter_n_bytes(const struct rconn_packet_counter *); #endif /* rconn.h */ openvswitch-2.0.1+git20140120/lib/reconnect.c000066400000000000000000000564631226605124000203620ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include "reconnect.h" #include #include "poll-loop.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(reconnect); #define STATES \ STATE(VOID, 1 << 0) \ STATE(BACKOFF, 1 << 1) \ STATE(CONNECTING, 1 << 3) \ STATE(ACTIVE, 1 << 4) \ STATE(IDLE, 1 << 5) \ STATE(RECONNECT, 1 << 6) \ STATE(LISTENING, 1 << 7) enum state { #define STATE(NAME, VALUE) S_##NAME = VALUE, STATES #undef STATE }; static bool is_connected_state(enum state state) { return (state & (S_ACTIVE | S_IDLE)) != 0; } struct reconnect { /* Configuration. */ char *name; int min_backoff; int max_backoff; int probe_interval; bool passive; enum vlog_level info; /* Used for informational messages. */ /* State. */ enum state state; long long int state_entered; int backoff; long long int last_activity; long long int last_connected; long long int last_disconnected; unsigned int max_tries; /* These values are simply for statistics reporting, not otherwise used * directly by anything internal. */ long long int creation_time; unsigned int n_attempted_connections, n_successful_connections; unsigned int total_connected_duration; unsigned int seqno; }; static void reconnect_transition__(struct reconnect *, long long int now, enum state state); static long long int reconnect_deadline__(const struct reconnect *); static bool reconnect_may_retry(struct reconnect *); static const char * reconnect_state_name__(enum state state) { switch (state) { #define STATE(NAME, VALUE) case S_##NAME: return #NAME; STATES #undef STATE } return "***ERROR***"; } /* Creates and returns a new reconnect FSM with default settings. The FSM is * initially disabled. The caller will likely want to call reconnect_enable() * and reconnect_set_name() on the returned object. 
*/ struct reconnect * reconnect_create(long long int now) { struct reconnect *fsm = xzalloc(sizeof *fsm); fsm->name = xstrdup("void"); fsm->min_backoff = RECONNECT_DEFAULT_MIN_BACKOFF; fsm->max_backoff = RECONNECT_DEFAULT_MAX_BACKOFF; fsm->probe_interval = RECONNECT_DEFAULT_PROBE_INTERVAL; fsm->passive = false; fsm->info = VLL_INFO; fsm->state = S_VOID; fsm->state_entered = now; fsm->backoff = 0; fsm->last_activity = now; fsm->last_connected = LLONG_MAX; fsm->last_disconnected = LLONG_MAX; fsm->max_tries = UINT_MAX; fsm->creation_time = now; return fsm; } /* Frees 'fsm'. */ void reconnect_destroy(struct reconnect *fsm) { if (fsm) { free(fsm->name); free(fsm); } } /* If 'quiet' is true, 'fsm' will log informational messages at level VLL_DBG, * by default keeping them out of log files. This is appropriate if the * connection is one that is expected to be short-lived, so that the log * messages are merely distracting. * * If 'quiet' is false, 'fsm' logs informational messages at level VLL_INFO. * This is the default. * * This setting has no effect on the log level of debugging, warning, or error * messages. */ void reconnect_set_quiet(struct reconnect *fsm, bool quiet) { fsm->info = quiet ? VLL_DBG : VLL_INFO; } /* Returns 'fsm''s name. */ const char * reconnect_get_name(const struct reconnect *fsm) { return fsm->name; } /* Sets 'fsm''s name to 'name'. If 'name' is null, then "void" is used * instead. * * The name set for 'fsm' is used in log messages. */ void reconnect_set_name(struct reconnect *fsm, const char *name) { free(fsm->name); fsm->name = xstrdup(name ? name : "void"); } /* Return the minimum number of milliseconds to back off between consecutive * connection attempts. The default is RECONNECT_DEFAULT_MIN_BACKOFF. */ int reconnect_get_min_backoff(const struct reconnect *fsm) { return fsm->min_backoff; } /* Return the maximum number of milliseconds to back off between consecutive * connection attempts. The default is RECONNECT_DEFAULT_MAX_BACKOFF. 
*/ int reconnect_get_max_backoff(const struct reconnect *fsm) { return fsm->max_backoff; } /* Returns the "probe interval" for 'fsm' in milliseconds. If this is zero, it * disables the connection keepalive feature. If it is nonzero, then if the * interval passes while 'fsm' is connected and without reconnect_activity() * being called for 'fsm', reconnect_run() returns RECONNECT_PROBE. If the * interval passes again without reconnect_activity() being called, * reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'. */ int reconnect_get_probe_interval(const struct reconnect *fsm) { return fsm->probe_interval; } /* Limits the maximum number of times that 'fsm' will ask the client to try to * reconnect to 'max_tries'. UINT_MAX (the default) means an unlimited number * of tries. * * After the number of tries has expired, the 'fsm' will disable itself * instead of backing off and retrying. */ void reconnect_set_max_tries(struct reconnect *fsm, unsigned int max_tries) { fsm->max_tries = max_tries; } /* Returns the current remaining number of connection attempts, UINT_MAX if * the number is unlimited. */ unsigned int reconnect_get_max_tries(struct reconnect *fsm) { return fsm->max_tries; } /* Configures the backoff parameters for 'fsm'. 'min_backoff' is the minimum * number of milliseconds, and 'max_backoff' is the maximum, between connection * attempts. The current backoff is also the duration that 'fsm' is willing to * wait for a given connection to succeed or fail. * * 'min_backoff' must be at least 1000, and 'max_backoff' must be greater than * or equal to 'min_backoff'. * * Pass 0 for 'min_backoff' or 'max_backoff' or both to use the defaults. */ void reconnect_set_backoff(struct reconnect *fsm, int min_backoff, int max_backoff) { fsm->min_backoff = MAX(min_backoff, 1000); fsm->max_backoff = (max_backoff ? 
MAX(max_backoff, 1000) : RECONNECT_DEFAULT_MAX_BACKOFF); if (fsm->min_backoff > fsm->max_backoff) { fsm->max_backoff = fsm->min_backoff; } if (fsm->state == S_BACKOFF && fsm->backoff > max_backoff) { fsm->backoff = max_backoff; } } /* Sets the "probe interval" for 'fsm' to 'probe_interval', in milliseconds. * If this is zero, it disables the connection keepalive feature. If it is * nonzero, then if the interval passes while 'fsm' is connected and without * reconnect_activity() being called for 'fsm', reconnect_run() returns * RECONNECT_PROBE. If the interval passes again without reconnect_activity() * being called, reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'. * * If 'probe_interval' is nonzero, then it will be forced to a value of at * least 1000 ms. */ void reconnect_set_probe_interval(struct reconnect *fsm, int probe_interval) { fsm->probe_interval = probe_interval ? MAX(1000, probe_interval) : 0; } /* Returns true if 'fsm' is in passive mode, false if 'fsm' is in active mode * (the default). */ bool reconnect_is_passive(const struct reconnect *fsm) { return fsm->passive; } /* Configures 'fsm' for active or passive mode. In active mode (the default), * the FSM is attempting to connect to a remote host. In passive mode, the FSM * is listening for connections from a remote host. */ void reconnect_set_passive(struct reconnect *fsm, bool passive, long long int now) { if (fsm->passive != passive) { fsm->passive = passive; if (passive ? fsm->state & (S_CONNECTING | S_RECONNECT) : fsm->state == S_LISTENING && reconnect_may_retry(fsm)) { reconnect_transition__(fsm, now, S_BACKOFF); fsm->backoff = 0; } } } /* Returns true if 'fsm' has been enabled with reconnect_enable(). Calling * another function that indicates a change in connection state, such as * reconnect_disconnected() or reconnect_force_reconnect(), will also enable * a reconnect FSM. 
*/ bool reconnect_is_enabled(const struct reconnect *fsm) { return fsm->state != S_VOID; } /* If 'fsm' is disabled (the default for newly created FSMs), enables it, so * that the next call to reconnect_run() for 'fsm' will return * RECONNECT_CONNECT. * * If 'fsm' is not disabled, this function has no effect. */ void reconnect_enable(struct reconnect *fsm, long long int now) { if (fsm->state == S_VOID && reconnect_may_retry(fsm)) { reconnect_transition__(fsm, now, S_BACKOFF); fsm->backoff = 0; } } /* Disables 'fsm'. Until 'fsm' is enabled again, reconnect_run() will always * return 0. */ void reconnect_disable(struct reconnect *fsm, long long int now) { if (fsm->state != S_VOID) { reconnect_transition__(fsm, now, S_VOID); } } /* If 'fsm' is enabled and currently connected (or attempting to connect), * forces reconnect_run() for 'fsm' to return RECONNECT_DISCONNECT the next * time it is called, which should cause the client to drop the connection (or * attempt), back off, and then reconnect. */ void reconnect_force_reconnect(struct reconnect *fsm, long long int now) { if (fsm->state & (S_CONNECTING | S_ACTIVE | S_IDLE)) { reconnect_transition__(fsm, now, S_RECONNECT); } } /* Tell 'fsm' that the connection dropped or that a connection attempt failed. * 'error' specifies the reason: a positive value represents an errno value, * EOF indicates that the connection was closed by the peer (e.g. read() * returned 0), and 0 indicates no specific error. * * The FSM will back off, then reconnect. */ void reconnect_disconnected(struct reconnect *fsm, long long int now, int error) { if (!(fsm->state & (S_BACKOFF | S_VOID))) { /* Report what happened. 
*/ if (fsm->state & (S_ACTIVE | S_IDLE)) { if (error > 0) { VLOG_WARN("%s: connection dropped (%s)", fsm->name, ovs_strerror(error)); } else if (error == EOF) { VLOG(fsm->info, "%s: connection closed by peer", fsm->name); } else { VLOG(fsm->info, "%s: connection dropped", fsm->name); } } else if (fsm->state == S_LISTENING) { if (error > 0) { VLOG_WARN("%s: error listening for connections (%s)", fsm->name, ovs_strerror(error)); } else { VLOG(fsm->info, "%s: error listening for connections", fsm->name); } } else { const char *type = fsm->passive ? "listen" : "connection"; if (error > 0) { VLOG_WARN("%s: %s attempt failed (%s)", fsm->name, type, ovs_strerror(error)); } else { VLOG(fsm->info, "%s: %s attempt timed out", fsm->name, type); } } if (fsm->state & (S_ACTIVE | S_IDLE)) { fsm->last_disconnected = now; } /* Back off. */ if (fsm->state & (S_ACTIVE | S_IDLE) && (fsm->last_activity - fsm->last_connected >= fsm->backoff || fsm->passive)) { fsm->backoff = fsm->passive ? 0 : fsm->min_backoff; } else { if (fsm->backoff < fsm->min_backoff) { fsm->backoff = fsm->min_backoff; } else if (fsm->backoff >= fsm->max_backoff / 2) { fsm->backoff = fsm->max_backoff; } else { fsm->backoff *= 2; } if (fsm->passive) { VLOG(fsm->info, "%s: waiting %.3g seconds before trying to " "listen again", fsm->name, fsm->backoff / 1000.0); } else { VLOG(fsm->info, "%s: waiting %.3g seconds before reconnect", fsm->name, fsm->backoff / 1000.0); } } reconnect_transition__(fsm, now, reconnect_may_retry(fsm) ? S_BACKOFF : S_VOID); } } /* Tell 'fsm' that a connection or listening attempt is in progress. * * The FSM will start a timer, after which the connection or listening attempt * will be aborted (by returning RECONNECT_DISCONNECT from * reconnect_run()). 
*/
void
reconnect_connecting(struct reconnect *fsm, long long int now)
{
    /* Already connecting: keep the original state-entry time so the
     * connection timeout is not restarted. */
    if (fsm->state != S_CONNECTING) {
        if (fsm->passive) {
            VLOG(fsm->info, "%s: listening...", fsm->name);
        } else {
            VLOG(fsm->info, "%s: connecting...", fsm->name);
        }
        reconnect_transition__(fsm, now, S_CONNECTING);
    }
}

/* Tell 'fsm' that the client is listening for connection attempts.  This state
 * lasts indefinitely until the client reports some change.
 *
 * The natural progression from this state is for the client to report that a
 * connection has been accepted or is in progress of being accepted, by calling
 * reconnect_connecting() or reconnect_connected().
 *
 * The client may also report that listening failed (e.g. accept() returned an
 * unexpected error such as ENOMEM) by calling reconnect_listen_error(), in
 * which case the FSM will back off and eventually return RECONNECT_CONNECT
 * from reconnect_run() to tell the client to try listening again. */
void
reconnect_listening(struct reconnect *fsm, long long int now)
{
    if (fsm->state != S_LISTENING) {
        VLOG(fsm->info, "%s: listening...", fsm->name);
        reconnect_transition__(fsm, now, S_LISTENING);
    }
}

/* Tell 'fsm' that the client's attempt to accept a connection failed
 * (e.g. accept() returned an unexpected error such as ENOMEM).
 *
 * If the FSM is currently listening (reconnect_listening() was called), it
 * will back off and eventually return RECONNECT_CONNECT from reconnect_run()
 * to tell the client to try listening again.  If there is an active
 * connection, this will be delayed until that connection drops. */
void
reconnect_listen_error(struct reconnect *fsm, long long int now, int error)
{
    /* In any state other than S_LISTENING the error is ignored here. */
    if (fsm->state == S_LISTENING) {
        reconnect_disconnected(fsm, now, error);
    }
}

/* Tell 'fsm' that the connection was successful.
 *
 * The FSM will start the probe interval timer, which is reset by
 * reconnect_activity().  If the timer expires, a probe will be sent (by
 * returning RECONNECT_PROBE from reconnect_run()).
If the timer expires * again without being reset, the connection will be aborted (by returning * RECONNECT_DISCONNECT from reconnect_run()). */ void reconnect_connected(struct reconnect *fsm, long long int now) { if (!is_connected_state(fsm->state)) { reconnect_connecting(fsm, now); VLOG(fsm->info, "%s: connected", fsm->name); reconnect_transition__(fsm, now, S_ACTIVE); fsm->last_connected = now; } } /* Tell 'fsm' that the connection attempt failed. * * The FSM will back off and attempt to reconnect. */ void reconnect_connect_failed(struct reconnect *fsm, long long int now, int error) { reconnect_connecting(fsm, now); reconnect_disconnected(fsm, now, error); } /* Tell 'fsm' that some activity has occurred on the connection. This resets * the probe interval timer, so that the connection is known not to be idle. */ void reconnect_activity(struct reconnect *fsm, long long int now) { if (fsm->state != S_ACTIVE) { reconnect_transition__(fsm, now, S_ACTIVE); } fsm->last_activity = now; } static void reconnect_transition__(struct reconnect *fsm, long long int now, enum state state) { if (fsm->state == S_CONNECTING) { fsm->n_attempted_connections++; if (state == S_ACTIVE) { fsm->n_successful_connections++; } } if (is_connected_state(fsm->state) != is_connected_state(state)) { if (is_connected_state(fsm->state)) { fsm->total_connected_duration += now - fsm->last_connected; } fsm->seqno++; } VLOG_DBG("%s: entering %s", fsm->name, reconnect_state_name__(state)); fsm->state = state; fsm->state_entered = now; } static long long int reconnect_deadline__(const struct reconnect *fsm) { ovs_assert(fsm->state_entered != LLONG_MIN); switch (fsm->state) { case S_VOID: case S_LISTENING: return LLONG_MAX; case S_BACKOFF: return fsm->state_entered + fsm->backoff; case S_CONNECTING: return fsm->state_entered + MAX(1000, fsm->backoff); case S_ACTIVE: if (fsm->probe_interval) { long long int base = MAX(fsm->last_activity, fsm->state_entered); return base + fsm->probe_interval; } return 
LLONG_MAX;

    case S_IDLE:
        /* A probe was sent on entry to S_IDLE; the peer gets one more probe
         * interval to respond. */
        if (fsm->probe_interval) {
            return fsm->state_entered + fsm->probe_interval;
        }
        return LLONG_MAX;

    case S_RECONNECT:
        /* Deadline is the moment the state was entered, i.e. already due. */
        return fsm->state_entered;
    }

    NOT_REACHED();
}

/* Assesses whether any action should be taken on 'fsm'.  The return value is
 * one of:
 *
 *     - 0: The client need not take any action.
 *
 *     - Active client, RECONNECT_CONNECT: The client should start a connection
 *       attempt and indicate this by calling reconnect_connecting().  If the
 *       connection attempt has definitely succeeded, it should call
 *       reconnect_connected().  If the connection attempt has definitely
 *       failed, it should call reconnect_connect_failed().
 *
 *       The FSM is smart enough to back off correctly after successful
 *       connections that quickly abort, so it is OK to call
 *       reconnect_connected() after a low-level successful connection
 *       (e.g. connect()) even if the connection might soon abort due to a
 *       failure at a high-level (e.g. SSL negotiation failure).
 *
 *     - Passive client, RECONNECT_CONNECT: The client should try to listen for
 *       a connection, if it is not already listening.  It should call
 *       reconnect_listening() if successful, otherwise reconnect_connecting()
 *       or reconnect_connect_failed() if the attempt is in progress or
 *       definitely failed, respectively.
 *
 *       A listening passive client should constantly attempt to accept a new
 *       connection and report an accepted connection with
 *       reconnect_connected().
 *
 *     - RECONNECT_DISCONNECT: The client should abort the current connection
 *       or connection attempt or listen attempt and call
 *       reconnect_disconnected() or reconnect_connect_failed() to indicate it.
 *
 *     - RECONNECT_PROBE: The client should send some kind of request to the
 *       peer that will elicit a response, to ensure that the connection is
 *       indeed in working order.  (This will only be returned if the "probe
 *       interval" is nonzero--see reconnect_set_probe_interval()).
 */
enum reconnect_action
reconnect_run(struct reconnect *fsm, long long int now)
{
    if (now >= reconnect_deadline__(fsm)) {
        switch (fsm->state) {
        case S_VOID:
            return 0;

        case S_BACKOFF:
            return RECONNECT_CONNECT;

        case S_CONNECTING:
            return RECONNECT_DISCONNECT;

        case S_ACTIVE:
            VLOG_DBG("%s: idle %lld ms, sending inactivity probe", fsm->name,
                     now - MAX(fsm->last_activity, fsm->state_entered));
            /* Entering S_IDLE restarts the timer for the probe reply. */
            reconnect_transition__(fsm, now, S_IDLE);
            return RECONNECT_PROBE;

        case S_IDLE:
            VLOG_ERR("%s: no response to inactivity probe after %.3g "
                     "seconds, disconnecting",
                     fsm->name, (now - fsm->state_entered) / 1000.0);
            return RECONNECT_DISCONNECT;

        case S_RECONNECT:
            return RECONNECT_DISCONNECT;

        case S_LISTENING:
            return 0;
        }

        NOT_REACHED();
    } else {
        return 0;
    }
}

/* Causes the next call to poll_block() to wake up when reconnect_run() should
 * be called on 'fsm'. */
void
reconnect_wait(struct reconnect *fsm, long long int now)
{
    int timeout = reconnect_timeout(fsm, now);
    /* A negative timeout means no deadline is pending, so no timer is set. */
    if (timeout >= 0) {
        poll_timer_wait(timeout);
    }
}

/* Returns the number of milliseconds after which reconnect_run() should be
 * called on 'fsm' if nothing else notable happens in the meantime, or a
 * negative number if this is currently unnecessary. */
int
reconnect_timeout(struct reconnect *fsm, long long int now)
{
    long long int deadline = reconnect_deadline__(fsm);
    if (deadline != LLONG_MAX) {
        long long int remaining = deadline - now;
        /* Clamp into [0, INT_MAX]: an overdue deadline yields 0. */
        return MAX(0, MIN(INT_MAX, remaining));
    }
    return -1;
}

/* Returns true if 'fsm' is currently believed to be connected, that is, if
 * reconnect_connected() was called more recently than any call to
 * reconnect_connect_failed() or reconnect_disconnected() or
 * reconnect_disable(), and false otherwise.  */
bool
reconnect_is_connected(const struct reconnect *fsm)
{
    return is_connected_state(fsm->state);
}

/* Returns the number of milliseconds since 'fsm' last successfully connected
 * to its peer (even if it has since disconnected).  Returns UINT_MAX if never
 * connected.
*/ unsigned int reconnect_get_last_connect_elapsed(const struct reconnect *fsm, long long int now) { return fsm->last_connected == LLONG_MAX ? UINT_MAX : now - fsm->last_connected; } /* Returns the number of milliseconds since 'fsm' last disconnected * from its peer (even if it has since reconnected). Returns UINT_MAX if never * disconnected. */ unsigned int reconnect_get_last_disconnect_elapsed(const struct reconnect *fsm, long long int now) { return fsm->last_disconnected == LLONG_MAX ? UINT_MAX : now - fsm->last_disconnected; } /* Copies various statistics for 'fsm' into '*stats'. */ void reconnect_get_stats(const struct reconnect *fsm, long long int now, struct reconnect_stats *stats) { stats->creation_time = fsm->creation_time; stats->last_activity = fsm->last_activity; stats->last_connected = fsm->last_connected; stats->last_disconnected = fsm->last_disconnected; stats->backoff = fsm->backoff; stats->seqno = fsm->seqno; stats->is_connected = reconnect_is_connected(fsm); stats->msec_since_connect = reconnect_get_last_connect_elapsed(fsm, now); stats->msec_since_disconnect = reconnect_get_last_disconnect_elapsed(fsm, now); stats->total_connected_duration = fsm->total_connected_duration + (is_connected_state(fsm->state) ? reconnect_get_last_connect_elapsed(fsm, now) : 0); stats->n_attempted_connections = fsm->n_attempted_connections; stats->n_successful_connections = fsm->n_successful_connections; stats->state = reconnect_state_name__(fsm->state); stats->state_elapsed = now - fsm->state_entered; } static bool reconnect_may_retry(struct reconnect *fsm) { bool may_retry = fsm->max_tries > 0; if (may_retry && fsm->max_tries != UINT_MAX) { fsm->max_tries--; } return may_retry; } openvswitch-2.0.1+git20140120/lib/reconnect.h000066400000000000000000000120611226605124000203510ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2012 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef RECONNECT_H #define RECONNECT_H 1 /* This library implements a finite-state machine for connecting and * reconnecting to a network resource with exponential backoff. It also * provides optional support for detecting a connection on which the peer is no * longer responding. * * The library does not implement anything networking related, only an FSM for * networking code to use. * * Many "reconnect" functions take a "now" argument. This makes testing easier * since there is no hidden state. When not testing, just pass the return * value of time_msec() from timeval.h. (Perhaps this design should be * revisited later.) */ #include struct reconnect *reconnect_create(long long int now); void reconnect_destroy(struct reconnect *); void reconnect_set_quiet(struct reconnect *, bool quiet); const char *reconnect_get_name(const struct reconnect *); void reconnect_set_name(struct reconnect *, const char *name); /* Defaults, all in msecs. 
*/ #define RECONNECT_DEFAULT_MIN_BACKOFF 1000 #define RECONNECT_DEFAULT_MAX_BACKOFF 8000 #define RECONNECT_DEFAULT_PROBE_INTERVAL 5000 int reconnect_get_min_backoff(const struct reconnect *); int reconnect_get_max_backoff(const struct reconnect *); int reconnect_get_probe_interval(const struct reconnect *); void reconnect_set_max_tries(struct reconnect *, unsigned int max_tries); unsigned int reconnect_get_max_tries(struct reconnect *); void reconnect_set_backoff(struct reconnect *, int min_backoff, int max_backoff); void reconnect_set_probe_interval(struct reconnect *, int probe_interval); bool reconnect_is_passive(const struct reconnect *); void reconnect_set_passive(struct reconnect *, bool passive, long long int now); bool reconnect_is_enabled(const struct reconnect *); void reconnect_enable(struct reconnect *, long long int now); void reconnect_disable(struct reconnect *, long long int now); void reconnect_force_reconnect(struct reconnect *, long long int now); bool reconnect_is_connected(const struct reconnect *); unsigned int reconnect_get_last_connect_elapsed(const struct reconnect *, long long int now); unsigned int reconnect_get_last_disconnect_elapsed(const struct reconnect *, long long int now); void reconnect_disconnected(struct reconnect *, long long int now, int error); void reconnect_connecting(struct reconnect *, long long int now); void reconnect_listening(struct reconnect *, long long int now); void reconnect_listen_error(struct reconnect *, long long int now, int error); void reconnect_connected(struct reconnect *, long long int now); void reconnect_connect_failed(struct reconnect *, long long int now, int error); void reconnect_activity(struct reconnect *, long long int now); enum reconnect_action { RECONNECT_CONNECT = 1, RECONNECT_DISCONNECT, RECONNECT_PROBE, }; enum reconnect_action reconnect_run(struct reconnect *, long long int now); void reconnect_wait(struct reconnect *, long long int now); int reconnect_timeout(struct reconnect *, long 
long int now); struct reconnect_stats { /* All times and durations in this structure are in milliseconds. */ long long int creation_time; /* Time reconnect_create() called. */ long long int last_activity; /* Last call to reconnect_activity(). */ long long int last_connected; /* Last call to reconnect_connected(). */ long long int last_disconnected; /* Last call to reconnect_disconnected(). */ int backoff; /* Current backoff duration. */ unsigned int seqno; /* # of connections + # of disconnections. */ bool is_connected; /* Currently connected? */ unsigned int msec_since_connect; /* Time since last connect. */ unsigned int msec_since_disconnect; /* Time since last disconnect. */ unsigned int total_connected_duration; /* Sum of all connections. */ unsigned int n_attempted_connections; unsigned int n_successful_connections; /* These should only be provided to a human user for debugging purposes. * The client should not attempt to interpret them. */ const char *state; /* FSM state. */ unsigned int state_elapsed; /* Time since FSM state entered. */ }; void reconnect_get_stats(const struct reconnect *, long long int now, struct reconnect_stats *); #endif /* reconnect.h */ openvswitch-2.0.1+git20140120/lib/route-table-bsd.c000066400000000000000000000061321226605124000213570ustar00rootroot00000000000000/* * Copyright (c) 2012 Ed Maste. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "route-table.h" #include #include #include #include #include #include #include #include #include "vlog.h" VLOG_DEFINE_THIS_MODULE(route_table); static int pid; static unsigned int register_count = 0; bool route_table_get_name(ovs_be32 ip, char name[IFNAMSIZ]) { struct { struct rt_msghdr rtm; char space[512]; } rtmsg; struct rt_msghdr *rtm = &rtmsg.rtm; struct sockaddr_dl *ifp = NULL; struct sockaddr_in *sin; struct sockaddr *sa; static int seq; int i, len, namelen, rtsock; rtsock = socket(PF_ROUTE, SOCK_RAW, 0); if (rtsock < 0) return false; memset(&rtmsg, 0, sizeof(rtmsg)); rtm->rtm_msglen = sizeof(struct rt_msghdr) + sizeof(struct sockaddr_in); rtm->rtm_version = RTM_VERSION; rtm->rtm_type = RTM_GET; rtm->rtm_addrs = RTA_DST | RTA_IFP; rtm->rtm_seq = ++seq; sin = (struct sockaddr_in *)(rtm + 1); sin->sin_len = len = sizeof(struct sockaddr_in); sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip; if ((write(rtsock, (char *)&rtmsg, rtm->rtm_msglen)) < 0) { close(rtsock); return false; } do { len = read(rtsock, (char *)&rtmsg, sizeof(rtmsg)); } while (len > 0 && (rtmsg.rtm.rtm_seq != seq || rtmsg.rtm.rtm_pid != pid)); close(rtsock); if (len < 0) { return false; } sa = (struct sockaddr *)(rtm + 1); for (i = 1; i; i <<= 1) { if (rtm->rtm_addrs & i) { if (i == RTA_IFP && sa->sa_family == AF_LINK && ((struct sockaddr_dl *)sa)->sdl_nlen) { ifp = (struct sockaddr_dl *)sa; namelen = ifp->sdl_nlen; if (namelen > IFNAMSIZ - 1) namelen = IFNAMSIZ - 1; memcpy(name, ifp->sdl_data, namelen); name[namelen] = '\0'; return true; } #if defined(__FreeBSD__) sa = (struct sockaddr *)((char *)sa + SA_SIZE(sa)); #elif defined(__NetBSD__) sa = (struct sockaddr *)((char *)sa + RT_ROUNDUP(sa->sa_len)); #else #error unimplemented #endif } } return false; } void route_table_register(void) { if (!register_count) { pid = getpid(); } register_count++; } void route_table_unregister(void) { register_count--; } void route_table_run(void) { } void route_table_wait(void) { } 
openvswitch-2.0.1+git20140120/lib/route-table-stub.c000066400000000000000000000020201226605124000215540ustar00rootroot00000000000000/* Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "route-table.h" #include "compiler.h" bool route_table_get_name(ovs_be32 ip OVS_UNUSED, char name[IFNAMSIZ] OVS_UNUSED) { name[0] = '\0'; return false; } bool route_table_get_ifindex(ovs_be32 ip OVS_UNUSED, int *ifindex) { *ifindex = 0; return false; } void route_table_register(void) { } void route_table_unregister(void) { } void route_table_run(void) { } void route_table_wait(void) { } openvswitch-2.0.1+git20140120/lib/route-table.c000066400000000000000000000275561226605124000206260ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "route-table.h" #include #include #include #include #include "hash.h" #include "hmap.h" #include "netlink.h" #include "netlink-notifier.h" #include "netlink-socket.h" #include "ofpbuf.h" #include "rtnetlink-link.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(route_table); struct route_data { /* Copied from struct rtmsg. */ unsigned char rtm_dst_len; /* Extracted from Netlink attributes. */ uint32_t rta_dst; /* Destination in host byte order. 0 if missing. */ int rta_oif; /* Output interface index. */ }; /* A digested version of a route message sent down by the kernel to indicate * that a route has changed. */ struct route_table_msg { bool relevant; /* Should this message be processed? */ int nlmsg_type; /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */ struct route_data rd; /* Data parsed from this message. */ }; struct route_node { struct hmap_node node; /* Node in route_map. */ struct route_data rd; /* Data associated with this node. */ }; struct name_node { struct hmap_node node; /* Node in name_map. */ uint32_t ifi_index; /* Kernel interface index. */ char ifname[IFNAMSIZ]; /* Interface name. 
*/ }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); static unsigned int register_count = 0; static struct nln *nln = NULL; static struct route_table_msg rtmsg; static struct nln_notifier *route_notifier = NULL; static struct nln_notifier *name_notifier = NULL; static bool route_table_valid = false; static bool name_table_valid = false; static struct hmap route_map; static struct hmap name_map; static int route_table_reset(void); static void route_table_handle_msg(const struct route_table_msg *); static bool route_table_parse(struct ofpbuf *, struct route_table_msg *); static void route_table_change(const struct route_table_msg *, void *); static struct route_node *route_node_lookup(const struct route_data *); static struct route_node *route_node_lookup_by_ip(uint32_t ip); static void route_map_clear(void); static uint32_t hash_route_data(const struct route_data *); static void name_table_init(void); static void name_table_uninit(void); static int name_table_reset(void); static void name_table_change(const struct rtnetlink_link_change *, void *); static void name_map_clear(void); static struct name_node *name_node_lookup(int ifi_index); /* Populates 'name' with the name of the interface traffic destined for 'ip' * is likely to egress out of (see route_table_get_ifindex). * * Returns true if successful, otherwise false. */ bool route_table_get_name(ovs_be32 ip, char name[IFNAMSIZ]) { int ifindex; if (!name_table_valid) { name_table_reset(); } if (route_table_get_ifindex(ip, &ifindex)) { struct name_node *nn; nn = name_node_lookup(ifindex); if (nn) { ovs_strlcpy(name, nn->ifname, IFNAMSIZ); return true; } } return false; } /* Populates 'ifindex' with the interface index traffic destined for 'ip' is * likely to egress. There is no hard guarantee that traffic destined for 'ip' * will egress out the specified interface. 'ifindex' may refer to an * interface which is not physical (such as a bridge port). * * Returns true if successful, otherwise false. 
*/
bool
route_table_get_ifindex(ovs_be32 ip_, int *ifindex)
{
    struct route_node *rn;
    uint32_t ip = ntohl(ip_);   /* Routes are stored in host byte order. */

    *ifindex = 0;

    /* The cached table is rebuilt lazily, on the first lookup after it was
     * marked invalid. */
    if (!route_table_valid) {
        route_table_reset();
    }

    rn = route_node_lookup_by_ip(ip);

    if (rn) {
        *ifindex = rn->rd.rta_oif;
        return true;
    }

    /* Choose a default route. */
    HMAP_FOR_EACH(rn, node, &route_map) {
        if (rn->rd.rta_dst == 0 && rn->rd.rtm_dst_len == 0) {
            *ifindex = rn->rd.rta_oif;
            return true;
        }
    }

    return false;
}

/* Users of the route_table module should register themselves with this
 * function before making any other route_table function calls. */
void
route_table_register(void)
{
    /* Only the first registration creates the notifier and the tables;
     * later ones just bump the reference count. */
    if (!register_count) {
        ovs_assert(!nln);
        ovs_assert(!route_notifier);

        nln = nln_create(NETLINK_ROUTE, RTNLGRP_IPV4_ROUTE,
                         (nln_parse_func *) route_table_parse, &rtmsg);

        route_notifier =
            nln_notifier_create(nln, (nln_notify_func *) route_table_change,
                                NULL);

        hmap_init(&route_map);
        route_table_reset();
        name_table_init();
    }

    register_count++;
}

/* Users of the route_table module should unregister themselves with this
 * function when they will no longer be making any more route_table function
 * calls. */
void
route_table_unregister(void)
{
    register_count--;

    /* The last unregistration tears down everything that the first
     * registration set up. */
    if (!register_count) {
        nln_notifier_destroy(route_notifier);
        route_notifier = NULL;
        nln_destroy(nln);
        nln = NULL;

        route_map_clear();
        hmap_destroy(&route_map);
        name_table_uninit();
    }
}

/* Run periodically to update the locally maintained routing table. */
void
route_table_run(void)
{
    /* 'nln' is null while nobody is registered; then this is a no-op. */
    if (nln) {
        rtnetlink_link_run();
        nln_run(nln);
    }
}

/* Causes poll_block() to wake up when route_table updates are required.
*/ void route_table_wait(void) { if (nln) { rtnetlink_link_wait(); nln_wait(nln); } } static int route_table_reset(void) { struct nl_dump dump; struct rtgenmsg *rtmsg; struct ofpbuf request, reply; route_map_clear(); route_table_valid = true; ofpbuf_init(&request, 0); nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETROUTE, NLM_F_REQUEST); rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg); rtmsg->rtgen_family = AF_INET; nl_dump_start(&dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); while (nl_dump_next(&dump, &reply)) { struct route_table_msg msg; if (route_table_parse(&reply, &msg)) { route_table_handle_msg(&msg); } } return nl_dump_done(&dump); } static bool route_table_parse(struct ofpbuf *buf, struct route_table_msg *change) { bool parsed; static const struct nl_policy policy[] = { [RTA_DST] = { .type = NL_A_U32, .optional = true }, [RTA_OIF] = { .type = NL_A_U32, .optional = false }, }; struct nlattr *attrs[ARRAY_SIZE(policy)]; parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg), policy, attrs, ARRAY_SIZE(policy)); if (parsed) { const struct rtmsg *rtm; const struct nlmsghdr *nlmsg; nlmsg = buf->data; rtm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *rtm); if (rtm->rtm_family != AF_INET) { VLOG_DBG_RL(&rl, "received non AF_INET rtnetlink route message"); return false; } memset(change, 0, sizeof *change); change->relevant = true; if (rtm->rtm_scope == RT_SCOPE_NOWHERE) { change->relevant = false; } if (rtm->rtm_type != RTN_UNICAST && rtm->rtm_type != RTN_LOCAL) { change->relevant = false; } change->nlmsg_type = nlmsg->nlmsg_type; change->rd.rtm_dst_len = rtm->rtm_dst_len; change->rd.rta_oif = nl_attr_get_u32(attrs[RTA_OIF]); if (attrs[RTA_DST]) { change->rd.rta_dst = ntohl(nl_attr_get_be32(attrs[RTA_DST])); } } else { VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message"); } return parsed; } static void route_table_change(const struct route_table_msg *change OVS_UNUSED, void *aux OVS_UNUSED) { route_table_valid = false; } static void 
route_table_handle_msg(const struct route_table_msg *change) { if (change->relevant && change->nlmsg_type == RTM_NEWROUTE && !route_node_lookup(&change->rd)) { struct route_node *rn; rn = xzalloc(sizeof *rn); memcpy(&rn->rd, &change->rd, sizeof change->rd); hmap_insert(&route_map, &rn->node, hash_route_data(&rn->rd)); } } static struct route_node * route_node_lookup(const struct route_data *rd) { struct route_node *rn; HMAP_FOR_EACH_WITH_HASH(rn, node, hash_route_data(rd), &route_map) { if (!memcmp(&rn->rd, rd, sizeof *rd)) { return rn; } } return NULL; } static struct route_node * route_node_lookup_by_ip(uint32_t ip) { int dst_len; struct route_node *rn, *rn_ret; dst_len = -1; rn_ret = NULL; HMAP_FOR_EACH(rn, node, &route_map) { uint32_t mask = 0xffffffff << (32 - rn->rd.rtm_dst_len); if (rn->rd.rta_dst == 0 && rn->rd.rtm_dst_len == 0) { /* Default route. */ continue; } if (rn->rd.rtm_dst_len > dst_len && (ip & mask) == (rn->rd.rta_dst & mask)) { rn_ret = rn; dst_len = rn->rd.rtm_dst_len; } } return rn_ret; } static void route_map_clear(void) { struct route_node *rn, *rn_next; HMAP_FOR_EACH_SAFE(rn, rn_next, node, &route_map) { hmap_remove(&route_map, &rn->node); free(rn); } } static uint32_t hash_route_data(const struct route_data *rd) { return hash_bytes(rd, sizeof *rd, 0); } /* name_table . 
*/ static void name_table_init(void) { hmap_init(&name_map); name_notifier = rtnetlink_link_notifier_create(name_table_change, NULL); name_table_valid = false; } static void name_table_uninit(void) { rtnetlink_link_notifier_destroy(name_notifier); name_notifier = NULL; name_map_clear(); hmap_destroy(&name_map); } static int name_table_reset(void) { struct nl_dump dump; struct rtgenmsg *rtmsg; struct ofpbuf request, reply; name_table_valid = true; name_map_clear(); ofpbuf_init(&request, 0); nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETLINK, NLM_F_REQUEST); rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg); rtmsg->rtgen_family = AF_INET; nl_dump_start(&dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); while (nl_dump_next(&dump, &reply)) { struct rtnetlink_link_change change; if (rtnetlink_link_parse(&reply, &change) && change.nlmsg_type == RTM_NEWLINK && !name_node_lookup(change.ifi_index)) { struct name_node *nn; nn = xzalloc(sizeof *nn); nn->ifi_index = change.ifi_index; ovs_strlcpy(nn->ifname, change.ifname, IFNAMSIZ); hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0)); } } return nl_dump_done(&dump); } static void name_table_change(const struct rtnetlink_link_change *change OVS_UNUSED, void *aux OVS_UNUSED) { /* Changes to interface status can cause routing table changes that some * versions of the linux kernel do not advertise for some reason. */ route_table_valid = false; name_table_valid = false; } static struct name_node * name_node_lookup(int ifi_index) { struct name_node *nn; HMAP_FOR_EACH_WITH_HASH(nn, node, hash_int(ifi_index, 0), &name_map) { if (nn->ifi_index == ifi_index) { return nn; } } return NULL; } static void name_map_clear(void) { struct name_node *nn, *nn_next; HMAP_FOR_EACH_SAFE(nn, nn_next, node, &name_map) { hmap_remove(&name_map, &nn->node); free(nn); } } openvswitch-2.0.1+git20140120/lib/route-table.h000066400000000000000000000020451226605124000206150ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ROUTE_TABLE_H #define ROUTE_TABLE_H 1 #include #include #include #include #include #include "openvswitch/types.h" bool route_table_get_ifindex(ovs_be32 ip, int *ifindex); bool route_table_get_name(ovs_be32 ip, char name[IFNAMSIZ]); void route_table_register(void); void route_table_unregister(void); void route_table_run(void); void route_table_wait(void); #endif /* route-table.h */ openvswitch-2.0.1+git20140120/lib/rtbsd.c000066400000000000000000000111751226605124000175070ustar00rootroot00000000000000/* * Copyright (c) 2011, 2013 Gaetano Catalli. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include "coverage.h" #include "socket-util.h" #include "poll-loop.h" #include "vlog.h" #include "rtbsd.h" VLOG_DEFINE_THIS_MODULE(rtbsd); COVERAGE_DEFINE(rtbsd_changed); /* PF_ROUTE socket. */ static int notify_sock = -1; /* All registered notifiers. 
*/ static struct list all_notifiers = LIST_INITIALIZER(&all_notifiers); static void rtbsd_report_change(const struct if_msghdr *); static void rtbsd_report_notify_error(void); /* Registers 'cb' to be called with auxiliary data 'aux' with network device * change notifications. The notifier is stored in 'notifier', which the * caller must not modify or free. * * Returns 0 if successful, otherwise a positive errno value. */ int rtbsd_notifier_register(struct rtbsd_notifier *notifier, rtbsd_notify_func *cb, void *aux) { if (notify_sock < 0) { int error; notify_sock = socket(PF_ROUTE, SOCK_RAW, 0); if (notify_sock < 0) { VLOG_WARN("could not create PF_ROUTE socket: %s", ovs_strerror(errno)); return errno; } error = set_nonblocking(notify_sock); if (error) { VLOG_WARN("error set_nonblocking PF_ROUTE socket: %s", ovs_strerror(error)); return error; } } else { /* Catch up on notification work so that the new notifier won't * receive any stale notifications. XXX*/ rtbsd_notifier_run(); } list_push_back(&all_notifiers, ¬ifier->node); notifier->cb = cb; notifier->aux = aux; return 0; } /* Cancels notification on 'notifier', which must have previously been * registered with rtbsd_notifier_register(). */ void rtbsd_notifier_unregister(struct rtbsd_notifier *notifier) { list_remove(¬ifier->node); if (list_is_empty(&all_notifiers)) { close(notify_sock); notify_sock = -1; } } /* Calls all of the registered notifiers, passing along any as-yet-unreported * netdev change events. 
*/ void rtbsd_notifier_run(void) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); struct if_msghdr msg; if (notify_sock < 0) { return; } for (;;) { int retval; msg.ifm_type = RTM_IFINFO; msg.ifm_version = RTM_VERSION; //XXX check if necessary /* read from PF_ROUTE socket */ retval = read(notify_sock, (char *)&msg, sizeof(msg)); if (retval >= 0) { /* received packet from PF_ROUTE socket * XXX check for bad packets */ if (msg.ifm_type == RTM_IFINFO) { rtbsd_report_change(&msg); } } else if (errno == EAGAIN) { return; } else { if (errno == ENOBUFS) { VLOG_WARN_RL(&rl, "PF_ROUTE receive buffer overflowed"); } else { VLOG_WARN_RL(&rl, "error reading PF_ROUTE socket: %s", ovs_strerror(errno)); } rtbsd_report_notify_error(); } } } /* Causes poll_block() to wake up when network device change notifications are * ready. */ void rtbsd_notifier_wait(void) { if (notify_sock >= 0) { poll_fd_wait(notify_sock, POLLIN); } } static void rtbsd_report_change(const struct if_msghdr *msg) { struct rtbsd_notifier *notifier; struct rtbsd_change change; COVERAGE_INC(rtbsd_changed); change.msg_type = msg->ifm_type; //XXX change.if_index = msg->ifm_index; if_indextoname(msg->ifm_index, change.if_name); change.master_ifindex = 0; //XXX LIST_FOR_EACH (notifier, node, &all_notifiers) { notifier->cb(&change, notifier->aux); } } /* If an error occurs the notifiers' callbacks are called with NULL changes */ static void rtbsd_report_notify_error(void) { struct rtbsd_notifier *notifier; LIST_FOR_EACH (notifier, node, &all_notifiers) { notifier->cb(NULL, notifier->aux); } } openvswitch-2.0.1+git20140120/lib/rtbsd.h000066400000000000000000000036051226605124000175130ustar00rootroot00000000000000/* * Copyright (c) 2011 Gaetano Catalli. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef RTBSD_H #define RTBSD_H 1 #include "list.h" /* * A digested version of a message received from a PF_ROUTE socket which * indicates that a network device has been created or destroyed or changed. */ struct rtbsd_change { /* Copied from struct if_msghdr. */ int msg_type; /* e.g. XXX. */ /* Copied from struct if_msghdr. */ int if_index; /* Index of network device. */ char if_name[IF_NAMESIZE]; /* Name of network device. */ int master_ifindex; /* Ifindex of datapath master (0 if none). */ }; /* * Function called to report that a netdev has changed. 'change' describes the * specific change. It may be null if the buffer of change information * overflowed, in which case the function must assume that every device may * have changed. 'aux' is as specified in the call to * rtbsd_notifier_register(). */ typedef void rtbsd_notify_func(const struct rtbsd_change *, void *aux); struct rtbsd_notifier { struct list node; rtbsd_notify_func *cb; void *aux; }; int rtbsd_notifier_register(struct rtbsd_notifier *, rtbsd_notify_func *, void *aux); void rtbsd_notifier_unregister(struct rtbsd_notifier *); void rtbsd_notifier_run(void); void rtbsd_notifier_wait(void); #endif /* rtbsd.h */ openvswitch-2.0.1+git20140120/lib/rtnetlink-link.c000066400000000000000000000103651226605124000213360ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "rtnetlink-link.h" #include #include #include #include "netlink.h" #include "netlink-notifier.h" #include "ofpbuf.h" static struct nln *nln = NULL; static struct rtnetlink_link_change rtn_change; /* Parses a rtnetlink message 'buf' into 'change'. If 'buf' is unparseable, * leaves 'change' untouched and returns false. Otherwise, populates 'change' * and returns true. */ bool rtnetlink_link_parse(struct ofpbuf *buf, struct rtnetlink_link_change *change) { bool parsed; /* Policy for RTNLGRP_LINK messages. * * There are *many* more fields in these messages, but currently we * only care about these fields. */ static const struct nl_policy policy[] = { [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, [IFLA_MTU] = { .type = NL_A_U32, .optional = true }, [IFLA_ADDRESS] = { .type = NL_A_UNSPEC, .optional = true }, }; struct nlattr *attrs[ARRAY_SIZE(policy)]; parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), policy, attrs, ARRAY_SIZE(policy)); if (parsed) { const struct nlmsghdr *nlmsg; const struct ifinfomsg *ifinfo; nlmsg = buf->data; ifinfo = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *ifinfo); change->nlmsg_type = nlmsg->nlmsg_type; change->ifi_index = ifinfo->ifi_index; change->ifname = nl_attr_get_string(attrs[IFLA_IFNAME]); change->ifi_flags = ifinfo->ifi_flags; change->master_ifindex = (attrs[IFLA_MASTER] ? nl_attr_get_u32(attrs[IFLA_MASTER]) : 0); change->mtu = (attrs[IFLA_MTU] ? 
nl_attr_get_u32(attrs[IFLA_MTU]) : 0); if (attrs[IFLA_ADDRESS] && nl_attr_get_size(attrs[IFLA_ADDRESS]) == ETH_ALEN) { memcpy(change->addr, nl_attr_get(attrs[IFLA_ADDRESS]), ETH_ALEN); } else { memset(change->addr, 0, ETH_ALEN); } } return parsed; } static bool rtnetlink_link_parse_cb(struct ofpbuf *buf, void *change) { return rtnetlink_link_parse(buf, change); } /* Registers 'cb' to be called with auxiliary data 'aux' with network device * change notifications. The notifier is stored in 'notifier', which the * caller must not modify or free. * * This is probably not the function that you want. You should probably be * using dpif_port_poll() or netdev_change_seq(), which unlike this function * are not Linux-specific. * * Returns an initialized nln_notifier if successful, NULL otherwise. */ struct nln_notifier * rtnetlink_link_notifier_create(rtnetlink_link_notify_func *cb, void *aux) { if (!nln) { nln = nln_create(NETLINK_ROUTE, RTNLGRP_LINK, rtnetlink_link_parse_cb, &rtn_change); } return nln_notifier_create(nln, (nln_notify_func *) cb, aux); } /* Destroys 'notifier', which must have previously been created with * rtnetlink_link_notifier_register(). */ void rtnetlink_link_notifier_destroy(struct nln_notifier *notifier) { nln_notifier_destroy(notifier); } /* Calls all of the registered notifiers, passing along any as-yet-unreported * netdev change events. */ void rtnetlink_link_run(void) { if (nln) { nln_run(nln); } } /* Causes poll_block() to wake up when network device change notifications are * ready. */ void rtnetlink_link_wait(void) { if (nln) { nln_wait(nln); } } openvswitch-2.0.1+git20140120/lib/rtnetlink-link.h000066400000000000000000000045571226605124000213510ustar00rootroot00000000000000/* * Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef RTNETLINK_LINK_H #define RTNETLINK_LINK_H 1 #include #include #include struct ofpbuf; struct nln_notifier; /* These functions are Linux specific, so they should be used directly only by * Linux-specific code. */ /* A digested version of an rtnetlink_link message sent down by the kernel to * indicate that a network device has been created, destroyed or changed. */ struct rtnetlink_link_change { /* Copied from struct nlmsghdr. */ int nlmsg_type; /* e.g. RTM_NEWLINK, RTM_DELLINK. */ /* Copied from struct ifinfomsg. */ int ifi_index; /* Index of network device. */ /* Extracted from Netlink attributes. */ const char *ifname; /* Name of network device. */ int master_ifindex; /* Ifindex of datapath master (0 if none). */ int mtu; /* Current MTU. */ uint8_t addr[ETH_ALEN]; unsigned int ifi_flags; /* Flags of network device. */ }; /* Function called to report that a netdev has changed. 'change' describes the * specific change. It may be null if the buffer of change information * overflowed, in which case the function must assume that every device may * have changed. 'aux' is as specified in the call to * rtnetlink_link_notifier_register(). 
*/ typedef void rtnetlink_link_notify_func(const struct rtnetlink_link_change *change, void *aux); bool rtnetlink_link_parse(struct ofpbuf *buf, struct rtnetlink_link_change *change); struct nln_notifier * rtnetlink_link_notifier_create(rtnetlink_link_notify_func *, void *aux); void rtnetlink_link_notifier_destroy(struct nln_notifier *); void rtnetlink_link_run(void); void rtnetlink_link_wait(void); #endif /* rtnetlink-link.h */ openvswitch-2.0.1+git20140120/lib/sat-math.h000066400000000000000000000026251226605124000201140ustar00rootroot00000000000000/* * Copyright (c) 2008, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef SAT_MATH_H #define SAT_MATH_H 1 #include /* Saturating addition: overflow yields UINT_MAX. */ static inline unsigned int sat_add(unsigned int x, unsigned int y) { return x + y >= x ? x + y : UINT_MAX; } /* Saturating subtraction: underflow yields 0. */ static inline unsigned int sat_sub(unsigned int x, unsigned int y) { return x >= y ? x - y : 0; } /* Saturating multiplication of "unsigned int"s: overflow yields UINT_MAX. */ #define SAT_MUL(X, Y) \ ((Y) == 0 ? 0 \ : (X) <= UINT_MAX / (Y) ? (unsigned int) (X) * (unsigned int) (Y) \ : UINT_MAX) static inline unsigned int sat_mul(unsigned int x, unsigned int y) { return SAT_MUL(x, y); } #endif /* sat-math.h */ openvswitch-2.0.1+git20140120/lib/seq.c000066400000000000000000000166451226605124000171700ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "seq.h" #include #include "hash.h" #include "hmap.h" #include "latch.h" #include "list.h" #include "ovs-thread.h" #include "poll-loop.h" /* A sequence number object. */ struct seq { uint64_t value OVS_GUARDED; struct hmap waiters OVS_GUARDED; /* Contains 'struct seq_waiter's. */ }; /* A thread waiting on a particular seq. */ struct seq_waiter { struct seq *seq OVS_GUARDED; /* Seq being waited for. */ struct hmap_node hmap_node OVS_GUARDED; /* In 'seq->waiters'. */ unsigned int ovsthread_id OVS_GUARDED; /* Key in 'waiters' hmap. */ struct seq_thread *thread OVS_GUARDED; /* Thread preparing to wait. */ struct list list_node OVS_GUARDED; /* In 'thread->waiters'. */ uint64_t value OVS_GUARDED; /* seq->value we're waiting to change. */ }; /* A thread that might be waiting on one or more seqs. */ struct seq_thread { struct list waiters OVS_GUARDED; /* Contains 'struct seq_waiter's. */ struct latch latch OVS_GUARDED; /* Wakeup latch for this thread. */ bool waiting OVS_GUARDED; /* True if latch_wait() already called. 
*/ }; static struct ovs_mutex seq_mutex = OVS_MUTEX_INITIALIZER; static uint64_t seq_next OVS_GUARDED_BY(seq_mutex) = 1; static pthread_key_t seq_thread_key; static void seq_init(void); static struct seq_thread *seq_thread_get(void) OVS_REQUIRES(seq_mutex); static void seq_thread_exit(void *thread_) OVS_EXCLUDED(seq_mutex); static void seq_thread_woke(struct seq_thread *) OVS_REQUIRES(seq_mutex); static void seq_waiter_destroy(struct seq_waiter *) OVS_REQUIRES(seq_mutex); static void seq_wake_waiters(struct seq *) OVS_REQUIRES(seq_mutex); /* Creates and returns a new 'seq' object. */ struct seq * OVS_EXCLUDED(seq_mutex) seq_create(void) { struct seq *seq; seq_init(); seq = xmalloc(sizeof *seq); ovs_mutex_lock(&seq_mutex); seq->value = seq_next++; hmap_init(&seq->waiters); ovs_mutex_unlock(&seq_mutex); return seq; } /* Destroys 'seq', waking up threads that were waiting on it, if any. */ void seq_destroy(struct seq *seq) OVS_EXCLUDED(seq_mutex) { ovs_mutex_lock(&seq_mutex); seq_wake_waiters(seq); hmap_destroy(&seq->waiters); free(seq); ovs_mutex_unlock(&seq_mutex); } /* Increments 'seq''s sequence number, waking up any threads that are waiting * on 'seq'. */ void seq_change(struct seq *seq) OVS_EXCLUDED(seq_mutex) { ovs_mutex_lock(&seq_mutex); seq->value = seq_next++; seq_wake_waiters(seq); ovs_mutex_unlock(&seq_mutex); } /* Returns 'seq''s current sequence number (which could change immediately). * * seq_read() and seq_wait() can be used together to yield a race-free wakeup * when an object changes, even without an ability to lock the object. See * Usage in seq.h for details. 
*/ uint64_t seq_read(const struct seq *seq) OVS_EXCLUDED(seq_mutex) { uint64_t value; ovs_mutex_lock(&seq_mutex); value = seq->value; ovs_mutex_unlock(&seq_mutex); return value; } static void seq_wait__(struct seq *seq, uint64_t value) OVS_REQUIRES(seq_mutex) { unsigned int id = ovsthread_id_self(); uint32_t hash = hash_int(id, 0); struct seq_waiter *waiter; HMAP_FOR_EACH_IN_BUCKET (waiter, hmap_node, hash, &seq->waiters) { if (waiter->ovsthread_id == id) { if (waiter->value != value) { /* The current value is different from the value we've already * waited for, */ poll_immediate_wake(); } else { /* Already waiting on 'value', nothing more to do. */ } return; } } waiter = xmalloc(sizeof *waiter); waiter->seq = seq; hmap_insert(&seq->waiters, &waiter->hmap_node, hash); waiter->ovsthread_id = id; waiter->value = value; waiter->thread = seq_thread_get(); list_push_back(&waiter->thread->waiters, &waiter->list_node); if (!waiter->thread->waiting) { latch_wait(&waiter->thread->latch); waiter->thread->waiting = true; } } /* Causes the following poll_block() to wake up when 'seq''s sequence number * changes from 'value'. (If 'seq''s sequence number isn't 'value', then * poll_block() won't block at all.) * * seq_read() and seq_wait() can be used together to yield a race-free wakeup * when an object changes, even without an ability to lock the object. See * Usage in seq.h for details. */ void seq_wait(const struct seq *seq_, uint64_t value) OVS_EXCLUDED(seq_mutex) { struct seq *seq = CONST_CAST(struct seq *, seq_); ovs_mutex_lock(&seq_mutex); if (value == seq->value) { seq_wait__(seq, value); } else { poll_immediate_wake(); } ovs_mutex_unlock(&seq_mutex); } /* Called by poll_block() just before it returns, this function destroys any * seq_waiter objects associated with the current thread. 
*/ void seq_woke(void) OVS_EXCLUDED(seq_mutex) { struct seq_thread *thread; seq_init(); thread = pthread_getspecific(seq_thread_key); if (thread) { ovs_mutex_lock(&seq_mutex); seq_thread_woke(thread); thread->waiting = false; ovs_mutex_unlock(&seq_mutex); } } static void seq_init(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { xpthread_key_create(&seq_thread_key, seq_thread_exit); ovsthread_once_done(&once); } } static struct seq_thread * seq_thread_get(void) OVS_REQUIRES(seq_mutex) { struct seq_thread *thread = pthread_getspecific(seq_thread_key); if (!thread) { thread = xmalloc(sizeof *thread); list_init(&thread->waiters); latch_init(&thread->latch); thread->waiting = false; xpthread_setspecific(seq_thread_key, thread); } return thread; } static void seq_thread_exit(void *thread_) OVS_EXCLUDED(seq_mutex) { struct seq_thread *thread = thread_; ovs_mutex_lock(&seq_mutex); seq_thread_woke(thread); latch_destroy(&thread->latch); free(thread); ovs_mutex_unlock(&seq_mutex); } static void seq_thread_woke(struct seq_thread *thread) OVS_REQUIRES(seq_mutex) { struct seq_waiter *waiter, *next_waiter; LIST_FOR_EACH_SAFE (waiter, next_waiter, list_node, &thread->waiters) { ovs_assert(waiter->thread == thread); seq_waiter_destroy(waiter); } latch_poll(&thread->latch); } static void seq_waiter_destroy(struct seq_waiter *waiter) OVS_REQUIRES(seq_mutex) { hmap_remove(&waiter->seq->waiters, &waiter->hmap_node); list_remove(&waiter->list_node); free(waiter); } static void seq_wake_waiters(struct seq *seq) OVS_REQUIRES(seq_mutex) { struct seq_waiter *waiter, *next_waiter; HMAP_FOR_EACH_SAFE (waiter, next_waiter, hmap_node, &seq->waiters) { latch_set(&waiter->thread->latch); seq_waiter_destroy(waiter); } } openvswitch-2.0.1+git20140120/lib/seq.h000066400000000000000000000102441226605124000171620ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef SEQ_H #define SEQ_H 1 /* Thread-safe, pollable sequence number. * * * Motivation * ========== * * It is sometimes desirable to take an action whenever an object changes. * Suppose we associate a sequence number with an object and increment the * sequence number whenever we change the object. An observer can then record * the sequence number it sees. Later on, if the current sequence number * differs from the one it saw last, then the observer knows to examine the * object for changes. * * Code that wants to run when a sequence number changes is challenging to * implement in a multithreaded environment. A naive implementation, that * simply checks whether the sequence number changed and, if so, calls * poll_immediate_wake(), will fail when another thread increments the sequence * number after the check (including during poll_block()). * * struct seq is a solution. It implements a sequence number along with enough * internal infrastructure so that a thread waiting on a particular value will * wake up if the sequence number changes, or even if the "struct seq" is * destroyed. * * * Usage * ===== * * The object that includes a sequence number should use seq_create() and * seq_destroy() at creation and destruction, and seq_change() whenever the * object's observable state changes. 
* * An observer may seq_read() to read the current sequence number and * seq_wait() to cause poll_block() to wake up when the sequence number changes * from a specified value. * * To avoid races, observers should use seq_read() to check for changes, * process any changes, and then use seq_wait() to wait for a change from the * previously read value. That is, a correct usage looks something like this: * * new_seq = seq_read(seq); * if (new_seq != last_seq) { * ...process changes... * last_seq = new_seq; * } * seq_wait(seq, new_seq); * poll_block(); * * * Alternate Usage * =============== * * struct seq can also be used as a sort of pollable condition variable. * Suppose that we want a thread to process items in a queue, and thus to be * able to wake up whenever the queue is nonempty. This requires a lock to * protect the queue and a seq to signal that the queue has become nonempty, * e.g.: * * struct ovs_mutex mutex; * struct list queue OVS_GUARDED_BY(mutex); * struct seq nonempty_seq; * * To add an element to the queue: * * ovs_mutex_lock(&mutex); * list_push_back(&queue, ...element...); * if (list_is_singleton(&queue)) { // The 'if' test here is optional. * seq_change(&nonempty_seq); * } * ovs_mutex_unlock(&mutex); * * To wait for the queue to become nonempty: * * ovs_mutex_lock(&mutex); * if (list_is_empty(&queue)) { * seq_wait(&nonempty_seq, seq_read(&nonempty_seq)); * } else { * poll_immediate_wake(); * } * ovs_mutex_unlock(&mutex); * * (In the above code 'mutex' prevents the queue from changing between * seq_read() and seq_wait(). Otherwise, it would be necessary to seq_read(), * check for a nonempty queue, and then seq_wait() on the previously read * sequence number, as under Usage above.) * * * Thread-safety * ============= * * Fully thread safe. */ #include /* For implementation of an object with a sequence number attached. */ struct seq *seq_create(void); void seq_destroy(struct seq *); void seq_change(struct seq *); /* For observers. 
*/ uint64_t seq_read(const struct seq *); void seq_wait(const struct seq *, uint64_t value); /* For poll_block() internal use. */ void seq_woke(void); #endif /* seq.h */ openvswitch-2.0.1+git20140120/lib/sflow.h000066400000000000000000000502331226605124000175260ustar00rootroot00000000000000/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of either the * Sun Industry Standards Source License 1.1, that is available at: * http://host-sflow.sourceforge.net/sissl.html * or the InMon sFlow License, that is available at: * http://www.inmon.com/technology/sflowlicense.txt */ #ifndef SFLOW_H #define SFLOW_H 1 typedef enum { SFL_DSCLASS_IFINDEX = 0, SFL_DSCLASS_VLAN = 1, SFL_DSCLASS_PHYSICAL_ENTITY = 2, SFL_DSCLASS_LOGICAL_ENTITY = 3 } SFL_DSCLASS; enum SFLAddress_type { SFLADDRESSTYPE_IP_V4 = 1, SFLADDRESSTYPE_IP_V6 = 2 }; typedef struct { u_int32_t addr; } SFLIPv4; typedef struct { u_char addr[16]; } SFLIPv6; typedef union _SFLAddress_value { SFLIPv4 ip_v4; SFLIPv6 ip_v6; } SFLAddress_value; typedef struct _SFLAddress { u_int32_t type; /* enum SFLAddress_type */ SFLAddress_value address; } SFLAddress; /* Packet header data */ #define SFL_DEFAULT_HEADER_SIZE 128 #define SFL_DEFAULT_COLLECTOR_PORT 6343 #define SFL_DEFAULT_SAMPLING_RATE 400 #define SFL_DEFAULT_POLLING_INTERVAL 30 /* The header protocol describes the format of the sampled header */ enum SFLHeader_protocol { SFLHEADER_ETHERNET_ISO8023 = 1, SFLHEADER_ISO88024_TOKENBUS = 2, SFLHEADER_ISO88025_TOKENRING = 3, SFLHEADER_FDDI = 4, SFLHEADER_FRAME_RELAY = 5, SFLHEADER_X25 = 6, SFLHEADER_PPP = 7, SFLHEADER_SMDS = 8, SFLHEADER_AAL5 = 9, SFLHEADER_AAL5_IP = 10, /* e.g. 
Cisco AAL5 mux */ SFLHEADER_IPv4 = 11, SFLHEADER_IPv6 = 12, SFLHEADER_MPLS = 13 }; /* raw sampled header */ typedef struct _SFLSampled_header { u_int32_t header_protocol; /* (enum SFLHeader_protocol) */ u_int32_t frame_length; /* Original length of packet before sampling */ u_int32_t stripped; /* header/trailer bytes stripped by sender */ u_int32_t header_length; /* length of sampled header bytes to follow */ u_int8_t *header_bytes; /* Header bytes */ } SFLSampled_header; /* decoded ethernet header */ typedef struct _SFLSampled_ethernet { u_int32_t eth_len; /* The length of the MAC packet excluding lower layer encapsulations */ u_int8_t src_mac[8]; /* 6 bytes + 2 pad */ u_int8_t dst_mac[8]; u_int32_t eth_type; } SFLSampled_ethernet; /* decoded IP version 4 header */ typedef struct _SFLSampled_ipv4 { u_int32_t length; /* The length of the IP packet excluding lower layer encapsulations */ u_int32_t protocol; /* IP Protocol type (for example, TCP = 6, UDP = 17) */ SFLIPv4 src_ip; /* Source IP Address */ SFLIPv4 dst_ip; /* Destination IP Address */ u_int32_t src_port; /* TCP/UDP source port number or equivalent */ u_int32_t dst_port; /* TCP/UDP destination port number or equivalent */ u_int32_t tcp_flags; /* TCP flags */ u_int32_t tos; /* IP type of service */ } SFLSampled_ipv4; /* decoded IP version 6 data */ typedef struct _SFLSampled_ipv6 { u_int32_t length; /* The length of the IP packet excluding lower layer encapsulations */ u_int32_t protocol; /* IP Protocol type (for example, TCP = 6, UDP = 17) */ SFLIPv6 src_ip; /* Source IP Address */ SFLIPv6 dst_ip; /* Destination IP Address */ u_int32_t src_port; /* TCP/UDP source port number or equivalent */ u_int32_t dst_port; /* TCP/UDP destination port number or equivalent */ u_int32_t tcp_flags; /* TCP flags */ u_int32_t priority; /* IP priority */ } SFLSampled_ipv6; /* Extended data types */ /* Extended switch data */ typedef struct _SFLExtended_switch { u_int32_t src_vlan; /* The 802.1Q VLAN id of incomming frame */ 
u_int32_t src_priority; /* The 802.1p priority */ u_int32_t dst_vlan; /* The 802.1Q VLAN id of outgoing frame */ u_int32_t dst_priority; /* The 802.1p priority */ } SFLExtended_switch; /* Extended router data */ typedef struct _SFLExtended_router { SFLAddress nexthop; /* IP address of next hop router */ u_int32_t src_mask; /* Source address prefix mask bits */ u_int32_t dst_mask; /* Destination address prefix mask bits */ } SFLExtended_router; /* Extended gateway data */ enum SFLExtended_as_path_segment_type { SFLEXTENDED_AS_SET = 1, /* Unordered set of ASs */ SFLEXTENDED_AS_SEQUENCE = 2 /* Ordered sequence of ASs */ }; typedef struct _SFLExtended_as_path_segment { u_int32_t type; /* enum SFLExtended_as_path_segment_type */ u_int32_t length; /* number of AS numbers in set/sequence */ union { u_int32_t *set; u_int32_t *seq; } as; } SFLExtended_as_path_segment; typedef struct _SFLExtended_gateway { SFLAddress nexthop; /* Address of the border router that should be used for the destination network */ u_int32_t as; /* AS number for this gateway */ u_int32_t src_as; /* AS number of source (origin) */ u_int32_t src_peer_as; /* AS number of source peer */ u_int32_t dst_as_path_segments; /* number of segments in path */ SFLExtended_as_path_segment *dst_as_path; /* list of seqs or sets */ u_int32_t communities_length; /* number of communities */ u_int32_t *communities; /* set of communities */ u_int32_t localpref; /* LocalPref associated with this route */ } SFLExtended_gateway; typedef struct _SFLString { u_int32_t len; char *str; } SFLString; /* Extended user data */ typedef struct _SFLExtended_user { u_int32_t src_charset; /* MIBEnum value of character set used to encode a string - See RFC 2978 Where possible UTF-8 encoding (MIBEnum=106) should be used. A value of zero indicates an unknown encoding. 
*/ SFLString src_user; u_int32_t dst_charset; SFLString dst_user; } SFLExtended_user; /* Extended URL data */ enum SFLExtended_url_direction { SFLEXTENDED_URL_SRC = 1, /* URL is associated with source address */ SFLEXTENDED_URL_DST = 2 /* URL is associated with destination address */ }; typedef struct _SFLExtended_url { u_int32_t direction; /* enum SFLExtended_url_direction */ SFLString url; /* URL associated with the packet flow. Must be URL encoded */ SFLString host; /* The host field from the HTTP header */ } SFLExtended_url; /* Extended MPLS data */ typedef struct _SFLLabelStack { u_int32_t depth; u_int32_t *stack; /* first entry is top of stack - see RFC 3032 for encoding */ } SFLLabelStack; typedef struct _SFLExtended_mpls { SFLAddress nextHop; /* Address of the next hop */ SFLLabelStack in_stack; SFLLabelStack out_stack; } SFLExtended_mpls; /* Extended NAT data Packet header records report addresses as seen at the sFlowDataSource. The extended_nat structure reports on translated source and/or destination addesses for this packet. If an address was not translated it should be equal to that reported for the header. 
*/ typedef struct _SFLExtended_nat { SFLAddress src; /* Source address */ SFLAddress dst; /* Destination address */ } SFLExtended_nat; /* additional Extended MPLS stucts */ typedef struct _SFLExtended_mpls_tunnel { SFLString tunnel_lsp_name; /* Tunnel name */ u_int32_t tunnel_id; /* Tunnel ID */ u_int32_t tunnel_cos; /* Tunnel COS value */ } SFLExtended_mpls_tunnel; typedef struct _SFLExtended_mpls_vc { SFLString vc_instance_name; /* VC instance name */ u_int32_t vll_vc_id; /* VLL/VC instance ID */ u_int32_t vc_label_cos; /* VC Label COS value */ } SFLExtended_mpls_vc; /* Extended MPLS FEC - Definitions from MPLS-FTN-STD-MIB mplsFTNTable */ typedef struct _SFLExtended_mpls_FTN { SFLString mplsFTNDescr; u_int32_t mplsFTNMask; } SFLExtended_mpls_FTN; /* Extended MPLS LVP FEC - Definition from MPLS-LDP-STD-MIB mplsFecTable Note: mplsFecAddrType, mplsFecAddr information available from packet header */ typedef struct _SFLExtended_mpls_LDP_FEC { u_int32_t mplsFecAddrPrefixLength; } SFLExtended_mpls_LDP_FEC; /* Extended VLAN tunnel information Record outer VLAN encapsulations that have been stripped. extended_vlantunnel information should only be reported if all the following conditions are satisfied: 1. The packet has nested vlan tags, AND 2. The reporting device is VLAN aware, AND 3. One or more VLAN tags have been stripped, either because they represent proprietary encapsulations, or because switch hardware automatically strips the outer VLAN encapsulation. Reporting extended_vlantunnel information is not a substitute for reporting extended_switch information. extended_switch data must always be reported to describe the ingress/egress VLAN information for the packet. The extended_vlantunnel information only applies to nested VLAN tags, and then only when one or more tags has been stripped. */ typedef SFLLabelStack SFLVlanStack; typedef struct _SFLExtended_vlan_tunnel { SFLVlanStack stack; /* List of stripped 802.1Q TPID/TCI layers. 
Each TPID,TCI pair is represented as a single 32 bit integer. Layers listed from outermost to innermost. */ } SFLExtended_vlan_tunnel; enum SFLFlow_type_tag { /* enterprise = 0, format = ... */ SFLFLOW_HEADER = 1, /* Packet headers are sampled */ SFLFLOW_ETHERNET = 2, /* MAC layer information */ SFLFLOW_IPV4 = 3, /* IP version 4 data */ SFLFLOW_IPV6 = 4, /* IP version 6 data */ SFLFLOW_EX_SWITCH = 1001, /* Extended switch information */ SFLFLOW_EX_ROUTER = 1002, /* Extended router information */ SFLFLOW_EX_GATEWAY = 1003, /* Extended gateway router information */ SFLFLOW_EX_USER = 1004, /* Extended TACAS/RADIUS user information */ SFLFLOW_EX_URL = 1005, /* Extended URL information */ SFLFLOW_EX_MPLS = 1006, /* Extended MPLS information */ SFLFLOW_EX_NAT = 1007, /* Extended NAT information */ SFLFLOW_EX_MPLS_TUNNEL = 1008, /* additional MPLS information */ SFLFLOW_EX_MPLS_VC = 1009, SFLFLOW_EX_MPLS_FTN = 1010, SFLFLOW_EX_MPLS_LDP_FEC = 1011, SFLFLOW_EX_VLAN_TUNNEL = 1012, /* VLAN stack */ }; typedef union _SFLFlow_type { SFLSampled_header header; SFLSampled_ethernet ethernet; SFLSampled_ipv4 ipv4; SFLSampled_ipv6 ipv6; SFLExtended_switch sw; SFLExtended_router router; SFLExtended_gateway gateway; SFLExtended_user user; SFLExtended_url url; SFLExtended_mpls mpls; SFLExtended_nat nat; SFLExtended_mpls_tunnel mpls_tunnel; SFLExtended_mpls_vc mpls_vc; SFLExtended_mpls_FTN mpls_ftn; SFLExtended_mpls_LDP_FEC mpls_ldp_fec; SFLExtended_vlan_tunnel vlan_tunnel; } SFLFlow_type; typedef struct _SFLFlow_sample_element { struct _SFLFlow_sample_element *nxt; u_int32_t tag; /* SFLFlow_type_tag */ u_int32_t length; SFLFlow_type flowType; } SFLFlow_sample_element; enum SFL_sample_tag { SFLFLOW_SAMPLE = 1, /* enterprise = 0 : format = 1 */ SFLCOUNTERS_SAMPLE = 2, /* enterprise = 0 : format = 2 */ SFLFLOW_SAMPLE_EXPANDED = 3, /* enterprise = 0 : format = 3 */ SFLCOUNTERS_SAMPLE_EXPANDED = 4 /* enterprise = 0 : format = 4 */ }; /* Format of a single flow sample */ typedef struct 
_SFLFlow_sample { /* u_int32_t tag; */ /* SFL_sample_tag -- enterprise = 0 : format = 1 */ /* u_int32_t length; */ u_int32_t sequence_number; /* Incremented with each flow sample generated */ u_int32_t source_id; /* fsSourceId */ u_int32_t sampling_rate; /* fsPacketSamplingRate */ u_int32_t sample_pool; /* Total number of packets that could have been sampled (i.e. packets skipped by sampling process + total number of samples) */ u_int32_t drops; /* Number of times a packet was dropped due to lack of resources */ u_int32_t input; /* SNMP ifIndex of input interface. 0 if interface is not known. */ u_int32_t output; /* SNMP ifIndex of output interface, 0 if interface is not known. Set most significant bit to indicate multiple destination interfaces (i.e. in case of broadcast or multicast) and set lower order bits to indicate number of destination interfaces. Examples: 0x00000002 indicates ifIndex = 2 0x00000000 ifIndex unknown. 0x80000007 indicates a packet sent to 7 interfaces. 0x80000000 indicates a packet sent to an unknown number of interfaces greater than 1.*/ u_int32_t num_elements; SFLFlow_sample_element *elements; } SFLFlow_sample; /* same thing, but the expanded version (for full 32-bit ifIndex numbers) */ typedef struct _SFLFlow_sample_expanded { /* u_int32_t tag; */ /* SFL_sample_tag -- enterprise = 0 : format = 1 */ /* u_int32_t length; */ u_int32_t sequence_number; /* Incremented with each flow sample generated */ u_int32_t ds_class; /* EXPANDED */ u_int32_t ds_index; /* EXPANDED */ u_int32_t sampling_rate; /* fsPacketSamplingRate */ u_int32_t sample_pool; /* Total number of packets that could have been sampled (i.e. packets skipped by sampling process + total number of samples) */ u_int32_t drops; /* Number of times a packet was dropped due to lack of resources */ u_int32_t inputFormat; /* EXPANDED */ u_int32_t input; /* SNMP ifIndex of input interface. 0 if interface is not known. 
*/ u_int32_t outputFormat; /* EXPANDED */ u_int32_t output; /* SNMP ifIndex of output interface, 0 if interface is not known. */ u_int32_t num_elements; SFLFlow_sample_element *elements; } SFLFlow_sample_expanded; /* Counter types */ /* Generic interface counters - see RFC 1573, 2233 */ typedef struct _SFLIf_counters { u_int32_t ifIndex; u_int32_t ifType; u_int64_t ifSpeed; u_int32_t ifDirection; /* Derived from MAU MIB (RFC 2668) 0 = unknown, 1 = full-duplex, 2 = half-duplex, 3 = in, 4 = out */ u_int32_t ifStatus; /* bit field with the following bits assigned: bit 0 = ifAdminStatus (0 = down, 1 = up) bit 1 = ifOperStatus (0 = down, 1 = up) */ u_int64_t ifInOctets; u_int32_t ifInUcastPkts; u_int32_t ifInMulticastPkts; u_int32_t ifInBroadcastPkts; u_int32_t ifInDiscards; u_int32_t ifInErrors; u_int32_t ifInUnknownProtos; u_int64_t ifOutOctets; u_int32_t ifOutUcastPkts; u_int32_t ifOutMulticastPkts; u_int32_t ifOutBroadcastPkts; u_int32_t ifOutDiscards; u_int32_t ifOutErrors; u_int32_t ifPromiscuousMode; } SFLIf_counters; /* Ethernet interface counters - see RFC 2358 */ typedef struct _SFLEthernet_counters { u_int32_t dot3StatsAlignmentErrors; u_int32_t dot3StatsFCSErrors; u_int32_t dot3StatsSingleCollisionFrames; u_int32_t dot3StatsMultipleCollisionFrames; u_int32_t dot3StatsSQETestErrors; u_int32_t dot3StatsDeferredTransmissions; u_int32_t dot3StatsLateCollisions; u_int32_t dot3StatsExcessiveCollisions; u_int32_t dot3StatsInternalMacTransmitErrors; u_int32_t dot3StatsCarrierSenseErrors; u_int32_t dot3StatsFrameTooLongs; u_int32_t dot3StatsInternalMacReceiveErrors; u_int32_t dot3StatsSymbolErrors; } SFLEthernet_counters; /* Token ring counters - see RFC 1748 */ typedef struct _SFLTokenring_counters { u_int32_t dot5StatsLineErrors; u_int32_t dot5StatsBurstErrors; u_int32_t dot5StatsACErrors; u_int32_t dot5StatsAbortTransErrors; u_int32_t dot5StatsInternalErrors; u_int32_t dot5StatsLostFrameErrors; u_int32_t dot5StatsReceiveCongestions; u_int32_t 
dot5StatsFrameCopiedErrors; u_int32_t dot5StatsTokenErrors; u_int32_t dot5StatsSoftErrors; u_int32_t dot5StatsHardErrors; u_int32_t dot5StatsSignalLoss; u_int32_t dot5StatsTransmitBeacons; u_int32_t dot5StatsRecoverys; u_int32_t dot5StatsLobeWires; u_int32_t dot5StatsRemoves; u_int32_t dot5StatsSingles; u_int32_t dot5StatsFreqErrors; } SFLTokenring_counters; /* 100 BaseVG interface counters - see RFC 2020 */ typedef struct _SFLVg_counters { u_int32_t dot12InHighPriorityFrames; u_int64_t dot12InHighPriorityOctets; u_int32_t dot12InNormPriorityFrames; u_int64_t dot12InNormPriorityOctets; u_int32_t dot12InIPMErrors; u_int32_t dot12InOversizeFrameErrors; u_int32_t dot12InDataErrors; u_int32_t dot12InNullAddressedFrames; u_int32_t dot12OutHighPriorityFrames; u_int64_t dot12OutHighPriorityOctets; u_int32_t dot12TransitionIntoTrainings; u_int64_t dot12HCInHighPriorityOctets; u_int64_t dot12HCInNormPriorityOctets; u_int64_t dot12HCOutHighPriorityOctets; } SFLVg_counters; typedef struct _SFLVlan_counters { u_int32_t vlan_id; u_int64_t octets; u_int32_t ucastPkts; u_int32_t multicastPkts; u_int32_t broadcastPkts; u_int32_t discards; } SFLVlan_counters; /* Counters data */ enum SFLCounters_type_tag { /* enterprise = 0, format = ... 
*/ SFLCOUNTERS_GENERIC = 1, SFLCOUNTERS_ETHERNET = 2, SFLCOUNTERS_TOKENRING = 3, SFLCOUNTERS_VG = 4, SFLCOUNTERS_VLAN = 5 }; typedef union _SFLCounters_type { SFLIf_counters generic; SFLEthernet_counters ethernet; SFLTokenring_counters tokenring; SFLVg_counters vg; SFLVlan_counters vlan; } SFLCounters_type; typedef struct _SFLCounters_sample_element { struct _SFLCounters_sample_element *nxt; /* linked list */ u_int32_t tag; /* SFLCounters_type_tag */ u_int32_t length; SFLCounters_type counterBlock; } SFLCounters_sample_element; typedef struct _SFLCounters_sample { /* u_int32_t tag; */ /* SFL_sample_tag -- enterprise = 0 : format = 2 */ /* u_int32_t length; */ u_int32_t sequence_number; /* Incremented with each counters sample generated by this source_id */ u_int32_t source_id; /* fsSourceId */ u_int32_t num_elements; SFLCounters_sample_element *elements; } SFLCounters_sample; /* same thing, but the expanded version, so ds_index can be a full 32 bits */ typedef struct _SFLCounters_sample_expanded { /* u_int32_t tag; */ /* SFL_sample_tag -- enterprise = 0 : format = 2 */ /* u_int32_t length; */ u_int32_t sequence_number; /* Incremented with each counters sample generated by this source_id */ u_int32_t ds_class; /* EXPANDED */ u_int32_t ds_index; /* EXPANDED */ u_int32_t num_elements; SFLCounters_sample_element *elements; } SFLCounters_sample_expanded; #define SFLADD_ELEMENT(_sm, _el) do { (_el)->nxt = (_sm)->elements; (_sm)->elements = (_el); } while(0) /* Format of a sample datagram */ enum SFLDatagram_version { SFLDATAGRAM_VERSION2 = 2, SFLDATAGRAM_VERSION4 = 4, SFLDATAGRAM_VERSION5 = 5 }; typedef struct _SFLSample_datagram_hdr { u_int32_t datagram_version; /* (enum SFLDatagram_version) = VERSION5 = 5 */ SFLAddress agent_address; /* IP address of sampling agent */ u_int32_t sub_agent_id; /* Used to distinguishing between datagram streams from separate agent sub entities within an device. 
*/ u_int32_t sequence_number; /* Incremented with each sample datagram generated */ u_int32_t uptime; /* Current time (in milliseconds since device last booted). Should be set as close to datagram transmission time as possible.*/ u_int32_t num_records; /* Number of tag-len-val flow/counter records to follow */ } SFLSample_datagram_hdr; #define SFL_MAX_DATAGRAM_SIZE 1500 #define SFL_MIN_DATAGRAM_SIZE 200 #define SFL_DEFAULT_DATAGRAM_SIZE 1400 #define SFL_DATA_PAD 400 #endif /* SFLOW_H */ openvswitch-2.0.1+git20140120/lib/sflow_agent.c000066400000000000000000000415341226605124000207030ustar00rootroot00000000000000/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of either the * Sun Industry Standards Source License 1.1, that is available at: * http://host-sflow.sourceforge.net/sissl.html * or the InMon sFlow License, that is available at: * http://www.inmon.com/technology/sflowlicense.txt */ #include "sflow_api.h" #include "util.h" static void * sflAlloc(SFLAgent *agent, size_t bytes); static void sflFree(SFLAgent *agent, void *obj); static void sfl_agent_jumpTableAdd(SFLAgent *agent, SFLSampler *sampler); static void sfl_agent_jumpTableRemove(SFLAgent *agent, SFLSampler *sampler); /*________________--------------------------__________________ ________________ sfl_agent_init __________________ ----------------__________________________------------------ */ void sfl_agent_init(SFLAgent *agent, SFLAddress *myIP, /* IP address of this agent in net byte order */ u_int32_t subId, /* agent_sub_id */ time_t bootTime, /* agent boot time */ time_t now, /* time now */ void *magic, /* ptr to pass back in logging and alloc fns */ allocFn_t allocFn, freeFn_t freeFn, errorFn_t errorFn, sendFn_t sendFn) { /* first clear everything */ memset(agent, 0, sizeof(*agent)); /* now copy in the parameters */ agent->myIP = *myIP; /* structure copy */ agent->subId = subId; agent->bootTime = bootTime; agent->now = now; agent->magic = magic; agent->allocFn = allocFn; agent->freeFn 
= freeFn; agent->errorFn = errorFn; agent->sendFn = sendFn; #ifdef SFLOW_DO_SOCKET if(sendFn == NULL) { /* open the socket - really need one for v4 and another for v6? */ if((agent->receiverSocket4 = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)) == -1) sfl_agent_sysError(agent, "agent", "IPv4 socket open failed"); if((agent->receiverSocket6 = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP)) == -1) sfl_agent_sysError(agent, "agent", "IPv6 socket open failed"); } #endif } /*_________________---------------------------__________________ _________________ sfl_agent_release __________________ -----------------___________________________------------------ */ void sfl_agent_release(SFLAgent *agent) { /* release and free the samplers, pollers and receivers */ SFLSampler *sm = agent->samplers; SFLPoller *pl = agent->pollers; SFLReceiver *rcv = agent->receivers; for(; sm != NULL; ) { SFLSampler *nextSm = sm->nxt; sflFree(agent, sm); sm = nextSm; } agent->samplers = NULL; for(; pl != NULL; ) { SFLPoller *nextPl = pl->nxt; sflFree(agent, pl); pl = nextPl; } agent->pollers = NULL; for(; rcv != NULL; ) { SFLReceiver *nextRcv = rcv->nxt; sflFree(agent, rcv); rcv = nextRcv; } agent->receivers = NULL; #ifdef SFLOW_DO_SOCKET /* close the sockets */ if(agent->receiverSocket4 > 0) close(agent->receiverSocket4); if(agent->receiverSocket6 > 0) close(agent->receiverSocket6); #endif } /*_________________---------------------------__________________ _________________ sfl_agent_set_* __________________ -----------------___________________________------------------ */ void sfl_agent_set_agentAddress(SFLAgent *agent, SFLAddress *addr) { if(addr && memcmp(addr, &agent->myIP, sizeof(agent->myIP)) != 0) { /* change of address */ agent->myIP = *addr; /* structure copy */ /* reset sequence numbers here? */ } } void sfl_agent_set_agentSubId(SFLAgent *agent, u_int32_t subId) { if(subId != agent->subId) { /* change of subId */ agent->subId = subId; /* reset sequence numbers here? 
*/ } } /*_________________---------------------------__________________ _________________ sfl_agent_tick __________________ -----------------___________________________------------------ */ void sfl_agent_tick(SFLAgent *agent, time_t now) { SFLReceiver *rcv = agent->receivers; SFLSampler *sm = agent->samplers; SFLPoller *pl = agent->pollers; agent->now = now; /* receivers use ticks to flush send data */ for(; rcv != NULL; rcv = rcv->nxt) sfl_receiver_tick(rcv, now); /* samplers use ticks to decide when they are sampling too fast */ for(; sm != NULL; sm = sm->nxt) sfl_sampler_tick(sm, now); /* pollers use ticks to decide when to ask for counters */ for(; pl != NULL; pl = pl->nxt) sfl_poller_tick(pl, now); } /*_________________---------------------------__________________ _________________ sfl_agent_addReceiver __________________ -----------------___________________________------------------ */ SFLReceiver *sfl_agent_addReceiver(SFLAgent *agent) { SFLReceiver *rcv = (SFLReceiver *)sflAlloc(agent, sizeof(SFLReceiver)); sfl_receiver_init(rcv, agent); /* add to end of list - to preserve the receiver index numbers for existing receivers */ { SFLReceiver *r, *prev = NULL; for(r = agent->receivers; r != NULL; prev = r, r = r->nxt); if(prev) prev->nxt = rcv; else agent->receivers = rcv; rcv->nxt = NULL; } return rcv; } /*_________________---------------------------__________________ _________________ sfl_dsi_compare __________________ -----------------___________________________------------------ Note that if there is a mixture of ds_classes for this agent, then the simple numeric comparison may not be correct - the sort order (for the purposes of the SNMP MIB) should really be determined by the OID that these numeric ds_class numbers are a shorthand for. 
For example, ds_class == 0 means ifIndex, which is the oid "1.3.6.1.2.1.2.2.1" */ static inline int sfl_dsi_compare(SFLDataSource_instance *pdsi1, SFLDataSource_instance *pdsi2) { /* could have used just memcmp(), but not sure if that would give the right answer on little-endian platforms. Safer to be explicit... */ int cmp = pdsi2->ds_class - pdsi1->ds_class; if(cmp == 0) cmp = pdsi2->ds_index - pdsi1->ds_index; if(cmp == 0) cmp = pdsi2->ds_instance - pdsi1->ds_instance; return cmp; } /*_________________---------------------------__________________ _________________ sfl_agent_addSampler __________________ -----------------___________________________------------------ */ SFLSampler *sfl_agent_addSampler(SFLAgent *agent, SFLDataSource_instance *pdsi) { /* Keep the list sorted. */ SFLSampler *prev = NULL, *sm = agent->samplers; for(; sm != NULL; prev = sm, sm = sm->nxt) { int64_t cmp = sfl_dsi_compare(pdsi, &sm->dsi); if(cmp == 0) return sm; /* found - return existing one */ if(cmp < 0) break; /* insert here */ } /* either we found the insert point, or reached the end of the list...*/ { SFLSampler *newsm = (SFLSampler *)sflAlloc(agent, sizeof(SFLSampler)); sfl_sampler_init(newsm, agent, pdsi); if(prev) prev->nxt = newsm; else agent->samplers = newsm; newsm->nxt = sm; /* see if we should go in the ifIndex jumpTable */ if(SFL_DS_CLASS(newsm->dsi) == 0) { SFLSampler *test = sfl_agent_getSamplerByIfIndex(agent, SFL_DS_INDEX(newsm->dsi)); if(test && (SFL_DS_INSTANCE(newsm->dsi) < SFL_DS_INSTANCE(test->dsi))) { /* replace with this new one because it has a lower ds_instance number */ sfl_agent_jumpTableRemove(agent, test); test = NULL; } if(test == NULL) sfl_agent_jumpTableAdd(agent, newsm); } return newsm; } } /*_________________---------------------------__________________ _________________ sfl_agent_addPoller __________________ -----------------___________________________------------------ */ SFLPoller *sfl_agent_addPoller(SFLAgent *agent, SFLDataSource_instance *pdsi, 
void *magic, /* ptr to pass back in getCountersFn() */ getCountersFn_t getCountersFn) { /* keep the list sorted */ SFLPoller *prev = NULL, *pl = agent->pollers; for(; pl != NULL; prev = pl, pl = pl->nxt) { int64_t cmp = sfl_dsi_compare(pdsi, &pl->dsi); if(cmp == 0) return pl; /* found - return existing one */ if(cmp < 0) break; /* insert here */ } /* either we found the insert point, or reached the end of the list... */ { SFLPoller *newpl = (SFLPoller *)sflAlloc(agent, sizeof(SFLPoller)); sfl_poller_init(newpl, agent, pdsi, magic, getCountersFn); if(prev) prev->nxt = newpl; else agent->pollers = newpl; newpl->nxt = pl; return newpl; } } /*_________________---------------------------__________________ _________________ sfl_agent_removeSampler __________________ -----------------___________________________------------------ */ int sfl_agent_removeSampler(SFLAgent *agent, SFLDataSource_instance *pdsi) { /* find it, unlink it and free it */ SFLSampler *prev = NULL, *sm = agent->samplers; for(; sm != NULL; prev = sm, sm = sm->nxt) { if(sfl_dsi_compare(pdsi, &sm->dsi) == 0) { if(prev == NULL) agent->samplers = sm->nxt; else prev->nxt = sm->nxt; sfl_agent_jumpTableRemove(agent, sm); sflFree(agent, sm); return 1; } } /* not found */ return 0; } /*_________________---------------------------__________________ _________________ sfl_agent_removePoller __________________ -----------------___________________________------------------ */ int sfl_agent_removePoller(SFLAgent *agent, SFLDataSource_instance *pdsi) { /* find it, unlink it and free it */ SFLPoller *prev = NULL, *pl = agent->pollers; for(; pl != NULL; prev = pl, pl = pl->nxt) { if(sfl_dsi_compare(pdsi, &pl->dsi) == 0) { if(prev == NULL) agent->pollers = pl->nxt; else prev->nxt = pl->nxt; sflFree(agent, pl); return 1; } } /* not found */ return 0; } /*_________________--------------------------------__________________ _________________ sfl_agent_jumpTableAdd __________________ 
-----------------________________________________------------------ */ static void sfl_agent_jumpTableAdd(SFLAgent *agent, SFLSampler *sampler) { u_int32_t hashIndex = SFL_DS_INDEX(sampler->dsi) % SFL_HASHTABLE_SIZ; sampler->hash_nxt = agent->jumpTable[hashIndex]; agent->jumpTable[hashIndex] = sampler; } /*_________________--------------------------------__________________ _________________ sfl_agent_jumpTableRemove __________________ -----------------________________________________------------------ */ static void sfl_agent_jumpTableRemove(SFLAgent *agent, SFLSampler *sampler) { u_int32_t hashIndex = SFL_DS_INDEX(sampler->dsi) % SFL_HASHTABLE_SIZ; SFLSampler *search = agent->jumpTable[hashIndex], *prev = NULL; for( ; search != NULL; prev = search, search = search->hash_nxt) if(search == sampler) break; if(search) { // found - unlink if(prev) prev->hash_nxt = search->hash_nxt; else agent->jumpTable[hashIndex] = search->hash_nxt; search->hash_nxt = NULL; } } /*_________________--------------------------------__________________ _________________ sfl_agent_getSamplerByIfIndex __________________ -----------------________________________________------------------ fast lookup (pointers cached in hash table). If there are multiple sampler instances for a given ifIndex, then this fn will return the one with the lowest instance number. Since the samplers list is sorted, this means the other instances will be accesible by following the sampler->nxt pointer (until the ds_class or ds_index changes). This is helpful if you need to offer the same flowSample to multiple samplers. 
*/ SFLSampler *sfl_agent_getSamplerByIfIndex(SFLAgent *agent, u_int32_t ifIndex) { SFLSampler *search = agent->jumpTable[ifIndex % SFL_HASHTABLE_SIZ]; for( ; search != NULL; search = search->hash_nxt) if(SFL_DS_INDEX(search->dsi) == ifIndex) break; return search; } /*_________________---------------------------__________________ _________________ sfl_agent_getSampler __________________ -----------------___________________________------------------ */ SFLSampler *sfl_agent_getSampler(SFLAgent *agent, SFLDataSource_instance *pdsi) { /* find it and return it */ SFLSampler *sm = agent->samplers; for(; sm != NULL; sm = sm->nxt) if(sfl_dsi_compare(pdsi, &sm->dsi) == 0) return sm; /* not found */ return NULL; } /*_________________---------------------------__________________ _________________ sfl_agent_getPoller __________________ -----------------___________________________------------------ */ SFLPoller *sfl_agent_getPoller(SFLAgent *agent, SFLDataSource_instance *pdsi) { /* find it and return it */ SFLPoller *pl = agent->pollers; for(; pl != NULL; pl = pl->nxt) if(sfl_dsi_compare(pdsi, &pl->dsi) == 0) return pl; /* not found */ return NULL; } /*_________________---------------------------__________________ _________________ sfl_agent_getReceiver __________________ -----------------___________________________------------------ */ SFLReceiver *sfl_agent_getReceiver(SFLAgent *agent, u_int32_t receiverIndex) { u_int32_t rcvIdx = 0; SFLReceiver *rcv = agent->receivers; for(; rcv != NULL; rcv = rcv->nxt) if(receiverIndex == ++rcvIdx) return rcv; /* not found - ran off the end of the table */ return NULL; } /*_________________---------------------------__________________ _________________ sfl_agent_getNextSampler __________________ -----------------___________________________------------------ */ SFLSampler *sfl_agent_getNextSampler(SFLAgent *agent, SFLDataSource_instance *pdsi) { /* return the one lexograpically just after it - assume they are sorted correctly according to 
the lexographical ordering of the object ids */ SFLSampler *sm = sfl_agent_getSampler(agent, pdsi); return sm ? sm->nxt : NULL; } /*_________________---------------------------__________________ _________________ sfl_agent_getNextPoller __________________ -----------------___________________________------------------ */ SFLPoller *sfl_agent_getNextPoller(SFLAgent *agent, SFLDataSource_instance *pdsi) { /* return the one lexograpically just after it - assume they are sorted correctly according to the lexographical ordering of the object ids */ SFLPoller *pl = sfl_agent_getPoller(agent, pdsi); return pl ? pl->nxt : NULL; } /*_________________---------------------------__________________ _________________ sfl_agent_getNextReceiver __________________ -----------------___________________________------------------ */ SFLReceiver *sfl_agent_getNextReceiver(SFLAgent *agent, u_int32_t receiverIndex) { return sfl_agent_getReceiver(agent, receiverIndex + 1); } /*_________________---------------------------__________________ _________________ sfl_agent_resetReceiver __________________ -----------------___________________________------------------ */ void sfl_agent_resetReceiver(SFLAgent *agent, SFLReceiver *receiver) { /* tell samplers and pollers to stop sending to this receiver */ /* first get his receiverIndex */ u_int32_t rcvIdx = 0; SFLReceiver *rcv = agent->receivers; for(; rcv != NULL; rcv = rcv->nxt) { rcvIdx++; /* thanks to Diego Valverde for pointing out this bugfix */ if(rcv == receiver) { /* now tell anyone that is using it to stop */ SFLSampler *sm = agent->samplers; SFLPoller *pl = agent->pollers; for(; sm != NULL; sm = sm->nxt) if(sfl_sampler_get_sFlowFsReceiver(sm) == rcvIdx) sfl_sampler_set_sFlowFsReceiver(sm, 0); for(; pl != NULL; pl = pl->nxt) if(sfl_poller_get_sFlowCpReceiver(pl) == rcvIdx) sfl_poller_set_sFlowCpReceiver(pl, 0); break; } } } /*_________________---------------------------__________________ _________________ sfl_agent_error __________________ 
-----------------___________________________------------------ */ #define MAX_ERRMSG_LEN 1000 void sfl_agent_error(SFLAgent *agent, char *modName, char *msg) { char errm[MAX_ERRMSG_LEN]; snprintf(errm, sizeof errm, "sfl_agent_error: %s: %s\n", modName, msg); if(agent->errorFn) (*agent->errorFn)(agent->magic, agent, errm); else { fprintf(stderr, "%s\n", errm); fflush(stderr); } } /*_________________---------------------------__________________ _________________ sfl_agent_sysError __________________ -----------------___________________________------------------ */ void sfl_agent_sysError(SFLAgent *agent, char *modName, char *msg) { char errm[MAX_ERRMSG_LEN]; snprintf(errm, sizeof errm, "sfl_agent_sysError: %s: %s (errno = %d - %s)\n", modName, msg, errno, ovs_strerror(errno)); if(agent->errorFn) (*agent->errorFn)(agent->magic, agent, errm); else { fprintf(stderr, "%s\n", errm); fflush(stderr); } } /*_________________---------------------------__________________ _________________ alloc and free __________________ -----------------___________________________------------------ */ static void * sflAlloc(SFLAgent *agent, size_t bytes) { if(agent->allocFn) return (*agent->allocFn)(agent->magic, agent, bytes); else return SFL_ALLOC(bytes); } static void sflFree(SFLAgent *agent, void *obj) { if(agent->freeFn) (*agent->freeFn)(agent->magic, agent, obj); else SFL_FREE(obj); } openvswitch-2.0.1+git20140120/lib/sflow_api.h000066400000000000000000000321331226605124000203560ustar00rootroot00000000000000/* Copyright (c) 2002-2009 InMon Corp. 
Licensed under the terms of either the * Sun Industry Standards Source License 1.1, that is available at: * http://host-sflow.sourceforge.net/sissl.html * or the InMon sFlow License, that is available at: * http://www.inmon.com/technology/sflowlicense.txt */ #ifndef SFLOW_API_H #define SFLOW_API_H 1 /* define SFLOW_DO_SOCKET to 1 if you want the agent to send the packets itself, otherwise set the sendFn callback in sfl_agent_init.*/ /* #define SFLOW_DO_SOCKET */ #include #include #include #include #include #include #include /* for htonl */ #ifdef SFLOW_DO_SOCKET #include #include #include #include #endif #include "sflow.h" /* define SFLOW_SOFTWARE_SAMPLING to 1 if you need to use the sfl_sampler_takeSample routine and give it every packet */ /* #define SFLOW_SOFTWARE_SAMPLING */ /* uncomment this preprocessor flag (or compile with -DSFL_USE_32BIT_INDEX) if your ds_index numbers can ever be >= 2^30-1 (i.e. >= 0x3FFFFFFF) */ /* #define SFL_USE_32BIT_INDEX */ /* Used to combine ds_class, ds_index and instance into a single 64-bit number like this: __________________________________ | cls| index | instance | ---------------------------------- but now is opened up to a 12-byte struct to ensure that ds_index has a full 32-bit field, and to make accessing the components simpler. The macros have the same behavior as before, so this change should be transparent. The only difference is that these objects are now passed around by reference instead of by value, and the comparison is done using a fn. 
*/

/* Identifies one data source: class, index within the class, and instance.
   Passed around by pointer (see the pdsi parameters in the API below). */
typedef struct _SFLDataSource_instance {
    u_int32_t ds_class;
    u_int32_t ds_index;
    u_int32_t ds_instance;
} SFLDataSource_instance;

/* Pick the sample structs used throughout the API: the "expanded" forms
   when SFL_USE_32BIT_INDEX is defined, the compact forms otherwise. */
#ifdef SFL_USE_32BIT_INDEX
#define SFL_FLOW_SAMPLE_TYPE SFLFlow_sample_expanded
#define SFL_COUNTERS_SAMPLE_TYPE SFLCounters_sample_expanded
#else
#define SFL_FLOW_SAMPLE_TYPE SFLFlow_sample
#define SFL_COUNTERS_SAMPLE_TYPE SFLCounters_sample
/* if index numbers are not going to use all 32 bits, then we can use
   the more compact encoding, with the dataSource class and index merged */
/* (ds_class is shifted left by 24 bits and ds_index is added to it; only
   defined in the compact, non-32BIT_INDEX configuration) */
#define SFL_DS_DATASOURCE(dsi) (((dsi).ds_class << 24) + (dsi).ds_index)
#endif

/* Field accessors for an SFLDataSource_instance lvalue.  Note that the
   expansions are not wrapped in parentheses. */
#define SFL_DS_INSTANCE(dsi) (dsi).ds_instance
#define SFL_DS_CLASS(dsi) (dsi).ds_class
#define SFL_DS_INDEX(dsi) (dsi).ds_index
/* Assign all three fields of `dsi` in one statement. */
#define SFL_DS_SET(dsi,clss,indx,inst)  \
  do {                                  \
    (dsi).ds_class = (clss);            \
    (dsi).ds_index = (indx);            \
    (dsi).ds_instance = (inst);         \
  } while(0)

/* Buffer in which a receiver assembles one outgoing datagram. */
typedef struct _SFLSampleCollector {
    /* storage of SFL_MAX_DATAGRAM_SIZE + SFL_DATA_PAD bytes, held as 32-bit words */
    u_int32_t data[(SFL_MAX_DATAGRAM_SIZE + SFL_DATA_PAD) / sizeof(u_int32_t)];
    u_int32_t *datap; /* packet fill pointer */
    u_int32_t pktlen; /* accumulated size */
    u_int32_t packetSeqNo;
    u_int32_t numSamples;
} SFLSampleCollector;

struct _SFLAgent;  /* forward decl */

/* One sFlow receiver (collector destination) owned by an agent. */
typedef struct _SFLReceiver {
    struct _SFLReceiver *nxt;
    /* MIB fields */
    char *sFlowRcvrOwner;
    time_t sFlowRcvrTimeout;
    u_int32_t sFlowRcvrMaximumDatagramSize;
    SFLAddress sFlowRcvrAddress;
    u_int32_t sFlowRcvrPort;
    u_int32_t sFlowRcvrDatagramVersion;
    /* public fields */
    struct _SFLAgent *agent;    /* pointer to my agent */
    /* private fields */
    SFLSampleCollector sampleCollector;
#ifdef SFLOW_DO_SOCKET
    /* socket addresses, present only when the agent sends packets itself */
    struct sockaddr_in receiver4;
    struct sockaddr_in6 receiver6;
#endif
} SFLReceiver;

/* One flow sampler, keyed by its data source instance. */
typedef struct _SFLSampler {
    /* for linked list */
    struct _SFLSampler *nxt;
    /* for hash lookup table */
    struct _SFLSampler *hash_nxt;
    /* MIB fields */
    SFLDataSource_instance dsi;
    u_int32_t sFlowFsReceiver;
    u_int32_t sFlowFsPacketSamplingRate;
    u_int32_t sFlowFsMaximumHeaderSize;
    /* public fields */
    struct _SFLAgent *agent; /* pointer to my
agent */
    /* private fields */
    SFLReceiver *myReceiver;
    u_int32_t skip;
    u_int32_t samplePool;
    u_int32_t flowSampleSeqNo;
    /* rate checking */
    u_int32_t samplesThisTick;
    u_int32_t samplesLastTick;
    u_int32_t backoffThreshold;
} SFLSampler;

/* declare */
struct _SFLPoller;

/* Callback a poller invokes when its countdown expires.  It receives a
   zero-filled counters sample to populate. */
typedef void (*getCountersFn_t)(void *magic,                   /* callback to get counters */
                                struct _SFLPoller *sampler,    /* called with self */
                                SFL_COUNTERS_SAMPLE_TYPE *cs); /* struct to fill in */

/* One counter poller, keyed by its data source instance. */
typedef struct _SFLPoller {
    /* for linked list */
    struct _SFLPoller *nxt;
    /* MIB fields */
    SFLDataSource_instance dsi;
    u_int32_t sFlowCpReceiver;   /* receiver index; setting it to 0 resets the poller */
    time_t sFlowCpInterval;      /* polling interval in ticks; 0 disables polling */
    /* public fields */
    struct _SFLAgent *agent; /* pointer to my agent */
    void *magic;             /* ptr to pass back in getCountersFn() */
    getCountersFn_t getCountersFn;
    u_int32_t bridgePort; /* port number local to bridge */
    /* private fields */
    SFLReceiver *myReceiver;        /* cached by sfl_poller_set_sFlowCpReceiver() */
    time_t countersCountdown;       /* ticks left until the next poll; 0 = not polling */
    u_int32_t countersSampleSeqNo;  /* incremented for every counters sample written */
} SFLPoller;

/* Optional hooks supplied to sfl_agent_init().  Each is called with the
   agent's `magic` pointer and the agent itself. */
typedef void *(*allocFn_t)(void *magic,               /* callback to allocate space on heap */
                           struct _SFLAgent *agent,   /* called with self */
                           size_t bytes);             /* bytes requested */

typedef int (*freeFn_t)(void *magic,                  /* callback to free space on heap */
                        struct _SFLAgent *agent,      /* called with self */
                        void *obj);                   /* obj to free */

typedef void (*errorFn_t)(void *magic,                /* callback to log error message */
                          struct _SFLAgent *agent,    /* called with self */
                          char *msg);                 /* error message */

typedef void (*sendFn_t)(void *magic,                 /* optional override fn to send packet */
                         struct _SFLAgent *agent,
                         SFLReceiver *receiver,
                         u_char *pkt,
                         u_int32_t pktLen);


/* prime numbers are good for hash tables */
#define SFL_HASHTABLE_SIZ 199

/* The agent: owns the samplers, pollers and receivers. */
typedef struct _SFLAgent {
    SFLSampler *jumpTable[SFL_HASHTABLE_SIZ]; /* fast lookup table for samplers (by ifIndex) */
    SFLSampler *samplers;   /* the list of samplers */
    SFLPoller  *pollers;    /* the list of pollers */
    SFLReceiver *receivers; /* the array of receivers */
    time_t bootTime;        /* time when we booted or started */
    time_t now;             /* time
now */
    SFLAddress myIP;        /* IP address of this node */
    u_int32_t subId;        /* sub_agent_id */
    void *magic;            /* ptr to pass back in logging and alloc fns */
    /* Optional hooks.  In sflow_agent.c a NULL allocFn/freeFn falls back to
       SFL_ALLOC/SFL_FREE and a NULL errorFn falls back to stderr. */
    allocFn_t allocFn;
    freeFn_t freeFn;
    errorFn_t errorFn;
    sendFn_t sendFn;
#ifdef SFLOW_DO_SOCKET
    int receiverSocket4;
    int receiverSocket6;
#endif
} SFLAgent;

/* call this at the start with a newly created agent */
void sfl_agent_init(SFLAgent *agent,
                    SFLAddress *myIP, /* IP address of this agent */
                    u_int32_t subId,  /* agent_sub_id */
                    time_t bootTime,  /* agent boot time */
                    time_t now,       /* time now */
                    void *magic,      /* ptr to pass back in logging and alloc fns */
                    allocFn_t allocFn,
                    freeFn_t freeFn,
                    errorFn_t errorFn,
                    sendFn_t sendFn);

/* call this to create samplers */
SFLSampler *sfl_agent_addSampler(SFLAgent *agent, SFLDataSource_instance *pdsi);

/* call this to create pollers */
SFLPoller *sfl_agent_addPoller(SFLAgent *agent,
                               SFLDataSource_instance *pdsi,
                               void *magic, /* ptr to pass back in getCountersFn() */
                               getCountersFn_t getCountersFn);

/* call this to create receivers */
SFLReceiver *sfl_agent_addReceiver(SFLAgent *agent);

/* call this to remove samplers */
int sfl_agent_removeSampler(SFLAgent *agent, SFLDataSource_instance *pdsi);

/* call this to remove pollers */
int sfl_agent_removePoller(SFLAgent *agent, SFLDataSource_instance *pdsi);

/* note: receivers should not be removed. Typically the receivers
   list will be created at init time and never changed */

/* call these fns to retrieve sampler, poller or receiver (e.g.
for SNMP GET or GETNEXT operation) */ SFLSampler *sfl_agent_getSampler(SFLAgent *agent, SFLDataSource_instance *pdsi); SFLSampler *sfl_agent_getNextSampler(SFLAgent *agent, SFLDataSource_instance *pdsi); SFLPoller *sfl_agent_getPoller(SFLAgent *agent, SFLDataSource_instance *pdsi); SFLPoller *sfl_agent_getNextPoller(SFLAgent *agent, SFLDataSource_instance *pdsi); SFLReceiver *sfl_agent_getReceiver(SFLAgent *agent, u_int32_t receiverIndex); SFLReceiver *sfl_agent_getNextReceiver(SFLAgent *agent, u_int32_t receiverIndex); /* jump table access - for performance */ SFLSampler *sfl_agent_getSamplerByIfIndex(SFLAgent *agent, u_int32_t ifIndex); /* call these functions to GET and SET MIB values */ /* receiver */ char * sfl_receiver_get_sFlowRcvrOwner(SFLReceiver *receiver); void sfl_receiver_set_sFlowRcvrOwner(SFLReceiver *receiver, char *sFlowRcvrOwner); time_t sfl_receiver_get_sFlowRcvrTimeout(SFLReceiver *receiver); void sfl_receiver_set_sFlowRcvrTimeout(SFLReceiver *receiver, time_t sFlowRcvrTimeout); u_int32_t sfl_receiver_get_sFlowRcvrMaximumDatagramSize(SFLReceiver *receiver); void sfl_receiver_set_sFlowRcvrMaximumDatagramSize(SFLReceiver *receiver, u_int32_t sFlowRcvrMaximumDatagramSize); SFLAddress *sfl_receiver_get_sFlowRcvrAddress(SFLReceiver *receiver); void sfl_receiver_set_sFlowRcvrAddress(SFLReceiver *receiver, SFLAddress *sFlowRcvrAddress); u_int32_t sfl_receiver_get_sFlowRcvrPort(SFLReceiver *receiver); void sfl_receiver_set_sFlowRcvrPort(SFLReceiver *receiver, u_int32_t sFlowRcvrPort); /* sampler */ u_int32_t sfl_sampler_get_sFlowFsReceiver(SFLSampler *sampler); void sfl_sampler_set_sFlowFsReceiver(SFLSampler *sampler, u_int32_t sFlowFsReceiver); u_int32_t sfl_sampler_get_sFlowFsPacketSamplingRate(SFLSampler *sampler); void sfl_sampler_set_sFlowFsPacketSamplingRate(SFLSampler *sampler, u_int32_t sFlowFsPacketSamplingRate); u_int32_t sfl_sampler_get_sFlowFsMaximumHeaderSize(SFLSampler *sampler); void sfl_sampler_set_sFlowFsMaximumHeaderSize(SFLSampler 
*sampler, u_int32_t sFlowFsMaximumHeaderSize); u_int32_t sfl_sampler_get_samplesLastTick(SFLSampler *sampler); /* poller */ u_int32_t sfl_poller_get_sFlowCpReceiver(SFLPoller *poller); void sfl_poller_set_sFlowCpReceiver(SFLPoller *poller, u_int32_t sFlowCpReceiver); u_int32_t sfl_poller_get_sFlowCpInterval(SFLPoller *poller); void sfl_poller_set_sFlowCpInterval(SFLPoller *poller, u_int32_t sFlowCpInterval); /* fns to set the sflow agent address or sub-id */ void sfl_agent_set_agentAddress(SFLAgent *agent, SFLAddress *addr); void sfl_agent_set_agentSubId(SFLAgent *agent, u_int32_t subId); /* The poller may need a separate number to reference the local bridge port to get counters if it is not the same as the global ifIndex */ void sfl_poller_set_bridgePort(SFLPoller *poller, u_int32_t port_no); u_int32_t sfl_poller_get_bridgePort(SFLPoller *poller); /* call this to indicate a discontinuity with a counter like samplePool so that the sflow collector will ignore the next delta */ void sfl_sampler_resetFlowSeqNo(SFLSampler *sampler); /* call this to indicate a discontinuity with one or more of the counters so that the sflow collector will ignore the next delta */ void sfl_poller_resetCountersSeqNo(SFLPoller *poller); #ifdef SFLOW_SOFTWARE_SAMLING /* software sampling: call this with every packet - returns non-zero if the packet should be sampled (in which case you then call sfl_sampler_writeFlowSample()) */ int sfl_sampler_takeSample(SFLSampler *sampler); #endif /* call this to set a maximum samples-per-second threshold. If the sampler reaches this threshold it will automatically back off the sampling rate. A value of 0 disables the mechanism */ void sfl_sampler_set_backoffThreshold(SFLSampler *sampler, u_int32_t samplesPerSecond); u_int32_t sfl_sampler_get_backoffThreshold(SFLSampler *sampler); /* call this once per second (N.B. not on interrupt stack i.e. 
not hard real-time) */ void sfl_agent_tick(SFLAgent *agent, time_t now); /* call this with each flow sample */ void sfl_sampler_writeFlowSample(SFLSampler *sampler, SFL_FLOW_SAMPLE_TYPE *fs); /* call this to push counters samples (usually done in the getCountersFn callback) */ void sfl_poller_writeCountersSample(SFLPoller *poller, SFL_COUNTERS_SAMPLE_TYPE *cs); /* call this to deallocate resources */ void sfl_agent_release(SFLAgent *agent); /* internal fns */ void sfl_receiver_init(SFLReceiver *receiver, SFLAgent *agent); void sfl_sampler_init(SFLSampler *sampler, SFLAgent *agent, SFLDataSource_instance *pdsi); void sfl_poller_init(SFLPoller *poller, SFLAgent *agent, SFLDataSource_instance *pdsi, void *magic, getCountersFn_t getCountersFn); void sfl_receiver_tick(SFLReceiver *receiver, time_t now); void sfl_poller_tick(SFLPoller *poller, time_t now); void sfl_sampler_tick(SFLSampler *sampler, time_t now); int sfl_receiver_writeFlowSample(SFLReceiver *receiver, SFL_FLOW_SAMPLE_TYPE *fs); int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_TYPE *cs); void sfl_agent_resetReceiver(SFLAgent *agent, SFLReceiver *receiver); void sfl_agent_error(SFLAgent *agent, char *modName, char *msg); void sfl_agent_sysError(SFLAgent *agent, char *modName, char *msg); u_int32_t sfl_receiver_samplePacketsSent(SFLReceiver *receiver); #define SFL_ALLOC malloc #define SFL_FREE free #endif /* SFLOW_API_H */ openvswitch-2.0.1+git20140120/lib/sflow_poller.c000066400000000000000000000147341226605124000211040ustar00rootroot00000000000000/* Copyright (c) 2002-2009 InMon Corp. 
Licensed under the terms of either the * Sun Industry Standards Source License 1.1, that is available at: * http://host-sflow.sourceforge.net/sissl.html * or the InMon sFlow License, that is available at: * http://www.inmon.com/technology/sflowlicense.txt */ #include "sflow_api.h" /*_________________--------------------------__________________ _________________ sfl_poller_init __________________ -----------------__________________________------------------ */ void sfl_poller_init(SFLPoller *poller, SFLAgent *agent, SFLDataSource_instance *pdsi, void *magic, /* ptr to pass back in getCountersFn() */ getCountersFn_t getCountersFn) { /* copy the dsi in case it points to poller->dsi, which we are about to clear */ SFLDataSource_instance dsi = *pdsi; /* preserve the *nxt pointer too, in case we are resetting this poller and it is already part of the agent's linked list (thanks to Matt Woodly for pointing this out) */ SFLPoller *nxtPtr = poller->nxt; /* clear everything */ memset(poller, 0, sizeof(*poller)); /* restore the linked list ptr */ poller->nxt = nxtPtr; /* now copy in the parameters */ poller->agent = agent; poller->dsi = dsi; /* structure copy */ poller->magic = magic; poller->getCountersFn = getCountersFn; } /*_________________--------------------------__________________ _________________ reset __________________ -----------------__________________________------------------ */ static void reset(SFLPoller *poller) { SFLDataSource_instance dsi = poller->dsi; sfl_poller_init(poller, poller->agent, &dsi, poller->magic, poller->getCountersFn); } /*_________________---------------------------__________________ _________________ MIB access __________________ -----------------___________________________------------------ */ u_int32_t sfl_poller_get_sFlowCpReceiver(SFLPoller *poller) { return poller->sFlowCpReceiver; } void sfl_poller_set_sFlowCpReceiver(SFLPoller *poller, u_int32_t sFlowCpReceiver) { poller->sFlowCpReceiver = sFlowCpReceiver; if(sFlowCpReceiver == 0) 
reset(poller);
    else {
        /* retrieve and cache a direct pointer to my receiver */
        poller->myReceiver = sfl_agent_getReceiver(poller->agent, poller->sFlowCpReceiver);
    }
}

/* Return the counter polling interval.  The field is stored as time_t and
   is returned here as u_int32_t. */
u_int32_t sfl_poller_get_sFlowCpInterval(SFLPoller *poller) {
    return poller->sFlowCpInterval;
}

/* Set the counter polling interval.  A non-zero interval arms the countdown
   with a value in [1, sFlowCpInterval]; zero stops polling. */
void sfl_poller_set_sFlowCpInterval(SFLPoller *poller, u_int32_t sFlowCpInterval) {
    poller->sFlowCpInterval = sFlowCpInterval;
    if(sFlowCpInterval) {
        /* Set the countersCountdown to be a randomly selected value between 1 and
           sFlowCpInterval. That way the counter polling will be desynchronised
           (on a 200-port switch, polling all the counters in one second could be harmful).
           In a large network, even this might not be ideal if time-synchronization
           between devices is close and counters are always polled on second boundaries. If
           1000 different devices all send an sFlow datagram on the same second boundary
           it could result in an antisocial burst.
           However when counter-samples are packed into the export datagram they do not
           always result in that datagram being sent immediately. It is more likely that
           a subsequent packet-sample will be the one that triggers the datagram to be sent.
           The packet-sample events are not synchronized to any clock, so that results in
           excellent desynchronization (http://blog.sflow.com/2009/05/measurement-traffic.html).
           Another smoothing factor is that the tick() function called here is usually
           driven from a fairly "soft" polling loop rather than a hard real-time event.
        */
        poller->countersCountdown = 1 + (random() % sFlowCpInterval);
    }
    else {
        /* Setting sFlowCpInterval to 0 disables counter polling altogether.  Thanks to
           Andy Kitchingman for spotting this omission.
*/ poller->countersCountdown = 0; } } /*_________________---------------------------------__________________ _________________ bridge port __________________ -----------------_________________________________------------------ May need a separate number to reference the local bridge port to get counters if it is not the same as the global ifIndex. */ void sfl_poller_set_bridgePort(SFLPoller *poller, u_int32_t port_no) { poller->bridgePort = port_no; } u_int32_t sfl_poller_get_bridgePort(SFLPoller *poller) { return poller->bridgePort; } /*_________________---------------------------------__________________ _________________ sequence number reset __________________ -----------------_________________________________------------------ Used to indicate a counter discontinuity so that the sflow collector will know to ignore the next delta. */ void sfl_poller_resetCountersSeqNo(SFLPoller *poller) { poller->countersSampleSeqNo = 0; } /*_________________---------------------------__________________ _________________ sfl_poller_tick __________________ -----------------___________________________------------------ */ void sfl_poller_tick(SFLPoller *poller, time_t now) { if(poller->countersCountdown == 0) return; /* counters retrieval was not enabled */ if(poller->sFlowCpReceiver == 0) return; if(--poller->countersCountdown == 0) { if(poller->getCountersFn != NULL) { /* call out for counters */ SFL_COUNTERS_SAMPLE_TYPE cs; memset(&cs, 0, sizeof(cs)); poller->getCountersFn(poller->magic, poller, &cs); /* this countersFn is expected to fill in some counter block elements and then call sfl_poller_writeCountersSample(poller, &cs); */ } /* reset the countdown */ poller->countersCountdown = poller->sFlowCpInterval; } } /*_________________---------------------------------__________________ _________________ sfl_poller_writeCountersSample __________________ -----------------_________________________________------------------ */ void sfl_poller_writeCountersSample(SFLPoller *poller, 
SFL_COUNTERS_SAMPLE_TYPE *cs) { /* fill in the rest of the header fields, and send to the receiver */ cs->sequence_number = ++poller->countersSampleSeqNo; #ifdef SFL_USE_32BIT_INDEX cs->ds_class = SFL_DS_CLASS(poller->dsi); cs->ds_index = SFL_DS_INDEX(poller->dsi); #else cs->source_id = SFL_DS_DATASOURCE(poller->dsi); #endif /* sent to my receiver */ if(poller->myReceiver) sfl_receiver_writeCountersSample(poller->myReceiver, cs); } openvswitch-2.0.1+git20140120/lib/sflow_receiver.c000066400000000000000000000760321226605124000214120ustar00rootroot00000000000000/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of either the * Sun Industry Standards Source License 1.1, that is available at: * http://host-sflow.sourceforge.net/sissl.html * or the InMon sFlow License, that is available at: * http://www.inmon.com/technology/sflowlicense.txt */ #ifndef __CHECKER__ /* Don't run sparse on anything in this file. */ #include #include "sflow_api.h" static void resetSampleCollector(SFLReceiver *receiver); static void sendSample(SFLReceiver *receiver); static void sflError(SFLReceiver *receiver, char *errm); inline static void putNet32(SFLReceiver *receiver, u_int32_t val); inline static void putAddress(SFLReceiver *receiver, SFLAddress *addr); #ifdef SFLOW_DO_SOCKET static void initSocket(SFLReceiver *receiver); #endif /*_________________--------------------------__________________ _________________ sfl_receiver_init __________________ -----------------__________________________------------------ */ void sfl_receiver_init(SFLReceiver *receiver, SFLAgent *agent) { /* first clear everything */ memset(receiver, 0, sizeof(*receiver)); /* now copy in the parameters */ receiver->agent = agent; /* set defaults */ receiver->sFlowRcvrMaximumDatagramSize = SFL_DEFAULT_DATAGRAM_SIZE; receiver->sFlowRcvrPort = SFL_DEFAULT_COLLECTOR_PORT; #ifdef SFLOW_DO_SOCKET /* initialize the socket address */ initSocket(receiver); #endif /* preset some of the header fields */ 
receiver->sampleCollector.datap = receiver->sampleCollector.data; putNet32(receiver, SFLDATAGRAM_VERSION5); putAddress(receiver, &agent->myIP); putNet32(receiver, agent->subId); /* prepare to receive the first sample */ resetSampleCollector(receiver); } /*_________________---------------------------__________________ _________________ reset __________________ -----------------___________________________------------------ called on timeout, or when owner string is cleared */ static void reset(SFLReceiver *receiver) { // ask agent to tell samplers and pollers to stop sending samples sfl_agent_resetReceiver(receiver->agent, receiver); // reinitialize sfl_receiver_init(receiver, receiver->agent); } #ifdef SFLOW_DO_SOCKET /*_________________---------------------------__________________ _________________ initSocket __________________ -----------------___________________________------------------ */ static void initSocket(SFLReceiver *receiver) { if(receiver->sFlowRcvrAddress.type == SFLADDRESSTYPE_IP_V6) { struct sockaddr_in6 *sa6 = &receiver->receiver6; sa6->sin6_port = htons((u_int16_t)receiver->sFlowRcvrPort); sa6->sin6_family = AF_INET6; sa6->sin6_addr = receiver->sFlowRcvrAddress.address.ip_v6; } else { struct sockaddr_in *sa4 = &receiver->receiver4; sa4->sin_port = htons((u_int16_t)receiver->sFlowRcvrPort); sa4->sin_family = AF_INET; sa4->sin_addr = receiver->sFlowRcvrAddress.address.ip_v4; } } #endif /*_________________----------------------------------------_____________ _________________ MIB Vars _____________ -----------------________________________________________------------- */ char * sfl_receiver_get_sFlowRcvrOwner(SFLReceiver *receiver) { return receiver->sFlowRcvrOwner; } void sfl_receiver_set_sFlowRcvrOwner(SFLReceiver *receiver, char *sFlowRcvrOwner) { receiver->sFlowRcvrOwner = sFlowRcvrOwner; if(sFlowRcvrOwner == NULL || sFlowRcvrOwner[0] == '\0') { // reset condition! 
owner string was cleared reset(receiver); } } time_t sfl_receiver_get_sFlowRcvrTimeout(SFLReceiver *receiver) { return receiver->sFlowRcvrTimeout; } void sfl_receiver_set_sFlowRcvrTimeout(SFLReceiver *receiver, time_t sFlowRcvrTimeout) { receiver->sFlowRcvrTimeout =sFlowRcvrTimeout; } u_int32_t sfl_receiver_get_sFlowRcvrMaximumDatagramSize(SFLReceiver *receiver) { return receiver->sFlowRcvrMaximumDatagramSize; } void sfl_receiver_set_sFlowRcvrMaximumDatagramSize(SFLReceiver *receiver, u_int32_t sFlowRcvrMaximumDatagramSize) { u_int32_t mdz = sFlowRcvrMaximumDatagramSize; if(mdz < SFL_MIN_DATAGRAM_SIZE) mdz = SFL_MIN_DATAGRAM_SIZE; receiver->sFlowRcvrMaximumDatagramSize = mdz; } SFLAddress *sfl_receiver_get_sFlowRcvrAddress(SFLReceiver *receiver) { return &receiver->sFlowRcvrAddress; } void sfl_receiver_set_sFlowRcvrAddress(SFLReceiver *receiver, SFLAddress *sFlowRcvrAddress) { if(sFlowRcvrAddress) receiver->sFlowRcvrAddress = *sFlowRcvrAddress; // structure copy #ifdef SFLOW_DO_SOCKET initSocket(receiver); #endif } u_int32_t sfl_receiver_get_sFlowRcvrPort(SFLReceiver *receiver) { return receiver->sFlowRcvrPort; } void sfl_receiver_set_sFlowRcvrPort(SFLReceiver *receiver, u_int32_t sFlowRcvrPort) { receiver->sFlowRcvrPort = sFlowRcvrPort; // update the socket structure #ifdef SFLOW_DO_SOCKET initSocket(receiver); #endif } /*_________________---------------------------__________________ _________________ sfl_receiver_tick __________________ -----------------___________________________------------------ */ void sfl_receiver_tick(SFLReceiver *receiver, time_t now) { // if there are any samples to send, flush them now if(receiver->sampleCollector.numSamples > 0) sendSample(receiver); // check the timeout if(receiver->sFlowRcvrTimeout && (u_int32_t)receiver->sFlowRcvrTimeout != 0xFFFFFFFF) { // count down one tick and reset if we reach 0 if(--receiver->sFlowRcvrTimeout == 0) reset(receiver); } } /*_________________-----------------------------__________________ 
_________________   receiver write utilities  __________________
  -----------------_____________________________------------------
*/

/* All helpers below append to the receiver's sample collector through its
   fill pointer (datap), which is a u_int32_t*, so "advance by n" means n
   32-bit words.  None of them checks the remaining buffer space. */

/* Append one 32-bit word exactly as given (no byte-order conversion). */
inline static void put32(SFLReceiver *receiver, u_int32_t val)
{
    *receiver->sampleCollector.datap++ = val;
}

/* Append one 32-bit word converted to network byte order. */
inline static void putNet32(SFLReceiver *receiver, u_int32_t val)
{
    *receiver->sampleCollector.datap++ = htonl(val);
}

/* Append `quads` consecutive 32-bit words read from obj, each converted to
   network byte order. */
inline static void putNet32_run(SFLReceiver *receiver, void *obj, size_t quads)
{
    u_int32_t *from = (u_int32_t *)obj;
    while(quads--) putNet32(receiver, *from++);
}

/* Append a 64-bit value in network byte order (two words). */
inline static void putNet64(SFLReceiver *receiver, u_int64_t val64)
{
    u_int32_t *firstQuadPtr = receiver->sampleCollector.datap;
    // first copy the bytes in
    memcpy((u_char *)firstQuadPtr, &val64, 8);
    /* htonl(1) != 1 is true only when host order differs from network order */
    if(htonl(1) != 1) {
        // swap the bytes, and reverse the quads too
        u_int32_t tmp = *receiver->sampleCollector.datap++;
        *firstQuadPtr = htonl(*receiver->sampleCollector.datap);
        *receiver->sampleCollector.datap++ = htonl(tmp);
    }
    else receiver->sampleCollector.datap += 2;
}

/* Append 16 raw bytes (four words) with no byte-order conversion. */
inline static void put128(SFLReceiver *receiver, u_char *val)
{
    memcpy(receiver->sampleCollector.datap, val, 16);
    receiver->sampleCollector.datap += 4;
}

/* Append a length-prefixed string: the length word, then the bytes, with
   the fill pointer rounded up to the next word and the slack zeroed. */
inline static void putString(SFLReceiver *receiver, SFLString *s)
{
    putNet32(receiver, s->len);
    memcpy(receiver->sampleCollector.datap, s->str, s->len);
    receiver->sampleCollector.datap += (s->len + 3) / 4; /* pad to 4-byte boundary */
    if ((s->len % 4) != 0){
        /* datap is already past the padded string, so step back over the
           pad bytes to clear them */
        u_int8_t padding = 4 - (s->len % 4);
        memset(((u_int8_t*)receiver->sampleCollector.datap)-padding, 0, padding);
    }
}

/* Bytes putString() emits for s: 4 for the length plus the padded text. */
inline static u_int32_t stringEncodingLength(SFLString *s) {
    // answer in bytes, so remember to multiply by 4 after rounding up to nearest 4-byte boundary
    return 4 + (((s->len + 3) / 4) * 4);
}

/* Append an address as a type word followed by the address bytes. */
inline static void putAddress(SFLReceiver *receiver, SFLAddress *addr)
{
    // encode unspecified addresses as IPV4:0.0.0.0 - or should we flag this as an error?
if(addr->type == 0) {
        putNet32(receiver, SFLADDRESSTYPE_IP_V4);
        put32(receiver, 0);
    }
    else {
        putNet32(receiver, addr->type);
        /* the address bytes are written as stored, without byte swapping */
        if(addr->type == SFLADDRESSTYPE_IP_V4) put32(receiver, addr->address.ip_v4.addr);
        else put128(receiver, addr->address.ip_v6.addr);
    }
}

/* Bytes putAddress() emits: 4 (type) + 16 for IPv6, 4 + 4 for anything else. */
inline static u_int32_t addressEncodingLength(SFLAddress *addr) {
    return (addr->type == SFLADDRESSTYPE_IP_V6) ? 20 : 8;  // type + address (unspecified == IPV4)
}

/* Append a 6-byte MAC address in an 8-byte slot.  Only the 6 address bytes
   are written; the 2 trailing bytes of the slot are left as they were. */
inline static void putMACAddress(SFLReceiver *receiver, u_int8_t *mac)
{
    memcpy(receiver->sampleCollector.datap, mac, 6);
    receiver->sampleCollector.datap += 2;
}

/* Append the extended-switch record: four 32-bit fields in wire order. */
inline static void putSwitch(SFLReceiver *receiver, SFLExtended_switch *sw)
{
    putNet32(receiver, sw->src_vlan);
    putNet32(receiver, sw->src_priority);
    putNet32(receiver, sw->dst_vlan);
    putNet32(receiver, sw->dst_priority);
}

/* Append the extended-router record: next hop, then the two masks. */
inline static void putRouter(SFLReceiver *receiver, SFLExtended_router *router)
{
    putAddress(receiver, &router->nexthop);
    putNet32(receiver, router->src_mask);
    putNet32(receiver, router->dst_mask);
}

/* Bytes putRouter() emits: the next-hop address plus two 4-byte masks. */
inline static u_int32_t routerEncodingLength(SFLExtended_router *router) {
    return addressEncodingLength(&router->nexthop) + 8;
}

/* Append the extended-gateway record.  gatewayEncodingLength() must stay
   in step with the fields written here. */
inline static void putGateway(SFLReceiver *receiver, SFLExtended_gateway *gw)
{
    putAddress(receiver, &gw->nexthop);
    putNet32(receiver, gw->as);
    putNet32(receiver, gw->src_as);
    putNet32(receiver, gw->src_peer_as);
    putNet32(receiver, gw->dst_as_path_segments);
    {
        /* one (type, length, AS numbers...) group per path segment */
        u_int32_t seg = 0;
        for(; seg < gw->dst_as_path_segments; seg++) {
            putNet32(receiver, gw->dst_as_path[seg].type);
            putNet32(receiver, gw->dst_as_path[seg].length);
            putNet32_run(receiver, gw->dst_as_path[seg].as.seq, gw->dst_as_path[seg].length);
        }
    }
    putNet32(receiver, gw->communities_length);
    putNet32_run(receiver, gw->communities, gw->communities_length);
    putNet32(receiver, gw->localpref);
}

/* Bytes putGateway() emits for gw. */
inline static u_int32_t gatewayEncodingLength(SFLExtended_gateway *gw) {
    u_int32_t elemSiz = addressEncodingLength(&gw->nexthop);
    u_int32_t seg = 0;
    elemSiz += 16; // as, src_as, src_peer_as,
/* dst_as_path_segments (end of the field list for the 16 bytes counted just above) */
    for(; seg < gw->dst_as_path_segments; seg++) {
        elemSiz += 8; // type, length
        elemSiz += 4 * gw->dst_as_path[seg].length; // set/seq bytes
    }
    elemSiz += 4; // communities_length
    elemSiz += 4 * gw->communities_length; // communities
    elemSiz += 4; // localpref
    return elemSiz;
}

/* Append the extended-user record: charset + name for source, then the
   same pair for destination. */
inline static void putUser(SFLReceiver *receiver, SFLExtended_user *user)
{
    putNet32(receiver, user->src_charset);
    putString(receiver, &user->src_user);
    putNet32(receiver, user->dst_charset);
    putString(receiver, &user->dst_user);
}

/* Bytes putUser() emits: two 4-byte charsets plus the two encoded strings. */
inline static u_int32_t userEncodingLength(SFLExtended_user *user) {
    return 4
        + stringEncodingLength(&user->src_user)
        + 4
        + stringEncodingLength(&user->dst_user);
}

/* Append the extended-url record: direction, url string, host string. */
inline static void putUrl(SFLReceiver *receiver, SFLExtended_url *url)
{
    putNet32(receiver, url->direction);
    putString(receiver, &url->url);
    putString(receiver, &url->host);
}

/* Bytes putUrl() emits: 4-byte direction plus the two encoded strings. */
inline static u_int32_t urlEncodingLength(SFLExtended_url *url) {
    return 4
        + stringEncodingLength(&url->url)
        + stringEncodingLength(&url->host);
}

/* Append a label stack: the depth word followed by `depth` label words. */
inline static void putLabelStack(SFLReceiver *receiver, SFLLabelStack *labelStack)
{
    putNet32(receiver, labelStack->depth);
    putNet32_run(receiver, labelStack->stack, labelStack->depth);
}

/* Bytes putLabelStack() emits: 4 for the depth plus 4 per label. */
inline static u_int32_t labelStackEncodingLength(SFLLabelStack *labelStack) {
    return 4 + (4 * labelStack->depth);
}

/* Append the extended-mpls record: next hop, in-stack, out-stack. */
inline static void putMpls(SFLReceiver *receiver, SFLExtended_mpls *mpls)
{
    putAddress(receiver, &mpls->nextHop);
    putLabelStack(receiver, &mpls->in_stack);
    putLabelStack(receiver, &mpls->out_stack);
}

/* Bytes putMpls() emits. */
inline static u_int32_t mplsEncodingLength(SFLExtended_mpls *mpls) {
    return addressEncodingLength(&mpls->nextHop)
        + labelStackEncodingLength(&mpls->in_stack)
        + labelStackEncodingLength(&mpls->out_stack);
}

/* Append the extended-nat record: source address then destination address. */
inline static void putNat(SFLReceiver *receiver, SFLExtended_nat *nat)
{
    putAddress(receiver, &nat->src);
    putAddress(receiver, &nat->dst);
}

/* Bytes putNat() emits: the two encoded addresses. */
inline static u_int32_t natEncodingLength(SFLExtended_nat *nat) {
    return addressEncodingLength(&nat->src) +
addressEncodingLength(&nat->dst);
}

/* Append the extended-mpls-tunnel record: name string, id, cos. */
inline static void putMplsTunnel(SFLReceiver *receiver, SFLExtended_mpls_tunnel *tunnel)
{
    putString(receiver, &tunnel->tunnel_lsp_name);
    putNet32(receiver, tunnel->tunnel_id);
    putNet32(receiver, tunnel->tunnel_cos);
}

/* Bytes putMplsTunnel() emits: the encoded name plus two 4-byte fields. */
inline static u_int32_t mplsTunnelEncodingLength(SFLExtended_mpls_tunnel *tunnel) {
    return stringEncodingLength(&tunnel->tunnel_lsp_name) + 8;
}

/* Append the extended-mpls-vc record: instance name, vc id, label cos. */
inline static void putMplsVc(SFLReceiver *receiver, SFLExtended_mpls_vc *vc)
{
    putString(receiver, &vc->vc_instance_name);
    putNet32(receiver, vc->vll_vc_id);
    putNet32(receiver, vc->vc_label_cos);
}

/* Bytes putMplsVc() emits: the encoded name plus two 4-byte fields. */
inline static u_int32_t mplsVcEncodingLength(SFLExtended_mpls_vc *vc) {
    return stringEncodingLength( &vc->vc_instance_name) + 8;
}

/* Append the extended-mpls-FTN record: description string, then mask. */
inline static void putMplsFtn(SFLReceiver *receiver, SFLExtended_mpls_FTN *ftn)
{
    putString(receiver, &ftn->mplsFTNDescr);
    putNet32(receiver, ftn->mplsFTNMask);
}

/* Bytes putMplsFtn() emits: the encoded description plus a 4-byte mask. */
inline static u_int32_t mplsFtnEncodingLength(SFLExtended_mpls_FTN *ftn) {
    return stringEncodingLength( &ftn->mplsFTNDescr) + 4;
}

/* Append the extended-mpls-LDP-FEC record: a single prefix-length word. */
inline static void putMplsLdpFec(SFLReceiver *receiver, SFLExtended_mpls_LDP_FEC *ldpfec)
{
    putNet32(receiver, ldpfec->mplsFecAddrPrefixLength);
}

/* Bytes putMplsLdpFec() emits: always 4 (the argument is not consulted). */
inline static u_int32_t mplsLdpFecEncodingLength(SFLExtended_mpls_LDP_FEC *ldpfec) {
    return 4;
}

/* Append the extended-vlan-tunnel record: just its label stack. */
inline static void putVlanTunnel(SFLReceiver *receiver, SFLExtended_vlan_tunnel *vlanTunnel)
{
    putLabelStack(receiver, &vlanTunnel->stack);
}

/* Bytes putVlanTunnel() emits: the encoded label stack. */
inline static u_int32_t vlanTunnelEncodingLength(SFLExtended_vlan_tunnel *vlanTunnel) {
    return labelStackEncodingLength(&vlanTunnel->stack);
}

/* Append the generic interface counters block.  ifSpeed, ifInOctets and
   ifOutOctets are written as 64-bit values, everything else as 32-bit. */
inline static void putGenericCounters(SFLReceiver *receiver, SFLIf_counters *counters)
{
    putNet32(receiver, counters->ifIndex);
    putNet32(receiver, counters->ifType);
    putNet64(receiver, counters->ifSpeed);
    putNet32(receiver, counters->ifDirection);
    putNet32(receiver, counters->ifStatus);
    putNet64(receiver, counters->ifInOctets);
    putNet32(receiver, counters->ifInUcastPkts);
    putNet32(receiver, counters->ifInMulticastPkts);
    putNet32(receiver,
counters->ifInBroadcastPkts); putNet32(receiver, counters->ifInDiscards); putNet32(receiver, counters->ifInErrors); putNet32(receiver, counters->ifInUnknownProtos); putNet64(receiver, counters->ifOutOctets); putNet32(receiver, counters->ifOutUcastPkts); putNet32(receiver, counters->ifOutMulticastPkts); putNet32(receiver, counters->ifOutBroadcastPkts); putNet32(receiver, counters->ifOutDiscards); putNet32(receiver, counters->ifOutErrors); putNet32(receiver, counters->ifPromiscuousMode); } /*_________________-----------------------------__________________ _________________ computeFlowSampleSize __________________ -----------------_____________________________------------------ */ static int computeFlowSampleSize(SFLReceiver *receiver, SFL_FLOW_SAMPLE_TYPE *fs) { SFLFlow_sample_element *elem = fs->elements; #ifdef SFL_USE_32BIT_INDEX u_int siz = 52; /* tag, length, sequence_number, ds_class, ds_index, sampling_rate, sample_pool, drops, inputFormat, input, outputFormat, output, number of elements */ #else u_int siz = 40; /* tag, length, sequence_number, source_id, sampling_rate, sample_pool, drops, input, output, number of elements */ #endif fs->num_elements = 0; /* we're going to count them again even if this was set by the client */ for(; elem != NULL; elem = elem->nxt) { u_int elemSiz = 0; fs->num_elements++; siz += 8; /* tag, length */ switch(elem->tag) { case SFLFLOW_HEADER: elemSiz = 16; /* header_protocol, frame_length, stripped, header_length */ elemSiz += ((elem->flowType.header.header_length + 3) / 4) * 4; /* header, rounded up to nearest 4 bytes */ break; case SFLFLOW_ETHERNET: elemSiz = sizeof(SFLSampled_ethernet); break; case SFLFLOW_IPV4: elemSiz = sizeof(SFLSampled_ipv4); break; case SFLFLOW_IPV6: elemSiz = sizeof(SFLSampled_ipv6); break; case SFLFLOW_EX_SWITCH: elemSiz = sizeof(SFLExtended_switch); break; case SFLFLOW_EX_ROUTER: elemSiz = routerEncodingLength(&elem->flowType.router); break; case SFLFLOW_EX_GATEWAY: elemSiz = 
gatewayEncodingLength(&elem->flowType.gateway); break; case SFLFLOW_EX_USER: elemSiz = userEncodingLength(&elem->flowType.user); break; case SFLFLOW_EX_URL: elemSiz = urlEncodingLength(&elem->flowType.url); break; case SFLFLOW_EX_MPLS: elemSiz = mplsEncodingLength(&elem->flowType.mpls); break; case SFLFLOW_EX_NAT: elemSiz = natEncodingLength(&elem->flowType.nat); break; case SFLFLOW_EX_MPLS_TUNNEL: elemSiz = mplsTunnelEncodingLength(&elem->flowType.mpls_tunnel); break; case SFLFLOW_EX_MPLS_VC: elemSiz = mplsVcEncodingLength(&elem->flowType.mpls_vc); break; case SFLFLOW_EX_MPLS_FTN: elemSiz = mplsFtnEncodingLength(&elem->flowType.mpls_ftn); break; case SFLFLOW_EX_MPLS_LDP_FEC: elemSiz = mplsLdpFecEncodingLength(&elem->flowType.mpls_ldp_fec); break; case SFLFLOW_EX_VLAN_TUNNEL: elemSiz = vlanTunnelEncodingLength(&elem->flowType.vlan_tunnel); break; default: sflError(receiver, "unexpected packet_data_tag"); return -1; break; } // cache the element size, and accumulate it into the overall FlowSample size elem->length = elemSiz; siz += elemSiz; } return siz; } /*_________________-------------------------------__________________ _________________ sfl_receiver_writeFlowSample __________________ -----------------_______________________________------------------ */ int sfl_receiver_writeFlowSample(SFLReceiver *receiver, SFL_FLOW_SAMPLE_TYPE *fs) { int packedSize; if(fs == NULL) return -1; if((packedSize = computeFlowSampleSize(receiver, fs)) == -1) return -1; // check in case this one sample alone is too big for the datagram // in fact - if it is even half as big then we should ditch it. Very // important to avoid overruning the packet buffer. if(packedSize > (int)(receiver->sFlowRcvrMaximumDatagramSize / 2)) { sflError(receiver, "flow sample too big for datagram"); return -1; } // if the sample pkt is full enough so that this sample might put // it over the limit, then we should send it now before going on. 
if((receiver->sampleCollector.pktlen + packedSize) >= receiver->sFlowRcvrMaximumDatagramSize) sendSample(receiver); receiver->sampleCollector.numSamples++; #ifdef SFL_USE_32BIT_INDEX putNet32(receiver, SFLFLOW_SAMPLE_EXPANDED); #else putNet32(receiver, SFLFLOW_SAMPLE); #endif putNet32(receiver, packedSize - 8); // don't include tag and len putNet32(receiver, fs->sequence_number); #ifdef SFL_USE_32BIT_INDEX putNet32(receiver, fs->ds_class); putNet32(receiver, fs->ds_index); #else putNet32(receiver, fs->source_id); #endif putNet32(receiver, fs->sampling_rate); putNet32(receiver, fs->sample_pool); putNet32(receiver, fs->drops); #ifdef SFL_USE_32BIT_INDEX putNet32(receiver, fs->inputFormat); putNet32(receiver, fs->input); putNet32(receiver, fs->outputFormat); putNet32(receiver, fs->output); #else putNet32(receiver, fs->input); putNet32(receiver, fs->output); #endif putNet32(receiver, fs->num_elements); { SFLFlow_sample_element *elem = fs->elements; for(; elem != NULL; elem = elem->nxt) { putNet32(receiver, elem->tag); putNet32(receiver, elem->length); // length cached in computeFlowSampleSize() switch(elem->tag) { case SFLFLOW_HEADER: putNet32(receiver, elem->flowType.header.header_protocol); putNet32(receiver, elem->flowType.header.frame_length); putNet32(receiver, elem->flowType.header.stripped); putNet32(receiver, elem->flowType.header.header_length); /* the header */ memcpy(receiver->sampleCollector.datap, elem->flowType.header.header_bytes, elem->flowType.header.header_length); /* round up to multiple of 4 to preserve alignment */ receiver->sampleCollector.datap += ((elem->flowType.header.header_length + 3) / 4); break; case SFLFLOW_ETHERNET: putNet32(receiver, elem->flowType.ethernet.eth_len); putMACAddress(receiver, elem->flowType.ethernet.src_mac); putMACAddress(receiver, elem->flowType.ethernet.dst_mac); putNet32(receiver, elem->flowType.ethernet.eth_type); break; case SFLFLOW_IPV4: putNet32(receiver, elem->flowType.ipv4.length); putNet32(receiver, 
elem->flowType.ipv4.protocol); put32(receiver, elem->flowType.ipv4.src_ip.addr); put32(receiver, elem->flowType.ipv4.dst_ip.addr); putNet32(receiver, elem->flowType.ipv4.src_port); putNet32(receiver, elem->flowType.ipv4.dst_port); putNet32(receiver, elem->flowType.ipv4.tcp_flags); putNet32(receiver, elem->flowType.ipv4.tos); break; case SFLFLOW_IPV6: putNet32(receiver, elem->flowType.ipv6.length); putNet32(receiver, elem->flowType.ipv6.protocol); put128(receiver, elem->flowType.ipv6.src_ip.addr); put128(receiver, elem->flowType.ipv6.dst_ip.addr); putNet32(receiver, elem->flowType.ipv6.src_port); putNet32(receiver, elem->flowType.ipv6.dst_port); putNet32(receiver, elem->flowType.ipv6.tcp_flags); putNet32(receiver, elem->flowType.ipv6.priority); break; case SFLFLOW_EX_SWITCH: putSwitch(receiver, &elem->flowType.sw); break; case SFLFLOW_EX_ROUTER: putRouter(receiver, &elem->flowType.router); break; case SFLFLOW_EX_GATEWAY: putGateway(receiver, &elem->flowType.gateway); break; case SFLFLOW_EX_USER: putUser(receiver, &elem->flowType.user); break; case SFLFLOW_EX_URL: putUrl(receiver, &elem->flowType.url); break; case SFLFLOW_EX_MPLS: putMpls(receiver, &elem->flowType.mpls); break; case SFLFLOW_EX_NAT: putNat(receiver, &elem->flowType.nat); break; case SFLFLOW_EX_MPLS_TUNNEL: putMplsTunnel(receiver, &elem->flowType.mpls_tunnel); break; case SFLFLOW_EX_MPLS_VC: putMplsVc(receiver, &elem->flowType.mpls_vc); break; case SFLFLOW_EX_MPLS_FTN: putMplsFtn(receiver, &elem->flowType.mpls_ftn); break; case SFLFLOW_EX_MPLS_LDP_FEC: putMplsLdpFec(receiver, &elem->flowType.mpls_ldp_fec); break; case SFLFLOW_EX_VLAN_TUNNEL: putVlanTunnel(receiver, &elem->flowType.vlan_tunnel); break; default: sflError(receiver, "unexpected packet_data_tag"); return -1; break; } } } // sanity check assert(((u_char *)receiver->sampleCollector.datap - (u_char *)receiver->sampleCollector.data - receiver->sampleCollector.pktlen) == (u_int32_t)packedSize); // update the pktlen 
receiver->sampleCollector.pktlen = (u_char *)receiver->sampleCollector.datap - (u_char *)receiver->sampleCollector.data; return packedSize; } /*_________________-----------------------------__________________ _________________ computeCountersSampleSize __________________ -----------------_____________________________------------------ */ static int computeCountersSampleSize(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_TYPE *cs) { SFLCounters_sample_element *elem = cs->elements; #ifdef SFL_USE_32BIT_INDEX u_int siz = 24; /* tag, length, sequence_number, ds_class, ds_index, number of elements */ #else u_int siz = 20; /* tag, length, sequence_number, source_id, number of elements */ #endif cs->num_elements = 0; /* we're going to count them again even if this was set by the client */ for(; elem != NULL; elem = elem->nxt) { u_int elemSiz = 0; cs->num_elements++; siz += 8; /* tag, length */ switch(elem->tag) { case SFLCOUNTERS_GENERIC: elemSiz = sizeof(elem->counterBlock.generic); break; case SFLCOUNTERS_ETHERNET: elemSiz = sizeof(elem->counterBlock.ethernet); break; case SFLCOUNTERS_TOKENRING: elemSiz = sizeof(elem->counterBlock.tokenring); break; case SFLCOUNTERS_VG: elemSiz = sizeof(elem->counterBlock.vg); break; case SFLCOUNTERS_VLAN: elemSiz = sizeof(elem->counterBlock.vlan); break; default: sflError(receiver, "unexpected counters_tag"); return -1; break; } // cache the element size, and accumulate it into the overall FlowSample size elem->length = elemSiz; siz += elemSiz; } return siz; } /*_________________----------------------------------__________________ _________________ sfl_receiver_writeCountersSample __________________ -----------------__________________________________------------------ */ int sfl_receiver_writeCountersSample(SFLReceiver *receiver, SFL_COUNTERS_SAMPLE_TYPE *cs) { int packedSize; if(cs == NULL) return -1; // if the sample pkt is full enough so that this sample might put // it over the limit, then we should send it now. 
if((packedSize = computeCountersSampleSize(receiver, cs)) == -1) return -1; // check in case this one sample alone is too big for the datagram // in fact - if it is even half as big then we should ditch it. Very // important to avoid overruning the packet buffer. if(packedSize > (int)(receiver->sFlowRcvrMaximumDatagramSize / 2)) { sflError(receiver, "counters sample too big for datagram"); return -1; } if((receiver->sampleCollector.pktlen + packedSize) >= receiver->sFlowRcvrMaximumDatagramSize) sendSample(receiver); receiver->sampleCollector.numSamples++; #ifdef SFL_USE_32BIT_INDEX putNet32(receiver, SFLCOUNTERS_SAMPLE_EXPANDED); #else putNet32(receiver, SFLCOUNTERS_SAMPLE); #endif putNet32(receiver, packedSize - 8); // tag and length not included putNet32(receiver, cs->sequence_number); #ifdef SFL_USE_32BIT_INDEX putNet32(receiver, cs->ds_class); putNet32(receiver, cs->ds_index); #else putNet32(receiver, cs->source_id); #endif putNet32(receiver, cs->num_elements); { SFLCounters_sample_element *elem = cs->elements; for(; elem != NULL; elem = elem->nxt) { putNet32(receiver, elem->tag); putNet32(receiver, elem->length); // length cached in computeCountersSampleSize() switch(elem->tag) { case SFLCOUNTERS_GENERIC: putGenericCounters(receiver, &(elem->counterBlock.generic)); break; case SFLCOUNTERS_ETHERNET: // all these counters are 32-bit putNet32_run(receiver, &elem->counterBlock.ethernet, sizeof(elem->counterBlock.ethernet) / 4); break; case SFLCOUNTERS_TOKENRING: // all these counters are 32-bit putNet32_run(receiver, &elem->counterBlock.tokenring, sizeof(elem->counterBlock.tokenring) / 4); break; case SFLCOUNTERS_VG: // mixed sizes putNet32(receiver, elem->counterBlock.vg.dot12InHighPriorityFrames); putNet64(receiver, elem->counterBlock.vg.dot12InHighPriorityOctets); putNet32(receiver, elem->counterBlock.vg.dot12InNormPriorityFrames); putNet64(receiver, elem->counterBlock.vg.dot12InNormPriorityOctets); putNet32(receiver, elem->counterBlock.vg.dot12InIPMErrors); 
putNet32(receiver, elem->counterBlock.vg.dot12InOversizeFrameErrors); putNet32(receiver, elem->counterBlock.vg.dot12InDataErrors); putNet32(receiver, elem->counterBlock.vg.dot12InNullAddressedFrames); putNet32(receiver, elem->counterBlock.vg.dot12OutHighPriorityFrames); putNet64(receiver, elem->counterBlock.vg.dot12OutHighPriorityOctets); putNet32(receiver, elem->counterBlock.vg.dot12TransitionIntoTrainings); putNet64(receiver, elem->counterBlock.vg.dot12HCInHighPriorityOctets); putNet64(receiver, elem->counterBlock.vg.dot12HCInNormPriorityOctets); putNet64(receiver, elem->counterBlock.vg.dot12HCOutHighPriorityOctets); break; case SFLCOUNTERS_VLAN: // mixed sizes putNet32(receiver, elem->counterBlock.vlan.vlan_id); putNet64(receiver, elem->counterBlock.vlan.octets); putNet32(receiver, elem->counterBlock.vlan.ucastPkts); putNet32(receiver, elem->counterBlock.vlan.multicastPkts); putNet32(receiver, elem->counterBlock.vlan.broadcastPkts); putNet32(receiver, elem->counterBlock.vlan.discards); break; default: sflError(receiver, "unexpected counters_tag"); return -1; break; } } } // sanity check assert(((u_char *)receiver->sampleCollector.datap - (u_char *)receiver->sampleCollector.data - receiver->sampleCollector.pktlen) == (u_int32_t)packedSize); // update the pktlen receiver->sampleCollector.pktlen = (u_char *)receiver->sampleCollector.datap - (u_char *)receiver->sampleCollector.data; return packedSize; } /*_________________---------------------------------__________________ _________________ sfl_receiver_samplePacketsSent __________________ -----------------_________________________________------------------ */ u_int32_t sfl_receiver_samplePacketsSent(SFLReceiver *receiver) { return receiver->sampleCollector.packetSeqNo; } /*_________________---------------------------__________________ _________________ sendSample __________________ -----------------___________________________------------------ */ static void sendSample(SFLReceiver *receiver) { /* construct and send 
out the sample, then reset for the next one... */ /* first fill in the header with the latest values */ /* version, agent_address and sub_agent_id were pre-set. */ u_int32_t hdrIdx = (receiver->agent->myIP.type == SFLADDRESSTYPE_IP_V6) ? 7 : 4; receiver->sampleCollector.data[hdrIdx++] = htonl(++receiver->sampleCollector.packetSeqNo); /* seq no */ receiver->sampleCollector.data[hdrIdx++] = htonl((receiver->agent->now - receiver->agent->bootTime) * 1000); /* uptime */ receiver->sampleCollector.data[hdrIdx++] = htonl(receiver->sampleCollector.numSamples); /* num samples */ /* send */ if(receiver->agent->sendFn) (*receiver->agent->sendFn)(receiver->agent->magic, receiver->agent, receiver, (u_char *)receiver->sampleCollector.data, receiver->sampleCollector.pktlen); else { #ifdef SFLOW_DO_SOCKET /* send it myself */ if (receiver->sFlowRcvrAddress.type == SFLADDRESSTYPE_IP_V6) { u_int32_t soclen = sizeof(struct sockaddr_in6); int result = sendto(receiver->agent->receiverSocket6, receiver->sampleCollector.data, receiver->sampleCollector.pktlen, 0, (struct sockaddr *)&receiver->receiver6, soclen); if(result == -1 && errno != EINTR) sfl_agent_sysError(receiver->agent, "receiver", "IPv6 socket sendto error"); if(result == 0) sfl_agent_error(receiver->agent, "receiver", "IPv6 socket sendto returned 0"); } else { u_int32_t soclen = sizeof(struct sockaddr_in); int result = sendto(receiver->agent->receiverSocket4, receiver->sampleCollector.data, receiver->sampleCollector.pktlen, 0, (struct sockaddr *)&receiver->receiver4, soclen); if(result == -1 && errno != EINTR) sfl_agent_sysError(receiver->agent, "receiver", "socket sendto error"); if(result == 0) sfl_agent_error(receiver->agent, "receiver", "socket sendto returned 0"); } #endif } /* reset for the next time */ resetSampleCollector(receiver); } /*_________________---------------------------__________________ _________________ resetSampleCollector __________________ -----------------___________________________------------------ 
*/ static void resetSampleCollector(SFLReceiver *receiver) { receiver->sampleCollector.pktlen = 0; receiver->sampleCollector.numSamples = 0; /* point the datap to just after the header */ receiver->sampleCollector.datap = (receiver->agent->myIP.type == SFLADDRESSTYPE_IP_V6) ? (receiver->sampleCollector.data + 10) : (receiver->sampleCollector.data + 7); receiver->sampleCollector.pktlen = (u_char *)receiver->sampleCollector.datap - (u_char *)receiver->sampleCollector.data; } /*_________________---------------------------__________________ _________________ sflError __________________ -----------------___________________________------------------ */ static void sflError(SFLReceiver *receiver, char *msg) { sfl_agent_error(receiver->agent, "receiver", msg); resetSampleCollector(receiver); } #endif /* !__CHECKER__ */ openvswitch-2.0.1+git20140120/lib/sflow_sampler.c000066400000000000000000000157301226605124000212470ustar00rootroot00000000000000/* Copyright (c) 2002-2009 InMon Corp. Licensed under the terms of either the * Sun Industry Standards Source License 1.1, that is available at: * http://host-sflow.sourceforge.net/sissl.html * or the InMon sFlow License, that is available at: * http://www.inmon.com/technology/sflowlicense.txt */ #include "sflow_api.h" /*_________________--------------------------__________________ _________________ sfl_sampler_init __________________ -----------------__________________________------------------ */ void sfl_sampler_init(SFLSampler *sampler, SFLAgent *agent, SFLDataSource_instance *pdsi) { /* copy the dsi in case it points to sampler->dsi, which we are about to clear. 
(Thanks to Jagjit Choudray of Force 10 Networks for pointing out this bug) */ SFLDataSource_instance dsi = *pdsi; /* preserve the *nxt pointer too, in case we are resetting this poller and it is already part of the agent's linked list (thanks to Matt Woodly for pointing this out, and to Andy Kitchingman for pointing out that it applies to the hash_nxt ptr too) */ SFLSampler *nxtPtr = sampler->nxt; SFLSampler *hashPtr = sampler->hash_nxt; /* clear everything */ memset(sampler, 0, sizeof(*sampler)); /* restore the linked list and hash-table ptr */ sampler->nxt = nxtPtr; sampler->hash_nxt = hashPtr; /* now copy in the parameters */ sampler->agent = agent; sampler->dsi = dsi; /* set defaults */ sampler->sFlowFsMaximumHeaderSize = SFL_DEFAULT_HEADER_SIZE; sampler->sFlowFsPacketSamplingRate = SFL_DEFAULT_SAMPLING_RATE; } /*_________________--------------------------__________________ _________________ reset __________________ -----------------__________________________------------------ */ static void reset(SFLSampler *sampler) { SFLDataSource_instance dsi = sampler->dsi; sfl_sampler_init(sampler, sampler->agent, &dsi); } /*_________________---------------------------__________________ _________________ MIB access __________________ -----------------___________________________------------------ */ u_int32_t sfl_sampler_get_sFlowFsReceiver(SFLSampler *sampler) { return sampler->sFlowFsReceiver; } void sfl_sampler_set_sFlowFsReceiver(SFLSampler *sampler, u_int32_t sFlowFsReceiver) { sampler->sFlowFsReceiver = sFlowFsReceiver; if(sFlowFsReceiver == 0) reset(sampler); else { /* retrieve and cache a direct pointer to my receiver */ sampler->myReceiver = sfl_agent_getReceiver(sampler->agent, sampler->sFlowFsReceiver); } } u_int32_t sfl_sampler_get_sFlowFsPacketSamplingRate(SFLSampler *sampler) { return sampler->sFlowFsPacketSamplingRate; } void sfl_sampler_set_sFlowFsPacketSamplingRate(SFLSampler *sampler, u_int32_t sFlowFsPacketSamplingRate) { 
sampler->sFlowFsPacketSamplingRate = sFlowFsPacketSamplingRate; } u_int32_t sfl_sampler_get_sFlowFsMaximumHeaderSize(SFLSampler *sampler) { return sampler->sFlowFsMaximumHeaderSize; } void sfl_sampler_set_sFlowFsMaximumHeaderSize(SFLSampler *sampler, u_int32_t sFlowFsMaximumHeaderSize) { sampler->sFlowFsMaximumHeaderSize = sFlowFsMaximumHeaderSize; } /* call this to set a maximum samples-per-second threshold. If the sampler reaches this threshold it will automatically back off the sampling rate. A value of 0 disables the mechanism */ void sfl_sampler_set_backoffThreshold(SFLSampler *sampler, u_int32_t samplesPerSecond) { sampler->backoffThreshold = samplesPerSecond; } u_int32_t sfl_sampler_get_backoffThreshold(SFLSampler *sampler) { return sampler->backoffThreshold; } u_int32_t sfl_sampler_get_samplesLastTick(SFLSampler *sampler) { return sampler->samplesLastTick; } /*_________________---------------------------------__________________ _________________ sequence number reset __________________ -----------------_________________________________------------------ Used by the agent to indicate a samplePool discontinuity so that the sflow collector will know to ignore the next delta. */ void sfl_sampler_resetFlowSeqNo(SFLSampler *sampler) { sampler->flowSampleSeqNo = 0; } /*_________________---------------------------__________________ _________________ sfl_sampler_tick __________________ -----------------___________________________------------------ */ void sfl_sampler_tick(SFLSampler *sampler, time_t now) { if(sampler->backoffThreshold && sampler->samplesThisTick > sampler->backoffThreshold) { /* automatic backoff. If using hardware sampling then this is where you have to * call out to change the sampling rate and make sure that any other registers/variables * that hold this value are updated. 
*/ sampler->sFlowFsPacketSamplingRate *= 2; } sampler->samplesLastTick = sampler->samplesThisTick; sampler->samplesThisTick = 0; } /*_________________------------------------------__________________ _________________ sfl_sampler_writeFlowSample __________________ -----------------______________________________------------------ */ void sfl_sampler_writeFlowSample(SFLSampler *sampler, SFL_FLOW_SAMPLE_TYPE *fs) { if(fs == NULL) return; sampler->samplesThisTick++; /* increment the sequence number */ fs->sequence_number = ++sampler->flowSampleSeqNo; /* copy the other header fields in */ #ifdef SFL_USE_32BIT_INDEX fs->ds_class = SFL_DS_CLASS(sampler->dsi); fs->ds_index = SFL_DS_INDEX(sampler->dsi); #else fs->source_id = SFL_DS_DATASOURCE(sampler->dsi); #endif /* the sampling rate may have been set already. */ if(fs->sampling_rate == 0) fs->sampling_rate = sampler->sFlowFsPacketSamplingRate; /* the samplePool may be maintained upstream too. */ if( fs->sample_pool == 0) fs->sample_pool = sampler->samplePool; /* sent to my receiver */ if(sampler->myReceiver) sfl_receiver_writeFlowSample(sampler->myReceiver, fs); } #ifdef SFLOW_SOFTWARE_SAMPLING /* ================== software sampling ========================*/ /*_________________---------------------------__________________ _________________ nextRandomSkip __________________ -----------------___________________________------------------ */ inline static u_int32_t nextRandomSkip(u_int32_t mean) { if(mean == 0 || mean == 1) return 1; return ((random() % ((2 * mean) - 1)) + 1); } /*_________________---------------------------__________________ _________________ sfl_sampler_takeSample __________________ -----------------___________________________------------------ */ int sfl_sampler_takeSample(SFLSampler *sampler) { if(sampler->skip == 0) { /* first time - seed the random number generator */ srandom(SFL_DS_INDEX(sampler->dsi)); sampler->skip = nextRandomSkip(sampler->sFlowFsPacketSamplingRate); } /* increment the samplePool 
*/ sampler->samplePool++; if(--sampler->skip == 0) { /* reached zero. Set the next skip and return true. */ sampler->skip = nextRandomSkip(sampler->sFlowFsPacketSamplingRate); return 1; } return 0; } #endif /* SFLOW_SOFTWARE_SAMPLING */ openvswitch-2.0.1+git20140120/lib/sha1.c000066400000000000000000000214031226605124000172200ustar00rootroot00000000000000/* * This file is from the Apache Portable Runtime Library. * The full upstream copyright and license statement is included below. * Modifications copyright (c) 2009, 2010 Nicira, Inc. */ /* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* This software also makes use of the following component: * * NIST Secure Hash Algorithm * heavily modified by Uwe Hollerbach uh@alumni.caltech edu * from Peter C. 
Gutmann's implementation as found in * Applied Cryptography by Bruce Schneier * This code is hereby placed in the public domain */ #include #include "sha1.h" #include #include #include "compiler.h" #include "util.h" /* a bit faster & bigger, if defined */ #define UNROLL_LOOPS /* SHA f()-functions */ static inline uint32_t f1(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (~x & z); } static inline uint32_t f2(uint32_t x, uint32_t y, uint32_t z) { return x ^ y ^ z; } static inline uint32_t f3(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | (x & z) | (y & z); } static inline uint32_t f4(uint32_t x, uint32_t y, uint32_t z) { return x ^ y ^ z; } /* SHA constants */ #define CONST1 0x5a827999L #define CONST2 0x6ed9eba1L #define CONST3 0x8f1bbcdcL #define CONST4 0xca62c1d6L /* 32-bit rotate */ static inline uint32_t rotate32(uint32_t x, int n) { return ((x << n) | (x >> (32 - n))); } #define FUNC(n, i) \ do { \ temp = rotate32(A, 5) + f##n(B, C, D) + E + W[i] + CONST##n; \ E = D; \ D = C; \ C = rotate32(B, 30); \ B = A; \ A = temp; \ } while (0) #define SHA_BLOCK_SIZE 64 /* Do SHA transformation. 
*/
/* Processes the 16 words currently held in sha_info->data as one block and
 * folds the result into sha_info->digest.  The words are consumed as
 * stored; the callers in this file run maybe_byte_reverse() over the
 * buffer before calling. */
static void
sha_transform(struct sha1_ctx *sha_info)
{
    int i;
    uint32_t temp, A, B, C, D, E, W[80];

    /* Message schedule: the first 16 words come straight from the block... */
    for (i = 0; i < 16; ++i) {
        W[i] = sha_info->data[i];
    }
    /* ...and each later word is the XOR of four earlier ones, rotated left
     * by one bit. */
    for (i = 16; i < 80; ++i) {
        W[i] = W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16];
        W[i] = rotate32(W[i], 1);
    }
    /* Working variables start from the current digest. */
    A = sha_info->digest[0];
    B = sha_info->digest[1];
    C = sha_info->digest[2];
    D = sha_info->digest[3];
    E = sha_info->digest[4];
    /* 80 rounds: four groups of 20, each with its own f-function and
     * constant (see FUNC). */
#ifdef UNROLL_LOOPS
    FUNC(1, 0);  FUNC(1, 1);  FUNC(1, 2);  FUNC(1, 3);  FUNC(1, 4);
    FUNC(1, 5);  FUNC(1, 6);  FUNC(1, 7);  FUNC(1, 8);  FUNC(1, 9);
    FUNC(1,10);  FUNC(1,11);  FUNC(1,12);  FUNC(1,13);  FUNC(1,14);
    FUNC(1,15);  FUNC(1,16);  FUNC(1,17);  FUNC(1,18);  FUNC(1,19);

    FUNC(2,20);  FUNC(2,21);  FUNC(2,22);  FUNC(2,23);  FUNC(2,24);
    FUNC(2,25);  FUNC(2,26);  FUNC(2,27);  FUNC(2,28);  FUNC(2,29);
    FUNC(2,30);  FUNC(2,31);  FUNC(2,32);  FUNC(2,33);  FUNC(2,34);
    FUNC(2,35);  FUNC(2,36);  FUNC(2,37);  FUNC(2,38);  FUNC(2,39);

    FUNC(3,40);  FUNC(3,41);  FUNC(3,42);  FUNC(3,43);  FUNC(3,44);
    FUNC(3,45);  FUNC(3,46);  FUNC(3,47);  FUNC(3,48);  FUNC(3,49);
    FUNC(3,50);  FUNC(3,51);  FUNC(3,52);  FUNC(3,53);  FUNC(3,54);
    FUNC(3,55);  FUNC(3,56);  FUNC(3,57);  FUNC(3,58);  FUNC(3,59);

    FUNC(4,60);  FUNC(4,61);  FUNC(4,62);  FUNC(4,63);  FUNC(4,64);
    FUNC(4,65);  FUNC(4,66);  FUNC(4,67);  FUNC(4,68);  FUNC(4,69);
    FUNC(4,70);  FUNC(4,71);  FUNC(4,72);  FUNC(4,73);  FUNC(4,74);
    FUNC(4,75);  FUNC(4,76);  FUNC(4,77);  FUNC(4,78);  FUNC(4,79);
#else /* !UNROLL_LOOPS */
    for (i = 0; i < 20; ++i) {
        FUNC(1,i);
    }
    for (i = 20; i < 40; ++i) {
        FUNC(2,i);
    }
    for (i = 40; i < 60; ++i) {
        FUNC(3,i);
    }
    for (i = 60; i < 80; ++i) {
        FUNC(4,i);
    }
#endif /* !UNROLL_LOOPS */
    /* Fold the working variables back into the running digest. */
    sha_info->digest[0] += A;
    sha_info->digest[1] += B;
    sha_info->digest[2] += C;
    sha_info->digest[3] += D;
    sha_info->digest[4] += E;
}

/* 'count' is the number of bytes to do an endian flip.
*/ static void maybe_byte_reverse(uint32_t *buffer OVS_UNUSED, int count OVS_UNUSED) { #if !WORDS_BIGENDIAN int i; uint8_t ct[4], *cp; count /= sizeof(uint32_t); cp = (uint8_t *) buffer; for (i = 0; i < count; i++) { ct[0] = cp[0]; ct[1] = cp[1]; ct[2] = cp[2]; ct[3] = cp[3]; cp[0] = ct[3]; cp[1] = ct[2]; cp[2] = ct[1]; cp[3] = ct[0]; cp += sizeof(uint32_t); } #endif } /* * Initialize the SHA digest. * context: The SHA context to initialize */ void sha1_init(struct sha1_ctx *sha_info) { sha_info->digest[0] = 0x67452301L; sha_info->digest[1] = 0xefcdab89L; sha_info->digest[2] = 0x98badcfeL; sha_info->digest[3] = 0x10325476L; sha_info->digest[4] = 0xc3d2e1f0L; sha_info->count_lo = 0L; sha_info->count_hi = 0L; sha_info->local = 0; } /* * Update the SHA digest. * context: The SHA1 context to update. * input: The buffer to add to the SHA digest. * inputLen: The length of the input buffer. */ void sha1_update(struct sha1_ctx *ctx, const void *buffer_, size_t count) { const uint8_t *buffer = buffer_; unsigned int i; if ((ctx->count_lo + (count << 3)) < ctx->count_lo) { ctx->count_hi++; } ctx->count_lo += count << 3; ctx->count_hi += count >> 29; if (ctx->local) { i = SHA_BLOCK_SIZE - ctx->local; if (i > count) { i = count; } memcpy(((uint8_t *) ctx->data) + ctx->local, buffer, i); count -= i; buffer += i; ctx->local += i; if (ctx->local == SHA_BLOCK_SIZE) { maybe_byte_reverse(ctx->data, SHA_BLOCK_SIZE); sha_transform(ctx); } else { return; } } while (count >= SHA_BLOCK_SIZE) { memcpy(ctx->data, buffer, SHA_BLOCK_SIZE); buffer += SHA_BLOCK_SIZE; count -= SHA_BLOCK_SIZE; maybe_byte_reverse(ctx->data, SHA_BLOCK_SIZE); sha_transform(ctx); } memcpy(ctx->data, buffer, count); ctx->local = count; } /* * Finish computing the SHA digest. * digest: the output buffer in which to store the digest. * context: The context to finalize. 
*/ void sha1_final(struct sha1_ctx *ctx, uint8_t digest[SHA1_DIGEST_SIZE]) { int count, i, j; uint32_t lo_bit_count, hi_bit_count, k; lo_bit_count = ctx->count_lo; hi_bit_count = ctx->count_hi; count = (int) ((lo_bit_count >> 3) & 0x3f); ((uint8_t *) ctx->data)[count++] = 0x80; if (count > SHA_BLOCK_SIZE - 8) { memset(((uint8_t *) ctx->data) + count, 0, SHA_BLOCK_SIZE - count); maybe_byte_reverse(ctx->data, SHA_BLOCK_SIZE); sha_transform(ctx); memset((uint8_t *) ctx->data, 0, SHA_BLOCK_SIZE - 8); } else { memset(((uint8_t *) ctx->data) + count, 0, SHA_BLOCK_SIZE - 8 - count); } maybe_byte_reverse(ctx->data, SHA_BLOCK_SIZE); ctx->data[14] = hi_bit_count; ctx->data[15] = lo_bit_count; sha_transform(ctx); for (i = j = 0; j < SHA1_DIGEST_SIZE; i++) { k = ctx->digest[i]; digest[j++] = k >> 24; digest[j++] = k >> 16; digest[j++] = k >> 8; digest[j++] = k; } } /* Computes the hash of 'n' bytes in 'data' into 'digest'. */ void sha1_bytes(const void *data, size_t n, uint8_t digest[SHA1_DIGEST_SIZE]) { struct sha1_ctx ctx; sha1_init(&ctx); sha1_update(&ctx, data, n); sha1_final(&ctx, digest); } void sha1_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char hex[SHA1_HEX_DIGEST_LEN + 1]) { int i; for (i = 0; i < SHA1_DIGEST_SIZE; i++) { *hex++ = "0123456789abcdef"[digest[i] >> 4]; *hex++ = "0123456789abcdef"[digest[i] & 15]; } *hex = '\0'; } bool sha1_from_hex(uint8_t digest[SHA1_DIGEST_SIZE], const char *hex) { int i; for (i = 0; i < SHA1_DIGEST_SIZE; i++) { bool ok; digest[i] = hexits_value(hex, 2, &ok); if (!ok) { return false; } hex += 2; } return true; } openvswitch-2.0.1+git20140120/lib/sha1.h000066400000000000000000000051731226605124000172330ustar00rootroot00000000000000/* * This file is from the Apache Portable Runtime Library. * The full upstream copyright and license statement is included below. * Modifications copyright (c) 2009 Nicira, Inc. */ /* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* NIST Secure Hash Algorithm * heavily modified by Uwe Hollerbach uh@alumni.caltech edu * from Peter C. Gutmann's implementation as found in * Applied Cryptography by Bruce Schneier * This code is hereby placed in the public domain */ #ifndef SHA1_H #define SHA1_H #include #include #include #define SHA1_DIGEST_SIZE 20 /* Size of the SHA1 digest. */ #define SHA1_HEX_DIGEST_LEN 40 /* Length of SHA1 digest as hex in ASCII. */ /* SHA1 context structure. */ struct sha1_ctx { uint32_t digest[5]; /* Message digest. */ uint32_t count_lo, count_hi; /* 64-bit bit counts. */ uint32_t data[16]; /* SHA data buffer */ int local; /* Unprocessed amount in data. 
*/ }; void sha1_init(struct sha1_ctx *); void sha1_update(struct sha1_ctx *, const void *, size_t); void sha1_final(struct sha1_ctx *, uint8_t digest[SHA1_DIGEST_SIZE]); void sha1_bytes(const void *, size_t, uint8_t digest[SHA1_DIGEST_SIZE]); #define SHA1_FMT \ "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" \ "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" #define SHA1_ARGS(DIGEST) \ ((DIGEST)[0]), ((DIGEST)[1]), ((DIGEST)[2]), ((DIGEST)[3]), \ ((DIGEST)[4]), ((DIGEST)[5]), ((DIGEST)[6]), ((DIGEST)[7]), \ ((DIGEST)[8]), ((DIGEST)[9]), ((DIGEST)[10]), ((DIGEST)[11]), \ ((DIGEST)[12]), ((DIGEST)[13]), ((DIGEST)[14]), ((DIGEST)[15]), \ ((DIGEST)[16]), ((DIGEST)[17]), ((DIGEST)[18]), ((DIGEST)[19]) void sha1_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char hex[SHA1_HEX_DIGEST_LEN + 1]); bool sha1_from_hex(uint8_t digest[SHA1_DIGEST_SIZE], const char *hex); #endif /* sha1.h */ openvswitch-2.0.1+git20140120/lib/shash.c000066400000000000000000000170611226605124000174770ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

#include
#include "shash.h"
#include "hash.h"

/* Lookup helper used by the public find/replace functions; it is defined
 * further down in this file. */
static struct shash_node *shash_find__(const struct shash *,
                                       const char *name, size_t name_len,
                                       size_t hash);

/* Returns the hash of the null-terminated string 'name', computed by
 * hash_string() with a basis of 0. */
static size_t
hash_name(const char *name)
{
    return hash_string(name, 0);
}

/* Initializes 'sh' by initializing its underlying hmap. */
void
shash_init(struct shash *sh)
{
    hmap_init(&sh->map);
}

/* Removes and frees every node in 'sh' (see shash_clear()), then destroys the
 * underlying hmap.  Does nothing if 'sh' is null.  The nodes' 'data' is not
 * freed. */
void
shash_destroy(struct shash *sh)
{
    if (sh) {
        shash_clear(sh);
        hmap_destroy(&sh->map);
    }
}

/* Like shash_destroy(), but also free() each node's 'data'. */
void
shash_destroy_free_data(struct shash *sh)
{
    if (sh) {
        shash_clear_free_data(sh);
        hmap_destroy(&sh->map);
    }
}

/* Exchanges the contents of 'a' and 'b' by swapping their hmaps. */
void
shash_swap(struct shash *a, struct shash *b)
{
    hmap_swap(&a->map, &b->map);
}

/* Forwards to hmap_moved() for the hmap embedded in 'sh'. */
void
shash_moved(struct shash *sh)
{
    hmap_moved(&sh->map);
}

/* Removes every node from 'sh', freeing each node and its name.  The nodes'
 * 'data' is left alone. */
void
shash_clear(struct shash *sh)
{
    struct shash_node *node, *next;

    /* The _SAFE iterator is needed because each node is freed inside the
     * loop body. */
    SHASH_FOR_EACH_SAFE (node, next, sh) {
        hmap_remove(&sh->map, &node->node);
        free(node->name);
        free(node);
    }
}

/* Like shash_clear(), but also free() each node's 'data'. */
void
shash_clear_free_data(struct shash *sh)
{
    struct shash_node *node, *next;

    SHASH_FOR_EACH_SAFE (node, next, sh) {
        hmap_remove(&sh->map, &node->node);
        free(node->data);
        free(node->name);
        free(node);
    }
}

/* Returns true if 'shash' contains no nodes, false otherwise. */
bool
shash_is_empty(const struct shash *shash)
{
    return hmap_is_empty(&shash->map);
}

/* Returns the number of nodes in 'shash'. */
size_t
shash_count(const struct shash *shash)
{
    return hmap_count(&shash->map);
}

/* Allocates a new node, stores 'name' in it as-is (no copy is made) together
 * with 'data' (with const cast away), and inserts it into 'sh' under 'hash'.
 * Returns the new node. */
static struct shash_node *
shash_add_nocopy__(struct shash *sh, char *name, const void *data, size_t hash)
{
    struct shash_node *node = xmalloc(sizeof *node);
    node->name = name;
    node->data = CONST_CAST(void *, data);
    hmap_insert(&sh->map, &node->node, hash);
    return node;
}

/* It is the caller's responsibility to avoid duplicate names, if that is
 * desirable. */
struct shash_node *
shash_add_nocopy(struct shash *sh, char *name, const void *data)
{
    return shash_add_nocopy__(sh, name, data, hash_name(name));
}

/* It is the caller's responsibility to avoid duplicate names, if that is
 * desirable.
*/ struct shash_node * shash_add(struct shash *sh, const char *name, const void *data) { return shash_add_nocopy(sh, xstrdup(name), data); } bool shash_add_once(struct shash *sh, const char *name, const void *data) { if (!shash_find(sh, name)) { shash_add(sh, name, data); return true; } else { return false; } } void shash_add_assert(struct shash *sh, const char *name, const void *data) { bool added OVS_UNUSED = shash_add_once(sh, name, data); ovs_assert(added); } /* Searches for 'name' in 'sh'. If it does not already exist, adds it along * with 'data' and returns NULL. If it does already exist, replaces its data * by 'data' and returns the data that it formerly contained. */ void * shash_replace(struct shash *sh, const char *name, const void *data) { size_t hash = hash_name(name); struct shash_node *node; node = shash_find__(sh, name, strlen(name), hash); if (!node) { shash_add_nocopy__(sh, xstrdup(name), data, hash); return NULL; } else { void *old_data = node->data; node->data = CONST_CAST(void *, data); return old_data; } } /* Deletes 'node' from 'sh' and frees the node's name. The caller is still * responsible for freeing the node's data, if necessary. */ void shash_delete(struct shash *sh, struct shash_node *node) { free(shash_steal(sh, node)); } /* Deletes 'node' from 'sh'. Neither the node's name nor its data is freed; * instead, ownership is transferred to the caller. Returns the node's * name. */ char * shash_steal(struct shash *sh, struct shash_node *node) { char *name = node->name; hmap_remove(&sh->map, &node->node); free(node); return name; } static struct shash_node * shash_find__(const struct shash *sh, const char *name, size_t name_len, size_t hash) { struct shash_node *node; HMAP_FOR_EACH_WITH_HASH (node, node, hash, &sh->map) { if (!strncmp(node->name, name, name_len) && !node->name[name_len]) { return node; } } return NULL; } /* If there are duplicates, returns a random element. 
*/ struct shash_node * shash_find(const struct shash *sh, const char *name) { return shash_find__(sh, name, strlen(name), hash_name(name)); } /* Finds and returns a shash_node within 'sh' that has the given 'name' that is * exactly 'len' bytes long. Returns NULL if no node in 'sh' has that name. */ struct shash_node * shash_find_len(const struct shash *sh, const char *name, size_t len) { return shash_find__(sh, name, len, hash_bytes(name, len, 0)); } void * shash_find_data(const struct shash *sh, const char *name) { struct shash_node *node = shash_find(sh, name); return node ? node->data : NULL; } void * shash_find_and_delete(struct shash *sh, const char *name) { struct shash_node *node = shash_find(sh, name); if (node) { void *data = node->data; shash_delete(sh, node); return data; } else { return NULL; } } void * shash_find_and_delete_assert(struct shash *sh, const char *name) { void *data = shash_find_and_delete(sh, name); ovs_assert(data != NULL); return data; } struct shash_node * shash_first(const struct shash *shash) { struct hmap_node *node = hmap_first(&shash->map); return node ? CONTAINER_OF(node, struct shash_node, node) : NULL; } static int compare_nodes_by_name(const void *a_, const void *b_) { const struct shash_node *const *a = a_; const struct shash_node *const *b = b_; return strcmp((*a)->name, (*b)->name); } const struct shash_node ** shash_sort(const struct shash *sh) { if (shash_is_empty(sh)) { return NULL; } else { const struct shash_node **nodes; struct shash_node *node; size_t i, n; n = shash_count(sh); nodes = xmalloc(n * sizeof *nodes); i = 0; SHASH_FOR_EACH (node, sh) { nodes[i++] = node; } ovs_assert(i == n); qsort(nodes, n, sizeof *nodes, compare_nodes_by_name); return nodes; } } /* Returns true if 'a' and 'b' contain the same keys (regardless of their * values), false otherwise. 
*/ bool shash_equal_keys(const struct shash *a, const struct shash *b) { struct shash_node *node; if (hmap_count(&a->map) != hmap_count(&b->map)) { return false; } SHASH_FOR_EACH (node, a) { if (!shash_find(b, node->name)) { return false; } } return true; } /* Chooses and returns a randomly selected node from 'sh', which must not be * empty. * * I wouldn't depend on this algorithm to be fair, since I haven't analyzed it. * But it does at least ensure that any node in 'sh' can be chosen. */ struct shash_node * shash_random_node(struct shash *sh) { return CONTAINER_OF(hmap_random_node(&sh->map), struct shash_node, node); } openvswitch-2.0.1+git20140120/lib/shash.h000066400000000000000000000047761226605124000175150ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef SHASH_H #define SHASH_H 1 #include "hmap.h" #ifdef __cplusplus extern "C" { #endif struct shash_node { struct hmap_node node; char *name; void *data; }; struct shash { struct hmap map; }; #define SHASH_INITIALIZER(SHASH) { HMAP_INITIALIZER(&(SHASH)->map) } #define SHASH_FOR_EACH(SHASH_NODE, SHASH) \ HMAP_FOR_EACH (SHASH_NODE, node, &(SHASH)->map) #define SHASH_FOR_EACH_SAFE(SHASH_NODE, NEXT, SHASH) \ HMAP_FOR_EACH_SAFE (SHASH_NODE, NEXT, node, &(SHASH)->map) void shash_init(struct shash *); void shash_destroy(struct shash *); void shash_destroy_free_data(struct shash *); void shash_swap(struct shash *, struct shash *); void shash_moved(struct shash *); void shash_clear(struct shash *); void shash_clear_free_data(struct shash *); bool shash_is_empty(const struct shash *); size_t shash_count(const struct shash *); struct shash_node *shash_add(struct shash *, const char *, const void *); struct shash_node *shash_add_nocopy(struct shash *, char *, const void *); bool shash_add_once(struct shash *, const char *, const void *); void shash_add_assert(struct shash *, const char *, const void *); void *shash_replace(struct shash *, const char *, const void *data); void shash_delete(struct shash *, struct shash_node *); char *shash_steal(struct shash *, struct shash_node *); struct shash_node *shash_find(const struct shash *, const char *); struct shash_node *shash_find_len(const struct shash *, const char *, size_t); void *shash_find_data(const struct shash *, const char *); void *shash_find_and_delete(struct shash *, const char *); void *shash_find_and_delete_assert(struct shash *, const char *); struct shash_node *shash_first(const struct shash *); const struct shash_node **shash_sort(const struct shash *); bool shash_equal_keys(const struct shash *, const struct shash *); struct shash_node *shash_random_node(struct shash *); #ifdef __cplusplus } #endif #endif /* shash.h */ 
openvswitch-2.0.1+git20140120/lib/signals.c000066400000000000000000000072351226605124000200330ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include
#include "signals.h"
#include
#include
#include
#include
#include
#include "poll-loop.h"
#include "socket-util.h"
#include "type-props.h"
#include "util.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(signals);

/* N_SIGNALS sizes the 'signals' table below; valid signal numbers for this
 * module are 1...N_SIGNALS - 1. */
#if defined(_NSIG)
#define N_SIGNALS _NSIG
#elif defined(NSIG)
#define N_SIGNALS NSIG
#else
/* We could try harder to get the maximum signal number, but in practice we
 * only care about SIGHUP, which is normally signal 1 anyway. */
#define N_SIGNALS 32
#endif

/* Per-signal state: a pipe.  signal_handler() writes a byte to fds[1] when
 * the signal arrives; signal_poll() and signal_wait() use fds[0]. */
struct signal {
    int fds[2];
};

/* Indexed by signal number.  Static storage, so every entry starts out with
 * both fds equal to 0, which signal_register() treats as "not registered". */
static struct signal signals[N_SIGNALS];

static void signal_handler(int signr);

/* Sets up a handler for 'signr' and returns a structure that represents it.
 *
 * Only one handler for a given signal may be registered. */
struct signal *
signal_register(int signr)
{
    struct sigaction sa;
    struct signal *s;

    ovs_assert(signr >= 1 && signr < N_SIGNALS);

    /* Create a pipe. */
    s = &signals[signr];
    /* Both fds still 0 means this signal has not been registered before. */
    ovs_assert(!s->fds[0] && !s->fds[1]);
    xpipe_nonblocking(s->fds);

    /* Install signal handler. */
    memset(&sa, 0, sizeof sa);
    sa.sa_handler = signal_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = SA_RESTART;
    xsigaction(signr, &sa, NULL);

    return s;
}

/* Returns true if signal 's' has been received since the last call to this
 * function with argument 's'.
*/ bool signal_poll(struct signal *s) { char buf[_POSIX_PIPE_BUF]; return read(s->fds[0], buf, sizeof buf) > 0; } /* Causes the next call to poll_block() to wake up when signal_poll(s) would * return true. */ void signal_wait(struct signal *s) { poll_fd_wait(s->fds[0], POLLIN); } static void signal_handler(int signr) { if (signr >= 1 && signr < N_SIGNALS) { ignore(write(signals[signr].fds[1], "", 1)); } } /* Returns the name of signal 'signum' as a string. The return value is either * a statically allocated constant string or the 'bufsize'-byte buffer * 'namebuf'. 'bufsize' should be at least SIGNAL_NAME_BUFSIZE. * * The string is probably a (possibly multi-word) description of the signal * (e.g. "Hangup") instead of just the stringified version of the macro * (e.g. "SIGHUP"). */ const char * signal_name(int signum, char *namebuf, size_t bufsize) { #if HAVE_DECL_SYS_SIGLIST if (signum >= 0 && signum < ARRAY_SIZE(sys_siglist)) { const char *name = sys_siglist[signum]; if (name) { return name; } } #endif snprintf(namebuf, bufsize, "signal %d", signum); return namebuf; } void xsigaction(int signum, const struct sigaction *new, struct sigaction *old) { if (sigaction(signum, new, old)) { char namebuf[SIGNAL_NAME_BUFSIZE]; VLOG_FATAL("sigaction(%s) failed (%s)", signal_name(signum, namebuf, sizeof namebuf), ovs_strerror(errno)); } } void xpthread_sigmask(int how, const sigset_t *new, sigset_t *old) { int error = pthread_sigmask(how, new, old); if (error) { VLOG_FATAL("pthread_sigmask failed (%s)", ovs_strerror(error)); } } openvswitch-2.0.1+git20140120/lib/signals.h000066400000000000000000000021711226605124000200320ustar00rootroot00000000000000/* * Copyright (c) 2008, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef SIGNALS_H #define SIGNALS_H 1 #include #include #include #include "type-props.h" struct signal *signal_register(int signr); bool signal_poll(struct signal *); void signal_wait(struct signal *); enum { SIGNAL_NAME_BUFSIZE = 7 + INT_STRLEN(int) + 1 }; const char *signal_name(int signum, char *namebuf, size_t bufsize); void xsigaction(int signum, const struct sigaction *, struct sigaction *old); void xpthread_sigmask(int how, const sigset_t *, sigset_t *old); #endif /* signals.h */ openvswitch-2.0.1+git20140120/lib/simap.c000066400000000000000000000164371226605124000175100ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

#include
#include "simap.h"
#include
#include "hash.h"

/* Private helpers; their definitions appear later in this file. */
static size_t hash_name(const char *, size_t length);
static struct simap_node *simap_find__(const struct simap *,
                                       const char *name, size_t name_len,
                                       size_t hash);
static struct simap_node *simap_add_nocopy__(struct simap *,
                                             char *name, unsigned int data,
                                             size_t hash);
static int compare_nodes_by_name(const void *a_, const void *b_);

/* Initializes 'simap' as an empty string-to-integer map. */
void
simap_init(struct simap *simap)
{
    hmap_init(&simap->map);
}

/* Frees all the data that 'simap' contains.  Does nothing if 'simap' is
 * null. */
void
simap_destroy(struct simap *simap)
{
    if (simap) {
        simap_clear(simap);
        hmap_destroy(&simap->map);
    }
}

/* Exchanges the contents of 'a' and 'b'. */
void
simap_swap(struct simap *a, struct simap *b)
{
    hmap_swap(&a->map, &b->map);
}

/* Adjusts 'simap' so that it is still valid after it has been moved around in
 * memory (e.g. due to realloc()). */
void
simap_moved(struct simap *simap)
{
    hmap_moved(&simap->map);
}

/* Removes all of the mappings from 'simap' and frees them. */
void
simap_clear(struct simap *simap)
{
    struct simap_node *node, *next;

    /* The _SAFE iterator is needed because each node is freed inside the
     * loop body. */
    SIMAP_FOR_EACH_SAFE (node, next, simap) {
        hmap_remove(&simap->map, &node->node);
        free(node->name);
        free(node);
    }
}

/* Returns true if 'simap' contains no mappings, false if it contains at least
 * one. */
bool
simap_is_empty(const struct simap *simap)
{
    return hmap_is_empty(&simap->map);
}

/* Returns the number of mappings in 'simap'. */
size_t
simap_count(const struct simap *simap)
{
    return hmap_count(&simap->map);
}

/* Inserts a mapping from 'name' to 'data' into 'simap', replacing any
 * existing mapping for 'name'.  Returns true if a new mapping was added,
 * false if an existing mapping's value was replaced.
 *
 * The caller retains ownership of 'name'.
*/
bool
simap_put(struct simap *simap, const char *name, unsigned int data)
{
    size_t length = strlen(name);
    size_t hash = hash_name(name, length);
    struct simap_node *node;

    node = simap_find__(simap, name, length, hash);
    if (node) {
        node->data = data;
        return false;
    } else {
        /* The new node is given a duplicate of 'name', so the caller keeps
         * ownership of its own string. */
        simap_add_nocopy__(simap, xmemdup0(name, length), data, hash);
        return true;
    }
}

/* Increases the data value in the mapping for 'name' by 'amt', or inserts a
 * mapping from 'name' to 'amt' if no such mapping exists.  Returns the
 * new total data value for the mapping.
 *
 * If 'amt' is zero, this function does nothing and returns 0.  That is, this
 * function won't create a mapping with an initial value of 0.
 *
 * The addition is done in 'unsigned int' arithmetic, so the total wraps
 * around on overflow.
 *
 * The caller retains ownership of 'name'. */
unsigned int
simap_increase(struct simap *simap, const char *name, unsigned int amt)
{
    if (amt) {
        size_t length = strlen(name);
        size_t hash = hash_name(name, length);
        struct simap_node *node;

        node = simap_find__(simap, name, length, hash);
        if (node) {
            node->data += amt;
        } else {
            node = simap_add_nocopy__(simap, xmemdup0(name, length),
                                      amt, hash);
        }
        return node->data;
    } else {
        return 0;
    }
}

/* Deletes 'node' from 'simap' and frees its associated memory. */
void
simap_delete(struct simap *simap, struct simap_node *node)
{
    hmap_remove(&simap->map, &node->node);
    free(node->name);
    free(node);
}

/* Searches for 'name' in 'simap'.  If found, deletes it and returns true.  If
 * not found, returns false without modifying 'simap'. */
bool
simap_find_and_delete(struct simap *simap, const char *name)
{
    struct simap_node *node = simap_find(simap, name);
    if (node) {
        simap_delete(simap, node);
        return true;
    }
    return false;
}

/* Searches 'simap' for a mapping with the given 'name'.  Returns it, if found,
 * or a null pointer if not. */
struct simap_node *
simap_find(const struct simap *simap, const char *name)
{
    return simap_find_len(simap, name, strlen(name));
}

/* Searches 'simap' for a mapping whose name is the first 'len' bytes
 * starting at 'name'.
Returns it, if found, or a null pointer if not. */ struct simap_node * simap_find_len(const struct simap *simap, const char *name, size_t len) { return simap_find__(simap, name, len, hash_name(name, len)); } /* Searches 'simap' for a mapping with the given 'name'. Returns the * associated data value, if found, otherwise zero. */ unsigned int simap_get(const struct simap *simap, const char *name) { struct simap_node *node = simap_find(simap, name); return node ? node->data : 0; } /* Returns true if 'simap' contains a copy of 'name', false otherwise. */ bool simap_contains(const struct simap *simap, const char *name) { return simap_find(simap, name) != NULL; } /* Returns an array that contains a pointer to each mapping in 'simap', * ordered alphabetically by name. The returned array has simap_count(simap) * elements. * * The caller is responsible for freeing the returned array (with free()). It * should not free the individual "simap_node"s in the array, because they are * still part of 'simap'. 
*/ const struct simap_node ** simap_sort(const struct simap *simap) { if (simap_is_empty(simap)) { return NULL; } else { const struct simap_node **nodes; struct simap_node *node; size_t i, n; n = simap_count(simap); nodes = xmalloc(n * sizeof *nodes); i = 0; SIMAP_FOR_EACH (node, simap) { nodes[i++] = node; } ovs_assert(i == n); qsort(nodes, n, sizeof *nodes, compare_nodes_by_name); return nodes; } } static size_t hash_name(const char *name, size_t length) { return hash_bytes(name, length, 0); } static struct simap_node * simap_find__(const struct simap *simap, const char *name, size_t name_len, size_t hash) { struct simap_node *node; HMAP_FOR_EACH_WITH_HASH (node, node, hash, &simap->map) { if (!strncmp(node->name, name, name_len) && !node->name[name_len]) { return node; } } return NULL; } static struct simap_node * simap_add_nocopy__(struct simap *simap, char *name, unsigned int data, size_t hash) { struct simap_node *node = xmalloc(sizeof *node); node->name = name; node->data = data; hmap_insert(&simap->map, &node->node, hash); return node; } static int compare_nodes_by_name(const void *a_, const void *b_) { const struct simap_node *const *a = a_; const struct simap_node *const *b = b_; return strcmp((*a)->name, (*b)->name); } openvswitch-2.0.1+git20140120/lib/simap.h000066400000000000000000000042331226605124000175040ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef SIMAP_H #define SIMAP_H 1 #include "hmap.h" #ifdef __cplusplus extern "C" { #endif /* A map from strings to unsigned integers. */ struct simap { struct hmap map; /* Contains "struct simap_node"s. */ }; struct simap_node { struct hmap_node node; /* In struct simap's 'map' hmap. */ char *name; unsigned int data; }; #define SIMAP_INITIALIZER(SIMAP) { HMAP_INITIALIZER(&(SIMAP)->map) } #define SIMAP_FOR_EACH(SIMAP_NODE, SIMAP) \ HMAP_FOR_EACH (SIMAP_NODE, node, &(SIMAP)->map) #define SIMAP_FOR_EACH_SAFE(SIMAP_NODE, NEXT, SIMAP) \ HMAP_FOR_EACH_SAFE (SIMAP_NODE, NEXT, node, &(SIMAP)->map) void simap_init(struct simap *); void simap_destroy(struct simap *); void simap_swap(struct simap *, struct simap *); void simap_moved(struct simap *); void simap_clear(struct simap *); bool simap_is_empty(const struct simap *); size_t simap_count(const struct simap *); bool simap_put(struct simap *, const char *, unsigned int); unsigned int simap_increase(struct simap *, const char *, unsigned int); unsigned int simap_get(const struct simap *, const char *); struct simap_node *simap_find(const struct simap *, const char *); struct simap_node *simap_find_len(const struct simap *, const char *, size_t len); bool simap_contains(const struct simap *, const char *); void simap_delete(struct simap *, struct simap_node *); bool simap_find_and_delete(struct simap *, const char *); const struct simap_node **simap_sort(const struct simap *); #ifdef __cplusplus } #endif #endif /* simap.h */ openvswitch-2.0.1+git20140120/lib/smap.c000066400000000000000000000205251226605124000173300ustar00rootroot00000000000000/* Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "smap.h" #include #include "hash.h" #include "json.h" static struct smap_node *smap_add__(struct smap *, char *, void *, size_t hash); static struct smap_node *smap_find__(const struct smap *, const char *key, size_t key_len, size_t hash); static int compare_nodes_by_key(const void *, const void *); /* Public Functions. */ void smap_init(struct smap *smap) { hmap_init(&smap->map); } void smap_destroy(struct smap *smap) { if (smap) { smap_clear(smap); hmap_destroy(&smap->map); } } /* Adds 'key' paired with 'value' to 'smap'. It is the caller's responsibility * to avoid duplicate keys if desirable. */ struct smap_node * smap_add(struct smap *smap, const char *key, const char *value) { size_t key_len = strlen(key); return smap_add__(smap, xmemdup0(key, key_len), xstrdup(value), hash_bytes(key, key_len, 0)); } /* Attempts to add 'key' to 'smap' associated with 'value'. If 'key' already * exists in 'smap', does nothing and returns false. Otherwise, performs the * addition and returns true. */ bool smap_add_once(struct smap *smap, const char *key, const char *value) { if (!smap_get(smap, key)) { smap_add(smap, key, value); return true; } else { return false; } } /* Adds 'key' paired with a value derived from 'format' (similar to printf). * It is the caller's responsibility to avoid duplicate keys if desirable. */ void smap_add_format(struct smap *smap, const char *key, const char *format, ...) 
{ size_t key_len; va_list args; char *value; va_start(args, format); value = xvasprintf(format, args); va_end(args); key_len = strlen(key); smap_add__(smap, xmemdup0(key, key_len), value, hash_bytes(key, key_len, 0)); } /* Searches for 'key' in 'smap'. If it does not already exists, adds it. * Otherwise, changes its value to 'value'. */ void smap_replace(struct smap *smap, const char *key, const char *value) { size_t key_len = strlen(key); size_t hash = hash_bytes(key, key_len, 0); struct smap_node *node; node = smap_find__(smap, key, key_len, hash); if (node) { free(node->value); node->value = xstrdup(value); } else { smap_add__(smap, xmemdup0(key, key_len), xstrdup(value), hash); } } /* If 'key' is in 'smap', removes it. Otherwise does nothing. */ void smap_remove(struct smap *smap, const char *key) { struct smap_node *node = smap_get_node(smap, key); if (node) { smap_remove_node(smap, node); } } /* Removes 'node' from 'smap'. */ void smap_remove_node(struct smap *smap, struct smap_node *node) { hmap_remove(&smap->map, &node->node); free(node->key); free(node->value); free(node); } /* Deletes 'node' from 'smap'. * * If 'keyp' is nonnull, stores the node's key in '*keyp' and transfers * ownership to the caller. Otherwise, frees the node's key. Similarly for * 'valuep' and the node's value. */ void smap_steal(struct smap *smap, struct smap_node *node, char **keyp, char **valuep) { if (keyp) { *keyp = node->key; } else { free(node->key); } if (valuep) { *valuep = node->value; } else { free(node->value); } hmap_remove(&smap->map, &node->node); free(node); } /* Removes all key-value pairs from 'smap'. */ void smap_clear(struct smap *smap) { struct smap_node *node, *next; SMAP_FOR_EACH_SAFE (node, next, smap) { smap_remove_node(smap, node); } } /* Returns the value associated with 'key' in 'smap', or NULL. */ const char * smap_get(const struct smap *smap, const char *key) { struct smap_node *node = smap_get_node(smap, key); return node ? 
node->value : NULL; } /* Returns the node associated with 'key' in 'smap', or NULL. */ struct smap_node * smap_get_node(const struct smap *smap, const char *key) { size_t key_len = strlen(key); return smap_find__(smap, key, key_len, hash_bytes(key, key_len, 0)); } /* Gets the value associated with 'key' in 'smap' and converts it to a boolean. * If 'key' is not in 'smap', or its value is neither "true" nor "false", * returns 'def'. */ bool smap_get_bool(const struct smap *smap, const char *key, bool def) { const char *value = smap_get(smap, key); if (!value) { return def; } if (def) { return strcasecmp("false", value) != 0; } else { return !strcasecmp("true", value); } } /* Gets the value associated with 'key' in 'smap' and converts it to an int * using atoi(). If 'key' is not in 'smap', returns 'def'. */ int smap_get_int(const struct smap *smap, const char *key, int def) { const char *value = smap_get(smap, key); return value ? atoi(value) : def; } /* Returns true of there are no elements in 'smap'. */ bool smap_is_empty(const struct smap *smap) { return hmap_is_empty(&smap->map); } /* Returns the number of elements in 'smap'. */ size_t smap_count(const struct smap *smap) { return hmap_count(&smap->map); } /* Initializes 'dst' as a clone of 'src. */ void smap_clone(struct smap *dst, const struct smap *src) { const struct smap_node *node; smap_init(dst); SMAP_FOR_EACH (node, src) { smap_add__(dst, xstrdup(node->key), xstrdup(node->value), node->node.hash); } } /* Returns an array of nodes sorted on key or NULL if 'smap' is empty. The * caller is responsible for freeing this array. 
*/ const struct smap_node ** smap_sort(const struct smap *smap) { if (smap_is_empty(smap)) { return NULL; } else { const struct smap_node **nodes; struct smap_node *node; size_t i, n; n = smap_count(smap); nodes = xmalloc(n * sizeof *nodes); i = 0; SMAP_FOR_EACH (node, smap) { nodes[i++] = node; } ovs_assert(i == n); qsort(nodes, n, sizeof *nodes, compare_nodes_by_key); return nodes; } } /* Adds each of the key-value pairs from 'json' (which must be a JSON object * whose values are strings) to 'smap'. * * The caller must have initialized 'smap'. * * The caller retains ownership of 'json' and everything in it. */ void smap_from_json(struct smap *smap, const struct json *json) { const struct shash_node *node; SHASH_FOR_EACH (node, json_object(json)) { const struct json *value = node->data; smap_add(smap, node->name, json_string(value)); } } /* Returns a JSON object that maps from the keys in 'smap' to their values. * * The caller owns the returned value and must eventually json_destroy() it. * * The caller retains ownership of 'smap' and everything in it. */ struct json * smap_to_json(const struct smap *smap) { const struct smap_node *node; struct json *json; json = json_object_create(); SMAP_FOR_EACH (node, smap) { json_object_put_string(json, node->key, node->value); } return json; } /* Private Helpers. 
*/

/* Allocates a node that stores 'key' and 'value' as-is (no copies are made),
 * inserts it into 'smap' under 'hash', and returns it. */
static struct smap_node *
smap_add__(struct smap *smap, char *key, void *value, size_t hash)
{
    struct smap_node *node = xmalloc(sizeof *node);
    node->key = key;
    node->value = value;
    hmap_insert(&smap->map, &node->node, hash);
    return node;
}

/* Returns the node in 'smap' whose key is exactly the 'key_len' bytes
 * starting at 'key' and whose hash is 'hash', or NULL if there is none. */
static struct smap_node *
smap_find__(const struct smap *smap, const char *key, size_t key_len,
            size_t hash)
{
    struct smap_node *node;

    HMAP_FOR_EACH_WITH_HASH (node, node, hash, &smap->map) {
        /* strncmp() matches the first 'key_len' bytes; the second test
         * rejects stored keys that continue past that point. */
        if (!strncmp(node->key, key, key_len) && !node->key[key_len]) {
            return node;
        }
    }

    return NULL;
}

/* qsort() comparison function: orders two "struct smap_node *" array
 * elements by strcmp() of their keys. */
static int
compare_nodes_by_key(const void *a_, const void *b_)
{
    const struct smap_node *const *a = a_;
    const struct smap_node *const *b = b_;
    return strcmp((*a)->key, (*b)->key);
}
openvswitch-2.0.1+git20140120/lib/smap.h000066400000000000000000000044551226605124000173410ustar00rootroot00000000000000/* Copyright (c) 2012 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#ifndef SMAP_H
#define SMAP_H 1

#include "hmap.h"

struct json;

/* A map from string to string. */
struct smap {
    struct hmap map;           /* Contains "struct smap_node"s. */
};

struct smap_node {
    struct hmap_node node;     /* In struct smap's 'map' hmap. */
    char *key;                 /* Freed by smap_remove_node(). */
    char *value;               /* Freed by smap_remove_node(). */
};

/* Static initializer for a struct smap named SMAP. */
#define SMAP_INITIALIZER(SMAP) { HMAP_INITIALIZER(&(SMAP)->map) }

/* Iterates SMAP_NODE over each node in SMAP. */
#define SMAP_FOR_EACH(SMAP_NODE, SMAP) \
    HMAP_FOR_EACH (SMAP_NODE, node, &(SMAP)->map)

/* Like SMAP_FOR_EACH, built on HMAP_FOR_EACH_SAFE with NEXT as the extra
 * iterator variable; smap_clear() uses it to free nodes while iterating. */
#define SMAP_FOR_EACH_SAFE(SMAP_NODE, NEXT, SMAP) \
    HMAP_FOR_EACH_SAFE (SMAP_NODE, NEXT, node, &(SMAP)->map)

void smap_init(struct smap *);
void smap_destroy(struct smap *);

struct smap_node *smap_add(struct smap *, const char *, const char *);
bool smap_add_once(struct smap *, const char *, const char *);
void smap_add_format(struct smap *, const char *key, const char *, ...)
    PRINTF_FORMAT(3, 4);
void smap_replace(struct smap *, const char *, const char *);

void smap_remove(struct smap *, const char *);
void smap_remove_node(struct smap *, struct smap_node *);
void smap_steal(struct smap *, struct smap_node *, char **keyp, char **valuep);
void smap_clear(struct smap *);

const char *smap_get(const struct smap *, const char *);
struct smap_node *smap_get_node(const struct smap *, const char *);
bool smap_get_bool(const struct smap *smap, const char *key, bool def);
int smap_get_int(const struct smap *smap, const char *key, int def);

bool smap_is_empty(const struct smap *);
size_t smap_count(const struct smap *);

void smap_clone(struct smap *dst, const struct smap *src);
const struct smap_node **smap_sort(const struct smap *);

void smap_from_json(struct smap *, const struct json *);
struct json *smap_to_json(const struct smap *);

#endif /* smap.h */
openvswitch-2.0.1+git20140120/lib/socket-util.c000066400000000000000000001170541226605124000206370ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "socket-util.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dynamic-string.h" #include "fatal-signal.h" #include "packets.h" #include "poll-loop.h" #include "util.h" #include "vlog.h" #if AF_PACKET && LINUX_DATAPATH #include #endif #ifdef HAVE_NETLINK #include "netlink-protocol.h" #include "netlink-socket.h" #endif VLOG_DEFINE_THIS_MODULE(socket_util); /* #ifdefs make it a pain to maintain code: you have to try to build both ways. * Thus, this file compiles all of the code regardless of the target, by * writing "if (LINUX_DATAPATH)" instead of "#ifdef __linux__". */ #ifndef LINUX_DATAPATH #define LINUX_DATAPATH 0 #endif #ifndef O_DIRECTORY #define O_DIRECTORY 0 #endif static int getsockopt_int(int fd, int level, int option, const char *optname, int *valuep); /* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a * positive errno value. 
/* Sets the IP_TOS option of socket 'fd' so that its upper six bits hold
 * 'dscp'.
 *
 * Returns 0 if successful, EINVAL if 'dscp' is greater than 63, otherwise the
 * errno value left behind by setsockopt(). */
int
set_dscp(int fd, uint8_t dscp)
{
    enum {
        DSCP_LIMIT = 63,        /* Largest value that fits in six bits. */
        DSCP_TOS_SHIFT = 2      /* DSCP sits above the two low TOS bits. */
    };
    int tos;

    if (dscp > DSCP_LIMIT) {
        return EINVAL;
    }

    tos = dscp << DSCP_TOS_SHIFT;
    return setsockopt(fd, IPPROTO_IP, IP_TOS, &tos, sizeof tos) ? errno : 0;
}
/* Resolves 'host_name', which may be either a host name or the string form of
 * an IP address, into the numeric IPv4 address stored in '*addr'.
 *
 * Returns 0 if successful, otherwise a positive errno value derived from the
 * getaddrinfo() failure code.
 *
 * Most Open vSwitch code should not use this because it causes deadlocks:
 * getaddrinfo() sends out a DNS request but that starts a new flow for which
 * OVS must set up a flow, but it can't because it's waiting for a DNS reply.
 * The synchronous lookup also delays other activity.  (Of course we can solve
 * this but it doesn't seem worthwhile quite yet.) */
int
lookup_hostname(const char *host_name, struct in_addr *addr)
{
    struct addrinfo hints;
    struct addrinfo *answer;
    int gai_error;
    int error;

    /* An address literal needs no resolver at all. */
    if (inet_aton(host_name, addr) != 0) {
        return 0;
    }

    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_INET;

    gai_error = getaddrinfo(host_name, NULL, &hints, &answer);
    if (!gai_error) {
        *addr = ALIGNED_CAST(struct sockaddr_in *, answer->ai_addr)->sin_addr;
        freeaddrinfo(answer);
        return 0;
    }

    /* Translate the EAI_* failure code into an errno value. */
    switch (gai_error) {
#ifdef EAI_ADDRFAMILY
    case EAI_ADDRFAMILY:
#endif
    case EAI_NONAME:
    case EAI_SERVICE:
        error = ENOENT;
        break;

    case EAI_AGAIN:
        error = EAGAIN;
        break;

    case EAI_BADFLAGS:
    case EAI_FAMILY:
    case EAI_SOCKTYPE:
        error = EINVAL;
        break;

    case EAI_FAIL:
        error = EIO;
        break;

    case EAI_MEMORY:
        error = ENOMEM;
        break;

#ifdef EAI_NODATA
    case EAI_NODATA:
        error = ENXIO;
        break;
#endif

    case EAI_SYSTEM:
        error = errno;
        break;

    default:
        error = EPROTO;
        break;
    }
    return error;
}
POLLERR but send succeeded"); return EPROTO; } } return 0; } else if (retval < 0) { VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(errno)); return errno; } else { return EAGAIN; } } /* Drain all the data currently in the receive queue of a datagram socket (and * possibly additional data). There is no way to know how many packets are in * the receive queue, but we do know that the total number of bytes queued does * not exceed the receive buffer size, so we pull packets until none are left * or we've read that many bytes. */ int drain_rcvbuf(int fd) { int rcvbuf; rcvbuf = get_socket_rcvbuf(fd); if (rcvbuf < 0) { return -rcvbuf; } while (rcvbuf > 0) { /* In Linux, specifying MSG_TRUNC in the flags argument causes the * datagram length to be returned, even if that is longer than the * buffer provided. Thus, we can use a 1-byte buffer to discard the * incoming datagram and still be able to account how many bytes were * removed from the receive buffer. * * On other Unix-like OSes, MSG_TRUNC has no effect in the flags * argument. */ char buffer[LINUX_DATAPATH ? 1 : 2048]; ssize_t n_bytes = recv(fd, buffer, sizeof buffer, MSG_TRUNC | MSG_DONTWAIT); if (n_bytes <= 0 || n_bytes >= rcvbuf) { break; } rcvbuf -= n_bytes; } return 0; } /* Returns the size of socket 'sock''s receive buffer (SO_RCVBUF), or a * negative errno value if an error occurs. */ int get_socket_rcvbuf(int sock) { int rcvbuf; int error; error = getsockopt_int(sock, SOL_SOCKET, SO_RCVBUF, "SO_RCVBUF", &rcvbuf); return error ? -error : rcvbuf; } /* Reads and discards up to 'n' datagrams from 'fd', stopping as soon as no * more data can be immediately read. ('fd' should therefore be in * non-blocking mode.)*/ void drain_fd(int fd, size_t n_packets) { for (; n_packets > 0; n_packets--) { /* 'buffer' only needs to be 1 byte long in most circumstances. 
This * size is defensive against the possibility that we someday want to * use a Linux tap device without TUN_NO_PI, in which case a buffer * smaller than sizeof(struct tun_pi) will give EINVAL on read. */ char buffer[128]; if (read(fd, buffer, sizeof buffer) <= 0) { break; } } } /* Stores in '*un' a sockaddr_un that refers to file 'name'. Stores in * '*un_len' the size of the sockaddr_un. */ static void make_sockaddr_un__(const char *name, struct sockaddr_un *un, socklen_t *un_len) { un->sun_family = AF_UNIX; ovs_strzcpy(un->sun_path, name, sizeof un->sun_path); *un_len = (offsetof(struct sockaddr_un, sun_path) + strlen (un->sun_path) + 1); } /* Stores in '*un' a sockaddr_un that refers to file 'name'. Stores in * '*un_len' the size of the sockaddr_un. * * Returns 0 on success, otherwise a positive errno value. On success, * '*dirfdp' is either -1 or a nonnegative file descriptor that the caller * should close after using '*un' to bind or connect. On failure, '*dirfdp' is * -1. */ static int make_sockaddr_un(const char *name, struct sockaddr_un *un, socklen_t *un_len, int *dirfdp) { enum { MAX_UN_LEN = sizeof un->sun_path - 1 }; *dirfdp = -1; if (strlen(name) > MAX_UN_LEN) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); if (LINUX_DATAPATH) { /* 'name' is too long to fit in a sockaddr_un, but we have a * workaround for that on Linux: shorten it by opening a file * descriptor for the directory part of the name and indirecting * through /proc/self/fd//. 
*/ char *dir, *base; char *short_name; int dirfd; dir = dir_name(name); base = base_name(name); dirfd = open(dir, O_DIRECTORY | O_RDONLY); if (dirfd < 0) { free(base); free(dir); return errno; } short_name = xasprintf("/proc/self/fd/%d/%s", dirfd, base); free(dir); free(base); if (strlen(short_name) <= MAX_UN_LEN) { make_sockaddr_un__(short_name, un, un_len); free(short_name); *dirfdp = dirfd; return 0; } free(short_name); close(dirfd); VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum " "%d bytes (even shortened)", name, MAX_UN_LEN); } else { /* 'name' is too long and we have no workaround. */ VLOG_WARN_RL(&rl, "Unix socket name %s is longer than maximum " "%d bytes", name, MAX_UN_LEN); } return ENAMETOOLONG; } else { make_sockaddr_un__(name, un, un_len); return 0; } } /* Binds Unix domain socket 'fd' to a file with permissions 0700. */ static int bind_unix_socket(int fd, struct sockaddr *sun, socklen_t sun_len) { /* According to _Unix Network Programming_, umask should affect bind(). */ mode_t old_umask = umask(0077); int error = bind(fd, sun, sun_len) ? errno : 0; umask(old_umask); return error; } /* Creates a Unix domain socket in the given 'style' (either SOCK_DGRAM or * SOCK_STREAM) that is bound to '*bind_path' (if 'bind_path' is non-null) and * connected to '*connect_path' (if 'connect_path' is non-null). If 'nonblock' * is true, the socket is made non-blocking. * * Returns the socket's fd if successful, otherwise a negative errno value. */ int make_unix_socket(int style, bool nonblock, const char *bind_path, const char *connect_path) { int error; int fd; fd = socket(PF_UNIX, style, 0); if (fd < 0) { return -errno; } /* Set nonblocking mode right away, if we want it. This prevents blocking * in connect(), if connect_path != NULL. (In turn, that's a corner case: * it will only happen if style is SOCK_STREAM or SOCK_SEQPACKET, and only * if a backlog of un-accepted connections has built up in the kernel.) 
/* Parses 'target', which should be a string in the format "<host>[:<port>]".
 * <host> is required and is handed to lookup_ip().  If 'default_port' is
 * nonzero then <port> is optional and defaults to 'default_port'; a <port>
 * that is absent or parses as 0 selects the default.
 *
 * On success, returns true and stores the parsed remote address into '*sinp'.
 * On failure, logs an error, stores zeros into '*sinp', and returns false.  A
 * <port> outside the range [0, 65535] is a failure. */
bool
inet_parse_active(const char *target_, uint16_t default_port,
                  struct sockaddr_in *sinp)
{
    char *target = xstrdup(target_);
    char *save_ptr = NULL;
    const char *host_name;
    const char *port_string;
    long int port;
    bool ok = false;

    /* Defaults.  Zero the whole structure first, as inet_parse_passive()
     * does, so that members not assigned below (e.g. 'sin_zero') do not
     * carry whatever the caller's storage happened to contain. */
    memset(sinp, 0, sizeof *sinp);
    sinp->sin_family = AF_INET;
    sinp->sin_port = htons(default_port);

    /* Tokenize. */
    host_name = strtok_r(target, ":", &save_ptr);
    port_string = strtok_r(NULL, ":", &save_ptr);
    if (!host_name) {
        VLOG_ERR("%s: bad peer name format", target_);
        goto exit;
    }

    /* Look up IP, port. */
    if (lookup_ip(host_name, &sinp->sin_addr)) {
        goto exit;
    }

    /* strtol() has defined behavior on overflow, which lets values that do
     * not fit in 16 bits be rejected instead of silently truncated by
     * htons(). */
    port = port_string ? strtol(port_string, NULL, 10) : 0;
    if (port < 0 || port > 65535) {
        VLOG_ERR("%s: port number %ld is out of range", target_, port);
        goto exit;
    } else if (port) {
        sinp->sin_port = htons(port);
    } else if (!default_port) {
        VLOG_ERR("%s: port number must be specified", target_);
        goto exit;
    }

    ok = true;

exit:
    if (!ok) {
        memset(sinp, 0, sizeof *sinp);
    }
    free(target);
    return ok;
}
/* Parses 'target', which should be a string in the format "[<port>][:<ip>]":
 *
 *      - If 'default_port' is -1, then <port> is required.  Otherwise, if
 *        <port> is omitted, then 'default_port' is used instead.
 *
 *      - If <port> (or 'default_port', if used) is 0, then no port is bound
 *        and the TCP/IP stack will select a port.
 *
 *      - If <ip> is omitted then the IP address is wildcarded.
 *
 * If successful, stores the address into '*sinp' and returns true; otherwise
 * zeros '*sinp' and returns false.  A <port> outside the range [0, 65535] is
 * a failure. */
bool
inet_parse_passive(const char *target_, int default_port,
                   struct sockaddr_in *sinp)
{
    char *target = xstrdup(target_);
    char *string_ptr = target;
    const char *host_name;
    const char *port_string;
    bool ok = false;
    int port;

    /* Address defaults. */
    memset(sinp, 0, sizeof *sinp);
    sinp->sin_family = AF_INET;
    sinp->sin_addr.s_addr = htonl(INADDR_ANY);
    sinp->sin_port = htons(default_port);

    /* Parse optional port number. */
    port_string = strsep(&string_ptr, ":");
    if (port_string && str_to_int(port_string, 10, &port)) {
        /* htons() takes a 16-bit value, so anything larger would be
         * truncated to an unrelated port without this check. */
        if (port < 0 || port > 65535) {
            VLOG_ERR("%s: port number %d is out of range", target_, port);
            goto exit;
        }
        sinp->sin_port = htons(port);
    } else if (default_port < 0) {
        VLOG_ERR("%s: port number must be specified", target_);
        goto exit;
    }

    /* Parse optional bind IP. */
    host_name = strsep(&string_ptr, ":");
    if (host_name && host_name[0] && lookup_ip(host_name, &sinp->sin_addr)) {
        goto exit;
    }

    ok = true;

exit:
    if (!ok) {
        memset(sinp, 0, sizeof *sinp);
    }
    free(target);
    return ok;
}
/* Opens a non-blocking IPv4 socket of the specified 'style', binds to
 * 'target', and listens for incoming connections.  Parses 'target' in the same
 * way as inet_parse_passive().
 *
 * 'style' should be SOCK_STREAM (for TCP) or SOCK_DGRAM (for UDP).
 *
 * For TCP, the socket will have SO_REUSEADDR turned on.
 *
 * On success, returns a non-negative file descriptor.  On failure, returns a
 * negative errno value.
 *
 * If 'sinp' is non-null, then on success the bound address is stored into
 * '*sinp'.
 *
 * 'dscp' becomes the DSCP bits in the IP headers for the new connection.  It
 * should be in the range [0, 63] and will automatically be shifted to the
 * appropriate place in the IP tos field. */
int
inet_open_passive(int style, const char *target, int default_port,
                  struct sockaddr_in *sinp, uint8_t dscp)
{
    bool kernel_chooses_port;
    struct sockaddr_in sin;
    int fd, error;
    unsigned int yes = 1;

    if (!inet_parse_passive(target, default_port, &sin)) {
        return -EAFNOSUPPORT;
    }

    /* Create non-blocking socket, set SO_REUSEADDR. */
    fd = socket(AF_INET, style, 0);
    if (fd < 0) {
        error = errno;
        VLOG_ERR("%s: socket: %s", target, ovs_strerror(error));
        return -error;
    }
    error = set_nonblocking(fd);
    if (error) {
        goto error;
    }
    if (style == SOCK_STREAM
        && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) < 0) {
        error = errno;
        VLOG_ERR("%s: setsockopt(SO_REUSEADDR): %s",
                 target, ovs_strerror(error));
        goto error;
    }

    /* Bind. */
    if (bind(fd, (struct sockaddr *) &sin, sizeof sin) < 0) {
        error = errno;
        VLOG_ERR("%s: bind: %s", target, ovs_strerror(error));
        goto error;
    }

    /* Configure the DSCP bits before listen(), so that they are already in
     * place by the time the socket can take part in any connection
     * establishment. */
    error = set_dscp(fd, dscp);
    if (error) {
        VLOG_ERR("%s: set_dscp: %s", target, ovs_strerror(error));
        goto error;
    }

    /* Listen. */
    if (style == SOCK_STREAM && listen(fd, 10) < 0) {
        error = errno;
        VLOG_ERR("%s: listen: %s", target, ovs_strerror(error));
        goto error;
    }

    /* Ask the kernel for the bound address when the caller wants it, or when
     * port 0 was requested and the chosen port should be logged. */
    kernel_chooses_port = sin.sin_port == htons(0);
    if (sinp || kernel_chooses_port) {
        socklen_t sin_len = sizeof sin;
        if (getsockname(fd, (struct sockaddr *) &sin, &sin_len) < 0) {
            error = errno;
            VLOG_ERR("%s: getsockname: %s", target, ovs_strerror(error));
            goto error;
        }
        if (sin.sin_family != AF_INET || sin_len != sizeof sin) {
            error = EAFNOSUPPORT;
            VLOG_ERR("%s: getsockname: invalid socket name", target);
            goto error;
        }
        if (sinp) {
            *sinp = sin;
        }
        if (kernel_chooses_port) {
            VLOG_INFO("%s: listening on port %"PRIu16,
                      target, ntohs(sin.sin_port));
        }
    }

    return fd;

error:
    close(fd);
    return -error;
}
/* Looks up the modification time of the file named 'file_name', with
 * sub-second precision where the build configuration says struct stat
 * provides it.
 *
 * On success, stores the time in '*mtime' and returns 0.  On failure, stores
 * zeros in '*mtime' and returns a positive errno value. */
int
get_mtime(const char *file_name, struct timespec *mtime)
{
    struct stat info;

    if (stat(file_name, &info) != 0) {
        mtime->tv_sec = 0;
        mtime->tv_nsec = 0;
        return errno;
    }

    mtime->tv_sec = info.st_mtime;

    /* The nanosecond member has different names on different systems; fall
     * back to whole seconds when neither known spelling is available. */
#if HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
    mtime->tv_nsec = info.st_mtim.tv_nsec;
#elif HAVE_STRUCT_STAT_ST_MTIMENSEC
    mtime->tv_nsec = info.st_mtimensec;
#else
    mtime->tv_nsec = 0;
#endif

    return 0;
}
ovs_strerror(error)); } else if (len != sizeof value) { error = EINVAL; VLOG_ERR_RL(&rl, "getsockopt(%s): value is %u bytes (expected %zu)", optname, (unsigned int) len, sizeof value); } else { error = 0; } *valuep = error ? 0 : value; return error; } static void describe_sockaddr(struct ds *string, int fd, int (*getaddr)(int, struct sockaddr *, socklen_t *)) { struct sockaddr_storage ss; socklen_t len = sizeof ss; if (!getaddr(fd, (struct sockaddr *) &ss, &len)) { if (ss.ss_family == AF_INET) { struct sockaddr_in sin; memcpy(&sin, &ss, sizeof sin); ds_put_format(string, IP_FMT":%"PRIu16, IP_ARGS(sin.sin_addr.s_addr), ntohs(sin.sin_port)); } else if (ss.ss_family == AF_UNIX) { struct sockaddr_un sun; const char *null; size_t maxlen; memcpy(&sun, &ss, sizeof sun); maxlen = len - offsetof(struct sockaddr_un, sun_path); null = memchr(sun.sun_path, '\0', maxlen); ds_put_buffer(string, sun.sun_path, null ? null - sun.sun_path : maxlen); } #ifdef HAVE_NETLINK else if (ss.ss_family == AF_NETLINK) { int protocol; /* SO_PROTOCOL was introduced in 2.6.32. Support it regardless of the version * of the Linux kernel headers in use at build time. 
*/ #ifndef SO_PROTOCOL #define SO_PROTOCOL 38 #endif if (!getsockopt_int(fd, SOL_SOCKET, SO_PROTOCOL, "SO_PROTOCOL", &protocol)) { switch (protocol) { case NETLINK_ROUTE: ds_put_cstr(string, "NETLINK_ROUTE"); break; case NETLINK_GENERIC: ds_put_cstr(string, "NETLINK_GENERIC"); break; default: ds_put_format(string, "AF_NETLINK family %d", protocol); break; } } else { ds_put_cstr(string, "AF_NETLINK"); } } #endif #if AF_PACKET && LINUX_DATAPATH else if (ss.ss_family == AF_PACKET) { struct sockaddr_ll sll; memcpy(&sll, &ss, sizeof sll); ds_put_cstr(string, "AF_PACKET"); if (sll.sll_ifindex) { char name[IFNAMSIZ]; if (if_indextoname(sll.sll_ifindex, name)) { ds_put_format(string, "(%s)", name); } else { ds_put_format(string, "(ifindex=%d)", sll.sll_ifindex); } } if (sll.sll_protocol) { ds_put_format(string, "(protocol=0x%"PRIu16")", ntohs(sll.sll_protocol)); } } #endif else if (ss.ss_family == AF_UNSPEC) { ds_put_cstr(string, "AF_UNSPEC"); } else { ds_put_format(string, "AF_%d", (int) ss.ss_family); } } } #ifdef LINUX_DATAPATH static void put_fd_filename(struct ds *string, int fd) { char buf[1024]; char *linkname; int n; linkname = xasprintf("/proc/self/fd/%d", fd); n = readlink(linkname, buf, sizeof buf); if (n > 0) { ds_put_char(string, ' '); ds_put_buffer(string, buf, n); if (n > sizeof buf) { ds_put_cstr(string, "..."); } } free(linkname); } #endif /* Returns a malloc()'d string describing 'fd', for use in logging. */ char * describe_fd(int fd) { struct ds string; struct stat s; ds_init(&string); if (fstat(fd, &s)) { ds_put_format(&string, "fstat failed (%s)", ovs_strerror(errno)); } else if (S_ISSOCK(s.st_mode)) { describe_sockaddr(&string, fd, getsockname); ds_put_cstr(&string, "<->"); describe_sockaddr(&string, fd, getpeername); } else { ds_put_cstr(&string, (isatty(fd) ? "tty" : S_ISDIR(s.st_mode) ? "directory" : S_ISCHR(s.st_mode) ? "character device" : S_ISBLK(s.st_mode) ? "block device" : S_ISREG(s.st_mode) ? "file" : S_ISFIFO(s.st_mode) ? 
/* Adds up the 'iov_len' members of the first 'n_iovs' elements of 'iovs' and
 * returns the sum.  'iovs' is not dereferenced when 'n_iovs' is 0.
 *
 * The caller must ensure that the total does not exceed SIZE_MAX. */
size_t
iovec_len(const struct iovec iovs[], size_t n_iovs)
{
    size_t total = 0;

    while (n_iovs > 0) {
        total += iovs->iov_len;
        iovs++;
        n_iovs--;
    }
    return total;
}
/* Sends the 'n_iovs' iovecs of data in 'iovs' and the 'n_fds' file descriptors
 * in 'fds' on Unix domain socket 'sock'.  If 'skip_bytes' is nonzero, then the
 * first 'skip_bytes' of data in the iovecs are not sent, and none of the file
 * descriptors are sent.  The function continues to retry sending until an
 * error (other than EINTR) occurs or all the data and fds are sent.
 *
 * Returns 0 if all the data and fds were successfully sent, otherwise a
 * positive errno value.  Regardless of success, stores the number of bytes
 * sent (always at least 'skip_bytes') in '*bytes_sent'.  (If at least one byte
 * is sent, then all the fds have been sent.)
 *
 * 'skip_bytes' must be less than or equal to iovec_len(iovs, n_iovs). */
int
send_iovec_and_fds_fully(int sock,
                         const struct iovec iovs[], size_t n_iovs,
                         const int fds[], size_t n_fds,
                         size_t skip_bytes, size_t *bytes_sent)
{
    *bytes_sent = 0;
    while (n_iovs > 0) {
        /* Number of bytes to account for in this pass: either bytes that are
         * being skipped, or the return value of the send call. */
        int retval;

        if (skip_bytes) {
            /* Treat the skipped prefix as if it had just been sent, so that
             * the bookkeeping below steps over it. */
            retval = skip_bytes;
            skip_bytes = 0;
        } else if (!*bytes_sent) {
            /* Nothing has gone out yet, so the fds still need to travel with
             * this data. */
            retval = send_iovec_and_fds(sock, iovs, n_iovs, fds, n_fds);
        } else {
            /* Some bytes are already accounted for, so only data remains. */
            retval = writev(sock, iovs, n_iovs);
        }

        if (retval > 0) {
            *bytes_sent += retval;

            /* Advance 'iovs' past everything that 'retval' covers. */
            while (retval > 0) {
                const uint8_t *base = iovs->iov_base;
                size_t len = iovs->iov_len;

                if (retval < len) {
                    /* 'retval' ends inside this iovec.  Push out the rest of
                     * it with write_fully() so that the outer loop always
                     * resumes on an iovec boundary. */
                    size_t sent;
                    int error;

                    error = write_fully(sock, base + retval, len - retval,
                                        &sent);
                    *bytes_sent += sent;
                    retval += sent;
                    if (error) {
                        return error;
                    }
                }

                /* This iovec is finished; move on to the next one. */
                retval -= len;
                iovs++;
                n_iovs--;
            }
        } else if (retval == 0) {
            /* Zero bytes is only acceptable if there was nothing left to
             * send. */
            if (iovec_is_empty(iovs, n_iovs)) {
                break;
            }
            VLOG_WARN("send returned 0");
            return EPROTO;
        } else if (errno != EINTR) {
            return errno;
        }
        /* On EINTR, fall through and retry. */
    }

    return 0;
}
*/ int send_iovec_and_fds_fully_block(int sock, const struct iovec iovs[], size_t n_iovs, const int fds[], size_t n_fds) { size_t sent = 0; for (;;) { int error; error = send_iovec_and_fds_fully(sock, iovs, n_iovs, fds, n_fds, sent, &sent); if (error != EAGAIN) { return error; } poll_fd_wait(sock, POLLOUT); poll_block(); } } /* Attempts to receive from Unix domain socket 'sock' up to 'size' bytes of * data into 'data' and up to SOUTIL_MAX_FDS file descriptors into 'fds'. * * - Upon success, returns the number of bytes of data copied into 'data' * and stores the number of received file descriptors into '*n_fdsp'. * * - On failure, returns a negative errno value and stores 0 in * '*n_fdsp'. * * - On EOF, returns 0 and stores 0 in '*n_fdsp'. */ int recv_data_and_fds(int sock, void *data, size_t size, int fds[SOUTIL_MAX_FDS], size_t *n_fdsp) { union { struct cmsghdr cm; char control[CMSG_SPACE(SOUTIL_MAX_FDS * sizeof *fds)]; } cmsg; struct msghdr msg; int retval; struct cmsghdr *p; size_t i; *n_fdsp = 0; do { struct iovec iov; iov.iov_base = data; iov.iov_len = size; msg.msg_name = NULL; msg.msg_namelen = 0; msg.msg_iov = &iov; msg.msg_iovlen = 1; msg.msg_control = &cmsg.cm; msg.msg_controllen = sizeof cmsg.control; msg.msg_flags = 0; retval = recvmsg(sock, &msg, 0); } while (retval < 0 && errno == EINTR); if (retval <= 0) { return retval < 0 ? 
-errno : 0; } for (p = CMSG_FIRSTHDR(&msg); p; p = CMSG_NXTHDR(&msg, p)) { if (p->cmsg_level != SOL_SOCKET || p->cmsg_type != SCM_RIGHTS) { VLOG_ERR("unexpected control message %d:%d", p->cmsg_level, p->cmsg_type); goto error; } else if (*n_fdsp) { VLOG_ERR("multiple SCM_RIGHTS received"); goto error; } else { size_t n_fds = (p->cmsg_len - CMSG_LEN(0)) / sizeof *fds; const int *fds_data = ALIGNED_CAST(const int *, CMSG_DATA(p)); ovs_assert(n_fds > 0); if (n_fds > SOUTIL_MAX_FDS) { VLOG_ERR("%zu fds received but only %d supported", n_fds, SOUTIL_MAX_FDS); for (i = 0; i < n_fds; i++) { close(fds_data[i]); } goto error; } *n_fdsp = n_fds; memcpy(fds, fds_data, n_fds * sizeof *fds); } } return retval; error: for (i = 0; i < *n_fdsp; i++) { close(fds[i]); } *n_fdsp = 0; return EPROTO; } /* Calls ioctl() on an AF_INET sock, passing the specified 'command' and * 'arg'. Returns 0 if successful, otherwise a positive errno value. */ int af_inet_ioctl(unsigned long int command, const void *arg) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; static int sock; if (ovsthread_once_start(&once)) { sock = socket(AF_INET, SOCK_DGRAM, 0); if (sock < 0) { sock = -errno; VLOG_ERR("failed to create inet socket: %s", ovs_strerror(errno)); } ovsthread_once_done(&once); } return (sock < 0 ? -sock : ioctl(sock, command, arg) == -1 ? errno : 0); } int af_inet_ifreq_ioctl(const char *name, struct ifreq *ifr, unsigned long int cmd, const char *cmd_name) { int error; ovs_strzcpy(ifr->ifr_name, name, sizeof ifr->ifr_name); error = af_inet_ioctl(cmd, ifr); if (error) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); VLOG_DBG_RL(&rl, "%s: ioctl(%s) failed: %s", name, cmd_name, ovs_strerror(error)); } return error; } openvswitch-2.0.1+git20140120/lib/socket-util.h000066400000000000000000000076631226605124000206500ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef SOCKET_UTIL_H #define SOCKET_UTIL_H 1 #include #include #include #include #include #include "openvswitch/types.h" #include #include int set_nonblocking(int fd); void xset_nonblocking(int fd); int set_dscp(int fd, uint8_t dscp); int get_max_fds(void); int lookup_ip(const char *host_name, struct in_addr *address); int lookup_ipv6(const char *host_name, struct in6_addr *address); int lookup_hostname(const char *host_name, struct in_addr *); int get_socket_rcvbuf(int sock); int check_connection_completion(int fd); int drain_rcvbuf(int fd); void drain_fd(int fd, size_t n_packets); int make_unix_socket(int style, bool nonblock, const char *bind_path, const char *connect_path); int get_unix_name_len(socklen_t sun_len); ovs_be32 guess_netmask(ovs_be32 ip); int get_null_fd(void); bool inet_parse_active(const char *target, uint16_t default_port, struct sockaddr_in *sinp); int inet_open_active(int style, const char *target, uint16_t default_port, struct sockaddr_in *sinp, int *fdp, uint8_t dscp); bool inet_parse_passive(const char *target, int default_port, struct sockaddr_in *sinp); int inet_open_passive(int style, const char *target, int default_port, struct sockaddr_in *sinp, uint8_t dscp); int read_fully(int fd, void *, size_t, size_t *bytes_read); int write_fully(int fd, const void *, size_t, size_t *bytes_written); int fsync_parent_dir(const char *file_name); int get_mtime(const char *file_name, struct timespec *mtime); void 
xpipe(int fds[2]); void xpipe_nonblocking(int fds[2]); char *describe_fd(int fd); /* Default value of dscp bits for connection between controller and manager. * Value of IPTOS_PREC_INTERNETCONTROL = 0xc0 which is defined * in is used. */ #define DSCP_DEFAULT (IPTOS_PREC_INTERNETCONTROL >> 2) /* Maximum number of fds that we support sending or receiving at one time * across a Unix domain socket. */ #define SOUTIL_MAX_FDS 8 /* Iovecs. */ size_t iovec_len(const struct iovec *iovs, size_t n_iovs); bool iovec_is_empty(const struct iovec *iovs, size_t n_iovs); /* Functions particularly useful for Unix domain sockets. */ void xsocketpair(int domain, int type, int protocol, int fds[2]); int send_iovec_and_fds(int sock, const struct iovec *iovs, size_t n_iovs, const int fds[], size_t n_fds); int send_iovec_and_fds_fully(int sock, const struct iovec *iovs, size_t n_iovs, const int fds[], size_t n_fds, size_t skip_bytes, size_t *bytes_sent); int send_iovec_and_fds_fully_block(int sock, const struct iovec *iovs, size_t n_iovs, const int fds[], size_t n_fds); int recv_data_and_fds(int sock, void *data, size_t size, int fds[SOUTIL_MAX_FDS], size_t *n_fdsp); /* Helpers for calling ioctl() on an AF_INET socket. */ struct ifreq; int af_inet_ioctl(unsigned long int command, const void *arg); int af_inet_ifreq_ioctl(const char *name, struct ifreq *, unsigned long int cmd, const char *cmd_name); #endif /* socket-util.h */ openvswitch-2.0.1+git20140120/lib/sort.c000066400000000000000000000032371226605124000173600ustar00rootroot00000000000000/* Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
/* Partitions the elements with indexes in [p, r) around the pivot element,
 * which the caller must have placed at index r - 1.
 *
 * On return, every element at an index in [p, i) compared less than or equal
 * to the pivot, the pivot itself is at index i, and every element at an index
 * in (i, r) compared greater than the pivot, where i is the return value. */
static size_t
partition(size_t p, size_t r,
          int (*compare)(size_t a, size_t b, void *aux),
          void (*swap)(size_t a, size_t b, void *aux),
          void *aux)
{
    size_t x = r - 1;           /* Pivot index; untouched until the end. */
    size_t i, j;

    i = p;
    for (j = p; j < x; j++) {
        if (compare(j, x, aux) <= 0) {
            swap(i++, j, aux);
        }
    }
    swap(i, x, aux);
    return i;
}

/* Sorts the elements with indexes in [p, r) using quicksort with a randomly
 * chosen pivot. */
static void
quicksort(size_t p, size_t r,
          int (*compare)(size_t a, size_t b, void *aux),
          void (*swap)(size_t a, size_t b, void *aux),
          void *aux)
{
    size_t i, q;

    if (r - p < 2) {
        return;
    }

    /* Move a randomly chosen element to r - 1, where partition() expects to
     * find the pivot. */
    i = random_range(r - p) + p;
    if (r - 1 != i) {
        swap(r - 1, i, aux);
    }

    q = partition(p, r, compare, swap, aux);
    quicksort(p, q, compare, swap, aux);
    /* The pivot at index q is already in its final position, so it is left
     * out of the right-hand range.  Including it would make no progress
     * whenever the pivot is the smallest element of its range. */
    quicksort(q + 1, r, compare, swap, aux);
}

/* Sorts 'count' elements that the caller addresses by index 0...count - 1.
 *
 * 'compare' is called with two indexes and 'aux' and must return a value less
 * than, equal to, or greater than zero according to how the element at index
 * 'a' orders relative to the element at index 'b'.  'swap' is called with two
 * indexes and 'aux' and must exchange the elements at those indexes. */
void
sort(size_t count,
     int (*compare)(size_t a, size_t b, void *aux),
     void (*swap)(size_t a, size_t b, void *aux),
     void *aux)
{
    quicksort(0, count, compare, swap, aux);
}
*/ #ifndef SORT_H #define SORT_H 1 #include void sort(size_t count, int (*compare)(size_t a, size_t b, void *aux), void (*swap)(size_t a, size_t b, void *aux), void *aux); #endif /* sort.h */ openvswitch-2.0.1+git20140120/lib/sset.c000066400000000000000000000167721226605124000173570ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "sset.h" #include "hash.h" static uint32_t hash_name__(const char *name, size_t length) { return hash_bytes(name, length, 0); } static uint32_t hash_name(const char *name) { return hash_name__(name, strlen(name)); } static struct sset_node * sset_find__(const struct sset *set, const char *name, size_t hash) { struct sset_node *node; HMAP_FOR_EACH_WITH_HASH (node, hmap_node, hash, &set->map) { if (!strcmp(node->name, name)) { return node; } } return NULL; } static struct sset_node * sset_add__(struct sset *set, const char *name, size_t length, size_t hash) { struct sset_node *node = xmalloc(length + sizeof *node); memcpy(node->name, name, length + 1); hmap_insert(&set->map, &node->hmap_node, hash); return node; } /* Initializes 'set' as an empty set of strings. */ void sset_init(struct sset *set) { hmap_init(&set->map); } /* Destroys 'sets'. */ void sset_destroy(struct sset *set) { if (set) { sset_clear(set); hmap_destroy(&set->map); } } /* Initializes 'set' to contain the same strings as 'orig'. 
/* Inserts a copy of 'name' into 'set' unless an equal string is already
 * present.  Returns the newly created sset_node, or NULL if 'set' already
 * contained 'name' (in which case 'set' is left unchanged). */
struct sset_node *
sset_add(struct sset *set, const char *name)
{
    const size_t name_len = strlen(name);
    const uint32_t name_hash = hash_name__(name, name_len);

    if (sset_find__(set, name, name_hash)) {
        return NULL;
    }
    return sset_add__(set, name, name_len, name_hash);
}
/* Looks up 'name' in 'set'.  If it is present, removes and frees its node and
 * returns true.  Otherwise leaves 'set' untouched and returns false. */
bool
sset_find_and_delete(struct sset *set, const char *name)
{
    struct sset_node *victim = sset_find(set, name);

    if (!victim) {
        return false;
    }

    sset_delete(set, victim);
    return true;
}
*/ bool sset_equals(const struct sset *a, const struct sset *b) { struct sset_node *node; if (sset_count(a) != sset_count(b)) { return false; } HMAP_FOR_EACH (node, hmap_node, &a->map) { if (!sset_find__(b, node->name, node->hmap_node.hash)) { return false; } } return true; } /* Returns the next node in 'set' in hash order, or NULL if no nodes remain in * 'set'. Uses '*bucketp' and '*offsetp' to determine where to begin * iteration, and stores new values to pass on the next iteration into them * before returning. * * It's better to use plain SSET_FOR_EACH and related functions, since they are * faster and better at dealing with ssets that change during iteration. * * Before beginning iteration, store 0 into '*bucketp' and '*offsetp'. */ struct sset_node * sset_at_position(const struct sset *set, uint32_t *bucketp, uint32_t *offsetp) { struct hmap_node *hmap_node; hmap_node = hmap_at_position(&set->map, bucketp, offsetp); return SSET_NODE_FROM_HMAP_NODE(hmap_node); } static int compare_string_pointers(const void *a_, const void *b_) { const char *const *a = a_; const char *const *b = b_; return strcmp(*a, *b); } /* Returns a null-terminated array of pointers to the strings in 'set', sorted * alphabetically. The caller must free the returned array when it is no * longer needed, but the strings in the array belong to 'set' and thus must * not be modified or freed. */ const char ** sset_sort(const struct sset *set) { size_t n = sset_count(set); const char **array; const char *s; size_t i; array = xmalloc(sizeof *array * (n + 1)); i = 0; SSET_FOR_EACH (s, set) { array[i++] = s; } ovs_assert(i == n); array[n] = NULL; qsort(array, n, sizeof *array, compare_string_pointers); return array; } openvswitch-2.0.1+git20140120/lib/sset.h000066400000000000000000000064731226605124000173610ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef SSET_H #define SSET_H #include "hmap.h" #include "util.h" #ifdef __cplusplus extern "C" { #endif struct sset_node { struct hmap_node hmap_node; char name[1]; }; /* A set of strings. */ struct sset { struct hmap map; }; #define SSET_INITIALIZER(SSET) { HMAP_INITIALIZER(&(SSET)->map) } /* Basics. */ void sset_init(struct sset *); void sset_destroy(struct sset *); void sset_clone(struct sset *, const struct sset *); void sset_swap(struct sset *, struct sset *); void sset_moved(struct sset *); /* Count. */ bool sset_is_empty(const struct sset *); size_t sset_count(const struct sset *); /* Insertion. */ struct sset_node *sset_add(struct sset *, const char *); struct sset_node *sset_add_and_free(struct sset *, char *); void sset_add_assert(struct sset *, const char *); void sset_add_array(struct sset *, char **, size_t n); /* Deletion. */ void sset_clear(struct sset *); void sset_delete(struct sset *, struct sset_node *); bool sset_find_and_delete(struct sset *, const char *); void sset_find_and_delete_assert(struct sset *, const char *); char *sset_pop(struct sset *); /* Search. */ struct sset_node *sset_find(const struct sset *, const char *); bool sset_contains(const struct sset *, const char *); bool sset_equals(const struct sset *, const struct sset *); struct sset_node *sset_at_position(const struct sset *, uint32_t *bucketp, uint32_t *offsetp); /* Iteration macros. 
/* Yields the name stored in the sset_node that contains HMAP_NODE, as a
 * "const char *", or NULL if HMAP_NODE is NULL.
 *
 * The expansion and the argument are fully parenthesized so that the macro
 * behaves as a single primary expression wherever it is used.  HMAP_NODE is
 * evaluated twice, so it must not have side effects. */
#define SSET_NAME_FROM_HMAP_NODE(HMAP_NODE)                         \
    ((HMAP_NODE) == NULL                                            \
     ? NULL                                                         \
     : (CONST_CAST(const char *,                                    \
                   (SSET_NODE_FROM_HMAP_NODE(HMAP_NODE)->name))))
.IP This option is only useful if the SSL peer sends its CA certificate as part of the SSL certificate chain. The SSL protocol does not require the server to send the CA certificate, but \fB\*(SN\fR(8) can be configured to do so with the \fB\-\-peer\-ca\-cert\fR option. .IP This option is mutually exclusive with \fB\-C\fR and \fB\-\-ca\-cert\fR. openvswitch-2.0.1+git20140120/lib/ssl-peer-ca-cert.man000066400000000000000000000013311226605124000217610ustar00rootroot00000000000000.IP "\fB\-\-peer\-ca\-cert=\fIpeer-cacert.pem\fR" Specifies a PEM file that contains one or more additional certificates to send to SSL peers. \fIpeer-cacert.pem\fR should be the CA certificate used to sign \fB\*(PN\fR's own certificate, that is, the certificate specified on \fB\-c\fR or \fB\-\-certificate\fR. If \fB\*(PN\fR's certificate is self-signed, then \fB\-\-certificate\fR and \fB\-\-peer\-ca\-cert\fR should specify the same file. .IP This option is not useful in normal operation, because the SSL peer must already have the CA certificate for the peer to have any confidence in \fB\*(PN\fR's identity. However, this offers a way for a new installation to bootstrap the CA certificate on its first SSL connection. openvswitch-2.0.1+git20140120/lib/ssl-syn.man000066400000000000000000000002401226605124000203210ustar00rootroot00000000000000.IP "Public key infrastructure options:" [\fB\-\-private\-key=\fIprivkey.pem\fR] .br [\fB\-\-certificate=\fIcert.pem\fR] .br [\fB\-\-ca\-cert=\fIcacert.pem\fR] openvswitch-2.0.1+git20140120/lib/ssl.man000066400000000000000000000022631226605124000175210ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .IP "\fB\-p\fR \fIprivkey.pem\fR" .IQ "\fB\-\-private\-key=\fIprivkey.pem\fR" Specifies a PEM file containing the private key used as \fB\*(PN\fR's identity for outgoing SSL connections. . 
.IP "\fB\-c\fR \fIcert.pem\fR" .IQ "\fB\-\-certificate=\fIcert.pem\fR" Specifies a PEM file containing a certificate that certifies the private key specified on \fB\-p\fR or \fB\-\-private\-key\fR to be trustworthy. The certificate must be signed by the certificate authority (CA) that the peer in SSL connections will use to verify it. . .IP "\fB\-C\fR \fIcacert.pem\fR" .IQ "\fB\-\-ca\-cert=\fIcacert.pem\fR" Specifies a PEM file containing the CA certificate that \fB\*(PN\fR should use to verify certificates presented to it by SSL peers. (This may be the same certificate that SSL peers use to verify the certificate specified on \fB\-c\fR or \fB\-\-certificate\fR, or it may be a different one, depending on the PKI design in use.) . .IP "\fB\-C none\fR" .IQ "\fB\-\-ca\-cert=none\fR" Disables verification of certificates presented by SSL peers. This introduces a security risk, because it means that certificates cannot be verified to be those of known trusted hosts. openvswitch-2.0.1+git20140120/lib/stp.c000066400000000000000000001372651226605124000172100ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* Based on sample implementation in 802.1D-1998. Above copyright and license * applies to all modifications. 
*/ #include #include "stp.h" #include #include #include #include #include #include "byte-order.h" #include "ofpbuf.h" #include "packets.h" #include "unixctl.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(stp); #define STP_PROTOCOL_ID 0x0000 #define STP_PROTOCOL_VERSION 0x00 #define STP_TYPE_CONFIG 0x00 #define STP_TYPE_TCN 0x80 OVS_PACKED( struct stp_bpdu_header { ovs_be16 protocol_id; /* STP_PROTOCOL_ID. */ uint8_t protocol_version; /* STP_PROTOCOL_VERSION. */ uint8_t bpdu_type; /* One of STP_TYPE_*. */ }); BUILD_ASSERT_DECL(sizeof(struct stp_bpdu_header) == 4); enum stp_config_bpdu_flags { STP_CONFIG_TOPOLOGY_CHANGE_ACK = 0x80, STP_CONFIG_TOPOLOGY_CHANGE = 0x01 }; OVS_PACKED( struct stp_config_bpdu { struct stp_bpdu_header header; /* Type STP_TYPE_CONFIG. */ uint8_t flags; /* STP_CONFIG_* flags. */ ovs_be64 root_id; /* 8.5.1.1: Bridge believed to be root. */ ovs_be32 root_path_cost; /* 8.5.1.2: Cost of path to root. */ ovs_be64 bridge_id; /* 8.5.1.3: ID of transmitting bridge. */ ovs_be16 port_id; /* 8.5.1.4: Port transmitting the BPDU. */ ovs_be16 message_age; /* 8.5.1.5: Age of BPDU at tx time. */ ovs_be16 max_age; /* 8.5.1.6: Timeout for received data. */ ovs_be16 hello_time; /* 8.5.1.7: Time between BPDU generation. */ ovs_be16 forward_delay; /* 8.5.1.8: State progression delay. */ }); BUILD_ASSERT_DECL(sizeof(struct stp_config_bpdu) == 35); OVS_PACKED( struct stp_tcn_bpdu { struct stp_bpdu_header header; /* Type STP_TYPE_TCN. */ }); BUILD_ASSERT_DECL(sizeof(struct stp_tcn_bpdu) == 4); struct stp_timer { bool active; /* Timer in use? */ int value; /* Current value of timer, counting up. */ }; struct stp_port { struct stp *stp; void *aux; /* Auxiliary data the user may retrieve. */ int port_id; /* 8.5.5.1: Unique port identifier. */ enum stp_state state; /* 8.5.5.2: Current state. */ int path_cost; /* 8.5.5.3: Cost of tx/rx on this port. */ stp_identifier designated_root; /* 8.5.5.4. 
*/ int designated_cost; /* 8.5.5.5: Path cost to root on port. */ stp_identifier designated_bridge; /* 8.5.5.6. */ int designated_port; /* 8.5.5.7: Port to send config msgs on. */ bool topology_change_ack; /* 8.5.5.8: Flag for next config BPDU. */ bool config_pending; /* 8.5.5.9: Send BPDU when hold expires? */ bool change_detection_enabled; /* 8.5.5.10: Detect topology changes? */ struct stp_timer message_age_timer; /* 8.5.6.1: Age of received info. */ struct stp_timer forward_delay_timer; /* 8.5.6.2: State change timer. */ struct stp_timer hold_timer; /* 8.5.6.3: BPDU rate limit timer. */ int tx_count; /* Number of BPDUs transmitted. */ int rx_count; /* Number of valid BPDUs received. */ int error_count; /* Number of bad BPDUs received. */ bool state_changed; }; struct stp { struct list node; /* Node in all_stps list. */ /* Static bridge data. */ char *name; /* Human-readable name for log messages. */ stp_identifier bridge_id; /* 8.5.3.7: This bridge. */ int max_age; /* 8.5.3.4: Time to drop received data. */ int hello_time; /* 8.5.3.5: Time between sending BPDUs. */ int forward_delay; /* 8.5.3.6: Delay between state changes. */ int bridge_max_age; /* 8.5.3.8: max_age when we're root. */ int bridge_hello_time; /* 8.5.3.9: hello_time as root. */ int bridge_forward_delay; /* 8.5.3.10: forward_delay as root. */ int rq_max_age; /* User-requested max age, in ms. */ int rq_hello_time; /* User-requested hello time, in ms. */ int rq_forward_delay; /* User-requested forward delay, in ms. */ int elapsed_remainder; /* Left-over msecs from last stp_tick(). */ /* Dynamic bridge data. */ stp_identifier designated_root; /* 8.5.3.1: Bridge believed to be root. */ unsigned int root_path_cost; /* 8.5.3.2: Cost of path to root. */ struct stp_port *root_port; /* 8.5.3.3: Lowest cost port to root. */ bool topology_change_detected; /* 8.5.3.11: Detected a topology change? */ bool topology_change; /* 8.5.3.12: Received topology change? */ /* Bridge timers. 
*/ struct stp_timer hello_timer; /* 8.5.4.1: Hello timer. */ struct stp_timer tcn_timer; /* 8.5.4.2: Topology change timer. */ struct stp_timer topology_change_timer; /* 8.5.4.3. */ /* Ports. */ struct stp_port ports[STP_MAX_PORTS]; /* Interface to client. */ bool fdb_needs_flush; /* MAC learning tables needs flushing. */ struct stp_port *first_changed_port; void (*send_bpdu)(struct ofpbuf *bpdu, int port_no, void *aux); void *aux; atomic_int ref_cnt; }; static struct ovs_mutex mutex; static struct list all_stps__ = LIST_INITIALIZER(&all_stps__); static struct list *const all_stps OVS_GUARDED_BY(mutex) = &all_stps__; #define FOR_EACH_ENABLED_PORT(PORT, STP) \ for ((PORT) = stp_next_enabled_port((STP), (STP)->ports); \ (PORT); \ (PORT) = stp_next_enabled_port((STP), (PORT) + 1)) static struct stp_port * stp_next_enabled_port(const struct stp *stp, const struct stp_port *port) OVS_REQUIRES(mutex) { for (; port < &stp->ports[ARRAY_SIZE(stp->ports)]; port++) { if (port->state != STP_DISABLED) { return CONST_CAST(struct stp_port *, port); } } return NULL; } #define MESSAGE_AGE_INCREMENT 1 static void stp_transmit_config(struct stp_port *) OVS_REQUIRES(mutex); static bool stp_supersedes_port_info(const struct stp_port *, const struct stp_config_bpdu *) OVS_REQUIRES(mutex); static void stp_record_config_information(struct stp_port *, const struct stp_config_bpdu *) OVS_REQUIRES(mutex); static void stp_record_config_timeout_values(struct stp *, const struct stp_config_bpdu *) OVS_REQUIRES(mutex); static bool stp_is_designated_port(const struct stp_port *) OVS_REQUIRES(mutex); static void stp_config_bpdu_generation(struct stp *) OVS_REQUIRES(mutex); static void stp_transmit_tcn(struct stp *) OVS_REQUIRES(mutex); static void stp_configuration_update(struct stp *) OVS_REQUIRES(mutex); static bool stp_supersedes_root(const struct stp_port *root, const struct stp_port *) OVS_REQUIRES(mutex); static void stp_root_selection(struct stp *) OVS_REQUIRES(mutex); static void 
stp_designated_port_selection(struct stp *) OVS_REQUIRES(mutex); static void stp_become_designated_port(struct stp_port *) OVS_REQUIRES(mutex); static void stp_port_state_selection(struct stp *) OVS_REQUIRES(mutex); static void stp_make_forwarding(struct stp_port *) OVS_REQUIRES(mutex); static void stp_make_blocking(struct stp_port *) OVS_REQUIRES(mutex); static void stp_set_port_state(struct stp_port *, enum stp_state) OVS_REQUIRES(mutex); static void stp_topology_change_detection(struct stp *) OVS_REQUIRES(mutex); static void stp_topology_change_acknowledged(struct stp *) OVS_REQUIRES(mutex); static void stp_acknowledge_topology_change(struct stp_port *) OVS_REQUIRES(mutex); static void stp_received_config_bpdu(struct stp *, struct stp_port *, const struct stp_config_bpdu *) OVS_REQUIRES(mutex); static void stp_received_tcn_bpdu(struct stp *, struct stp_port *) OVS_REQUIRES(mutex); static void stp_hello_timer_expiry(struct stp *) OVS_REQUIRES(mutex); static void stp_message_age_timer_expiry(struct stp_port *) OVS_REQUIRES(mutex); static bool stp_is_designated_for_some_port(const struct stp *) OVS_REQUIRES(mutex); static void stp_forward_delay_timer_expiry(struct stp_port *) OVS_REQUIRES(mutex); static void stp_tcn_timer_expiry(struct stp *) OVS_REQUIRES(mutex); static void stp_topology_change_timer_expiry(struct stp *) OVS_REQUIRES(mutex); static void stp_hold_timer_expiry(struct stp_port *) OVS_REQUIRES(mutex); static void stp_initialize_port(struct stp_port *, enum stp_state) OVS_REQUIRES(mutex); static void stp_become_root_bridge(struct stp *) OVS_REQUIRES(mutex); static void stp_update_bridge_timers(struct stp *) OVS_REQUIRES(mutex); static int clamp(int x, int min, int max); static int ms_to_timer(int ms); static int timer_to_ms(int timer); static void stp_start_timer(struct stp_timer *, int value); static void stp_stop_timer(struct stp_timer *); static bool stp_timer_expired(struct stp_timer *, int elapsed, int timeout); static void stp_send_bpdu(struct 
/* Registers the "stp/tcn" unixctl command, with stp_unixctl_tcn() as its
 * handler and a null auxiliary pointer. */
void
stp_init(void)
{
    /* "[bridge]" is the usage text.  NOTE(review): 0 and 1 are presumably the
     * minimum and maximum argument counts -- confirm against unixctl.h. */
    unixctl_command_register("stp/tcn", "[bridge]", 0, 1, stp_unixctl_tcn,
                             NULL);
}
*/ ovs_mutex_init_recursive(&mutex); ovsthread_once_done(&once); } ovs_mutex_lock(&mutex); stp = xzalloc(sizeof *stp); stp->name = xstrdup(name); stp->bridge_id = bridge_id; if (!(stp->bridge_id >> 48)) { stp->bridge_id |= (uint64_t) STP_DEFAULT_BRIDGE_PRIORITY << 48; } stp->rq_max_age = STP_DEFAULT_MAX_AGE; stp->rq_hello_time = STP_DEFAULT_HELLO_TIME; stp->rq_forward_delay = STP_DEFAULT_FWD_DELAY; stp_update_bridge_timers(stp); stp->max_age = stp->bridge_max_age; stp->hello_time = stp->bridge_hello_time; stp->forward_delay = stp->bridge_forward_delay; stp->designated_root = stp->bridge_id; stp->root_path_cost = 0; stp->root_port = NULL; stp->topology_change_detected = false; stp->topology_change = false; stp_stop_timer(&stp->tcn_timer); stp_stop_timer(&stp->topology_change_timer); stp_start_timer(&stp->hello_timer, 0); stp->send_bpdu = send_bpdu; stp->aux = aux; stp->first_changed_port = &stp->ports[ARRAY_SIZE(stp->ports)]; for (p = stp->ports; p < &stp->ports[ARRAY_SIZE(stp->ports)]; p++) { p->stp = stp; p->port_id = (stp_port_no(p) + 1) | (STP_DEFAULT_PORT_PRIORITY << 8); p->path_cost = 19; /* Recommended default for 100 Mb/s link. */ stp_initialize_port(p, STP_DISABLED); } atomic_init(&stp->ref_cnt, 1); list_push_back(all_stps, &stp->node); ovs_mutex_unlock(&mutex); return stp; } struct stp * stp_ref(const struct stp *stp_) { struct stp *stp = CONST_CAST(struct stp *, stp_); if (stp) { int orig; atomic_add(&stp->ref_cnt, 1, &orig); ovs_assert(orig > 0); } return stp; } /* Destroys 'stp'. */ void stp_unref(struct stp *stp) { int orig; if (!stp) { return; } atomic_sub(&stp->ref_cnt, 1, &orig); ovs_assert(orig > 0); if (orig == 1) { ovs_mutex_lock(&mutex); list_remove(&stp->node); ovs_mutex_unlock(&mutex); free(stp->name); free(stp); } } /* Runs 'stp' given that 'ms' milliseconds have passed. */ void stp_tick(struct stp *stp, int ms) { struct stp_port *p; int elapsed; ovs_mutex_lock(&mutex); /* Convert 'ms' to STP timer ticks. 
Preserve any leftover milliseconds * from previous stp_tick() calls so that we don't lose STP ticks when we * are called too frequently. */ ms = clamp(ms, 0, INT_MAX - 1000) + stp->elapsed_remainder; elapsed = ms_to_timer(ms); stp->elapsed_remainder = ms - timer_to_ms(elapsed); if (!elapsed) { goto out; } if (stp_timer_expired(&stp->hello_timer, elapsed, stp->hello_time)) { stp_hello_timer_expiry(stp); } if (stp_timer_expired(&stp->tcn_timer, elapsed, stp->bridge_hello_time)) { stp_tcn_timer_expiry(stp); } if (stp_timer_expired(&stp->topology_change_timer, elapsed, stp->max_age + stp->forward_delay)) { stp_topology_change_timer_expiry(stp); } FOR_EACH_ENABLED_PORT (p, stp) { if (stp_timer_expired(&p->message_age_timer, elapsed, stp->max_age)) { stp_message_age_timer_expiry(p); } } FOR_EACH_ENABLED_PORT (p, stp) { if (stp_timer_expired(&p->forward_delay_timer, elapsed, stp->forward_delay)) { stp_forward_delay_timer_expiry(p); } if (stp_timer_expired(&p->hold_timer, elapsed, ms_to_timer(1000))) { stp_hold_timer_expiry(p); } } out: ovs_mutex_unlock(&mutex); } static void set_bridge_id(struct stp *stp, stp_identifier new_bridge_id) OVS_REQUIRES(mutex) { if (new_bridge_id != stp->bridge_id) { bool root; struct stp_port *p; root = stp_is_root_bridge(stp); FOR_EACH_ENABLED_PORT (p, stp) { if (stp_is_designated_port(p)) { p->designated_bridge = new_bridge_id; } } stp->bridge_id = new_bridge_id; stp_configuration_update(stp); stp_port_state_selection(stp); if (stp_is_root_bridge(stp) && !root) { stp_become_root_bridge(stp); } } } void stp_set_bridge_id(struct stp *stp, stp_identifier bridge_id) { const uint64_t mac_bits = (UINT64_C(1) << 48) - 1; const uint64_t pri_bits = ~mac_bits; ovs_mutex_lock(&mutex); set_bridge_id(stp, (stp->bridge_id & pri_bits) | (bridge_id & mac_bits)); ovs_mutex_unlock(&mutex); } void stp_set_bridge_priority(struct stp *stp, uint16_t new_priority) { const uint64_t mac_bits = (UINT64_C(1) << 48) - 1; ovs_mutex_lock(&mutex); set_bridge_id(stp, 
((stp->bridge_id & mac_bits) | ((uint64_t) new_priority << 48))); ovs_mutex_unlock(&mutex); } /* Sets the desired hello time for 'stp' to 'ms', in milliseconds. The actual * hello time is clamped to the range of 1 to 10 seconds and subject to the * relationship (bridge_max_age >= 2 * (bridge_hello_time + 1 s)). The bridge * hello time is only used when 'stp' is the root bridge. */ void stp_set_hello_time(struct stp *stp, int ms) { ovs_mutex_lock(&mutex); stp->rq_hello_time = ms; stp_update_bridge_timers(stp); ovs_mutex_unlock(&mutex); } /* Sets the desired max age for 'stp' to 'ms', in milliseconds. The actual max * age is clamped to the range of 6 to 40 seconds and subject to the * relationships (2 * (bridge_forward_delay - 1 s) >= bridge_max_age) and * (bridge_max_age >= 2 * (bridge_hello_time + 1 s)). The bridge max age is * only used when 'stp' is the root bridge. */ void stp_set_max_age(struct stp *stp, int ms) { ovs_mutex_lock(&mutex); stp->rq_max_age = ms; stp_update_bridge_timers(stp); ovs_mutex_unlock(&mutex); } /* Sets the desired forward delay for 'stp' to 'ms', in milliseconds. The * actual forward delay is clamped to the range of 4 to 30 seconds and subject * to the relationship (2 * (bridge_forward_delay - 1 s) >= bridge_max_age). * The bridge forward delay is only used when 'stp' is the root bridge. */ void stp_set_forward_delay(struct stp *stp, int ms) { ovs_mutex_lock(&mutex); stp->rq_forward_delay = ms; stp_update_bridge_timers(stp); ovs_mutex_unlock(&mutex); } /* Returns the name given to 'stp' in the call to stp_create(). */ const char * stp_get_name(const struct stp *stp) { char *name; ovs_mutex_lock(&mutex); name = stp->name; ovs_mutex_unlock(&mutex); return name; } /* Returns the bridge ID for 'stp'. 
*/ stp_identifier stp_get_bridge_id(const struct stp *stp) { stp_identifier bridge_id; ovs_mutex_lock(&mutex); bridge_id = stp->bridge_id; ovs_mutex_unlock(&mutex); return bridge_id; } /* Returns the bridge ID of the bridge currently believed to be the root. */ stp_identifier stp_get_designated_root(const struct stp *stp) { stp_identifier designated_root; ovs_mutex_lock(&mutex); designated_root = stp->designated_root; ovs_mutex_unlock(&mutex); return designated_root; } /* Returns true if 'stp' believes itself to the be root of the spanning tree, * false otherwise. */ bool stp_is_root_bridge(const struct stp *stp) { bool is_root; ovs_mutex_lock(&mutex); is_root = stp->bridge_id == stp->designated_root; ovs_mutex_unlock(&mutex); return is_root; } /* Returns the cost of the path from 'stp' to the root of the spanning tree. */ int stp_get_root_path_cost(const struct stp *stp) { int cost; ovs_mutex_lock(&mutex); cost = stp->root_path_cost; ovs_mutex_unlock(&mutex); return cost; } /* Returns the bridge hello time, in ms. The returned value is not necessarily * the value passed to stp_set_hello_time(): it is clamped to the valid range * and quantized to the STP timer resolution. */ int stp_get_hello_time(const struct stp *stp) { int time; ovs_mutex_lock(&mutex); time = timer_to_ms(stp->bridge_hello_time); ovs_mutex_unlock(&mutex); return time; } /* Returns the bridge max age, in ms. The returned value is not necessarily * the value passed to stp_set_max_age(): it is clamped to the valid range, * quantized to the STP timer resolution, and adjusted to match the constraints * due to the hello time. */ int stp_get_max_age(const struct stp *stp) { int time; ovs_mutex_lock(&mutex); time = timer_to_ms(stp->bridge_max_age); ovs_mutex_unlock(&mutex); return time; } /* Returns the bridge forward delay, in ms. 
The returned value is not * necessarily the value passed to stp_set_forward_delay(): it is clamped to * the valid range, quantized to the STP timer resolution, and adjusted to * match the constraints due to the forward delay. */ int stp_get_forward_delay(const struct stp *stp) { int time; ovs_mutex_lock(&mutex); time = timer_to_ms(stp->bridge_forward_delay); ovs_mutex_unlock(&mutex); return time; } /* Returns true if something has happened to 'stp' which necessitates flushing * the client's MAC learning table. Calling this function resets 'stp' so that * future calls will return false until flushing is required again. */ bool stp_check_and_reset_fdb_flush(struct stp *stp) { bool needs_flush; ovs_mutex_lock(&mutex); needs_flush = stp->fdb_needs_flush; stp->fdb_needs_flush = false; ovs_mutex_unlock(&mutex); return needs_flush; } /* Returns the port in 'stp' with index 'port_no', which must be between 0 and * STP_MAX_PORTS. */ struct stp_port * stp_get_port(struct stp *stp, int port_no) { struct stp_port *port; ovs_mutex_lock(&mutex); ovs_assert(port_no >= 0 && port_no < ARRAY_SIZE(stp->ports)); port = &stp->ports[port_no]; ovs_mutex_unlock(&mutex); return port; } /* Returns the port connecting 'stp' to the root bridge, or a null pointer if * there is no such port. */ struct stp_port * stp_get_root_port(struct stp *stp) { struct stp_port *port; ovs_mutex_lock(&mutex); port = stp->root_port; ovs_mutex_unlock(&mutex); return port; } /* Finds a port whose state has changed. If successful, stores the port whose * state changed in '*portp' and returns true. If no port has changed, stores * NULL in '*portp' and returns false. 
*/ bool stp_get_changed_port(struct stp *stp, struct stp_port **portp) { struct stp_port *end, *p; bool changed = false; ovs_mutex_lock(&mutex); end = &stp->ports[ARRAY_SIZE(stp->ports)]; for (p = stp->first_changed_port; p < end; p++) { if (p->state_changed) { p->state_changed = false; stp->first_changed_port = p + 1; *portp = p; changed = true; goto out; } } stp->first_changed_port = end; *portp = NULL; out: ovs_mutex_unlock(&mutex); return changed; } /* Returns the name for the given 'state' (for use in debugging and log * messages). */ const char * stp_state_name(enum stp_state state) { switch (state) { case STP_DISABLED: return "disabled"; case STP_LISTENING: return "listening"; case STP_LEARNING: return "learning"; case STP_FORWARDING: return "forwarding"; case STP_BLOCKING: return "blocking"; default: NOT_REACHED(); } } /* Returns true if 'state' is one in which packets received on a port should * be forwarded, false otherwise. * * Returns true if 'state' is STP_DISABLED, since presumably in that case the * port should still work, just not have STP applied to it. */ bool stp_forward_in_state(enum stp_state state) { return (state & (STP_DISABLED | STP_FORWARDING)) != 0; } /* Returns true if 'state' is one in which MAC learning should be done on * packets received on a port, false otherwise. * * Returns true if 'state' is STP_DISABLED, since presumably in that case the * port should still work, just not have STP applied to it. */ bool stp_learn_in_state(enum stp_state state) { return (state & (STP_DISABLED | STP_LEARNING | STP_FORWARDING)) != 0; } /* Returns the name for the given 'role' (for use in debugging and log * messages). 
*/
const char *
stp_role_name(enum stp_role role)
{
    switch (role) {
    case STP_ROLE_ROOT:
        return "root";
    case STP_ROLE_DESIGNATED:
        return "designated";
    case STP_ROLE_ALTERNATE:
        return "alternate";
    case STP_ROLE_DISABLED:
        return "disabled";
    default:
        NOT_REACHED();
    }
}

/* Notifies the STP entity that bridge protocol data unit 'bpdu', which is
 * 'bpdu_size' bytes in length, was received on port 'p'.
 *
 * BPDUs received on a disabled port are silently ignored.  A BPDU that is too
 * short, has the wrong protocol ID, or has an unknown type is logged and
 * counted in the port's error count; any other BPDU is dispatched by type and
 * counted in the port's receive count.
 *
 * This function may call the 'send_bpdu' function provided to stp_create(). */
void
stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
{
    struct stp *stp = p->stp;
    const struct stp_bpdu_header *header;

    ovs_mutex_lock(&mutex);
    if (p->state == STP_DISABLED) {
        goto out;
    }

    /* The common header must be present before any field can be examined. */
    if (bpdu_size < sizeof(struct stp_bpdu_header)) {
        VLOG_WARN("%s: received runt %zu-byte BPDU", stp->name, bpdu_size);
        p->error_count++;
        goto out;
    }

    header = bpdu;
    if (header->protocol_id != htons(STP_PROTOCOL_ID)) {
        VLOG_WARN("%s: received BPDU with unexpected protocol ID %"PRIu16,
                  stp->name, ntohs(header->protocol_id));
        p->error_count++;
        goto out;
    }
    /* An unexpected protocol version is only logged at debug level; the BPDU
     * is still processed. */
    if (header->protocol_version != STP_PROTOCOL_VERSION) {
        VLOG_DBG("%s: received BPDU with unexpected protocol version %"PRIu8,
                 stp->name, header->protocol_version);
    }

    switch (header->bpdu_type) {
    case STP_TYPE_CONFIG:
        /* Configuration BPDUs may be longer than the structure, but not
         * shorter. */
        if (bpdu_size < sizeof(struct stp_config_bpdu)) {
            VLOG_WARN("%s: received config BPDU with invalid size %zu",
                      stp->name, bpdu_size);
            p->error_count++;
            goto out;
        }
        stp_received_config_bpdu(stp, p, bpdu);
        break;

    case STP_TYPE_TCN:
        /* TCN BPDUs must have exactly the expected size. */
        if (bpdu_size != sizeof(struct stp_tcn_bpdu)) {
            VLOG_WARN("%s: received TCN BPDU with invalid size %zu",
                      stp->name, bpdu_size);
            p->error_count++;
            goto out;
        }
        stp_received_tcn_bpdu(stp, p);
        break;

    default:
        VLOG_WARN("%s: received BPDU of unexpected type %"PRIu8,
                  stp->name, header->bpdu_type);
        p->error_count++;
        goto out;
    }
    /* Only BPDUs that passed every check above are counted as received. */
    p->rx_count++;

out:
    ovs_mutex_unlock(&mutex);
}

/* Returns the STP entity in which 'p' is nested.
*/ struct stp * stp_port_get_stp(struct stp_port *p) { struct stp *stp; ovs_mutex_lock(&mutex); stp = p->stp; ovs_mutex_unlock(&mutex); return stp; } /* Sets the 'aux' member of 'p'. * * The 'aux' member will be reset to NULL when stp_port_disable() is * called or stp_port_enable() is called when the port is in a Disabled * state. */ void stp_port_set_aux(struct stp_port *p, void *aux) { ovs_mutex_lock(&mutex); p->aux = aux; ovs_mutex_unlock(&mutex); } /* Returns the 'aux' member of 'p'. */ void * stp_port_get_aux(struct stp_port *p) { void *aux; ovs_mutex_lock(&mutex); aux = p->aux; ovs_mutex_unlock(&mutex); return aux; } /* Returns the index of port 'p' within its bridge. */ int stp_port_no(const struct stp_port *p) { struct stp *stp; int index; ovs_mutex_lock(&mutex); stp = p->stp; ovs_assert(p >= stp->ports && p < &stp->ports[ARRAY_SIZE(stp->ports)]); index = p - p->stp->ports; ovs_mutex_unlock(&mutex); return index; } /* Returns the port ID for 'p'. */ int stp_port_get_id(const struct stp_port *p) { int port_id; ovs_mutex_lock(&mutex); port_id = p->port_id; ovs_mutex_unlock(&mutex); return port_id; } /* Returns the state of port 'p'. */ enum stp_state stp_port_get_state(const struct stp_port *p) { enum stp_state state; ovs_mutex_lock(&mutex); state = p->state; ovs_mutex_unlock(&mutex); return state; } /* Returns the role of port 'p'. */ enum stp_role stp_port_get_role(const struct stp_port *p) { struct stp_port *root_port; enum stp_role role; ovs_mutex_lock(&mutex); root_port = p->stp->root_port; if (root_port && root_port->port_id == p->port_id) { role = STP_ROLE_ROOT; } else if (stp_is_designated_port(p)) { role = STP_ROLE_DESIGNATED; } else if (p->state == STP_DISABLED) { role = STP_ROLE_DISABLED; } else { role = STP_ROLE_ALTERNATE; } ovs_mutex_unlock(&mutex); return role; } /* Retrieves BPDU transmit and receive counts for 'p'. 
*/ void stp_port_get_counts(const struct stp_port *p, int *tx_count, int *rx_count, int *error_count) { ovs_mutex_lock(&mutex); *tx_count = p->tx_count; *rx_count = p->rx_count; *error_count = p->error_count; ovs_mutex_unlock(&mutex); } /* Disables STP on port 'p'. */ void stp_port_disable(struct stp_port *p) { struct stp *stp; ovs_mutex_lock(&mutex); stp = p->stp; if (p->state != STP_DISABLED) { bool root = stp_is_root_bridge(stp); stp_become_designated_port(p); stp_set_port_state(p, STP_DISABLED); p->topology_change_ack = false; p->config_pending = false; stp_stop_timer(&p->message_age_timer); stp_stop_timer(&p->forward_delay_timer); stp_configuration_update(stp); stp_port_state_selection(stp); if (stp_is_root_bridge(stp) && !root) { stp_become_root_bridge(stp); } p->aux = NULL; } ovs_mutex_unlock(&mutex); } /* Enables STP on port 'p'. The port will initially be in "blocking" state. */ void stp_port_enable(struct stp_port *p) { ovs_mutex_lock(&mutex); if (p->state == STP_DISABLED) { stp_initialize_port(p, STP_BLOCKING); stp_port_state_selection(p->stp); } ovs_mutex_unlock(&mutex); } /* Sets the priority of port 'p' to 'new_priority'. Lower numerical values * are interpreted as higher priorities. */ void stp_port_set_priority(struct stp_port *p, uint8_t new_priority) { uint16_t new_port_id; ovs_mutex_lock(&mutex); new_port_id = (p->port_id & 0xff) | (new_priority << 8); if (p->port_id != new_port_id) { struct stp *stp = p->stp; if (stp_is_designated_port(p)) { p->designated_port = new_port_id; } p->port_id = new_port_id; if (stp->bridge_id == p->designated_bridge && p->port_id < p->designated_port) { stp_become_designated_port(p); stp_port_state_selection(stp); } } ovs_mutex_unlock(&mutex); } /* Convert 'speed' (measured in Mb/s) into the path cost. */ uint16_t stp_convert_speed_to_cost(unsigned int speed) { uint16_t ret; ovs_mutex_lock(&mutex); ret = speed >= 10000 ? 2 /* 10 Gb/s. */ : speed >= 1000 ? 4 /* 1 Gb/s. */ : speed >= 100 ? 19 /* 100 Mb/s. 
*/ : speed >= 16 ? 62 /* 16 Mb/s. */ : speed >= 10 ? 100 /* 10 Mb/s. */ : speed >= 4 ? 250 /* 4 Mb/s. */ : 19; /* 100 Mb/s (guess). */ ovs_mutex_unlock(&mutex); return ret; } /* Sets the path cost of port 'p' to 'path_cost'. Lower values are generally * used to indicate faster links. Use stp_port_set_speed() to automatically * generate a default path cost from a link speed. */ void stp_port_set_path_cost(struct stp_port *p, uint16_t path_cost) { ovs_mutex_lock(&mutex); if (p->path_cost != path_cost) { struct stp *stp = p->stp; p->path_cost = path_cost; stp_configuration_update(stp); stp_port_state_selection(stp); } ovs_mutex_unlock(&mutex); } /* Sets the path cost of port 'p' based on 'speed' (measured in Mb/s). */ void stp_port_set_speed(struct stp_port *p, unsigned int speed) { stp_port_set_path_cost(p, stp_convert_speed_to_cost(speed)); } /* Enables topology change detection on port 'p'. */ void stp_port_enable_change_detection(struct stp_port *p) { p->change_detection_enabled = true; } /* Disables topology change detection on port 'p'. 
*/
void
stp_port_disable_change_detection(struct stp_port *p)
{
    /* NOTE(review): this write is not made under 'mutex', although the flag
     * is read by code that runs with the mutex held.  Confirm that this is
     * intentional. */
    p->change_detection_enabled = false;
}

/* Builds and sends a configuration BPDU on port 'p'.
 *
 * Nothing is sent if this bridge is not the root and has no root port.  If
 * the port's hold timer is running, the transmission is deferred by setting
 * 'config_pending' (see stp_hold_timer_expiry()).  A BPDU whose message age
 * would not be less than the current max age is not sent either. */
static void
stp_transmit_config(struct stp_port *p) OVS_REQUIRES(mutex)
{
    struct stp *stp = p->stp;
    bool root = stp_is_root_bridge(stp);
    if (!root && !stp->root_port) {
        return;
    }
    if (p->hold_timer.active) {
        p->config_pending = true;
    } else {
        struct stp_config_bpdu config;
        memset(&config, 0, sizeof config);
        config.header.protocol_id = htons(STP_PROTOCOL_ID);
        config.header.protocol_version = STP_PROTOCOL_VERSION;
        config.header.bpdu_type = STP_TYPE_CONFIG;
        config.flags = 0;
        if (p->topology_change_ack) {
            config.flags |= STP_CONFIG_TOPOLOGY_CHANGE_ACK;
        }
        if (stp->topology_change) {
            config.flags |= STP_CONFIG_TOPOLOGY_CHANGE;
        }
        config.root_id = htonll(stp->designated_root);
        config.root_path_cost = htonl(stp->root_path_cost);
        config.bridge_id = htonll(stp->bridge_id);
        config.port_id = htons(p->port_id);
        if (root) {
            /* The root originates configuration information, so its age is
             * zero. */
            config.message_age = htons(0);
        } else {
            /* Relay the age of the information held by the root port, plus
             * a fixed increment. */
            config.message_age = htons(stp->root_port->message_age_timer.value
                                       + MESSAGE_AGE_INCREMENT);
        }
        config.max_age = htons(stp->max_age);
        config.hello_time = htons(stp->hello_time);
        config.forward_delay = htons(stp->forward_delay);
        if (ntohs(config.message_age) < stp->max_age) {
            p->topology_change_ack = false;
            p->config_pending = false;
            stp_send_bpdu(p, &config, sizeof config);
            /* Starting the hold timer defers further transmissions on this
             * port until it expires. */
            stp_start_timer(&p->hold_timer, 0);
        }
    }
}

/* Returns true if the information in 'config' should replace the designated
 * information currently recorded for port 'p'.  The comparison is made on
 * root ID, then root path cost, then bridge ID; lower values win.  If all
 * three are equal, 'config' wins unless it carries this bridge's own ID with
 * a port ID greater than the recorded designated port. */
static bool
stp_supersedes_port_info(const struct stp_port *p,
                         const struct stp_config_bpdu *config)
    OVS_REQUIRES(mutex)
{
    if (ntohll(config->root_id) != p->designated_root) {
        return ntohll(config->root_id) < p->designated_root;
    } else if (ntohl(config->root_path_cost) != p->designated_cost) {
        return ntohl(config->root_path_cost) < p->designated_cost;
    } else if (ntohll(config->bridge_id) != p->designated_bridge) {
        return ntohll(config->bridge_id) < p->designated_bridge;
    } else {
        return (ntohll(config->bridge_id) != p->stp->bridge_id
                || ntohs(config->port_id) <= p->designated_port);
    }
}

/* Copies the designated root, cost, bridge, and port from 'config' into 'p'
 * and restarts the port's message age timer at the age carried in 'config'. */
static void
stp_record_config_information(struct stp_port *p,
                              const struct stp_config_bpdu *config)
    OVS_REQUIRES(mutex)
{
    p->designated_root = ntohll(config->root_id);
    p->designated_cost = ntohl(config->root_path_cost);
    p->designated_bridge = ntohll(config->bridge_id);
    p->designated_port = ntohs(config->port_id);
    stp_start_timer(&p->message_age_timer, ntohs(config->message_age));
}

/* Adopts the timer values and the topology change flag carried in 'config'
 * as the values currently in effect for 'stp'. */
static void
stp_record_config_timeout_values(struct stp *stp,
                                 const struct stp_config_bpdu  *config)
    OVS_REQUIRES(mutex)
{
    stp->max_age = ntohs(config->max_age);
    stp->hello_time = ntohs(config->hello_time);
    stp->forward_delay = ntohs(config->forward_delay);
    stp->topology_change = config->flags & STP_CONFIG_TOPOLOGY_CHANGE;
}

/* Returns true if 'p' is the designated port for its segment, that is, if the
 * designated bridge and port recorded for 'p' are this bridge and 'p'
 * itself. */
static bool
stp_is_designated_port(const struct stp_port *p) OVS_REQUIRES(mutex)
{
    return (p->designated_bridge == p->stp->bridge_id
            && p->designated_port == p->port_id);
}

/* Transmits a configuration BPDU on every enabled designated port of
 * 'stp'. */
static void
stp_config_bpdu_generation(struct stp *stp) OVS_REQUIRES(mutex)
{
    struct stp_port *p;

    FOR_EACH_ENABLED_PORT (p, stp) {
        if (stp_is_designated_port(p)) {
            stp_transmit_config(p);
        }
    }
}

/* Sends a topology change notification BPDU on the root port of 'stp', if it
 * has one. */
static void
stp_transmit_tcn(struct stp *stp) OVS_REQUIRES(mutex)
{
    struct stp_port *p = stp->root_port;
    struct stp_tcn_bpdu tcn_bpdu;
    if (!p) {
        return;
    }
    tcn_bpdu.header.protocol_id = htons(STP_PROTOCOL_ID);
    tcn_bpdu.header.protocol_version = STP_PROTOCOL_VERSION;
    tcn_bpdu.header.bpdu_type = STP_TYPE_TCN;
    stp_send_bpdu(p, &tcn_bpdu, sizeof tcn_bpdu);
}

/* Recomputes the root port and then the set of designated ports of 'stp'. */
static void
stp_configuration_update(struct stp *stp) OVS_REQUIRES(mutex)
{
    stp_root_selection(stp);
    stp_designated_port_selection(stp);
}

/* Returns true if 'p' is a better root port candidate than 'root'.  The
 * comparison is made on designated root, then total path cost (designated
 * cost plus the port's own path cost), then designated bridge, then
 * designated port, then the port's own ID; lower values win. */
static bool
stp_supersedes_root(const struct stp_port *root, const struct stp_port *p)
    OVS_REQUIRES(mutex)
{
    int p_cost = p->designated_cost + p->path_cost;
    int root_cost = root->designated_cost + root->path_cost;

    if (p->designated_root != root->designated_root) {
        return p->designated_root < root->designated_root;
    } else if (p_cost != root_cost) {
        return p_cost < root_cost;
    } else if (p->designated_bridge != root->designated_bridge) {
        return p->designated_bridge < root->designated_bridge;
    } else if (p->designated_port != root->designated_port) {
        return p->designated_port < root->designated_port;
    } else {
        return p->port_id < root->port_id;
    }
}

/* Selects the root port of 'stp' among its enabled ports and updates the
 * bridge's designated root and root path cost accordingly.  If no port
 * qualifies, the bridge considers itself the root. */
static void
stp_root_selection(struct stp *stp) OVS_REQUIRES(mutex)
{
    struct stp_port *p, *root;

    root = NULL;
    FOR_EACH_ENABLED_PORT (p, stp) {
        /* A port cannot be the root port if it is designated or if the root
         * it knows about is no better than this bridge itself. */
        if (stp_is_designated_port(p)
            || p->designated_root >= stp->bridge_id) {
            continue;
        }
        if (root && !stp_supersedes_root(root, p)) {
            continue;
        }
        root = p;
    }
    stp->root_port = root;

    if (!root) {
        stp->designated_root = stp->bridge_id;
        stp->root_path_cost = 0;
    } else {
        stp->designated_root = root->designated_root;
        stp->root_path_cost = root->designated_cost + root->path_cost;
    }
}

/* Makes designated every enabled port of 'stp' that already is designated,
 * that holds information about a different root, or for which this bridge
 * offers a better (or equal, with tie-breaks on bridge and port ID) path to
 * the root than the information recorded on the port. */
static void
stp_designated_port_selection(struct stp *stp) OVS_REQUIRES(mutex)
{
    struct stp_port *p;

    FOR_EACH_ENABLED_PORT (p, stp) {
        if (stp_is_designated_port(p)
            || p->designated_root != stp->designated_root
            || stp->root_path_cost < p->designated_cost
            || (stp->root_path_cost == p->designated_cost
                && (stp->bridge_id < p->designated_bridge
                    || (stp->bridge_id == p->designated_bridge
                        && p->port_id <= p->designated_port))))
        {
            stp_become_designated_port(p);
        }
    }
}

/* Records this bridge and 'p' itself as the designated bridge and port for
 * 'p', along with the bridge's current root and root path cost. */
static void
stp_become_designated_port(struct stp_port *p) OVS_REQUIRES(mutex)
{
    struct stp *stp = p->stp;
    p->designated_root = stp->designated_root;
    p->designated_cost = stp->root_path_cost;
    p->designated_bridge = stp->bridge_id;
    p->designated_port = p->port_id;
}

/* Moves every enabled port of 'stp' toward the state that matches its role:
 * the root port and designated ports toward forwarding, all other ports to
 * blocking. */
static void
stp_port_state_selection(struct stp *stp) OVS_REQUIRES(mutex)
{
    struct stp_port *p;

    FOR_EACH_ENABLED_PORT (p, stp) {
        if (p == stp->root_port) {
            p->config_pending = false;
            p->topology_change_ack = false;
            stp_make_forwarding(p);
        } else if (stp_is_designated_port(p)) {
            stp_stop_timer(&p->message_age_timer);
            stp_make_forwarding(p);
        } else {
            p->config_pending = false;
            p->topology_change_ack = false;
            stp_make_blocking(p);
        }
    }
}

/* Starts a blocking port 'p' on its way to forwarding by moving it to the
 * listening state and starting its forward delay timer.  Ports in any other
 * state are left alone. */
static void
stp_make_forwarding(struct stp_port *p) OVS_REQUIRES(mutex)
{
    if (p->state == STP_BLOCKING) {
        stp_set_port_state(p, STP_LISTENING);
        stp_start_timer(&p->forward_delay_timer, 0);
    }
}

/* Moves 'p' to the blocking state unless it is disabled or already blocking.
 * Leaving the forwarding or learning state counts as a topology change if
 * change detection is enabled on the port. */
static void
stp_make_blocking(struct stp_port *p) OVS_REQUIRES(mutex)
{
    if (!(p->state & (STP_DISABLED | STP_BLOCKING))) {
        if (p->state & (STP_FORWARDING | STP_LEARNING)) {
            if (p->change_detection_enabled) {
                stp_topology_change_detection(p->stp);
            }
        }
        stp_set_port_state(p, STP_BLOCKING);
        stp_stop_timer(&p->forward_delay_timer);
    }
}

/* Sets the state of 'p' to 'state'.  If this is a change and no change is
 * already pending for the port, marks the port as changed and makes sure that
 * the bridge's 'first_changed_port' does not point past it, so that
 * stp_get_changed_port() will report it. */
static void
stp_set_port_state(struct stp_port *p, enum stp_state state)
    OVS_REQUIRES(mutex)
{
    if (state != p->state && !p->state_changed) {
        p->state_changed = true;
        if (p < p->stp->first_changed_port) {
            p->stp->first_changed_port = p;
        }
    }
    p->state = state;
}

/* Handles detection of a topology change by 'stp'.  The root bridge sets its
 * topology change flag and starts the topology change timer; any other bridge
 * sends a TCN toward the root and starts the TCN timer, unless a change has
 * already been detected.  In either case a MAC table flush is requested. */
static void
stp_topology_change_detection(struct stp *stp) OVS_REQUIRES(mutex)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

    if (stp_is_root_bridge(stp)) {
        stp->topology_change = true;
        stp_start_timer(&stp->topology_change_timer, 0);
    } else if (!stp->topology_change_detected) {
        stp_transmit_tcn(stp);
        stp_start_timer(&stp->tcn_timer, 0);
    }
    stp->fdb_needs_flush = true;
    stp->topology_change_detected = true;
    VLOG_INFO_RL(&rl, "%s: detected topology change.", stp->name);
}

/* Records that the topology change notified by 'stp' has been acknowledged,
 * which stops the TCN timer. */
static void
stp_topology_change_acknowledged(struct stp *stp) OVS_REQUIRES(mutex)
{
    stp->topology_change_detected = false;
    stp_stop_timer(&stp->tcn_timer);
}

/* Sends a configuration BPDU on 'p' with the topology change acknowledgment
 * flag set (or defers it, see stp_transmit_config()). */
static void
stp_acknowledge_topology_change(struct stp_port *p) OVS_REQUIRES(mutex)
{
    p->topology_change_ack = true;
    stp_transmit_config(p);
}

/* Processes configuration BPDU 'config' received on port 'p' of 'stp'.
 *
 * The BPDU is rejected if its message age is not less than its max age.  If
 * it supersedes the information held for 'p', that information is recorded
 * and the spanning tree is recomputed; otherwise, if 'p' is a designated
 * port, this bridge replies with its own configuration BPDU. */
static void
stp_received_config_bpdu(struct stp *stp, struct stp_port *p,
                         const struct stp_config_bpdu *config)
    OVS_REQUIRES(mutex)
{
    if (ntohs(config->message_age) >= ntohs(config->max_age)) {
        VLOG_WARN("%s: received config BPDU with message age (%u) greater "
                  "than max age (%u)",
                  stp->name,
                  ntohs(config->message_age), ntohs(config->max_age));
        return;
    }
    if (p->state != STP_DISABLED) {
        /* Remember whether we were root so that losing the root role can be
         * detected after the update. */
        bool root = stp_is_root_bridge(stp);
        if (stp_supersedes_port_info(p, config)) {
            stp_record_config_information(p, config);
            stp_configuration_update(stp);
            stp_port_state_selection(stp);
            if (!stp_is_root_bridge(stp) && root) {
                /* No longer root: stop originating hellos, and hand any
                 * pending topology change over to the new root via TCN. */
                stp_stop_timer(&stp->hello_timer);
                if (stp->topology_change_detected) {
                    stp_stop_timer(&stp->topology_change_timer);
                    stp_transmit_tcn(stp);
                    stp_start_timer(&stp->tcn_timer, 0);
                }
            }
            if (p == stp->root_port) {
                /* Information from the root port is adopted and relayed on
                 * the designated ports. */
                stp_record_config_timeout_values(stp, config);
                stp_config_bpdu_generation(stp);
                if (config->flags & STP_CONFIG_TOPOLOGY_CHANGE_ACK) {
                    stp_topology_change_acknowledged(stp);
                }
                if (config->flags & STP_CONFIG_TOPOLOGY_CHANGE) {
                    stp->fdb_needs_flush = true;
                }
            }
        } else if (stp_is_designated_port(p)) {
            stp_transmit_config(p);
        }
    }
}

/* Processes a topology change notification BPDU received on port 'p' of
 * 'stp'.  Only enabled designated ports act on it, by treating it as a
 * detected topology change and acknowledging it. */
static void
stp_received_tcn_bpdu(struct stp *stp, struct stp_port *p)
    OVS_REQUIRES(mutex)
{
    if (p->state != STP_DISABLED) {
        if (stp_is_designated_port(p)) {
            stp_topology_change_detection(stp);
            stp_acknowledge_topology_change(p);
        }
    }
}

/* Called when the hello timer of 'stp' expires: transmits configuration BPDUs
 * on the designated ports and restarts the timer. */
static void
stp_hello_timer_expiry(struct stp *stp) OVS_REQUIRES(mutex)
{
    stp_config_bpdu_generation(stp);
    stp_start_timer(&stp->hello_timer, 0);
}

/* Called when the message age timer of 'p' expires, i.e. the information
 * recorded for 'p' has aged out.  The port becomes designated and the
 * spanning tree is recomputed.  If this bridge thereby becomes the root, the
 * same steps are taken as in stp_become_root_bridge(). */
static void
stp_message_age_timer_expiry(struct stp_port *p) OVS_REQUIRES(mutex)
{
    struct stp *stp = p->stp;
    bool root = stp_is_root_bridge(stp);
    stp_become_designated_port(p);
    stp_configuration_update(stp);
    stp_port_state_selection(stp);
    if (stp_is_root_bridge(stp) && !root) {
        stp->max_age = stp->bridge_max_age;
        stp->hello_time = stp->bridge_hello_time;
        stp->forward_delay = stp->bridge_forward_delay;
        stp_topology_change_detection(stp);
        stp_stop_timer(&stp->tcn_timer);
        stp_config_bpdu_generation(stp);
        stp_start_timer(&stp->hello_timer, 0);
    }
}

/* Returns true if this bridge is recorded as the designated bridge on at
 * least one enabled port of 'stp', false otherwise. */
static bool
stp_is_designated_for_some_port(const struct stp *stp) OVS_REQUIRES(mutex)
{
    const struct stp_port *p;

    FOR_EACH_ENABLED_PORT (p, stp) {
        if (p->designated_bridge == stp->bridge_id) {
            return true;
        }
    }
    return false;
}

/* Called when the forward delay timer of 'p' expires.  A listening port moves
 * on to learning and restarts the timer; a learning port moves on to
 * forwarding, which may count as a topology change. */
static void
stp_forward_delay_timer_expiry(struct stp_port *p) OVS_REQUIRES(mutex)
{
    if (p->state == STP_LISTENING) {
        stp_set_port_state(p, STP_LEARNING);
        stp_start_timer(&p->forward_delay_timer, 0);
    } else if (p->state == STP_LEARNING) {
        stp_set_port_state(p, STP_FORWARDING);
        if (stp_is_designated_for_some_port(p->stp)) {
            if (p->change_detection_enabled) {
                stp_topology_change_detection(p->stp);
            }
        }
    }
}

/* Called when the TCN timer of 'stp' expires: retransmits the topology change
 * notification and restarts the timer. */
static void
stp_tcn_timer_expiry(struct stp *stp) OVS_REQUIRES(mutex)
{
    stp_transmit_tcn(stp);
    stp_start_timer(&stp->tcn_timer, 0);
}

/* Called when the topology change timer of 'stp' expires: clears both
 * topology change flags. */
static void
stp_topology_change_timer_expiry(struct stp *stp) OVS_REQUIRES(mutex)
{
    stp->topology_change_detected = false;
    stp->topology_change = false;
}

/* Called when the hold timer of 'p' expires: sends the configuration BPDU
 * that was deferred while the timer was running, if any. */
static void
stp_hold_timer_expiry(struct stp_port *p) OVS_REQUIRES(mutex)
{
    if (p->config_pending) {
        stp_transmit_config(p);
    }
}

/* Resets port 'p' to its initial condition in 'state', which must be
 * STP_DISABLED or STP_BLOCKING: the port becomes designated, its flags,
 * timers, counters, and 'aux' pointer are cleared, and topology change
 * detection is enabled. */
static void
stp_initialize_port(struct stp_port *p, enum stp_state state)
    OVS_REQUIRES(mutex)
{
    ovs_assert(state & (STP_DISABLED | STP_BLOCKING));
    stp_become_designated_port(p);
    stp_set_port_state(p, state);
    p->topology_change_ack = false;
    p->config_pending = false;
    p->change_detection_enabled = true;
    p->aux = NULL;
    stp_stop_timer(&p->message_age_timer);
    stp_stop_timer(&p->forward_delay_timer);
    stp_stop_timer(&p->hold_timer);
    p->tx_count = p->rx_count = p->error_count = 0;
}

/* Performs the actions required when 'stp' becomes the root bridge: puts the
 * bridge's own timer values into effect, signals a topology change, and
 * starts originating configuration BPDUs. */
static void
stp_become_root_bridge(struct stp *stp) OVS_REQUIRES(mutex)
{
    stp->max_age = stp->bridge_max_age;
    stp->hello_time = stp->bridge_hello_time;
    stp->forward_delay = stp->bridge_forward_delay;
    stp_topology_change_detection(stp);
    stp_stop_timer(&stp->tcn_timer);
    stp_config_bpdu_generation(stp);
    stp_start_timer(&stp->hello_timer, 0);
}

/* Starts 'timer' running from 'value', in STP timer ticks. */
static void
stp_start_timer(struct stp_timer *timer, int value) OVS_REQUIRES(mutex)
{
    timer->value = value;
    timer->active = true;
}

/* Stops 'timer'.  Its value is left as is. */
static void
stp_stop_timer(struct stp_timer *timer) OVS_REQUIRES(mutex)
{
    timer->active = false;
}

/* Advances 'timer' by 'elapsed' ticks if it is running.  Returns true, and
 * stops the timer, if it has thereby reached 'timeout' ticks; returns false
 * otherwise, including when the timer is not running. */
static bool
stp_timer_expired(struct stp_timer *timer, int elapsed, int timeout)
    OVS_REQUIRES(mutex)
{
    if (timer->active) {
        timer->value += elapsed;
        if (timer->value >= timeout) {
            timer->active = false;
            return true;
        }
    }
    return false;
}

/* Returns the number of whole STP timer ticks in 'ms' milliseconds.
There * are 256 STP timer ticks per second. */ static int ms_to_timer(int ms) { return ms * 0x100 / 1000; } /* Returns the number of whole milliseconds in 'timer' STP timer ticks. There * are 256 STP timer ticks per second. */ static int timer_to_ms(int timer) { return timer * 1000 / 0x100; } static int clamp(int x, int min, int max) { return x < min ? min : x > max ? max : x; } static void stp_update_bridge_timers(struct stp *stp) OVS_REQUIRES(mutex) { int ht, ma, fd; ht = clamp(stp->rq_hello_time, 1000, 10000); ma = clamp(stp->rq_max_age, MAX(2 * (ht + 1000), 6000), 40000); fd = clamp(stp->rq_forward_delay, ma / 2 + 1000, 30000); stp->bridge_hello_time = ms_to_timer(ht); stp->bridge_max_age = ms_to_timer(ma); stp->bridge_forward_delay = ms_to_timer(fd); if (stp_is_root_bridge(stp)) { stp->max_age = stp->bridge_max_age; stp->hello_time = stp->bridge_hello_time; stp->forward_delay = stp->bridge_forward_delay; } } static void stp_send_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size) OVS_REQUIRES(mutex) { struct eth_header *eth; struct llc_header *llc; struct ofpbuf *pkt; /* Skeleton. */ pkt = ofpbuf_new(ETH_HEADER_LEN + LLC_HEADER_LEN + bpdu_size); pkt->l2 = eth = ofpbuf_put_zeros(pkt, sizeof *eth); llc = ofpbuf_put_zeros(pkt, sizeof *llc); pkt->l3 = ofpbuf_put(pkt, bpdu, bpdu_size); /* 802.2 header. */ memcpy(eth->eth_dst, eth_addr_stp, ETH_ADDR_LEN); /* p->stp->send_bpdu() must fill in source address. */ eth->eth_type = htons(pkt->size - ETH_HEADER_LEN); /* LLC header. */ llc->llc_dsap = STP_LLC_DSAP; llc->llc_ssap = STP_LLC_SSAP; llc->llc_cntl = STP_LLC_CNTL; p->stp->send_bpdu(pkt, stp_port_no(p), p->stp->aux); p->tx_count++; } /* Unixctl. 
*/ static struct stp * stp_find(const char *name) OVS_REQUIRES(mutex) { struct stp *stp; LIST_FOR_EACH (stp, node, all_stps) { if (!strcmp(stp->name, name)) { return stp; } } return NULL; } static void stp_unixctl_tcn(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { ovs_mutex_lock(&mutex); if (argc > 1) { struct stp *stp = stp_find(argv[1]); if (!stp) { unixctl_command_reply_error(conn, "no such stp object"); goto out; } stp_topology_change_detection(stp); } else { struct stp *stp; LIST_FOR_EACH (stp, node, all_stps) { stp_topology_change_detection(stp); } } unixctl_command_reply(conn, "OK"); out: ovs_mutex_unlock(&mutex); } openvswitch-2.0.1+git20140120/lib/stp.h000066400000000000000000000135201226605124000172000ustar00rootroot00000000000000/* * Copyright (c) 2008, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef STP_H #define STP_H 1 /* This is an implementation of Spanning Tree Protocol as described in IEEE * 802.1D-1998, clauses 8 and 9. Section numbers refer to this standard. */ #include #include #include "compiler.h" #include "util.h" struct ofpbuf; /* LLC field values used for STP frames. */ #define STP_LLC_SSAP 0x42 #define STP_LLC_DSAP 0x42 #define STP_LLC_CNTL 0x03 /* Bridge and port priorities that should be used by default. */ #define STP_DEFAULT_BRIDGE_PRIORITY 32768 #define STP_DEFAULT_PORT_PRIORITY 128 /* Default time values. 
*/
/* NOTE(review): presumably in milliseconds, matching the 'ms' parameters of
 * stp_set_max_age(), stp_set_hello_time() and stp_set_forward_delay() declared
 * below -- confirm against stp.c. */
#define STP_DEFAULT_MAX_AGE 20000
#define STP_DEFAULT_HELLO_TIME 2000
#define STP_DEFAULT_FWD_DELAY 15000

/* Bridge identifier.  Top 16 bits are a priority value (numerically lower
 * values are higher priorities).  Bottom 48 bits are MAC address of bridge. */
typedef uint64_t stp_identifier;

/* printf()-style format for an stp_identifier, and the macro that expands to
 * the two arguments the format consumes: the top 16 bits (priority) and the
 * bottom 48 bits (MAC address), in that order. */
#define STP_ID_FMT "%04"PRIx16".%012"PRIx64
#define STP_ID_ARGS(stp_id) \
    (uint16_t)((stp_id) >> 48), \
    (uint64_t)((stp_id) & 0xffffffffffffULL)

/* printf()-style format for a 16-bit port identifier. */
#define STP_PORT_ID_FMT "%04"PRIx16

/* Basic STP functionality. */
#define STP_MAX_PORTS 255
void stp_init(void);
struct stp *stp_create(const char *name, stp_identifier bridge_id,
                       void (*send_bpdu)(struct ofpbuf *bpdu, int port_no,
                                         void *aux),
                       void *aux);
struct stp *stp_ref(const struct stp *);
void stp_unref(struct stp *);
void stp_tick(struct stp *, int ms);
void stp_set_bridge_id(struct stp *, stp_identifier bridge_id);
void stp_set_bridge_priority(struct stp *, uint16_t new_priority);
void stp_set_hello_time(struct stp *, int ms);
void stp_set_max_age(struct stp *, int ms);
void stp_set_forward_delay(struct stp *, int ms);

/* STP properties. */
const char *stp_get_name(const struct stp *);
stp_identifier stp_get_bridge_id(const struct stp *);
stp_identifier stp_get_designated_root(const struct stp *);
bool stp_is_root_bridge(const struct stp *);
int stp_get_root_path_cost(const struct stp *);
int stp_get_hello_time(const struct stp *);
int stp_get_max_age(const struct stp *);
int stp_get_forward_delay(const struct stp *);
bool stp_check_and_reset_fdb_flush(struct stp *);

/* Obtaining STP ports. */
struct stp_port *stp_get_port(struct stp *, int port_no);
struct stp_port *stp_get_root_port(struct stp *);
bool stp_get_changed_port(struct stp *, struct stp_port **portp);

/* State of an STP port.
 *
 * A port is in exactly one state at any given time, but distinct bits are used
 * for states to allow testing for more than one state with a bit mask.
 *
 * The STP_DISABLED state means that the port is disabled by management.
 * In our implementation, this state means that the port does not
 * participate in the spanning tree, but it still forwards traffic as if
 * it were in the STP_FORWARDING state.  This may be different from
 * other implementations.
 *
 * The following diagram describes the various states and what they are
 * allowed to do in OVS:
 *
 *                     FWD  LRN  TX_BPDU  RX_BPDU
 *                     ---  ---  -------  -------
 *        Disabled      Y    -      -        -
 *        Blocking      -    -      -        Y
 *        Listening     -    -      Y        Y
 *        Learning      -    Y      Y        Y
 *        Forwarding    Y    Y      Y        Y
 *
 * Once again, note that the disabled state forwards traffic, which is
 * likely different than the spec would indicate.
 */
enum stp_state {
    STP_DISABLED = 1 << 0,       /* 8.4.5: See note above. */
    STP_LISTENING = 1 << 1,      /* 8.4.2: Not learning or relaying frames. */
    STP_LEARNING = 1 << 2,       /* 8.4.3: Learning but not relaying frames. */
    STP_FORWARDING = 1 << 3,     /* 8.4.4: Learning and relaying frames. */
    STP_BLOCKING = 1 << 4        /* 8.4.1: Initial boot state. */
};
const char *stp_state_name(enum stp_state);
bool stp_forward_in_state(enum stp_state);
bool stp_learn_in_state(enum stp_state);

/* Role of an STP port. */
enum stp_role {
    STP_ROLE_ROOT,               /* Path to root bridge. */
    STP_ROLE_DESIGNATED,         /* Path to LAN segments. */
    STP_ROLE_ALTERNATE,          /* Backup path to root bridge. */
    STP_ROLE_DISABLED            /* Port does not participate in STP. */
};
const char *stp_role_name(enum stp_role);

/* BPDU input: 'bpdu' points to 'bpdu_size' bytes received on the port. */
void stp_received_bpdu(struct stp_port *, const void *bpdu, size_t bpdu_size);

/* Per-port accessors and configuration. */
struct stp *stp_port_get_stp(struct stp_port *);
void stp_port_set_aux(struct stp_port *, void *);
void *stp_port_get_aux(struct stp_port *);
int stp_port_no(const struct stp_port *);
int stp_port_get_id(const struct stp_port *);
enum stp_state stp_port_get_state(const struct stp_port *);
enum stp_role stp_port_get_role(const struct stp_port *);
void stp_port_get_counts(const struct stp_port *,
                         int *tx_count, int *rx_count, int *error_count);
void stp_port_enable(struct stp_port *);
void stp_port_disable(struct stp_port *);
void stp_port_set_priority(struct stp_port *, uint8_t new_priority);
uint16_t stp_convert_speed_to_cost(unsigned int speed);
void stp_port_set_path_cost(struct stp_port *, uint16_t path_cost);
void stp_port_set_speed(struct stp_port *, unsigned int speed);
void stp_port_enable_change_detection(struct stp_port *);
void stp_port_disable_change_detection(struct stp_port *);

#endif /* stp.h */
openvswitch-2.0.1+git20140120/lib/stream-fd.c000066400000000000000000000160031226605124000202460ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ #include #include "stream-fd.h" #include #include #include #include #include #include #include #include "fatal-signal.h" #include "poll-loop.h" #include "socket-util.h" #include "util.h" #include "stream-provider.h" #include "stream.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(stream_fd); /* Active file descriptor stream. */ struct stream_fd { struct stream stream; int fd; }; static const struct stream_class stream_fd_class; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25); static void maybe_unlink_and_free(char *path); /* Creates a new stream named 'name' that will send and receive data on 'fd' * and stores a pointer to the stream in '*streamp'. Initial connection status * 'connect_status' is interpreted as described for stream_init(). * * Returns 0 if successful, otherwise a positive errno value. (The current * implementation never fails.) */ int new_fd_stream(const char *name, int fd, int connect_status, struct stream **streamp) { struct stream_fd *s; s = xmalloc(sizeof *s); stream_init(&s->stream, &stream_fd_class, connect_status, name); s->fd = fd; *streamp = &s->stream; return 0; } static struct stream_fd * stream_fd_cast(struct stream *stream) { stream_assert_class(stream, &stream_fd_class); return CONTAINER_OF(stream, struct stream_fd, stream); } static void fd_close(struct stream *stream) { struct stream_fd *s = stream_fd_cast(stream); close(s->fd); free(s); } static int fd_connect(struct stream *stream) { struct stream_fd *s = stream_fd_cast(stream); return check_connection_completion(s->fd); } static ssize_t fd_recv(struct stream *stream, void *buffer, size_t n) { struct stream_fd *s = stream_fd_cast(stream); ssize_t retval; retval = read(s->fd, buffer, n); return retval >= 0 ? retval : -errno; } static ssize_t fd_send(struct stream *stream, const void *buffer, size_t n) { struct stream_fd *s = stream_fd_cast(stream); ssize_t retval; retval = write(s->fd, buffer, n); return (retval > 0 ? retval : retval == 0 ? 
-EAGAIN : -errno); } static void fd_wait(struct stream *stream, enum stream_wait_type wait) { struct stream_fd *s = stream_fd_cast(stream); switch (wait) { case STREAM_CONNECT: case STREAM_SEND: poll_fd_wait(s->fd, POLLOUT); break; case STREAM_RECV: poll_fd_wait(s->fd, POLLIN); break; default: NOT_REACHED(); } } static const struct stream_class stream_fd_class = { "fd", /* name */ false, /* needs_probes */ NULL, /* open */ fd_close, /* close */ fd_connect, /* connect */ fd_recv, /* recv */ fd_send, /* send */ NULL, /* run */ NULL, /* run_wait */ fd_wait, /* wait */ }; /* Passive file descriptor stream. */ struct fd_pstream { struct pstream pstream; int fd; int (*accept_cb)(int fd, const struct sockaddr *, size_t sa_len, struct stream **); int (*set_dscp_cb)(int fd, uint8_t dscp); char *unlink_path; }; static const struct pstream_class fd_pstream_class; static struct fd_pstream * fd_pstream_cast(struct pstream *pstream) { pstream_assert_class(pstream, &fd_pstream_class); return CONTAINER_OF(pstream, struct fd_pstream, pstream); } /* Creates a new pstream named 'name' that will accept new socket connections * on 'fd' and stores a pointer to the stream in '*pstreamp'. * * When a connection has been accepted, 'accept_cb' will be called with the new * socket fd 'fd' and the remote address of the connection 'sa' and 'sa_len'. * accept_cb must return 0 if the connection is successful, in which case it * must initialize '*streamp' to the new stream, or a positive errno value on * error. In either case accept_cb takes ownership of the 'fd' passed in. * * When '*pstreamp' is closed, then 'unlink_path' (if nonnull) will be passed * to fatal_signal_unlink_file_now() and freed with free(). * * Returns 0 if successful, otherwise a positive errno value. (The current * implementation never fails.) 
*/ int new_fd_pstream(const char *name, int fd, int (*accept_cb)(int fd, const struct sockaddr *sa, size_t sa_len, struct stream **streamp), int (*set_dscp_cb)(int fd, uint8_t dscp), char *unlink_path, struct pstream **pstreamp) { struct fd_pstream *ps = xmalloc(sizeof *ps); pstream_init(&ps->pstream, &fd_pstream_class, name); ps->fd = fd; ps->accept_cb = accept_cb; ps->set_dscp_cb = set_dscp_cb; ps->unlink_path = unlink_path; *pstreamp = &ps->pstream; return 0; } static void pfd_close(struct pstream *pstream) { struct fd_pstream *ps = fd_pstream_cast(pstream); close(ps->fd); maybe_unlink_and_free(ps->unlink_path); free(ps); } static int pfd_accept(struct pstream *pstream, struct stream **new_streamp) { struct fd_pstream *ps = fd_pstream_cast(pstream); struct sockaddr_storage ss; socklen_t ss_len = sizeof ss; int new_fd; int retval; new_fd = accept(ps->fd, (struct sockaddr *) &ss, &ss_len); if (new_fd < 0) { retval = errno; if (retval != EAGAIN) { VLOG_DBG_RL(&rl, "accept: %s", ovs_strerror(retval)); } return retval; } retval = set_nonblocking(new_fd); if (retval) { close(new_fd); return retval; } return ps->accept_cb(new_fd, (const struct sockaddr *) &ss, ss_len, new_streamp); } static void pfd_wait(struct pstream *pstream) { struct fd_pstream *ps = fd_pstream_cast(pstream); poll_fd_wait(ps->fd, POLLIN); } static int pfd_set_dscp(struct pstream *pstream, uint8_t dscp) { struct fd_pstream *ps = fd_pstream_cast(pstream); if (ps->set_dscp_cb) { return ps->set_dscp_cb(ps->fd, dscp); } return 0; } static const struct pstream_class fd_pstream_class = { "pstream", false, NULL, pfd_close, pfd_accept, pfd_wait, pfd_set_dscp, }; /* Helper functions. */ static void maybe_unlink_and_free(char *path) { if (path) { fatal_signal_unlink_file_now(path); free(path); } } openvswitch-2.0.1+git20140120/lib/stream-fd.h000066400000000000000000000023201226605124000202500ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2012 Nicira, Inc. 
*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef STREAM_FD_H
#define STREAM_FD_H 1

/* NOTE(review): the operand-less "#include" lines below lost their <...>
 * header names before this copy was captured; restore them from upstream. */
#include
#include
#include

struct stream;
struct pstream;
struct sockaddr;

/* Creates an active stream named 'name' on top of 'fd' and stores it in
 * '*streamp'.  Returns 0 on success, otherwise a positive errno value. */
int new_fd_stream(const char *name, int fd, int connect_status,
                  struct stream **streamp);

/* Creates a passive stream named 'name' that accepts connections on 'fd' and
 * stores it in '*pstreamp'.  'accept_cb' receives each accepted descriptor and
 * peer address; 'set_dscp_cb' may be null; 'unlink_path', if nonnull, is
 * unlinked and freed when the pstream is closed.  Returns 0 on success,
 * otherwise a positive errno value. */
int new_fd_pstream(const char *name, int fd,
                   int (*accept_cb)(int fd, const struct sockaddr *,
                                    size_t sa_len, struct stream **),
                   int (*set_dscp_cb)(int fd, uint8_t dscp),
                   char *unlink_path,
                   struct pstream **pstreamp);

#endif /* stream-fd.h */
openvswitch-2.0.1+git20140120/lib/stream-nossl.c000066400000000000000000000034341226605124000210170ustar00rootroot00000000000000/*
 * Copyright (c) 2011 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include
#include "stream-ssl.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(stream_nossl);

/* Dummy function definitions, used when OVS is built without OpenSSL.
*/ bool stream_ssl_is_configured(void) { return false; } static void NO_RETURN nossl_option(const char *detail) { VLOG_FATAL("%s specified but Open vSwitch was built without SSL support", detail); } void stream_ssl_set_private_key_file(const char *file_name) { if (file_name != NULL) { nossl_option("Private key"); } } void stream_ssl_set_certificate_file(const char *file_name) { if (file_name != NULL) { nossl_option("Certificate"); } } void stream_ssl_set_ca_cert_file(const char *file_name, bool bootstrap OVS_UNUSED) { if (file_name != NULL) { nossl_option("CA certificate"); } } void stream_ssl_set_peer_ca_cert_file(const char *file_name) { if (file_name != NULL) { nossl_option("Peer CA certificate"); } } void stream_ssl_set_key_and_cert(const char *private_key_file, const char *certificate_file) { stream_ssl_set_private_key_file(private_key_file); stream_ssl_set_certificate_file(certificate_file); } openvswitch-2.0.1+git20140120/lib/stream-provider.h000066400000000000000000000200031226605124000215070ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef STREAM_PROVIDER_H #define STREAM_PROVIDER_H 1 #include #include "stream.h" /* Active stream connection. */ /* Active stream connection. * * This structure should be treated as opaque by implementation. 
*/
struct stream {
    const struct stream_class *class;   /* Checked by stream_assert_class(). */
    int state;
    int error;
    ovs_be32 remote_ip;
    ovs_be16 remote_port;
    ovs_be32 local_ip;
    ovs_be16 local_port;
    char *name;
};

void stream_init(struct stream *, const struct stream_class *,
                 int connect_status, const char *name);
void stream_set_remote_ip(struct stream *, ovs_be32 remote_ip);
void stream_set_remote_port(struct stream *, ovs_be16 remote_port);
void stream_set_local_ip(struct stream *, ovs_be32 local_ip);
void stream_set_local_port(struct stream *, ovs_be16 local_port);

/* Asserts that 'stream' is an instance of 'class'.  Providers call this
 * before downcasting a struct stream to their own type. */
static inline void stream_assert_class(const struct stream *stream,
                                       const struct stream_class *class)
{
    ovs_assert(stream->class == class);
}

/* Operations that an active stream provider implements.  Member order is part
 * of the interface for any provider that fills this in with a positional
 * initializer. */
struct stream_class {
    /* Prefix for connection names, e.g. "tcp", "ssl", "unix". */
    const char *name;

    /* True if this stream needs periodic probes to verify connectivity.  For
     * streams which need probes, it can take a long time to notice the
     * connection was dropped. */
    bool needs_probes;

    /* Attempts to connect to a peer.  'name' is the full connection name
     * provided by the user, e.g. "tcp:1.2.3.4".  This name is useful for error
     * messages but must not be modified.
     *
     * 'suffix' is a copy of 'name' following the colon and may be modified.
     * 'dscp' is the DSCP value that the new connection should use in the IP
     * packets it sends.
     *
     * Returns 0 if successful, otherwise a positive errno value.  If
     * successful, stores a pointer to the new connection in '*streamp'.
     *
     * The open function must not block waiting for a connection to complete.
     * If the connection cannot be completed immediately, it should return
     * EAGAIN (not EINPROGRESS, as returned by the connect system call) and
     * continue the connection in the background. */
    int (*open)(const char *name, char *suffix, struct stream **streamp,
                uint8_t dscp);

    /* Closes 'stream' and frees associated memory. */
    void (*close)(struct stream *stream);

    /* Tries to complete the connection on 'stream'.  If 'stream''s connection
     * is complete, returns 0 if the connection was successful or a positive
     * errno value if it failed.  If the connection is still in progress,
     * returns EAGAIN.
     *
     * The connect function must not block waiting for the connection to
     * complete; instead, it should return EAGAIN immediately. */
    int (*connect)(struct stream *stream);

    /* Tries to receive up to 'n' bytes from 'stream' into 'buffer', and
     * returns:
     *
     *     - If successful, the number of bytes received (between 1 and 'n').
     *
     *     - On error, a negative errno value.
     *
     *     - 0, if the connection has been closed in the normal fashion.
     *
     * The recv function will not be passed a zero 'n'.
     *
     * The recv function must not block waiting for data to arrive.  If no data
     * have been received, it should return -EAGAIN immediately. */
    ssize_t (*recv)(struct stream *stream, void *buffer, size_t n);

    /* Tries to send up to 'n' bytes of 'buffer' on 'stream', and returns:
     *
     *     - If successful, the number of bytes sent (between 1 and 'n').
     *
     *     - On error, a negative errno value.
     *
     *     - Never returns 0.
     *
     * The send function will not be passed a zero 'n'.
     *
     * The send function must not block.  If no bytes can be immediately
     * accepted for transmission, it should return -EAGAIN immediately. */
    ssize_t (*send)(struct stream *stream, const void *buffer, size_t n);

    /* Allows 'stream' to perform maintenance activities, such as flushing
     * output buffers.
     *
     * May be null if 'stream' doesn't have anything to do here. */
    void (*run)(struct stream *stream);

    /* Arranges for the poll loop to wake up when 'stream' needs to perform
     * maintenance activities.
     *
     * May be null if 'stream' doesn't have anything to do here. */
    void (*run_wait)(struct stream *stream);

    /* Arranges for the poll loop to wake up when 'stream' is ready to take an
     * action of the given 'type'. */
    void (*wait)(struct stream *stream, enum stream_wait_type type);
};

/* Passive listener for incoming stream connections.
 *
 * This structure should be treated as opaque by stream implementations. */
struct pstream {
    const struct pstream_class *class;  /* Checked by pstream_assert_class(). */
    char *name;
    ovs_be16 bound_port;
};

void pstream_init(struct pstream *, const struct pstream_class *,
                  const char *name);
void pstream_set_bound_port(struct pstream *, ovs_be16 bound_port);

/* Asserts that 'pstream' is an instance of 'class'.  Providers call this
 * before downcasting a struct pstream to their own type. */
static inline void pstream_assert_class(const struct pstream *pstream,
                                        const struct pstream_class *class)
{
    ovs_assert(pstream->class == class);
}

/* Operations that a passive stream provider implements.  Member order is part
 * of the interface for any provider that fills this in with a positional
 * initializer. */
struct pstream_class {
    /* Prefix for connection names, e.g. "ptcp", "pssl", "punix". */
    const char *name;

    /* True if this pstream needs periodic probes to verify connectivity.  For
     * pstreams which need probes, it can take a long time to notice the
     * connection was dropped. */
    bool needs_probes;

    /* Attempts to start listening for stream connections.  'name' is the full
     * connection name provided by the user, e.g. "ptcp:1234".  This name is
     * useful for error messages but must not be modified.
     *
     * 'suffix' is a copy of 'name' following the colon and may be modified.
     * 'dscp' is the DSCP value that the new connection should use in the IP
     * packets it sends.
     *
     * Returns 0 if successful, otherwise a positive errno value.  If
     * successful, stores a pointer to the new connection in '*pstreamp'.
     *
     * The listen function must not block.  If the connection cannot be
     * completed immediately, it should return EAGAIN (not EINPROGRESS, as
     * returned by the connect system call) and continue the connection in the
     * background. */
    int (*listen)(const char *name, char *suffix, struct pstream **pstreamp,
                  uint8_t dscp);

    /* Closes 'pstream' and frees associated memory. */
    void (*close)(struct pstream *pstream);

    /* Tries to accept a new connection on 'pstream'.  If successful, stores
     * the new connection in '*new_streamp' and returns 0.  Otherwise, returns
     * a positive errno value.
     *
     * The accept function must not block waiting for a connection.  If no
     * connection is ready to be accepted, it should return EAGAIN. */
    int (*accept)(struct pstream *pstream, struct stream **new_streamp);

    /* Arranges for the poll loop to wake up when a connection is ready to be
     * accepted on 'pstream'. */
    void (*wait)(struct pstream *pstream);

    /* Set DSCP value of the listening socket. */
    int (*set_dscp)(struct pstream *pstream, uint8_t dscp);
};

/* Active and passive stream classes. */
extern const struct stream_class tcp_stream_class;
extern const struct pstream_class ptcp_pstream_class;
extern const struct stream_class unix_stream_class;
extern const struct pstream_class punix_pstream_class;
#ifdef HAVE_OPENSSL
/* Only declared when the build has OpenSSL. */
extern const struct stream_class ssl_stream_class;
extern const struct pstream_class pssl_pstream_class;
#endif

#endif /* stream-provider.h */
openvswitch-2.0.1+git20140120/lib/stream-ssl.c000066400000000000000000001230731226605124000204640ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

/* NOTE(review): the operand-less "#include" lines below lost their <...>
 * header names before this copy was captured; restore them from upstream. */
#include
#include "stream-ssl.h"
#include "dhparams.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "coverage.h"
#include "dynamic-string.h"
#include "entropy.h"
#include "ofpbuf.h"
#include "openflow/openflow.h"
#include "packets.h"
#include "poll-loop.h"
#include "shash.h"
#include "socket-util.h"
#include "util.h"
#include "stream-provider.h"
#include "stream.h"
#include "timeval.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(stream_ssl);

/* Active SSL. */

/* Connection phase of an ssl_stream.  ssl_connect() first waits for the TCP
 * connection to complete, then moves on to the SSL handshake. */
enum ssl_state {
    STATE_TCP_CONNECTING,
    STATE_SSL_CONNECTING
};

/* Which side of the handshake we play: ssl_connect() calls SSL_connect() for
 * CLIENT and SSL_accept() otherwise. */
enum session_type {
    CLIENT,
    SERVER
};

struct ssl_stream
{
    struct stream stream;
    enum ssl_state state;
    enum session_type type;
    int fd;
    SSL *ssl;
    struct ofpbuf *txbuf;
    unsigned int session_nr;

    /* rx_want and tx_want record the result of the last call to SSL_read()
     * and SSL_write(), respectively:
     *
     *    - If the call reported that data needed to be read from the file
     *      descriptor, the corresponding member is set to SSL_READING.
     *
     *    - If the call reported that data needed to be written to the file
     *      descriptor, the corresponding member is set to SSL_WRITING.
     *
     *    - Otherwise, the member is set to SSL_NOTHING, indicating that the
     *      call completed successfully (or with an error) and that there is no
     *      need to block.
     *
     * These are needed because there is no way to ask OpenSSL what a data read
     * or write would require without giving it a buffer to receive into or
     * data to send, respectively.  (Note that the SSL_want() status is
     * overwritten by each SSL_read() or SSL_write() call, so we can't rely on
     * its value.)
     *
     * A single call to SSL_read() or SSL_write() can perform both reading
     * and writing and thus invalidate not one of these values but actually
     * both.  Consider this situation, for example:
     *
     *    - SSL_write() blocks on a read, so tx_want gets SSL_READING.
*
     *    - SSL_read() later succeeds reading from 'fd' and clears out the
     *      whole receive buffer, so rx_want gets SSL_READING.
     *
     *    - Client calls stream_wait(STREAM_RECV) and stream_wait(STREAM_SEND)
     *      and blocks.
     *
     *    - Now we're stuck blocking until the peer sends us data, even though
     *      SSL_write() could now succeed, which could easily be a deadlock
     *      condition.
     *
     * On the other hand, we can't reset both tx_want and rx_want on every call
     * to SSL_read() or SSL_write(), because that would produce livelock,
     * e.g. in this situation:
     *
     *    - SSL_write() blocks, so tx_want gets SSL_READING or SSL_WRITING.
     *
     *    - SSL_read() blocks, so rx_want gets SSL_READING or SSL_WRITING,
     *      but tx_want gets reset to SSL_NOTHING.
     *
     *    - Client calls stream_wait(STREAM_RECV) and stream_wait(STREAM_SEND)
     *      and blocks.
     *
     *    - Client wakes up immediately since SSL_NOTHING in tx_want indicates
     *      that no blocking is necessary.
     *
     * The solution we adopt here is to set tx_want to SSL_NOTHING after
     * calling SSL_read() only if the SSL state of the connection changed,
     * which indicates that an SSL-level renegotiation made some progress, and
     * similarly for rx_want and SSL_write().  This prevents both the
     * deadlock and livelock situations above.
     */
    int rx_want, tx_want;

    /* A few bytes of header data in case SSL negotiation fails. */
    uint8_t head[2];
    short int n_head;           /* Bytes captured in 'head' so far; values
                                 * <= 0 mean nothing has been captured. */
};

/* SSL context created by ssl_init(). */
static SSL_CTX *ctx;

struct ssl_config_file {
    bool read;                  /* Whether the file was successfully read. */
    char *file_name;            /* Configured file name, if any. */
    struct timespec mtime;      /* File mtime as of last time we read it. */
};

/* SSL configuration files. */
static struct ssl_config_file private_key;
static struct ssl_config_file certificate;
static struct ssl_config_file ca_cert;

/* Ordinarily, the SSL client and server verify each other's certificates using
 * a CA certificate.  Setting this to false disables this behavior.  (This is a
 * security risk.)
*/ static bool verify_peer_cert = true; /* Ordinarily, we require a CA certificate for the peer to be locally * available. We can, however, bootstrap the CA certificate from the peer at * the beginning of our first connection then use that certificate on all * subsequent connections, saving it to a file for use in future runs also. In * this case, 'bootstrap_ca_cert' is true. */ static bool bootstrap_ca_cert; /* Session number. Used in debug logging messages to uniquely identify a * session. */ static unsigned int next_session_nr; /* Who knows what can trigger various SSL errors, so let's throttle them down * quite a bit. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25); static int ssl_init(void); static int do_ssl_init(void); static bool ssl_wants_io(int ssl_error); static void ssl_close(struct stream *); static void ssl_clear_txbuf(struct ssl_stream *); static void interpret_queued_ssl_error(const char *function); static int interpret_ssl_error(const char *function, int ret, int error, int *want); static DH *tmp_dh_callback(SSL *ssl, int is_export OVS_UNUSED, int keylength); static void log_ca_cert(const char *file_name, X509 *cert); static void stream_ssl_set_ca_cert_file__(const char *file_name, bool bootstrap, bool force); static void ssl_protocol_cb(int write_p, int version, int content_type, const void *, size_t, SSL *, void *sslv_); static bool update_ssl_config(struct ssl_config_file *, const char *file_name); static short int want_to_poll_events(int want) { switch (want) { case SSL_NOTHING: NOT_REACHED(); case SSL_READING: return POLLIN; case SSL_WRITING: return POLLOUT; default: NOT_REACHED(); } } static int new_ssl_stream(const char *name, int fd, enum session_type type, enum ssl_state state, const struct sockaddr_in *remote, struct stream **streamp) { struct sockaddr_in local; socklen_t local_len = sizeof local; struct ssl_stream *sslv; SSL *ssl = NULL; int on = 1; int retval; /* Check for all the needful configuration. 
*/ retval = 0; if (!private_key.read) { VLOG_ERR("Private key must be configured to use SSL"); retval = ENOPROTOOPT; } if (!certificate.read) { VLOG_ERR("Certificate must be configured to use SSL"); retval = ENOPROTOOPT; } if (!ca_cert.read && verify_peer_cert && !bootstrap_ca_cert) { VLOG_ERR("CA certificate must be configured to use SSL"); retval = ENOPROTOOPT; } if (!retval && !SSL_CTX_check_private_key(ctx)) { VLOG_ERR("Private key does not match certificate public key: %s", ERR_error_string(ERR_get_error(), NULL)); retval = ENOPROTOOPT; } if (retval) { goto error; } /* Get the local IP and port information */ retval = getsockname(fd, (struct sockaddr *) &local, &local_len); if (retval) { memset(&local, 0, sizeof local); } /* Disable Nagle. */ retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on); if (retval) { VLOG_ERR("%s: setsockopt(TCP_NODELAY): %s", name, ovs_strerror(errno)); retval = errno; goto error; } /* Create and configure OpenSSL stream. */ ssl = SSL_new(ctx); if (ssl == NULL) { VLOG_ERR("SSL_new: %s", ERR_error_string(ERR_get_error(), NULL)); retval = ENOPROTOOPT; goto error; } if (SSL_set_fd(ssl, fd) == 0) { VLOG_ERR("SSL_set_fd: %s", ERR_error_string(ERR_get_error(), NULL)); retval = ENOPROTOOPT; goto error; } if (!verify_peer_cert || (bootstrap_ca_cert && type == CLIENT)) { SSL_set_verify(ssl, SSL_VERIFY_NONE, NULL); } /* Create and return the ssl_stream. 
*/ sslv = xmalloc(sizeof *sslv); stream_init(&sslv->stream, &ssl_stream_class, EAGAIN, name); stream_set_remote_ip(&sslv->stream, remote->sin_addr.s_addr); stream_set_remote_port(&sslv->stream, remote->sin_port); stream_set_local_ip(&sslv->stream, local.sin_addr.s_addr); stream_set_local_port(&sslv->stream, local.sin_port); sslv->state = state; sslv->type = type; sslv->fd = fd; sslv->ssl = ssl; sslv->txbuf = NULL; sslv->rx_want = sslv->tx_want = SSL_NOTHING; sslv->session_nr = next_session_nr++; sslv->n_head = 0; if (VLOG_IS_DBG_ENABLED()) { SSL_set_msg_callback(ssl, ssl_protocol_cb); SSL_set_msg_callback_arg(ssl, sslv); } *streamp = &sslv->stream; return 0; error: if (ssl) { SSL_free(ssl); } close(fd); return retval; } static struct ssl_stream * ssl_stream_cast(struct stream *stream) { stream_assert_class(stream, &ssl_stream_class); return CONTAINER_OF(stream, struct ssl_stream, stream); } static int ssl_open(const char *name, char *suffix, struct stream **streamp, uint8_t dscp) { struct sockaddr_in sin; int error, fd; error = ssl_init(); if (error) { return error; } error = inet_open_active(SOCK_STREAM, suffix, OFP_SSL_PORT, &sin, &fd, dscp); if (fd >= 0) { int state = error ? STATE_TCP_CONNECTING : STATE_SSL_CONNECTING; return new_ssl_stream(name, fd, CLIENT, state, &sin, streamp); } else { VLOG_ERR("%s: connect: %s", name, ovs_strerror(error)); return error; } } static int do_ca_cert_bootstrap(struct stream *stream) { struct ssl_stream *sslv = ssl_stream_cast(stream); STACK_OF(X509) *chain; X509 *cert; FILE *file; int error; int fd; chain = SSL_get_peer_cert_chain(sslv->ssl); if (!chain || !sk_X509_num(chain)) { VLOG_ERR("could not bootstrap CA cert: no certificate presented by " "peer"); return EPROTO; } cert = sk_X509_value(chain, sk_X509_num(chain) - 1); /* Check that 'cert' is self-signed. Otherwise it is not a CA * certificate and we should not attempt to use it as one. 
*/ error = X509_check_issued(cert, cert); if (error) { VLOG_ERR("could not bootstrap CA cert: obtained certificate is " "not self-signed (%s)", X509_verify_cert_error_string(error)); if (sk_X509_num(chain) < 2) { VLOG_ERR("only one certificate was received, so probably the peer " "is not configured to send its CA certificate"); } return EPROTO; } fd = open(ca_cert.file_name, O_CREAT | O_EXCL | O_WRONLY, 0444); if (fd < 0) { if (errno == EEXIST) { VLOG_INFO_RL(&rl, "reading CA cert %s created by another process", ca_cert.file_name); stream_ssl_set_ca_cert_file__(ca_cert.file_name, true, true); return EPROTO; } else { VLOG_ERR("could not bootstrap CA cert: creating %s failed: %s", ca_cert.file_name, ovs_strerror(errno)); return errno; } } file = fdopen(fd, "w"); if (!file) { error = errno; VLOG_ERR("could not bootstrap CA cert: fdopen failed: %s", ovs_strerror(error)); unlink(ca_cert.file_name); return error; } if (!PEM_write_X509(file, cert)) { VLOG_ERR("could not bootstrap CA cert: PEM_write_X509 to %s failed: " "%s", ca_cert.file_name, ERR_error_string(ERR_get_error(), NULL)); fclose(file); unlink(ca_cert.file_name); return EIO; } if (fclose(file)) { error = errno; VLOG_ERR("could not bootstrap CA cert: writing %s failed: %s", ca_cert.file_name, ovs_strerror(error)); unlink(ca_cert.file_name); return error; } VLOG_INFO("successfully bootstrapped CA cert to %s", ca_cert.file_name); log_ca_cert(ca_cert.file_name, cert); bootstrap_ca_cert = false; ca_cert.read = true; /* SSL_CTX_add_client_CA makes a copy of cert's relevant data. */ SSL_CTX_add_client_CA(ctx, cert); /* SSL_CTX_use_certificate() takes ownership of the certificate passed in. * 'cert' is owned by sslv->ssl, so we need to duplicate it. 
*/ cert = X509_dup(cert); if (!cert) { out_of_memory(); } SSL_CTX_set_cert_store(ctx, X509_STORE_new()); if (SSL_CTX_load_verify_locations(ctx, ca_cert.file_name, NULL) != 1) { VLOG_ERR("SSL_CTX_load_verify_locations: %s", ERR_error_string(ERR_get_error(), NULL)); return EPROTO; } VLOG_INFO("killing successful connection to retry using CA cert"); return EPROTO; } static int ssl_connect(struct stream *stream) { struct ssl_stream *sslv = ssl_stream_cast(stream); int retval; switch (sslv->state) { case STATE_TCP_CONNECTING: retval = check_connection_completion(sslv->fd); if (retval) { return retval; } sslv->state = STATE_SSL_CONNECTING; /* Fall through. */ case STATE_SSL_CONNECTING: /* Capture the first few bytes of received data so that we can guess * what kind of funny data we've been sent if SSL negotiation fails. */ if (sslv->n_head <= 0) { sslv->n_head = recv(sslv->fd, sslv->head, sizeof sslv->head, MSG_PEEK); } retval = (sslv->type == CLIENT ? SSL_connect(sslv->ssl) : SSL_accept(sslv->ssl)); if (retval != 1) { int error = SSL_get_error(sslv->ssl, retval); if (retval < 0 && ssl_wants_io(error)) { return EAGAIN; } else { int unused; interpret_ssl_error((sslv->type == CLIENT ? "SSL_connect" : "SSL_accept"), retval, error, &unused); shutdown(sslv->fd, SHUT_RDWR); stream_report_content(sslv->head, sslv->n_head, STREAM_SSL, THIS_MODULE, stream_get_name(stream)); return EPROTO; } } else if (bootstrap_ca_cert) { return do_ca_cert_bootstrap(stream); } else if (verify_peer_cert && ((SSL_get_verify_mode(sslv->ssl) & (SSL_VERIFY_NONE | SSL_VERIFY_PEER)) != SSL_VERIFY_PEER)) { /* Two or more SSL connections completed at the same time while we * were in bootstrap mode. Only one of these can finish the * bootstrap successfully. The other one(s) must be rejected * because they were not verified against the bootstrapped CA * certificate. (Alternatively we could verify them against the CA * certificate, but that's more trouble than it's worth. 
These * connections will succeed the next time they retry, assuming that * they have a certificate against the correct CA.) */ VLOG_INFO("rejecting SSL connection during bootstrap race window"); return EPROTO; } else { return 0; } } NOT_REACHED(); } static void ssl_close(struct stream *stream) { struct ssl_stream *sslv = ssl_stream_cast(stream); ssl_clear_txbuf(sslv); /* Attempt clean shutdown of the SSL connection. This will work most of * the time, as long as the kernel send buffer has some free space and the * SSL connection isn't renegotiating, etc. That has to be good enough, * since we don't have any way to continue the close operation in the * background. */ SSL_shutdown(sslv->ssl); /* SSL_shutdown() might have signaled an error, in which case we need to * flush it out of the OpenSSL error queue or the next OpenSSL operation * will falsely signal an error. */ ERR_clear_error(); SSL_free(sslv->ssl); close(sslv->fd); free(sslv); } static void interpret_queued_ssl_error(const char *function) { int queued_error = ERR_get_error(); if (queued_error != 0) { VLOG_WARN_RL(&rl, "%s: %s", function, ERR_error_string(queued_error, NULL)); } else { VLOG_ERR_RL(&rl, "%s: SSL_ERROR_SSL without queued error", function); } } static int interpret_ssl_error(const char *function, int ret, int error, int *want) { *want = SSL_NOTHING; switch (error) { case SSL_ERROR_NONE: VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_NONE", function); break; case SSL_ERROR_ZERO_RETURN: VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_ZERO_RETURN", function); break; case SSL_ERROR_WANT_READ: *want = SSL_READING; return EAGAIN; case SSL_ERROR_WANT_WRITE: *want = SSL_WRITING; return EAGAIN; case SSL_ERROR_WANT_CONNECT: VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_WANT_CONNECT", function); break; case SSL_ERROR_WANT_ACCEPT: VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_WANT_ACCEPT", function); break; case SSL_ERROR_WANT_X509_LOOKUP: VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_WANT_X509_LOOKUP", function); break; 
case SSL_ERROR_SYSCALL: { int queued_error = ERR_get_error(); if (queued_error == 0) { if (ret < 0) { int status = errno; VLOG_WARN_RL(&rl, "%s: system error (%s)", function, ovs_strerror(status)); return status; } else { VLOG_WARN_RL(&rl, "%s: unexpected SSL connection close", function); return EPROTO; } } else { VLOG_WARN_RL(&rl, "%s: %s", function, ERR_error_string(queued_error, NULL)); break; } } case SSL_ERROR_SSL: interpret_queued_ssl_error(function); break; default: VLOG_ERR_RL(&rl, "%s: bad SSL error code %d", function, error); break; } return EIO; } static ssize_t ssl_recv(struct stream *stream, void *buffer, size_t n) { struct ssl_stream *sslv = ssl_stream_cast(stream); int old_state; ssize_t ret; /* Behavior of zero-byte SSL_read is poorly defined. */ ovs_assert(n > 0); old_state = SSL_get_state(sslv->ssl); ret = SSL_read(sslv->ssl, buffer, n); if (old_state != SSL_get_state(sslv->ssl)) { sslv->tx_want = SSL_NOTHING; } sslv->rx_want = SSL_NOTHING; if (ret > 0) { return ret; } else { int error = SSL_get_error(sslv->ssl, ret); if (error == SSL_ERROR_ZERO_RETURN) { return 0; } else { return -interpret_ssl_error("SSL_read", ret, error, &sslv->rx_want); } } } static void ssl_clear_txbuf(struct ssl_stream *sslv) { ofpbuf_delete(sslv->txbuf); sslv->txbuf = NULL; } static int ssl_do_tx(struct stream *stream) { struct ssl_stream *sslv = ssl_stream_cast(stream); for (;;) { int old_state = SSL_get_state(sslv->ssl); int ret = SSL_write(sslv->ssl, sslv->txbuf->data, sslv->txbuf->size); if (old_state != SSL_get_state(sslv->ssl)) { sslv->rx_want = SSL_NOTHING; } sslv->tx_want = SSL_NOTHING; if (ret > 0) { ofpbuf_pull(sslv->txbuf, ret); if (sslv->txbuf->size == 0) { return 0; } } else { int ssl_error = SSL_get_error(sslv->ssl, ret); if (ssl_error == SSL_ERROR_ZERO_RETURN) { VLOG_WARN_RL(&rl, "SSL_write: connection closed"); return EPIPE; } else { return interpret_ssl_error("SSL_write", ret, ssl_error, &sslv->tx_want); } } } } static ssize_t ssl_send(struct stream 
*stream, const void *buffer, size_t n) { struct ssl_stream *sslv = ssl_stream_cast(stream); if (sslv->txbuf) { return -EAGAIN; } else { int error; sslv->txbuf = ofpbuf_clone_data(buffer, n); error = ssl_do_tx(stream); switch (error) { case 0: ssl_clear_txbuf(sslv); return n; case EAGAIN: return n; default: sslv->txbuf = NULL; return -error; } } } static void ssl_run(struct stream *stream) { struct ssl_stream *sslv = ssl_stream_cast(stream); if (sslv->txbuf && ssl_do_tx(stream) != EAGAIN) { ssl_clear_txbuf(sslv); } } static void ssl_run_wait(struct stream *stream) { struct ssl_stream *sslv = ssl_stream_cast(stream); if (sslv->tx_want != SSL_NOTHING) { poll_fd_wait(sslv->fd, want_to_poll_events(sslv->tx_want)); } } static void ssl_wait(struct stream *stream, enum stream_wait_type wait) { struct ssl_stream *sslv = ssl_stream_cast(stream); switch (wait) { case STREAM_CONNECT: if (stream_connect(stream) != EAGAIN) { poll_immediate_wake(); } else { switch (sslv->state) { case STATE_TCP_CONNECTING: poll_fd_wait(sslv->fd, POLLOUT); break; case STATE_SSL_CONNECTING: /* ssl_connect() called SSL_accept() or SSL_connect(), which * set up the status that we test here. */ poll_fd_wait(sslv->fd, want_to_poll_events(SSL_want(sslv->ssl))); break; default: NOT_REACHED(); } } break; case STREAM_RECV: if (sslv->rx_want != SSL_NOTHING) { poll_fd_wait(sslv->fd, want_to_poll_events(sslv->rx_want)); } else { poll_immediate_wake(); } break; case STREAM_SEND: if (!sslv->txbuf) { /* We have room in our tx queue. */ poll_immediate_wake(); } else { /* stream_run_wait() will do the right thing; don't bother with * redundancy. */ } break; default: NOT_REACHED(); } } const struct stream_class ssl_stream_class = { "ssl", /* name */ true, /* needs_probes */ ssl_open, /* open */ ssl_close, /* close */ ssl_connect, /* connect */ ssl_recv, /* recv */ ssl_send, /* send */ ssl_run, /* run */ ssl_run_wait, /* run_wait */ ssl_wait, /* wait */ }; /* Passive SSL. 
*/

/* A passive (listening) SSL stream: the generic pstream plus the listening
 * socket's file descriptor. */
struct pssl_pstream
{
    struct pstream pstream;
    int fd;
};

const struct pstream_class pssl_pstream_class;

/* Converts 'pstream', which must belong to pssl_pstream_class, into the
 * pssl_pstream that contains it. */
static struct pssl_pstream *
pssl_pstream_cast(struct pstream *pstream)
{
    pstream_assert_class(pstream, &pssl_pstream_class);
    return CONTAINER_OF(pstream, struct pssl_pstream, pstream);
}

/* Opens a passive SSL stream described by 'suffix' and stores it in
 * '*pstreamp'.
 *
 * Returns 0 on success.  Otherwise returns the nonzero result of ssl_init(),
 * or the negation of the negative value returned by inet_open_passive(). */
static int
pssl_open(const char *name OVS_UNUSED, char *suffix, struct pstream **pstreamp,
          uint8_t dscp)
{
    struct pssl_pstream *pssl;
    struct sockaddr_in sin;
    char bound_name[128];
    int retval;
    int fd;

    retval = ssl_init();
    if (retval) {
        return retval;
    }

    fd = inet_open_passive(SOCK_STREAM, suffix, OFP_SSL_PORT, &sin, dscp);
    if (fd < 0) {
        return -fd;
    }
    /* Name the pstream after the port and address that were actually bound. */
    sprintf(bound_name, "pssl:%"PRIu16":"IP_FMT,
            ntohs(sin.sin_port), IP_ARGS(sin.sin_addr.s_addr));

    pssl = xmalloc(sizeof *pssl);
    pstream_init(&pssl->pstream, &pssl_pstream_class, bound_name);
    pstream_set_bound_port(&pssl->pstream, sin.sin_port);
    pssl->fd = fd;
    *pstreamp = &pssl->pstream;
    return 0;
}

/* Closes the listening socket of 'pstream' and frees the pssl_pstream. */
static void
pssl_close(struct pstream *pstream)
{
    struct pssl_pstream *pssl = pssl_pstream_cast(pstream);
    close(pssl->fd);
    free(pssl);
}

/* Accepts one connection on 'pstream' and wraps it in a new server-side SSL
 * stream stored in '*new_streamp'.
 *
 * Returns the errno from accept() or the error from set_nonblocking() on
 * failure, otherwise whatever new_ssl_stream() returns. */
static int
pssl_accept(struct pstream *pstream, struct stream **new_streamp)
{
    struct pssl_pstream *pssl = pssl_pstream_cast(pstream);
    struct sockaddr_in sin;
    socklen_t sin_len = sizeof sin;
    char name[128];
    int new_fd;
    int error;

    new_fd = accept(pssl->fd, (struct sockaddr *) &sin, &sin_len);
    if (new_fd < 0) {
        error = errno;
        /* EAGAIN is not logged; every other accept() failure is. */
        if (error != EAGAIN) {
            VLOG_DBG_RL(&rl, "accept: %s", ovs_strerror(error));
        }
        return error;
    }

    error = set_nonblocking(new_fd);
    if (error) {
        close(new_fd);
        return error;
    }

    /* The port is appended to the name only when it differs from
     * OFP_SSL_PORT. */
    sprintf(name, "ssl:"IP_FMT, IP_ARGS(sin.sin_addr.s_addr));
    if (sin.sin_port != htons(OFP_SSL_PORT)) {
        sprintf(strchr(name, '\0'), ":%"PRIu16, ntohs(sin.sin_port));
    }
    return new_ssl_stream(name, new_fd, SERVER, STATE_SSL_CONNECTING, &sin,
                          new_streamp);
}

/* Arranges for the poll loop to wake up when the listening socket of
 * 'pstream' becomes readable. */
static void
pssl_wait(struct pstream *pstream)
{
    struct pssl_pstream *pssl = pssl_pstream_cast(pstream);
    poll_fd_wait(pssl->fd, POLLIN);
}

/* Applies 'dscp' to the listening socket of 'pstream' via set_dscp() and
 * returns its result. */
static int
pssl_set_dscp(struct pstream *pstream, uint8_t dscp)
{
    struct pssl_pstream *pssl = pssl_pstream_cast(pstream);
    return set_dscp(pssl->fd, dscp);
}

const struct pstream_class pssl_pstream_class = {
    "pssl",
    true,
    pssl_open,
    pssl_close,
    pssl_accept,
    pssl_wait,
    pssl_set_dscp,
};

/*
 * Returns true if OpenSSL error is WANT_READ or WANT_WRITE, indicating that
 * OpenSSL is requesting that we call it back when the socket is ready for read
 * or writing, respectively.
 */
static bool
ssl_wants_io(int ssl_error)
{
    return (ssl_error == SSL_ERROR_WANT_WRITE
            || ssl_error == SSL_ERROR_WANT_READ);
}

/* Initializes the SSL support on first use and returns the cached result of
 * do_ssl_init() on every call: 0 on success, otherwise a positive value. */
static int
ssl_init(void)
{
    /* Negative means "not yet initialized"; do_ssl_init() never leaves it
     * negative, which the assertion below checks. */
    static int init_status = -1;
    if (init_status < 0) {
        init_status = do_ssl_init();
        ovs_assert(init_status >= 0);
    }
    return init_status;
}

/* Performs the one-time OpenSSL setup: initializes the library, makes sure
 * the random number generator is seeded, and creates and configures the
 * shared context 'ctx'.
 *
 * Returns 0 on success, otherwise a positive errno value. */
static int
do_ssl_init(void)
{
    SSL_METHOD *method;

    SSL_library_init();
    SSL_load_error_strings();

    if (!RAND_status()) {
        /* We occasionally see OpenSSL fail to seed its random number generator
         * in heavily loaded hypervisors.  I suspect the following scenario:
         *
         * 1. OpenSSL calls read() to get 32 bytes from /dev/urandom.
         * 2. The kernel generates 10 bytes of randomness and copies it out.
         * 3. A signal arrives (perhaps SIGALRM).
         * 4. The kernel interrupts the system call to service the signal.
         * 5. Userspace gets 10 bytes of entropy.
         * 6. OpenSSL doesn't read again to get the final 22 bytes.  Therefore
         *    OpenSSL doesn't have enough entropy to consider itself
         *    initialized.
         *
         * The only part I'm not entirely sure about is #6, because the OpenSSL
         * code is so hard to read. */
        uint8_t seed[32];
        int retval;

        VLOG_WARN("OpenSSL random seeding failed, reseeding ourselves");

        retval = get_entropy(seed, sizeof seed);
        if (retval) {
            VLOG_ERR("failed to obtain entropy (%s)",
                     ovs_retval_to_string(retval));
            /* Always hand back a positive value, whatever get_entropy()
             * returned. */
            return retval > 0 ? retval : ENOPROTOOPT;
        }

        RAND_seed(seed, sizeof seed);
    }

    /* New OpenSSL changed TLSv1_method() to return a "const" pointer, so the
     * cast is needed to avoid a warning with those newer versions. */
    method = CONST_CAST(SSL_METHOD *, TLSv1_method());
    if (method == NULL) {
        VLOG_ERR("TLSv1_method: %s", ERR_error_string(ERR_get_error(), NULL));
        return ENOPROTOOPT;
    }

    ctx = SSL_CTX_new(method);
    if (ctx == NULL) {
        VLOG_ERR("SSL_CTX_new: %s", ERR_error_string(ERR_get_error(), NULL));
        return ENOPROTOOPT;
    }
    SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3);
    SSL_CTX_set_tmp_dh_callback(ctx, tmp_dh_callback);
    SSL_CTX_set_mode(ctx, SSL_MODE_ENABLE_PARTIAL_WRITE);
    SSL_CTX_set_mode(ctx, SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER);
    /* Peers must present a certificate, and it must verify. */
    SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT,
                       NULL);

    return 0;
}

/* Callback installed with SSL_CTX_set_tmp_dh_callback().  Returns the
 * Diffie-Hellman parameters for 'keylength' (1024, 2048, or 4096), building
 * them on first request and reusing them afterward.  Logs an error and
 * returns NULL for any other key length. */
static DH *
tmp_dh_callback(SSL *ssl OVS_UNUSED, int is_export OVS_UNUSED, int keylength)
{
    struct dh {
        int keylength;
        DH *dh;                     /* Cached parameters, NULL until built. */
        DH *(*constructor)(void);
    };

    static struct dh dh_table[] = {
        {1024, NULL, get_dh1024},
        {2048, NULL, get_dh2048},
        {4096, NULL, get_dh4096},
    };

    struct dh *dh;

    for (dh = dh_table; dh < &dh_table[ARRAY_SIZE(dh_table)]; dh++) {
        if (dh->keylength == keylength) {
            if (!dh->dh) {
                dh->dh = dh->constructor();
                if (!dh->dh) {
                    out_of_memory();
                }
            }
            return dh->dh;
        }
    }
    VLOG_ERR_RL(&rl, "no Diffie-Hellman parameters for key length %d",
                keylength);
    return NULL;
}

/* Returns true if SSL is at least partially configured. */
bool
stream_ssl_is_configured(void)
{
    return private_key.file_name || certificate.file_name || ca_cert.file_name;
}

/* Records 'file_name' and its modification time in 'config'.
 *
 * Returns true if the caller should (re)read the file: that is, if the name
 * or the modification time differs from what 'config' already holds.  Returns
 * false if nothing changed, if 'file_name' is null, or if ssl_init() fails. */
static bool
update_ssl_config(struct ssl_config_file *config, const char *file_name)
{
    struct timespec mtime;
    int error;

    if (ssl_init() || !file_name) {
        return false;
    }

    /* If the file name hasn't changed and neither has the file contents, stop
     * here. */
    error = get_mtime(file_name, &mtime);
    if (error && error != ENOENT) {
        VLOG_ERR_RL(&rl, "%s: stat failed (%s)",
                    file_name, ovs_strerror(error));
    }
    if (config->file_name
        && !strcmp(config->file_name, file_name)
        && mtime.tv_sec == config->mtime.tv_sec
        && mtime.tv_nsec == config->mtime.tv_nsec) {
        return false;
    }

    /* Update 'config'. */
    config->mtime = mtime;
    if (file_name != config->file_name) {
        free(config->file_name);
        config->file_name = xstrdup(file_name);
    }
    return true;
}

/* Loads the PEM private key in 'file_name' into 'ctx', marking the private
 * key as read on success and logging an error otherwise. */
static void
stream_ssl_set_private_key_file__(const char *file_name)
{
    if (SSL_CTX_use_PrivateKey_file(ctx, file_name, SSL_FILETYPE_PEM) == 1) {
        private_key.read = true;
    } else {
        VLOG_ERR("SSL_use_PrivateKey_file: %s",
                 ERR_error_string(ERR_get_error(), NULL));
    }
}

/* Sets 'file_name' as the private key file, reading it only if
 * update_ssl_config() reports that the name or the file changed. */
void
stream_ssl_set_private_key_file(const char *file_name)
{
    if (update_ssl_config(&private_key, file_name)) {
        stream_ssl_set_private_key_file__(file_name);
    }
}

/* Loads the certificate chain in 'file_name' into 'ctx', marking the
 * certificate as read on success and logging an error otherwise. */
static void
stream_ssl_set_certificate_file__(const char *file_name)
{
    if (SSL_CTX_use_certificate_chain_file(ctx, file_name) == 1) {
        certificate.read = true;
    } else {
        VLOG_ERR("SSL_use_certificate_file: %s",
                 ERR_error_string(ERR_get_error(), NULL));
    }
}

/* Sets 'file_name' as the certificate file, reading it only if
 * update_ssl_config() reports that the name or the file changed. */
void
stream_ssl_set_certificate_file(const char *file_name)
{
    if (update_ssl_config(&certificate, file_name)) {
        stream_ssl_set_certificate_file__(file_name);
    }
}

/* Sets the private key and certificate files in one operation.  Use this
 * interface, instead of calling stream_ssl_set_private_key_file() and
 * stream_ssl_set_certificate_file() individually, in the main loop of a
 * long-running program whose key and certificate might change at runtime.
 *
 * This is important because of OpenSSL's behavior.  If an OpenSSL context
 * already has a certificate, and stream_ssl_set_private_key_file() is called
 * to install a new private key, OpenSSL will report an error because the new
 * private key does not match the old certificate.  The other order, of setting
 * a new certificate, then setting a new private key, does work.
 *
 * If this were the only problem, calling stream_ssl_set_certificate_file()
 * before stream_ssl_set_private_key_file() would fix it.  But, if the private
 * key is changed before the certificate (e.g.
someone "scp"s or "mv"s the new * private key in place before the certificate), then OpenSSL would reject that * change, and then the change of certificate would succeed, but there would be * no associated private key (because it had only changed once and therefore * there was no point in re-reading it). * * This function avoids both problems by, whenever either the certificate or * the private key file changes, re-reading both of them, in the correct order. */ void stream_ssl_set_key_and_cert(const char *private_key_file, const char *certificate_file) { if (update_ssl_config(&private_key, private_key_file) || update_ssl_config(&certificate, certificate_file)) { stream_ssl_set_certificate_file__(certificate_file); stream_ssl_set_private_key_file__(private_key_file); } } /* Reads the X509 certificate or certificates in file 'file_name'. On success, * stores the address of the first element in an array of pointers to * certificates in '*certs' and the number of certificates in the array in * '*n_certs', and returns 0. On failure, stores a null pointer in '*certs', 0 * in '*n_certs', and returns a positive errno value. * * The caller is responsible for freeing '*certs'. */ static int read_cert_file(const char *file_name, X509 ***certs, size_t *n_certs) { FILE *file; size_t allocated_certs = 0; *certs = NULL; *n_certs = 0; file = fopen(file_name, "r"); if (!file) { VLOG_ERR("failed to open %s for reading: %s", file_name, ovs_strerror(errno)); return errno; } for (;;) { X509 *certificate; int c; /* Read certificate from file. */ certificate = PEM_read_X509(file, NULL, NULL, NULL); if (!certificate) { size_t i; VLOG_ERR("PEM_read_X509 failed reading %s: %s", file_name, ERR_error_string(ERR_get_error(), NULL)); for (i = 0; i < *n_certs; i++) { X509_free((*certs)[i]); } free(*certs); *certs = NULL; *n_certs = 0; return EIO; } /* Add certificate to array. 
*/ if (*n_certs >= allocated_certs) { *certs = x2nrealloc(*certs, &allocated_certs, sizeof **certs); } (*certs)[(*n_certs)++] = certificate; /* Are there additional certificates in the file? */ do { c = getc(file); } while (isspace(c)); if (c == EOF) { break; } ungetc(c, file); } fclose(file); return 0; } /* Sets 'file_name' as the name of a file containing one or more X509 * certificates to send to the peer. Typical use in OpenFlow is to send the CA * certificate to the peer, which enables a switch to pick up the controller's * CA certificate on its first connection. */ void stream_ssl_set_peer_ca_cert_file(const char *file_name) { X509 **certs; size_t n_certs; size_t i; if (ssl_init()) { return; } if (!read_cert_file(file_name, &certs, &n_certs)) { for (i = 0; i < n_certs; i++) { if (SSL_CTX_add_extra_chain_cert(ctx, certs[i]) != 1) { VLOG_ERR("SSL_CTX_add_extra_chain_cert: %s", ERR_error_string(ERR_get_error(), NULL)); } } free(certs); } } /* Logs fingerprint of CA certificate 'cert' obtained from 'file_name'. */ static void log_ca_cert(const char *file_name, X509 *cert) { unsigned char digest[EVP_MAX_MD_SIZE]; unsigned int n_bytes; struct ds fp; char *subject; ds_init(&fp); if (!X509_digest(cert, EVP_sha1(), digest, &n_bytes)) { ds_put_cstr(&fp, ""); } else { unsigned int i; for (i = 0; i < n_bytes; i++) { if (i) { ds_put_char(&fp, ':'); } ds_put_format(&fp, "%02hhx", digest[i]); } } subject = X509_NAME_oneline(X509_get_subject_name(cert), NULL, 0); VLOG_INFO("Trusting CA cert from %s (%s) (fingerprint %s)", file_name, subject ? 
subject : "", ds_cstr(&fp)); OPENSSL_free(subject); ds_destroy(&fp); } static void stream_ssl_set_ca_cert_file__(const char *file_name, bool bootstrap, bool force) { X509 **certs; size_t n_certs; struct stat s; if (!update_ssl_config(&ca_cert, file_name) && !force) { return; } if (!strcmp(file_name, "none")) { verify_peer_cert = false; VLOG_WARN("Peer certificate validation disabled " "(this is a security risk)"); } else if (bootstrap && stat(file_name, &s) && errno == ENOENT) { bootstrap_ca_cert = true; } else if (!read_cert_file(file_name, &certs, &n_certs)) { size_t i; /* Set up list of CAs that the server will accept from the client. */ for (i = 0; i < n_certs; i++) { /* SSL_CTX_add_client_CA makes a copy of the relevant data. */ if (SSL_CTX_add_client_CA(ctx, certs[i]) != 1) { VLOG_ERR("failed to add client certificate %zu from %s: %s", i, file_name, ERR_error_string(ERR_get_error(), NULL)); } else { log_ca_cert(file_name, certs[i]); } X509_free(certs[i]); } free(certs); /* Set up CAs for OpenSSL to trust in verifying the peer's * certificate. */ SSL_CTX_set_cert_store(ctx, X509_STORE_new()); if (SSL_CTX_load_verify_locations(ctx, file_name, NULL) != 1) { VLOG_ERR("SSL_CTX_load_verify_locations: %s", ERR_error_string(ERR_get_error(), NULL)); return; } bootstrap_ca_cert = false; } ca_cert.read = true; } /* Sets 'file_name' as the name of the file from which to read the CA * certificate used to verify the peer within SSL connections. If 'bootstrap' * is false, the file must exist. If 'bootstrap' is false, then the file is * read if it is exists; if it does not, then it will be created from the CA * certificate received from the peer on the first SSL connection. */ void stream_ssl_set_ca_cert_file(const char *file_name, bool bootstrap) { stream_ssl_set_ca_cert_file__(file_name, bootstrap, false); } /* SSL protocol logging. 
*/ static const char * ssl_alert_level_to_string(uint8_t type) { switch (type) { case 1: return "warning"; case 2: return "fatal"; default: return ""; } } static const char * ssl_alert_description_to_string(uint8_t type) { switch (type) { case 0: return "close_notify"; case 10: return "unexpected_message"; case 20: return "bad_record_mac"; case 21: return "decryption_failed"; case 22: return "record_overflow"; case 30: return "decompression_failure"; case 40: return "handshake_failure"; case 42: return "bad_certificate"; case 43: return "unsupported_certificate"; case 44: return "certificate_revoked"; case 45: return "certificate_expired"; case 46: return "certificate_unknown"; case 47: return "illegal_parameter"; case 48: return "unknown_ca"; case 49: return "access_denied"; case 50: return "decode_error"; case 51: return "decrypt_error"; case 60: return "export_restriction"; case 70: return "protocol_version"; case 71: return "insufficient_security"; case 80: return "internal_error"; case 90: return "user_canceled"; case 100: return "no_renegotiation"; default: return ""; } } static const char * ssl_handshake_type_to_string(uint8_t type) { switch (type) { case 0: return "hello_request"; case 1: return "client_hello"; case 2: return "server_hello"; case 11: return "certificate"; case 12: return "server_key_exchange"; case 13: return "certificate_request"; case 14: return "server_hello_done"; case 15: return "certificate_verify"; case 16: return "client_key_exchange"; case 20: return "finished"; default: return ""; } } static void ssl_protocol_cb(int write_p, int version OVS_UNUSED, int content_type, const void *buf_, size_t len, SSL *ssl OVS_UNUSED, void *sslv_) { const struct ssl_stream *sslv = sslv_; const uint8_t *buf = buf_; struct ds details; if (!VLOG_IS_DBG_ENABLED()) { return; } ds_init(&details); if (content_type == 20) { ds_put_cstr(&details, "change_cipher_spec"); } else if (content_type == 21) { ds_put_format(&details, "alert: %s, %s", 
ssl_alert_level_to_string(buf[0]), ssl_alert_description_to_string(buf[1])); } else if (content_type == 22) { ds_put_format(&details, "handshake: %s", ssl_handshake_type_to_string(buf[0])); } else { ds_put_format(&details, "type %d", content_type); } VLOG_DBG("%s%u%s%s %s (%zu bytes)", sslv->type == CLIENT ? "client" : "server", sslv->session_nr, write_p ? "-->" : "<--", stream_get_name(&sslv->stream), ds_cstr(&details), len); ds_destroy(&details); } openvswitch-2.0.1+git20140120/lib/stream-ssl.h000066400000000000000000000037371226605124000204750ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef STREAM_SSL_H #define STREAM_SSL_H 1 #include bool stream_ssl_is_configured(void); void stream_ssl_set_private_key_file(const char *file_name); void stream_ssl_set_certificate_file(const char *file_name); void stream_ssl_set_ca_cert_file(const char *file_name, bool bootstrap); void stream_ssl_set_peer_ca_cert_file(const char *file_name); void stream_ssl_set_key_and_cert(const char *private_key_file, const char *certificate_file); #define STREAM_SSL_LONG_OPTIONS \ {"private-key", required_argument, NULL, 'p'}, \ {"certificate", required_argument, NULL, 'c'}, \ {"ca-cert", required_argument, NULL, 'C'} #define STREAM_SSL_OPTION_HANDLERS \ case 'p': \ stream_ssl_set_private_key_file(optarg); \ break; \ \ case 'c': \ stream_ssl_set_certificate_file(optarg); \ break; \ \ case 'C': \ stream_ssl_set_ca_cert_file(optarg, false); \ break; #endif /* stream-ssl.h */ openvswitch-2.0.1+git20140120/lib/stream-tcp.c000066400000000000000000000106071226605124000204470ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "stream.h" #include #include #include #include #include #include #include #include #include #include "packets.h" #include "socket-util.h" #include "util.h" #include "stream-provider.h" #include "stream-fd.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(stream_tcp); /* Active TCP. 
*/ static int new_tcp_stream(const char *name, int fd, int connect_status, const struct sockaddr_in *remote, struct stream **streamp) { struct sockaddr_in local; socklen_t local_len = sizeof local; int on = 1; int retval; /* Get the local IP and port information */ retval = getsockname(fd, (struct sockaddr *)&local, &local_len); if (retval) { memset(&local, 0, sizeof local); } retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on); if (retval) { VLOG_ERR("%s: setsockopt(TCP_NODELAY): %s", name, ovs_strerror(errno)); close(fd); return errno; } retval = new_fd_stream(name, fd, connect_status, streamp); if (!retval) { struct stream *stream = *streamp; stream_set_remote_ip(stream, remote->sin_addr.s_addr); stream_set_remote_port(stream, remote->sin_port); stream_set_local_ip(stream, local.sin_addr.s_addr); stream_set_local_port(stream, local.sin_port); } return retval; } static int tcp_open(const char *name, char *suffix, struct stream **streamp, uint8_t dscp) { struct sockaddr_in sin; int fd, error; error = inet_open_active(SOCK_STREAM, suffix, 0, &sin, &fd, dscp); if (fd >= 0) { return new_tcp_stream(name, fd, error, &sin, streamp); } else { VLOG_ERR("%s: connect: %s", name, ovs_strerror(error)); return error; } } const struct stream_class tcp_stream_class = { "tcp", /* name */ true, /* needs_probes */ tcp_open, /* open */ NULL, /* close */ NULL, /* connect */ NULL, /* recv */ NULL, /* send */ NULL, /* run */ NULL, /* run_wait */ NULL, /* wait */ }; /* Passive TCP. 
*/ static int ptcp_accept(int fd, const struct sockaddr *sa, size_t sa_len, struct stream **streamp); static int ptcp_open(const char *name OVS_UNUSED, char *suffix, struct pstream **pstreamp, uint8_t dscp) { struct sockaddr_in sin; char bound_name[128]; int error; int fd; fd = inet_open_passive(SOCK_STREAM, suffix, -1, &sin, dscp); if (fd < 0) { return -fd; } sprintf(bound_name, "ptcp:%"PRIu16":"IP_FMT, ntohs(sin.sin_port), IP_ARGS(sin.sin_addr.s_addr)); error = new_fd_pstream(bound_name, fd, ptcp_accept, set_dscp, NULL, pstreamp); if (!error) { pstream_set_bound_port(*pstreamp, sin.sin_port); } return error; } static int ptcp_accept(int fd, const struct sockaddr *sa, size_t sa_len, struct stream **streamp) { const struct sockaddr_in *sin = ALIGNED_CAST(const struct sockaddr_in *, sa); char name[128]; if (sa_len == sizeof(struct sockaddr_in) && sin->sin_family == AF_INET) { sprintf(name, "tcp:"IP_FMT, IP_ARGS(sin->sin_addr.s_addr)); sprintf(strchr(name, '\0'), ":%"PRIu16, ntohs(sin->sin_port)); } else { strcpy(name, "tcp"); } return new_tcp_stream(name, fd, 0, sin, streamp); } const struct pstream_class ptcp_pstream_class = { "ptcp", true, ptcp_open, NULL, NULL, NULL, NULL, }; openvswitch-2.0.1+git20140120/lib/stream-unix.c000066400000000000000000000070321226605124000206420ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "stream.h" #include #include #include #include #include #include #include #include #include #include #include "packets.h" #include "poll-loop.h" #include "socket-util.h" #include "dirs.h" #include "util.h" #include "stream-provider.h" #include "stream-fd.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(stream_unix); /* Active UNIX socket. */ static int unix_open(const char *name, char *suffix, struct stream **streamp, uint8_t dscp OVS_UNUSED) { char *connect_path; int fd; connect_path = abs_file_name(ovs_rundir(), suffix); fd = make_unix_socket(SOCK_STREAM, true, NULL, connect_path); if (fd < 0) { VLOG_DBG("%s: connection failed (%s)", connect_path, ovs_strerror(-fd)); free(connect_path); return -fd; } free(connect_path); return new_fd_stream(name, fd, check_connection_completion(fd), streamp); } const struct stream_class unix_stream_class = { "unix", /* name */ false, /* needs_probes */ unix_open, /* open */ NULL, /* close */ NULL, /* connect */ NULL, /* recv */ NULL, /* send */ NULL, /* run */ NULL, /* run_wait */ NULL, /* wait */ }; /* Passive UNIX socket. 
*/ static int punix_accept(int fd, const struct sockaddr *sa, size_t sa_len, struct stream **streamp); static int punix_open(const char *name OVS_UNUSED, char *suffix, struct pstream **pstreamp, uint8_t dscp OVS_UNUSED) { char *bind_path; int fd, error; bind_path = abs_file_name(ovs_rundir(), suffix); fd = make_unix_socket(SOCK_STREAM, true, bind_path, NULL); if (fd < 0) { VLOG_ERR("%s: binding failed: %s", bind_path, ovs_strerror(errno)); free(bind_path); return errno; } if (listen(fd, 10) < 0) { error = errno; VLOG_ERR("%s: listen: %s", name, ovs_strerror(error)); close(fd); free(bind_path); return error; } return new_fd_pstream(name, fd, punix_accept, NULL, bind_path, pstreamp); } static int punix_accept(int fd, const struct sockaddr *sa, size_t sa_len, struct stream **streamp) { const struct sockaddr_un *sun = (const struct sockaddr_un *) sa; int name_len = get_unix_name_len(sa_len); char name[128]; if (name_len > 0) { snprintf(name, sizeof name, "unix:%.*s", name_len, sun->sun_path); } else { strcpy(name, "unix"); } return new_fd_stream(name, fd, 0, streamp); } const struct pstream_class punix_pstream_class = { "punix", false, punix_open, NULL, NULL, NULL, NULL, }; openvswitch-2.0.1+git20140120/lib/stream.c000066400000000000000000000600051226605124000176600ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "stream-provider.h" #include #include #include #include #include #include #include "coverage.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "flow.h" #include "ofp-print.h" #include "ofpbuf.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "packets.h" #include "poll-loop.h" #include "random.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(stream); COVERAGE_DEFINE(pstream_open); COVERAGE_DEFINE(stream_open); /* State of an active stream.*/ enum stream_state { SCS_CONNECTING, /* Underlying stream is not connected. */ SCS_CONNECTED, /* Connection established. */ SCS_DISCONNECTED /* Connection failed or connection closed. */ }; static const struct stream_class *stream_classes[] = { &tcp_stream_class, &unix_stream_class, #ifdef HAVE_OPENSSL &ssl_stream_class, #endif }; static const struct pstream_class *pstream_classes[] = { &ptcp_pstream_class, &punix_pstream_class, #ifdef HAVE_OPENSSL &pssl_pstream_class, #endif }; /* Check the validity of the stream class structures. */ static void check_stream_classes(void) { #ifndef NDEBUG size_t i; for (i = 0; i < ARRAY_SIZE(stream_classes); i++) { const struct stream_class *class = stream_classes[i]; ovs_assert(class->name != NULL); ovs_assert(class->open != NULL); if (class->close || class->recv || class->send || class->run || class->run_wait || class->wait) { ovs_assert(class->close != NULL); ovs_assert(class->recv != NULL); ovs_assert(class->send != NULL); ovs_assert(class->wait != NULL); } else { /* This class delegates to another one. */ } } for (i = 0; i < ARRAY_SIZE(pstream_classes); i++) { const struct pstream_class *class = pstream_classes[i]; ovs_assert(class->name != NULL); ovs_assert(class->listen != NULL); if (class->close || class->accept || class->wait) { ovs_assert(class->close != NULL); ovs_assert(class->accept != NULL); ovs_assert(class->wait != NULL); } else { /* This class delegates to another one. 
*/ } } #endif } /* Prints information on active (if 'active') and passive (if 'passive') * connection methods supported by the stream. */ void stream_usage(const char *name, bool active, bool passive, bool bootstrap OVS_UNUSED) { /* Really this should be implemented via callbacks into the stream * providers, but that seems too heavy-weight to bother with at the * moment. */ printf("\n"); if (active) { printf("Active %s connection methods:\n", name); printf(" tcp:IP:PORT " "PORT at remote IP\n"); #ifdef HAVE_OPENSSL printf(" ssl:IP:PORT " "SSL PORT at remote IP\n"); #endif printf(" unix:FILE " "Unix domain socket named FILE\n"); } if (passive) { printf("Passive %s connection methods:\n", name); printf(" ptcp:PORT[:IP] " "listen to TCP PORT on IP\n"); #ifdef HAVE_OPENSSL printf(" pssl:PORT[:IP] " "listen for SSL on PORT on IP\n"); #endif printf(" punix:FILE " "listen on Unix domain socket FILE\n"); } #ifdef HAVE_OPENSSL printf("PKI configuration (required to use SSL):\n" " -p, --private-key=FILE file with private key\n" " -c, --certificate=FILE file with certificate for private key\n" " -C, --ca-cert=FILE file with peer CA certificate\n"); if (bootstrap) { printf(" --bootstrap-ca-cert=FILE file with peer CA certificate " "to read or create\n"); } #endif } /* Given 'name', a stream name in the form "TYPE:ARGS", stores the class * named "TYPE" into '*classp' and returns 0. Returns EAFNOSUPPORT and stores * a null pointer into '*classp' if 'name' is in the wrong form or if no such * class exists. 
*/ static int stream_lookup_class(const char *name, const struct stream_class **classp) { size_t prefix_len; size_t i; check_stream_classes(); *classp = NULL; prefix_len = strcspn(name, ":"); if (name[prefix_len] == '\0') { return EAFNOSUPPORT; } for (i = 0; i < ARRAY_SIZE(stream_classes); i++) { const struct stream_class *class = stream_classes[i]; if (strlen(class->name) == prefix_len && !memcmp(class->name, name, prefix_len)) { *classp = class; return 0; } } return EAFNOSUPPORT; } /* Returns 0 if 'name' is a stream name in the form "TYPE:ARGS" and TYPE is * a supported stream type, otherwise EAFNOSUPPORT. */ int stream_verify_name(const char *name) { const struct stream_class *class; return stream_lookup_class(name, &class); } /* Attempts to connect a stream to a remote peer. 'name' is a connection name * in the form "TYPE:ARGS", where TYPE is an active stream class's name and * ARGS are stream class-specific. * * Returns 0 if successful, otherwise a positive errno value. If successful, * stores a pointer to the new connection in '*streamp', otherwise a null * pointer. */ int stream_open(const char *name, struct stream **streamp, uint8_t dscp) { const struct stream_class *class; struct stream *stream; char *suffix_copy; int error; COVERAGE_INC(stream_open); /* Look up the class. */ error = stream_lookup_class(name, &class); if (!class) { goto error; } /* Call class's "open" function. */ suffix_copy = xstrdup(strchr(name, ':') + 1); error = class->open(name, suffix_copy, &stream, dscp); free(suffix_copy); if (error) { goto error; } /* Success. */ *streamp = stream; return 0; error: *streamp = NULL; return error; } /* Blocks until a previously started stream connection attempt succeeds or * fails. 'error' should be the value returned by stream_open() and 'streamp' * should point to the stream pointer set by stream_open(). Returns 0 if * successful, otherwise a positive errno value other than EAGAIN or * EINPROGRESS. 
If successful, leaves '*streamp' untouched; on error, closes * '*streamp' and sets '*streamp' to null. * * Typical usage: * error = stream_open_block(stream_open("tcp:1.2.3.4:5", &stream), &stream); */ int stream_open_block(int error, struct stream **streamp) { struct stream *stream = *streamp; fatal_signal_run(); if (!error) { while ((error = stream_connect(stream)) == EAGAIN) { stream_run(stream); stream_run_wait(stream); stream_connect_wait(stream); poll_block(); } ovs_assert(error != EINPROGRESS); } if (error) { stream_close(stream); *streamp = NULL; } else { *streamp = stream; } return error; } /* Closes 'stream'. */ void stream_close(struct stream *stream) { if (stream != NULL) { char *name = stream->name; (stream->class->close)(stream); free(name); } } /* Returns the name of 'stream', that is, the string passed to * stream_open(). */ const char * stream_get_name(const struct stream *stream) { return stream ? stream->name : "(null)"; } /* Returns the IP address of the peer, or 0 if the peer is not connected over * an IP-based protocol or if its IP address is not yet known. */ ovs_be32 stream_get_remote_ip(const struct stream *stream) { return stream->remote_ip; } /* Returns the transport port of the peer, or 0 if the connection does not * contain a port or if the port is not yet known. */ ovs_be16 stream_get_remote_port(const struct stream *stream) { return stream->remote_port; } /* Returns the IP address used to connect to the peer, or 0 if the connection * is not an IP-based protocol or if its IP address is not yet known. */ ovs_be32 stream_get_local_ip(const struct stream *stream) { return stream->local_ip; } /* Returns the transport port used to connect to the peer, or 0 if the * connection does not contain a port or if the port is not yet known. 
*/ ovs_be16 stream_get_local_port(const struct stream *stream) { return stream->local_port; } static void scs_connecting(struct stream *stream) { int retval = (stream->class->connect)(stream); ovs_assert(retval != EINPROGRESS); if (!retval) { stream->state = SCS_CONNECTED; } else if (retval != EAGAIN) { stream->state = SCS_DISCONNECTED; stream->error = retval; } } /* Tries to complete the connection on 'stream'. If 'stream''s connection is * complete, returns 0 if the connection was successful or a positive errno * value if it failed. If the connection is still in progress, returns * EAGAIN. */ int stream_connect(struct stream *stream) { enum stream_state last_state; do { last_state = stream->state; switch (stream->state) { case SCS_CONNECTING: scs_connecting(stream); break; case SCS_CONNECTED: return 0; case SCS_DISCONNECTED: return stream->error; default: NOT_REACHED(); } } while (stream->state != last_state); return EAGAIN; } /* Tries to receive up to 'n' bytes from 'stream' into 'buffer', and returns: * * - If successful, the number of bytes received (between 1 and 'n'). * * - On error, a negative errno value. * * - 0, if the connection has been closed in the normal fashion, or if 'n' * is zero. * * The recv function will not block waiting for a packet to arrive. If no * data have been received, it returns -EAGAIN immediately. */ int stream_recv(struct stream *stream, void *buffer, size_t n) { int retval = stream_connect(stream); return (retval ? -retval : n == 0 ? 0 : (stream->class->recv)(stream, buffer, n)); } /* Tries to send up to 'n' bytes of 'buffer' on 'stream', and returns: * * - If successful, the number of bytes sent (between 1 and 'n'). 0 is * only a valid return value if 'n' is 0. * * - On error, a negative errno value. * * The send function will not block. If no bytes can be immediately accepted * for transmission, it returns -EAGAIN immediately. 
*/ int stream_send(struct stream *stream, const void *buffer, size_t n) { int retval = stream_connect(stream); return (retval ? -retval : n == 0 ? 0 : (stream->class->send)(stream, buffer, n)); } /* Allows 'stream' to perform maintenance activities, such as flushing * output buffers. */ void stream_run(struct stream *stream) { if (stream->class->run) { (stream->class->run)(stream); } } /* Arranges for the poll loop to wake up when 'stream' needs to perform * maintenance activities. */ void stream_run_wait(struct stream *stream) { if (stream->class->run_wait) { (stream->class->run_wait)(stream); } } /* Arranges for the poll loop to wake up when 'stream' is ready to take an * action of the given 'type'. */ void stream_wait(struct stream *stream, enum stream_wait_type wait) { ovs_assert(wait == STREAM_CONNECT || wait == STREAM_RECV || wait == STREAM_SEND); switch (stream->state) { case SCS_CONNECTING: wait = STREAM_CONNECT; break; case SCS_DISCONNECTED: poll_immediate_wake(); return; } (stream->class->wait)(stream, wait); } void stream_connect_wait(struct stream *stream) { stream_wait(stream, STREAM_CONNECT); } void stream_recv_wait(struct stream *stream) { stream_wait(stream, STREAM_RECV); } void stream_send_wait(struct stream *stream) { stream_wait(stream, STREAM_SEND); } /* Given 'name', a pstream name in the form "TYPE:ARGS", stores the class * named "TYPE" into '*classp' and returns 0. Returns EAFNOSUPPORT and stores * a null pointer into '*classp' if 'name' is in the wrong form or if no such * class exists. 
*/ static int pstream_lookup_class(const char *name, const struct pstream_class **classp) { size_t prefix_len; size_t i; check_stream_classes(); *classp = NULL; prefix_len = strcspn(name, ":"); if (name[prefix_len] == '\0') { return EAFNOSUPPORT; } for (i = 0; i < ARRAY_SIZE(pstream_classes); i++) { const struct pstream_class *class = pstream_classes[i]; if (strlen(class->name) == prefix_len && !memcmp(class->name, name, prefix_len)) { *classp = class; return 0; } } return EAFNOSUPPORT; } /* Returns 0 if 'name' is a pstream name in the form "TYPE:ARGS" and TYPE is * a supported pstream type, otherwise EAFNOSUPPORT. */ int pstream_verify_name(const char *name) { const struct pstream_class *class; return pstream_lookup_class(name, &class); } /* Returns 1 if the stream or pstream specified by 'name' needs periodic probes * to verify connectivity. For [p]streams which need probes, it can take a * long time to notice the connection has been dropped. Returns 0 if the * stream or pstream does not need probes, and -1 if 'name' is not valid. */ int stream_or_pstream_needs_probes(const char *name) { const struct pstream_class *pclass; const struct stream_class *class; if (!stream_lookup_class(name, &class)) { return class->needs_probes; } else if (!pstream_lookup_class(name, &pclass)) { return pclass->needs_probes; } else { return -1; } } /* Attempts to start listening for remote stream connections. 'name' is a * connection name in the form "TYPE:ARGS", where TYPE is an passive stream * class's name and ARGS are stream class-specific. * * Returns 0 if successful, otherwise a positive errno value. If successful, * stores a pointer to the new connection in '*pstreamp', otherwise a null * pointer. */ int pstream_open(const char *name, struct pstream **pstreamp, uint8_t dscp) { const struct pstream_class *class; struct pstream *pstream; char *suffix_copy; int error; COVERAGE_INC(pstream_open); /* Look up the class. 
*/ error = pstream_lookup_class(name, &class); if (!class) { goto error; } /* Call class's "open" function. */ suffix_copy = xstrdup(strchr(name, ':') + 1); error = class->listen(name, suffix_copy, &pstream, dscp); free(suffix_copy); if (error) { goto error; } /* Success. */ *pstreamp = pstream; return 0; error: *pstreamp = NULL; return error; } /* Returns the name that was used to open 'pstream'. The caller must not * modify or free the name. */ const char * pstream_get_name(const struct pstream *pstream) { return pstream->name; } /* Closes 'pstream'. */ void pstream_close(struct pstream *pstream) { if (pstream != NULL) { char *name = pstream->name; (pstream->class->close)(pstream); free(name); } } /* Tries to accept a new connection on 'pstream'. If successful, stores the * new connection in '*new_stream' and returns 0. Otherwise, returns a * positive errno value. * * pstream_accept() will not block waiting for a connection. If no connection * is ready to be accepted, it returns EAGAIN immediately. */ int pstream_accept(struct pstream *pstream, struct stream **new_stream) { int retval = (pstream->class->accept)(pstream, new_stream); if (retval) { *new_stream = NULL; } else { ovs_assert((*new_stream)->state != SCS_CONNECTING || (*new_stream)->class->connect); } return retval; } /* Tries to accept a new connection on 'pstream'. If successful, stores the * new connection in '*new_stream' and returns 0. Otherwise, returns a * positive errno value. * * pstream_accept_block() blocks until a connection is ready or until an error * occurs. It will not return EAGAIN. 
*/ int pstream_accept_block(struct pstream *pstream, struct stream **new_stream) { int error; fatal_signal_run(); while ((error = pstream_accept(pstream, new_stream)) == EAGAIN) { pstream_wait(pstream); poll_block(); } if (error) { *new_stream = NULL; } return error; } void pstream_wait(struct pstream *pstream) { (pstream->class->wait)(pstream); } int pstream_set_dscp(struct pstream *pstream, uint8_t dscp) { if (pstream->class->set_dscp) { return pstream->class->set_dscp(pstream, dscp); } return 0; } /* Returns the transport port on which 'pstream' is listening, or 0 if the * concept doesn't apply. */ ovs_be16 pstream_get_bound_port(const struct pstream *pstream) { return pstream->bound_port; } /* Initializes 'stream' as a new stream named 'name', implemented via 'class'. * The initial connection status, supplied as 'connect_status', is interpreted * as follows: * * - 0: 'stream' is connected. Its 'send' and 'recv' functions may be * called in the normal fashion. * * - EAGAIN: 'stream' is trying to complete a connection. Its 'connect' * function should be called to complete the connection. * * - Other positive errno values indicate that the connection failed with * the specified error. * * After calling this function, stream_close() must be used to destroy * 'stream', otherwise resources will be leaked. * * The caller retains ownership of 'name'. */ void stream_init(struct stream *stream, const struct stream_class *class, int connect_status, const char *name) { memset(stream, 0, sizeof *stream); stream->class = class; stream->state = (connect_status == EAGAIN ? SCS_CONNECTING : !connect_status ? 
SCS_CONNECTED : SCS_DISCONNECTED); stream->error = connect_status; stream->name = xstrdup(name); ovs_assert(stream->state != SCS_CONNECTING || class->connect); } void stream_set_remote_ip(struct stream *stream, ovs_be32 ip) { stream->remote_ip = ip; } void stream_set_remote_port(struct stream *stream, ovs_be16 port) { stream->remote_port = port; } void stream_set_local_ip(struct stream *stream, ovs_be32 ip) { stream->local_ip = ip; } void stream_set_local_port(struct stream *stream, ovs_be16 port) { stream->local_port = port; } void pstream_init(struct pstream *pstream, const struct pstream_class *class, const char *name) { memset(pstream, 0, sizeof *pstream); pstream->class = class; pstream->name = xstrdup(name); } void pstream_set_bound_port(struct pstream *pstream, ovs_be16 port) { pstream->bound_port = port; } static int count_fields(const char *s_) { char *s, *field, *save_ptr; int n = 0; save_ptr = NULL; s = xstrdup(s_); for (field = strtok_r(s, ":", &save_ptr); field != NULL; field = strtok_r(NULL, ":", &save_ptr)) { n++; } free(s); return n; } /* Like stream_open(), but for tcp streams the port defaults to * 'default_tcp_port' if no port number is given and for SSL streams the port * defaults to 'default_ssl_port' if no port number is given. */ int stream_open_with_default_ports(const char *name_, uint16_t default_tcp_port, uint16_t default_ssl_port, struct stream **streamp, uint8_t dscp) { char *name; int error; if (!strncmp(name_, "tcp:", 4) && count_fields(name_) < 3) { name = xasprintf("%s:%d", name_, default_tcp_port); } else if (!strncmp(name_, "ssl:", 4) && count_fields(name_) < 3) { name = xasprintf("%s:%d", name_, default_ssl_port); } else { name = xstrdup(name_); } error = stream_open(name, streamp, dscp); free(name); return error; } /* Like pstream_open(), but for ptcp streams the port defaults to * 'default_ptcp_port' if no port number is given and for passive SSL streams * the port defaults to 'default_pssl_port' if no port number is given. 
*/ int pstream_open_with_default_ports(const char *name_, uint16_t default_ptcp_port, uint16_t default_pssl_port, struct pstream **pstreamp, uint8_t dscp) { char *name; int error; if (!strncmp(name_, "ptcp:", 5) && count_fields(name_) < 2) { name = xasprintf("%s%d", name_, default_ptcp_port); } else if (!strncmp(name_, "pssl:", 5) && count_fields(name_) < 2) { name = xasprintf("%s%d", name_, default_pssl_port); } else { name = xstrdup(name_); } error = pstream_open(name, pstreamp, dscp); free(name); return error; } /* * This function extracts IP address and port from the target string. * * - On success, function returns true and fills *sin structure with port * and IP address. If port was absent in target string then it will use * corresponding default port value. * - On error, function returns false and *sin contains garbage. */ bool stream_parse_target_with_default_ports(const char *target, uint16_t default_tcp_port, uint16_t default_ssl_port, struct sockaddr_in *sin) { return (!strncmp(target, "tcp:", 4) && inet_parse_active(target + 4, default_tcp_port, sin)) || (!strncmp(target, "ssl:", 4) && inet_parse_active(target + 4, default_ssl_port, sin)); } /* Attempts to guess the content type of a stream whose first few bytes were * the 'size' bytes of 'data'. */ static enum stream_content_type stream_guess_content(const uint8_t *data, ssize_t size) { if (size >= 2) { #define PAIR(A, B) (((A) << 8) | (B)) switch (PAIR(data[0], data[1])) { case PAIR(0x16, 0x03): /* Handshake, version 3. */ return STREAM_SSL; case PAIR('{', '"'): return STREAM_JSONRPC; case PAIR(OFP10_VERSION, 0 /* OFPT_HELLO */): return STREAM_OPENFLOW; } } return STREAM_UNKNOWN; } /* Returns a string represenation of 'type'. 
*/ static const char * stream_content_type_to_string(enum stream_content_type type) { switch (type) { case STREAM_UNKNOWN: default: return "unknown"; case STREAM_JSONRPC: return "JSON-RPC"; case STREAM_OPENFLOW: return "OpenFlow"; case STREAM_SSL: return "SSL"; } } /* Attempts to guess the content type of a stream whose first few bytes were * the 'size' bytes of 'data'. If this is done successfully, and the guessed * content type is other than 'expected_type', then log a message in vlog * module 'module', naming 'stream_name' as the source, explaining what * content was expected and what was actually received. */ void stream_report_content(const void *data, ssize_t size, enum stream_content_type expected_type, struct vlog_module *module, const char *stream_name) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); enum stream_content_type actual_type; actual_type = stream_guess_content(data, size); if (actual_type != expected_type && actual_type != STREAM_UNKNOWN) { vlog_rate_limit(module, VLL_WARN, &rl, "%s: received %s data on %s channel", stream_name, stream_content_type_to_string(actual_type), stream_content_type_to_string(expected_type)); } } openvswitch-2.0.1+git20140120/lib/stream.h000066400000000000000000000071601226605124000176700ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef STREAM_H #define STREAM_H 1 #include #include #include #include #include "openvswitch/types.h" #include "vlog.h" #include "socket-util.h" struct pstream; struct stream; void stream_usage(const char *name, bool active, bool passive, bool bootstrap); /* Bidirectional byte streams. */ int stream_verify_name(const char *name); int stream_open(const char *name, struct stream **, uint8_t dscp); int stream_open_block(int error, struct stream **); void stream_close(struct stream *); const char *stream_get_name(const struct stream *); ovs_be32 stream_get_remote_ip(const struct stream *); ovs_be16 stream_get_remote_port(const struct stream *); ovs_be32 stream_get_local_ip(const struct stream *); ovs_be16 stream_get_local_port(const struct stream *); int stream_connect(struct stream *); int stream_recv(struct stream *, void *buffer, size_t n); int stream_send(struct stream *, const void *buffer, size_t n); void stream_run(struct stream *); void stream_run_wait(struct stream *); enum stream_wait_type { STREAM_CONNECT, STREAM_RECV, STREAM_SEND }; void stream_wait(struct stream *, enum stream_wait_type); void stream_connect_wait(struct stream *); void stream_recv_wait(struct stream *); void stream_send_wait(struct stream *); /* Passive streams: listeners for incoming stream connections. */ int pstream_verify_name(const char *name); int pstream_open(const char *name, struct pstream **, uint8_t dscp); const char *pstream_get_name(const struct pstream *); void pstream_close(struct pstream *); int pstream_accept(struct pstream *, struct stream **); int pstream_accept_block(struct pstream *, struct stream **); void pstream_wait(struct pstream *); int pstream_set_dscp(struct pstream *, uint8_t dscp); ovs_be16 pstream_get_bound_port(const struct pstream *); /* Convenience functions. 
*/ int stream_open_with_default_ports(const char *name, uint16_t default_tcp_port, uint16_t default_ssl_port, struct stream **, uint8_t dscp); int pstream_open_with_default_ports(const char *name, uint16_t default_ptcp_port, uint16_t default_pssl_port, struct pstream **, uint8_t dscp); bool stream_parse_target_with_default_ports(const char *target, uint16_t default_tcp_port, uint16_t default_ssl_port, struct sockaddr_in *sin); int stream_or_pstream_needs_probes(const char *name); /* Error reporting. */ enum stream_content_type { STREAM_UNKNOWN, STREAM_OPENFLOW, STREAM_SSL, STREAM_JSONRPC }; void stream_report_content(const void *, ssize_t, enum stream_content_type, struct vlog_module *, const char *stream_name); #endif /* stream.h */ openvswitch-2.0.1+git20140120/lib/string.c000066400000000000000000000014511226605124000176730ustar00rootroot00000000000000/* * Copyright (c) 2009, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #ifndef HAVE_STRNLEN size_t strnlen(const char *s, size_t maxlen) { const char *end = memchr(s, '\0', maxlen); return end ? end - s : maxlen; } #endif openvswitch-2.0.1+git20140120/lib/string.h000066400000000000000000000023501226605124000176770ustar00rootroot00000000000000/* * Copyright (c) 2009, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef STRING_WRAPPER_H #define STRING_WRAPPER_H 1 #include_next /* Glibc 2.7 has a bug in strtok_r when compiling with optimization that can * cause segfaults if the delimiters argument is a compile-time constant that * has exactly 1 character: * * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614 * * The bug is only present in the inline version of strtok_r(), so force the * out-of-line version to be used instead. */ #if HAVE_STRTOK_R_BUG #undef strtok_r #endif #ifndef HAVE_STRNLEN #undef strnlen #define strnlen rpl_strnlen size_t strnlen(const char *, size_t maxlen); #endif #endif /* string.h wrapper */ openvswitch-2.0.1+git20140120/lib/svec.c000066400000000000000000000175751226605124000173430ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "svec.h" #include #include #include #include "dynamic-string.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(svec); void svec_init(struct svec *svec) { svec->names = NULL; svec->n = 0; svec->allocated = 0; } void svec_clone(struct svec *svec, const struct svec *other) { svec_init(svec); svec_append(svec, other); } void svec_destroy(struct svec *svec) { svec_clear(svec); free(svec->names); } void svec_clear(struct svec *svec) { size_t i; for (i = 0; i < svec->n; i++) { free(svec->names[i]); } svec->n = 0; } bool svec_is_empty(const struct svec *svec) { return svec->n == 0; } void svec_add(struct svec *svec, const char *name) { svec_add_nocopy(svec, xstrdup(name)); } void svec_del(struct svec *svec, const char *name) { size_t offset; offset = svec_find(svec, name); if (offset != SIZE_MAX) { free(svec->names[offset]); memmove(&svec->names[offset], &svec->names[offset + 1], sizeof *svec->names * (svec->n - offset - 1)); svec->n--; } } static void svec_expand(struct svec *svec) { if (svec->n >= svec->allocated) { svec->names = x2nrealloc(svec->names, &svec->allocated, sizeof *svec->names); } } void svec_add_nocopy(struct svec *svec, char *name) { svec_expand(svec); svec->names[svec->n++] = name; } void svec_append(struct svec *svec, const struct svec *other) { size_t i; for (i = 0; i < other->n; i++) { svec_add(svec, other->names[i]); } } void svec_terminate(struct svec *svec) { svec_expand(svec); svec->names[svec->n] = NULL; } static int compare_strings(const void *a_, const void *b_) { char *const *a = a_; char *const *b = b_; return strcmp(*a, *b); } void svec_sort(struct svec *svec) { qsort(svec->names, svec->n, sizeof *svec->names, compare_strings); } void svec_sort_unique(struct svec *svec) { svec_sort(svec); svec_unique(svec); } void svec_unique(struct svec *svec) { ovs_assert(svec_is_sorted(svec)); if (svec->n > 1) { /* This algorithm is lazy and sub-optimal, but it's "obviously correct" * and asymptotically optimal . 
*/ struct svec tmp; size_t i; svec_init(&tmp); svec_add(&tmp, svec->names[0]); for (i = 1; i < svec->n; i++) { if (strcmp(svec->names[i - 1], svec->names[i])) { svec_add(&tmp, svec->names[i]); } } svec_swap(&tmp, svec); svec_destroy(&tmp); } } void svec_compact(struct svec *svec) { size_t i, j; for (i = j = 0; i < svec->n; i++) { if (svec->names[i] != NULL) { svec->names[j++] = svec->names[i]; } } svec->n = j; } void svec_diff(const struct svec *a, const struct svec *b, struct svec *a_only, struct svec *both, struct svec *b_only) { size_t i, j; ovs_assert(svec_is_sorted(a)); ovs_assert(svec_is_sorted(b)); if (a_only) { svec_init(a_only); } if (both) { svec_init(both); } if (b_only) { svec_init(b_only); } for (i = j = 0; i < a->n && j < b->n; ) { int cmp = strcmp(a->names[i], b->names[j]); if (cmp < 0) { if (a_only) { svec_add(a_only, a->names[i]); } i++; } else if (cmp > 0) { if (b_only) { svec_add(b_only, b->names[j]); } j++; } else { if (both) { svec_add(both, a->names[i]); } i++; j++; } } if (a_only) { for (; i < a->n; i++) { svec_add(a_only, a->names[i]); } } if (b_only) { for (; j < b->n; j++) { svec_add(b_only, b->names[j]); } } } bool svec_contains(const struct svec *svec, const char *name) { return svec_find(svec, name) != SIZE_MAX; } size_t svec_find(const struct svec *svec, const char *name) { char **p; ovs_assert(svec_is_sorted(svec)); p = bsearch(&name, svec->names, svec->n, sizeof *svec->names, compare_strings); return p ? 
p - svec->names : SIZE_MAX; } bool svec_is_sorted(const struct svec *svec) { size_t i; for (i = 1; i < svec->n; i++) { if (strcmp(svec->names[i - 1], svec->names[i]) > 0) { return false; } } return true; } bool svec_is_unique(const struct svec *svec) { return svec_get_duplicate(svec) == NULL; } const char * svec_get_duplicate(const struct svec *svec) { ovs_assert(svec_is_sorted(svec)); if (svec->n > 1) { size_t i; for (i = 1; i < svec->n; i++) { if (!strcmp(svec->names[i - 1], svec->names[i])) { return svec->names[i]; } } } return NULL; } void svec_swap(struct svec *a, struct svec *b) { struct svec tmp = *a; *a = *b; *b = tmp; } void svec_print(const struct svec *svec, const char *title) { size_t i; printf("%s:\n", title); for (i = 0; i < svec->n; i++) { printf("\"%s\"\n", svec->names[i]); } } /* Breaks 'words' into words at white space, respecting shell-like quoting * conventions, and appends the words to 'svec'. */ void svec_parse_words(struct svec *svec, const char *words) { struct ds word = DS_EMPTY_INITIALIZER; const char *p, *q; for (p = words; *p != '\0'; p = q) { int quote = 0; while (isspace((unsigned char) *p)) { p++; } if (*p == '\0') { break; } ds_clear(&word); for (q = p; *q != '\0'; q++) { if (*q == quote) { quote = 0; } else if (*q == '\'' || *q == '"') { quote = *q; } else if (*q == '\\' && (!quote || quote == '"')) { q++; if (*q == '\0') { VLOG_WARN("%s: ends in trailing backslash", words); break; } ds_put_char(&word, *q); } else if (isspace((unsigned char) *q) && !quote) { q++; break; } else { ds_put_char(&word, *q); } } svec_add(svec, ds_cstr(&word)); if (quote) { VLOG_WARN("%s: word ends inside quoted string", words); } } ds_destroy(&word); } bool svec_equal(const struct svec *a, const struct svec *b) { size_t i; if (a->n != b->n) { return false; } for (i = 0; i < a->n; i++) { if (strcmp(a->names[i], b->names[i])) { return false; } } return true; } char * svec_join(const struct svec *svec, const char *delimiter, const char *terminator) { struct 
ds ds; size_t i; ds_init(&ds); for (i = 0; i < svec->n; i++) { if (i) { ds_put_cstr(&ds, delimiter); } ds_put_cstr(&ds, svec->names[i]); } ds_put_cstr(&ds, terminator); return ds_cstr(&ds); } const char * svec_back(const struct svec *svec) { ovs_assert(svec->n); return svec->names[svec->n - 1]; } void svec_pop_back(struct svec *svec) { ovs_assert(svec->n); free(svec->names[--svec->n]); } openvswitch-2.0.1+git20140120/lib/svec.h000066400000000000000000000050651226605124000173370ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

#ifndef SVEC_H
#define SVEC_H 1

/* NOTE(review): the two bare '#include' directives below have lost their
 * <header> names in this copy of the file; restore them from upstream. */
#include
#include

#ifdef __cplusplus
extern "C" {
#endif

/* A growable vector of strings. */
struct svec {
    char **names;               /* Array of strings. */
    size_t n;                   /* Number of entries of 'names' in use. */
    size_t allocated;           /* Number of entries allocated in 'names'. */
};

/* Static initializer for an empty svec. */
#define SVEC_EMPTY_INITIALIZER { NULL, 0, 0 }

void svec_init(struct svec *);
void svec_clone(struct svec *, const struct svec *);
void svec_destroy(struct svec *);
void svec_clear(struct svec *);
bool svec_is_empty(const struct svec *);
void svec_add(struct svec *, const char *);
void svec_add_nocopy(struct svec *, char *);
void svec_del(struct svec *, const char *);
void svec_append(struct svec *, const struct svec *);
void svec_terminate(struct svec *);
void svec_sort(struct svec *);
void svec_sort_unique(struct svec *);
void svec_unique(struct svec *);
void svec_compact(struct svec *);
void svec_diff(const struct svec *a, const struct svec *b,
               struct svec *a_only, struct svec *both, struct svec *b_only);
bool svec_contains(const struct svec *, const char *);
size_t svec_find(const struct svec *, const char *);
bool svec_is_sorted(const struct svec *);
bool svec_is_unique(const struct svec *);
const char *svec_get_duplicate(const struct svec *);
void svec_swap(struct svec *a, struct svec *b);
void svec_print(const struct svec *svec, const char *title);
void svec_parse_words(struct svec *svec, const char *words);
bool svec_equal(const struct svec *, const struct svec *);
char *svec_join(const struct svec *,
                const char *delimiter, const char *terminator);
const char *svec_back(const struct svec *);
void svec_pop_back(struct svec *);

/* Iterates over the names in SVEC, assigning each name in turn to NAME and its
 * index to INDEX.  INDEX and SVEC are evaluated more than once. */
#define SVEC_FOR_EACH(INDEX, NAME, SVEC)        \
    for ((INDEX) = 0;                           \
         ((INDEX) < (SVEC)->n                   \
          ? (NAME) = (SVEC)->names[INDEX], 1    \
          : 0);                                 \
         (INDEX)++)

#ifdef __cplusplus
}
#endif

#endif /* svec.h */
openvswitch-2.0.1+git20140120/lib/table.c000066400000000000000000000370151226605124000174610ustar00rootroot00000000000000/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "table.h" #include "dynamic-string.h" #include "json.h" #include "ovsdb-data.h" #include "ovsdb-error.h" #include "timeval.h" #include "util.h" struct column { char *heading; }; static char * cell_to_text(struct cell *cell, const struct table_style *style) { if (!cell->text) { if (cell->json) { if (style->cell_format == CF_JSON || !cell->type) { cell->text = json_to_string(cell->json, JSSF_SORT); } else { struct ovsdb_datum datum; struct ovsdb_error *error; struct ds s; error = ovsdb_datum_from_json(&datum, cell->type, cell->json, NULL); if (!error) { ds_init(&s); if (style->cell_format == CF_STRING) { ovsdb_datum_to_string(&datum, cell->type, &s); } else { ovsdb_datum_to_bare(&datum, cell->type, &s); } ovsdb_datum_destroy(&datum, cell->type); cell->text = ds_steal_cstr(&s); } else { cell->text = json_to_string(cell->json, JSSF_SORT); ovsdb_error_destroy(error); } } } else { cell->text = xstrdup(""); } } return cell->text; } static void cell_destroy(struct cell *cell) { free(cell->text); json_destroy(cell->json); } /* Initializes 'table' as an empty table. * * The caller should then: * * 1. Call table_add_column() once for each column. * 2. For each row: * 2a. Call table_add_row(). * 2b. For each column in the cell, call table_add_cell() and fill in * the returned cell. * 3. Call table_print() to print the final table. * 4. Free the table with table_destroy(). 
*/ void table_init(struct table *table) { memset(table, 0, sizeof *table); } /* Destroys 'table' and frees all associated storage. (However, the client * owns the 'type' members pointed to by cells, so these are not destroyed.) */ void table_destroy(struct table *table) { if (table) { size_t i; for (i = 0; i < table->n_columns; i++) { free(table->columns[i].heading); } free(table->columns); for (i = 0; i < table->n_columns * table->n_rows; i++) { cell_destroy(&table->cells[i]); } free(table->cells); free(table->caption); } } /* Sets 'caption' as the caption for 'table'. * * 'table' takes ownership of 'caption'. */ void table_set_caption(struct table *table, char *caption) { free(table->caption); table->caption = caption; } /* Turns printing a timestamp along with 'table' on or off, according to * 'timestamp'. */ void table_set_timestamp(struct table *table, bool timestamp) { table->timestamp = timestamp; } /* Adds a new column to 'table' just to the right of any existing column, with * 'heading' as a title for the column. 'heading' must be a valid printf() * format specifier. * * Columns must be added before any data is put into 'table'. */ void table_add_column(struct table *table, const char *heading, ...) { struct column *column; va_list args; ovs_assert(!table->n_rows); if (table->n_columns >= table->allocated_columns) { table->columns = x2nrealloc(table->columns, &table->allocated_columns, sizeof *table->columns); } column = &table->columns[table->n_columns++]; va_start(args, heading); column->heading = xvasprintf(heading, args); va_end(args); } static struct cell * table_cell__(const struct table *table, size_t row, size_t column) { return &table->cells[column + row * table->n_columns]; } /* Adds a new row to 'table'. The table's columns must already have been added * with table_add_column(). * * The row is initially empty; use table_add_cell() to start filling it in. 
*/ void table_add_row(struct table *table) { size_t x, y; if (table->n_rows >= table->allocated_rows) { table->cells = x2nrealloc(table->cells, &table->allocated_rows, table->n_columns * sizeof *table->cells); } y = table->n_rows++; table->current_column = 0; for (x = 0; x < table->n_columns; x++) { struct cell *cell = table_cell__(table, y, x); memset(cell, 0, sizeof *cell); } } /* Adds a new cell in the current row of 'table', which must have been added * with table_add_row(). Cells are filled in the same order that the columns * were added with table_add_column(). * * The caller is responsible for filling in the returned cell, in one of two * fashions: * * - If the cell should contain an ovsdb_datum, formatted according to the * table style, then fill in the 'json' member with the JSON representation * of the datum and 'type' with its type. * * - If the cell should contain a fixed text string, then the caller should * assign that string to the 'text' member. This is undesirable if the * cell actually contains OVSDB data because 'text' cannot be formatted * according to the table style; it is always output verbatim. 
*/ struct cell * table_add_cell(struct table *table) { size_t x, y; ovs_assert(table->n_rows > 0); ovs_assert(table->current_column < table->n_columns); x = table->current_column++; y = table->n_rows - 1; return table_cell__(table, y, x); } static void table_print_table_line__(struct ds *line) { puts(ds_cstr(line)); ds_clear(line); } static char * table_format_timestamp__(void) { return xastrftime_msec("%Y-%m-%d %H:%M:%S.###", time_wall_msec(), true); } static void table_print_timestamp__(const struct table *table) { if (table->timestamp) { char *s = table_format_timestamp__(); puts(s); free(s); } } static void table_print_table__(const struct table *table, const struct table_style *style) { static int n = 0; struct ds line = DS_EMPTY_INITIALIZER; int *widths; size_t x, y; if (n++ > 0) { putchar('\n'); } table_print_timestamp__(table); if (table->caption) { puts(table->caption); } widths = xmalloc(table->n_columns * sizeof *widths); for (x = 0; x < table->n_columns; x++) { const struct column *column = &table->columns[x]; widths[x] = strlen(column->heading); for (y = 0; y < table->n_rows; y++) { const char *text = cell_to_text(table_cell__(table, y, x), style); size_t length = strlen(text); if (length > widths[x]) { widths[x] = length; } } } if (style->headings) { for (x = 0; x < table->n_columns; x++) { const struct column *column = &table->columns[x]; if (x) { ds_put_char(&line, ' '); } ds_put_format(&line, "%-*s", widths[x], column->heading); } table_print_table_line__(&line); for (x = 0; x < table->n_columns; x++) { if (x) { ds_put_char(&line, ' '); } ds_put_char_multiple(&line, '-', widths[x]); } table_print_table_line__(&line); } for (y = 0; y < table->n_rows; y++) { for (x = 0; x < table->n_columns; x++) { const char *text = cell_to_text(table_cell__(table, y, x), style); if (x) { ds_put_char(&line, ' '); } ds_put_format(&line, "%-*s", widths[x], text); } table_print_table_line__(&line); } ds_destroy(&line); free(widths); } static void 
table_print_list__(const struct table *table, const struct table_style *style) { static int n = 0; size_t x, y; if (n++ > 0) { putchar('\n'); } table_print_timestamp__(table); if (table->caption) { puts(table->caption); } for (y = 0; y < table->n_rows; y++) { if (y > 0) { putchar('\n'); } for (x = 0; x < table->n_columns; x++) { const char *text = cell_to_text(table_cell__(table, y, x), style); if (style->headings) { printf("%-20s: ", table->columns[x].heading); } puts(text); } } } static void table_escape_html_text__(const char *s, size_t n) { size_t i; for (i = 0; i < n; i++) { char c = s[i]; switch (c) { case '&': fputs("&", stdout); break; case '<': fputs("<", stdout); break; case '>': fputs(">", stdout); break; case '"': fputs(""", stdout); break; default: putchar(c); break; } } } static void table_print_html_cell__(const char *element, const char *content) { const char *p; printf(" <%s>", element); for (p = content; *p; ) { struct uuid uuid; if (uuid_from_string_prefix(&uuid, p)) { printf("%.*s", UUID_LEN, p, 8, p); p += UUID_LEN; } else { table_escape_html_text__(p, 1); p++; } } printf("\n", element); } static void table_print_html__(const struct table *table, const struct table_style *style) { size_t x, y; table_print_timestamp__(table); fputs("
\n", stdout); if (table->caption) { table_print_html_cell__("caption", table->caption); } if (style->headings) { fputs(" \n", stdout); for (x = 0; x < table->n_columns; x++) { const struct column *column = &table->columns[x]; table_print_html_cell__("th", column->heading); } fputs(" \n", stdout); } for (y = 0; y < table->n_rows; y++) { fputs(" \n", stdout); for (x = 0; x < table->n_columns; x++) { const char *content; content = cell_to_text(table_cell__(table, y, x), style); if (!strcmp(table->columns[x].heading, "_uuid")) { fputs(" \n", stdout); } else { table_print_html_cell__("td", content); } } fputs(" \n", stdout); } fputs("
", stdout); table_escape_html_text__(content, 8); fputs("
\n", stdout); } static void table_print_csv_cell__(const char *content) { const char *p; if (!strpbrk(content, "\n\",")) { fputs(content, stdout); } else { putchar('"'); for (p = content; *p != '\0'; p++) { switch (*p) { case '"': fputs("\"\"", stdout); break; default: putchar(*p); break; } } putchar('"'); } } static void table_print_csv__(const struct table *table, const struct table_style *style) { static int n = 0; size_t x, y; if (n++ > 0) { putchar('\n'); } table_print_timestamp__(table); if (table->caption) { puts(table->caption); } if (style->headings) { for (x = 0; x < table->n_columns; x++) { const struct column *column = &table->columns[x]; if (x) { putchar(','); } table_print_csv_cell__(column->heading); } putchar('\n'); } for (y = 0; y < table->n_rows; y++) { for (x = 0; x < table->n_columns; x++) { if (x) { putchar(','); } table_print_csv_cell__(cell_to_text(table_cell__(table, y, x), style)); } putchar('\n'); } } static void table_print_json__(const struct table *table, const struct table_style *style) { struct json *json, *headings, *data; size_t x, y; char *s; json = json_object_create(); if (table->caption) { json_object_put_string(json, "caption", table->caption); } if (table->timestamp) { char *s = table_format_timestamp__(); json_object_put_string(json, "time", s); free(s); } headings = json_array_create_empty(); for (x = 0; x < table->n_columns; x++) { const struct column *column = &table->columns[x]; json_array_add(headings, json_string_create(column->heading)); } json_object_put(json, "headings", headings); data = json_array_create_empty(); for (y = 0; y < table->n_rows; y++) { struct json *row = json_array_create_empty(); for (x = 0; x < table->n_columns; x++) { const struct cell *cell = table_cell__(table, y, x); if (cell->text) { json_array_add(row, json_string_create(cell->text)); } else if (cell->json) { json_array_add(row, json_clone(cell->json)); } else { json_array_add(row, json_null_create()); } } json_array_add(data, row); } 
json_object_put(json, "data", data); s = json_to_string(json, style->json_flags); json_destroy(json); puts(s); free(s); } /* Parses 'format' as the argument to a --format command line option, updating * 'style->format'. */ void table_parse_format(struct table_style *style, const char *format) { if (!strcmp(format, "table")) { style->format = TF_TABLE; } else if (!strcmp(format, "list")) { style->format = TF_LIST; } else if (!strcmp(format, "html")) { style->format = TF_HTML; } else if (!strcmp(format, "csv")) { style->format = TF_CSV; } else if (!strcmp(format, "json")) { style->format = TF_JSON; } else { ovs_fatal(0, "unknown output format \"%s\"", format); } } /* Parses 'format' as the argument to a --data command line option, updating * 'style->cell_format'. */ void table_parse_cell_format(struct table_style *style, const char *format) { if (!strcmp(format, "string")) { style->cell_format = CF_STRING; } else if (!strcmp(format, "bare")) { style->cell_format = CF_BARE; } else if (!strcmp(format, "json")) { style->cell_format = CF_JSON; } else { ovs_fatal(0, "unknown data format \"%s\"", format); } } /* Outputs 'table' on stdout in the specified 'style'. */ void table_print(const struct table *table, const struct table_style *style) { switch (style->format) { case TF_TABLE: table_print_table__(table, style); break; case TF_LIST: table_print_list__(table, style); break; case TF_HTML: table_print_html__(table, style); break; case TF_CSV: table_print_csv__(table, style); break; case TF_JSON: table_print_json__(table, style); break; } } openvswitch-2.0.1+git20140120/lib/table.h000066400000000000000000000103031226605124000174550ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef TABLE_H #define TABLE_H 1 #include #include #include "compiler.h" struct table_style; /* Manipulating tables and their rows and columns. */ struct table { struct cell *cells; struct column *columns; size_t n_columns, allocated_columns; size_t n_rows, allocated_rows; size_t current_column; char *caption; bool timestamp; }; void table_init(struct table *); void table_destroy(struct table *); void table_set_caption(struct table *, char *caption); void table_set_timestamp(struct table *, bool timestamp); void table_add_column(struct table *, const char *heading, ...) PRINTF_FORMAT(2, 3); void table_add_row(struct table *); /* Table cells. */ struct cell { /* Literal text. */ char *text; /* JSON. */ struct json *json; const struct ovsdb_type *type; }; struct cell *table_add_cell(struct table *); /* Table formatting. */ enum table_format { TF_TABLE, /* 2-d table. */ TF_LIST, /* One cell per line, one row per paragraph. */ TF_HTML, /* HTML table. */ TF_CSV, /* Comma-separated lines. */ TF_JSON /* JSON. */ }; enum cell_format { CF_STRING, /* String format. */ CF_BARE, /* String format without most punctuation. */ CF_JSON /* JSON. */ }; struct table_style { enum table_format format; /* TF_*. */ enum cell_format cell_format; /* CF_*. */ bool headings; /* Include headings? */ int json_flags; /* CF_JSON: Flags for json_to_string(). 
*/ }; #define TABLE_STYLE_DEFAULT { TF_TABLE, CF_STRING, true, JSSF_SORT } #define TABLE_OPTION_ENUMS \ OPT_NO_HEADINGS, \ OPT_PRETTY, \ OPT_BARE #define TABLE_LONG_OPTIONS \ {"format", required_argument, NULL, 'f'}, \ {"data", required_argument, NULL, 'd'}, \ {"no-headings", no_argument, NULL, OPT_NO_HEADINGS}, \ {"pretty", no_argument, NULL, OPT_PRETTY}, \ {"bare", no_argument, NULL, OPT_BARE} #define TABLE_OPTION_HANDLERS(STYLE) \ case 'f': \ table_parse_format(STYLE, optarg); \ break; \ \ case 'd': \ table_parse_cell_format(STYLE, optarg); \ break; \ \ case OPT_NO_HEADINGS: \ (STYLE)->headings = false; \ break; \ \ case OPT_PRETTY: \ (STYLE)->json_flags |= JSSF_PRETTY; \ break; \ \ case OPT_BARE: \ (STYLE)->format = TF_LIST; \ (STYLE)->cell_format = CF_BARE; \ (STYLE)->headings = false; \ break; void table_parse_format(struct table_style *, const char *format); void table_parse_cell_format(struct table_style *, const char *format); void table_print(const struct table *, const struct table_style *); #endif /* table.h */ openvswitch-2.0.1+git20140120/lib/table.man000066400000000000000000000051061226605124000200060ustar00rootroot00000000000000.IP "\fB\-f \fIformat\fR" .IQ "\fB\-\-format=\fIformat\fR" Sets the type of table formatting. The following types of \fIformat\fR are available: .RS .ie '\*(PN'ovs\-vsctl' .IP "\fBtable\fR" .el .IP "\fBtable\fR (default)" 2-D text tables with aligned columns. .ie '\*(PN'ovs\-vsctl' .IP "\fBlist\fR (default)" .el .IP "\fBlist\fR" A list with one column per line and rows separated by a blank line. .IP "\fBhtml\fR" HTML tables. .IP "\fBcsv\fR" Comma-separated values as defined in RFC 4180. .IP "\fBjson\fR" JSON format as defined in RFC 4627. The output is a sequence of JSON objects, each of which corresponds to one table. Each JSON object has the following members with the noted values: .RS .IP "\fBcaption\fR" The table's caption. This member is omitted if the table has no caption. 
.IP "\fBheadings\fR" An array with one element per table column. Each array element is a string giving the corresponding column's heading. .IP "\fBdata\fR" An array with one element per table row. Each element is also an array with one element per table column. The elements of this second-level array are the cells that constitute the table. Cells that represent OVSDB data or data types are expressed in the format described in the OVSDB specification; other cells are simply expressed as text strings. .RE .RE . .IP "\fB\-d \fIformat\fR" .IQ "\fB\-\-data=\fIformat\fR" Sets the formatting for cells within output tables. The following types of \fIformat\fR are available: .RS .IP "\fBstring\fR (default)" The simple format described in the \fBDatabase Values\fR .ie '\*(PN'ovs\-vsctl' section below. .el section of \fBovs\-vsctl\fR(8). .IP "\fBbare\fR" The simple format with punctuation stripped off: \fB[]\fR and \fB{}\fR are omitted around sets, maps, and empty columns, items within sets and maps are space-separated, and strings are never quoted. This format may be easier for scripts to parse. .IP "\fBjson\fR" JSON. .RE .IP The \fBjson\fR output format always outputs cells in JSON format, ignoring this option. . .IP "\fB\-\-no\-headings\fR" This option suppresses the heading row that otherwise appears in the first row of table output. . .IP "\fB\-\-pretty\fR" By default, JSON in output is printed as compactly as possible. This option causes JSON in output to be printed in a more readable fashion. Members of objects and elements of arrays are printed one per line, with indentation. .IP This option does not affect JSON in tables, which is always printed compactly. .IP "\fB\-\-bare\fR" Equivalent to \fB\-\-format=list \-\-data=bare \-\-no\-headings\fR. openvswitch-2.0.1+git20140120/lib/timer.c000066400000000000000000000025061226605124000175070ustar00rootroot00000000000000/* * Copyright (c) 2011, 2013 Nicira, Inc.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "timer.h" #include "poll-loop.h" #include "timeval.h" /* Returns the number of milliseconds until 'timer' expires. */ long long int timer_msecs_until_expired(const struct timer *timer) { switch (timer->t) { case LLONG_MAX: return LLONG_MAX; case LLONG_MIN: return 0; default: return timer->t - time_msec(); } } /* Causes poll_block() to wake when 'timer' expires. * * ('where' is used in debug logging. Commonly one would use timer_wait() to * automatically provide the caller's source file and line number for * 'where'.) */ void timer_wait_at(const struct timer *timer, const char *where) { if (timer->t < LLONG_MAX) { poll_timer_wait_until_at(timer->t, where); } } openvswitch-2.0.1+git20140120/lib/timer.h000066400000000000000000000034421226605124000175140ustar00rootroot00000000000000/* * Copyright (c) 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef TIMER_H #define TIMER_H 1 #include #include "timeval.h" #include "util.h" struct timer { long long int t; }; long long int timer_msecs_until_expired(const struct timer *); void timer_wait_at(const struct timer *, const char *where); #define timer_wait(timer) timer_wait_at(timer, SOURCE_LOCATOR) /* Causes 'timer' to expire when 'duration' milliseconds have passed. * * May be used to initialize 'timer'. */ static inline void timer_set_duration(struct timer *timer, long long int duration) { timer->t = time_msec() + duration; } /* Causes 'timer' never to expire. * * May be used to initialize 'timer'. */ static inline void timer_set_infinite(struct timer *timer) { timer->t = LLONG_MAX; } /* Causes 'timer' to expire immediately. * * May be used to initialize 'timer'. */ static inline void timer_set_expired(struct timer *timer) { timer->t = LLONG_MIN; } /* True if 'timer' has expired. */ static inline bool timer_expired(const struct timer *timer) { return time_msec() >= timer->t; } /* Returns ture if 'timer' will never expire. */ static inline bool timer_is_infinite(const struct timer *timer) { return timer->t == LLONG_MAX; } #endif /* timer.h */ openvswitch-2.0.1+git20140120/lib/timeval.c000066400000000000000000000424001226605124000200250ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "timeval.h" #include #include #include #include #include #include #include #include #include #include "coverage.h" #include "dummy.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "hash.h" #include "hmap.h" #include "ovs-thread.h" #include "signals.h" #include "unixctl.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(timeval); struct clock { clockid_t id; /* CLOCK_MONOTONIC or CLOCK_REALTIME. */ struct ovs_rwlock rwlock; /* Mutual exclusion for 'cache'. */ /* Features for use by unit tests. Protected by 'rwlock'. */ struct timespec warp; /* Offset added for unit tests. */ bool stopped; /* Disables real-time updates if true. */ /* Relevant only if CACHE_TIME is true. */ volatile sig_atomic_t tick; /* Has the timer ticked? Set by signal. */ struct timespec cache; /* Last time read from kernel. */ }; /* Our clocks. */ static struct clock monotonic_clock; /* CLOCK_MONOTONIC, if available. */ static struct clock wall_clock; /* CLOCK_REALTIME. */ /* The monotonic time at which the time module was initialized. */ static long long int boot_time; /* Monotonic time in milliseconds at which to die with SIGALRM (if not * LLONG_MAX). */ static long long int deadline = LLONG_MAX; /* Monotonic time, in milliseconds, at which the last call to time_poll() woke * up. 
*/ DEFINE_STATIC_PER_THREAD_DATA(long long int, last_wakeup, 0); static void set_up_timer(void); static void set_up_signal(int flags); static void sigalrm_handler(int); static void block_sigalrm(sigset_t *); static void unblock_sigalrm(const sigset_t *); static void log_poll_interval(long long int last_wakeup); static struct rusage *get_recent_rusage(void); static void refresh_rusage(void); static void timespec_add(struct timespec *sum, const struct timespec *a, const struct timespec *b); static void init_clock(struct clock *c, clockid_t id) { memset(c, 0, sizeof *c); c->id = id; ovs_rwlock_init(&c->rwlock); xclock_gettime(c->id, &c->cache); } static void do_init_time(void) { struct timespec ts; coverage_init(); init_clock(&monotonic_clock, (!clock_gettime(CLOCK_MONOTONIC, &ts) ? CLOCK_MONOTONIC : CLOCK_REALTIME)); init_clock(&wall_clock, CLOCK_REALTIME); boot_time = timespec_to_msec(&monotonic_clock.cache); set_up_signal(SA_RESTART); set_up_timer(); } /* Initializes the timetracking module, if not already initialized. */ static void time_init(void) { static pthread_once_t once = PTHREAD_ONCE_INIT; pthread_once(&once, do_init_time); } static void set_up_signal(int flags) { struct sigaction sa; memset(&sa, 0, sizeof sa); sa.sa_handler = sigalrm_handler; sigemptyset(&sa.sa_mask); sa.sa_flags = flags; xsigaction(SIGALRM, &sa, NULL); } static void set_up_timer(void) { static timer_t timer_id; /* "static" to avoid apparent memory leak. */ struct itimerspec itimer; if (!CACHE_TIME) { return; } if (timer_create(monotonic_clock.id, NULL, &timer_id)) { VLOG_FATAL("timer_create failed (%s)", ovs_strerror(errno)); } itimer.it_interval.tv_sec = 0; itimer.it_interval.tv_nsec = TIME_UPDATE_INTERVAL * 1000 * 1000; itimer.it_value = itimer.it_interval; if (timer_settime(timer_id, 0, &itimer, NULL)) { VLOG_FATAL("timer_settime failed (%s)", ovs_strerror(errno)); } } /* Set up the interval timer, to ensure that time advances even without calling * time_refresh(). 
* * A child created with fork() does not inherit the parent's interval timer, so * this function needs to be called from the child after fork(). */ void time_postfork(void) { assert_single_threaded(); time_init(); set_up_timer(); } /* Forces a refresh of the current time from the kernel. It is not usually * necessary to call this function, since the time will be refreshed * automatically at least every TIME_UPDATE_INTERVAL milliseconds. If * CACHE_TIME is false, we will always refresh the current time so this * function has no effect. */ void time_refresh(void) { monotonic_clock.tick = wall_clock.tick = true; } static void time_timespec__(struct clock *c, struct timespec *ts) { time_init(); for (;;) { /* Use the cached time by preference, but fall through if there's been * a clock tick. */ ovs_rwlock_rdlock(&c->rwlock); if (c->stopped || !c->tick) { timespec_add(ts, &c->cache, &c->warp); ovs_rwlock_unlock(&c->rwlock); return; } ovs_rwlock_unlock(&c->rwlock); /* Refresh the cache. */ ovs_rwlock_wrlock(&c->rwlock); if (c->tick) { c->tick = false; xclock_gettime(c->id, &c->cache); } ovs_rwlock_unlock(&c->rwlock); } } /* Stores a monotonic timer, accurate within TIME_UPDATE_INTERVAL ms, into * '*ts'. */ void time_timespec(struct timespec *ts) { time_timespec__(&monotonic_clock, ts); } /* Stores the current time, accurate within TIME_UPDATE_INTERVAL ms, into * '*ts'. */ void time_wall_timespec(struct timespec *ts) { time_timespec__(&wall_clock, ts); } static time_t time_sec__(struct clock *c) { struct timespec ts; time_timespec__(c, &ts); return ts.tv_sec; } /* Returns a monotonic timer, in seconds. */ time_t time_now(void) { return time_sec__(&monotonic_clock); } /* Returns the current time, in seconds. */ time_t time_wall(void) { return time_sec__(&wall_clock); } static long long int time_msec__(struct clock *c) { struct timespec ts; time_timespec__(c, &ts); return timespec_to_msec(&ts); } /* Returns a monotonic timer, in ms (within TIME_UPDATE_INTERVAL ms). 
*/ long long int time_msec(void) { return time_msec__(&monotonic_clock); } /* Returns the current time, in ms (within TIME_UPDATE_INTERVAL ms). */ long long int time_wall_msec(void) { return time_msec__(&wall_clock); } /* Configures the program to die with SIGALRM 'secs' seconds from now, if * 'secs' is nonzero, or disables the feature if 'secs' is zero. */ void time_alarm(unsigned int secs) { long long int now; long long int msecs; assert_single_threaded(); time_init(); time_refresh(); now = time_msec(); msecs = secs * 1000LL; deadline = now < LLONG_MAX - msecs ? now + msecs : LLONG_MAX; } /* Like poll(), except: * * - The timeout is specified as an absolute time, as defined by * time_msec(), instead of a duration. * * - On error, returns a negative error code (instead of setting errno). * * - If interrupted by a signal, retries automatically until the original * timeout is reached. (Because of this property, this function will * never return -EINTR.) * * - As a side effect, refreshes the current time (like time_refresh()). * * Stores the number of milliseconds elapsed during poll in '*elapsed'. 
*/ int time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when, int *elapsed) { long long int *last_wakeup = last_wakeup_get(); long long int start; sigset_t oldsigs; bool blocked; int retval; time_init(); time_refresh(); if (*last_wakeup) { log_poll_interval(*last_wakeup); } coverage_clear(); start = time_msec(); blocked = false; timeout_when = MIN(timeout_when, deadline); for (;;) { long long int now = time_msec(); int time_left; if (now >= timeout_when) { time_left = 0; } else if ((unsigned long long int) timeout_when - now > INT_MAX) { time_left = INT_MAX; } else { time_left = timeout_when - now; } retval = poll(pollfds, n_pollfds, time_left); if (retval < 0) { retval = -errno; } time_refresh(); if (deadline <= time_msec()) { fatal_signal_handler(SIGALRM); if (retval < 0) { retval = 0; } break; } if (retval != -EINTR) { break; } if (!blocked && CACHE_TIME) { block_sigalrm(&oldsigs); blocked = true; } } if (blocked) { unblock_sigalrm(&oldsigs); } *last_wakeup = time_msec(); refresh_rusage(); *elapsed = *last_wakeup - start; return retval; } static void sigalrm_handler(int sig_nr OVS_UNUSED) { monotonic_clock.tick = wall_clock.tick = true; } static void block_sigalrm(sigset_t *oldsigs) { sigset_t sigalrm; sigemptyset(&sigalrm); sigaddset(&sigalrm, SIGALRM); xpthread_sigmask(SIG_BLOCK, &sigalrm, oldsigs); } static void unblock_sigalrm(const sigset_t *oldsigs) { xpthread_sigmask(SIG_SETMASK, oldsigs, NULL); } long long int timespec_to_msec(const struct timespec *ts) { return (long long int) ts->tv_sec * 1000 + ts->tv_nsec / (1000 * 1000); } long long int timeval_to_msec(const struct timeval *tv) { return (long long int) tv->tv_sec * 1000 + tv->tv_usec / 1000; } /* Returns the monotonic time at which the "time" module was initialized, in * milliseconds. 
*/ long long int time_boot_msec(void) { time_init(); return boot_time; } void xgettimeofday(struct timeval *tv) { if (gettimeofday(tv, NULL) == -1) { VLOG_FATAL("gettimeofday failed (%s)", ovs_strerror(errno)); } } void xclock_gettime(clock_t id, struct timespec *ts) { if (clock_gettime(id, ts) == -1) { /* It seems like a bad idea to try to use vlog here because it is * likely to try to check the current time. */ ovs_abort(errno, "xclock_gettime() failed"); } } static long long int timeval_diff_msec(const struct timeval *a, const struct timeval *b) { return timeval_to_msec(a) - timeval_to_msec(b); } static void timespec_add(struct timespec *sum, const struct timespec *a, const struct timespec *b) { struct timespec tmp; tmp.tv_sec = a->tv_sec + b->tv_sec; tmp.tv_nsec = a->tv_nsec + b->tv_nsec; if (tmp.tv_nsec >= 1000 * 1000 * 1000) { tmp.tv_nsec -= 1000 * 1000 * 1000; tmp.tv_sec++; } *sum = tmp; } static void log_poll_interval(long long int last_wakeup) { long long int interval = time_msec() - last_wakeup; if (interval >= 1000 && !monotonic_clock.warp.tv_sec && !monotonic_clock.warp.tv_nsec) { const struct rusage *last_rusage = get_recent_rusage(); struct rusage rusage; getrusage(RUSAGE_SELF, &rusage); VLOG_WARN("Unreasonably long %lldms poll interval" " (%lldms user, %lldms system)", interval, timeval_diff_msec(&rusage.ru_utime, &last_rusage->ru_utime), timeval_diff_msec(&rusage.ru_stime, &last_rusage->ru_stime)); if (rusage.ru_minflt > last_rusage->ru_minflt || rusage.ru_majflt > last_rusage->ru_majflt) { VLOG_WARN("faults: %ld minor, %ld major", rusage.ru_minflt - last_rusage->ru_minflt, rusage.ru_majflt - last_rusage->ru_majflt); } if (rusage.ru_inblock > last_rusage->ru_inblock || rusage.ru_oublock > last_rusage->ru_oublock) { VLOG_WARN("disk: %ld reads, %ld writes", rusage.ru_inblock - last_rusage->ru_inblock, rusage.ru_oublock - last_rusage->ru_oublock); } if (rusage.ru_nvcsw > last_rusage->ru_nvcsw || rusage.ru_nivcsw > last_rusage->ru_nivcsw) { 
VLOG_WARN("context switches: %ld voluntary, %ld involuntary", rusage.ru_nvcsw - last_rusage->ru_nvcsw, rusage.ru_nivcsw - last_rusage->ru_nivcsw); } coverage_log(); } } /* CPU usage tracking. */ struct cpu_usage { long long int when; /* Time that this sample was taken. */ unsigned long long int cpu; /* Total user+system CPU usage when sampled. */ }; struct cpu_tracker { struct cpu_usage older; struct cpu_usage newer; int cpu_usage; struct rusage recent_rusage; }; DEFINE_PER_THREAD_MALLOCED_DATA(struct cpu_tracker *, cpu_tracker_var); static struct cpu_tracker * get_cpu_tracker(void) { struct cpu_tracker *t = cpu_tracker_var_get(); if (!t) { t = xzalloc(sizeof *t); t->older.when = LLONG_MIN; t->newer.when = LLONG_MIN; cpu_tracker_var_set_unsafe(t); } return t; } static struct rusage * get_recent_rusage(void) { return &get_cpu_tracker()->recent_rusage; } static int getrusage_thread(struct rusage *rusage OVS_UNUSED) { #ifdef RUSAGE_THREAD return getrusage(RUSAGE_THREAD, rusage); #else errno = EINVAL; return -1; #endif } static void refresh_rusage(void) { struct cpu_tracker *t = get_cpu_tracker(); struct rusage *recent_rusage = &t->recent_rusage; if (!getrusage_thread(recent_rusage)) { long long int now = time_msec(); if (now >= t->newer.when + 3 * 1000) { t->older = t->newer; t->newer.when = now; t->newer.cpu = (timeval_to_msec(&recent_rusage->ru_utime) + timeval_to_msec(&recent_rusage->ru_stime)); if (t->older.when != LLONG_MIN && t->newer.cpu > t->older.cpu) { unsigned int dividend = t->newer.cpu - t->older.cpu; unsigned int divisor = (t->newer.when - t->older.when) / 100; t->cpu_usage = divisor > 0 ? dividend / divisor : -1; } else { t->cpu_usage = -1; } } } } /* Returns an estimate of this process's CPU usage, as a percentage, over the * past few seconds of wall-clock time. Returns -1 if no estimate is available * (which will happen if the process has not been running long enough to have * an estimate, and can happen for other reasons as well). 
*/ int get_cpu_usage(void) { return get_cpu_tracker()->cpu_usage; } /* Unixctl interface. */ /* "time/stop" stops the monotonic time returned by e.g. time_msec() from * advancing, except due to later calls to "time/warp". */ static void timeval_stop_cb(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { ovs_rwlock_wrlock(&monotonic_clock.rwlock); monotonic_clock.stopped = true; ovs_rwlock_unlock(&monotonic_clock.rwlock); unixctl_command_reply(conn, NULL); } /* "time/warp MSECS" advances the current monotonic time by the specified * number of milliseconds. Unless "time/stop" has also been executed, the * monotonic clock continues to tick forward at the normal rate afterward. * * Does not affect wall clock readings. */ static void timeval_warp_cb(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { struct timespec ts; int msecs; msecs = atoi(argv[1]); if (msecs <= 0) { unixctl_command_reply_error(conn, "invalid MSECS"); return; } ts.tv_sec = msecs / 1000; ts.tv_nsec = (msecs % 1000) * 1000 * 1000; ovs_rwlock_wrlock(&monotonic_clock.rwlock); timespec_add(&monotonic_clock.warp, &monotonic_clock.warp, &ts); ovs_rwlock_unlock(&monotonic_clock.rwlock); unixctl_command_reply(conn, "warped"); } void timeval_dummy_register(void) { unixctl_command_register("time/stop", "", 0, 0, timeval_stop_cb, NULL); unixctl_command_register("time/warp", "MSECS", 1, 1, timeval_warp_cb, NULL); } /* strftime() with an extension for high-resolution timestamps. Any '#'s in * 'format' will be replaced by subseconds, e.g. use "%S.###" to obtain results * like "01.123". */ size_t strftime_msec(char *s, size_t max, const char *format, const struct tm_msec *tm) { size_t n; n = strftime(s, max, format, &tm->tm); if (n) { char decimals[4]; char *p; sprintf(decimals, "%03d", tm->msec); for (p = strchr(s, '#'); p; p = strchr(p, '#')) { char *d = decimals; while (*p == '#') { *p++ = *d ? 
*d++ : '0'; } } } return n; } struct tm_msec * localtime_msec(long long int now, struct tm_msec *result) { time_t now_sec = now / 1000; localtime_r(&now_sec, &result->tm); result->msec = now % 1000; return result; } struct tm_msec * gmtime_msec(long long int now, struct tm_msec *result) { time_t now_sec = now / 1000; gmtime_r(&now_sec, &result->tm); result->msec = now % 1000; return result; } openvswitch-2.0.1+git20140120/lib/timeval.h000066400000000000000000000054411226605124000200360ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef TIMEVAL_H #define TIMEVAL_H 1 #include #include "type-props.h" #include "util.h" #ifdef __cplusplus extern "C" { #endif struct ds; struct pollfd; struct timespec; struct timeval; /* POSIX allows floating-point time_t, but we don't support it. */ BUILD_ASSERT_DECL(TYPE_IS_INTEGER(time_t)); /* We do try to cater to unsigned time_t, but I want to know about it if we * ever encounter such a platform. */ BUILD_ASSERT_DECL(TYPE_IS_SIGNED(time_t)); #define TIME_MAX TYPE_MAXIMUM(time_t) #define TIME_MIN TYPE_MINIMUM(time_t) /* Interval between updates to the reported time, in ms. This should not be * adjusted much below 10 ms or so with the current implementation, or too * much time will be wasted in signal handlers and calls to clock_gettime(). */ #define TIME_UPDATE_INTERVAL 25 /* True on systems that support a monotonic clock. 
Compared to just getting * the value of a variable, clock_gettime() is somewhat expensive, even on * systems that try hard to optimize it (such as x86-64 Linux), so it's * worthwhile to minimize calls via caching. */ #ifndef CACHE_TIME #if defined ESX #define CACHE_TIME 0 #else #define CACHE_TIME 1 #endif #endif /* ifndef CACHE_TIME */ struct tm_msec { struct tm tm; int msec; }; void time_postfork(void); void time_refresh(void); time_t time_now(void); time_t time_wall(void); long long int time_msec(void); long long int time_wall_msec(void); void time_timespec(struct timespec *); void time_wall_timespec(struct timespec *); void time_alarm(unsigned int secs); int time_poll(struct pollfd *, int n_pollfds, long long int timeout_when, int *elapsed); long long int timespec_to_msec(const struct timespec *); long long int timeval_to_msec(const struct timeval *); struct tm_msec *localtime_msec(long long int now, struct tm_msec *result); struct tm_msec *gmtime_msec(long long int now, struct tm_msec *result); size_t strftime_msec(char *s, size_t max, const char *format, const struct tm_msec *); void xgettimeofday(struct timeval *); void xclock_gettime(clock_t, struct timespec *); int get_cpu_usage(void); long long int time_boot_msec(void); #ifdef __cplusplus } #endif #endif /* timeval.h */ openvswitch-2.0.1+git20140120/lib/token-bucket.c000066400000000000000000000053221226605124000207610ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include "token-bucket.h" #include "poll-loop.h" #include "sat-math.h" #include "timeval.h" #include "util.h" /* Initializes 'tb' to accumulate 'rate' tokens per millisecond, with a * maximum of 'burst' tokens. * * The token bucket is initially full. * * It may be more convenient to use TOKEN_BUCKET_INIT. */ void token_bucket_init(struct token_bucket *tb, unsigned int rate, unsigned int burst) { tb->rate = rate; tb->burst = burst; tb->tokens = 0; tb->last_fill = LLONG_MIN; } /* Changes 'tb' to accumulate 'rate' tokens per millisecond, with a maximum of * 'burst' tokens. * * 'tb' must already have been initialized with TOKEN_BUCKET_INIT or * token_bucket_init(). */ void token_bucket_set(struct token_bucket *tb, unsigned int rate, unsigned int burst) { tb->rate = rate; tb->burst = burst; if (burst > tb->tokens) { tb->tokens = burst; } } /* Attempts to remove 'n' tokens from 'tb'. Returns true if successful, false * if 'tb' contained fewer than 'n' tokens (and thus 'n' tokens could not be * removed) . */ bool token_bucket_withdraw(struct token_bucket *tb, unsigned int n) { if (tb->tokens < n) { long long int now = time_msec(); if (now > tb->last_fill) { unsigned long long int elapsed_ull = (unsigned long long int) now - tb->last_fill; unsigned int elapsed = MIN(UINT_MAX, elapsed_ull); unsigned int add = sat_mul(tb->rate, elapsed); unsigned int tokens = sat_add(tb->tokens, add); tb->tokens = MIN(tokens, tb->burst); tb->last_fill = now; } if (tb->tokens < n) { return false; } } tb->tokens -= n; return true; } /* Causes the poll loop to wake up when at least 'n' tokens will be available * for withdrawal from 'tb'. 
*/ void token_bucket_wait(struct token_bucket *tb, unsigned int n) { if (tb->tokens >= n) { poll_immediate_wake(); } else { unsigned int need = n - tb->tokens; poll_timer_wait_until(tb->last_fill + need / tb->rate + 1); } } openvswitch-2.0.1+git20140120/lib/token-bucket.h000066400000000000000000000027201226605124000207650ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef TOKEN_BUCKET_H #define TOKEN_BUCKET_H 1 #include #include struct token_bucket { /* Configuration settings. */ unsigned int rate; /* Tokens added per millisecond. */ unsigned int burst; /* Max cumulative tokens credit. */ /* Current status. */ unsigned int tokens; /* Current number of tokens. */ long long int last_fill; /* Last time tokens added. */ }; #define TOKEN_BUCKET_INIT(RATE, BURST) { RATE, BURST, 0, LLONG_MIN } void token_bucket_init(struct token_bucket *, unsigned int rate, unsigned int burst); void token_bucket_set(struct token_bucket *, unsigned int rate, unsigned int burst); bool token_bucket_withdraw(struct token_bucket *, unsigned int n); void token_bucket_wait(struct token_bucket *, unsigned int n); #endif /* token-bucket.h */ openvswitch-2.0.1+git20140120/lib/type-props.h000066400000000000000000000027051226605124000205170ustar00rootroot00000000000000/* * Copyright (c) 2008, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
/* Compile-time properties of arithmetic types.  Every macro below expands to
 * a constant expression, so it may be used in static assertions and array
 * sizes. */

/* True if TYPE is an integer type: converting 1.5 to an integer type
 * truncates it to 1, whereas a floating-point type keeps the fraction. */
#define TYPE_IS_INTEGER(TYPE) ((TYPE) 1.5 == (TYPE) 1)

/* True if TYPE can represent negative values. */
#define TYPE_IS_SIGNED(TYPE) ((TYPE) 1 > (TYPE) -1)

/* Number of bits in TYPE, not counting the sign bit (if any). */
#define TYPE_VALUE_BITS(TYPE) (sizeof(TYPE) * CHAR_BIT - TYPE_IS_SIGNED(TYPE))

/* Minimum and maximum values of integer TYPE.
 *
 * These are built from shifts of the positive value 1 only.  The obvious
 * "~(TYPE) 0 << bits" formulation left-shifts a negative value, which is
 * undefined behavior for signed types.  The maximum of a signed type is
 * computed as (2**(bits - 1) - 1) * 2 + 1 so that no intermediate result
 * overflows. */
#define TYPE_MINIMUM(TYPE) (TYPE_IS_SIGNED(TYPE) \
                            ? -TYPE_MAXIMUM(TYPE) - 1 \
                            : 0)
#define TYPE_MAXIMUM(TYPE) \
    (TYPE_IS_SIGNED(TYPE) \
     ? ((((TYPE) 1 << (TYPE_VALUE_BITS(TYPE) - 1)) - 1) * 2 + 1) \
     : (TYPE) -1)

/* Number of decimal digits required to format an integer of the given TYPE.
 * Includes space for a sign, if TYPE is signed, but not for a null
 * terminator.
 *
 * The value is an overestimate. */
#define INT_STRLEN(TYPE) (TYPE_IS_SIGNED(TYPE) + TYPE_VALUE_BITS(TYPE) / 3 + 1)
/* GCC implementations.
 *
 * GCC_UNALIGNED_ACCESSORS(TYPE, ABBREV) expands to four definitions:
 *
 *   - struct unaligned_ABBREV, which wraps a single TYPE member 'x' declared
 *     with __attribute__((__packed__)).
 *
 *   - unaligned_ABBREV(p), which casts 'p' (a 'const TYPE *') to a pointer
 *     to that struct.  Note that the cast drops the 'const' qualifier.
 *
 *   - get_unaligned_ABBREV(p), which returns member 'x' read through that
 *     struct pointer.
 *
 *   - put_unaligned_ABBREV(p, x), which stores 'x' through that struct
 *     pointer.
 *
 * The macro body cannot contain comments between its continuation lines, so
 * all of the documentation lives here. */
#define GCC_UNALIGNED_ACCESSORS(TYPE, ABBREV)           \
struct unaligned_##ABBREV {                             \
    TYPE x __attribute__((__packed__));                 \
};                                                      \
static inline struct unaligned_##ABBREV *               \
unaligned_##ABBREV(const TYPE *p)                       \
{                                                       \
    return (struct unaligned_##ABBREV *) p;             \
}                                                       \
                                                        \
static inline TYPE                                      \
get_unaligned_##ABBREV(const TYPE *p)                   \
{                                                       \
    return unaligned_##ABBREV(p)->x;                    \
}                                                       \
                                                        \
static inline void                                      \
put_unaligned_##ABBREV(TYPE *p, TYPE x)                 \
{                                                       \
    unaligned_##ABBREV(p)->x = x;                       \
}

GCC_UNALIGNED_ACCESSORS(uint16_t, u16);
GCC_UNALIGNED_ACCESSORS(uint32_t, u32);
GCC_UNALIGNED_ACCESSORS(uint64_t, u64__); /* Special case: see below. */

GCC_UNALIGNED_ACCESSORS(ovs_be16, be16);
GCC_UNALIGNED_ACCESSORS(ovs_be32, be32);
GCC_UNALIGNED_ACCESSORS(ovs_be64, be64);
/* Returns the uint64_t stored at the possibly misaligned address 'p_', in
 * host byte order.
 *
 * The eight bytes are read one at a time and assembled most significant
 * byte first; ntohll() then converts that big-endian interpretation to the
 * host's.
 *
 * Every byte that is shifted by 24 bits or more is widened to uint64_t
 * first.  Without the cast, 'p[4] << 24' is evaluated in 'int': for a byte
 * of 0x80 or above the result is negative, and converting it to uint64_t for
 * the bitwise OR sign-extends it, setting all of the upper 32 bits of the
 * result. */
static inline uint64_t
get_unaligned_u64__(const uint64_t *p_)
{
    const uint8_t *p = (const uint8_t *) p_;

    return ntohll(((uint64_t) p[0] << 56)
                  | ((uint64_t) p[1] << 48)
                  | ((uint64_t) p[2] << 40)
                  | ((uint64_t) p[3] << 32)
                  | ((uint64_t) p[4] << 24)
                  | ((uint64_t) p[5] << 16)
                  | ((uint64_t) p[6] << 8)
                  | p[7]);
}
* * This is special-cased because on some Linux targets, the kernel __u64 is * unsigned long long int and the userspace uint64_t is unsigned long int, so * that any single function prototype would fail to accept one or the other. * * Below, "sizeof (*(P) % 1)" verifies that *P has an integer type, since * operands to % must be integers. */ #define get_unaligned_u64(P) \ (BUILD_ASSERT(sizeof *(P) == 8), \ BUILD_ASSERT_GCCONLY(!TYPE_IS_SIGNED(typeof(*(P)))), \ (void) sizeof (*(P) % 1), \ get_unaligned_u64__((const uint64_t *) (P))) /* Stores 'x' at possibly misaligned address 'p'. * * put_unaligned_u64() could be overloaded in the same way as * get_unaligned_u64(), but so far it has not proven necessary. */ static inline void put_unaligned_u64(uint64_t *p, uint64_t x) { put_unaligned_u64__(p, x); } /* Returns the value in 'x'. */ static inline uint32_t get_16aligned_u32(const ovs_16aligned_u32 *x) { return ((uint32_t) x->hi << 16) | x->lo; } /* Stores 'value' in 'x'. */ static inline void put_16aligned_u32(ovs_16aligned_u32 *x, uint32_t value) { x->hi = value >> 16; x->lo = value; } /* Returns the value in 'x'. */ static inline uint64_t get_32aligned_u64(const ovs_32aligned_u64 *x) { return ((uint64_t) x->hi << 32) | x->lo; } /* Stores 'value' in 'x'. */ static inline void put_32aligned_u64(ovs_32aligned_u64 *x, uint64_t value) { x->hi = value >> 32; x->lo = value; } #ifndef __CHECKER__ /* Returns the value of 'x'. */ static inline ovs_be32 get_16aligned_be32(const ovs_16aligned_be32 *x) { #ifdef WORDS_BIGENDIAN return ((ovs_be32) x->hi << 16) | x->lo; #else return ((ovs_be32) x->lo << 16) | x->hi; #endif } /* Stores network byte order 'value' into 'x'. */ static inline void put_16aligned_be32(ovs_16aligned_be32 *x, ovs_be32 value) { #if WORDS_BIGENDIAN x->hi = value >> 16; x->lo = value; #else x->hi = value; x->lo = value >> 16; #endif } /* Returns the value of 'x'. 
*/ static inline ovs_be64 get_32aligned_be64(const ovs_32aligned_be64 *x) { #ifdef WORDS_BIGENDIAN return ((ovs_be64) x->hi << 32) | x->lo; #else return ((ovs_be64) x->lo << 32) | x->hi; #endif } /* Stores network byte order 'value' into 'x'. */ static inline void put_32aligned_be64(ovs_32aligned_be64 *x, ovs_be64 value) { #if WORDS_BIGENDIAN x->hi = value >> 32; x->lo = value; #else x->hi = value; x->lo = value >> 32; #endif } #else /* __CHECKER__ */ /* Making sparse happy with these functions also makes them unreadable, so * don't bother to show it their implementations. */ ovs_be32 get_16aligned_be32(const ovs_16aligned_be32 *); void put_16aligned_be32(ovs_16aligned_be32 *, ovs_be32); ovs_be64 get_32aligned_be64(const ovs_32aligned_be64 *); void put_32aligned_be64(ovs_32aligned_be64 *, ovs_be64); #endif #endif /* unaligned.h */ openvswitch-2.0.1+git20140120/lib/unicode.c000066400000000000000000000106461226605124000200210ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "unicode.h" #include #include "dynamic-string.h" #include "util.h" /* Returns the unicode code point corresponding to leading surrogate 'leading' * and trailing surrogate 'trailing'. The return value will not make any * sense if 'leading' or 'trailing' are not in the correct ranges for leading * or trailing surrogates. 
/* Combines UTF-16 leading surrogate 'leading' and trailing surrogate
 * 'trailing' into the code point that they encode.
 *
 * Only the low 10 bits of each argument are used, so the result is
 * meaningless unless 'leading' and 'trailing' really are in the leading and
 * trailing surrogate ranges, respectively. */
int
utf16_decode_surrogate_pair(int leading, int trailing)
{
    /*
     *  Leading surrogate:   110110wwwwxxxxxx
     * Trailing surrogate:   110111xxxxxxxxxx
     *         Code point:   000uuuuuxxxxxxxxxxxxxxxx    (uuuuu = wwww + 1)
     *
     * Adding 0x10000 to the 20 payload bits performs the "wwww + 1"
     * adjustment, because wwww occupies bits 16 and up of the payload.
     */
    int high_ten = leading & 0x3ff;
    int low_ten = trailing & 0x3ff;

    return 0x10000 + (high_ten << 10) + low_ten;
}
Checks that 's' is a valid, null-terminated UTF-8 string. If so, returns a * null pointer and sets '*lengthp' to the number of Unicode characters in * 's'. If not, returns an error message that the caller must free and sets * '*lengthp' to 0. * * 'lengthp' may be NULL if the length is not needed. */ char * utf8_validate(const char *s_, size_t *lengthp) { size_t length = 0; const uint8_t *s; for (s = (const uint8_t *) s_; *s != '\0'; ) { length++; if (s[0] < 0x80) { s++; } else { const struct utf8_sequence *seq; int i; seq = lookup_utf8_sequence(s[0]); if (!seq) { return invalid_utf8_sequence(s, 1, lengthp); } for (i = 1; seq->octets[i][0]; i++) { const uint8_t *o = seq->octets[i]; if (s[i] < o[0] || s[i] > o[1]) { return invalid_utf8_sequence(s, i + 1, lengthp); } } s += i; } } if (lengthp) { *lengthp = length; } return NULL; } openvswitch-2.0.1+git20140120/lib/unicode.h000066400000000000000000000030411226605124000200150ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef UNICODE_H #define UNICODE_H 1 #include #include #include "compiler.h" /* Returns true if 'c' is a Unicode code point, otherwise false. */ static inline bool uc_is_code_point(int c) { return c >= 0 && c <= 0x10ffff; } /* Returns true if 'c' is a Unicode code point for a leading surrogate. 
/* Returns true if 'c' is a Unicode code point for a leading or trailing
 * surrogate, that is, if it lies in the range 0xd800...0xdfff.
 *
 * That range is exactly the set of values whose bits above the low 11 are
 * equal to those of 0xd800, so a single masked comparison is enough. */
static inline bool
uc_is_surrogate(int c)
{
    return (c & ~0x7ff) == 0xd800;
}
*/ #include #include "unixctl.h" #include #include #include "coverage.h" #include "dirs.h" #include "dynamic-string.h" #include "json.h" #include "jsonrpc.h" #include "list.h" #include "poll-loop.h" #include "shash.h" #include "stream.h" #include "svec.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(unixctl); COVERAGE_DEFINE(unixctl_received); COVERAGE_DEFINE(unixctl_replied); struct unixctl_command { const char *usage; int min_args, max_args; unixctl_cb_func *cb; void *aux; }; struct unixctl_conn { struct list node; struct jsonrpc *rpc; /* Only one request can be in progress at a time. While the request is * being processed, 'request_id' is populated, otherwise it is null. */ struct json *request_id; /* ID of the currently active request. */ }; /* Server for control connection. */ struct unixctl_server { struct pstream *listener; struct list conns; }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); static struct shash commands = SHASH_INITIALIZER(&commands); static void unixctl_help(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct shash_node **nodes = shash_sort(&commands); size_t i; ds_put_cstr(&ds, "The available commands are:\n"); for (i = 0; i < shash_count(&commands); i++) { const struct shash_node *node = nodes[i]; const struct unixctl_command *command = node->data; ds_put_format(&ds, " %-23s %s\n", node->name, command->usage); } free(nodes); unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } static void unixctl_version(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { unixctl_command_reply(conn, get_program_version()); } /* Registers a unixctl command with the given 'name'. 'usage' describes the * arguments to the command; it is used only for presentation to the user in * "help" output. * * 'cb' is called when the command is received. 
It is passed an array * containing the command name and arguments, plus a copy of 'aux'. Normally * 'cb' should reply by calling unixctl_command_reply() or * unixctl_command_reply_error() before it returns, but if the command cannot * be handled immediately then it can defer the reply until later. A given * connection can only process a single request at a time, so a reply must be * made eventually to avoid blocking that connection. */ void unixctl_command_register(const char *name, const char *usage, int min_args, int max_args, unixctl_cb_func *cb, void *aux) { struct unixctl_command *command; struct unixctl_command *lookup = shash_find_data(&commands, name); ovs_assert(!lookup || lookup->cb == cb); if (lookup) { return; } command = xmalloc(sizeof *command); command->usage = usage; command->min_args = min_args; command->max_args = max_args; command->cb = cb; command->aux = aux; shash_add(&commands, name, command); } static void unixctl_command_reply__(struct unixctl_conn *conn, bool success, const char *body) { struct json *body_json; struct jsonrpc_msg *reply; COVERAGE_INC(unixctl_replied); ovs_assert(conn->request_id); if (!body) { body = ""; } if (body[0] && body[strlen(body) - 1] != '\n') { body_json = json_string_create_nocopy(xasprintf("%s\n", body)); } else { body_json = json_string_create(body); } if (success) { reply = jsonrpc_create_reply(body_json, conn->request_id); } else { reply = jsonrpc_create_error(body_json, conn->request_id); } /* If jsonrpc_send() returns an error, the run loop will take care of the * problem eventually. */ jsonrpc_send(conn->rpc, reply); json_destroy(conn->request_id); conn->request_id = NULL; } /* Replies to the active unixctl connection 'conn'. 'result' is sent to the * client indicating the command was processed successfully. Only one call to * unixctl_command_reply() or unixctl_command_reply_error() may be made per * request. 
*/ void unixctl_command_reply(struct unixctl_conn *conn, const char *result) { unixctl_command_reply__(conn, true, result); } /* Replies to the active unixctl connection 'conn'. 'error' is sent to the * client indicating an error occurred processing the command. Only one call to * unixctl_command_reply() or unixctl_command_reply_error() may be made per * request. */ void unixctl_command_reply_error(struct unixctl_conn *conn, const char *error) { unixctl_command_reply__(conn, false, error); } /* Creates a unixctl server listening on 'path', which may be: * * - NULL, in which case /..ctl is used. * * - "none", in which case the function will return successfully but * no socket will actually be created. * * - A name that does not start with '/', in which case it is put in * . * * - An absolute path (starting with '/') that gives the exact name of * the Unix domain socket to listen on. * * A program that (optionally) daemonizes itself should call this function * *after* daemonization, so that the socket name contains the pid of the * daemon instead of the pid of the program that exited. (Otherwise, * "ovs-appctl --target=" will fail.) * * Returns 0 if successful, otherwise a positive errno value. If successful, * sets '*serverp' to the new unixctl_server (or to NULL if 'path' was "none"), * otherwise to NULL. 
*/ int unixctl_server_create(const char *path, struct unixctl_server **serverp) { struct unixctl_server *server; struct pstream *listener; char *punix_path; int error; *serverp = NULL; if (path && !strcmp(path, "none")) { return 0; } if (path) { char *abs_path = abs_file_name(ovs_rundir(), path); punix_path = xasprintf("punix:%s", abs_path); free(abs_path); } else { punix_path = xasprintf("punix:%s/%s.%ld.ctl", ovs_rundir(), program_name, (long int) getpid()); } error = pstream_open(punix_path, &listener, 0); if (error) { ovs_error(error, "could not initialize control socket %s", punix_path); goto exit; } unixctl_command_register("help", "", 0, 0, unixctl_help, NULL); unixctl_command_register("version", "", 0, 0, unixctl_version, NULL); server = xmalloc(sizeof *server); server->listener = listener; list_init(&server->conns); *serverp = server; exit: free(punix_path); return error; } static void process_command(struct unixctl_conn *conn, struct jsonrpc_msg *request) { char *error = NULL; struct unixctl_command *command; struct json_array *params; COVERAGE_INC(unixctl_received); conn->request_id = json_clone(request->id); params = json_array(request->params); command = shash_find_data(&commands, request->method); if (!command) { error = xasprintf("\"%s\" is not a valid command", request->method); } else if (params->n < command->min_args) { error = xasprintf("\"%s\" command requires at least %d arguments", request->method, command->min_args); } else if (params->n > command->max_args) { error = xasprintf("\"%s\" command takes at most %d arguments", request->method, command->max_args); } else { struct svec argv = SVEC_EMPTY_INITIALIZER; int i; svec_add(&argv, request->method); for (i = 0; i < params->n; i++) { if (params->elems[i]->type != JSON_STRING) { error = xasprintf("\"%s\" command has non-string argument", request->method); break; } svec_add(&argv, json_string(params->elems[i])); } svec_terminate(&argv); if (!error) { command->cb(conn, argv.n, (const char **) 
argv.names, command->aux); } svec_destroy(&argv); } if (error) { unixctl_command_reply_error(conn, error); free(error); } } static int run_connection(struct unixctl_conn *conn) { int error, i; jsonrpc_run(conn->rpc); error = jsonrpc_get_status(conn->rpc); if (error || jsonrpc_get_backlog(conn->rpc)) { return error; } for (i = 0; i < 10; i++) { struct jsonrpc_msg *msg; if (error || conn->request_id) { break; } jsonrpc_recv(conn->rpc, &msg); if (msg) { if (msg->type == JSONRPC_REQUEST) { process_command(conn, msg); } else { VLOG_WARN_RL(&rl, "%s: received unexpected %s message", jsonrpc_get_name(conn->rpc), jsonrpc_msg_type_to_string(msg->type)); error = EINVAL; } jsonrpc_msg_destroy(msg); } error = error ? error : jsonrpc_get_status(conn->rpc); } return error; } static void kill_connection(struct unixctl_conn *conn) { list_remove(&conn->node); jsonrpc_close(conn->rpc); json_destroy(conn->request_id); free(conn); } void unixctl_server_run(struct unixctl_server *server) { struct unixctl_conn *conn, *next; int i; if (!server) { return; } for (i = 0; i < 10; i++) { struct stream *stream; int error; error = pstream_accept(server->listener, &stream); if (!error) { struct unixctl_conn *conn = xzalloc(sizeof *conn); list_push_back(&server->conns, &conn->node); conn->rpc = jsonrpc_open(stream); } else if (error == EAGAIN) { break; } else { VLOG_WARN_RL(&rl, "%s: accept failed: %s", pstream_get_name(server->listener), ovs_strerror(error)); } } LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) { int error = run_connection(conn); if (error && error != EAGAIN) { kill_connection(conn); } } } void unixctl_server_wait(struct unixctl_server *server) { struct unixctl_conn *conn; if (!server) { return; } pstream_wait(server->listener); LIST_FOR_EACH (conn, node, &server->conns) { jsonrpc_wait(conn->rpc); if (!jsonrpc_get_backlog(conn->rpc)) { jsonrpc_recv_wait(conn->rpc); } } } /* Destroys 'server' and stops listening for connections. 
*/ void unixctl_server_destroy(struct unixctl_server *server) { if (server) { struct unixctl_conn *conn, *next; LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) { kill_connection(conn); } pstream_close(server->listener); free(server); } } /* Connects to a unixctl server socket. 'path' should be the name of a unixctl * server socket. If it does not start with '/', it will be prefixed with the * rundir (e.g. /usr/local/var/run/openvswitch). * * Returns 0 if successful, otherwise a positive errno value. If successful, * sets '*client' to the new jsonrpc, otherwise to NULL. */ int unixctl_client_create(const char *path, struct jsonrpc **client) { char *abs_path, *unix_path; struct stream *stream; int error; *client = NULL; abs_path = abs_file_name(ovs_rundir(), path); unix_path = xasprintf("unix:%s", abs_path); error = stream_open_block(stream_open(unix_path, &stream, DSCP_DEFAULT), &stream); free(unix_path); free(abs_path); if (error) { VLOG_WARN("failed to connect to %s", path); return error; } *client = jsonrpc_open(stream); return 0; } /* Executes 'command' on the server with an argument vector 'argv' containing * 'argc' elements. If successfully communicated with the server, returns 0 * and sets '*result', or '*err' (not both) to the result or error the server * returned. Otherwise, sets '*result' and '*err' to NULL and returns a * positive errno value. The caller is responsible for freeing '*result' or * '*err' if not NULL. 
*/ int unixctl_client_transact(struct jsonrpc *client, const char *command, int argc, char *argv[], char **result, char **err) { struct jsonrpc_msg *request, *reply; struct json **json_args, *params; int error, i; *result = NULL; *err = NULL; json_args = xmalloc(argc * sizeof *json_args); for (i = 0; i < argc; i++) { json_args[i] = json_string_create(argv[i]); } params = json_array_create(json_args, argc); request = jsonrpc_create_request(command, params, NULL); error = jsonrpc_transact_block(client, request, &reply); if (error) { VLOG_WARN("error communicating with %s: %s", jsonrpc_get_name(client), ovs_retval_to_string(error)); return error; } if (reply->error) { if (reply->error->type == JSON_STRING) { *err = xstrdup(json_string(reply->error)); } else { VLOG_WARN("%s: unexpected error type in JSON RPC reply: %s", jsonrpc_get_name(client), json_type_to_string(reply->error->type)); error = EINVAL; } } else if (reply->result) { if (reply->result->type == JSON_STRING) { *result = xstrdup(json_string(reply->result)); } else { VLOG_WARN("%s: unexpected result type in JSON rpc reply: %s", jsonrpc_get_name(client), json_type_to_string(reply->result->type)); error = EINVAL; } } jsonrpc_msg_destroy(reply); return error; } openvswitch-2.0.1+git20140120/lib/unixctl.h000066400000000000000000000035711226605124000200650ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef UNIXCTL_H #define UNIXCTL_H 1 #ifdef __cplusplus extern "C" { #endif /* Server for Unix domain socket control connection. */ struct unixctl_server; int unixctl_server_create(const char *path, struct unixctl_server **); void unixctl_server_run(struct unixctl_server *); void unixctl_server_wait(struct unixctl_server *); void unixctl_server_destroy(struct unixctl_server *); /* Client for Unix domain socket control connection. */ struct jsonrpc; int unixctl_client_create(const char *path, struct jsonrpc **client); int unixctl_client_transact(struct jsonrpc *client, const char *command, int argc, char *argv[], char **result, char **error); /* Command registration. */ struct unixctl_conn; typedef void unixctl_cb_func(struct unixctl_conn *, int argc, const char *argv[], void *aux); void unixctl_command_register(const char *name, const char *usage, int min_args, int max_args, unixctl_cb_func *cb, void *aux); void unixctl_command_reply_error(struct unixctl_conn *, const char *error); void unixctl_command_reply(struct unixctl_conn *, const char *body); #ifdef __cplusplus } #endif #endif /* unixctl.h */ openvswitch-2.0.1+git20140120/lib/unixctl.man000066400000000000000000000007571226605124000204140ustar00rootroot00000000000000.IP "\fB\-\-unixctl=\fIsocket\fR" Sets the name of the control socket on which \fB\*(PN\fR listens for runtime management commands (see \fBRUNTIME MANAGEMENT COMMANDS\fR, below). If \fIsocket\fR does not begin with \fB/\fR, it is interpreted as relative to \fB@RUNDIR@\fR. If \fB\-\-unixctl\fR is not used at all, the default socket is \fB@RUNDIR@/\*(PN.\fIpid\fB.ctl\fR, where \fIpid\fR is \fB\*(PN\fR's process ID. Specifying \fBnone\fR for \fIsocket\fR disables the control socket feature. openvswitch-2.0.1+git20140120/lib/util.c000066400000000000000000000773551226605124000173620ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "util.h" #include #include #include #include #include #include #include #include #include #include #include "byte-order.h" #include "coverage.h" #include "ovs-thread.h" #include "vlog.h" #ifdef HAVE_PTHREAD_SET_NAME_NP #include #endif VLOG_DEFINE_THIS_MODULE(util); COVERAGE_DEFINE(util_xalloc); /* argv[0] without directory names. */ const char *program_name; /* Name for the currently running thread or process, for log messages, process * listings, and debuggers. */ DEFINE_PER_THREAD_MALLOCED_DATA(char *, subprogram_name); /* --version option output. */ static char *program_version; /* Buffer used by ovs_strerror(). */ DEFINE_STATIC_PER_THREAD_DATA(struct { char s[128]; }, strerror_buffer, { "" }); void ovs_assert_failure(const char *where, const char *function, const char *condition) { /* Prevent an infinite loop (or stack overflow) in case VLOG_ABORT happens * to trigger an assertion failure of its own. */ static int reentry = 0; switch (reentry++) { case 0: VLOG_ABORT("%s: assertion %s failed in %s()", where, condition, function); NOT_REACHED(); case 1: fprintf(stderr, "%s: assertion %s failed in %s()", where, condition, function); abort(); default: abort(); } } void out_of_memory(void) { ovs_abort(0, "virtual memory exhausted"); } void * xcalloc(size_t count, size_t size) { void *p = count && size ? 
calloc(count, size) : malloc(1); COVERAGE_INC(util_xalloc); if (p == NULL) { out_of_memory(); } return p; } void * xzalloc(size_t size) { return xcalloc(1, size); } void * xmalloc(size_t size) { void *p = malloc(size ? size : 1); COVERAGE_INC(util_xalloc); if (p == NULL) { out_of_memory(); } return p; } void * xrealloc(void *p, size_t size) { p = realloc(p, size ? size : 1); COVERAGE_INC(util_xalloc); if (p == NULL) { out_of_memory(); } return p; } void * xmemdup(const void *p_, size_t size) { void *p = xmalloc(size); memcpy(p, p_, size); return p; } char * xmemdup0(const char *p_, size_t length) { char *p = xmalloc(length + 1); memcpy(p, p_, length); p[length] = '\0'; return p; } char * xstrdup(const char *s) { return xmemdup0(s, strlen(s)); } char * xvasprintf(const char *format, va_list args) { va_list args2; size_t needed; char *s; va_copy(args2, args); needed = vsnprintf(NULL, 0, format, args); s = xmalloc(needed + 1); vsnprintf(s, needed + 1, format, args2); va_end(args2); return s; } void * x2nrealloc(void *p, size_t *n, size_t s) { *n = *n == 0 ? 1 : 2 * *n; return xrealloc(p, *n * s); } char * xasprintf(const char *format, ...) { va_list args; char *s; va_start(args, format); s = xvasprintf(format, args); va_end(args); return s; } /* Similar to strlcpy() from OpenBSD, but it never reads more than 'size - 1' * bytes from 'src' and doesn't return anything. */ void ovs_strlcpy(char *dst, const char *src, size_t size) { if (size > 0) { size_t len = strnlen(src, size - 1); memcpy(dst, src, len); dst[len] = '\0'; } } /* Copies 'src' to 'dst'. Reads no more than 'size - 1' bytes from 'src'. * Always null-terminates 'dst' (if 'size' is nonzero), and writes a zero byte * to every otherwise unused byte in 'dst'. * * Except for performance, the following call: * ovs_strzcpy(dst, src, size); * is equivalent to these two calls: * memset(dst, '\0', size); * ovs_strlcpy(dst, src, size); * * (Thus, ovs_strzcpy() is similar to strncpy() without some of the pitfalls.) 
*/ void ovs_strzcpy(char *dst, const char *src, size_t size) { if (size > 0) { size_t len = strnlen(src, size - 1); memcpy(dst, src, len); memset(dst + len, '\0', size - len); } } /* Prints 'format' on stderr, formatting it like printf() does. If 'err_no' is * nonzero, then it is formatted with ovs_retval_to_string() and appended to * the message inside parentheses. Then, terminates with abort(). * * This function is preferred to ovs_fatal() in a situation where it would make * sense for a monitoring process to restart the daemon. * * 'format' should not end with a new-line, because this function will add one * itself. */ void ovs_abort(int err_no, const char *format, ...) { va_list args; va_start(args, format); ovs_abort_valist(err_no, format, args); } /* Same as ovs_abort() except that the arguments are supplied as a va_list. */ void ovs_abort_valist(int err_no, const char *format, va_list args) { ovs_error_valist(err_no, format, args); abort(); } /* Prints 'format' on stderr, formatting it like printf() does. If 'err_no' is * nonzero, then it is formatted with ovs_retval_to_string() and appended to * the message inside parentheses. Then, terminates with EXIT_FAILURE. * * 'format' should not end with a new-line, because this function will add one * itself. */ void ovs_fatal(int err_no, const char *format, ...) { va_list args; va_start(args, format); ovs_fatal_valist(err_no, format, args); } /* Same as ovs_fatal() except that the arguments are supplied as a va_list. */ void ovs_fatal_valist(int err_no, const char *format, va_list args) { ovs_error_valist(err_no, format, args); exit(EXIT_FAILURE); } /* Prints 'format' on stderr, formatting it like printf() does. If 'err_no' is * nonzero, then it is formatted with ovs_retval_to_string() and appended to * the message inside parentheses. * * 'format' should not end with a new-line, because this function will add one * itself. */ void ovs_error(int err_no, const char *format, ...) 
{ va_list args; va_start(args, format); ovs_error_valist(err_no, format, args); va_end(args); } /* Same as ovs_error() except that the arguments are supplied as a va_list. */ void ovs_error_valist(int err_no, const char *format, va_list args) { const char *subprogram_name = get_subprogram_name(); int save_errno = errno; if (subprogram_name[0]) { fprintf(stderr, "%s(%s): ", program_name, subprogram_name); } else { fprintf(stderr, "%s: ", program_name); } vfprintf(stderr, format, args); if (err_no != 0) { fprintf(stderr, " (%s)", ovs_retval_to_string(err_no)); } putc('\n', stderr); errno = save_errno; } /* Many OVS functions return an int which is one of: * - 0: no error yet * - >0: errno value * - EOF: end of file (not necessarily an error; depends on the function called) * * Returns the appropriate human-readable string. The caller must copy the * string if it wants to hold onto it, as the storage may be overwritten on * subsequent function calls. */ const char * ovs_retval_to_string(int retval) { return (!retval ? "" : retval == EOF ? "End of file" : ovs_strerror(retval)); } const char * ovs_strerror(int error) { enum { BUFSIZE = sizeof strerror_buffer_get()->s }; int save_errno; char *buffer; char *s; save_errno = errno; buffer = strerror_buffer_get()->s; #if STRERROR_R_CHAR_P /* GNU style strerror_r() might return an immutable static string, or it * might write and return 'buffer', but in either case we can pass the * returned string directly to the caller. */ s = strerror_r(error, buffer, BUFSIZE); #else /* strerror_r() returns an int. */ s = buffer; if (strerror_r(error, buffer, BUFSIZE)) { /* strerror_r() is only allowed to fail on ERANGE (because the buffer * is too short). We don't check the actual failure reason because * POSIX requires strerror_r() to return the error but old glibc * (before 2.13) returns -1 and sets errno. 
*/ snprintf(buffer, BUFSIZE, "Unknown error %d", error); } #endif errno = save_errno; return s; } /* Sets global "program_name" and "program_version" variables. Should * be called at the beginning of main() with "argv[0]" as the argument * to 'argv0'. * * 'version' should contain the version of the caller's program. If 'version' * is the same as the VERSION #define, the caller is assumed to be part of Open * vSwitch. Otherwise, it is assumed to be an external program linking against * the Open vSwitch libraries. * * The 'date' and 'time' arguments should likely be called with * "__DATE__" and "__TIME__" to use the time the binary was built. * Alternatively, the "set_program_name" macro may be called to do this * automatically. */ void set_program_name__(const char *argv0, const char *version, const char *date, const char *time) { const char *slash = strrchr(argv0, '/'); assert_single_threaded(); program_name = slash ? slash + 1 : argv0; free(program_version); if (!strcmp(version, VERSION)) { program_version = xasprintf("%s (Open vSwitch) "VERSION"\n" "Compiled %s %s\n", program_name, date, time); } else { program_version = xasprintf("%s %s\n" "Open vSwitch Library "VERSION"\n" "Compiled %s %s\n", program_name, version, date, time); } } /* Returns the name of the currently running thread or process. */ const char * get_subprogram_name(void) { const char *name = subprogram_name_get(); return name ? name : ""; } /* Sets 'name' as the name of the currently running thread or process. (This * appears in log messages and may also be visible in system process listings * and debuggers.) */ void set_subprogram_name(const char *name) { const char *pname = name[0] ? 
name : program_name; free(subprogram_name_set(xstrdup(name))); #if HAVE_GLIBC_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), pname); #elif HAVE_NETBSD_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "%s", pname); #elif HAVE_PTHREAD_SET_NAME_NP pthread_set_name_np(pthread_self(), pname); #endif } /* Returns a pointer to a string describing the program version. The * caller must not modify or free the returned string. */ const char * get_program_version(void) { return program_version; } /* Print the version information for the program. */ void ovs_print_version(uint8_t min_ofp, uint8_t max_ofp) { printf("%s", program_version); if (min_ofp || max_ofp) { printf("OpenFlow versions %#x:%#x\n", min_ofp, max_ofp); } } /* Writes the 'size' bytes in 'buf' to 'stream' as hex bytes arranged 16 per * line. Numeric offsets are also included, starting at 'ofs' for the first * byte in 'buf'. If 'ascii' is true then the corresponding ASCII characters * are also rendered alongside. */ void ovs_hex_dump(FILE *stream, const void *buf_, size_t size, uintptr_t ofs, bool ascii) { const uint8_t *buf = buf_; const size_t per_line = 16; /* Maximum bytes per line. */ while (size > 0) { size_t start, end, n; size_t i; /* Number of bytes on this line. */ start = ofs % per_line; end = per_line; if (end - start > size) end = start + size; n = end - start; /* Print line. */ fprintf(stream, "%08jx ", (uintmax_t) ROUND_DOWN(ofs, per_line)); for (i = 0; i < start; i++) fprintf(stream, " "); for (; i < end; i++) fprintf(stream, "%02hhx%c", buf[i - start], i == per_line / 2 - 1? '-' : ' '); if (ascii) { for (; i < per_line; i++) fprintf(stream, " "); fprintf(stream, "|"); for (i = 0; i < start; i++) fprintf(stream, " "); for (; i < end; i++) { int c = buf[i - start]; putc(c >= 32 && c < 127 ? 
c : '.', stream); } for (; i < per_line; i++) fprintf(stream, " "); fprintf(stream, "|"); } fprintf(stream, "\n"); ofs += n; buf += n; size -= n; } } bool str_to_int(const char *s, int base, int *i) { long long ll; bool ok = str_to_llong(s, base, &ll); *i = ll; return ok; } bool str_to_long(const char *s, int base, long *li) { long long ll; bool ok = str_to_llong(s, base, &ll); *li = ll; return ok; } bool str_to_llong(const char *s, int base, long long *x) { int save_errno = errno; char *tail; errno = 0; *x = strtoll(s, &tail, base); if (errno == EINVAL || errno == ERANGE || tail == s || *tail != '\0') { errno = save_errno; *x = 0; return false; } else { errno = save_errno; return true; } } bool str_to_uint(const char *s, int base, unsigned int *u) { return str_to_int(s, base, (int *) u); } bool str_to_ulong(const char *s, int base, unsigned long *ul) { return str_to_long(s, base, (long *) ul); } bool str_to_ullong(const char *s, int base, unsigned long long *ull) { return str_to_llong(s, base, (long long *) ull); } /* Converts floating-point string 's' into a double. If successful, stores * the double in '*d' and returns true; on failure, stores 0 in '*d' and * returns false. * * Underflow (e.g. "1e-9999") is not considered an error, but overflow * (e.g. "1e9999)" is. */ bool str_to_double(const char *s, double *d) { int save_errno = errno; char *tail; errno = 0; *d = strtod(s, &tail); if (errno == EINVAL || (errno == ERANGE && *d != 0) || tail == s || *tail != '\0') { errno = save_errno; *d = 0; return false; } else { errno = save_errno; return true; } } /* Returns the value of 'c' as a hexadecimal digit. 
*/ int hexit_value(int c) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return c - '0'; case 'a': case 'A': return 0xa; case 'b': case 'B': return 0xb; case 'c': case 'C': return 0xc; case 'd': case 'D': return 0xd; case 'e': case 'E': return 0xe; case 'f': case 'F': return 0xf; default: return -1; } } /* Returns the integer value of the 'n' hexadecimal digits starting at 's', or * UINT_MAX if one of those "digits" is not really a hex digit. If 'ok' is * nonnull, '*ok' is set to true if the conversion succeeds or to false if a * non-hex digit is detected. */ unsigned int hexits_value(const char *s, size_t n, bool *ok) { unsigned int value; size_t i; value = 0; for (i = 0; i < n; i++) { int hexit = hexit_value(s[i]); if (hexit < 0) { if (ok) { *ok = false; } return UINT_MAX; } value = (value << 4) + hexit; } if (ok) { *ok = true; } return value; } /* Returns the current working directory as a malloc()'d string, or a null * pointer if the current working directory cannot be determined. */ char * get_cwd(void) { long int path_max; size_t size; /* Get maximum path length or at least a reasonable estimate. */ path_max = pathconf(".", _PC_PATH_MAX); size = (path_max < 0 ? 1024 : path_max > 10240 ? 10240 : path_max); /* Get current working directory. */ for (;;) { char *buf = xmalloc(size); if (getcwd(buf, size)) { return xrealloc(buf, strlen(buf) + 1); } else { int error = errno; free(buf); if (error != ERANGE) { VLOG_WARN("getcwd failed (%s)", ovs_strerror(error)); return NULL; } size *= 2; } } } static char * all_slashes_name(const char *s) { return xstrdup(s[0] == '/' && s[1] == '/' && s[2] != '/' ? "//" : s[0] == '/' ? "/" : "."); } /* Returns the directory name portion of 'file_name' as a malloc()'d string, * similar to the POSIX dirname() function but thread-safe. 
*/ char * dir_name(const char *file_name) { size_t len = strlen(file_name); while (len > 0 && file_name[len - 1] == '/') { len--; } while (len > 0 && file_name[len - 1] != '/') { len--; } while (len > 0 && file_name[len - 1] == '/') { len--; } return len ? xmemdup0(file_name, len) : all_slashes_name(file_name); } /* Returns the file name portion of 'file_name' as a malloc()'d string, * similar to the POSIX basename() function but thread-safe. */ char * base_name(const char *file_name) { size_t end, start; end = strlen(file_name); while (end > 0 && file_name[end - 1] == '/') { end--; } if (!end) { return all_slashes_name(file_name); } start = end; while (start > 0 && file_name[start - 1] != '/') { start--; } return xmemdup0(file_name + start, end - start); } /* If 'file_name' starts with '/', returns a copy of 'file_name'. Otherwise, * returns an absolute path to 'file_name' considering it relative to 'dir', * which itself must be absolute. 'dir' may be null or the empty string, in * which case the current working directory is used. * * Returns a null pointer if 'dir' is null and getcwd() fails. */ char * abs_file_name(const char *dir, const char *file_name) { if (file_name[0] == '/') { return xstrdup(file_name); } else if (dir && dir[0]) { char *separator = dir[strlen(dir) - 1] == '/' ? "" : "/"; return xasprintf("%s%s%s", dir, separator, file_name); } else { char *cwd = get_cwd(); if (cwd) { char *abs_name = xasprintf("%s/%s", cwd, file_name); free(cwd); return abs_name; } else { return NULL; } } } /* Like readlink(), but returns the link name as a null-terminated string in * allocated memory that the caller must eventually free (with free()). * Returns NULL on error, in which case errno is set appropriately. 
*/ char * xreadlink(const char *filename) { size_t size; for (size = 64; ; size *= 2) { char *buf = xmalloc(size); ssize_t retval = readlink(filename, buf, size); int error = errno; if (retval >= 0 && retval < size) { buf[retval] = '\0'; return buf; } free(buf); if (retval < 0) { errno = error; return NULL; } } } /* Returns a version of 'filename' with symlinks in the final component * dereferenced. This differs from realpath() in that: * * - 'filename' need not exist. * * - If 'filename' does exist as a symlink, its referent need not exist. * * - Only symlinks in the final component of 'filename' are dereferenced. * * The caller must eventually free the returned string (with free()). */ char * follow_symlinks(const char *filename) { struct stat s; char *fn; int i; fn = xstrdup(filename); for (i = 0; i < 10; i++) { char *linkname; char *next_fn; if (lstat(fn, &s) != 0 || !S_ISLNK(s.st_mode)) { return fn; } linkname = xreadlink(fn); if (!linkname) { VLOG_WARN("%s: readlink failed (%s)", filename, ovs_strerror(errno)); return fn; } if (linkname[0] == '/') { /* Target of symlink is absolute so use it raw. */ next_fn = linkname; } else { /* Target of symlink is relative so add to 'fn''s directory. */ char *dir = dir_name(fn); if (!strcmp(dir, ".")) { next_fn = linkname; } else { char *separator = dir[strlen(dir) - 1] == '/' ? "" : "/"; next_fn = xasprintf("%s%s%s", dir, separator, linkname); free(linkname); } free(dir); } free(fn); fn = next_fn; } VLOG_WARN("%s: too many levels of symlinks", filename); free(fn); return xstrdup(filename); } /* Pass a value to this function if it is marked with * __attribute__((warn_unused_result)) and you genuinely want to ignore * its return value. (Note that every scalar type can be implicitly * converted to bool.) */ void ignore(bool x OVS_UNUSED) { } /* Returns an appropriate delimiter for inserting just before the 0-based item * 'index' in a list that has 'total' items in it. 
*/ const char * english_list_delimiter(size_t index, size_t total) { return (index == 0 ? "" : index < total - 1 ? ", " : total > 2 ? ", and " : " and "); } /* Given a 32 bit word 'n', calculates floor(log_2('n')). This is equivalent * to finding the bit position of the most significant one bit in 'n'. It is * an error to call this function with 'n' == 0. */ int log_2_floor(uint32_t n) { ovs_assert(n); #if !defined(UINT_MAX) || !defined(UINT32_MAX) #error "Someone screwed up the #includes." #elif __GNUC__ >= 4 && UINT_MAX == UINT32_MAX return 31 - __builtin_clz(n); #else { int log = 0; #define BIN_SEARCH_STEP(BITS) \ if (n >= (1 << BITS)) { \ log += BITS; \ n >>= BITS; \ } BIN_SEARCH_STEP(16); BIN_SEARCH_STEP(8); BIN_SEARCH_STEP(4); BIN_SEARCH_STEP(2); BIN_SEARCH_STEP(1); #undef BIN_SEARCH_STEP return log; } #endif } /* Given a 32 bit word 'n', calculates ceil(log_2('n')). It is an error to * call this function with 'n' == 0. */ int log_2_ceil(uint32_t n) { return log_2_floor(n) + !is_pow2(n); } /* Returns the number of trailing 0-bits in 'n'. Undefined if 'n' == 0. */ #if !defined(UINT_MAX) || !defined(UINT32_MAX) #error "Someone screwed up the #includes." #elif __GNUC__ >= 4 && UINT_MAX == UINT32_MAX /* Defined inline in util.h. */ #else static int raw_ctz(uint32_t n) { unsigned int k; int count = 31; #define CTZ_STEP(X) \ k = n << (X); \ if (k) { \ count -= X; \ n = k; \ } CTZ_STEP(16); CTZ_STEP(8); CTZ_STEP(4); CTZ_STEP(2); CTZ_STEP(1); #undef CTZ_STEP return count; } #endif /* Returns the number of 1-bits in 'x', between 0 and 32 inclusive. */ unsigned int popcount(uint32_t x) { /* In my testing, this implementation is over twice as fast as any other * portable implementation that I tried, including GCC 4.4 * __builtin_popcount(), although nonportable asm("popcnt") was over 50% * faster. 
*/ #define INIT1(X) \ ((((X) & (1 << 0)) != 0) + \ (((X) & (1 << 1)) != 0) + \ (((X) & (1 << 2)) != 0) + \ (((X) & (1 << 3)) != 0) + \ (((X) & (1 << 4)) != 0) + \ (((X) & (1 << 5)) != 0) + \ (((X) & (1 << 6)) != 0) + \ (((X) & (1 << 7)) != 0)) #define INIT2(X) INIT1(X), INIT1((X) + 1) #define INIT4(X) INIT2(X), INIT2((X) + 2) #define INIT8(X) INIT4(X), INIT4((X) + 4) #define INIT16(X) INIT8(X), INIT8((X) + 8) #define INIT32(X) INIT16(X), INIT16((X) + 16) #define INIT64(X) INIT32(X), INIT32((X) + 32) static const uint8_t popcount8[256] = { INIT64(0), INIT64(64), INIT64(128), INIT64(192) }; return (popcount8[x & 0xff] + popcount8[(x >> 8) & 0xff] + popcount8[(x >> 16) & 0xff] + popcount8[x >> 24]); } /* Returns true if the 'n' bytes starting at 'p' are zeros. */ bool is_all_zeros(const uint8_t *p, size_t n) { size_t i; for (i = 0; i < n; i++) { if (p[i] != 0x00) { return false; } } return true; } /* Returns true if the 'n' bytes starting at 'p' are 0xff. */ bool is_all_ones(const uint8_t *p, size_t n) { size_t i; for (i = 0; i < n; i++) { if (p[i] != 0xff) { return false; } } return true; } /* Copies 'n_bits' bits starting from bit 'src_ofs' in 'src' to the 'n_bits' * starting from bit 'dst_ofs' in 'dst'. 'src' is 'src_len' bytes long and * 'dst' is 'dst_len' bytes long. * * If you consider all of 'src' to be a single unsigned integer in network byte * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit * with value 1 in src[src_len - 1], bit 1 is the bit with value 2, bit 2 is * the bit with value 4, ..., bit 8 is the bit with value 1 in src[src_len - * 2], and so on. Similarly for 'dst'. * * Required invariants: * src_ofs + n_bits <= src_len * 8 * dst_ofs + n_bits <= dst_len * 8 * 'src' and 'dst' must not overlap. 
*/ void bitwise_copy(const void *src_, unsigned int src_len, unsigned int src_ofs, void *dst_, unsigned int dst_len, unsigned int dst_ofs, unsigned int n_bits) { const uint8_t *src = src_; uint8_t *dst = dst_; src += src_len - (src_ofs / 8 + 1); src_ofs %= 8; dst += dst_len - (dst_ofs / 8 + 1); dst_ofs %= 8; if (src_ofs == 0 && dst_ofs == 0) { unsigned int n_bytes = n_bits / 8; if (n_bytes) { dst -= n_bytes - 1; src -= n_bytes - 1; memcpy(dst, src, n_bytes); n_bits %= 8; src--; dst--; } if (n_bits) { uint8_t mask = (1 << n_bits) - 1; *dst = (*dst & ~mask) | (*src & mask); } } else { while (n_bits > 0) { unsigned int max_copy = 8 - MAX(src_ofs, dst_ofs); unsigned int chunk = MIN(n_bits, max_copy); uint8_t mask = ((1 << chunk) - 1) << dst_ofs; *dst &= ~mask; *dst |= ((*src >> src_ofs) << dst_ofs) & mask; src_ofs += chunk; if (src_ofs == 8) { src--; src_ofs = 0; } dst_ofs += chunk; if (dst_ofs == 8) { dst--; dst_ofs = 0; } n_bits -= chunk; } } } /* Zeros the 'n_bits' bits starting from bit 'dst_ofs' in 'dst'. 'dst' is * 'dst_len' bytes long. * * If you consider all of 'dst' to be a single unsigned integer in network byte * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit * with value 1 in dst[dst_len - 1], bit 1 is the bit with value 2, bit 2 is * the bit with value 4, ..., bit 8 is the bit with value 1 in dst[dst_len - * 2], and so on. * * Required invariant: * dst_ofs + n_bits <= dst_len * 8 */ void bitwise_zero(void *dst_, unsigned int dst_len, unsigned dst_ofs, unsigned int n_bits) { uint8_t *dst = dst_; if (!n_bits) { return; } dst += dst_len - (dst_ofs / 8 + 1); dst_ofs %= 8; if (dst_ofs) { unsigned int chunk = MIN(n_bits, 8 - dst_ofs); *dst &= ~(((1 << chunk) - 1) << dst_ofs); n_bits -= chunk; if (!n_bits) { return; } dst--; } while (n_bits >= 8) { *dst-- = 0; n_bits -= 8; } if (n_bits) { *dst &= ~((1 << n_bits) - 1); } } /* Sets to 1 all of the 'n_bits' bits starting from bit 'dst_ofs' in 'dst'. * 'dst' is 'dst_len' bytes long. 
* * If you consider all of 'dst' to be a single unsigned integer in network byte * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit * with value 1 in dst[dst_len - 1], bit 1 is the bit with value 2, bit 2 is * the bit with value 4, ..., bit 8 is the bit with value 1 in dst[dst_len - * 2], and so on. * * Required invariant: * dst_ofs + n_bits <= dst_len * 8 */ void bitwise_one(void *dst_, unsigned int dst_len, unsigned dst_ofs, unsigned int n_bits) { uint8_t *dst = dst_; if (!n_bits) { return; } dst += dst_len - (dst_ofs / 8 + 1); dst_ofs %= 8; if (dst_ofs) { unsigned int chunk = MIN(n_bits, 8 - dst_ofs); *dst |= ((1 << chunk) - 1) << dst_ofs; n_bits -= chunk; if (!n_bits) { return; } dst--; } while (n_bits >= 8) { *dst-- = 0xff; n_bits -= 8; } if (n_bits) { *dst |= (1 << n_bits) - 1; } } /* Scans the 'n_bits' bits starting from bit 'dst_ofs' in 'dst' for 1-bits. * Returns false if any 1-bits are found, otherwise true. 'dst' is 'dst_len' * bytes long. * * If you consider all of 'dst' to be a single unsigned integer in network byte * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit * with value 1 in dst[dst_len - 1], bit 1 is the bit with value 2, bit 2 is * the bit with value 4, ..., bit 8 is the bit with value 1 in dst[dst_len - * 2], and so on. 
* * Required invariant: * dst_ofs + n_bits <= dst_len * 8 */ bool bitwise_is_all_zeros(const void *p_, unsigned int len, unsigned int ofs, unsigned int n_bits) { const uint8_t *p = p_; if (!n_bits) { return true; } p += len - (ofs / 8 + 1); ofs %= 8; if (ofs) { unsigned int chunk = MIN(n_bits, 8 - ofs); if (*p & (((1 << chunk) - 1) << ofs)) { return false; } n_bits -= chunk; if (!n_bits) { return true; } p--; } while (n_bits >= 8) { if (*p) { return false; } n_bits -= 8; p--; } if (n_bits && *p & ((1 << n_bits) - 1)) { return false; } return true; } /* Copies the 'n_bits' low-order bits of 'value' into the 'n_bits' bits * starting at bit 'dst_ofs' in 'dst', which is 'dst_len' bytes long. * * If you consider all of 'dst' to be a single unsigned integer in network byte * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit * with value 1 in dst[dst_len - 1], bit 1 is the bit with value 2, bit 2 is * the bit with value 4, ..., bit 8 is the bit with value 1 in dst[dst_len - * 2], and so on. * * Required invariants: * dst_ofs + n_bits <= dst_len * 8 * n_bits <= 64 */ void bitwise_put(uint64_t value, void *dst, unsigned int dst_len, unsigned int dst_ofs, unsigned int n_bits) { ovs_be64 n_value = htonll(value); bitwise_copy(&n_value, sizeof n_value, 0, dst, dst_len, dst_ofs, n_bits); } /* Returns the value of the 'n_bits' bits starting at bit 'src_ofs' in 'src', * which is 'src_len' bytes long. * * If you consider all of 'src' to be a single unsigned integer in network byte * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit * with value 1 in src[src_len - 1], bit 1 is the bit with value 2, bit 2 is * the bit with value 4, ..., bit 8 is the bit with value 1 in src[src_len - * 2], and so on. 
* * Required invariants: * src_ofs + n_bits <= src_len * 8 * n_bits <= 64 */ uint64_t bitwise_get(const void *src, unsigned int src_len, unsigned int src_ofs, unsigned int n_bits) { ovs_be64 value = htonll(0); bitwise_copy(src, src_len, src_ofs, &value, sizeof value, 0, n_bits); return ntohll(value); } openvswitch-2.0.1+git20140120/lib/util.h000066400000000000000000000277151226605124000173620ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef UTIL_H #define UTIL_H 1 #include #include #include #include #include #include #include #include #include "compiler.h" #include "openvswitch/types.h" #ifndef va_copy #ifdef __va_copy #define va_copy __va_copy #else #define va_copy(dst, src) ((dst) = (src)) #endif #endif #ifdef __CHECKER__ #define BUILD_ASSERT(EXPR) ((void) 0) #define BUILD_ASSERT_DECL(EXPR) extern int (*build_assert(void))[1] #elif !defined(__cplusplus) /* Build-time assertion building block. */ #define BUILD_ASSERT__(EXPR) \ sizeof(struct { unsigned int build_assert_failed : (EXPR) ? 1 : -1; }) /* Build-time assertion for use in a statement context. */ #define BUILD_ASSERT(EXPR) (void) BUILD_ASSERT__(EXPR) /* Build-time assertion for use in a declaration context. 
*/ #define BUILD_ASSERT_DECL(EXPR) \ extern int (*build_assert(void))[BUILD_ASSERT__(EXPR)] #else /* __cplusplus */ #include #define BUILD_ASSERT BOOST_STATIC_ASSERT #define BUILD_ASSERT_DECL BOOST_STATIC_ASSERT #endif /* __cplusplus */ #ifdef __GNUC__ #define BUILD_ASSERT_GCCONLY(EXPR) BUILD_ASSERT(EXPR) #define BUILD_ASSERT_DECL_GCCONLY(EXPR) BUILD_ASSERT_DECL(EXPR) #else #define BUILD_ASSERT_GCCONLY(EXPR) ((void) 0) #define BUILD_ASSERT_DECL_GCCONLY(EXPR) ((void) 0) #endif /* Like the standard assert macro, except: * * - Writes the failure message to the log. * * - Not affected by NDEBUG. */ #define ovs_assert(CONDITION) \ if (!OVS_LIKELY(CONDITION)) { \ ovs_assert_failure(SOURCE_LOCATOR, __func__, #CONDITION); \ } void ovs_assert_failure(const char *, const char *, const char *) NO_RETURN; /* Casts 'pointer' to 'type' and issues a compiler warning if the cast changes * anything other than an outermost "const" or "volatile" qualifier. * * The cast to int is present only to suppress an "expression using sizeof * bool" warning from "sparse" (see * http://permalink.gmane.org/gmane.comp.parsers.sparse/2967). */ #define CONST_CAST(TYPE, POINTER) \ ((void) sizeof ((int) ((POINTER) == (TYPE) (POINTER))), \ (TYPE) (POINTER)) extern const char *program_name; /* Returns the number of elements in ARRAY. */ #define ARRAY_SIZE(ARRAY) (sizeof ARRAY / sizeof *ARRAY) /* Returns X / Y, rounding up. X must be nonnegative to round correctly. */ #define DIV_ROUND_UP(X, Y) (((X) + ((Y) - 1)) / (Y)) /* Returns X rounded up to the nearest multiple of Y. */ #define ROUND_UP(X, Y) (DIV_ROUND_UP(X, Y) * (Y)) /* Returns X rounded down to the nearest multiple of Y. */ #define ROUND_DOWN(X, Y) ((X) / (Y) * (Y)) /* Returns true if X is a power of 2, otherwise false. */ #define IS_POW2(X) ((X) && !((X) & ((X) - 1))) static inline bool is_pow2(uintmax_t x) { return IS_POW2(x); } /* Returns X rounded up to a power of 2. X must be a constant expression. 
*/ #define ROUND_UP_POW2(X) RUP2__(X) #define RUP2__(X) (RUP2_1(X) + 1) #define RUP2_1(X) (RUP2_2(X) | (RUP2_2(X) >> 16)) #define RUP2_2(X) (RUP2_3(X) | (RUP2_3(X) >> 8)) #define RUP2_3(X) (RUP2_4(X) | (RUP2_4(X) >> 4)) #define RUP2_4(X) (RUP2_5(X) | (RUP2_5(X) >> 2)) #define RUP2_5(X) (RUP2_6(X) | (RUP2_6(X) >> 1)) #define RUP2_6(X) ((X) - 1) /* Returns X rounded down to a power of 2. X must be a constant expression. */ #define ROUND_DOWN_POW2(X) RDP2__(X) #define RDP2__(X) (RDP2_1(X) - (RDP2_1(X) >> 1)) #define RDP2_1(X) (RDP2_2(X) | (RDP2_2(X) >> 16)) #define RDP2_2(X) (RDP2_3(X) | (RDP2_3(X) >> 8)) #define RDP2_3(X) (RDP2_4(X) | (RDP2_4(X) >> 4)) #define RDP2_4(X) (RDP2_5(X) | (RDP2_5(X) >> 2)) #define RDP2_5(X) ( (X) | ( (X) >> 1)) #ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #endif #ifndef MAX #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) #endif #define NOT_REACHED() abort() /* Expands to a string that looks like ":", e.g. "tmp.c:10". * * See http://c-faq.com/ansi/stringize.html for an explanation of STRINGIZE and * STRINGIZE2. */ #define SOURCE_LOCATOR __FILE__ ":" STRINGIZE(__LINE__) #define STRINGIZE(ARG) STRINGIZE2(ARG) #define STRINGIZE2(ARG) #ARG /* Given a pointer-typed lvalue OBJECT, expands to a pointer type that may be * assigned to OBJECT. */ #ifdef __GNUC__ #define OVS_TYPEOF(OBJECT) typeof(OBJECT) #else #define OVS_TYPEOF(OBJECT) void * #endif /* Given OBJECT of type pointer-to-structure, expands to the offset of MEMBER * within an instance of the structure. * * The GCC-specific version avoids the technicality of undefined behavior if * OBJECT is null, invalid, or not yet initialized. This makes some static * checkers (like Coverity) happier. But the non-GCC version does not actually * dereference any pointer, so it would be surprising for it to cause any * problems in practice. 
*/ #ifdef __GNUC__ #define OBJECT_OFFSETOF(OBJECT, MEMBER) offsetof(typeof(*(OBJECT)), MEMBER) #else #define OBJECT_OFFSETOF(OBJECT, MEMBER) \ ((char *) &(OBJECT)->MEMBER - (char *) (OBJECT)) #endif /* Given POINTER, the address of the given MEMBER in a STRUCT object, returns the STRUCT object. */ #define CONTAINER_OF(POINTER, STRUCT, MEMBER) \ ((STRUCT *) (void *) ((char *) (POINTER) - offsetof (STRUCT, MEMBER))) /* Given POINTER, the address of the given MEMBER within an object of the type * that that OBJECT points to, returns OBJECT as an assignment-compatible * pointer type (either the correct pointer type or "void *"). OBJECT must be * an lvalue. * * This is the same as CONTAINER_OF except that it infers the structure type * from the type of '*OBJECT'. */ #define OBJECT_CONTAINING(POINTER, OBJECT, MEMBER) \ ((OVS_TYPEOF(OBJECT)) (void *) \ ((char *) (POINTER) - OBJECT_OFFSETOF(OBJECT, MEMBER))) /* Given POINTER, the address of the given MEMBER within an object of the type * that that OBJECT points to, assigns the address of the outer object to * OBJECT, which must be an lvalue. * * Evaluates to (void) 0 as the result is not to be used. */ #define ASSIGN_CONTAINER(OBJECT, POINTER, MEMBER) \ ((OBJECT) = OBJECT_CONTAINING(POINTER, OBJECT, MEMBER), (void) 0) /* Given ATTR, and TYPE, cast the ATTR to TYPE by first casting ATTR to * (void *). This is to suppress the alignment warning issued by clang. 
*/ #define ALIGNED_CAST(TYPE, ATTR) ((TYPE) (void *) (ATTR)) #ifdef __cplusplus extern "C" { #endif void set_program_name__(const char *name, const char *version, const char *date, const char *time); #define set_program_name(name) \ set_program_name__(name, VERSION, __DATE__, __TIME__) const char *get_subprogram_name(void); void set_subprogram_name(const char *name); const char *get_program_version(void); void ovs_print_version(uint8_t min_ofp, uint8_t max_ofp); void out_of_memory(void) NO_RETURN; void *xmalloc(size_t) MALLOC_LIKE; void *xcalloc(size_t, size_t) MALLOC_LIKE; void *xzalloc(size_t) MALLOC_LIKE; void *xrealloc(void *, size_t); void *xmemdup(const void *, size_t) MALLOC_LIKE; char *xmemdup0(const char *, size_t) MALLOC_LIKE; char *xstrdup(const char *) MALLOC_LIKE; char *xasprintf(const char *format, ...) PRINTF_FORMAT(1, 2) MALLOC_LIKE; char *xvasprintf(const char *format, va_list) PRINTF_FORMAT(1, 0) MALLOC_LIKE; void *x2nrealloc(void *p, size_t *n, size_t s); void ovs_strlcpy(char *dst, const char *src, size_t size); void ovs_strzcpy(char *dst, const char *src, size_t size); void ovs_abort(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3) NO_RETURN; void ovs_abort_valist(int err_no, const char *format, va_list) PRINTF_FORMAT(2, 0) NO_RETURN; void ovs_fatal(int err_no, const char *format, ...) PRINTF_FORMAT(2, 3) NO_RETURN; void ovs_fatal_valist(int err_no, const char *format, va_list) PRINTF_FORMAT(2, 0) NO_RETURN; void ovs_error(int err_no, const char *format, ...) 
PRINTF_FORMAT(2, 3); void ovs_error_valist(int err_no, const char *format, va_list) PRINTF_FORMAT(2, 0); const char *ovs_retval_to_string(int); const char *ovs_strerror(int); void ovs_hex_dump(FILE *, const void *, size_t, uintptr_t offset, bool ascii); bool str_to_int(const char *, int base, int *); bool str_to_long(const char *, int base, long *); bool str_to_llong(const char *, int base, long long *); bool str_to_uint(const char *, int base, unsigned int *); bool str_to_ulong(const char *, int base, unsigned long *); bool str_to_ullong(const char *, int base, unsigned long long *); bool str_to_double(const char *, double *); int hexit_value(int c); unsigned int hexits_value(const char *s, size_t n, bool *ok); const char *english_list_delimiter(size_t index, size_t total); char *get_cwd(void); char *dir_name(const char *file_name); char *base_name(const char *file_name); char *abs_file_name(const char *dir, const char *file_name); char *xreadlink(const char *filename); char *follow_symlinks(const char *filename); void ignore(bool x OVS_UNUSED); /* Bitwise tests. */ /* Returns the number of trailing 0-bits in 'n'. Undefined if 'n' == 0. * * This compiles to a single machine instruction ("bsf") with GCC on x86. */ #if !defined(UINT_MAX) || !defined(UINT32_MAX) #error "Someone screwed up the #includes." #elif __GNUC__ >= 4 && UINT_MAX == UINT32_MAX static inline int raw_ctz(uint32_t n) { return __builtin_ctz(n); } #else /* Defined in util.c. */ int raw_ctz(uint32_t n); #endif /* Returns the number of trailing 0-bits in 'n', or 32 if 'n' is 0. */ static inline int ctz(uint32_t n) { return n ? raw_ctz(n) : 32; } int log_2_floor(uint32_t); int log_2_ceil(uint32_t); unsigned int popcount(uint32_t); /* Returns the rightmost 1-bit in 'x' (e.g. 01011000 => 00001000), or 0 if 'x' * is 0. */ static inline uintmax_t rightmost_1bit(uintmax_t x) { return x & -x; } /* Returns 'x' with its rightmost 1-bit changed to a zero (e.g. 01011000 => * 01010000), or 0 if 'x' is 0. 
*/ static inline uintmax_t zero_rightmost_1bit(uintmax_t x) { return x & (x - 1); } /* Returns the index of the rightmost 1-bit in 'x' (e.g. 01011000 => 3), or 32 * if 'x' is 0. * * Unlike the other functions for rightmost 1-bits, this function only works * with 32-bit integers. */ static inline uint32_t rightmost_1bit_idx(uint32_t x) { return x ? ctz(x) : 32; } /* Returns the index of the leftmost 1-bit in 'x' (e.g. 01011000 => 6), or 32 * if 'x' is 0. * * This function only works with 32-bit integers. */ static inline uint32_t leftmost_1bit_idx(uint32_t x) { return x ? log_2_floor(x) : 32; } bool is_all_zeros(const uint8_t *, size_t); bool is_all_ones(const uint8_t *, size_t); void bitwise_copy(const void *src, unsigned int src_len, unsigned int src_ofs, void *dst, unsigned int dst_len, unsigned int dst_ofs, unsigned int n_bits); void bitwise_zero(void *dst_, unsigned int dst_len, unsigned dst_ofs, unsigned int n_bits); void bitwise_one(void *dst_, unsigned int dst_len, unsigned dst_ofs, unsigned int n_bits); bool bitwise_is_all_zeros(const void *, unsigned int len, unsigned int ofs, unsigned int n_bits); void bitwise_put(uint64_t value, void *dst, unsigned int dst_len, unsigned int dst_ofs, unsigned int n_bits); uint64_t bitwise_get(const void *src, unsigned int src_len, unsigned int src_ofs, unsigned int n_bits); #ifdef __cplusplus } #endif #endif /* util.h */ openvswitch-2.0.1+git20140120/lib/uuid.c000066400000000000000000000157011226605124000173360ustar00rootroot00000000000000/* Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "uuid.h" #include #include #include #include #include #include #include "aes128.h" #include "entropy.h" #include "ovs-thread.h" #include "sha1.h" #include "timeval.h" #include "util.h" static struct aes128 key; static uint64_t counter[2]; BUILD_ASSERT_DECL(sizeof counter == 16); static void do_init(void); /* * Initialize the UUID module. Aborts the program with an error message if * initialization fails (which should never happen on a properly configured * machine.) * * Currently initialization is only needed by uuid_generate(). uuid_generate() * will automatically call uuid_init() itself, so it's only necessary to call * this function explicitly if you want to abort the program earlier than the * first UUID generation in case of failure. */ void uuid_init(void) { static pthread_once_t once = PTHREAD_ONCE_INIT; pthread_once(&once, do_init); } /* Generates a new random UUID in 'uuid'. * * We go to some trouble to ensure as best we can that the generated UUID has * these properties: * * - Uniqueness. The random number generator is seeded using both the * system clock and the system random number generator, plus a few * other identifiers, which is about as good as we can get in any kind * of simple way. * * - Unpredictability. In some situations it could be bad for an * adversary to be able to guess the next UUID to be generated with some * probability of success. This property may or may not be important * for our purposes, but it is better if we can get it. 
* * To ensure both of these, we start by taking our seed data and passing it * through SHA-1. We use the result as an AES-128 key. We also generate a * random 16-byte value[*] which we then use as the counter for CTR mode. To * generate a UUID in a manner compliant with the above goals, we merely * increment the counter and encrypt it. * * [*] It is not actually important that the initial value of the counter be * random. AES-128 in counter mode is secure either way. */ void uuid_generate(struct uuid *uuid) { static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; uint64_t copy[2]; uuid_init(); /* Copy out the counter's current value, then increment it. */ ovs_mutex_lock(&mutex); copy[0] = counter[0]; copy[1] = counter[1]; if (++counter[1] == 0) { counter[0]++; } ovs_mutex_unlock(&mutex); /* AES output is exactly 16 bytes, so we encrypt directly into 'uuid'. */ aes128_encrypt(&key, copy, uuid); /* Set bits to indicate a random UUID. See RFC 4122 section 4.4. */ uuid->parts[2] &= ~0xc0000000; uuid->parts[2] |= 0x80000000; uuid->parts[1] &= ~0x0000f000; uuid->parts[1] |= 0x00004000; } /* Sets 'uuid' to all-zero-bits. */ void uuid_zero(struct uuid *uuid) { uuid->parts[0] = uuid->parts[1] = uuid->parts[2] = uuid->parts[3] = 0; } /* Returns true if 'uuid' is all zero, otherwise false. */ bool uuid_is_zero(const struct uuid *uuid) { return (!uuid->parts[0] && !uuid->parts[1] && !uuid->parts[2] && !uuid->parts[3]); } /* Compares 'a' and 'b'. Returns a negative value if 'a < b', zero if 'a == * b', or positive if 'a > b'. The ordering is lexicographical order of the * conventional way of writing out UUIDs as strings. */ int uuid_compare_3way(const struct uuid *a, const struct uuid *b) { if (a->parts[0] != b->parts[0]) { return a->parts[0] > b->parts[0] ? 1 : -1; } else if (a->parts[1] != b->parts[1]) { return a->parts[1] > b->parts[1] ? 1 : -1; } else if (a->parts[2] != b->parts[2]) { return a->parts[2] > b->parts[2] ? 
1 : -1; } else if (a->parts[3] != b->parts[3]) { return a->parts[3] > b->parts[3] ? 1 : -1; } else { return 0; } } /* Attempts to convert string 's' into a UUID in 'uuid'. Returns true if * successful, which will be the case only if 's' has the exact format * specified by RFC 4122. Returns false on failure. On failure, 'uuid' will * be set to all-zero-bits. */ bool uuid_from_string(struct uuid *uuid, const char *s) { if (!uuid_from_string_prefix(uuid, s)) { return false; } else if (s[UUID_LEN] != '\0') { uuid_zero(uuid); return false; } else { return true; } } /* Same as uuid_from_string() but s[UUID_LEN] is not required to be a null byte * to succeed; that is, 's' need only begin with UUID syntax, not consist * entirely of it. */ bool uuid_from_string_prefix(struct uuid *uuid, const char *s) { /* 0 1 2 3 */ /* 012345678901234567890123456789012345 */ /* ------------------------------------ */ /* 00000000-1111-1111-2222-222233333333 */ bool ok; uuid->parts[0] = hexits_value(s, 8, &ok); if (!ok || s[8] != '-') { goto error; } uuid->parts[1] = hexits_value(s + 9, 4, &ok) << 16; if (!ok || s[13] != '-') { goto error; } uuid->parts[1] += hexits_value(s + 14, 4, &ok); if (!ok || s[18] != '-') { goto error; } uuid->parts[2] = hexits_value(s + 19, 4, &ok) << 16; if (!ok || s[23] != '-') { goto error; } uuid->parts[2] += hexits_value(s + 24, 4, &ok); if (!ok) { goto error; } uuid->parts[3] = hexits_value(s + 28, 8, &ok); if (!ok) { goto error; } return true; error: uuid_zero(uuid); return false; } static void do_init(void) { uint8_t sha1[SHA1_DIGEST_SIZE]; struct sha1_ctx sha1_ctx; uint8_t random_seed[16]; struct timeval now; pid_t pid, ppid; uid_t uid; gid_t gid; /* Get seed data. */ get_entropy_or_die(random_seed, sizeof random_seed); xgettimeofday(&now); pid = getpid(); ppid = getppid(); uid = getuid(); gid = getgid(); /* Convert seed into key. 
*/ sha1_init(&sha1_ctx); sha1_update(&sha1_ctx, random_seed, sizeof random_seed); sha1_update(&sha1_ctx, &pid, sizeof pid); sha1_update(&sha1_ctx, &ppid, sizeof ppid); sha1_update(&sha1_ctx, &uid, sizeof uid); sha1_update(&sha1_ctx, &gid, sizeof gid); sha1_final(&sha1_ctx, sha1); /* Generate key. */ BUILD_ASSERT(sizeof sha1 >= 16); aes128_schedule(&key, sha1); /* Generate initial counter. */ get_entropy_or_die(counter, sizeof counter); } openvswitch-2.0.1+git20140120/lib/uuid.h000066400000000000000000000053171226605124000173450ustar00rootroot00000000000000/* Copyright (c) 2008, 2009, 2010 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef UUID_H #define UUID_H 1 #include #include #include #include "util.h" #define UUID_BIT 128 /* Number of bits in a UUID. */ #define UUID_OCTET (UUID_BIT / 8) /* Number of bytes in a UUID. */ /* A Universally Unique IDentifier (UUID) compliant with RFC 4122. * * Each of the parts is stored in host byte order, but the parts themselves are * ordered from left to right. That is, (parts[0] >> 24) is the first 8 bits * of the UUID when output in the standard form, and (parts[3] & 0xff) is the * final 8 bits. */ struct uuid { uint32_t parts[4]; }; BUILD_ASSERT_DECL(sizeof(struct uuid) == UUID_OCTET); /* Formats a UUID as a string, in the conventional format. 
* * Example: * struct uuid uuid = ...; * printf("This UUID is "UUID_FMT"\n", UUID_ARGS(&uuid)); * */ #define UUID_LEN 36 #define UUID_FMT "%08x-%04x-%04x-%04x-%04x%08x" #define UUID_ARGS(UUID) \ ((unsigned int) ((UUID)->parts[0])), \ ((unsigned int) ((UUID)->parts[1] >> 16)), \ ((unsigned int) ((UUID)->parts[1] & 0xffff)), \ ((unsigned int) ((UUID)->parts[2] >> 16)), \ ((unsigned int) ((UUID)->parts[2] & 0xffff)), \ ((unsigned int) ((UUID)->parts[3])) /* Returns a hash value for 'uuid'. This hash value is the same regardless of * whether we are running on a 32-bit or 64-bit or big-endian or little-endian * architecture. */ static inline size_t uuid_hash(const struct uuid *uuid) { return uuid->parts[0]; } /* Returns true if 'a == b', false otherwise. */ static inline bool uuid_equals(const struct uuid *a, const struct uuid *b) { return (a->parts[0] == b->parts[0] && a->parts[1] == b->parts[1] && a->parts[2] == b->parts[2] && a->parts[3] == b->parts[3]); } void uuid_init(void); void uuid_generate(struct uuid *); void uuid_zero(struct uuid *); bool uuid_is_zero(const struct uuid *); int uuid_compare_3way(const struct uuid *, const struct uuid *); bool uuid_from_string(struct uuid *, const char *); bool uuid_from_string_prefix(struct uuid *, const char *); #endif /* uuid.h */ openvswitch-2.0.1+git20140120/lib/valgrind.h000066400000000000000000000014141226605124000201770ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef VALGRIND_H #define VALGRIND_H 1 #ifdef HAVE_VALGRIND_VALGRIND_H #include #else #define RUNNING_ON_VALGRIND 0 #endif #endif /* valgrind.h */ openvswitch-2.0.1+git20140120/lib/vconn-active.man000066400000000000000000000010451226605124000213110ustar00rootroot00000000000000.IP "\fBssl:\fIip\fR[\fB:\fIport\fR]" The specified SSL \fIport\fR (default: 6633) on the host at the given \fIip\fR, which must be expressed as an IP address (not a DNS name). The \fB\-\-private\-key\fR, \fB\-\-certificate\fR, and \fB\-\-ca\-cert\fR options are mandatory when this form is used. . .IP "\fBtcp:\fIip\fR[\fB:\fIport\fR]" The specified TCP \fIport\fR (default: 6633) on the host at the given \fIip\fR, which must be expressed as an IP address (not a DNS name). . .TP \fBunix:\fIfile\fR The Unix domain server socket named \fIfile\fR. openvswitch-2.0.1+git20140120/lib/vconn-passive.man000066400000000000000000000014031226605124000215060ustar00rootroot00000000000000.IP "\fBpssl:\fR[\fIport\fR][\fB:\fIip\fR]" Listens for OpenFlow SSL connections on \fIport\fR (default: 6633). The \fB\-\-private\-key\fR, \fB\-\-certificate\fR, and \fB\-\-ca\-cert\fR options are mandatory when this form is used. By default, connections are not bound to a particular local IP address, but \fIip\fR may be specified to listen only for connections to the given \fIip\fR. . .IP "\fBptcp:\fR[\fIport\fR][\fB:\fIip\fR]" Listens for OpenFlow TCP connections on \fIport\fR (default: 6633). By default, connections are not bound to a particular local IP address, but \fIip\fR may be specified to listen only for connections to the given \fIip\fR. . .IP "\fBpunix:\fIfile\fR" Listens for OpenFlow connections on the Unix domain server socket named \fIfile\fR. 
openvswitch-2.0.1+git20140120/lib/vconn-provider.h000066400000000000000000000205141226605124000213460ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef VCONN_PROVIDER_H #define VCONN_PROVIDER_H 1 /* Provider interface to vconns, which provide a virtual connection to an * OpenFlow device. */ #include "vconn.h" #include "util.h" #include "openflow/openflow-common.h" /* Active virtual connection to an OpenFlow device. */ /* Active virtual connection to an OpenFlow device. * * This structure should be treated as opaque by vconn implementations. */ struct vconn { const struct vconn_class *class; int state; int error; /* OpenFlow versions. */ uint32_t allowed_versions; /* Bitmap of versions we will accept. */ uint32_t peer_versions; /* Peer's bitmap of versions it will accept. */ enum ofp_version version; /* Negotiated version (or 0). */ bool recv_any_version; /* True to receive a message of any version. 
*/ ovs_be32 remote_ip; ovs_be16 remote_port; ovs_be32 local_ip; ovs_be16 local_port; char *name; }; void vconn_init(struct vconn *, const struct vconn_class *, int connect_status, const char *name, uint32_t allowed_versions); void vconn_free_data(struct vconn *vconn); void vconn_set_remote_ip(struct vconn *, ovs_be32 remote_ip); void vconn_set_remote_port(struct vconn *, ovs_be16 remote_port); void vconn_set_local_ip(struct vconn *, ovs_be32 local_ip); void vconn_set_local_port(struct vconn *, ovs_be16 local_port); static inline void vconn_assert_class(const struct vconn *vconn, const struct vconn_class *class) { ovs_assert(vconn->class == class); } struct vconn_class { /* Prefix for connection names, e.g. "nl", "tcp". */ const char *name; /* Attempts to connect to an OpenFlow device. 'name' is the full * connection name provided by the user, e.g. "tcp:1.2.3.4". This name is * useful for error messages but must not be modified. * * 'allowed_versions' is the set of OpenFlow versions that may be * negotiated for a connection. * * 'suffix' is a copy of 'name' following the colon and may be modified. * 'dscp' is the DSCP value that the new connection should use in the IP * packets it sends. * * Returns 0 if successful, otherwise a positive errno value. If * successful, stores a pointer to the new connection in '*vconnp'. * * The open function must not block waiting for a connection to complete. * If the connection cannot be completed immediately, it should return * EAGAIN (not EINPROGRESS, as returned by the connect system call) and * continue the connection in the background. */ int (*open)(const char *name, uint32_t allowed_versions, char *suffix, struct vconn **vconnp, uint8_t dscp); /* Closes 'vconn' and frees associated memory. */ void (*close)(struct vconn *vconn); /* Tries to complete the connection on 'vconn'. If 'vconn''s connection is * complete, returns 0 if the connection was successful or a positive errno * value if it failed. 
If the connection is still in progress, returns * EAGAIN. * * The connect function must not block waiting for the connection to * complete; instead, it should return EAGAIN immediately. */ int (*connect)(struct vconn *vconn); /* Tries to receive an OpenFlow message from 'vconn'. If successful, * stores the received message into '*msgp' and returns 0. The caller is * responsible for destroying the message with ofpbuf_delete(). On * failure, returns a positive errno value and stores a null pointer into * '*msgp'. * * If the connection has been closed in the normal fashion, returns EOF. * * The recv function must not block waiting for a packet to arrive. If no * packets have been received, it should return EAGAIN. */ int (*recv)(struct vconn *vconn, struct ofpbuf **msgp); /* Tries to queue 'msg' for transmission on 'vconn'. If successful, * returns 0, in which case ownership of 'msg' is transferred to the vconn. * Success does not guarantee that 'msg' has been or ever will be delivered * to the peer, only that it has been queued for transmission. * * Returns a positive errno value on failure, in which case the caller * retains ownership of 'msg'. * * The send function must not block. If 'msg' cannot be immediately * accepted for transmission, it should return EAGAIN. */ int (*send)(struct vconn *vconn, struct ofpbuf *msg); /* Allows 'vconn' to perform maintenance activities, such as flushing * output buffers. * * May be null if 'vconn' doesn't have anything to do here. */ void (*run)(struct vconn *vconn); /* Arranges for the poll loop to wake up when 'vconn' needs to perform * maintenance activities. * * May be null if 'vconn' doesn't have anything to do here. */ void (*run_wait)(struct vconn *vconn); /* Arranges for the poll loop to wake up when 'vconn' is ready to take an * action of the given 'type'. */ void (*wait)(struct vconn *vconn, enum vconn_wait_type type); }; /* Passive virtual connection to an OpenFlow device. 
* * This structure should be treated as opaque by vconn implementations. */ struct pvconn { const struct pvconn_class *class; char *name; uint32_t allowed_versions; }; void pvconn_init(struct pvconn *pvconn, const struct pvconn_class *class, const char *name, uint32_t allowed_versions); static inline void pvconn_assert_class(const struct pvconn *pvconn, const struct pvconn_class *class) { ovs_assert(pvconn->class == class); } struct pvconn_class { /* Prefix for connection names, e.g. "ptcp", "pssl". */ const char *name; /* Attempts to start listening for OpenFlow connections. 'name' is the * full connection name provided by the user, e.g. "ptcp:1234". This name * is useful for error messages but must not be modified. * * 'allowed_versions' is the OpenFlow protocol versions that may * be negotiated for a session. * * 'suffix' is a copy of 'name' following the colon and may be modified. * 'dscp' is the DSCP value that the new connection should use in the IP * packets it sends. * * Returns 0 if successful, otherwise a positive errno value. If * successful, stores a pointer to the new connection in '*pvconnp'. * * The listen function must not block. If the connection cannot be * completed immediately, it should return EAGAIN (not EINPROGRESS, as * returned by the connect system call) and continue the connection in the * background. */ int (*listen)(const char *name, uint32_t allowed_versions, char *suffix, struct pvconn **pvconnp, uint8_t dscp); /* Closes 'pvconn' and frees associated memory. */ void (*close)(struct pvconn *pvconn); /* Tries to accept a new connection on 'pvconn'. If successful, stores the * new connection in '*new_vconnp' and returns 0. Otherwise, returns a * positive errno value. * * The accept function must not block waiting for a connection. If no * connection is ready to be accepted, it should return EAGAIN. 
*/ int (*accept)(struct pvconn *pvconn, struct vconn **new_vconnp); /* Arranges for the poll loop to wake up when a connection is ready to be * accepted on 'pvconn'. */ void (*wait)(struct pvconn *pvconn); }; /* Active and passive vconn classes. */ extern const struct vconn_class tcp_vconn_class; extern const struct pvconn_class ptcp_pvconn_class; extern const struct vconn_class unix_vconn_class; extern const struct pvconn_class punix_pvconn_class; #ifdef HAVE_OPENSSL extern const struct vconn_class ssl_vconn_class; extern const struct pvconn_class pssl_pvconn_class; #endif #endif /* vconn-provider.h */ openvswitch-2.0.1+git20140120/lib/vconn-stream.c000066400000000000000000000255431226605124000210110ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include "fatal-signal.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "poll-loop.h" #include "socket-util.h" #include "stream.h" #include "util.h" #include "vconn-provider.h" #include "vconn.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(vconn_stream); /* Active stream socket vconn. 
*/ struct vconn_stream { struct vconn vconn; struct stream *stream; struct ofpbuf *rxbuf; struct ofpbuf *txbuf; int n_packets; }; static const struct vconn_class stream_vconn_class; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 25); static void vconn_stream_clear_txbuf(struct vconn_stream *); static struct vconn * vconn_stream_new(struct stream *stream, int connect_status, uint32_t allowed_versions) { struct vconn_stream *s; s = xmalloc(sizeof *s); vconn_init(&s->vconn, &stream_vconn_class, connect_status, stream_get_name(stream), allowed_versions); s->stream = stream; s->txbuf = NULL; s->rxbuf = NULL; s->n_packets = 0; s->vconn.remote_ip = stream_get_remote_ip(stream); s->vconn.remote_port = stream_get_remote_port(stream); s->vconn.local_ip = stream_get_local_ip(stream); s->vconn.local_port = stream_get_local_port(stream); return &s->vconn; } /* Creates a new vconn that will send and receive data on a stream named 'name' * and stores a pointer to the vconn in '*vconnp'. * * Returns 0 if successful, otherwise a positive errno value. 
*/ static int vconn_stream_open(const char *name, uint32_t allowed_versions, char *suffix OVS_UNUSED, struct vconn **vconnp, uint8_t dscp) { struct stream *stream; int error; error = stream_open_with_default_ports(name, OFP_TCP_PORT, OFP_SSL_PORT, &stream, dscp); if (!error) { error = stream_connect(stream); if (!error || error == EAGAIN) { *vconnp = vconn_stream_new(stream, error, allowed_versions); return 0; } } stream_close(stream); return error; } static struct vconn_stream * vconn_stream_cast(struct vconn *vconn) { return CONTAINER_OF(vconn, struct vconn_stream, vconn); } static void vconn_stream_close(struct vconn *vconn) { struct vconn_stream *s = vconn_stream_cast(vconn); if ((vconn->error == EPROTO || s->n_packets < 1) && s->rxbuf) { stream_report_content(s->rxbuf->data, s->rxbuf->size, STREAM_OPENFLOW, THIS_MODULE, vconn_get_name(vconn)); } stream_close(s->stream); vconn_stream_clear_txbuf(s); ofpbuf_delete(s->rxbuf); free(s); } static int vconn_stream_connect(struct vconn *vconn) { struct vconn_stream *s = vconn_stream_cast(vconn); return stream_connect(s->stream); } static int vconn_stream_recv__(struct vconn_stream *s, int rx_len) { struct ofpbuf *rx = s->rxbuf; int want_bytes, retval; want_bytes = rx_len - rx->size; ofpbuf_prealloc_tailroom(rx, want_bytes); retval = stream_recv(s->stream, ofpbuf_tail(rx), want_bytes); if (retval > 0) { rx->size += retval; return retval == want_bytes ? 0 : EAGAIN; } else if (retval == 0) { if (rx->size) { VLOG_ERR_RL(&rl, "connection dropped mid-packet"); return EPROTO; } return EOF; } else { return -retval; } } static int vconn_stream_recv(struct vconn *vconn, struct ofpbuf **bufferp) { struct vconn_stream *s = vconn_stream_cast(vconn); const struct ofp_header *oh; int rx_len; /* Allocate new receive buffer if we don't have one. */ if (s->rxbuf == NULL) { s->rxbuf = ofpbuf_new(1564); } /* Read ofp_header. 
*/ if (s->rxbuf->size < sizeof(struct ofp_header)) { int retval = vconn_stream_recv__(s, sizeof(struct ofp_header)); if (retval) { return retval; } } /* Read payload. */ oh = s->rxbuf->data; rx_len = ntohs(oh->length); if (rx_len < sizeof(struct ofp_header)) { VLOG_ERR_RL(&rl, "received too-short ofp_header (%d bytes)", rx_len); return EPROTO; } else if (s->rxbuf->size < rx_len) { int retval = vconn_stream_recv__(s, rx_len); if (retval) { return retval; } } s->n_packets++; *bufferp = s->rxbuf; s->rxbuf = NULL; return 0; } static void vconn_stream_clear_txbuf(struct vconn_stream *s) { ofpbuf_delete(s->txbuf); s->txbuf = NULL; } static int vconn_stream_send(struct vconn *vconn, struct ofpbuf *buffer) { struct vconn_stream *s = vconn_stream_cast(vconn); ssize_t retval; if (s->txbuf) { return EAGAIN; } retval = stream_send(s->stream, buffer->data, buffer->size); if (retval == buffer->size) { ofpbuf_delete(buffer); return 0; } else if (retval >= 0 || retval == -EAGAIN) { s->txbuf = buffer; if (retval > 0) { ofpbuf_pull(buffer, retval); } return 0; } else { return -retval; } } static void vconn_stream_run(struct vconn *vconn) { struct vconn_stream *s = vconn_stream_cast(vconn); ssize_t retval; stream_run(s->stream); if (!s->txbuf) { return; } retval = stream_send(s->stream, s->txbuf->data, s->txbuf->size); if (retval < 0) { if (retval != -EAGAIN) { VLOG_ERR_RL(&rl, "send: %s", ovs_strerror(-retval)); vconn_stream_clear_txbuf(s); return; } } else if (retval > 0) { ofpbuf_pull(s->txbuf, retval); if (!s->txbuf->size) { vconn_stream_clear_txbuf(s); return; } } } static void vconn_stream_run_wait(struct vconn *vconn) { struct vconn_stream *s = vconn_stream_cast(vconn); stream_run_wait(s->stream); if (s->txbuf) { stream_send_wait(s->stream); } } static void vconn_stream_wait(struct vconn *vconn, enum vconn_wait_type wait) { struct vconn_stream *s = vconn_stream_cast(vconn); switch (wait) { case WAIT_CONNECT: stream_connect_wait(s->stream); break; case WAIT_SEND: if (!s->txbuf) 
{ stream_send_wait(s->stream); } else { /* Nothing to do: need to drain txbuf first. * vconn_stream_run_wait() will arrange to wake up when there room * to send data, so there's no point in calling poll_fd_wait() * redundantly here. */ } break; case WAIT_RECV: stream_recv_wait(s->stream); break; default: NOT_REACHED(); } } /* Passive stream socket vconn. */ struct pvconn_pstream { struct pvconn pvconn; struct pstream *pstream; }; static const struct pvconn_class pstream_pvconn_class; static struct pvconn_pstream * pvconn_pstream_cast(struct pvconn *pvconn) { return CONTAINER_OF(pvconn, struct pvconn_pstream, pvconn); } /* Creates a new pvconn named 'name' that will accept new connections using * pstream_accept() and stores a pointer to the pvconn in '*pvconnp'. * * Returns 0 if successful, otherwise a positive errno value. (The current * implementation never fails.) */ static int pvconn_pstream_listen(const char *name, uint32_t allowed_versions, char *suffix OVS_UNUSED, struct pvconn **pvconnp, uint8_t dscp) { struct pvconn_pstream *ps; struct pstream *pstream; int error; error = pstream_open_with_default_ports(name, OFP_TCP_PORT, OFP_SSL_PORT, &pstream, dscp); if (error) { return error; } ps = xmalloc(sizeof *ps); pvconn_init(&ps->pvconn, &pstream_pvconn_class, name, allowed_versions); ps->pstream = pstream; *pvconnp = &ps->pvconn; return 0; } static void pvconn_pstream_close(struct pvconn *pvconn) { struct pvconn_pstream *ps = pvconn_pstream_cast(pvconn); pstream_close(ps->pstream); free(ps); } static int pvconn_pstream_accept(struct pvconn *pvconn, struct vconn **new_vconnp) { struct pvconn_pstream *ps = pvconn_pstream_cast(pvconn); struct stream *stream; int error; error = pstream_accept(ps->pstream, &stream); if (error) { if (error != EAGAIN) { VLOG_DBG_RL(&rl, "%s: accept: %s", pstream_get_name(ps->pstream), ovs_strerror(error)); } return error; } *new_vconnp = vconn_stream_new(stream, 0, pvconn->allowed_versions); return 0; } static void 
pvconn_pstream_wait(struct pvconn *pvconn) { struct pvconn_pstream *ps = pvconn_pstream_cast(pvconn); pstream_wait(ps->pstream); } /* Stream-based vconns and pvconns. */ #define STREAM_INIT(NAME) \ { \ NAME, \ vconn_stream_open, \ vconn_stream_close, \ vconn_stream_connect, \ vconn_stream_recv, \ vconn_stream_send, \ vconn_stream_run, \ vconn_stream_run_wait, \ vconn_stream_wait, \ } #define PSTREAM_INIT(NAME) \ { \ NAME, \ pvconn_pstream_listen, \ pvconn_pstream_close, \ pvconn_pstream_accept, \ pvconn_pstream_wait \ } static const struct vconn_class stream_vconn_class = STREAM_INIT("stream"); static const struct pvconn_class pstream_pvconn_class = PSTREAM_INIT("pstream"); const struct vconn_class tcp_vconn_class = STREAM_INIT("tcp"); const struct pvconn_class ptcp_pvconn_class = PSTREAM_INIT("ptcp"); const struct vconn_class unix_vconn_class = STREAM_INIT("unix"); const struct pvconn_class punix_pvconn_class = PSTREAM_INIT("punix"); #ifdef HAVE_OPENSSL const struct vconn_class ssl_vconn_class = STREAM_INIT("ssl"); const struct pvconn_class pssl_pvconn_class = PSTREAM_INIT("pssl"); #endif openvswitch-2.0.1+git20140120/lib/vconn.c000066400000000000000000001025541226605124000175160ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "vconn-provider.h" #include #include #include #include #include #include #include "coverage.h" #include "dynamic-string.h" #include "fatal-signal.h" #include "flow.h" #include "ofp-errors.h" #include "ofp-msgs.h" #include "ofp-print.h" #include "ofp-util.h" #include "ofpbuf.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "packets.h" #include "poll-loop.h" #include "random.h" #include "util.h" #include "vlog.h" #include "socket-util.h" VLOG_DEFINE_THIS_MODULE(vconn); COVERAGE_DEFINE(vconn_open); COVERAGE_DEFINE(vconn_received); COVERAGE_DEFINE(vconn_sent); /* State of an active vconn.*/ enum vconn_state { /* This is the ordinary progression of states. */ VCS_CONNECTING, /* Underlying vconn is not connected. */ VCS_SEND_HELLO, /* Waiting to send OFPT_HELLO message. */ VCS_RECV_HELLO, /* Waiting to receive OFPT_HELLO message. */ VCS_CONNECTED, /* Connection established. */ /* These states are entered only when something goes wrong. */ VCS_SEND_ERROR, /* Sending OFPT_ERROR message. */ VCS_DISCONNECTED /* Connection failed or connection closed. */ }; static const struct vconn_class *vconn_classes[] = { &tcp_vconn_class, &unix_vconn_class, #ifdef HAVE_OPENSSL &ssl_vconn_class, #endif }; static const struct pvconn_class *pvconn_classes[] = { &ptcp_pvconn_class, &punix_pvconn_class, #ifdef HAVE_OPENSSL &pssl_pvconn_class, #endif }; /* Rate limit for individual OpenFlow messages going over the vconn, output at * DBG level. This is very high because, if these are enabled, it is because * we really need to see them. */ static struct vlog_rate_limit ofmsg_rl = VLOG_RATE_LIMIT_INIT(600, 600); /* Rate limit for OpenFlow message parse errors. These always indicate a bug * in the peer and so there's not much point in showing a lot of them. 
*/
static struct vlog_rate_limit bad_ofmsg_rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* Internal receive/send helpers, defined further down in this file. */
static int do_recv(struct vconn *, struct ofpbuf **);
static int do_send(struct vconn *, struct ofpbuf *);

/* Check the validity of the vconn class structures.
 *
 * Every registered class must have a name and an open (or listen) callback.
 * A class that sets any of its other callbacks must set all of the mandatory
 * ones; a class that sets none of them is taken to delegate to another class.
 * The checks are compiled out when NDEBUG is defined. */
static void
check_vconn_classes(void)
{
#ifndef NDEBUG
    size_t i;

    /* Active connection classes. */
    for (i = 0; i < ARRAY_SIZE(vconn_classes); i++) {
        const struct vconn_class *class = vconn_classes[i];
        ovs_assert(class->name != NULL);
        ovs_assert(class->open != NULL);
        if (class->close || class->recv || class->send
            || class->run || class->run_wait || class->wait) {
            /* 'run' and 'run_wait' are optional, so they are not asserted. */
            ovs_assert(class->close != NULL);
            ovs_assert(class->recv != NULL);
            ovs_assert(class->send != NULL);
            ovs_assert(class->wait != NULL);
        } else {
            /* This class delegates to another one. */
        }
    }

    /* Passive connection classes. */
    for (i = 0; i < ARRAY_SIZE(pvconn_classes); i++) {
        const struct pvconn_class *class = pvconn_classes[i];
        ovs_assert(class->name != NULL);
        ovs_assert(class->listen != NULL);
        if (class->close || class->accept || class->wait) {
            ovs_assert(class->close != NULL);
            ovs_assert(class->accept != NULL);
            ovs_assert(class->wait != NULL);
        } else {
            /* This class delegates to another one. */
        }
    }
#endif
}

/* Prints information on active (if 'active') and passive (if 'passive')
 * connection methods supported by the vconn.  If 'bootstrap' is true, also
 * advertises options to bootstrap the CA certificate.
 *
 * The SSL methods and the PKI options are printed only when the file is built
 * with HAVE_OPENSSL; without it 'bootstrap' is unused. */
void
vconn_usage(bool active, bool passive, bool bootstrap OVS_UNUSED)
{
    /* Really this should be implemented via callbacks into the vconn
     * providers, but that seems too heavy-weight to bother with at the
     * moment.
     */

    printf("\n");
    if (active) {
        printf("Active OpenFlow connection methods:\n");
        printf(" tcp:IP[:PORT] "
               "PORT (default: %d) at remote IP\n", OFP_TCP_PORT);
#ifdef HAVE_OPENSSL
        printf(" ssl:IP[:PORT] "
               "SSL PORT (default: %d) at remote IP\n", OFP_SSL_PORT);
#endif
        printf(" unix:FILE Unix domain socket named FILE\n");
    }

    if (passive) {
        printf("Passive OpenFlow connection methods:\n");
        printf(" ptcp:[PORT][:IP] "
               "listen to TCP PORT (default: %d) on IP\n", OFP_TCP_PORT);
#ifdef HAVE_OPENSSL
        printf(" pssl:[PORT][:IP] "
               "listen for SSL on PORT (default: %d) on IP\n", OFP_SSL_PORT);
#endif
        printf(" punix:FILE "
               "listen on Unix domain socket FILE\n");
    }

#ifdef HAVE_OPENSSL
    /* The PKI options are listed regardless of 'active' and 'passive'. */
    printf("PKI configuration (required to use SSL):\n"
           " -p, --private-key=FILE file with private key\n"
           " -c, --certificate=FILE file with certificate for private key\n"
           " -C, --ca-cert=FILE file with peer CA certificate\n");
    if (bootstrap) {
        printf(" --bootstrap-ca-cert=FILE file with peer CA certificate "
               "to read or create\n");
    }
#endif
}

/* Given 'name', a connection name in the form "TYPE:ARGS", stores the class
 * named "TYPE" into '*classp' and returns 0.  Returns EAFNOSUPPORT and stores
 * a null pointer into '*classp' if 'name' is in the wrong form or if no such
 * class exists. */
static int
vconn_lookup_class(const char *name, const struct vconn_class **classp)
{
    size_t prefix_len;

    /* Length of the "TYPE" part, i.e. everything before the first colon. */
    prefix_len = strcspn(name, ":");
    if (name[prefix_len] != '\0') {
        size_t i;

        for (i = 0; i < ARRAY_SIZE(vconn_classes); i++) {
            const struct vconn_class *class = vconn_classes[i];
            /* Require an exact match, not merely a common prefix. */
            if (strlen(class->name) == prefix_len
                && !memcmp(class->name, name, prefix_len)) {
                *classp = class;
                return 0;
            }
        }
    }

    /* No colon in 'name', or no class with that prefix. */
    *classp = NULL;
    return EAFNOSUPPORT;
}

/* Returns 0 if 'name' is a connection name in the form "TYPE:ARGS" and TYPE is
 * a supported connection type, otherwise EAFNOSUPPORT.
*/ int vconn_verify_name(const char *name) { const struct vconn_class *class; return vconn_lookup_class(name, &class); } /* Attempts to connect to an OpenFlow device. 'name' is a connection name in * the form "TYPE:ARGS", where TYPE is an active vconn class's name and ARGS * are vconn class-specific. * * The vconn will automatically negotiate an OpenFlow protocol version * acceptable to both peers on the connection. The version negotiated will be * one of those in the 'allowed_versions' bitmap: version 'x' is allowed if * allowed_versions & (1 << x) is nonzero. If 'allowed_versions' is zero, then * OFPUTIL_DEFAULT_VERSIONS are allowed. * * Returns 0 if successful, otherwise a positive errno value. If successful, * stores a pointer to the new connection in '*vconnp', otherwise a null * pointer. */ int vconn_open(const char *name, uint32_t allowed_versions, uint8_t dscp, struct vconn **vconnp) { const struct vconn_class *class; struct vconn *vconn; char *suffix_copy; int error; COVERAGE_INC(vconn_open); check_vconn_classes(); if (!allowed_versions) { allowed_versions = OFPUTIL_DEFAULT_VERSIONS; } /* Look up the class. */ error = vconn_lookup_class(name, &class); if (!class) { goto error; } /* Call class's "open" function. */ suffix_copy = xstrdup(strchr(name, ':') + 1); error = class->open(name, allowed_versions, suffix_copy, &vconn, dscp); free(suffix_copy); if (error) { goto error; } /* Success. */ ovs_assert(vconn->state != VCS_CONNECTING || vconn->class->connect); *vconnp = vconn; return 0; error: *vconnp = NULL; return error; } /* Allows 'vconn' to perform maintenance activities, such as flushing output * buffers. */ void vconn_run(struct vconn *vconn) { if (vconn->state == VCS_CONNECTING || vconn->state == VCS_SEND_HELLO || vconn->state == VCS_RECV_HELLO) { vconn_connect(vconn); } if (vconn->class->run) { (vconn->class->run)(vconn); } } /* Arranges for the poll loop to wake up when 'vconn' needs to perform * maintenance activities. 
*/ void vconn_run_wait(struct vconn *vconn) { if (vconn->state == VCS_CONNECTING || vconn->state == VCS_SEND_HELLO || vconn->state == VCS_RECV_HELLO) { vconn_connect_wait(vconn); } if (vconn->class->run_wait) { (vconn->class->run_wait)(vconn); } } int vconn_open_block(const char *name, uint32_t allowed_versions, uint8_t dscp, struct vconn **vconnp) { struct vconn *vconn; int error; fatal_signal_run(); error = vconn_open(name, allowed_versions, dscp, &vconn); if (!error) { error = vconn_connect_block(vconn); } if (error) { vconn_close(vconn); *vconnp = NULL; } else { *vconnp = vconn; } return error; } /* Closes 'vconn'. */ void vconn_close(struct vconn *vconn) { if (vconn != NULL) { char *name = vconn->name; (vconn->class->close)(vconn); free(name); } } /* Returns the name of 'vconn', that is, the string passed to vconn_open(). */ const char * vconn_get_name(const struct vconn *vconn) { return vconn->name; } /* Returns the allowed_versions of 'vconn', that is, * the allowed_versions passed to vconn_open(). */ uint32_t vconn_get_allowed_versions(const struct vconn *vconn) { return vconn->allowed_versions; } /* Sets the allowed_versions of 'vconn', overriding * the allowed_versions passed to vconn_open(). */ void vconn_set_allowed_versions(struct vconn *vconn, uint32_t allowed_versions) { vconn->allowed_versions = allowed_versions; } /* Returns the IP address of the peer, or 0 if the peer is not connected over * an IP-based protocol or if its IP address is not yet known. */ ovs_be32 vconn_get_remote_ip(const struct vconn *vconn) { return vconn->remote_ip; } /* Returns the transport port of the peer, or 0 if the connection does not * contain a port or if the port is not yet known. */ ovs_be16 vconn_get_remote_port(const struct vconn *vconn) { return vconn->remote_port; } /* Returns the IP address used to connect to the peer, or 0 if the * connection is not an IP-based protocol or if its IP address is not * yet known. 
*/ ovs_be32 vconn_get_local_ip(const struct vconn *vconn) { return vconn->local_ip; } /* Returns the transport port used to connect to the peer, or 0 if the * connection does not contain a port or if the port is not yet known. */ ovs_be16 vconn_get_local_port(const struct vconn *vconn) { return vconn->local_port; } /* Returns the OpenFlow version negotiated with the peer, or -1 if version * negotiation is not yet complete. * * A vconn that has successfully connected (that is, vconn_connect() or * vconn_send() or vconn_recv() has returned 0) always negotiated a version. */ int vconn_get_version(const struct vconn *vconn) { return vconn->version ? vconn->version : -1; } /* By default, a vconn accepts only OpenFlow messages whose version matches the * one negotiated for the connection. A message received with a different * version is an error that causes the vconn to drop the connection. * * This functions allows 'vconn' to accept messages with any OpenFlow version. * This is useful in the special case where 'vconn' is used as an rconn * "monitor" connection (see rconn_add_monitor()), that is, where 'vconn' is * used as a target for mirroring OpenFlow messages for debugging and * troubleshooting. * * This function should be called after a successful vconn_open() or * pvconn_accept() but before the connection completes, that is, before * vconn_connect() returns success. Otherwise, messages that arrive on 'vconn' * beforehand with an unexpected version will the vconn to drop the * connection. 
*/ void vconn_set_recv_any_version(struct vconn *vconn) { vconn->recv_any_version = true; } static void vcs_connecting(struct vconn *vconn) { int retval = (vconn->class->connect)(vconn); ovs_assert(retval != EINPROGRESS); if (!retval) { vconn->state = VCS_SEND_HELLO; } else if (retval != EAGAIN) { vconn->state = VCS_DISCONNECTED; vconn->error = retval; } } static void vcs_send_hello(struct vconn *vconn) { struct ofpbuf *b; int retval; b = ofputil_encode_hello(vconn->allowed_versions); retval = do_send(vconn, b); if (!retval) { vconn->state = VCS_RECV_HELLO; } else { ofpbuf_delete(b); if (retval != EAGAIN) { vconn->state = VCS_DISCONNECTED; vconn->error = retval; } } } static char * version_bitmap_to_string(uint32_t bitmap) { struct ds s; ds_init(&s); if (!bitmap) { ds_put_cstr(&s, "no versions"); } else if (is_pow2(bitmap)) { ds_put_cstr(&s, "version "); ofputil_format_version(&s, leftmost_1bit_idx(bitmap)); } else if (is_pow2((bitmap >> 1) + 1)) { ds_put_cstr(&s, "version "); ofputil_format_version(&s, leftmost_1bit_idx(bitmap)); ds_put_cstr(&s, " and earlier"); } else { ds_put_cstr(&s, "versions "); ofputil_format_version_bitmap(&s, bitmap); } return ds_steal_cstr(&s); } static void vcs_recv_hello(struct vconn *vconn) { struct ofpbuf *b; int retval; retval = do_recv(vconn, &b); if (!retval) { enum ofptype type; enum ofperr error; error = ofptype_decode(&type, b->data); if (!error && type == OFPTYPE_HELLO) { char *peer_s, *local_s; uint32_t common_versions; if (!ofputil_decode_hello(b->data, &vconn->peer_versions)) { struct ds msg = DS_EMPTY_INITIALIZER; ds_put_format(&msg, "%s: unknown data in hello:\n", vconn->name); ds_put_hex_dump(&msg, b->data, b->size, 0, true); VLOG_WARN_RL(&bad_ofmsg_rl, "%s", ds_cstr(&msg)); ds_destroy(&msg); } local_s = version_bitmap_to_string(vconn->allowed_versions); peer_s = version_bitmap_to_string(vconn->peer_versions); common_versions = vconn->peer_versions & vconn->allowed_versions; if (!common_versions) { vconn->version = 
leftmost_1bit_idx(vconn->peer_versions); VLOG_WARN_RL(&bad_ofmsg_rl, "%s: version negotiation failed (we support " "%s, peer supports %s)", vconn->name, local_s, peer_s); vconn->state = VCS_SEND_ERROR; } else { vconn->version = leftmost_1bit_idx(common_versions); VLOG_DBG("%s: negotiated OpenFlow version 0x%02x " "(we support %s, peer supports %s)", vconn->name, vconn->version, local_s, peer_s); vconn->state = VCS_CONNECTED; } free(local_s); free(peer_s); ofpbuf_delete(b); return; } else { char *s = ofp_to_string(b->data, b->size, 1); VLOG_WARN_RL(&bad_ofmsg_rl, "%s: received message while expecting hello: %s", vconn->name, s); free(s); retval = EPROTO; ofpbuf_delete(b); } } if (retval != EAGAIN) { vconn->state = VCS_DISCONNECTED; vconn->error = retval == EOF ? ECONNRESET : retval; } } static void vcs_send_error(struct vconn *vconn) { struct ofpbuf *b; char s[128]; int retval; char *local_s, *peer_s; local_s = version_bitmap_to_string(vconn->allowed_versions); peer_s = version_bitmap_to_string(vconn->peer_versions); snprintf(s, sizeof s, "We support %s, you support %s, no common versions.", local_s, peer_s); free(peer_s); free(local_s); b = ofperr_encode_hello(OFPERR_OFPHFC_INCOMPATIBLE, vconn->version, s); retval = do_send(vconn, b); if (retval) { ofpbuf_delete(b); } if (retval != EAGAIN) { vconn->state = VCS_DISCONNECTED; vconn->error = retval ? retval : EPROTO; } } /* Tries to complete the connection on 'vconn'. If 'vconn''s connection is * complete, returns 0 if the connection was successful or a positive errno * value if it failed. If the connection is still in progress, returns * EAGAIN. 
*/ int vconn_connect(struct vconn *vconn) { enum vconn_state last_state; do { last_state = vconn->state; switch (vconn->state) { case VCS_CONNECTING: vcs_connecting(vconn); break; case VCS_SEND_HELLO: vcs_send_hello(vconn); break; case VCS_RECV_HELLO: vcs_recv_hello(vconn); break; case VCS_CONNECTED: return 0; case VCS_SEND_ERROR: vcs_send_error(vconn); break; case VCS_DISCONNECTED: return vconn->error; default: NOT_REACHED(); } } while (vconn->state != last_state); return EAGAIN; } /* Tries to receive an OpenFlow message from 'vconn'. If successful, stores * the received message into '*msgp' and returns 0. The caller is responsible * for destroying the message with ofpbuf_delete(). On failure, returns a * positive errno value and stores a null pointer into '*msgp'. On normal * connection close, returns EOF. * * vconn_recv will not block waiting for a packet to arrive. If no packets * have been received, it returns EAGAIN immediately. */ int vconn_recv(struct vconn *vconn, struct ofpbuf **msgp) { struct ofpbuf *msg; int retval; retval = vconn_connect(vconn); if (!retval) { retval = do_recv(vconn, &msg); } if (!retval && !vconn->recv_any_version) { const struct ofp_header *oh = msg->data; if (oh->version != vconn->version) { enum ofptype type; if (ofptype_decode(&type, msg->data) || (type != OFPTYPE_HELLO && type != OFPTYPE_ERROR && type != OFPTYPE_ECHO_REQUEST && type != OFPTYPE_ECHO_REPLY)) { VLOG_ERR_RL(&bad_ofmsg_rl, "%s: received OpenFlow version " "0x%02"PRIx8" != expected %02x", vconn->name, oh->version, vconn->version); ofpbuf_delete(msg); retval = EPROTO; } } } *msgp = retval ? 
NULL : msg; return retval; } static int do_recv(struct vconn *vconn, struct ofpbuf **msgp) { int retval = (vconn->class->recv)(vconn, msgp); if (!retval) { COVERAGE_INC(vconn_received); if (VLOG_IS_DBG_ENABLED()) { char *s = ofp_to_string((*msgp)->data, (*msgp)->size, 1); VLOG_DBG_RL(&ofmsg_rl, "%s: received: %s", vconn->name, s); free(s); } } return retval; } /* Tries to queue 'msg' for transmission on 'vconn'. If successful, returns 0, * in which case ownership of 'msg' is transferred to the vconn. Success does * not guarantee that 'msg' has been or ever will be delivered to the peer, * only that it has been queued for transmission. * * Returns a positive errno value on failure, in which case the caller * retains ownership of 'msg'. * * vconn_send will not block. If 'msg' cannot be immediately accepted for * transmission, it returns EAGAIN immediately. */ int vconn_send(struct vconn *vconn, struct ofpbuf *msg) { int retval = vconn_connect(vconn); if (!retval) { retval = do_send(vconn, msg); } return retval; } static int do_send(struct vconn *vconn, struct ofpbuf *msg) { int retval; ovs_assert(msg->size >= sizeof(struct ofp_header)); ofpmsg_update_length(msg); if (!VLOG_IS_DBG_ENABLED()) { COVERAGE_INC(vconn_sent); retval = (vconn->class->send)(vconn, msg); } else { char *s = ofp_to_string(msg->data, msg->size, 1); retval = (vconn->class->send)(vconn, msg); if (retval != EAGAIN) { VLOG_DBG_RL(&ofmsg_rl, "%s: sent (%s): %s", vconn->name, ovs_strerror(retval), s); } free(s); } return retval; } /* Same as vconn_connect(), except that it waits until the connection on * 'vconn' completes or fails. Thus, it will never return EAGAIN. */ int vconn_connect_block(struct vconn *vconn) { int error; while ((error = vconn_connect(vconn)) == EAGAIN) { vconn_run(vconn); vconn_run_wait(vconn); vconn_connect_wait(vconn); poll_block(); } ovs_assert(error != EINPROGRESS); return error; } /* Same as vconn_send, except that it waits until 'msg' can be transmitted. 
*/ int vconn_send_block(struct vconn *vconn, struct ofpbuf *msg) { int retval; fatal_signal_run(); while ((retval = vconn_send(vconn, msg)) == EAGAIN) { vconn_run(vconn); vconn_run_wait(vconn); vconn_send_wait(vconn); poll_block(); } return retval; } /* Same as vconn_recv, except that it waits until a message is received. */ int vconn_recv_block(struct vconn *vconn, struct ofpbuf **msgp) { int retval; fatal_signal_run(); while ((retval = vconn_recv(vconn, msgp)) == EAGAIN) { vconn_run(vconn); vconn_run_wait(vconn); vconn_recv_wait(vconn); poll_block(); } return retval; } /* Waits until a message with a transaction ID matching 'xid' is received on * 'vconn'. Returns 0 if successful, in which case the reply is stored in * '*replyp' for the caller to examine and free. Otherwise returns a positive * errno value, or EOF, and sets '*replyp' to null. * * 'request' is always destroyed, regardless of the return value. */ int vconn_recv_xid(struct vconn *vconn, ovs_be32 xid, struct ofpbuf **replyp) { for (;;) { ovs_be32 recv_xid; struct ofpbuf *reply; int error; error = vconn_recv_block(vconn, &reply); if (error) { *replyp = NULL; return error; } recv_xid = ((struct ofp_header *) reply->data)->xid; if (xid == recv_xid) { *replyp = reply; return 0; } VLOG_DBG_RL(&bad_ofmsg_rl, "%s: received reply with xid %08"PRIx32 " != expected %08"PRIx32, vconn->name, ntohl(recv_xid), ntohl(xid)); ofpbuf_delete(reply); } } /* Sends 'request' to 'vconn' and blocks until it receives a reply with a * matching transaction ID. Returns 0 if successful, in which case the reply * is stored in '*replyp' for the caller to examine and free. Otherwise * returns a positive errno value, or EOF, and sets '*replyp' to null. * * 'request' should be an OpenFlow request that requires a reply. Otherwise, * if there is no reply, this function can end up blocking forever (or until * the peer drops the connection). * * 'request' is always destroyed, regardless of the return value. 
*/ int vconn_transact(struct vconn *vconn, struct ofpbuf *request, struct ofpbuf **replyp) { ovs_be32 send_xid = ((struct ofp_header *) request->data)->xid; int error; *replyp = NULL; error = vconn_send_block(vconn, request); if (error) { ofpbuf_delete(request); } return error ? error : vconn_recv_xid(vconn, send_xid, replyp); } /* Sends 'request' followed by a barrier request to 'vconn', then blocks until * it receives a reply to the barrier. If successful, stores the reply to * 'request' in '*replyp', if one was received, and otherwise NULL, then * returns 0. Otherwise returns a positive errno value, or EOF, and sets * '*replyp' to null. * * This function is useful for sending an OpenFlow request that doesn't * ordinarily include a reply but might report an error in special * circumstances. * * 'request' is always destroyed, regardless of the return value. */ int vconn_transact_noreply(struct vconn *vconn, struct ofpbuf *request, struct ofpbuf **replyp) { ovs_be32 request_xid; ovs_be32 barrier_xid; struct ofpbuf *barrier; int error; *replyp = NULL; /* Send request. */ request_xid = ((struct ofp_header *) request->data)->xid; error = vconn_send_block(vconn, request); if (error) { ofpbuf_delete(request); return error; } /* Send barrier. 
*/ barrier = ofputil_encode_barrier_request(vconn_get_version(vconn)); barrier_xid = ((struct ofp_header *) barrier->data)->xid; error = vconn_send_block(vconn, barrier); if (error) { ofpbuf_delete(barrier); return error; } for (;;) { struct ofpbuf *msg; ovs_be32 msg_xid; int error; error = vconn_recv_block(vconn, &msg); if (error) { ofpbuf_delete(*replyp); *replyp = NULL; return error; } msg_xid = ((struct ofp_header *) msg->data)->xid; if (msg_xid == request_xid) { if (*replyp) { VLOG_WARN_RL(&bad_ofmsg_rl, "%s: duplicate replies with " "xid %08"PRIx32, vconn->name, ntohl(msg_xid)); ofpbuf_delete(*replyp); } *replyp = msg; } else { ofpbuf_delete(msg); if (msg_xid == barrier_xid) { return 0; } else { VLOG_DBG_RL(&bad_ofmsg_rl, "%s: reply with xid %08"PRIx32 " != expected %08"PRIx32" or %08"PRIx32, vconn->name, ntohl(msg_xid), ntohl(request_xid), ntohl(barrier_xid)); } } } } /* vconn_transact_noreply() for a list of "struct ofpbuf"s, sent one by one. * All of the requests on 'requests' are always destroyed, regardless of the * return value. 
*/ int vconn_transact_multiple_noreply(struct vconn *vconn, struct list *requests, struct ofpbuf **replyp) { struct ofpbuf *request, *next; LIST_FOR_EACH_SAFE (request, next, list_node, requests) { int error; list_remove(&request->list_node); error = vconn_transact_noreply(vconn, request, replyp); if (error || *replyp) { ofpbuf_list_delete(requests); return error; } } *replyp = NULL; return 0; } void vconn_wait(struct vconn *vconn, enum vconn_wait_type wait) { ovs_assert(wait == WAIT_CONNECT || wait == WAIT_RECV || wait == WAIT_SEND); switch (vconn->state) { case VCS_CONNECTING: wait = WAIT_CONNECT; break; case VCS_SEND_HELLO: case VCS_SEND_ERROR: wait = WAIT_SEND; break; case VCS_RECV_HELLO: wait = WAIT_RECV; break; case VCS_CONNECTED: break; case VCS_DISCONNECTED: poll_immediate_wake(); return; } (vconn->class->wait)(vconn, wait); } void vconn_connect_wait(struct vconn *vconn) { vconn_wait(vconn, WAIT_CONNECT); } void vconn_recv_wait(struct vconn *vconn) { vconn_wait(vconn, WAIT_RECV); } void vconn_send_wait(struct vconn *vconn) { vconn_wait(vconn, WAIT_SEND); } /* Given 'name', a connection name in the form "TYPE:ARGS", stores the class * named "TYPE" into '*classp' and returns 0. Returns EAFNOSUPPORT and stores * a null pointer into '*classp' if 'name' is in the wrong form or if no such * class exists. */ static int pvconn_lookup_class(const char *name, const struct pvconn_class **classp) { size_t prefix_len; prefix_len = strcspn(name, ":"); if (name[prefix_len] != '\0') { size_t i; for (i = 0; i < ARRAY_SIZE(pvconn_classes); i++) { const struct pvconn_class *class = pvconn_classes[i]; if (strlen(class->name) == prefix_len && !memcmp(class->name, name, prefix_len)) { *classp = class; return 0; } } } *classp = NULL; return EAFNOSUPPORT; } /* Returns 0 if 'name' is a connection name in the form "TYPE:ARGS" and TYPE is * a supported connection type, otherwise EAFNOSUPPORT. 
*/ int pvconn_verify_name(const char *name) { const struct pvconn_class *class; return pvconn_lookup_class(name, &class); } /* Attempts to start listening for OpenFlow connections. 'name' is a * connection name in the form "TYPE:ARGS", where TYPE is an passive vconn * class's name and ARGS are vconn class-specific. * * vconns accepted by the pvconn will automatically negotiate an OpenFlow * protocol version acceptable to both peers on the connection. The version * negotiated will be one of those in the 'allowed_versions' bitmap: version * 'x' is allowed if allowed_versions & (1 << x) is nonzero. If * 'allowed_versions' is zero, then OFPUTIL_DEFAULT_VERSIONS are allowed. * * Returns 0 if successful, otherwise a positive errno value. If successful, * stores a pointer to the new connection in '*pvconnp', otherwise a null * pointer. */ int pvconn_open(const char *name, uint32_t allowed_versions, uint8_t dscp, struct pvconn **pvconnp) { const struct pvconn_class *class; struct pvconn *pvconn; char *suffix_copy; int error; check_vconn_classes(); if (!allowed_versions) { allowed_versions = OFPUTIL_DEFAULT_VERSIONS; } /* Look up the class. */ error = pvconn_lookup_class(name, &class); if (!class) { goto error; } /* Call class's "open" function. */ suffix_copy = xstrdup(strchr(name, ':') + 1); error = class->listen(name, allowed_versions, suffix_copy, &pvconn, dscp); free(suffix_copy); if (error) { goto error; } /* Success. */ *pvconnp = pvconn; return 0; error: *pvconnp = NULL; return error; } /* Returns the name that was used to open 'pvconn'. The caller must not * modify or free the name. */ const char * pvconn_get_name(const struct pvconn *pvconn) { return pvconn->name; } /* Closes 'pvconn'. */ void pvconn_close(struct pvconn *pvconn) { if (pvconn != NULL) { char *name = pvconn->name; (pvconn->class->close)(pvconn); free(name); } } /* Tries to accept a new connection on 'pvconn'. If successful, stores the new * connection in '*new_vconn' and returns 0. 
Otherwise, returns a positive * errno value. * * The new vconn will automatically negotiate an OpenFlow protocol version * acceptable to both peers on the connection. The version negotiated will be * no lower than 'min_version' and no higher than 'max_version'. * * pvconn_accept() will not block waiting for a connection. If no connection * is ready to be accepted, it returns EAGAIN immediately. */ int pvconn_accept(struct pvconn *pvconn, struct vconn **new_vconn) { int retval = (pvconn->class->accept)(pvconn, new_vconn); if (retval) { *new_vconn = NULL; } else { ovs_assert((*new_vconn)->state != VCS_CONNECTING || (*new_vconn)->class->connect); } return retval; } void pvconn_wait(struct pvconn *pvconn) { (pvconn->class->wait)(pvconn); } /* Initializes 'vconn' as a new vconn named 'name', implemented via 'class'. * The initial connection status, supplied as 'connect_status', is interpreted * as follows: * * - 0: 'vconn' is connected. Its 'send' and 'recv' functions may be * called in the normal fashion. * * - EAGAIN: 'vconn' is trying to complete a connection. Its 'connect' * function should be called to complete the connection. * * - Other positive errno values indicate that the connection failed with * the specified error. * * After calling this function, vconn_close() must be used to destroy 'vconn', * otherwise resources will be leaked. * * The caller retains ownership of 'name'. */ void vconn_init(struct vconn *vconn, const struct vconn_class *class, int connect_status, const char *name, uint32_t allowed_versions) { memset(vconn, 0, sizeof *vconn); vconn->class = class; vconn->state = (connect_status == EAGAIN ? VCS_CONNECTING : !connect_status ? 
VCS_SEND_HELLO : VCS_DISCONNECTED); vconn->error = connect_status; vconn->allowed_versions = allowed_versions; vconn->name = xstrdup(name); ovs_assert(vconn->state != VCS_CONNECTING || class->connect); } void vconn_set_remote_ip(struct vconn *vconn, ovs_be32 ip) { vconn->remote_ip = ip; } void vconn_set_remote_port(struct vconn *vconn, ovs_be16 port) { vconn->remote_port = port; } void vconn_set_local_ip(struct vconn *vconn, ovs_be32 ip) { vconn->local_ip = ip; } void vconn_set_local_port(struct vconn *vconn, ovs_be16 port) { vconn->local_port = port; } void pvconn_init(struct pvconn *pvconn, const struct pvconn_class *class, const char *name, uint32_t allowed_versions) { pvconn->class = class; pvconn->name = xstrdup(name); pvconn->allowed_versions = allowed_versions; } openvswitch-2.0.1+git20140120/lib/vconn.h000066400000000000000000000062411226605124000175170ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef VCONN_H #define VCONN_H 1 #include #include "openvswitch/types.h" #include "openflow/openflow.h" #ifdef __cplusplus extern "C" { #endif struct list; struct ofpbuf; struct pvconn; struct vconn; void vconn_usage(bool active, bool passive, bool bootstrap); /* Active vconns: virtual connections to OpenFlow devices. 
*/ int vconn_verify_name(const char *name); int vconn_open(const char *name, uint32_t allowed_versions, uint8_t dscp, struct vconn **vconnp); void vconn_close(struct vconn *); const char *vconn_get_name(const struct vconn *); uint32_t vconn_get_allowed_versions(const struct vconn *vconn); void vconn_set_allowed_versions(struct vconn *vconn, uint32_t allowed_versions); int vconn_get_version(const struct vconn *); void vconn_set_recv_any_version(struct vconn *); ovs_be32 vconn_get_remote_ip(const struct vconn *); ovs_be16 vconn_get_remote_port(const struct vconn *); ovs_be32 vconn_get_local_ip(const struct vconn *); ovs_be16 vconn_get_local_port(const struct vconn *); int vconn_connect(struct vconn *); int vconn_recv(struct vconn *, struct ofpbuf **); int vconn_send(struct vconn *, struct ofpbuf *); int vconn_recv_xid(struct vconn *, ovs_be32 xid, struct ofpbuf **); int vconn_transact(struct vconn *, struct ofpbuf *, struct ofpbuf **); int vconn_transact_noreply(struct vconn *, struct ofpbuf *, struct ofpbuf **); int vconn_transact_multiple_noreply(struct vconn *, struct list *requests, struct ofpbuf **replyp); void vconn_run(struct vconn *); void vconn_run_wait(struct vconn *); int vconn_open_block(const char *name, uint32_t allowed_versions, uint8_t dscp, struct vconn **); int vconn_connect_block(struct vconn *); int vconn_send_block(struct vconn *, struct ofpbuf *); int vconn_recv_block(struct vconn *, struct ofpbuf **); enum vconn_wait_type { WAIT_CONNECT, WAIT_RECV, WAIT_SEND }; void vconn_wait(struct vconn *, enum vconn_wait_type); void vconn_connect_wait(struct vconn *); void vconn_recv_wait(struct vconn *); void vconn_send_wait(struct vconn *); /* Passive vconns: virtual listeners for incoming OpenFlow connections. 
*/ int pvconn_verify_name(const char *name); int pvconn_open(const char *name, uint32_t allowed_versions, uint8_t dscp, struct pvconn **pvconnp); const char *pvconn_get_name(const struct pvconn *); void pvconn_close(struct pvconn *); int pvconn_accept(struct pvconn *, struct vconn **); void pvconn_wait(struct pvconn *); #ifdef __cplusplus } #endif #endif /* vconn.h */ openvswitch-2.0.1+git20140120/lib/vlan-bitmap.c000066400000000000000000000037121226605124000206010ustar00rootroot00000000000000/* Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "vlan-bitmap.h"

/* Builds a new 4096-bit bitmap with a 1-bit for each of the 'n_vlans' VLANs
 * listed in 'vlans' and 0-bits everywhere else, and returns it.  Returns a
 * null pointer instead if 'vlans' contains no (valid) VLANs. */
unsigned long *
vlan_bitmap_from_array(const int64_t *vlans, size_t n_vlans)
{
    unsigned long *bitmap;

    if (n_vlans == 0) {
        return NULL;
    }

    bitmap = bitmap_allocate(4096);
    if (vlan_bitmap_from_array__(vlans, n_vlans, bitmap) == 0) {
        /* No bit was added, so there is nothing to return. */
        free(bitmap);
        return NULL;
    }
    return bitmap;
}

/* Adds to 4096-bit VLAN bitmap 'b' a 1-bit in each position in the 'n_vlans'
 * bits indicated in 'vlans'.  Returns the number of 1-bits added to 'b'.
*/ int vlan_bitmap_from_array__(const int64_t *vlans, size_t n_vlans, unsigned long int *b) { size_t i; int n; n = 0; for (i = 0; i < n_vlans; i++) { int64_t vlan = vlans[i]; if (vlan >= 0 && vlan < 4096 && !bitmap_is_set(b, vlan)) { bitmap_set1(b, vlan); n++; } } return n; } /* Returns true if 'a' and 'b' are the same: either both null or both the same * 4096-bit bitmap. * * (We assume that a nonnull bitmap is not all 0-bits.) */ bool vlan_bitmap_equal(const unsigned long *a, const unsigned long *b) { return (!a && !b) || (a && b && bitmap_equal(a, b, 4096)); } openvswitch-2.0.1+git20140120/lib/vlan-bitmap.h000066400000000000000000000030111226605124000205760ustar00rootroot00000000000000/* Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef VLAN_BITMAP_H #define VLAN_BITMAP_H 1 #include #include #include "bitmap.h" /* A "VLAN bitmap" is a 4096-bit bitmap that represents a set. A 1-bit * indicates that the respective VLAN is a member of the set, a 0-bit indicates * that it is not. There is one wrinkle: NULL is a valid value that indicates * either that all VLANs are or are not members, depending on the vlan_bitmap. * * This is empirically a useful data structure. */ unsigned long *vlan_bitmap_from_array(const int64_t *vlans, size_t n_vlans); int vlan_bitmap_from_array__(const int64_t *vlans, size_t n_vlans, unsigned long int *b); bool vlan_bitmap_equal(const unsigned long *a, const unsigned long *b); /* Returns a new copy of 'vlans'. 
*/ static inline unsigned long * vlan_bitmap_clone(const unsigned long *vlans) { return vlans ? bitmap_clone(vlans, 4096) : NULL; } #endif /* lib/vlan-bitmap.h */ openvswitch-2.0.1+git20140120/lib/vlandev.c000066400000000000000000000260601226605124000200270ustar00rootroot00000000000000/* * Copyright (c) 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "vlandev.h" #include #include #include #include #include "dummy.h" #include "hash.h" #include "shash.h" #include "socket-util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(vlandev); /* A vlandev implementation. */ struct vlandev_class { int (*vd_refresh)(void); int (*vd_add)(const char *real_dev, int vid); int (*vd_del)(const char *vlan_dev); }; #ifdef LINUX_DATAPATH static const struct vlandev_class vlandev_linux_class; #endif static const struct vlandev_class vlandev_stub_class; static const struct vlandev_class vlandev_dummy_class; /* The in-use vlandev implementation. */ static const struct vlandev_class *vd_class; /* Maps from a VLAN device name (e.g. "eth0.10") to struct vlan_dev. */ static struct shash vlan_devs = SHASH_INITIALIZER(&vlan_devs); /* Maps from a VLAN real device name (e.g. "eth0") to struct vlan_real_dev. 
*/ static struct shash vlan_real_devs = SHASH_INITIALIZER(&vlan_real_devs); static int vlandev_add__(const char *vlan_dev, const char *real_dev, int vid); static int vlandev_del__(const char *vlan_dev); static void vlandev_clear__(void); /* Returns the vlandev implementation in use. If none has been chosen yet, * first selects the Linux implementation when LINUX_DATAPATH is defined and * the stub implementation otherwise. */ static const struct vlandev_class * vlandev_get_class(void) { if (!vd_class) { #ifdef LINUX_DATAPATH vd_class = &vlandev_linux_class; #else vd_class = &vlandev_stub_class; #endif } return vd_class; } /* On Linux, the default implementation of VLAN devices creates and destroys * Linux VLAN devices. On other OSes, the default implementation is a * nonfunctional stub. In either case, this function replaces this default * implementation by a "dummy" implementation that simply reports back whatever * the client sets up with vlandev_add() and vlandev_del(). * * Don't call this function directly; use dummy_enable() from dummy.h. */ void vlandev_dummy_enable(void) { if (vd_class != &vlandev_dummy_class) { vd_class = &vlandev_dummy_class; vlandev_clear__(); } } /* Creates a new VLAN device for VLAN 'vid' on top of real Ethernet device * 'real_dev'. Returns 0 if successful, otherwise a positive errno value. On * OSes other than Linux, in the absence of dummies (see * vlandev_dummy_enable()), this always fails. * * The name of the new VLAN device is not easily predictable, because Linux * provides multiple naming schemes, does not allow the client to specify a * name, and does not directly report the new VLAN device's name. Use * vlandev_refresh() then vlandev_get_name() to find out the new VLAN device's * name. */ int vlandev_add(const char *real_dev, int vid) { return vlandev_get_class()->vd_add(real_dev, vid); } /* Deletes the VLAN device named 'vlan_dev'. Returns 0 if successful, * otherwise a positive errno value. On OSes other than Linux, in the absence * of dummies (see vlandev_dummy_enable()), this always fails.
*/ int vlandev_del(const char *vlan_dev) { return vlandev_get_class()->vd_del(vlan_dev); } /* Refreshes the cache of real device to VLAN device mappings reported by * vlandev_get_real_devs() and vlandev_get_name(). Without calling this * function, changes made by vlandev_add() and vlandev_del() may not be * reflected by vlandev_get_real_devs() and vlandev_get_name() output. */ int vlandev_refresh(void) { const struct vlandev_class *class = vlandev_get_class(); return class->vd_refresh ? class->vd_refresh() : 0; } /* Returns a shash mapping from the name of real Ethernet devices used as the * basis of VLAN devices to struct vlan_real_devs. The caller must not modify * or free anything in the returned shash. * * Changes made by vlandev_add() and vlandev_del() may not be reflected in this * function's output without an intervening call to vlandev_refresh(). */ struct shash * vlandev_get_real_devs(void) { return &vlan_real_devs; } /* Returns the name of the VLAN device for VLAN 'vid' on top of * 'real_dev_name', or NULL if there is no such VLAN device. * * Changes made by vlandev_add() and vlandev_del() may not be reflected in this * function's output without an intervening call to vlandev_refresh(). */ const char * vlandev_get_name(const char *real_dev_name, int vid) { const struct vlan_real_dev *real_dev; real_dev = shash_find_data(&vlan_real_devs, real_dev_name); if (real_dev) { const struct vlan_dev *vlan_dev; HMAP_FOR_EACH_WITH_HASH (vlan_dev, hmap_node, hash_int(vid, 0), &real_dev->vlan_devs) { if (vlan_dev->vid == vid) { return vlan_dev->name; } } } return NULL; } /* The Linux vlandev implementation. 
*/ #ifdef LINUX_DATAPATH #include "rtnetlink-link.h" #include #include #include "netdev-linux.h" static struct nln_notifier *vlan_cache_notifier; static bool cache_valid; static void vlan_cache_cb(const struct rtnetlink_link_change *change OVS_UNUSED, void *aux OVS_UNUSED) { cache_valid = false; } static int vlandev_linux_refresh(void) { const char *fn = "/proc/net/vlan/config"; char line[128]; FILE *stream; if (!vlan_cache_notifier) { vlan_cache_notifier = rtnetlink_link_notifier_create(vlan_cache_cb, NULL); if (!vlan_cache_notifier) { return EINVAL; } } if (cache_valid) { return 0; } vlandev_clear__(); /* Repopulate cache. */ stream = fopen(fn, "r"); if (!stream) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); int error = errno; struct stat s; if (error == ENOENT && !stat("/proc", &s)) { /* Probably the vlan module just isn't loaded, and probably that's * because no VLAN devices have been created. * * Not really an error. */ return 0; } VLOG_WARN_RL(&rl, "%s: open failed (%s)", fn, ovs_strerror(error)); return error; } while (fgets(line, sizeof line, stream)) { char vlan_dev[16], real_dev[16]; int vid; if (sscanf(line, "%15[^ |] | %d | %15s", vlan_dev, &vid, real_dev) == 3) { vlandev_add__(vlan_dev, real_dev, vid); } } fclose(stream); cache_valid = true; return 0; } static int do_vlan_ioctl(const char *netdev_name, struct vlan_ioctl_args *via, int cmd, const char *cmd_name) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); int error; via->cmd = cmd; ovs_strlcpy(via->device1, netdev_name, sizeof via->device1); error = af_inet_ioctl(SIOCSIFVLAN, via); if (error) { VLOG_WARN_RL(&rl, "%s: VLAN ioctl %s failed (%s)", netdev_name, cmd_name, ovs_strerror(error)); } return error; } static int vlandev_linux_add(const char *real_dev, int vid) { struct vlan_ioctl_args via; int error; memset(&via, 0, sizeof via); via.u.VID = vid; error = do_vlan_ioctl(real_dev, &via, ADD_VLAN_CMD, "ADD_VLAN_CMD"); if (!error) { cache_valid = false; } return 
error; } static int vlandev_linux_del(const char *vlan_dev) { struct vlan_ioctl_args via; int error; memset(&via, 0, sizeof via); error = do_vlan_ioctl(vlan_dev, &via, DEL_VLAN_CMD, "DEL_VLAN_CMD"); if (!error) { cache_valid = false; } return error; } static const struct vlandev_class vlandev_linux_class = { vlandev_linux_refresh, vlandev_linux_add, vlandev_linux_del }; #endif /* Stub implementation. */ static int vlandev_stub_add(const char *real_dev OVS_UNUSED, int vid OVS_UNUSED) { VLOG_ERR("not supported on non-Linux platform"); return EOPNOTSUPP; } static int vlandev_stub_del(const char *vlan_dev OVS_UNUSED) { VLOG_ERR("not supported on non-Linux platform"); return EOPNOTSUPP; } static const struct vlandev_class vlandev_stub_class = { NULL, /* vd_refresh */ vlandev_stub_add, vlandev_stub_del }; /* Dummy implementation. */ static int vlandev_dummy_add(const char *real_dev, int vid) { char name[IFNAMSIZ]; if (snprintf(name, sizeof name, "%s.%d", real_dev, vid) >= sizeof name) { return ENAMETOOLONG; } return vlandev_add__(name, real_dev, vid); } static int vlandev_dummy_del(const char *vlan_dev) { return vlandev_del__(vlan_dev); } static const struct vlandev_class vlandev_dummy_class = { NULL, /* vd_refresh */ vlandev_dummy_add, vlandev_dummy_del }; static int vlandev_add__(const char *vlan_dev, const char *real_dev, int vid) { uint32_t vid_hash = hash_int(vid, 0); struct vlan_real_dev *vrd; struct vlan_dev *vd; if (vid < 0 || vid > 4095) { return EINVAL; } else if (shash_find(&vlan_devs, vlan_dev)) { return EEXIST; } vrd = shash_find_data(&vlan_real_devs, real_dev); if (!vrd) { vrd = xmalloc(sizeof *vrd); vrd->name = xstrdup(real_dev); hmap_init(&vrd->vlan_devs); shash_add_nocopy(&vlan_real_devs, vrd->name, vrd); } else { HMAP_FOR_EACH_WITH_HASH (vd, hmap_node, vid_hash, &vrd->vlan_devs) { if (vd->vid == vid) { return EEXIST; } } } vd = xmalloc(sizeof *vd); hmap_insert(&vrd->vlan_devs, &vd->hmap_node, vid_hash); vd->name = xstrdup(vlan_dev); vd->vid = vid; 
vd->real_dev = vrd; shash_add_nocopy(&vlan_devs, vd->name, vd); return 0; } static int vlandev_del__(const char *vlan_dev) { struct shash_node *vd_node = shash_find(&vlan_devs, vlan_dev); if (!vd_node) { struct vlan_dev *vd = vd_node->data; struct vlan_real_dev *vrd = vd->real_dev; hmap_remove(&vrd->vlan_devs, &vd->hmap_node); if (hmap_is_empty(&vrd->vlan_devs)) { shash_find_and_delete_assert(&vlan_real_devs, vrd->name); free(vrd); } shash_delete(&vlan_devs, vd_node); free(vd); return 0; } else { return ENOENT; } } /* Clear 'vlan_devs' and 'vlan_real_devs' in preparation for repopulating. */ static void vlandev_clear__(void) { /* We do not free the 'name' members of struct vlan_dev and struct * vlan_real_dev, because the "shash"es own them.. */ struct shash_node *node; shash_clear_free_data(&vlan_devs); SHASH_FOR_EACH (node, &vlan_real_devs) { struct vlan_real_dev *vrd = node->data; hmap_destroy(&vrd->vlan_devs); } shash_clear_free_data(&vlan_real_devs); } openvswitch-2.0.1+git20140120/lib/vlandev.h000066400000000000000000000034361226605124000200360ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef VLANDEV_H #define VLANDEV_H 1 #include "hmap.h" /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) * * This is deprecated. It is only for compatibility with broken device * drivers in old versions of Linux that do not properly support VLANs when * VLAN devices are not used. 
When broken device drivers are no longer in * widespread use, we will delete these interfaces. */ /* A VLAN device (e.g. "eth0.10" for VLAN 10 on eth0). */ struct vlan_dev { struct vlan_real_dev *real_dev; /* Parent, e.g. "eth0". */ struct hmap_node hmap_node; /* In vlan_real_dev's "vlan_devs" map. */ char *name; /* VLAN device name, e.g. "eth0.10". */ int vid; /* VLAN ID, e.g. 10. */ }; /* A device that has VLAN devices broken out of it. */ struct vlan_real_dev { char *name; /* Name, e.g. "eth0". */ struct hmap vlan_devs; /* All child VLAN devices, hashed by VID. */ }; int vlandev_add(const char *real_dev, int vid); int vlandev_del(const char *vlan_dev); int vlandev_refresh(void); struct shash *vlandev_get_real_devs(void); const char *vlandev_get_name(const char *real_dev_name, int vid); #endif /* vlandev.h */ openvswitch-2.0.1+git20140120/lib/vlog-syn.man000066400000000000000000000003231226605124000204710ustar00rootroot00000000000000.IP "Logging options:" [\fB\-v\fR[\fImodule\fR[\fB:\fIfacility\fR[\fB:\fIlevel\fR]]]]\&... .br [\fB\-\-verbose[=\fImodule\fR[\fB:\fIfacility\fR[\fB:\fIlevel\fR]]]]\&... .br [\fB\-\-log\-file\fR[\fB=\fIfile\fR]] openvswitch-2.0.1+git20140120/lib/vlog-unixctl.man000066400000000000000000000053001226605124000213460ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .SS "VLOG COMMANDS" These commands manage \fB\*(PN\fR's logging settings. .IP "\fBvlog/set\fR [\fIspec\fR]" Sets logging levels. Without any \fIspec\fR, sets the log level for every module and facility to \fBdbg\fR. Otherwise, \fIspec\fR is a list of words separated by spaces or commas or colons, up to one from each category below: . .RS .IP \(bu A valid module name, as displayed by the \fBvlog/list\fR command on \fBovs\-appctl\fR(8), limits the log level change to the specified module. . .IP \(bu \fBsyslog\fR, \fBconsole\fR, or \fBfile\fR, to limit the log level change to only to the system log, to the console, or to a file, respectively. . 
.IP \(bu \fBoff\fR, \fBemer\fR, \fBerr\fR, \fBwarn\fR, \fBinfo\fR, or \fBdbg\fR, to control the log level. Messages of the given severity or higher will be logged, and messages of lower severity will be filtered out. \fBoff\fR filters out all messages. See \fBovs\-appctl\fR(8) for a definition of each log level. .RE . .IP Case is not significant within \fIspec\fR. .IP Regardless of the log levels set for \fBfile\fR, logging to a file will not take place unless \fB\*(PN\fR was invoked with the \fB\-\-log\-file\fR option. .IP For compatibility with older versions of OVS, \fBany\fR is accepted as a word but has no effect. .RE .IP "\fBvlog/set PATTERN:\fIfacility\fB:\fIpattern\fR" Sets the log pattern for \fIfacility\fR to \fIpattern\fR. Refer to \fBovs\-appctl\fR(8) for a description of the valid syntax for \fIpattern\fR. . .IP "\fBvlog/list\fR" Lists the supported logging modules and their current levels. . .IP "\fBvlog/reopen\fR" Causes \fB\*(PN\fR to close and reopen its log file. (This is useful after rotating log files, to cause a new log file to be used.) .IP This has no effect unless \fB\*(PN\fR was invoked with the \fB\-\-log\-file\fR option. . .IP "\fBvlog/disable\-rate\-limit \fR[\fImodule\fR]..." .IQ "\fBvlog/enable\-rate\-limit \fR[\fImodule\fR]..." By default, \fB\*(PN\fR limits the rate at which certain messages can be logged. When a message would appear more frequently than the limit, it is suppressed. This saves disk space, makes logs easier to read, and speeds up execution, but occasionally troubleshooting requires more detail. Therefore, \fBvlog/disable\-rate\-limit\fR allows rate limits to be disabled at the level of an individual log module. Specify one or more module names, as displayed by the \fBvlog/list\fR command. Specifying either no module names at all or the keyword \fBany\fR disables rate limits for every log module. . 
.IP The \fBvlog/enable\-rate\-limit\fR command, whose syntax is the same as \fBvlog/disable\-rate\-limit\fR, can be used to re-enable a rate limit that was previously disabled. openvswitch-2.0.1+git20140120/lib/vlog.c000066400000000000000000000745541226605124000173520ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "vlog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "async-append.h" #include "coverage.h" #include "dirs.h" #include "dynamic-string.h" #include "ofpbuf.h" #include "ovs-thread.h" #include "sat-math.h" #include "svec.h" #include "timeval.h" #include "unixctl.h" #include "util.h" VLOG_DEFINE_THIS_MODULE(vlog); COVERAGE_DEFINE(vlog_recursive); /* ovs_assert() logs the assertion message, so using ovs_assert() in this * source file could cause recursion. */ #undef ovs_assert #define ovs_assert use_assert_instead_of_ovs_assert_in_this_module /* Name for each logging level. */ static const char *const level_names[VLL_N_LEVELS] = { #define VLOG_LEVEL(NAME, SYSLOG_LEVEL) #NAME, VLOG_LEVELS #undef VLOG_LEVEL }; /* Syslog value for each logging level. */ static const int syslog_levels[VLL_N_LEVELS] = { #define VLOG_LEVEL(NAME, SYSLOG_LEVEL) SYSLOG_LEVEL, VLOG_LEVELS #undef VLOG_LEVEL }; /* The log modules. 
*/ #if USE_LINKER_SECTIONS extern struct vlog_module *__start_vlog_modules[]; extern struct vlog_module *__stop_vlog_modules[]; #define vlog_modules __start_vlog_modules #define n_vlog_modules (__stop_vlog_modules - __start_vlog_modules) #else #define VLOG_MODULE VLOG_DEFINE_MODULE__ #include "vlog-modules.def" #undef VLOG_MODULE extern struct vlog_module *vlog_modules[]; struct vlog_module *vlog_modules[] = { #define VLOG_MODULE(NAME) &VLM_##NAME, #include "vlog-modules.def" #undef VLOG_MODULE }; #define n_vlog_modules ARRAY_SIZE(vlog_modules) #endif /* Protects the 'pattern' in all "struct facility"s, so that a race between * changing and reading the pattern does not cause an access to freed * memory. */ static struct ovs_rwlock pattern_rwlock = OVS_RWLOCK_INITIALIZER; /* Information about each facility. */ struct facility { const char *name; /* Name. */ char *pattern OVS_GUARDED_BY(pattern_rwlock); /* Current pattern. */ bool default_pattern; /* Whether current pattern is the default. */ }; static struct facility facilities[VLF_N_FACILITIES] = { #define VLOG_FACILITY(NAME, PATTERN) {#NAME, PATTERN, true}, VLOG_FACILITIES #undef VLOG_FACILITY }; /* Sequence number for the message currently being composed. */ DEFINE_STATIC_PER_THREAD_DATA(unsigned int, msg_num, 0); /* VLF_FILE configuration. * * All of the following is protected by 'log_file_mutex', which nests inside * pattern_rwlock. */ static struct ovs_mutex log_file_mutex = OVS_MUTEX_INITIALIZER; static char *log_file_name OVS_GUARDED_BY(log_file_mutex); static int log_fd OVS_GUARDED_BY(log_file_mutex) = -1; static struct async_append *log_writer OVS_GUARDED_BY(log_file_mutex); static bool log_async OVS_GUARDED_BY(log_file_mutex); static void format_log_message(const struct vlog_module *, enum vlog_level, enum vlog_facility, const char *message, va_list, struct ds *) PRINTF_FORMAT(4, 0) OVS_REQ_RDLOCK(&pattern_rwlock); /* Searches the 'n_names' in 'names'. 
Returns the index of a match for * 'target', or 'n_names' if no name matches. */ static size_t search_name_array(const char *target, const char *const *names, size_t n_names) { size_t i; for (i = 0; i < n_names; i++) { assert(names[i]); if (!strcasecmp(names[i], target)) { break; } } return i; } /* Returns the name for logging level 'level'. */ const char * vlog_get_level_name(enum vlog_level level) { assert(level < VLL_N_LEVELS); return level_names[level]; } /* Returns the logging level with the given 'name', or VLL_N_LEVELS if 'name' * is not the name of a logging level. */ enum vlog_level vlog_get_level_val(const char *name) { return search_name_array(name, level_names, ARRAY_SIZE(level_names)); } /* Returns the name for logging facility 'facility'. */ const char * vlog_get_facility_name(enum vlog_facility facility) { assert(facility < VLF_N_FACILITIES); return facilities[facility].name; } /* Returns the logging facility named 'name', or VLF_N_FACILITIES if 'name' is * not the name of a logging facility. */ enum vlog_facility vlog_get_facility_val(const char *name) { size_t i; for (i = 0; i < VLF_N_FACILITIES; i++) { if (!strcasecmp(facilities[i].name, name)) { break; } } return i; } /* Returns the name for logging module 'module'. */ const char * vlog_get_module_name(const struct vlog_module *module) { return module->name; } /* Returns the logging module named 'name', or NULL if 'name' is not the name * of a logging module. */ struct vlog_module * vlog_module_from_name(const char *name) { struct vlog_module **mp; for (mp = vlog_modules; mp < &vlog_modules[n_vlog_modules]; mp++) { if (!strcasecmp(name, (*mp)->name)) { return *mp; } } return NULL; } /* Returns the current logging level for the given 'module' and 'facility'. 
*/ enum vlog_level vlog_get_level(const struct vlog_module *module, enum vlog_facility facility) { assert(facility < VLF_N_FACILITIES); return module->levels[facility]; } static void update_min_level(struct vlog_module *module) OVS_REQUIRES(&log_file_mutex) { enum vlog_facility facility; module->min_level = VLL_OFF; for (facility = 0; facility < VLF_N_FACILITIES; facility++) { if (log_fd >= 0 || facility != VLF_FILE) { enum vlog_level level = module->levels[facility]; if (level > module->min_level) { module->min_level = level; } } } } static void set_facility_level(enum vlog_facility facility, struct vlog_module *module, enum vlog_level level) { assert(facility >= 0 && facility < VLF_N_FACILITIES); assert(level < VLL_N_LEVELS); ovs_mutex_lock(&log_file_mutex); if (!module) { struct vlog_module **mp; for (mp = vlog_modules; mp < &vlog_modules[n_vlog_modules]; mp++) { (*mp)->levels[facility] = level; update_min_level(*mp); } } else { module->levels[facility] = level; update_min_level(module); } ovs_mutex_unlock(&log_file_mutex); } /* Sets the logging level for the given 'module' and 'facility' to 'level'. A * null 'module' or a 'facility' of VLF_ANY_FACILITY is treated as a wildcard * across all modules or facilities, respectively. */ void vlog_set_levels(struct vlog_module *module, enum vlog_facility facility, enum vlog_level level) { assert(facility < VLF_N_FACILITIES || facility == VLF_ANY_FACILITY); if (facility == VLF_ANY_FACILITY) { for (facility = 0; facility < VLF_N_FACILITIES; facility++) { set_facility_level(facility, module, level); } } else { set_facility_level(facility, module, level); } } static void do_set_pattern(enum vlog_facility facility, const char *pattern) { struct facility *f = &facilities[facility]; ovs_rwlock_wrlock(&pattern_rwlock); if (!f->default_pattern) { free(f->pattern); } else { f->default_pattern = false; } f->pattern = xstrdup(pattern); ovs_rwlock_unlock(&pattern_rwlock); } /* Sets the pattern for the given 'facility' to 'pattern'. 
*/ void vlog_set_pattern(enum vlog_facility facility, const char *pattern) { assert(facility < VLF_N_FACILITIES || facility == VLF_ANY_FACILITY); if (facility == VLF_ANY_FACILITY) { for (facility = 0; facility < VLF_N_FACILITIES; facility++) { do_set_pattern(facility, pattern); } } else { do_set_pattern(facility, pattern); } } /* Sets the name of the log file used by VLF_FILE to 'file_name', or to the * default file name if 'file_name' is null. Returns 0 if successful, * otherwise a positive errno value. */ int vlog_set_log_file(const char *file_name) { char *new_log_file_name; struct vlog_module **mp; struct stat old_stat; struct stat new_stat; int new_log_fd; bool same_file; bool log_close; /* Open new log file. */ new_log_file_name = (file_name ? xstrdup(file_name) : xasprintf("%s/%s.log", ovs_logdir(), program_name)); new_log_fd = open(new_log_file_name, O_WRONLY | O_CREAT | O_APPEND, 0666); if (new_log_fd < 0) { VLOG_WARN("failed to open %s for logging: %s", new_log_file_name, ovs_strerror(errno)); free(new_log_file_name); return errno; } /* If the new log file is the same one we already have open, bail out. */ ovs_mutex_lock(&log_file_mutex); same_file = (log_fd >= 0 && new_log_fd >= 0 && !fstat(log_fd, &old_stat) && !fstat(new_log_fd, &new_stat) && old_stat.st_dev == new_stat.st_dev && old_stat.st_ino == new_stat.st_ino); ovs_mutex_unlock(&log_file_mutex); if (same_file) { close(new_log_fd); free(new_log_file_name); return 0; } /* Log closing old log file (we can't log while holding log_file_mutex). */ ovs_mutex_lock(&log_file_mutex); log_close = log_fd >= 0; ovs_mutex_unlock(&log_file_mutex); if (log_close) { VLOG_INFO("closing log file"); } /* Close old log file, if any, and install new one. 
*/ ovs_mutex_lock(&log_file_mutex); if (log_fd >= 0) { free(log_file_name); close(log_fd); async_append_destroy(log_writer); } log_file_name = xstrdup(new_log_file_name); log_fd = new_log_fd; if (log_async) { log_writer = async_append_create(new_log_fd); } for (mp = vlog_modules; mp < &vlog_modules[n_vlog_modules]; mp++) { update_min_level(*mp); } ovs_mutex_unlock(&log_file_mutex); /* Log opening new log file (we can't log while holding log_file_mutex). */ VLOG_INFO("opened log file %s", new_log_file_name); free(new_log_file_name); return 0; } /* Closes and then attempts to re-open the current log file. (This is useful * just after log rotation, to ensure that the new log file starts being used.) * Returns 0 if successful, otherwise a positive errno value. */ int vlog_reopen_log_file(void) { char *fn; ovs_mutex_lock(&log_file_mutex); fn = log_file_name ? xstrdup(log_file_name) : NULL; ovs_mutex_unlock(&log_file_mutex); if (fn) { int error = vlog_set_log_file(fn); free(fn); return error; } else { return 0; } } /* Set debugging levels. Returns null if successful, otherwise an error * message that the caller must free(). */ char * vlog_set_levels_from_string(const char *s_) { char *s = xstrdup(s_); char *save_ptr = NULL; char *msg = NULL; char *word; word = strtok_r(s, " ,:\t", &save_ptr); if (word && !strcasecmp(word, "PATTERN")) { enum vlog_facility facility; word = strtok_r(NULL, " ,:\t", &save_ptr); if (!word) { msg = xstrdup("missing facility"); goto exit; } facility = (!strcasecmp(word, "ANY") ? 
VLF_ANY_FACILITY : vlog_get_facility_val(word)); if (facility == VLF_N_FACILITIES) { msg = xasprintf("unknown facility \"%s\"", word); goto exit; } vlog_set_pattern(facility, save_ptr); } else { struct vlog_module *module = NULL; enum vlog_level level = VLL_N_LEVELS; enum vlog_facility facility = VLF_N_FACILITIES; for (; word != NULL; word = strtok_r(NULL, " ,:\t", &save_ptr)) { if (!strcasecmp(word, "ANY")) { continue; } else if (vlog_get_facility_val(word) != VLF_N_FACILITIES) { if (facility != VLF_N_FACILITIES) { msg = xstrdup("cannot specify multiple facilities"); goto exit; } facility = vlog_get_facility_val(word); } else if (vlog_get_level_val(word) != VLL_N_LEVELS) { if (level != VLL_N_LEVELS) { msg = xstrdup("cannot specify multiple levels"); goto exit; } level = vlog_get_level_val(word); } else if (vlog_module_from_name(word)) { if (module) { msg = xstrdup("cannot specify multiple modules"); goto exit; } module = vlog_module_from_name(word); } else { msg = xasprintf("no facility, level, or module \"%s\"", word); goto exit; } } if (facility == VLF_N_FACILITIES) { facility = VLF_ANY_FACILITY; } if (level == VLL_N_LEVELS) { level = VLL_DBG; } vlog_set_levels(module, facility, level); } exit: free(s); return msg; } /* Set debugging levels. Abort with an error message if 's' is invalid. */ void vlog_set_levels_from_string_assert(const char *s) { char *error = vlog_set_levels_from_string(s); if (error) { ovs_fatal(0, "%s", error); } } /* If 'arg' is null, configure maximum verbosity. Otherwise, sets * configuration according to 'arg' (see vlog_set_levels_from_string()). 
*/ void vlog_set_verbosity(const char *arg) { if (arg) { char *msg = vlog_set_levels_from_string(arg); if (msg) { ovs_fatal(0, "processing \"%s\": %s", arg, msg); } } else { vlog_set_levels(NULL, VLF_ANY_FACILITY, VLL_DBG); } } static void vlog_unixctl_set(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { int i; for (i = 1; i < argc; i++) { char *msg = vlog_set_levels_from_string(argv[i]); if (msg) { unixctl_command_reply_error(conn, msg); free(msg); return; } } unixctl_command_reply(conn, NULL); } static void vlog_unixctl_list(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { char *msg = vlog_get_levels(); unixctl_command_reply(conn, msg); free(msg); } static void vlog_unixctl_reopen(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { bool has_log_file; ovs_mutex_lock(&log_file_mutex); has_log_file = log_file_name != NULL; ovs_mutex_unlock(&log_file_mutex); if (has_log_file) { int error = vlog_reopen_log_file(); if (error) { unixctl_command_reply_error(conn, ovs_strerror(errno)); } else { unixctl_command_reply(conn, NULL); } } else { unixctl_command_reply_error(conn, "Logging to file not configured"); } } static void set_all_rate_limits(bool enable) { struct vlog_module **mp; for (mp = vlog_modules; mp < &vlog_modules[n_vlog_modules]; mp++) { (*mp)->honor_rate_limits = enable; } } static void set_rate_limits(struct unixctl_conn *conn, int argc, const char *argv[], bool enable) { if (argc > 1) { int i; for (i = 1; i < argc; i++) { if (!strcasecmp(argv[i], "ANY")) { set_all_rate_limits(enable); } else { struct vlog_module *module = vlog_module_from_name(argv[i]); if (!module) { unixctl_command_reply_error(conn, "unknown module"); return; } module->honor_rate_limits = enable; } } } else { set_all_rate_limits(enable); } unixctl_command_reply(conn, NULL); } static void vlog_enable_rate_limit(struct unixctl_conn *conn, int argc, const 
char *argv[], void *aux OVS_UNUSED) { set_rate_limits(conn, argc, argv, true); } static void vlog_disable_rate_limit(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { set_rate_limits(conn, argc, argv, false); } static void vlog_init__(void) { static char *program_name_copy; long long int now; /* openlog() is allowed to keep the pointer passed in, without making a * copy. The daemonize code sometimes frees and replaces 'program_name', * so make a private copy just for openlog(). (We keep a pointer to the * private copy to suppress memory leak warnings in case openlog() does * make its own copy.) */ program_name_copy = program_name ? xstrdup(program_name) : NULL; openlog(program_name_copy, LOG_NDELAY, LOG_DAEMON); now = time_wall_msec(); if (now < 0) { char *s = xastrftime_msec("%a, %d %b %Y %H:%M:%S", now, true); VLOG_ERR("current time is negative: %s (%lld)", s, now); free(s); } unixctl_command_register( "vlog/set", "{spec | PATTERN:facility:pattern}", 1, INT_MAX, vlog_unixctl_set, NULL); unixctl_command_register("vlog/list", "", 0, 0, vlog_unixctl_list, NULL); unixctl_command_register("vlog/enable-rate-limit", "[module]...", 0, INT_MAX, vlog_enable_rate_limit, NULL); unixctl_command_register("vlog/disable-rate-limit", "[module]...", 0, INT_MAX, vlog_disable_rate_limit, NULL); unixctl_command_register("vlog/reopen", "", 0, 0, vlog_unixctl_reopen, NULL); } /* Initializes the logging subsystem and registers its unixctl server * commands. */ void vlog_init(void) { static pthread_once_t once = PTHREAD_ONCE_INIT; pthread_once(&once, vlog_init__); } /* Enables VLF_FILE log output to be written asynchronously to disk. * Asynchronous file writes avoid blocking the process in the case of a busy * disk, but on the other hand they are less robust: there is a chance that the * write will not make it to the log file if the process crashes soon after the * log call. 
*/ void vlog_enable_async(void) { ovs_mutex_lock(&log_file_mutex); log_async = true; if (log_fd >= 0 && !log_writer) { log_writer = async_append_create(log_fd); } ovs_mutex_unlock(&log_file_mutex); } /* Print the current logging level for each module. */ char * vlog_get_levels(void) { struct ds s = DS_EMPTY_INITIALIZER; struct vlog_module **mp; struct svec lines = SVEC_EMPTY_INITIALIZER; char *line; size_t i; ds_put_format(&s, " console syslog file\n"); ds_put_format(&s, " ------- ------ ------\n"); for (mp = vlog_modules; mp < &vlog_modules[n_vlog_modules]; mp++) { struct ds line; ds_init(&line); ds_put_format(&line, "%-16s %4s %4s %4s", vlog_get_module_name(*mp), vlog_get_level_name(vlog_get_level(*mp, VLF_CONSOLE)), vlog_get_level_name(vlog_get_level(*mp, VLF_SYSLOG)), vlog_get_level_name(vlog_get_level(*mp, VLF_FILE))); if (!(*mp)->honor_rate_limits) { ds_put_cstr(&line, " (rate limiting disabled)"); } ds_put_char(&line, '\n'); svec_add_nocopy(&lines, ds_steal_cstr(&line)); } svec_sort(&lines); SVEC_FOR_EACH (i, line, &lines) { ds_put_cstr(&s, line); } svec_destroy(&lines); return ds_cstr(&s); } /* Returns true if a log message emitted for the given 'module' and 'level' * would cause some log output, false if that module and level are completely * disabled. 
*/ bool vlog_is_enabled(const struct vlog_module *module, enum vlog_level level) { return module->min_level >= level; } static const char * fetch_braces(const char *p, const char *def, char *out, size_t out_size) { if (*p == '{') { size_t n = strcspn(p + 1, "}"); size_t n_copy = MIN(n, out_size - 1); memcpy(out, p + 1, n_copy); out[n_copy] = '\0'; p += n + 2; } else { ovs_strlcpy(out, def, out_size); } return p; } static void format_log_message(const struct vlog_module *module, enum vlog_level level, enum vlog_facility facility, const char *message, va_list args_, struct ds *s) { char tmp[128]; va_list args; const char *p; ds_clear(s); for (p = facilities[facility].pattern; *p != '\0'; ) { const char *subprogram_name; enum { LEFT, RIGHT } justify = RIGHT; int pad = '0'; size_t length, field, used; if (*p != '%') { ds_put_char(s, *p++); continue; } p++; if (*p == '-') { justify = LEFT; p++; } if (*p == '0') { pad = '0'; p++; } field = 0; while (isdigit((unsigned char)*p)) { field = (field * 10) + (*p - '0'); p++; } length = s->length; switch (*p++) { case 'A': ds_put_cstr(s, program_name); break; case 'c': p = fetch_braces(p, "", tmp, sizeof tmp); ds_put_cstr(s, vlog_get_module_name(module)); break; case 'd': p = fetch_braces(p, "%Y-%m-%d %H:%M:%S.###", tmp, sizeof tmp); ds_put_strftime_msec(s, tmp, time_wall_msec(), false); break; case 'D': p = fetch_braces(p, "%Y-%m-%d %H:%M:%S.###", tmp, sizeof tmp); ds_put_strftime_msec(s, tmp, time_wall_msec(), true); break; case 'm': /* Format user-supplied log message and trim trailing new-lines. 
*/ length = s->length; va_copy(args, args_); ds_put_format_valist(s, message, args); va_end(args); while (s->length > length && s->string[s->length - 1] == '\n') { s->length--; } break; case 'N': ds_put_format(s, "%u", *msg_num_get_unsafe()); break; case 'n': ds_put_char(s, '\n'); break; case 'p': ds_put_cstr(s, vlog_get_level_name(level)); break; case 'P': ds_put_format(s, "%ld", (long int) getpid()); break; case 'r': ds_put_format(s, "%lld", time_msec() - time_boot_msec()); break; case 't': subprogram_name = get_subprogram_name(); ds_put_cstr(s, subprogram_name[0] ? subprogram_name : "main"); break; case 'T': subprogram_name = get_subprogram_name(); if (subprogram_name[0]) { ds_put_format(s, "(%s)", subprogram_name); } break; default: ds_put_char(s, p[-1]); break; } used = s->length - length; if (used < field) { size_t n_pad = field - used; if (justify == RIGHT) { ds_put_uninit(s, n_pad); memmove(&s->string[length + n_pad], &s->string[length], used); memset(&s->string[length], pad, n_pad); } else { ds_put_char_multiple(s, pad, n_pad); } } } } /* Writes 'message' to the log at the given 'level' and as coming from the * given 'module'. * * Guaranteed to preserve errno. 
*/ void vlog_valist(const struct vlog_module *module, enum vlog_level level, const char *message, va_list args) { bool log_to_console = module->levels[VLF_CONSOLE] >= level; bool log_to_syslog = module->levels[VLF_SYSLOG] >= level; bool log_to_file; ovs_mutex_lock(&log_file_mutex); log_to_file = module->levels[VLF_FILE] >= level && log_fd >= 0; ovs_mutex_unlock(&log_file_mutex); if (log_to_console || log_to_syslog || log_to_file) { int save_errno = errno; struct ds s; vlog_init(); ds_init(&s); ds_reserve(&s, 1024); ++*msg_num_get(); ovs_rwlock_rdlock(&pattern_rwlock); if (log_to_console) { format_log_message(module, level, VLF_CONSOLE, message, args, &s); ds_put_char(&s, '\n'); fputs(ds_cstr(&s), stderr); } if (log_to_syslog) { int syslog_level = syslog_levels[level]; char *save_ptr = NULL; char *line; format_log_message(module, level, VLF_SYSLOG, message, args, &s); for (line = strtok_r(s.string, "\n", &save_ptr); line; line = strtok_r(NULL, "\n", &save_ptr)) { syslog(syslog_level, "%s", line); } } if (log_to_file) { format_log_message(module, level, VLF_FILE, message, args, &s); ds_put_char(&s, '\n'); ovs_mutex_lock(&log_file_mutex); if (log_fd >= 0) { if (log_writer) { async_append_write(log_writer, s.string, s.length); if (level == VLL_EMER) { async_append_flush(log_writer); } } else { ignore(write(log_fd, s.string, s.length)); } } ovs_mutex_unlock(&log_file_mutex); } ovs_rwlock_unlock(&pattern_rwlock); ds_destroy(&s); errno = save_errno; } } void vlog(const struct vlog_module *module, enum vlog_level level, const char *message, ...) { va_list args; va_start(args, message); vlog_valist(module, level, message, args); va_end(args); } /* Logs 'message' to 'module' at maximum verbosity, then exits with a failure * exit code. Always writes the message to stderr, even if the console * facility is disabled. * * Choose this function instead of vlog_abort_valist() if the daemon monitoring * facility shouldn't automatically restart the current daemon. 
*/ void vlog_fatal_valist(const struct vlog_module *module_, const char *message, va_list args) { struct vlog_module *module = CONST_CAST(struct vlog_module *, module_); /* Don't log this message to the console to avoid redundancy with the * message written by the later ovs_fatal_valist(). */ module->levels[VLF_CONSOLE] = VLL_OFF; vlog_valist(module, VLL_EMER, message, args); ovs_fatal_valist(0, message, args); } /* Logs 'message' to 'module' at maximum verbosity, then exits with a failure * exit code. Always writes the message to stderr, even if the console * facility is disabled. * * Choose this function instead of vlog_abort() if the daemon monitoring * facility shouldn't automatically restart the current daemon. */ void vlog_fatal(const struct vlog_module *module, const char *message, ...) { va_list args; va_start(args, message); vlog_fatal_valist(module, message, args); va_end(args); } /* Logs 'message' to 'module' at maximum verbosity, then calls abort(). Always * writes the message to stderr, even if the console facility is disabled. * * Choose this function instead of vlog_fatal_valist() if the daemon monitoring * facility should automatically restart the current daemon. */ void vlog_abort_valist(const struct vlog_module *module_, const char *message, va_list args) { struct vlog_module *module = (struct vlog_module *) module_; /* Don't log this message to the console to avoid redundancy with the * message written by the later ovs_abort_valist(). */ module->levels[VLF_CONSOLE] = VLL_OFF; vlog_valist(module, VLL_EMER, message, args); ovs_abort_valist(0, message, args); } /* Logs 'message' to 'module' at maximum verbosity, then calls abort(). Always * writes the message to stderr, even if the console facility is disabled. * * Choose this function instead of vlog_fatal() if the daemon monitoring * facility should automatically restart the current daemon. */ void vlog_abort(const struct vlog_module *module, const char *message, ...) 
{ va_list args; va_start(args, message); vlog_abort_valist(module, message, args); va_end(args); } bool vlog_should_drop(const struct vlog_module *module, enum vlog_level level, struct vlog_rate_limit *rl) { if (!module->honor_rate_limits) { return false; } if (!vlog_is_enabled(module, level)) { return true; } ovs_mutex_lock(&rl->mutex); if (!token_bucket_withdraw(&rl->token_bucket, VLOG_MSG_TOKENS)) { time_t now = time_now(); if (!rl->n_dropped) { rl->first_dropped = now; } rl->last_dropped = now; rl->n_dropped++; ovs_mutex_unlock(&rl->mutex); return true; } if (!rl->n_dropped) { ovs_mutex_unlock(&rl->mutex); } else { time_t now = time_now(); unsigned int n_dropped = rl->n_dropped; unsigned int first_dropped_elapsed = now - rl->first_dropped; unsigned int last_dropped_elapsed = now - rl->last_dropped; rl->n_dropped = 0; ovs_mutex_unlock(&rl->mutex); vlog(module, level, "Dropped %u log messages in last %u seconds (most recently, " "%u seconds ago) due to excessive rate", n_dropped, first_dropped_elapsed, last_dropped_elapsed); } return false; } void vlog_rate_limit(const struct vlog_module *module, enum vlog_level level, struct vlog_rate_limit *rl, const char *message, ...) { if (!vlog_should_drop(module, level, rl)) { va_list args; va_start(args, message); vlog_valist(module, level, message, args); va_end(args); } } void vlog_usage(void) { printf("\nLogging options:\n" " -v, --verbose=[SPEC] set logging levels\n" " -v, --verbose set maximum verbosity level\n" " --log-file[=FILE] enable logging to specified FILE\n" " (default: %s/%s.log)\n", ovs_logdir(), program_name); } openvswitch-2.0.1+git20140120/lib/vlog.h000066400000000000000000000257001226605124000173440ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef VLOG_H #define VLOG_H 1 /* Logging. * * * Thread-safety * ============= * * Fully thread safe. */ #include #include #include #include #include "compiler.h" #include "ovs-thread.h" #include "sat-math.h" #include "token-bucket.h" #include "util.h" #ifdef __cplusplus extern "C" { #endif /* Logging severity levels. * * ovs-appctl(8) defines each of the log levels. */ #define VLOG_LEVELS \ VLOG_LEVEL(OFF, LOG_ALERT) \ VLOG_LEVEL(EMER, LOG_ALERT) \ VLOG_LEVEL(ERR, LOG_ERR) \ VLOG_LEVEL(WARN, LOG_WARNING) \ VLOG_LEVEL(INFO, LOG_NOTICE) \ VLOG_LEVEL(DBG, LOG_DEBUG) enum vlog_level { #define VLOG_LEVEL(NAME, SYSLOG_LEVEL) VLL_##NAME, VLOG_LEVELS #undef VLOG_LEVEL VLL_N_LEVELS }; const char *vlog_get_level_name(enum vlog_level); enum vlog_level vlog_get_level_val(const char *name); /* Facilities that we can log to. */ #define VLOG_FACILITIES \ VLOG_FACILITY(SYSLOG, "ovs|%05N|%c%T|%p|%m") \ VLOG_FACILITY(CONSOLE, "%D{%Y-%m-%dT%H:%M:%SZ}|%05N|%c%T|%p|%m") \ VLOG_FACILITY(FILE, "%D{%Y-%m-%dT%H:%M:%S.###Z}|%05N|%c%T|%p|%m") enum vlog_facility { #define VLOG_FACILITY(NAME, PATTERN) VLF_##NAME, VLOG_FACILITIES #undef VLOG_FACILITY VLF_N_FACILITIES, VLF_ANY_FACILITY = -1 }; const char *vlog_get_facility_name(enum vlog_facility); enum vlog_facility vlog_get_facility_val(const char *name); /* A log module. */ struct vlog_module { const char *name; /* User-visible name. */ int levels[VLF_N_FACILITIES]; /* Minimum log level for each facility. */ int min_level; /* Minimum log level for any facility. */ bool honor_rate_limits; /* Set false to ignore rate limits. 
*/ }; /* Creates and initializes a global instance of a module named MODULE. */ #if USE_LINKER_SECTIONS #define VLOG_DEFINE_MODULE(MODULE) \ VLOG_DEFINE_MODULE__(MODULE) \ extern struct vlog_module *const vlog_module_ptr_##MODULE; \ struct vlog_module *const vlog_module_ptr_##MODULE \ __attribute__((section("vlog_modules"))) = &VLM_##MODULE #else #define VLOG_DEFINE_MODULE(MODULE) extern struct vlog_module VLM_##MODULE #endif const char *vlog_get_module_name(const struct vlog_module *); struct vlog_module *vlog_module_from_name(const char *name); /* Rate-limiter for log messages. */ struct vlog_rate_limit { struct token_bucket token_bucket; time_t first_dropped; /* Time first message was dropped. */ time_t last_dropped; /* Time of most recent message drop. */ unsigned int n_dropped; /* Number of messages dropped. */ struct ovs_mutex mutex; /* Mutual exclusion for rate limit. */ }; /* Number of tokens to emit a message. We add 'rate' tokens per millisecond, * thus 60,000 tokens are required to emit one message per minute. */ #define VLOG_MSG_TOKENS (60 * 1000) /* Initializer for a struct vlog_rate_limit, to set up a maximum rate of RATE * messages per minute and a maximum burst size of BURST messages. */ #define VLOG_RATE_LIMIT_INIT(RATE, BURST) \ { \ TOKEN_BUCKET_INIT(RATE, SAT_MUL(BURST, VLOG_MSG_TOKENS)), \ 0, /* first_dropped */ \ 0, /* last_dropped */ \ 0, /* n_dropped */ \ OVS_MUTEX_INITIALIZER /* mutex */ \ } /* Configuring how each module logs messages. 
*/ enum vlog_level vlog_get_level(const struct vlog_module *, enum vlog_facility); void vlog_set_levels(struct vlog_module *, enum vlog_facility, enum vlog_level); char *vlog_set_levels_from_string(const char *) WARN_UNUSED_RESULT; void vlog_set_levels_from_string_assert(const char *); char *vlog_get_levels(void); bool vlog_is_enabled(const struct vlog_module *, enum vlog_level); bool vlog_should_drop(const struct vlog_module *, enum vlog_level, struct vlog_rate_limit *); void vlog_set_verbosity(const char *arg); /* Configuring log facilities. */ void vlog_set_pattern(enum vlog_facility, const char *pattern); int vlog_set_log_file(const char *file_name); int vlog_reopen_log_file(void); /* Initialization. */ void vlog_init(void); void vlog_enable_async(void); /* Functions for actual logging. */ void vlog(const struct vlog_module *, enum vlog_level, const char *format, ...) PRINTF_FORMAT (3, 4); void vlog_valist(const struct vlog_module *, enum vlog_level, const char *, va_list) PRINTF_FORMAT (3, 0); void vlog_fatal(const struct vlog_module *, const char *format, ...) PRINTF_FORMAT (2, 3) NO_RETURN; void vlog_fatal_valist(const struct vlog_module *, const char *format, va_list) PRINTF_FORMAT (2, 0) NO_RETURN; void vlog_abort(const struct vlog_module *, const char *format, ...) PRINTF_FORMAT (2, 3) NO_RETURN; void vlog_abort_valist(const struct vlog_module *, const char *format, va_list) PRINTF_FORMAT (2, 0) NO_RETURN; void vlog_rate_limit(const struct vlog_module *, enum vlog_level, struct vlog_rate_limit *, const char *, ...) PRINTF_FORMAT (4, 5); /* Creates and initializes a global instance of a module named MODULE, and * defines a static variable named THIS_MODULE that points to it, for use with * the convenience macros below. */ #define VLOG_DEFINE_THIS_MODULE(MODULE) \ VLOG_DEFINE_MODULE(MODULE); \ static struct vlog_module *const THIS_MODULE = &VLM_##MODULE /* Convenience macros. 
These assume that THIS_MODULE points to a "struct * vlog_module" for the current module, as set up by e.g. the * VLOG_DEFINE_MODULE macro above. * * Guaranteed to preserve errno. */ #define VLOG_FATAL(...) vlog_fatal(THIS_MODULE, __VA_ARGS__) #define VLOG_ABORT(...) vlog_abort(THIS_MODULE, __VA_ARGS__) #define VLOG_EMER(...) VLOG(VLL_EMER, __VA_ARGS__) #define VLOG_ERR(...) VLOG(VLL_ERR, __VA_ARGS__) #define VLOG_WARN(...) VLOG(VLL_WARN, __VA_ARGS__) #define VLOG_INFO(...) VLOG(VLL_INFO, __VA_ARGS__) #define VLOG_DBG(...) VLOG(VLL_DBG, __VA_ARGS__) /* More convenience macros, for testing whether a given level is enabled in * THIS_MODULE. When constructing a log message is expensive, this enables it * to be skipped. */ #define VLOG_IS_ERR_ENABLED() vlog_is_enabled(THIS_MODULE, VLL_ERR) #define VLOG_IS_WARN_ENABLED() vlog_is_enabled(THIS_MODULE, VLL_WARN) #define VLOG_IS_INFO_ENABLED() vlog_is_enabled(THIS_MODULE, VLL_INFO) #define VLOG_IS_DBG_ENABLED() vlog_is_enabled(THIS_MODULE, VLL_DBG) /* Convenience macros for rate-limiting. * Guaranteed to preserve errno. */ #define VLOG_ERR_RL(RL, ...) VLOG_RL(RL, VLL_ERR, __VA_ARGS__) #define VLOG_WARN_RL(RL, ...) VLOG_RL(RL, VLL_WARN, __VA_ARGS__) #define VLOG_INFO_RL(RL, ...) VLOG_RL(RL, VLL_INFO, __VA_ARGS__) #define VLOG_DBG_RL(RL, ...) VLOG_RL(RL, VLL_DBG, __VA_ARGS__) #define VLOG_DROP_ERR(RL) vlog_should_drop(THIS_MODULE, VLL_ERR, RL) #define VLOG_DROP_WARN(RL) vlog_should_drop(THIS_MODULE, VLL_WARN, RL) #define VLOG_DROP_INFO(RL) vlog_should_drop(THIS_MODULE, VLL_INFO, RL) #define VLOG_DROP_DBG(RL) vlog_should_drop(THIS_MODULE, VLL_DBG, RL) /* Macros for logging at most once per execution. */ #define VLOG_ERR_ONCE(...) VLOG_ONCE(VLL_ERR, __VA_ARGS__) #define VLOG_WARN_ONCE(...) VLOG_ONCE(VLL_WARN, __VA_ARGS__) #define VLOG_INFO_ONCE(...) VLOG_ONCE(VLL_INFO, __VA_ARGS__) #define VLOG_DBG_ONCE(...) VLOG_ONCE(VLL_DBG, __VA_ARGS__) /* Command line processing. 
*/ #define VLOG_OPTION_ENUMS OPT_LOG_FILE #define VLOG_LONG_OPTIONS \ {"verbose", optional_argument, NULL, 'v'}, \ {"log-file", optional_argument, NULL, OPT_LOG_FILE} #define VLOG_OPTION_HANDLERS \ case 'v': \ vlog_set_verbosity(optarg); \ break; \ case OPT_LOG_FILE: \ vlog_set_log_file(optarg); \ break; void vlog_usage(void); /* Implementation details. */ #define VLOG(LEVEL, ...) \ do { \ enum vlog_level level__ = LEVEL; \ if (THIS_MODULE->min_level >= level__) { \ vlog(THIS_MODULE, level__, __VA_ARGS__); \ } \ } while (0) #define VLOG_RL(RL, LEVEL, ...) \ do { \ enum vlog_level level__ = LEVEL; \ if (THIS_MODULE->min_level >= level__) { \ vlog_rate_limit(THIS_MODULE, level__, RL, __VA_ARGS__); \ } \ } while (0) #define VLOG_ONCE(LEVEL, ...) \ do { \ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; \ if (ovsthread_once_start(&once)) { \ vlog(THIS_MODULE, LEVEL, __VA_ARGS__); \ ovsthread_once_done(&once); \ } \ } while (0) #define VLOG_DEFINE_MODULE__(MODULE) \ extern struct vlog_module VLM_##MODULE; \ struct vlog_module VLM_##MODULE = \ { \ #MODULE, /* name */ \ { [ 0 ... VLF_N_FACILITIES - 1] = VLL_INFO }, /* levels */ \ VLL_INFO, /* min_level */ \ true /* honor_rate_limits */ \ }; #ifdef __cplusplus } #endif #endif /* vlog.h */ openvswitch-2.0.1+git20140120/lib/vlog.man000066400000000000000000000036761226605124000177000ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .IP "\fB\-v\fR[\fIspec\fR] .IQ "\fB\-\-verbose=\fR[\fIspec\fR] . Sets logging levels. Without any \fIspec\fR, sets the log level for every module and facility to \fBdbg\fR. Otherwise, \fIspec\fR is a list of words separated by spaces or commas or colons, up to one from each category below: . .RS .IP \(bu A valid module name, as displayed by the \fBvlog/list\fR command on \fBovs\-appctl\fR(8), limits the log level change to the specified module. . 
.IP \(bu \fBsyslog\fR, \fBconsole\fR, or \fBfile\fR, to limit the log level change to only to the system log, to the console, or to a file, respectively. . .IP \(bu \fBoff\fR, \fBemer\fR, \fBerr\fR, \fBwarn\fR, \fBinfo\fR, or \fBdbg\fR, to control the log level. Messages of the given severity or higher will be logged, and messages of lower severity will be filtered out. \fBoff\fR filters out all messages. See \fBovs\-appctl\fR(8) for a definition of each log level. .RE . .IP Case is not significant within \fIspec\fR. .IP Regardless of the log levels set for \fBfile\fR, logging to a file will not take place unless \fB\-\-log\-file\fR is also specified (see below). .IP For compatibility with older versions of OVS, \fBany\fR is accepted as a word but has no effect. . .IP "\fB\-v\fR" .IQ "\fB\-\-verbose\fR" Sets the maximum logging verbosity level, equivalent to \fB\-\-verbose=dbg\fR. . .\" Python vlog doesn't implement -vPATTERN so only document it if .\" \*(PY is empty: .ie dPY .el \{ .IP "\fB\-vPATTERN:\fIfacility\fB:\fIpattern\fR" .IQ "\fB\-\-verbose=PATTERN:\fIfacility\fB:\fIpattern\fR" Sets the log pattern for \fIfacility\fR to \fIpattern\fR. Refer to \fBovs\-appctl\fR(8) for a description of the valid syntax for \fIpattern\fR. \} . .TP \fB\-\-log\-file\fR[\fB=\fIfile\fR] Enables logging to a file. If \fIfile\fR is specified, then it is used as the exact name for the log file. The default log file name used if \fIfile\fR is omitted is \fB@LOGDIR@/\*(PN.log\fR. openvswitch-2.0.1+git20140120/lib/vswitch-idl.ann000066400000000000000000000005401226605124000211520ustar00rootroot00000000000000# -*- python -*- # This code, when invoked by "ovsdb-idlc annotate" (by the build # process), annotates vswitch.ovsschema with additional data that give # the ovsdb-idl engine information about the types involved, so that # it can generate more programmer-friendly data structures. 
s["idlPrefix"] = "ovsrec_" s["idlHeader"] = "\"lib/vswitch-idl.h\"" openvswitch-2.0.1+git20140120/m4/000077500000000000000000000000001226605124000157725ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/m4/ax_check_openssl.m4000066400000000000000000000077761226605124000215650ustar00rootroot00000000000000# =========================================================================== # http://www.gnu.org/software/autoconf-archive/ax_check_openssl.html # =========================================================================== # # SYNOPSIS # # AX_CHECK_OPENSSL([action-if-found[, action-if-not-found]]) # # DESCRIPTION # # Look for OpenSSL in a number of default spots, or in a user-selected # spot (via --with-openssl). Sets # # SSL_INCLUDES to the include directives required # SSL_LIBS to the -l directives required # SSL_LDFLAGS to the -L or -R flags required # # and calls ACTION-IF-FOUND or ACTION-IF-NOT-FOUND appropriately # # This macro sets SSL_INCLUDES such that source files should use the # openssl/ directory in include directives: # # #include # # LICENSE # # Copyright (c) 2009,2010 Zmanda Inc. # Copyright (c) 2009,2010 Dustin J. Mitchell # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. #serial 8 AU_ALIAS([CHECK_SSL], [AX_CHECK_OPENSSL]) AC_DEFUN([AX_CHECK_OPENSSL], [ found=false AC_ARG_WITH([openssl], [AS_HELP_STRING([--with-openssl=DIR], [root of the OpenSSL directory])], [ case "$withval" in "" | y | ye | yes | n | no) AC_MSG_ERROR([Invalid --with-openssl value]) ;; *) ssldirs="$withval" ;; esac ], [ # if pkg-config is installed and openssl has installed a .pc file, # then use that information and don't search ssldirs AC_PATH_PROG([PKG_CONFIG], [pkg-config]) if test x"$PKG_CONFIG" != x""; then SSL_LDFLAGS=`$PKG_CONFIG openssl --libs-only-L 2>/dev/null` if test $? 
= 0; then SSL_LIBS=`$PKG_CONFIG openssl --libs-only-l 2>/dev/null` SSL_INCLUDES=`$PKG_CONFIG openssl --cflags-only-I 2>/dev/null` found=true fi fi # no such luck; use some default ssldirs if ! $found; then ssldirs="/usr/local/ssl /usr/lib/ssl /usr/ssl /usr/pkg /usr/local /usr" fi ] ) # note that we #include , so the OpenSSL headers have to be in # an 'openssl' subdirectory if ! $found; then SSL_INCLUDES= for ssldir in $ssldirs; do AC_MSG_CHECKING([for openssl/ssl.h in $ssldir]) if test -f "$ssldir/include/openssl/ssl.h"; then SSL_INCLUDES="-I$ssldir/include" SSL_LDFLAGS="-L$ssldir/lib" SSL_LIBS="-lssl -lcrypto" found=true AC_MSG_RESULT([yes]) break else AC_MSG_RESULT([no]) fi done # if the file wasn't found, well, go ahead and try the link anyway -- maybe # it will just work! fi # try the preprocessor and linker with our new flags, # being careful not to pollute the global LIBS, LDFLAGS, and CPPFLAGS AC_MSG_CHECKING([whether compiling and linking against OpenSSL works]) echo "Trying link with SSL_LDFLAGS=$SSL_LDFLAGS;" \ "SSL_LIBS=$SSL_LIBS; SSL_INCLUDES=$SSL_INCLUDES" >&AS_MESSAGE_LOG_FD save_LIBS="$LIBS" save_LDFLAGS="$LDFLAGS" save_CPPFLAGS="$CPPFLAGS" LDFLAGS="$LDFLAGS $SSL_LDFLAGS" LIBS="$SSL_LIBS $LIBS" CPPFLAGS="$SSL_INCLUDES $CPPFLAGS" AC_LINK_IFELSE( [AC_LANG_PROGRAM([#include ], [SSL_new(NULL)])], [ AC_MSG_RESULT([yes]) $1 ], [ AC_MSG_RESULT([no]) $2 ]) CPPFLAGS="$save_CPPFLAGS" LDFLAGS="$save_LDFLAGS" LIBS="$save_LIBS" AC_SUBST([SSL_INCLUDES]) AC_SUBST([SSL_LIBS]) AC_SUBST([SSL_LDFLAGS]) ]) openvswitch-2.0.1+git20140120/m4/openvswitch.m4000066400000000000000000000454421226605124000206160ustar00rootroot00000000000000# -*- autoconf -*- # Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. dnl Checks for --enable-coverage and updates CFLAGS and LDFLAGS appropriately. AC_DEFUN([OVS_CHECK_COVERAGE], [AC_REQUIRE([AC_PROG_CC]) AC_ARG_ENABLE( [coverage], [AC_HELP_STRING([--enable-coverage], [Enable gcov coverage tool.])], [case "${enableval}" in (yes) coverage=true ;; (no) coverage=false ;; (*) AC_MSG_ERROR([bad value ${enableval} for --enable-coverage]) ;; esac], [coverage=false]) if $coverage; then CFLAGS="$CFLAGS -O0 --coverage" LDFLAGS="$LDFLAGS --coverage" fi]) dnl Checks for --enable-ndebug and defines NDEBUG if it is specified. AC_DEFUN([OVS_CHECK_NDEBUG], [AC_ARG_ENABLE( [ndebug], [AC_HELP_STRING([--enable-ndebug], [Disable debugging features for max performance])], [case "${enableval}" in (yes) ndebug=true ;; (no) ndebug=false ;; (*) AC_MSG_ERROR([bad value ${enableval} for --enable-ndebug]) ;; esac], [ndebug=false]) AM_CONDITIONAL([NDEBUG], [test x$ndebug = xtrue])]) dnl Checks for --enable-cache-time and defines CACHE_TIME if it is specified. AC_DEFUN([OVS_CHECK_CACHE_TIME], [AC_ARG_ENABLE( [cache-time], [AC_HELP_STRING([--enable-cache-time], [Override time caching default (for testing only)])], [case "${enableval}" in (yes) cache_time=1;; (no) cache_time=0;; (*) AC_MSG_ERROR([bad value ${enableval} for --enable-cache-time]) ;; esac AC_DEFINE_UNQUOTED([CACHE_TIME], [$cache_time], [Define to 1 to enable time caching, to 0 to disable time caching, or leave undefined to use the default (as one should ordinarily do).])])]) dnl Checks for ESX. 
AC_DEFUN([OVS_CHECK_ESX], [AC_CHECK_HEADER([vmware.h], [ESX=yes], [ESX=no]) AM_CONDITIONAL([ESX], [test "$ESX" = yes]) if test "$ESX" = yes; then AC_DEFINE([ESX], [1], [Define to 1 if building on ESX.]) fi]) dnl Checks for Netlink support. AC_DEFUN([OVS_CHECK_NETLINK], [AC_CHECK_HEADER([linux/netlink.h], [HAVE_NETLINK=yes], [HAVE_NETLINK=no], [#include #include ]) AM_CONDITIONAL([HAVE_NETLINK], [test "$HAVE_NETLINK" = yes]) if test "$HAVE_NETLINK" = yes; then AC_DEFINE([HAVE_NETLINK], [1], [Define to 1 if Netlink protocol is available.]) fi]) dnl Checks for OpenSSL. AC_DEFUN([OVS_CHECK_OPENSSL], [AC_ARG_ENABLE( [ssl], [AC_HELP_STRING([--disable-ssl], [Disable OpenSSL support])], [case "${enableval}" in (yes) ssl=true ;; (no) ssl=false ;; (*) AC_MSG_ERROR([bad value ${enableval} for --enable-ssl]) ;; esac], [ssl=check]) if test "$ssl" != false; then AX_CHECK_OPENSSL( [HAVE_OPENSSL=yes], [HAVE_OPENSSL=no if test "$ssl" = check; then AC_MSG_WARN([Cannot find openssl: $SSL_PKG_ERRORS OpenFlow connections over SSL will not be supported. (You may use --disable-ssl to suppress this warning.)]) else AC_MSG_ERROR([Cannot find openssl (use --disable-ssl to configure without SSL support)]) fi]) else HAVE_OPENSSL=no fi AC_SUBST([HAVE_OPENSSL]) AM_CONDITIONAL([HAVE_OPENSSL], [test "$HAVE_OPENSSL" = yes]) if test "$HAVE_OPENSSL" = yes; then AC_DEFINE([HAVE_OPENSSL], [1], [Define to 1 if OpenSSL is installed.]) fi]) dnl Checks for libraries needed by lib/socket-util.c. AC_DEFUN([OVS_CHECK_SOCKET_LIBS], [AC_CHECK_LIB([socket], [connect]) AC_SEARCH_LIBS([gethostbyname], [resolv], [RESOLVER_LIBS=-lresolv])]) dnl Checks for the directory in which to store the PKI. AC_DEFUN([OVS_CHECK_PKIDIR], [AC_ARG_WITH( [pkidir], AC_HELP_STRING([--with-pkidir=DIR], [PKI hierarchy directory [[LOCALSTATEDIR/lib/openvswitch/pki]]]), [PKIDIR=$withval], [PKIDIR='${localstatedir}/lib/openvswitch/pki']) AC_SUBST([PKIDIR])]) dnl Checks for the directory in which to store pidfiles. 
AC_DEFUN([OVS_CHECK_RUNDIR], [AC_ARG_WITH( [rundir], AC_HELP_STRING([--with-rundir=DIR], [directory used for pidfiles [[LOCALSTATEDIR/run/openvswitch]]]), [RUNDIR=$withval], [RUNDIR='${localstatedir}/run/openvswitch']) AC_SUBST([RUNDIR])]) dnl Checks for the directory in which to store logs. AC_DEFUN([OVS_CHECK_LOGDIR], [AC_ARG_WITH( [logdir], AC_HELP_STRING([--with-logdir=DIR], [directory used for logs [[LOCALSTATEDIR/log/PACKAGE]]]), [LOGDIR=$withval], [LOGDIR='${localstatedir}/log/${PACKAGE}']) AC_SUBST([LOGDIR])]) dnl Checks for the directory in which to store the Open vSwitch database. AC_DEFUN([OVS_CHECK_DBDIR], [AC_ARG_WITH( [dbdir], AC_HELP_STRING([--with-dbdir=DIR], [directory used for conf.db [[SYSCONFDIR/PACKAGE]]]), [DBDIR=$withval], [DBDIR='${sysconfdir}/${PACKAGE}']) AC_SUBST([DBDIR])]) dnl Defines HAVE_BACKTRACE if backtrace() is found. AC_DEFUN([OVS_CHECK_BACKTRACE], [AC_SEARCH_LIBS([backtrace], [execinfo ubacktrace], [AC_DEFINE([HAVE_BACKTRACE], [1], [Define to 1 if you have backtrace(3).])])]) dnl Checks for __malloc_hook, etc., supported by glibc. AC_DEFUN([OVS_CHECK_MALLOC_HOOKS], [AC_CACHE_CHECK( [whether libc supports hooks for malloc and related functions], [ovs_cv_malloc_hooks], [AC_COMPILE_IFELSE( [AC_LANG_PROGRAM( [#include ], [(void) __malloc_hook; (void) __realloc_hook; (void) __free_hook;])], [ovs_cv_malloc_hooks=yes], [ovs_cv_malloc_hooks=no])]) if test $ovs_cv_malloc_hooks = yes; then AC_DEFINE([HAVE_MALLOC_HOOKS], [1], [Define to 1 if you have __malloc_hook, __realloc_hook, and __free_hook in .]) fi]) dnl Checks for valgrind/valgrind.h. AC_DEFUN([OVS_CHECK_VALGRIND], [AC_CHECK_HEADERS([valgrind/valgrind.h])]) dnl Checks for Python 2.x, x >= 4. 
AC_DEFUN([OVS_CHECK_PYTHON], [AC_CACHE_CHECK( [for Python 2.x for x >= 4], [ovs_cv_python], [if test -n "$PYTHON"; then ovs_cv_python=$PYTHON else ovs_cv_python=no for binary in python python2.4 python2.5; do ovs_save_IFS=$IFS; IFS=$PATH_SEPARATOR for dir in $PATH; do IFS=$ovs_save_IFS test -z "$dir" && dir=. if test -x $dir/$binary && $dir/$binary -c 'import sys if sys.hexversion >= 0x02040000 and sys.hexversion < 0x03000000: sys.exit(0) else: sys.exit(1)'; then ovs_cv_python=$dir/$binary break 2 fi done done fi]) AC_SUBST([HAVE_PYTHON]) AM_MISSING_PROG([PYTHON], [python]) if test $ovs_cv_python != no; then PYTHON=$ovs_cv_python HAVE_PYTHON=yes else HAVE_PYTHON=no fi AM_CONDITIONAL([HAVE_PYTHON], [test "$HAVE_PYTHON" = yes])]) dnl Checks for dot. AC_DEFUN([OVS_CHECK_DOT], [AC_CACHE_CHECK( [for dot], [ovs_cv_dot], [dnl "dot" writes -V output to stderr: if (dot -V) 2>&1 | grep '^dot - [[gG]]raphviz version' >/dev/null 2>&1; then ovs_cv_dot=yes else ovs_cv_dot=no fi]) AM_CONDITIONAL([HAVE_DOT], [test "$ovs_cv_dot" = yes])]) dnl Checks for pyuic4. AC_DEFUN([OVS_CHECK_PYUIC4], [AC_CACHE_CHECK( [for pyuic4], [ovs_cv_pyuic4], [if (pyuic4 --version) >/dev/null 2>&1; then ovs_cv_pyuic4=pyuic4 else ovs_cv_pyuic4=no fi]) AM_MISSING_PROG([PYUIC4], [pyuic4]) if test $ovs_cv_pyuic4 != no; then PYUIC4=$ovs_cv_pyuic4 fi]) dnl Checks whether $PYTHON supports the module given as $1 AC_DEFUN([OVS_CHECK_PYTHON_MODULE], [AC_REQUIRE([OVS_CHECK_PYTHON]) AC_CACHE_CHECK( [for $1 Python module], [ovs_cv_py_[]AS_TR_SH([$1])], [ovs_cv_py_[]AS_TR_SH([$1])=no if test $HAVE_PYTHON = yes; then AS_ECHO(["running $PYTHON -c 'import $1 import sys sys.exit(0)'..."]) >&AS_MESSAGE_LOG_FD 2>&1 if $PYTHON -c 'import $1 import sys sys.exit(0)' >&AS_MESSAGE_LOG_FD 2>&1; then ovs_cv_py_[]AS_TR_SH([$1])=yes fi fi])]) dnl Checks for Python modules needed by ovsdbmonitor. 
AC_DEFUN([OVS_CHECK_OVSDBMONITOR], [OVS_CHECK_PYTHON_MODULE([PySide.QtCore]) OVS_CHECK_PYTHON_MODULE([PyQt4.QtCore]) OVS_CHECK_PYTHON_MODULE([twisted.conch.ssh]) OVS_CHECK_PYTHON_MODULE([twisted.internet]) OVS_CHECK_PYTHON_MODULE([twisted.application]) OVS_CHECK_PYTHON_MODULE([json]) OVS_CHECK_PYTHON_MODULE([zope.interface]) if (test $ovs_cv_py_PySide_QtCore = yes \ || test $ovs_cv_py_PyQt4_QtCore = yes) \ && test $ovs_cv_py_twisted_conch_ssh = yes \ && test $ovs_cv_py_twisted_internet = yes \ && test $ovs_cv_py_twisted_application = yes \ && test $ovs_cv_py_json = yes \ && test $ovs_cv_py_zope_interface = yes; then BUILD_OVSDBMONITOR=yes else BUILD_OVSDBMONITOR=no fi AC_MSG_CHECKING([whether to build ovsdbmonitor]) AC_MSG_RESULT([$BUILD_OVSDBMONITOR]) AM_CONDITIONAL([BUILD_OVSDBMONITOR], [test $BUILD_OVSDBMONITOR = yes])]) # OVS_LINK2_IFELSE(SOURCE1, SOURCE2, [ACTION-IF-TRUE], [ACTION-IF-FALSE]) # ------------------------------------------------------------- # Based on AC_LINK_IFELSE, but tries to link both SOURCE1 and SOURCE2 # into a program. # # This macro is borrowed from acinclude.m4 in GNU PSPP, which has the # following license: # # Copyright (C) 2005, 2006, 2007, 2009 Free Software Foundation, Inc. # This file is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # m4_define([OVS_LINK2_IFELSE], [m4_ifvaln([$1], [AC_LANG_CONFTEST([$1])])dnl mv conftest.$ac_ext conftest1.$ac_ext m4_ifvaln([$2], [AC_LANG_CONFTEST([$2])])dnl mv conftest.$ac_ext conftest2.$ac_ext rm -f conftest1.$ac_objext conftest2.$ac_objext conftest$ac_exeext ovs_link2='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest1.$ac_ext conftest2.$ac_ext $LIBS >&5' AS_IF([_AC_DO_STDERR($ovs_link2) && { test -z "$ac_[]_AC_LANG_ABBREV[]_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || AS_TEST_X([conftest$ac_exeext]) }], [$3], [echo "$as_me: failed source file 1 of 2 was:" >&5 sed 's/^/| /' conftest1.$ac_ext >&5 echo "$as_me: failed source file 2 of 2 was:" >&5 sed 's/^/| /' conftest2.$ac_ext >&5 $4]) dnl Delete also the IPA/IPO (Inter Procedural Analysis/Optimization) dnl information created by the PGI compiler (conftest_ipa8_conftest.oo), dnl as it would interfere with the next link command. rm -rf conftest.dSYM conftest1.dSYM conftest2.dSYM rm -f core conftest.err conftest1.err conftest2.err rm -f conftest1.$ac_objext conftest2.$ac_objext conftest*_ipa8_conftest*.oo rm -f conftest$ac_exeext rm -f m4_ifval([$1], [conftest1.$ac_ext]) m4_ifval([$2], [conftest1.$ac_ext])[]dnl ])# OVS_LINK2_IFELSE dnl Defines USE_LINKER_SECTIONS to 1 if the compiler supports putting dnl variables in sections with user-defined names and the linker dnl automatically defines __start_SECNAME and __stop_SECNAME symbols dnl that designate the start and end of the sections. 
AC_DEFUN([OVS_CHECK_LINKER_SECTIONS], [AC_CACHE_CHECK( [for user-defined linker section support], [ovs_cv_use_linker_sections], [OVS_LINK2_IFELSE( [AC_LANG_SOURCE( [int a __attribute__((__section__("mysection"))) = 1; int b __attribute__((__section__("mysection"))) = 2; int c __attribute__((__section__("mysection"))) = 3;])], [AC_LANG_PROGRAM( [#include extern int __start_mysection; extern int __stop_mysection;], [int n_ints = &__stop_mysection - &__start_mysection; int *i; for (i = &__start_mysection; i < &__start_mysection + n_ints; i++) { printf("%d\n", *i); }])], [ovs_cv_use_linker_sections=yes], [ovs_cv_use_linker_sections=no])]) if test $ovs_cv_use_linker_sections = yes; then AC_DEFINE([USE_LINKER_SECTIONS], [1], [Define to 1 if the compiler support putting variables into sections with user-defined names and the linker automatically defines __start_SECNAME and __stop_SECNAME symbols that designate the start and end of the section.]) fi AM_CONDITIONAL( [USE_LINKER_SECTIONS], [test $ovs_cv_use_linker_sections = yes])]) dnl Checks for groff. AC_DEFUN([OVS_CHECK_GROFF], [AC_CACHE_CHECK( [for groff], [ovs_cv_groff], [if (groff -v) >/dev/null 2>&1; then ovs_cv_groff=yes else ovs_cv_groff=no fi]) AM_CONDITIONAL([HAVE_GROFF], [test "$ovs_cv_groff" = yes])]) dnl Checks for thread-local storage support. dnl dnl Checks whether the compiler and linker support the C11 dnl thread_local macro from , and if so defines dnl HAVE_THREAD_LOCAL. If not, checks whether the compiler and linker dnl support the GCC __thread extension, and if so defines dnl HAVE___THREAD. 
AC_DEFUN([OVS_CHECK_TLS], [AC_CACHE_CHECK( [whether $CC has that supports thread_local], [ovs_cv_thread_local], [AC_LINK_IFELSE( [AC_LANG_PROGRAM([#include static thread_local int var;], [return var;])], [ovs_cv_thread_local=yes], [ovs_cv_thread_local=no])]) if test $ovs_cv_thread_local = yes; then AC_DEFINE([HAVE_THREAD_LOCAL], [1], [Define to 1 if the C compiler and linker supports the C11 thread_local matcro defined in .]) else AC_CACHE_CHECK( [whether $CC supports __thread], [ovs_cv___thread], [AC_LINK_IFELSE( [AC_LANG_PROGRAM([static __thread int var;], [return var;])], [ovs_cv___thread=yes], [ovs_cv___thread=no])]) if test $ovs_cv___thread = yes; then AC_DEFINE([HAVE___THREAD], [1], [Define to 1 if the C compiler and linker supports the GCC __thread extenions.]) fi fi]) dnl OVS_CHECK_ATOMIC_LIBS dnl dnl Check to see if -latomic is need for GCC atomic built-ins. AC_DEFUN([OVS_CHECK_ATOMIC_LIBS], [AC_SEARCH_LIBS([__atomic_load_8], [atomic])]) dnl OVS_CHECK_GCC4_ATOMICS dnl dnl Checks whether the compiler and linker support GCC 4.0+ atomic built-ins. dnl A compile-time only check is not enough because the compiler defers dnl unimplemented built-ins to libgcc, which sometimes also lacks dnl implementations. 
AC_DEFUN([OVS_CHECK_GCC4_ATOMICS], [AC_CACHE_CHECK( [whether $CC supports GCC 4.0+ atomic built-ins], [ovs_cv_gcc4_atomics], [AC_LINK_IFELSE( [AC_LANG_PROGRAM([[#include #define ovs_assert(expr) if (!(expr)) abort(); #define TEST_ATOMIC_TYPE(TYPE) \ { \ TYPE x = 1; \ TYPE orig; \ \ __sync_synchronize(); \ ovs_assert(x == 1); \ \ __sync_synchronize(); \ x = 3; \ __sync_synchronize(); \ ovs_assert(x == 3); \ \ orig = __sync_fetch_and_add(&x, 1); \ ovs_assert(orig == 3); \ __sync_synchronize(); \ ovs_assert(x == 4); \ \ orig = __sync_fetch_and_sub(&x, 2); \ ovs_assert(orig == 4); \ __sync_synchronize(); \ ovs_assert(x == 2); \ \ orig = __sync_fetch_and_or(&x, 6); \ ovs_assert(orig == 2); \ __sync_synchronize(); \ ovs_assert(x == 6); \ \ orig = __sync_fetch_and_and(&x, 10); \ ovs_assert(orig == 6); \ __sync_synchronize(); \ ovs_assert(x == 2); \ \ orig = __sync_fetch_and_xor(&x, 10); \ ovs_assert(orig == 2); \ __sync_synchronize(); \ ovs_assert(x == 8); \ }]], [dnl TEST_ATOMIC_TYPE(char); TEST_ATOMIC_TYPE(unsigned char); TEST_ATOMIC_TYPE(signed char); TEST_ATOMIC_TYPE(short); TEST_ATOMIC_TYPE(unsigned short); TEST_ATOMIC_TYPE(int); TEST_ATOMIC_TYPE(unsigned int); TEST_ATOMIC_TYPE(long int); TEST_ATOMIC_TYPE(unsigned long int); TEST_ATOMIC_TYPE(long long int); TEST_ATOMIC_TYPE(unsigned long long int); ])], [ovs_cv_gcc4_atomics=yes], [ovs_cv_gcc4_atomics=no])]) if test $ovs_cv_gcc4_atomics = yes; then AC_DEFINE([HAVE_GCC4_ATOMICS], [1], [Define to 1 if the C compiler and linker supports the GCC 4.0+ atomic built-ins.]) fi]) dnl OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE(SIZE) dnl dnl Checks __atomic_always_lock_free(SIZE, 0) AC_DEFUN([OVS_CHECK_ATOMIC_ALWAYS_LOCK_FREE], [AC_CACHE_CHECK( [value of __atomic_always_lock_free($1)], [ovs_cv_atomic_always_lock_free_$1], [AC_COMPUTE_INT( [ovs_cv_atomic_always_lock_free_$1], [__atomic_always_lock_free($1, 0)], [], [ovs_cv_atomic_always_lock_free_$1=unsupported])]) if test ovs_cv_atomic_always_lock_free_$1 != unsupported; then 
AC_DEFINE_UNQUOTED( [ATOMIC_ALWAYS_LOCK_FREE_$1B], [$ovs_cv_atomic_always_lock_free_$1], [If the C compiler is GCC 4.7 or later, define to the return value of __atomic_always_lock_free($1, 0). If the C compiler is not GCC or is an older version of GCC, the value does not matter.]) fi]) dnl OVS_CHECK_POSIX_AIO AC_DEFUN([OVS_CHECK_POSIX_AIO], [AC_SEARCH_LIBS([aio_write], [rt]) AM_CONDITIONAL([HAVE_POSIX_AIO], [test "$ac_cv_search_aio_write" != no])]) openvswitch-2.0.1+git20140120/manpages.mk000066400000000000000000000124011226605124000175740ustar00rootroot00000000000000# Generated automatically -- do not modify! -*- buffer-read-only: t -*- ovsdb/ovsdb-client.1: \ ovsdb/ovsdb-client.1.in \ lib/common-syn.man \ lib/common.man \ lib/daemon-syn.man \ lib/daemon.man \ lib/ssl-bootstrap-syn.man \ lib/ssl-bootstrap.man \ lib/ssl-syn.man \ lib/ssl.man \ lib/table.man \ lib/vlog-syn.man \ lib/vlog.man \ ovsdb/remote-active.man \ ovsdb/remote-passive.man ovsdb/ovsdb-client.1.in: lib/common-syn.man: lib/common.man: lib/daemon-syn.man: lib/daemon.man: lib/ssl-bootstrap-syn.man: lib/ssl-bootstrap.man: lib/ssl-syn.man: lib/ssl.man: lib/table.man: lib/vlog-syn.man: lib/vlog.man: ovsdb/remote-active.man: ovsdb/remote-passive.man: ovsdb/ovsdb-server.1: \ ovsdb/ovsdb-server.1.in \ lib/common-syn.man \ lib/common.man \ lib/coverage-unixctl.man \ lib/daemon-syn.man \ lib/daemon.man \ lib/memory-unixctl.man \ lib/ssl-bootstrap-syn.man \ lib/ssl-bootstrap.man \ lib/ssl-syn.man \ lib/ssl.man \ lib/unixctl-syn.man \ lib/unixctl.man \ lib/vlog-syn.man \ lib/vlog-unixctl.man \ lib/vlog.man \ ovsdb/remote-active.man \ ovsdb/remote-passive.man ovsdb/ovsdb-server.1.in: lib/common-syn.man: lib/common.man: lib/coverage-unixctl.man: lib/daemon-syn.man: lib/daemon.man: lib/memory-unixctl.man: lib/ssl-bootstrap-syn.man: lib/ssl-bootstrap.man: lib/ssl-syn.man: lib/ssl.man: lib/unixctl-syn.man: lib/unixctl.man: lib/vlog-syn.man: lib/vlog-unixctl.man: lib/vlog.man: ovsdb/remote-active.man: 
ovsdb/remote-passive.man: ovsdb/ovsdb-tool.1: \ ovsdb/ovsdb-tool.1.in \ lib/common-syn.man \ lib/common.man \ lib/vlog-syn.man \ lib/vlog.man ovsdb/ovsdb-tool.1.in: lib/common-syn.man: lib/common.man: lib/vlog-syn.man: lib/vlog.man: utilities/bugtool/ovs-bugtool.8: \ utilities/bugtool/ovs-bugtool.8.in utilities/bugtool/ovs-bugtool.8.in: utilities/ovs-appctl.8: \ utilities/ovs-appctl.8.in \ lib/common.man utilities/ovs-appctl.8.in: lib/common.man: utilities/ovs-benchmark.1: \ utilities/ovs-benchmark.1.in \ lib/ovs.tmac utilities/ovs-benchmark.1.in: lib/ovs.tmac: utilities/ovs-controller.8: \ utilities/ovs-controller.8.in \ lib/common.man \ lib/daemon.man \ lib/ssl-peer-ca-cert.man \ lib/ssl.man \ lib/unixctl.man \ lib/vconn-active.man \ lib/vconn-passive.man \ lib/vlog.man utilities/ovs-controller.8.in: lib/common.man: lib/daemon.man: lib/ssl-peer-ca-cert.man: lib/ssl.man: lib/unixctl.man: lib/vconn-active.man: lib/vconn-passive.man: lib/vlog.man: utilities/ovs-dpctl-top.8: \ utilities/ovs-dpctl-top.8.in utilities/ovs-dpctl-top.8.in: utilities/ovs-dpctl.8: \ utilities/ovs-dpctl.8.in \ lib/common.man \ lib/vlog.man utilities/ovs-dpctl.8.in: lib/common.man: lib/vlog.man: utilities/ovs-l3ping.8: \ utilities/ovs-l3ping.8.in \ lib/common-syn.man \ lib/common.man utilities/ovs-l3ping.8.in: lib/common-syn.man: lib/common.man: utilities/ovs-ofctl.8: \ utilities/ovs-ofctl.8.in \ lib/common.man \ lib/daemon.man \ lib/ofp-version.man \ lib/ssl.man \ lib/vconn-active.man \ lib/vlog.man utilities/ovs-ofctl.8.in: lib/common.man: lib/daemon.man: lib/ofp-version.man: lib/ssl.man: lib/vconn-active.man: lib/vlog.man: utilities/ovs-pcap.1: \ utilities/ovs-pcap.1.in \ lib/common-syn.man \ lib/common.man utilities/ovs-pcap.1.in: lib/common-syn.man: lib/common.man: utilities/ovs-pki.8: \ utilities/ovs-pki.8.in utilities/ovs-pki.8.in: utilities/ovs-tcpundump.1: \ utilities/ovs-tcpundump.1.in \ lib/common-syn.man \ lib/common.man utilities/ovs-tcpundump.1.in: lib/common-syn.man: 
lib/common.man: utilities/ovs-test.8: \ utilities/ovs-test.8.in \ lib/common-syn.man \ lib/common.man \ utilities/ovs-vlan-bugs.man utilities/ovs-test.8.in: lib/common-syn.man: lib/common.man: utilities/ovs-vlan-bugs.man: utilities/ovs-vlan-bug-workaround.8: \ utilities/ovs-vlan-bug-workaround.8.in \ lib/common.man \ utilities/ovs-vlan-bugs.man utilities/ovs-vlan-bug-workaround.8.in: lib/common.man: utilities/ovs-vlan-bugs.man: utilities/ovs-vlan-test.8: \ utilities/ovs-vlan-test.8.in \ lib/common-syn.man \ lib/common.man \ utilities/ovs-vlan-bugs.man utilities/ovs-vlan-test.8.in: lib/common-syn.man: lib/common.man: utilities/ovs-vlan-bugs.man: utilities/ovs-vsctl.8: \ utilities/ovs-vsctl.8.in \ lib/ssl-bootstrap.man \ lib/ssl-peer-ca-cert.man \ lib/ssl.man \ lib/table.man \ lib/vconn-active.man \ lib/vconn-passive.man \ lib/vlog.man \ ovsdb/remote-active.man \ ovsdb/remote-active.man \ ovsdb/remote-passive.man \ ovsdb/remote-passive.man utilities/ovs-vsctl.8.in: lib/ssl-bootstrap.man: lib/ssl-peer-ca-cert.man: lib/ssl.man: lib/table.man: lib/vconn-active.man: lib/vconn-passive.man: lib/vlog.man: ovsdb/remote-active.man: ovsdb/remote-active.man: ovsdb/remote-passive.man: ovsdb/remote-passive.man: vswitchd/ovs-vswitchd.8: \ vswitchd/ovs-vswitchd.8.in \ lib/common.man \ lib/coverage-unixctl.man \ lib/daemon.man \ lib/memory-unixctl.man \ lib/ssl-bootstrap.man \ lib/ssl.man \ lib/vlog-unixctl.man \ lib/vlog.man \ ofproto/ofproto-dpif-unixctl.man \ ofproto/ofproto-unixctl.man \ ovsdb/remote-active.man \ ovsdb/remote-passive.man vswitchd/ovs-vswitchd.8.in: lib/common.man: lib/coverage-unixctl.man: lib/daemon.man: lib/memory-unixctl.man: lib/ssl-bootstrap.man: lib/ssl.man: lib/vlog-unixctl.man: lib/vlog.man: ofproto/ofproto-dpif-unixctl.man: ofproto/ofproto-unixctl.man: ovsdb/remote-active.man: ovsdb/remote-passive.man: 
openvswitch-2.0.1+git20140120/ofproto/000077500000000000000000000000001226605124000171425ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/ofproto/.gitignore000066400000000000000000000000531226605124000211300ustar00rootroot00000000000000/Makefile /Makefile.in /ipfix-entities.def openvswitch-2.0.1+git20140120/ofproto/automake.mk000066400000000000000000000035301226605124000213020ustar00rootroot00000000000000# Copyright (C) 2009, 2010, 2011, 2012 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. noinst_LIBRARIES += ofproto/libofproto.a ofproto_libofproto_a_SOURCES = \ ofproto/collectors.c \ ofproto/collectors.h \ ofproto/connmgr.c \ ofproto/connmgr.h \ ofproto/fail-open.c \ ofproto/fail-open.h \ ofproto/in-band.c \ ofproto/in-band.h \ ofproto/names.c \ ofproto/netflow.c \ ofproto/netflow.h \ ofproto/ofproto.c \ ofproto/ofproto.h \ ofproto/ofproto-dpif.c \ ofproto/ofproto-dpif.h \ ofproto/ofproto-dpif-governor.c \ ofproto/ofproto-dpif-governor.h \ ofproto/ofproto-dpif-ipfix.c \ ofproto/ofproto-dpif-ipfix.h \ ofproto/ofproto-dpif-mirror.c \ ofproto/ofproto-dpif-mirror.h \ ofproto/ofproto-dpif-sflow.c \ ofproto/ofproto-dpif-sflow.h \ ofproto/ofproto-dpif-upcall.c \ ofproto/ofproto-dpif-upcall.h \ ofproto/ofproto-dpif-xlate.c \ ofproto/ofproto-dpif-xlate.h \ ofproto/ofproto-provider.h \ ofproto/pktbuf.c \ ofproto/pktbuf.h \ ofproto/pinsched.c \ ofproto/pinsched.h \ ofproto/tunnel.c \ ofproto/tunnel.h # Distribute this generated file in order not to require Python at # build time if ofproto/ipfix.xml is not modified. 
ofproto_libofproto_a_SOURCES += ofproto/ipfix-entities.def BUILT_SOURCES += ofproto/ipfix-entities.def CLEANFILES += ofproto/ipfix-entities.def MAN_FRAGMENTS += ofproto/ofproto-unixctl.man ofproto/ofproto-dpif-unixctl.man # IPFIX entity definition macros generation from IANA's XML definition. EXTRA_DIST += ofproto/ipfix.xml dist_noinst_SCRIPTS = ofproto/ipfix-gen-entities ofproto/ipfix-entities.def: ofproto/ipfix.xml ofproto/ipfix-gen-entities $(run_python) $(srcdir)/ofproto/ipfix-gen-entities $< > $@ openvswitch-2.0.1+git20140120/ofproto/collectors.c000066400000000000000000000070031226605124000214570ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "collectors.h" #include #include #include #include #include "socket-util.h" #include "sset.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(collectors); struct collectors { int *fds; /* Sockets. */ size_t n_fds; /* Number of sockets. */ }; /* Opens the targets specified in 'targets' for sending UDP packets. This is * useful for e.g. sending NetFlow or sFlow packets. Returns 0 if successful, * otherwise a positive errno value if opening at least one collector failed. * * Each target in 'targets' should be a string in the format "[:]". * may be omitted if 'default_port' is nonzero, in which case it * defaults to 'default_port'. 
* * '*collectorsp' is set to a null pointer if no targets were successfully * added, otherwise to a new collectors object if at least one was successfully * added. Thus, even on a failure return, it is possible that '*collectorsp' * is nonnull, and even on a successful return, it is possible that * '*collectorsp' is null, if 'target's is an empty sset. */ int collectors_create(const struct sset *targets, uint16_t default_port, struct collectors **collectorsp) { struct collectors *c; const char *name; int retval = 0; c = xmalloc(sizeof *c); c->fds = xmalloc(sizeof *c->fds * sset_count(targets)); c->n_fds = 0; SSET_FOR_EACH (name, targets) { int error; int fd; error = inet_open_active(SOCK_DGRAM, name, default_port, NULL, &fd, 0); if (fd >= 0) { c->fds[c->n_fds++] = fd; } else { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "couldn't open connection to collector %s (%s)", name, ovs_strerror(error)); if (!retval) { retval = error; } } } if (c->n_fds) { *collectorsp = c; } else { collectors_destroy(c); *collectorsp = NULL; } return retval; } /* Destroys 'c'. */ void collectors_destroy(struct collectors *c) { if (c) { size_t i; for (i = 0; i < c->n_fds; i++) { close(c->fds[i]); } free(c->fds); free(c); } } /* Sends the 'n'-byte 'payload' to each of the collectors in 'c'. */ void collectors_send(const struct collectors *c, const void *payload, size_t n) { if (c) { size_t i; for (i = 0; i < c->n_fds; i++) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (send(c->fds[i], payload, n, 0) == -1) { VLOG_WARN_RL(&rl, "sending to collector failed: %s", ovs_strerror(errno)); } } } } int collectors_count(const struct collectors *c) { return c ? c->n_fds : 0; } openvswitch-2.0.1+git20140120/ofproto/collectors.h000066400000000000000000000020201226605124000214560ustar00rootroot00000000000000/* * Copyright (c) 2009, 2011 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef COLLECTORS_H #define COLLECTORS_H 1 #include #include struct collectors; struct sset; int collectors_create(const struct sset *targets, uint16_t default_port, struct collectors **); void collectors_destroy(struct collectors *); void collectors_send(const struct collectors *, const void *, size_t); int collectors_count(const struct collectors *); #endif /* collectors.h */ openvswitch-2.0.1+git20140120/ofproto/connmgr.c000066400000000000000000002017161226605124000207600ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

/* NOTE(review): three of the "#include" directives below carry no header
 * name in this copy of the file.  Their angle-bracket operands appear to have
 * been lost when the text was extracted; restore them from a pristine copy
 * before trying to compile. */
#include
#include "connmgr.h"
#include
#include
#include "coverage.h"
#include "fail-open.h"
#include "in-band.h"
#include "odp-util.h"
#include "ofp-actions.h"
#include "ofp-msgs.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "ofproto-provider.h"
#include "pinsched.h"
#include "poll-loop.h"
#include "pktbuf.h"
#include "rconn.h"
#include "shash.h"
#include "simap.h"
#include "stream.h"
#include "timeval.h"
#include "vconn.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(connmgr);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* An OpenFlow connection.
 *
 *
 * Thread-safety
 * =============
 *
 * 'ofproto_mutex' must be held whenever an ofconn is created or destroyed or,
 * more or less equivalently, whenever an ofconn is added to or removed from a
 * connmgr.  'ofproto_mutex' doesn't protect the data inside the ofconn, except
 * as specifically noted below. */
struct ofconn {
/* Configuration that persists from one connection to the next. */

    struct list node;           /* In struct connmgr's "all_conns" list. */
    struct hmap_node hmap_node; /* In struct connmgr's "controllers" map. */

    struct connmgr *connmgr;    /* Connection's manager. */
    struct rconn *rconn;        /* OpenFlow connection. */
    enum ofconn_type type;      /* Type. */
    enum ofproto_band band;     /* In-band or out-of-band? */
    bool enable_async_msgs;     /* Initially enable async messages? */

/* State that should be cleared from one connection to the next. */

    /* OpenFlow state. */
    enum ofp12_controller_role role;           /* Role. */
    enum ofputil_protocol protocol; /* Current protocol variant. */
    enum nx_packet_in_format packet_in_format; /* OFPT_PACKET_IN format. */

    /* Asynchronous flow table operation support. */
    struct list opgroups;       /* Contains pending "ofopgroups", if any. */
    struct ofpbuf *blocked;     /* Postponed OpenFlow message, if any. */
    bool retry;                 /* True if 'blocked' is ready to try again. */

    /* OFPT_PACKET_IN related data. */
    struct rconn_packet_counter *packet_in_counter; /* # queued on 'rconn'. */
#define N_SCHEDULERS 2
    struct pinsched *schedulers[N_SCHEDULERS]; /* Packet-in rate limiters;
                                                * null entries mean no rate
                                                * limit is configured. */
    struct pktbuf *pktbuf;         /* OpenFlow packet buffers. */
    int miss_send_len;             /* Bytes to send of buffered packets. */
    uint16_t controller_id;     /* Connection controller ID. */

    /* Number of OpenFlow messages queued on 'rconn' as replies to OpenFlow
     * requests, and the maximum number before we stop reading OpenFlow
     * requests.  */
#define OFCONN_REPLY_MAX 100
    struct rconn_packet_counter *reply_counter;

    /* Asynchronous message configuration for each possible role.
     *
     * A 1-bit enables sending an asynchronous message for one possible reason
     * that the message might be generated, a 0-bit disables it. */
    uint32_t master_async_config[OAM_N_TYPES]; /* master, other */
    uint32_t slave_async_config[OAM_N_TYPES];  /* slave */

/* Flow monitors (e.g. NXST_FLOW_MONITOR). */

    /* Configuration.  Contains "struct ofmonitor"s. */
    struct hmap monitors OVS_GUARDED_BY(ofproto_mutex);

    /* Flow control.
     *
     * When too many flow monitor notifications back up in the transmit buffer,
     * we pause the transmission of further notifications.  These members track
     * the flow control state.
     *
     * When notifications are flowing, 'monitor_paused' is 0.  When
     * notifications are paused, 'monitor_paused' is the value of
     * 'monitor_seqno' at the point we paused.
     *
     * 'monitor_counter' counts the OpenFlow messages and bytes currently in
     * flight.  This value growing too large triggers pausing. */
    uint64_t monitor_paused OVS_GUARDED_BY(ofproto_mutex);
    struct rconn_packet_counter *monitor_counter OVS_GUARDED_BY(ofproto_mutex);

    /* State of monitors for a single ongoing flow_mod.
     *
     * 'updates' is a list of "struct ofpbuf"s that contain
     * NXST_FLOW_MONITOR_REPLY messages representing the changes made by the
     * current flow_mod.
     *
     * When 'updates' is nonempty, 'sent_abbrev_update' is true if 'updates'
     * contains an update event of type NXFME_ABBREV and false otherwise. */
    struct list updates OVS_GUARDED_BY(ofproto_mutex);
    bool sent_abbrev_update OVS_GUARDED_BY(ofproto_mutex);
};

static struct ofconn *ofconn_create(struct connmgr *, struct rconn *,
                                    enum ofconn_type, bool enable_async_msgs)
    OVS_REQUIRES(ofproto_mutex);
static void ofconn_destroy(struct ofconn *) OVS_REQUIRES(ofproto_mutex);
static void ofconn_flush(struct ofconn *) OVS_REQUIRES(ofproto_mutex);

static void ofconn_reconfigure(struct ofconn *,
                               const struct ofproto_controller *);

static void ofconn_run(struct ofconn *,
                       bool (*handle_openflow)(struct ofconn *,
                                               const struct ofpbuf *ofp_msg));
static void ofconn_wait(struct ofconn *, bool handling_openflow);

static const char *ofconn_get_target(const struct ofconn *);
static char *ofconn_make_name(const struct connmgr *, const char *target);

static void ofconn_set_rate_limit(struct ofconn *, int rate, int burst);

static void ofconn_send(const struct ofconn *, struct ofpbuf *,
                        struct rconn_packet_counter *);

static void do_send_packet_in(struct ofpbuf *, void *ofconn_);

/* A listener for incoming OpenFlow "service" connections. */
struct ofservice {
    struct hmap_node node;      /* In struct connmgr's "services" hmap. */
    struct pvconn *pvconn;      /* OpenFlow connection listener. */

    /* These are not used by ofservice directly.  They are settings for
     * accepted "struct ofconn"s from the pvconn. */
    int probe_interval;         /* Max idle time before probing, in seconds. */
    int rate_limit;             /* Max packet-in rate in packets per second. */
    int burst_limit;            /* Limit on accumulating packet credits. */
    bool enable_async_msgs;     /* Initially enable async messages? */
    uint8_t dscp;               /* DSCP Value for controller connection */
    uint32_t allowed_versions;  /* OpenFlow protocol versions that may
                                 * be negotiated for a session. */
};

static void ofservice_reconfigure(struct ofservice *,
                                  const struct ofproto_controller *);
static int ofservice_create(struct connmgr *mgr, const char *target,
                            uint32_t allowed_versions, uint8_t dscp);
static void ofservice_destroy(struct connmgr *, struct ofservice *);
static struct ofservice *ofservice_lookup(struct connmgr *,
                                          const char *target);

/* Connection manager for an OpenFlow switch. */
struct connmgr {
    struct ofproto *ofproto;    /* Owning ofproto. */
    char *name;                 /* Name of the ofproto, for log messages
                                 * (private copy). */
    char *local_port_name;      /* Name of the local port (OFPP_LOCAL) within
                                 * 'ofproto' (private copy). */

    /* OpenFlow connections. */
    struct hmap controllers;   /* Controller "struct ofconn"s. */
    struct list all_conns;     /* Contains "struct ofconn"s. */
    uint64_t master_election_id; /* monotonically increasing sequence number
                                  * for master election */
    bool master_election_id_defined; /* False until an election id is set. */

    /* OpenFlow listeners. */
    struct hmap services;       /* Contains "struct ofservice"s. */
    struct pvconn **snoops;     /* Array of 'n_snoops' snoop listeners. */
    size_t n_snoops;

    /* Fail open. */
    struct fail_open *fail_open; /* Null when fail-open is not active. */
    enum ofproto_fail_mode fail_mode;

    /* In-band control. */
    struct in_band *in_band;    /* Null when in-band control is not active. */
    struct sockaddr_in *extra_in_band_remotes; /* 'n_extra_remotes' remotes
                                                * appended to the controllers'
                                                * own addresses. */
    size_t n_extra_remotes;
    int in_band_queue;          /* Passed to in_band_set_queue(); starts out
                                 * as -1. */
};

static void update_in_band_remotes(struct connmgr *);
static void add_snooper(struct connmgr *, struct vconn *);
static void ofmonitor_run(struct connmgr *);
static void ofmonitor_wait(struct connmgr *);

/* Creates and returns a new connection manager owned by 'ofproto'.  'name' is
 * a name for the ofproto suitable for using in log messages.
 * 'local_port_name' is the name of the local port (OFPP_LOCAL) within
 * 'ofproto'.
*/ struct connmgr * connmgr_create(struct ofproto *ofproto, const char *name, const char *local_port_name) { struct connmgr *mgr; mgr = xmalloc(sizeof *mgr); mgr->ofproto = ofproto; mgr->name = xstrdup(name); mgr->local_port_name = xstrdup(local_port_name); hmap_init(&mgr->controllers); list_init(&mgr->all_conns); mgr->master_election_id = 0; mgr->master_election_id_defined = false; hmap_init(&mgr->services); mgr->snoops = NULL; mgr->n_snoops = 0; mgr->fail_open = NULL; mgr->fail_mode = OFPROTO_FAIL_SECURE; mgr->in_band = NULL; mgr->extra_in_band_remotes = NULL; mgr->n_extra_remotes = 0; mgr->in_band_queue = -1; return mgr; } /* Frees 'mgr' and all of its resources. */ void connmgr_destroy(struct connmgr *mgr) { struct ofservice *ofservice, *next_ofservice; struct ofconn *ofconn, *next_ofconn; size_t i; if (!mgr) { return; } ovs_mutex_lock(&ofproto_mutex); LIST_FOR_EACH_SAFE (ofconn, next_ofconn, node, &mgr->all_conns) { ofconn_destroy(ofconn); } ovs_mutex_unlock(&ofproto_mutex); hmap_destroy(&mgr->controllers); HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, node, &mgr->services) { ofservice_destroy(mgr, ofservice); } hmap_destroy(&mgr->services); for (i = 0; i < mgr->n_snoops; i++) { pvconn_close(mgr->snoops[i]); } free(mgr->snoops); fail_open_destroy(mgr->fail_open); mgr->fail_open = NULL; in_band_destroy(mgr->in_band); mgr->in_band = NULL; free(mgr->extra_in_band_remotes); free(mgr->name); free(mgr->local_port_name); free(mgr); } /* Does all of the periodic maintenance required by 'mgr'. * * If 'handle_openflow' is nonnull, calls 'handle_openflow' for each message * received on an OpenFlow connection, passing along the OpenFlow connection * itself and the message that was sent. If 'handle_openflow' returns true, * the message is considered to be fully processed. If 'handle_openflow' * returns false, the message is considered not to have been processed at all; * it will be stored and re-presented to 'handle_openflow' following the next * call to connmgr_retry(). 
'handle_openflow' must not modify or free the
 * message.
 *
 * If 'handle_openflow' is NULL, no OpenFlow messages will be processed and
 * other activities that could affect the flow table (in-band processing,
 * fail-open processing) are suppressed too. */
void
connmgr_run(struct connmgr *mgr,
            bool (*handle_openflow)(struct ofconn *,
                                    const struct ofpbuf *ofp_msg))
    OVS_EXCLUDED(ofproto_mutex)
{
    struct ofconn *conn, *next_conn;
    struct ofservice *svc;
    size_t snoop_idx;

    /* In-band maintenance.  When in_band_run() returns false the in-band
     * object is torn down here. */
    if (handle_openflow && mgr->in_band && !in_band_run(mgr->in_band)) {
        in_band_destroy(mgr->in_band);
        mgr->in_band = NULL;
    }

    /* ofconn_run() may destroy the connection it is given, hence the _SAFE
     * iterator. */
    LIST_FOR_EACH_SAFE (conn, next_conn, node, &mgr->all_conns) {
        ofconn_run(conn, handle_openflow);
    }
    ofmonitor_run(mgr);

    /* Fail-open maintenance.  This has to come after the ofconns have been
     * processed because fail-open checks the status of the controller
     * rconn. */
    if (handle_openflow && mgr->fail_open) {
        fail_open_run(mgr->fail_open);
    }

    /* Accept at most one new connection per service listener. */
    HMAP_FOR_EACH (svc, node, &mgr->services) {
        struct ofconn *service_conn;
        struct vconn *accepted;
        struct rconn *rconn;
        char *conn_name;
        int error = pvconn_accept(svc->pvconn, &accepted);

        if (error) {
            if (error != EAGAIN) {
                VLOG_WARN_RL(&rl, "accept failed (%s)", ovs_strerror(error));
            }
            continue;
        }

        /* Passing default value for creation of the rconn */
        rconn = rconn_create(svc->probe_interval, 0, svc->dscp,
                             vconn_get_allowed_versions(accepted));
        conn_name = ofconn_make_name(mgr, vconn_get_name(accepted));
        rconn_connect_unreliably(rconn, accepted, conn_name);
        free(conn_name);

        /* Creating an ofconn requires 'ofproto_mutex'. */
        ovs_mutex_lock(&ofproto_mutex);
        service_conn = ofconn_create(mgr, rconn, OFCONN_SERVICE,
                                     svc->enable_async_msgs);
        ovs_mutex_unlock(&ofproto_mutex);

        ofconn_set_rate_limit(service_conn, svc->rate_limit,
                              svc->burst_limit);
    }

    /* Accept at most one new connection per snoop listener. */
    for (snoop_idx = 0; snoop_idx < mgr->n_snoops; snoop_idx++) {
        struct vconn *accepted;
        int error = pvconn_accept(mgr->snoops[snoop_idx], &accepted);

        if (!error) {
            add_snooper(mgr, accepted);
        } else if (error != EAGAIN) {
            VLOG_WARN_RL(&rl, "accept failed (%s)", ovs_strerror(error));
        }
    }
}

/* Causes the poll loop to wake up when
connmgr_run() needs to run. * * If 'handling_openflow' is true, arriving OpenFlow messages and other * activities that affect the flow table will wake up the poll loop. If * 'handling_openflow' is false, they will not. */ void connmgr_wait(struct connmgr *mgr, bool handling_openflow) { struct ofservice *ofservice; struct ofconn *ofconn; size_t i; LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { ofconn_wait(ofconn, handling_openflow); } ofmonitor_wait(mgr); if (handling_openflow && mgr->in_band) { in_band_wait(mgr->in_band); } if (handling_openflow && mgr->fail_open) { fail_open_wait(mgr->fail_open); } HMAP_FOR_EACH (ofservice, node, &mgr->services) { pvconn_wait(ofservice->pvconn); } for (i = 0; i < mgr->n_snoops; i++) { pvconn_wait(mgr->snoops[i]); } } /* Adds some memory usage statistics for 'mgr' into 'usage', for use with * memory_report(). */ void connmgr_get_memory_usage(const struct connmgr *mgr, struct simap *usage) { const struct ofconn *ofconn; unsigned int packets = 0; unsigned int ofconns = 0; LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { int i; ofconns++; packets += rconn_count_txqlen(ofconn->rconn); for (i = 0; i < N_SCHEDULERS; i++) { packets += pinsched_count_txqlen(ofconn->schedulers[i]); } packets += pktbuf_count_packets(ofconn->pktbuf); } simap_increase(usage, "ofconns", ofconns); simap_increase(usage, "packets", packets); } /* Returns the ofproto that owns 'ofconn''s connmgr. */ struct ofproto * ofconn_get_ofproto(const struct ofconn *ofconn) { return ofconn->connmgr->ofproto; } /* If processing of OpenFlow messages was blocked on any 'mgr' ofconns by * returning false to the 'handle_openflow' callback to connmgr_run(), this * re-enables them. */ void connmgr_retry(struct connmgr *mgr) { struct ofconn *ofconn; LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { ofconn->retry = true; } } /* OpenFlow configuration. 
*/

static void add_controller(struct connmgr *, const char *target, uint8_t dscp,
                           uint32_t allowed_versions)
    OVS_REQUIRES(ofproto_mutex);
static struct ofconn *find_controller_by_target(struct connmgr *,
                                                const char *target);
static void update_fail_open(struct connmgr *);
static int set_pvconns(struct pvconn ***pvconnsp, size_t *n_pvconnsp,
                       const struct sset *);

/* Returns true if 'mgr' has any configured primary controllers.
 *
 * Service controllers do not count, but configured primary controllers do
 * count whether or not they are currently connected. */
bool
connmgr_has_controllers(const struct connmgr *mgr)
{
    return !hmap_is_empty(&mgr->controllers);
}

/* Populates 'info' with information about each configured primary
 * controller.  The keys in 'info' are the controllers' targets; the data
 * values are corresponding "struct ofproto_controller_info".
 *
 * 'info' must already be an initialized shash: this function looks targets up
 * in it and adds to it, but never initializes it.  A target that is already
 * present in 'info' is left untouched.
 *
 * The caller owns 'info' and everything in it and should free it, with
 * connmgr_free_controller_info(), when it is no longer needed. */
void
connmgr_get_controller_info(struct connmgr *mgr, struct shash *info)
{
    const struct ofconn *ofconn;

    HMAP_FOR_EACH (ofconn, hmap_node, &mgr->controllers) {
        const struct rconn *rconn = ofconn->rconn;
        const char *target = rconn_get_target(rconn);

        if (!shash_find(info, target)) {
            struct ofproto_controller_info *cinfo = xmalloc(sizeof *cinfo);
            time_t now = time_now();
            time_t last_connection = rconn_get_last_connection(rconn);
            time_t last_disconnect = rconn_get_last_disconnect(rconn);
            int last_error = rconn_get_last_error(rconn);

            shash_add(info, target, cinfo);

            cinfo->is_connected = rconn_is_connected(rconn);
            cinfo->role = ofconn->role;

            /* Up to four key-value pairs follow.  The keys are string
             * literals; the values are heap-allocated and are freed by
             * connmgr_free_controller_info(). */
            cinfo->pairs.n = 0;

            if (last_error) {
                cinfo->pairs.keys[cinfo->pairs.n] = "last_error";
                cinfo->pairs.values[cinfo->pairs.n++]
                    = xstrdup(ovs_retval_to_string(last_error));
            }

            cinfo->pairs.keys[cinfo->pairs.n] = "state";
            cinfo->pairs.values[cinfo->pairs.n++]
                = xstrdup(rconn_get_state(rconn));

            /* TIME_MIN is treated as "never happened". */
            if (last_connection != TIME_MIN) {
                cinfo->pairs.keys[cinfo->pairs.n] = "sec_since_connect";
                cinfo->pairs.values[cinfo->pairs.n++]
                    = xasprintf("%ld", (long int) (now - last_connection));
            }

            if (last_disconnect != TIME_MIN) {
                cinfo->pairs.keys[cinfo->pairs.n] = "sec_since_disconnect";
                cinfo->pairs.values[cinfo->pairs.n++]
                    = xasprintf("%ld", (long int) (now - last_disconnect));
            }
        }
    }
}

/* Frees everything that connmgr_get_controller_info() stored in 'info' (the
 * value string of each pair and each "struct ofproto_controller_info"), then
 * destroys 'info' itself. */
void
connmgr_free_controller_info(struct shash *info)
{
    struct shash_node *node;

    SHASH_FOR_EACH (node, info) {
        struct ofproto_controller_info *cinfo = node->data;
        /* Only the values are freed; the keys are string literals. */
        while (cinfo->pairs.n) {
            free(CONST_CAST(char *, cinfo->pairs.values[--cinfo->pairs.n]));
        }
        free(cinfo);
    }
    shash_destroy(info);
}

/* Changes 'mgr''s set of controllers to the 'n_controllers' controllers in
 * 'controllers'.
 *
 * A target accepted by vconn_verify_name() becomes a primary controller and
 * one accepted by pvconn_verify_name() becomes a service listener; any other
 * target is logged and skipped.  An existing controller or service whose
 * allowed OpenFlow versions differ from 'allowed_versions' is destroyed and
 * re-created.  Controllers and services that are no longer listed are
 * removed, and the remaining ones are reconfigured from their entry in
 * 'controllers'.
 *
 * Afterward, in-band and fail-open state are brought up to date, and the flow
 * table is flushed if 'mgr' went from having no primary controllers to having
 * some, or vice versa. */
void
connmgr_set_controllers(struct connmgr *mgr,
                        const struct ofproto_controller *controllers,
                        size_t n_controllers, uint32_t allowed_versions)
    OVS_EXCLUDED(ofproto_mutex)
{
    bool had_controllers = connmgr_has_controllers(mgr);
    struct shash new_controllers;
    struct ofconn *ofconn, *next_ofconn;
    struct ofservice *ofservice, *next_ofservice;
    size_t i;

    /* Required to add and remove ofconns.  This could probably be narrowed to
     * cover a smaller amount of code, if that yielded some benefit. */
    ovs_mutex_lock(&ofproto_mutex);

    /* Create newly configured controllers and services.
     * Create a name to ofproto_controller mapping in 'new_controllers'. */
    shash_init(&new_controllers);
    for (i = 0; i < n_controllers; i++) {
        const struct ofproto_controller *c = &controllers[i];

        if (!vconn_verify_name(c->target)) {
            bool add = false;
            ofconn = find_controller_by_target(mgr, c->target);
            if (!ofconn) {
                VLOG_INFO("%s: added primary controller \"%s\"",
                          mgr->name, c->target);
                add = true;
            } else if (rconn_get_allowed_versions(ofconn->rconn) !=
                       allowed_versions) {
                VLOG_INFO("%s: re-added primary controller \"%s\"",
                          mgr->name, c->target);
                add = true;
                ofconn_destroy(ofconn);
            }
            if (add) {
                add_controller(mgr, c->target, c->dscp, allowed_versions);
            }
        } else if (!pvconn_verify_name(c->target)) {
            bool add = false;
            ofservice = ofservice_lookup(mgr, c->target);
            if (!ofservice) {
                VLOG_INFO("%s: added service controller \"%s\"",
                          mgr->name, c->target);
                add = true;
            } else if (ofservice->allowed_versions != allowed_versions) {
                VLOG_INFO("%s: re-added service controller \"%s\"",
                          mgr->name, c->target);
                ofservice_destroy(mgr, ofservice);
                add = true;
            }
            if (add) {
                /* NOTE(review): the int result of ofservice_create() is
                 * ignored here; presumably a failure is reported by
                 * ofservice_create() itself -- confirm. */
                ofservice_create(mgr, c->target, allowed_versions, c->dscp);
            }
        } else {
            VLOG_WARN_RL(&rl, "%s: unsupported controller \"%s\"",
                         mgr->name, c->target);
            continue;
        }

        /* shash_add_once(): a target listed more than once is recorded only
         * once. */
        shash_add_once(&new_controllers, c->target, &controllers[i]);
    }

    /* Delete controllers that are no longer configured.
     * Update configuration of all now-existing controllers. */
    HMAP_FOR_EACH_SAFE (ofconn, next_ofconn, hmap_node, &mgr->controllers) {
        const char *target = ofconn_get_target(ofconn);
        struct ofproto_controller *c;

        c = shash_find_data(&new_controllers, target);
        if (!c) {
            VLOG_INFO("%s: removed primary controller \"%s\"",
                      mgr->name, target);
            ofconn_destroy(ofconn);
        } else {
            ofconn_reconfigure(ofconn, c);
        }
    }

    /* Delete services that are no longer configured.
     * Update configuration of all now-existing services. */
    HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, node, &mgr->services) {
        const char *target = pvconn_get_name(ofservice->pvconn);
        struct ofproto_controller *c;

        c = shash_find_data(&new_controllers, target);
        if (!c) {
            VLOG_INFO("%s: removed service controller \"%s\"",
                      mgr->name, target);
            ofservice_destroy(mgr, ofservice);
        } else {
            ofservice_reconfigure(ofservice, c);
        }
    }

    shash_destroy(&new_controllers);

    ovs_mutex_unlock(&ofproto_mutex);

    /* These run after 'ofproto_mutex' has been released. */
    update_in_band_remotes(mgr);
    update_fail_open(mgr);
    if (had_controllers != connmgr_has_controllers(mgr)) {
        ofproto_flush_flows(mgr->ofproto);
    }
}

/* Drops the connections between 'mgr' and all of its primary and secondary
 * controllers, forcing them to reconnect. */
void
connmgr_reconnect(const struct connmgr *mgr)
{
    struct ofconn *ofconn;

    LIST_FOR_EACH (ofconn, node, &mgr->all_conns) {
        rconn_reconnect(ofconn->rconn);
    }
}

/* Sets the "snoops" for 'mgr' to the pvconn targets listed in 'snoops'.
 *
 * A "snoop" is a pvconn to which every OpenFlow message to or from the most
 * important controller on 'mgr' is mirrored.
 *
 * Returns 0 if every target could be opened, otherwise the error from the
 * first target that failed (the remaining targets are still opened). */
int
connmgr_set_snoops(struct connmgr *mgr, const struct sset *snoops)
{
    return set_pvconns(&mgr->snoops, &mgr->n_snoops, snoops);
}

/* Adds each of the snoops currently configured on 'mgr' to 'snoops'. */
void
connmgr_get_snoops(const struct connmgr *mgr, struct sset *snoops)
{
    size_t i;

    for (i = 0; i < mgr->n_snoops; i++) {
        sset_add(snoops, pvconn_get_name(mgr->snoops[i]));
    }
}

/* Returns true if 'mgr' has at least one snoop, false if it has none. */
bool
connmgr_has_snoops(const struct connmgr *mgr)
{
    return mgr->n_snoops > 0;
}

/* Creates a new controller for 'target' in 'mgr'.  ofconn_reconfigure() needs
 * to be called later to finish the new ofconn's configuration.
*/ static void add_controller(struct connmgr *mgr, const char *target, uint8_t dscp, uint32_t allowed_versions) OVS_REQUIRES(ofproto_mutex) { char *name = ofconn_make_name(mgr, target); struct ofconn *ofconn; ofconn = ofconn_create(mgr, rconn_create(5, 8, dscp, allowed_versions), OFCONN_PRIMARY, true); ofconn->pktbuf = pktbuf_create(); rconn_connect(ofconn->rconn, target, name); hmap_insert(&mgr->controllers, &ofconn->hmap_node, hash_string(target, 0)); free(name); } static struct ofconn * find_controller_by_target(struct connmgr *mgr, const char *target) { struct ofconn *ofconn; HMAP_FOR_EACH_WITH_HASH (ofconn, hmap_node, hash_string(target, 0), &mgr->controllers) { if (!strcmp(ofconn_get_target(ofconn), target)) { return ofconn; } } return NULL; } static void update_in_band_remotes(struct connmgr *mgr) { struct sockaddr_in *addrs; size_t max_addrs, n_addrs; struct ofconn *ofconn; size_t i; /* Allocate enough memory for as many remotes as we could possibly have. */ max_addrs = mgr->n_extra_remotes + hmap_count(&mgr->controllers); addrs = xmalloc(max_addrs * sizeof *addrs); n_addrs = 0; /* Add all the remotes. */ HMAP_FOR_EACH (ofconn, hmap_node, &mgr->controllers) { struct sockaddr_in *sin = &addrs[n_addrs]; const char *target = rconn_get_target(ofconn->rconn); if (ofconn->band == OFPROTO_OUT_OF_BAND) { continue; } if (stream_parse_target_with_default_ports(target, OFP_TCP_PORT, OFP_SSL_PORT, sin)) { n_addrs++; } } for (i = 0; i < mgr->n_extra_remotes; i++) { addrs[n_addrs++] = mgr->extra_in_band_remotes[i]; } /* Create or update or destroy in-band. */ if (n_addrs) { if (!mgr->in_band) { in_band_create(mgr->ofproto, mgr->local_port_name, &mgr->in_band); } in_band_set_queue(mgr->in_band, mgr->in_band_queue); } else { /* in_band_run() needs a chance to delete any existing in-band flows. * We will destroy mgr->in_band after it's done with that. */ } if (mgr->in_band) { in_band_set_remotes(mgr->in_band, addrs, n_addrs); } /* Clean up. 
*/ free(addrs); } static void update_fail_open(struct connmgr *mgr) { if (connmgr_has_controllers(mgr) && mgr->fail_mode == OFPROTO_FAIL_STANDALONE) { if (!mgr->fail_open) { mgr->fail_open = fail_open_create(mgr->ofproto, mgr); } } else { fail_open_destroy(mgr->fail_open); mgr->fail_open = NULL; } } static int set_pvconns(struct pvconn ***pvconnsp, size_t *n_pvconnsp, const struct sset *sset) { struct pvconn **pvconns = *pvconnsp; size_t n_pvconns = *n_pvconnsp; const char *name; int retval = 0; size_t i; for (i = 0; i < n_pvconns; i++) { pvconn_close(pvconns[i]); } free(pvconns); pvconns = xmalloc(sset_count(sset) * sizeof *pvconns); n_pvconns = 0; SSET_FOR_EACH (name, sset) { struct pvconn *pvconn; int error; error = pvconn_open(name, 0, 0, &pvconn); if (!error) { pvconns[n_pvconns++] = pvconn; } else { VLOG_ERR("failed to listen on %s: %s", name, ovs_strerror(error)); if (!retval) { retval = error; } } } *pvconnsp = pvconns; *n_pvconnsp = n_pvconns; return retval; } /* Returns a "preference level" for snooping 'ofconn'. A higher return value * means that 'ofconn' is more interesting for monitoring than a lower return * value. */ static int snoop_preference(const struct ofconn *ofconn) { switch (ofconn->role) { case OFPCR12_ROLE_MASTER: return 3; case OFPCR12_ROLE_EQUAL: return 2; case OFPCR12_ROLE_SLAVE: return 1; case OFPCR12_ROLE_NOCHANGE: default: /* Shouldn't happen. */ return 0; } } /* One of 'mgr''s "snoop" pvconns has accepted a new connection on 'vconn'. * Connects this vconn to a controller. */ static void add_snooper(struct connmgr *mgr, struct vconn *vconn) { struct ofconn *ofconn, *best; /* Pick a controller for monitoring. 
*/ best = NULL; LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { if (ofconn->type == OFCONN_PRIMARY && (!best || snoop_preference(ofconn) > snoop_preference(best))) { best = ofconn; } } if (best) { rconn_add_monitor(best->rconn, vconn); } else { VLOG_INFO_RL(&rl, "no controller connection to snoop"); vconn_close(vconn); } } /* Public ofconn functions. */ /* Returns the connection type, either OFCONN_PRIMARY or OFCONN_SERVICE. */ enum ofconn_type ofconn_get_type(const struct ofconn *ofconn) { return ofconn->type; } /* If a master election id is defined, stores it into '*idp' and returns * true. Otherwise, stores UINT64_MAX into '*idp' and returns false. */ bool ofconn_get_master_election_id(const struct ofconn *ofconn, uint64_t *idp) { *idp = (ofconn->connmgr->master_election_id_defined ? ofconn->connmgr->master_election_id : UINT64_MAX); return ofconn->connmgr->master_election_id_defined; } /* Sets the master election id. * * Returns true if successful, false if the id is stale */ bool ofconn_set_master_election_id(struct ofconn *ofconn, uint64_t id) { if (ofconn->connmgr->master_election_id_defined && /* Unsigned difference interpreted as a two's complement signed * value */ (int64_t)(id - ofconn->connmgr->master_election_id) < 0) { return false; } ofconn->connmgr->master_election_id = id; ofconn->connmgr->master_election_id_defined = true; return true; } /* Returns the role configured for 'ofconn'. * * The default role, if no other role has been set, is OFPCR12_ROLE_EQUAL. */ enum ofp12_controller_role ofconn_get_role(const struct ofconn *ofconn) { return ofconn->role; } /* Changes 'ofconn''s role to 'role'. If 'role' is OFPCR12_ROLE_MASTER then * any existing master is demoted to a slave. 
*/ void ofconn_set_role(struct ofconn *ofconn, enum ofp12_controller_role role) { if (role == OFPCR12_ROLE_MASTER) { struct ofconn *other; HMAP_FOR_EACH (other, hmap_node, &ofconn->connmgr->controllers) { if (other->role == OFPCR12_ROLE_MASTER) { other->role = OFPCR12_ROLE_SLAVE; } } } ofconn->role = role; } void ofconn_set_invalid_ttl_to_controller(struct ofconn *ofconn, bool enable) { uint32_t bit = 1u << OFPR_INVALID_TTL; if (enable) { ofconn->master_async_config[OAM_PACKET_IN] |= bit; } else { ofconn->master_async_config[OAM_PACKET_IN] &= ~bit; } } bool ofconn_get_invalid_ttl_to_controller(struct ofconn *ofconn) { uint32_t bit = 1u << OFPR_INVALID_TTL; return (ofconn->master_async_config[OAM_PACKET_IN] & bit) != 0; } /* Returns the currently configured protocol for 'ofconn', one of OFPUTIL_P_*. * * Returns OFPUTIL_P_NONE, which is not a valid protocol, if 'ofconn' hasn't * completed version negotiation. This can't happen if at least one OpenFlow * message, other than OFPT_HELLO, has been received on the connection (such as * in ofproto.c's message handling code), since version negotiation is a * prerequisite for starting to receive messages. This means that * OFPUTIL_P_NONE is a special case that most callers need not worry about. */ enum ofputil_protocol ofconn_get_protocol(const struct ofconn *ofconn) { if (ofconn->protocol == OFPUTIL_P_NONE && rconn_is_connected(ofconn->rconn)) { int version = rconn_get_version(ofconn->rconn); if (version > 0) { ofconn_set_protocol(CONST_CAST(struct ofconn *, ofconn), ofputil_protocol_from_ofp_version(version)); } } return ofconn->protocol; } /* Sets the protocol for 'ofconn' to 'protocol' (one of OFPUTIL_P_*). * * (This doesn't actually send anything to accomplish this. Presumably the * caller already did that.) */ void ofconn_set_protocol(struct ofconn *ofconn, enum ofputil_protocol protocol) { ofconn->protocol = protocol; } /* Returns the currently configured packet in format for 'ofconn', one of * NXPIF_*. 
* * The default, if no other format has been set, is NXPIF_OPENFLOW10. */ enum nx_packet_in_format ofconn_get_packet_in_format(struct ofconn *ofconn) { return ofconn->packet_in_format; } /* Sets the packet in format for 'ofconn' to 'packet_in_format' (one of * NXPIF_*). */ void ofconn_set_packet_in_format(struct ofconn *ofconn, enum nx_packet_in_format packet_in_format) { ofconn->packet_in_format = packet_in_format; } /* Sets the controller connection ID for 'ofconn' to 'controller_id'. * * The connection controller ID is used for OFPP_CONTROLLER and * NXAST_CONTROLLER actions. See "struct nx_action_controller" for details. */ void ofconn_set_controller_id(struct ofconn *ofconn, uint16_t controller_id) { ofconn->controller_id = controller_id; } /* Returns the default miss send length for 'ofconn'. */ int ofconn_get_miss_send_len(const struct ofconn *ofconn) { return ofconn->miss_send_len; } /* Sets the default miss send length for 'ofconn' to 'miss_send_len'. */ void ofconn_set_miss_send_len(struct ofconn *ofconn, int miss_send_len) { ofconn->miss_send_len = miss_send_len; } void ofconn_set_async_config(struct ofconn *ofconn, const uint32_t master_masks[OAM_N_TYPES], const uint32_t slave_masks[OAM_N_TYPES]) { size_t size = sizeof ofconn->master_async_config; memcpy(ofconn->master_async_config, master_masks, size); memcpy(ofconn->slave_async_config, slave_masks, size); } /* Sends 'msg' on 'ofconn', accounting it as a reply. (If there is a * sufficient number of OpenFlow replies in-flight on a single ofconn, then the * connmgr will stop accepting new OpenFlow requests on that ofconn until the * controller has accepted some of the replies.) */ void ofconn_send_reply(const struct ofconn *ofconn, struct ofpbuf *msg) { ofconn_send(ofconn, msg, ofconn->reply_counter); } /* Sends each of the messages in list 'replies' on 'ofconn' in order, * accounting them as replies. 
*/ void ofconn_send_replies(const struct ofconn *ofconn, struct list *replies) { struct ofpbuf *reply, *next; LIST_FOR_EACH_SAFE (reply, next, list_node, replies) { list_remove(&reply->list_node); ofconn_send_reply(ofconn, reply); } } /* Sends 'error' on 'ofconn', as a reply to 'request'. Only at most the * first 64 bytes of 'request' are used. */ void ofconn_send_error(const struct ofconn *ofconn, const struct ofp_header *request, enum ofperr error) { static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(10, 10); struct ofpbuf *reply; reply = ofperr_encode_reply(error, request); if (!VLOG_DROP_INFO(&err_rl)) { const char *type_name; size_t request_len; enum ofpraw raw; request_len = ntohs(request->length); type_name = (!ofpraw_decode_partial(&raw, request, MIN(64, request_len)) ? ofpraw_get_name(raw) : "invalid"); VLOG_INFO("%s: sending %s error reply to %s message", rconn_get_name(ofconn->rconn), ofperr_to_string(error), type_name); } ofconn_send_reply(ofconn, reply); } /* Same as pktbuf_retrieve(), using the pktbuf owned by 'ofconn'. */ enum ofperr ofconn_pktbuf_retrieve(struct ofconn *ofconn, uint32_t id, struct ofpbuf **bufferp, ofp_port_t *in_port) { return pktbuf_retrieve(ofconn->pktbuf, id, bufferp, in_port); } /* Returns true if 'ofconn' has any pending opgroups. */ bool ofconn_has_pending_opgroups(const struct ofconn *ofconn) { return !list_is_empty(&ofconn->opgroups); } /* Adds 'ofconn_node' to 'ofconn''s list of pending opgroups. * * If 'ofconn' is destroyed or its connection drops, then 'ofconn' will remove * 'ofconn_node' from the list and re-initialize it with list_init(). The * client may, therefore, use list_is_empty(ofconn_node) to determine whether * 'ofconn_node' is still associated with an active ofconn. * * The client may also remove ofconn_node from the list itself, with * list_remove(). 
*/ void ofconn_add_opgroup(struct ofconn *ofconn, struct list *ofconn_node) { list_push_back(&ofconn->opgroups, ofconn_node); } /* Private ofconn functions. */ static const char * ofconn_get_target(const struct ofconn *ofconn) { return rconn_get_target(ofconn->rconn); } static struct ofconn * ofconn_create(struct connmgr *mgr, struct rconn *rconn, enum ofconn_type type, bool enable_async_msgs) { struct ofconn *ofconn; ofconn = xzalloc(sizeof *ofconn); ofconn->connmgr = mgr; list_push_back(&mgr->all_conns, &ofconn->node); ofconn->rconn = rconn; ofconn->type = type; ofconn->enable_async_msgs = enable_async_msgs; list_init(&ofconn->opgroups); hmap_init(&ofconn->monitors); list_init(&ofconn->updates); ofconn_flush(ofconn); return ofconn; } /* Clears all of the state in 'ofconn' that should not persist from one * connection to the next. */ static void ofconn_flush(struct ofconn *ofconn) OVS_REQUIRES(ofproto_mutex) { struct ofmonitor *monitor, *next_monitor; int i; ofconn->role = OFPCR12_ROLE_EQUAL; ofconn_set_protocol(ofconn, OFPUTIL_P_NONE); ofconn->packet_in_format = NXPIF_OPENFLOW10; /* Disassociate 'ofconn' from all of the ofopgroups that it initiated that * have not yet completed. (Those ofopgroups will still run to completion * in the usual way, but any errors that they run into will not be reported * on any OpenFlow channel.) * * Also discard any blocked operation on 'ofconn'. 
*/ while (!list_is_empty(&ofconn->opgroups)) { list_init(list_pop_front(&ofconn->opgroups)); } ofpbuf_delete(ofconn->blocked); ofconn->blocked = NULL; rconn_packet_counter_destroy(ofconn->packet_in_counter); ofconn->packet_in_counter = rconn_packet_counter_create(); for (i = 0; i < N_SCHEDULERS; i++) { if (ofconn->schedulers[i]) { int rate, burst; pinsched_get_limits(ofconn->schedulers[i], &rate, &burst); pinsched_destroy(ofconn->schedulers[i]); ofconn->schedulers[i] = pinsched_create(rate, burst); } } if (ofconn->pktbuf) { pktbuf_destroy(ofconn->pktbuf); ofconn->pktbuf = pktbuf_create(); } ofconn->miss_send_len = (ofconn->type == OFCONN_PRIMARY ? OFP_DEFAULT_MISS_SEND_LEN : 0); ofconn->controller_id = 0; rconn_packet_counter_destroy(ofconn->reply_counter); ofconn->reply_counter = rconn_packet_counter_create(); if (ofconn->enable_async_msgs) { uint32_t *master = ofconn->master_async_config; uint32_t *slave = ofconn->slave_async_config; /* "master" and "other" roles get all asynchronous messages by default, * except that the controller needs to enable nonstandard "packet-in" * reasons itself. */ master[OAM_PACKET_IN] = (1u << OFPR_NO_MATCH) | (1u << OFPR_ACTION); master[OAM_PORT_STATUS] = ((1u << OFPPR_ADD) | (1u << OFPPR_DELETE) | (1u << OFPPR_MODIFY)); master[OAM_FLOW_REMOVED] = ((1u << OFPRR_IDLE_TIMEOUT) | (1u << OFPRR_HARD_TIMEOUT) | (1u << OFPRR_DELETE)); /* "slave" role gets port status updates by default. 
*/ slave[OAM_PACKET_IN] = 0; slave[OAM_PORT_STATUS] = ((1u << OFPPR_ADD) | (1u << OFPPR_DELETE) | (1u << OFPPR_MODIFY)); slave[OAM_FLOW_REMOVED] = 0; } else { memset(ofconn->master_async_config, 0, sizeof ofconn->master_async_config); memset(ofconn->slave_async_config, 0, sizeof ofconn->slave_async_config); } HMAP_FOR_EACH_SAFE (monitor, next_monitor, ofconn_node, &ofconn->monitors) { ofmonitor_destroy(monitor); } rconn_packet_counter_destroy(ofconn->monitor_counter); ofconn->monitor_counter = rconn_packet_counter_create(); ofpbuf_list_delete(&ofconn->updates); /* ...but it should be empty. */ } static void ofconn_destroy(struct ofconn *ofconn) OVS_REQUIRES(ofproto_mutex) { ofconn_flush(ofconn); if (ofconn->type == OFCONN_PRIMARY) { hmap_remove(&ofconn->connmgr->controllers, &ofconn->hmap_node); } hmap_destroy(&ofconn->monitors); list_remove(&ofconn->node); rconn_destroy(ofconn->rconn); rconn_packet_counter_destroy(ofconn->packet_in_counter); rconn_packet_counter_destroy(ofconn->reply_counter); pktbuf_destroy(ofconn->pktbuf); rconn_packet_counter_destroy(ofconn->monitor_counter); free(ofconn); } /* Reconfigures 'ofconn' to match 'c'. 'ofconn' and 'c' must have the same * target. */ static void ofconn_reconfigure(struct ofconn *ofconn, const struct ofproto_controller *c) { int probe_interval; ofconn->band = c->band; ofconn->enable_async_msgs = c->enable_async_msgs; rconn_set_max_backoff(ofconn->rconn, c->max_backoff); probe_interval = c->probe_interval ? MAX(c->probe_interval, 5) : 0; rconn_set_probe_interval(ofconn->rconn, probe_interval); ofconn_set_rate_limit(ofconn, c->rate_limit, c->burst_limit); /* If dscp value changed reconnect. */ if (c->dscp != rconn_get_dscp(ofconn->rconn)) { rconn_set_dscp(ofconn->rconn, c->dscp); rconn_reconnect(ofconn->rconn); } } /* Returns true if it makes sense for 'ofconn' to receive and process OpenFlow * messages. 
*/
static bool
ofconn_may_recv(const struct ofconn *ofconn)
{
    /* Packet count reported by the reply counter for this connection. */
    int count = rconn_packet_counter_n_packets(ofconn->reply_counter);

    /* Receive only if no message is parked in 'blocked' (or 'retry' is set)
     * and the reply count is below OFCONN_REPLY_MAX. */
    return (!ofconn->blocked || ofconn->retry) && count < OFCONN_REPLY_MAX;
}

/* Performs periodic work for 'ofconn'.
 *
 * Runs the packet-in schedulers and the underlying rconn.  Then, if
 * 'handle_openflow' is nonnull, passes up to 50 messages to it, starting with
 * the message parked in 'ofconn->blocked', if any, and otherwise taking them
 * from the rconn.  A message for which 'handle_openflow' returns true is
 * deleted; one for which it returns false is kept in 'ofconn->blocked' so
 * that it is offered again later.
 *
 * Finally, under 'ofproto_mutex', destroys 'ofconn' if its rconn is no longer
 * alive, or flushes it if the rconn is alive but not connected.  Because of
 * the former, 'ofconn' may have been freed when this function returns. */
static void
ofconn_run(struct ofconn *ofconn,
           bool (*handle_openflow)(struct ofconn *,
                                   const struct ofpbuf *ofp_msg))
{
    struct connmgr *mgr = ofconn->connmgr;
    size_t i;

    for (i = 0; i < N_SCHEDULERS; i++) {
        pinsched_run(ofconn->schedulers[i], do_send_packet_in, ofconn);
    }

    rconn_run(ofconn->rconn);

    if (handle_openflow) {
        /* Limit the number of iterations to avoid starving other tasks. */
        for (i = 0; i < 50 && ofconn_may_recv(ofconn); i++) {
            struct ofpbuf *of_msg;

            /* A previously blocked message takes precedence over new
             * input. */
            of_msg = (ofconn->blocked
                      ? ofconn->blocked
                      : rconn_recv(ofconn->rconn));
            if (!of_msg) {
                break;
            }
            if (mgr->fail_open) {
                fail_open_maybe_recover(mgr->fail_open);
            }

            if (handle_openflow(ofconn, of_msg)) {
                /* Fully handled: release the message. */
                ofpbuf_delete(of_msg);
                ofconn->blocked = NULL;
            } else {
                /* Not handled yet: park the message and clear 'retry', which
                 * makes ofconn_may_recv() return false until 'retry' is set
                 * again. */
                ofconn->blocked = of_msg;
                ofconn->retry = false;
            }
        }
    }

    ovs_mutex_lock(&ofproto_mutex);
    if (!rconn_is_alive(ofconn->rconn)) {
        ofconn_destroy(ofconn);
    } else if (!rconn_is_connected(ofconn->rconn)) {
        ofconn_flush(ofconn);
    }
    ovs_mutex_unlock(&ofproto_mutex);
}

/* Registers wait conditions for 'ofconn' on its packet-in schedulers and its
 * rconn.  Additionally waits for incoming messages on the rconn if
 * 'handling_openflow' is true and ofconn_may_recv() allows receiving. */
static void
ofconn_wait(struct ofconn *ofconn, bool handling_openflow)
{
    int i;

    for (i = 0; i < N_SCHEDULERS; i++) {
        pinsched_wait(ofconn->schedulers[i]);
    }
    rconn_run_wait(ofconn->rconn);
    if (handling_openflow && ofconn_may_recv(ofconn)) {
        rconn_recv_wait(ofconn->rconn);
    }
}

/* Returns true if 'ofconn' should receive asynchronous messages of the given
 * OAM_* 'type' and 'reason', which should be an OFPR_* value for OAM_PACKET_IN,
 * an OFPPR_* value for OAM_PORT_STATUS, or an OFPRR_* value for
 * OAM_FLOW_REMOVED.  Returns false if the message should not be sent on
 * 'ofconn'.
*/ static bool ofconn_receives_async_msg(const struct ofconn *ofconn, enum ofconn_async_msg_type type, unsigned int reason) { const uint32_t *async_config; ovs_assert(reason < 32); ovs_assert((unsigned int) type < OAM_N_TYPES); if (ofconn_get_protocol(ofconn) == OFPUTIL_P_NONE || !rconn_is_connected(ofconn->rconn)) { return false; } /* Keep the following code in sync with the documentation in the * "Asynchronous Messages" section in DESIGN. */ if (ofconn->type == OFCONN_SERVICE && !ofconn->miss_send_len) { /* Service connections don't get asynchronous messages unless they have * explicitly asked for them by setting a nonzero miss send length. */ return false; } async_config = (ofconn->role == OFPCR12_ROLE_SLAVE ? ofconn->slave_async_config : ofconn->master_async_config); if (!(async_config[type] & (1u << reason))) { return false; } return true; } /* Returns a human-readable name for an OpenFlow connection between 'mgr' and * 'target', suitable for use in log messages for identifying the connection. * * The name is dynamically allocated. The caller should free it (with free()) * when it is no longer needed. */ static char * ofconn_make_name(const struct connmgr *mgr, const char *target) { return xasprintf("%s<->%s", mgr->name, target); } static void ofconn_set_rate_limit(struct ofconn *ofconn, int rate, int burst) { int i; for (i = 0; i < N_SCHEDULERS; i++) { struct pinsched **s = &ofconn->schedulers[i]; if (rate > 0) { if (!*s) { *s = pinsched_create(rate, burst); } else { pinsched_set_limits(*s, rate, burst); } } else { pinsched_destroy(*s); *s = NULL; } } } static void ofconn_send(const struct ofconn *ofconn, struct ofpbuf *msg, struct rconn_packet_counter *counter) { ofpmsg_update_length(msg); rconn_send(ofconn->rconn, msg, counter); } /* Sending asynchronous messages. */ static void schedule_packet_in(struct ofconn *, struct ofputil_packet_in); /* Sends an OFPT_PORT_STATUS message with 'opp' and 'reason' to appropriate * controllers managed by 'mgr'. 
*/ void connmgr_send_port_status(struct connmgr *mgr, const struct ofputil_phy_port *pp, uint8_t reason) { /* XXX Should limit the number of queued port status change messages. */ struct ofputil_port_status ps; struct ofconn *ofconn; ps.reason = reason; ps.desc = *pp; LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { if (ofconn_receives_async_msg(ofconn, OAM_PORT_STATUS, reason)) { struct ofpbuf *msg; msg = ofputil_encode_port_status(&ps, ofconn_get_protocol(ofconn)); ofconn_send(ofconn, msg, NULL); } } } /* Sends an OFPT_FLOW_REMOVED or NXT_FLOW_REMOVED message based on 'fr' to * appropriate controllers managed by 'mgr'. */ void connmgr_send_flow_removed(struct connmgr *mgr, const struct ofputil_flow_removed *fr) { struct ofconn *ofconn; LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { if (ofconn_receives_async_msg(ofconn, OAM_FLOW_REMOVED, fr->reason)) { struct ofpbuf *msg; /* Account flow expirations as replies to OpenFlow requests. That * works because preventing OpenFlow requests from being processed * also prevents new flows from being added (and expiring). (It * also prevents processing OpenFlow requests that would not add * new flows, so it is imperfect.) */ msg = ofputil_encode_flow_removed(fr, ofconn_get_protocol(ofconn)); ofconn_send_reply(ofconn, msg); } } } /* Given 'pin', sends an OFPT_PACKET_IN message to each OpenFlow controller as * necessary according to their individual configurations. * * The caller doesn't need to fill in pin->buffer_id or pin->total_len. */ void connmgr_send_packet_in(struct connmgr *mgr, const struct ofputil_packet_in *pin) { struct ofconn *ofconn; LIST_FOR_EACH (ofconn, node, &mgr->all_conns) { if (ofconn_receives_async_msg(ofconn, OAM_PACKET_IN, pin->reason) && ofconn->controller_id == pin->controller_id) { schedule_packet_in(ofconn, *pin); } } } /* pinsched callback for sending 'ofp_packet_in' on 'ofconn'. 
*/ static void do_send_packet_in(struct ofpbuf *ofp_packet_in, void *ofconn_) { struct ofconn *ofconn = ofconn_; rconn_send_with_limit(ofconn->rconn, ofp_packet_in, ofconn->packet_in_counter, 100); } /* Takes 'pin', composes an OpenFlow packet-in message from it, and passes it * to 'ofconn''s packet scheduler for sending. */ static void schedule_packet_in(struct ofconn *ofconn, struct ofputil_packet_in pin) { struct connmgr *mgr = ofconn->connmgr; pin.total_len = pin.packet_len; /* Get OpenFlow buffer_id. */ if (pin.reason == OFPR_ACTION) { pin.buffer_id = UINT32_MAX; } else if (mgr->fail_open && fail_open_is_active(mgr->fail_open)) { pin.buffer_id = pktbuf_get_null(); } else if (!ofconn->pktbuf) { pin.buffer_id = UINT32_MAX; } else { pin.buffer_id = pktbuf_save(ofconn->pktbuf, pin.packet, pin.packet_len, pin.fmd.in_port); } /* Figure out how much of the packet to send. */ if (pin.reason == OFPR_NO_MATCH) { pin.send_len = pin.packet_len; } else { /* Caller should have initialized 'send_len' to 'max_len' specified in * output action. */ } if (pin.buffer_id != UINT32_MAX) { pin.send_len = MIN(pin.send_len, ofconn->miss_send_len); } /* Make OFPT_PACKET_IN and hand over to packet scheduler. It might * immediately call into do_send_packet_in() or it might buffer it for a * while (until a later call to pinsched_run()). */ pinsched_send(ofconn->schedulers[pin.reason == OFPR_NO_MATCH ? 0 : 1], pin.fmd.in_port, ofputil_encode_packet_in(&pin, ofconn_get_protocol(ofconn), ofconn->packet_in_format), do_send_packet_in, ofconn); } /* Fail-open settings. */ /* Returns the failure handling mode (OFPROTO_FAIL_SECURE or * OFPROTO_FAIL_STANDALONE) for 'mgr'. */ enum ofproto_fail_mode connmgr_get_fail_mode(const struct connmgr *mgr) { return mgr->fail_mode; } /* Sets the failure handling mode for 'mgr' to 'fail_mode' (either * OFPROTO_FAIL_SECURE or OFPROTO_FAIL_STANDALONE). 
*/ void connmgr_set_fail_mode(struct connmgr *mgr, enum ofproto_fail_mode fail_mode) { if (mgr->fail_mode != fail_mode) { mgr->fail_mode = fail_mode; update_fail_open(mgr); if (!connmgr_has_controllers(mgr)) { ofproto_flush_flows(mgr->ofproto); } } } /* Fail-open implementation. */ /* Returns the longest probe interval among the primary controllers configured * on 'mgr'. Returns 0 if there are no primary controllers. */ int connmgr_get_max_probe_interval(const struct connmgr *mgr) { const struct ofconn *ofconn; int max_probe_interval; max_probe_interval = 0; HMAP_FOR_EACH (ofconn, hmap_node, &mgr->controllers) { int probe_interval = rconn_get_probe_interval(ofconn->rconn); max_probe_interval = MAX(max_probe_interval, probe_interval); } return max_probe_interval; } /* Returns the number of seconds for which all of 'mgr's primary controllers * have been disconnected. Returns 0 if 'mgr' has no primary controllers. */ int connmgr_failure_duration(const struct connmgr *mgr) { const struct ofconn *ofconn; int min_failure_duration; if (!connmgr_has_controllers(mgr)) { return 0; } min_failure_duration = INT_MAX; HMAP_FOR_EACH (ofconn, hmap_node, &mgr->controllers) { int failure_duration = rconn_failure_duration(ofconn->rconn); min_failure_duration = MIN(min_failure_duration, failure_duration); } return min_failure_duration; } /* Returns true if at least one primary controller is connected (regardless of * whether those controllers are believed to have authenticated and accepted * this switch), false if none of them are connected. */ bool connmgr_is_any_controller_connected(const struct connmgr *mgr) { const struct ofconn *ofconn; HMAP_FOR_EACH (ofconn, hmap_node, &mgr->controllers) { if (rconn_is_connected(ofconn->rconn)) { return true; } } return false; } /* Returns true if at least one primary controller is believed to have * authenticated and accepted this switch, false otherwise. 
*/ bool connmgr_is_any_controller_admitted(const struct connmgr *mgr) { const struct ofconn *ofconn; HMAP_FOR_EACH (ofconn, hmap_node, &mgr->controllers) { if (rconn_is_admitted(ofconn->rconn)) { return true; } } return false; } /* In-band configuration. */ static bool any_extras_changed(const struct connmgr *, const struct sockaddr_in *extras, size_t n); /* Sets the 'n' TCP port addresses in 'extras' as ones to which 'mgr''s * in-band control should guarantee access, in the same way that in-band * control guarantees access to OpenFlow controllers. */ void connmgr_set_extra_in_band_remotes(struct connmgr *mgr, const struct sockaddr_in *extras, size_t n) { if (!any_extras_changed(mgr, extras, n)) { return; } free(mgr->extra_in_band_remotes); mgr->n_extra_remotes = n; mgr->extra_in_band_remotes = xmemdup(extras, n * sizeof *extras); update_in_band_remotes(mgr); } /* Sets the OpenFlow queue used by flows set up by in-band control on * 'mgr' to 'queue_id'. If 'queue_id' is negative, then in-band control * flows will use the default queue. */ void connmgr_set_in_band_queue(struct connmgr *mgr, int queue_id) { if (queue_id != mgr->in_band_queue) { mgr->in_band_queue = queue_id; update_in_band_remotes(mgr); } } static bool any_extras_changed(const struct connmgr *mgr, const struct sockaddr_in *extras, size_t n) { size_t i; if (n != mgr->n_extra_remotes) { return true; } for (i = 0; i < n; i++) { const struct sockaddr_in *old = &mgr->extra_in_band_remotes[i]; const struct sockaddr_in *new = &extras[i]; if (old->sin_addr.s_addr != new->sin_addr.s_addr || old->sin_port != new->sin_port) { return true; } } return false; } /* In-band implementation. */ bool connmgr_has_in_band(struct connmgr *mgr) { return mgr->in_band != NULL; } /* Fail-open and in-band implementation. */ /* Called by 'ofproto' after all flows have been flushed, to allow fail-open * and standalone mode to re-create their flows. * * In-band control has more sophisticated code that manages flows itself. 
*/ void connmgr_flushed(struct connmgr *mgr) OVS_EXCLUDED(ofproto_mutex) { if (mgr->fail_open) { fail_open_flushed(mgr->fail_open); } /* If there are no controllers and we're in standalone mode, set up a flow * that matches every packet and directs them to OFPP_NORMAL (which goes to * us). Otherwise, the switch is in secure mode and we won't pass any * traffic until a controller has been defined and it tells us to do so. */ if (!connmgr_has_controllers(mgr) && mgr->fail_mode == OFPROTO_FAIL_STANDALONE) { struct ofpbuf ofpacts; struct match match; ofpbuf_init(&ofpacts, OFPACT_OUTPUT_SIZE); ofpact_put_OUTPUT(&ofpacts)->port = OFPP_NORMAL; ofpact_pad(&ofpacts); match_init_catchall(&match); ofproto_add_flow(mgr->ofproto, &match, 0, ofpacts.data, ofpacts.size); ofpbuf_uninit(&ofpacts); } } /* Creates a new ofservice for 'target' in 'mgr'. Returns 0 if successful, * otherwise a positive errno value. * * ofservice_reconfigure() must be called to fully configure the new * ofservice. */ static int ofservice_create(struct connmgr *mgr, const char *target, uint32_t allowed_versions, uint8_t dscp) { struct ofservice *ofservice; struct pvconn *pvconn; int error; error = pvconn_open(target, allowed_versions, dscp, &pvconn); if (error) { return error; } ofservice = xzalloc(sizeof *ofservice); hmap_insert(&mgr->services, &ofservice->node, hash_string(target, 0)); ofservice->pvconn = pvconn; ofservice->allowed_versions = allowed_versions; return 0; } static void ofservice_destroy(struct connmgr *mgr, struct ofservice *ofservice) { hmap_remove(&mgr->services, &ofservice->node); pvconn_close(ofservice->pvconn); free(ofservice); } static void ofservice_reconfigure(struct ofservice *ofservice, const struct ofproto_controller *c) { ofservice->probe_interval = c->probe_interval; ofservice->rate_limit = c->rate_limit; ofservice->burst_limit = c->burst_limit; ofservice->enable_async_msgs = c->enable_async_msgs; ofservice->dscp = c->dscp; } /* Finds and returns the ofservice within 'mgr' 
that has the given
 * 'target', or a null pointer if none exists. */
static struct ofservice *
ofservice_lookup(struct connmgr *mgr, const char *target)
{
    /* Services are hashed on their target string; see ofservice_create(). */
    size_t hash = hash_string(target, 0);
    struct ofservice *svc;

    HMAP_FOR_EACH_WITH_HASH (svc, node, hash, &mgr->services) {
        /* Different targets can share a hash, so compare the names. */
        if (strcmp(pvconn_get_name(svc->pvconn), target) == 0) {
            return svc;
        }
    }
    return NULL;
}

/* Flow monitors (NXST_FLOW_MONITOR). */

/* A counter incremented when something significant happens to an OpenFlow
 * rule.
 *
 *     - When a rule is added, its 'add_seqno' and 'modify_seqno' are set to
 *       the current value (which is then incremented).
 *
 *     - When a rule is modified, its 'modify_seqno' is set to the current
 *       value (which is then incremented).
 *
 * Thus, by comparing an old value of monitor_seqno against a rule's
 * 'add_seqno', one can tell whether the rule was added before or after the old
 * value was read, and similarly for 'modify_seqno'.
 *
 * 32 bits should normally be sufficient (and would be nice, to save space in
 * each rule) but then we'd have to have some special cases for wraparound.
 *
 * We initialize monitor_seqno to 1 to allow 0 to be used as an invalid
 * value.
*/
static uint64_t monitor_seqno = 1;

COVERAGE_DEFINE(ofmonitor_pause);
COVERAGE_DEFINE(ofmonitor_resume);

/* Creates a flow monitor on 'ofconn' as described by 'request'.
 *
 * On success, stores the new monitor in '*monitorp' and returns 0.  If
 * 'ofconn' already has a monitor with the requested id, stores NULL in
 * '*monitorp' and returns OFPERR_NXBRC_FM_DUPLICATE_ID. */
enum ofperr
ofmonitor_create(const struct ofputil_flow_monitor_request *request,
                 struct ofconn *ofconn, struct ofmonitor **monitorp)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofmonitor *m;

    *monitorp = NULL;

    /* Monitor ids must be unique within a connection. */
    m = ofmonitor_lookup(ofconn, request->id);
    if (m) {
        return OFPERR_NXBRC_FM_DUPLICATE_ID;
    }

    m = xmalloc(sizeof *m);
    m->ofconn = ofconn;
    hmap_insert(&ofconn->monitors, &m->ofconn_node, hash_int(request->id, 0));
    m->id = request->id;
    m->flags = request->flags;
    m->out_port = request->out_port;
    m->table_id = request->table_id;
    minimatch_init(&m->match, &request->match);

    *monitorp = m;
    return 0;
}

/* Returns the flow monitor on 'ofconn' whose id is 'id', or a null pointer if
 * 'ofconn' has no such monitor. */
struct ofmonitor *
ofmonitor_lookup(struct ofconn *ofconn, uint32_t id)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofmonitor *m;

    HMAP_FOR_EACH_IN_BUCKET (m, ofconn_node, hash_int(id, 0),
                             &ofconn->monitors) {
        if (m->id == id) {
            return m;
        }
    }
    return NULL;
}

/* Removes 'm' from its owning connection's set of monitors and frees it.
 * Does nothing if 'm' is null. */
void
ofmonitor_destroy(struct ofmonitor *m)
    OVS_REQUIRES(ofproto_mutex)
{
    if (m) {
        minimatch_destroy(&m->match);
        hmap_remove(&m->ofconn->monitors, &m->ofconn_node);
        free(m);
    }
}

/* Records that 'event' (NXFME_ADDED, NXFME_DELETED, or NXFME_MODIFIED)
 * happened to 'rule' and appends a flow update to the 'updates' list of each
 * connection in 'mgr' that has a matching monitor.  The updates are only
 * queued here; ofmonitor_flush() sends them.
 *
 * 'reason' is reported only for NXFME_DELETED events.  For the connection
 * 'abbrev_ofconn', unless its monitoring is paused, a single abbreviated
 * update carrying 'abbrev_xid' is queued per batch in place of full
 * updates. */
void
ofmonitor_report(struct connmgr *mgr, struct rule *rule,
                 enum nx_flow_update_event event,
                 enum ofp_flow_removed_reason reason,
                 const struct ofconn *abbrev_ofconn, ovs_be32 abbrev_xid)
    OVS_REQUIRES(ofproto_mutex)
{
    enum nx_flow_monitor_flags update;
    struct ofconn *ofconn;

    /* Translate the event into the monitor flag that selects it, stamping
     * the rule with a fresh sequence number for additions and
     * modifications. */
    switch (event) {
    case NXFME_ADDED:
        update = NXFMF_ADD;
        rule->add_seqno = rule->modify_seqno = monitor_seqno++;
        break;

    case NXFME_DELETED:
        update = NXFMF_DELETE;
        break;

    case NXFME_MODIFIED:
        update = NXFMF_MODIFY;
        rule->modify_seqno = monitor_seqno++;
        break;

    default:
    case NXFME_ABBREV:
        NOT_REACHED();
    }

    LIST_FOR_EACH (ofconn, node, &mgr->all_conns) {
        enum nx_flow_monitor_flags flags = 0;
        struct ofmonitor *m;

        if (ofconn->monitor_paused) {
            /* Only send NXFME_DELETED notifications for flows that were added
             * before we paused. */
            if (event != NXFME_DELETED
                || rule->add_seqno > ofconn->monitor_paused) {
                continue;
            }
        }

        /* Accumulate the flags of every monitor on this connection that
         * selects this event and matches the rule.  A monitor 'table_id' of
         * 0xff matches any table. */
        HMAP_FOR_EACH (m, ofconn_node, &ofconn->monitors) {
            if (m->flags & update
                && (m->table_id == 0xff || m->table_id == rule->table_id)
                && ofoperation_has_out_port(rule->pending, m->out_port)
                && cls_rule_is_loose_match(&rule->cr, &m->match)) {
                flags |= m->flags;
            }
        }

        if (flags) {
            /* Start a new batch of updates if none is in progress. */
            if (list_is_empty(&ofconn->updates)) {
                ofputil_start_flow_update(&ofconn->updates);
                ofconn->sent_abbrev_update = false;
            }

            if (ofconn != abbrev_ofconn || ofconn->monitor_paused) {
                /* Full update. */
                struct ofputil_flow_update fu;
                struct match match;

                fu.event = event;
                fu.reason = event == NXFME_DELETED ? reason : 0;
                fu.table_id = rule->table_id;
                fu.cookie = rule->flow_cookie;
                minimatch_expand(&rule->cr.match, &match);
                fu.match = &match;
                fu.priority = rule->cr.priority;

                /* The timeouts are read while holding the rule's mutex. */
                ovs_mutex_lock(&rule->mutex);
                fu.idle_timeout = rule->idle_timeout;
                fu.hard_timeout = rule->hard_timeout;
                ovs_mutex_unlock(&rule->mutex);

                /* Actions are included only if some matching monitor asked
                 * for them. */
                if (flags & NXFMF_ACTIONS) {
                    fu.ofpacts = rule->actions->ofpacts;
                    fu.ofpacts_len = rule->actions->ofpacts_len;
                } else {
                    fu.ofpacts = NULL;
                    fu.ofpacts_len = 0;
                }
                ofputil_append_flow_update(&fu, &ofconn->updates);
            } else if (!ofconn->sent_abbrev_update) {
                /* Abbreviated update, sent at most once per batch. */
                struct ofputil_flow_update fu;

                fu.event = NXFME_ABBREV;
                fu.xid = abbrev_xid;
                ofputil_append_flow_update(&fu, &ofconn->updates);

                ofconn->sent_abbrev_update = true;
            }
        }
    }
}

/* Sends the flow updates queued by ofmonitor_report() on each connection in
 * 'mgr'.  If a connection's monitor counter then reports more than 128 kB,
 * also pauses monitoring on that connection and sends it an
 * NXT_FLOW_MONITOR_PAUSED message. */
void
ofmonitor_flush(struct connmgr *mgr)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofconn *ofconn;

    LIST_FOR_EACH (ofconn, node, &mgr->all_conns) {
        struct ofpbuf *msg, *next;

        LIST_FOR_EACH_SAFE (msg, next, list_node, &ofconn->updates) {
            unsigned int n_bytes;

            list_remove(&msg->list_node);
            ofconn_send(ofconn, msg, ofconn->monitor_counter);
            n_bytes = rconn_packet_counter_n_bytes(ofconn->monitor_counter);
            if (!ofconn->monitor_paused && n_bytes > 128 * 1024) {
                struct ofpbuf *pause;

                COVERAGE_INC(ofmonitor_pause);
                /* Record the sequence number at which monitoring paused;
                 * a nonzero value also marks the connection as paused. */
                ofconn->monitor_paused = monitor_seqno++;
                pause = ofpraw_alloc_xid(OFPRAW_NXT_FLOW_MONITOR_PAUSED,
                                         OFP10_VERSION, htonl(0), 0);
                ofconn_send(ofconn, pause, ofconn->monitor_counter);
            }
        }
    }
}

/* Resumes flow monitoring on 'ofconn': collects the rules to refresh for
 * each of its monitors, based on the sequence number at which monitoring was
 * paused, sends the resulting refresh updates followed by an
 * NXT_FLOW_MONITOR_RESUMED message, and marks 'ofconn' as no longer paused. */
static void
ofmonitor_resume(struct ofconn *ofconn)
    OVS_REQUIRES(ofproto_mutex)
{
    struct rule_collection rules;
    struct ofpbuf *resumed;
    struct ofmonitor *m;
    struct list msgs;

    rule_collection_init(&rules);
    HMAP_FOR_EACH (m, ofconn_node, &ofconn->monitors) {
        ofmonitor_collect_resume_rules(m, ofconn->monitor_paused, &rules);
    }

    list_init(&msgs);
    ofmonitor_compose_refresh_updates(&rules, &msgs);

    /* The "resumed" message goes last, after all the refresh updates. */
    resumed = ofpraw_alloc_xid(OFPRAW_NXT_FLOW_MONITOR_RESUMED, OFP10_VERSION,
                               htonl(0), 0);
    list_push_back(&msgs, &resumed->list_node);
    ofconn_send_replies(ofconn, &msgs);

    ofconn->monitor_paused = 0;
}

/* Returns true if monitoring on 'ofconn' is paused and its monitor counter
 * reports no packets, so that monitoring may be resumed. */
static bool
ofmonitor_may_resume(const struct ofconn *ofconn)
    OVS_REQUIRES(ofproto_mutex)
{
    return (ofconn->monitor_paused != 0
            && !rconn_packet_counter_n_packets(ofconn->monitor_counter));
}

/* Resumes flow monitoring on every connection in 'mgr' for which
 * ofmonitor_may_resume() returns true.  Takes 'ofproto_mutex' itself. */
static void
ofmonitor_run(struct connmgr *mgr)
{
    struct ofconn *ofconn;

    ovs_mutex_lock(&ofproto_mutex);
    LIST_FOR_EACH (ofconn, node, &mgr->all_conns) {
        if (ofmonitor_may_resume(ofconn)) {
            COVERAGE_INC(ofmonitor_resume);
            ofmonitor_resume(ofconn);
        }
    }
    ovs_mutex_unlock(&ofproto_mutex);
}

/* Requests an immediate wakeup of the poll loop if any connection in 'mgr'
 * is ready to have its flow monitoring resumed.  Takes 'ofproto_mutex'
 * itself. */
static void
ofmonitor_wait(struct connmgr *mgr)
{
    struct ofconn *ofconn;

    ovs_mutex_lock(&ofproto_mutex);
    LIST_FOR_EACH (ofconn, node, &mgr->all_conns) {
        if (ofmonitor_may_resume(ofconn)) {
            poll_immediate_wake();
        }
    }
    ovs_mutex_unlock(&ofproto_mutex);
}
openvswitch-2.0.1+git20140120/ofproto/connmgr.h000066400000000000000000000174161226605124000207670ustar00rootroot00000000000000/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef CONNMGR_H #define CONNMGR_H 1 #include "classifier.h" #include "hmap.h" #include "list.h" #include "match.h" #include "ofp-errors.h" #include "ofproto.h" #include "openflow/nicira-ext.h" #include "openvswitch/types.h" struct nlattr; struct ofconn; struct ofopgroup; struct ofputil_flow_removed; struct ofputil_packet_in; struct ofputil_phy_port; struct rule; struct simap; struct sset; /* ofproto supports two kinds of OpenFlow connections: * * - "Primary" connections to ordinary OpenFlow controllers. ofproto * maintains persistent connections to these controllers and by default * sends them asynchronous messages such as packet-ins. * * - "Service" connections, e.g. from ovs-ofctl. When these connections * drop, it is the other side's responsibility to reconnect them if * necessary. ofproto does not send them asynchronous messages by default. * * Currently, active (tcp, ssl, unix) connections are always "primary" * connections and passive (ptcp, pssl, punix) connections are always "service" * connections. There is no inherent reason for this, but it reflects the * common case. */ enum ofconn_type { OFCONN_PRIMARY, /* An ordinary OpenFlow controller. */ OFCONN_SERVICE /* A service connection, e.g. "ovs-ofctl". */ }; /* The type of an OpenFlow asynchronous message. */ enum ofconn_async_msg_type { OAM_PACKET_IN, /* OFPT_PACKET_IN or NXT_PACKET_IN. */ OAM_PORT_STATUS, /* OFPT_PORT_STATUS. */ OAM_FLOW_REMOVED, /* OFPT_FLOW_REMOVED or NXT_FLOW_REMOVED. */ OAM_N_TYPES }; /* Basics. 
*/ struct connmgr *connmgr_create(struct ofproto *ofproto, const char *dpif_name, const char *local_name); void connmgr_destroy(struct connmgr *); void connmgr_run(struct connmgr *, bool (*handle_openflow)(struct ofconn *, const struct ofpbuf *ofp_msg)); void connmgr_wait(struct connmgr *, bool handling_openflow); void connmgr_get_memory_usage(const struct connmgr *, struct simap *usage); struct ofproto *ofconn_get_ofproto(const struct ofconn *); void connmgr_retry(struct connmgr *); /* OpenFlow configuration. */ bool connmgr_has_controllers(const struct connmgr *); void connmgr_get_controller_info(struct connmgr *, struct shash *); void connmgr_free_controller_info(struct shash *); void connmgr_set_controllers(struct connmgr *, const struct ofproto_controller[], size_t n, uint32_t allowed_versions); void connmgr_reconnect(const struct connmgr *); int connmgr_set_snoops(struct connmgr *, const struct sset *snoops); bool connmgr_has_snoops(const struct connmgr *); void connmgr_get_snoops(const struct connmgr *, struct sset *snoops); /* Individual connections to OpenFlow controllers. 
*/ enum ofconn_type ofconn_get_type(const struct ofconn *); bool ofconn_get_master_election_id(const struct ofconn *, uint64_t *idp); bool ofconn_set_master_election_id(struct ofconn *, uint64_t); enum ofp12_controller_role ofconn_get_role(const struct ofconn *); void ofconn_set_role(struct ofconn *, enum ofp12_controller_role); enum ofputil_protocol ofconn_get_protocol(const struct ofconn *); void ofconn_set_protocol(struct ofconn *, enum ofputil_protocol); enum nx_packet_in_format ofconn_get_packet_in_format(struct ofconn *); void ofconn_set_packet_in_format(struct ofconn *, enum nx_packet_in_format); void ofconn_set_controller_id(struct ofconn *, uint16_t controller_id); void ofconn_set_invalid_ttl_to_controller(struct ofconn *, bool); bool ofconn_get_invalid_ttl_to_controller(struct ofconn *); int ofconn_get_miss_send_len(const struct ofconn *); void ofconn_set_miss_send_len(struct ofconn *, int miss_send_len); void ofconn_set_async_config(struct ofconn *, const uint32_t master_masks[OAM_N_TYPES], const uint32_t slave_masks[OAM_N_TYPES]); void ofconn_send_reply(const struct ofconn *, struct ofpbuf *); void ofconn_send_replies(const struct ofconn *, struct list *); void ofconn_send_error(const struct ofconn *, const struct ofp_header *request, enum ofperr); enum ofperr ofconn_pktbuf_retrieve(struct ofconn *, uint32_t id, struct ofpbuf **bufferp, ofp_port_t *in_port); bool ofconn_has_pending_opgroups(const struct ofconn *); void ofconn_add_opgroup(struct ofconn *, struct list *); void ofconn_remove_opgroup(struct ofconn *, struct list *, const struct ofp_header *request, int error); /* Sending asynchronous messages. */ void connmgr_send_port_status(struct connmgr *, const struct ofputil_phy_port *, uint8_t reason); void connmgr_send_flow_removed(struct connmgr *, const struct ofputil_flow_removed *); void connmgr_send_packet_in(struct connmgr *, const struct ofputil_packet_in *); /* Fail-open settings. 
*/ enum ofproto_fail_mode connmgr_get_fail_mode(const struct connmgr *); void connmgr_set_fail_mode(struct connmgr *, enum ofproto_fail_mode); /* Fail-open implementation. */ int connmgr_get_max_probe_interval(const struct connmgr *); bool connmgr_is_any_controller_connected(const struct connmgr *); bool connmgr_is_any_controller_admitted(const struct connmgr *); int connmgr_failure_duration(const struct connmgr *); /* In-band configuration. */ void connmgr_set_extra_in_band_remotes(struct connmgr *, const struct sockaddr_in *, size_t); void connmgr_set_in_band_queue(struct connmgr *, int queue_id); /* In-band implementation. */ bool connmgr_has_in_band(struct connmgr *); /* Fail-open and in-band implementation. */ void connmgr_flushed(struct connmgr *); /* A flow monitor managed by NXST_FLOW_MONITOR and related requests. */ struct ofmonitor { struct ofconn *ofconn; /* Owning 'ofconn'. */ struct hmap_node ofconn_node; /* In ofconn's 'monitors' hmap. */ uint32_t id; enum nx_flow_monitor_flags flags; /* Matching. 
*/ ofp_port_t out_port; uint8_t table_id; struct minimatch match; }; struct ofputil_flow_monitor_request; enum ofperr ofmonitor_create(const struct ofputil_flow_monitor_request *, struct ofconn *, struct ofmonitor **) OVS_REQUIRES(ofproto_mutex); struct ofmonitor *ofmonitor_lookup(struct ofconn *, uint32_t id) OVS_REQUIRES(ofproto_mutex); void ofmonitor_destroy(struct ofmonitor *) OVS_REQUIRES(ofproto_mutex); void ofmonitor_report(struct connmgr *, struct rule *, enum nx_flow_update_event, enum ofp_flow_removed_reason, const struct ofconn *abbrev_ofconn, ovs_be32 abbrev_xid) OVS_REQUIRES(ofproto_mutex); void ofmonitor_flush(struct connmgr *) OVS_REQUIRES(ofproto_mutex); struct rule_collection; void ofmonitor_collect_resume_rules(struct ofmonitor *, uint64_t seqno, struct rule_collection *) OVS_REQUIRES(ofproto_mutex); void ofmonitor_compose_refresh_updates(struct rule_collection *rules, struct list *msgs) OVS_REQUIRES(ofproto_mutex); #endif /* connmgr.h */ openvswitch-2.0.1+git20140120/ofproto/fail-open.c000066400000000000000000000203741226605124000211660ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "fail-open.h" #include #include #include "classifier.h" #include "connmgr.h" #include "flow.h" #include "mac-learning.h" #include "odp-util.h" #include "ofp-actions.h" #include "ofp-util.h" #include "ofpbuf.h" #include "ofproto.h" #include "ofproto-provider.h" #include "pktbuf.h" #include "poll-loop.h" #include "rconn.h" #include "timeval.h" #include "vconn.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(fail_open); /* * Fail-open mode. * * In fail-open mode, the switch detects when the controller cannot be * contacted or when the controller is dropping switch connections because the * switch does not pass its admission control policy. In those situations the * switch sets up flows itself using the "normal" action. * * There is a little subtlety to implementation, to properly handle the case * where the controller allows switch connections but drops them a few seconds * later for admission control reasons. Because of this case, we don't want to * just stop setting up flows when we connect to the controller: if we did, * then new flow setup and existing flows would stop during the duration of * connection to the controller, and thus the whole network would go down for * that period of time. * * So, instead, we add some special cases when we are connected to a * controller, but not yet sure that it has admitted us: * * - We set up flows immediately ourselves, but simultaneously send out an * OFPT_PACKET_IN to the controller. We put a special bogus buffer-id in * these OFPT_PACKET_IN messages so that duplicate packets don't get sent * out to the network when the controller replies. * * - We also send out OFPT_PACKET_IN messages for totally bogus packets * every so often, in case no real new flows are arriving in the network. * * - We don't flush the flow table at the time we connect, because this * could cause network stuttering in a switch with lots of flows or very * high-bandwidth flows by suddenly throwing lots of packets down to * userspace. 
*/ struct fail_open { struct ofproto *ofproto; struct connmgr *connmgr; int last_disconn_secs; long long int next_bogus_packet_in; struct rconn_packet_counter *bogus_packet_counter; }; static void fail_open_recover(struct fail_open *); /* Returns the number of seconds of disconnection after which fail-open mode * should activate. */ static int trigger_duration(const struct fail_open *fo) { if (!connmgr_has_controllers(fo->connmgr)) { /* Shouldn't ever arrive here, but if we do, never fail open. */ return INT_MAX; } else { /* Otherwise, every controller must have a chance to send an * inactivity probe and reconnect before we fail open, so take the * maximum probe interval and multiply by 3: * * - The first interval is the idle time before sending an inactivity * probe. * * - The second interval is the time allowed for a response to the * inactivity probe. * * - The third interval is the time allowed to reconnect after no * response is received. */ return connmgr_get_max_probe_interval(fo->connmgr) * 3; } } /* Returns true if 'fo' is currently in fail-open mode, otherwise false. */ bool fail_open_is_active(const struct fail_open *fo) { return fo->last_disconn_secs != 0; } static void send_bogus_packet_ins(struct fail_open *fo) { struct ofputil_packet_in pin; uint8_t mac[ETH_ADDR_LEN]; struct ofpbuf b; ofpbuf_init(&b, 128); eth_addr_nicira_random(mac); compose_rarp(&b, mac); memset(&pin, 0, sizeof pin); pin.packet = b.data; pin.packet_len = b.size; pin.reason = OFPR_NO_MATCH; pin.send_len = b.size; pin.fmd.in_port = OFPP_LOCAL; connmgr_send_packet_in(fo->connmgr, &pin); ofpbuf_uninit(&b); } /* Enter fail-open mode if we should be in it. */ void fail_open_run(struct fail_open *fo) { int disconn_secs = connmgr_failure_duration(fo->connmgr); /* Enter fail-open mode if 'fo' is not in it but should be. 
*/ if (disconn_secs >= trigger_duration(fo)) { if (!fail_open_is_active(fo)) { VLOG_WARN("Could not connect to controller (or switch failed " "controller's post-connection admission control " "policy) for %d seconds, failing open", disconn_secs); fo->last_disconn_secs = disconn_secs; /* Flush all OpenFlow and datapath flows. We will set up our * fail-open rule from fail_open_flushed() when * ofproto_flush_flows() calls back to us. */ ofproto_flush_flows(fo->ofproto); } else if (disconn_secs > fo->last_disconn_secs + 60) { VLOG_INFO("Still in fail-open mode after %d seconds disconnected " "from controller", disconn_secs); fo->last_disconn_secs = disconn_secs; } } /* Schedule a bogus packet-in if we're connected and in fail-open. */ if (fail_open_is_active(fo)) { if (connmgr_is_any_controller_connected(fo->connmgr)) { bool expired = time_msec() >= fo->next_bogus_packet_in; if (expired) { send_bogus_packet_ins(fo); } if (expired || fo->next_bogus_packet_in == LLONG_MAX) { fo->next_bogus_packet_in = time_msec() + 2000; } } else { fo->next_bogus_packet_in = LLONG_MAX; } } } /* If 'fo' is currently in fail-open mode and its rconn has connected to the * controller, exits fail open mode. 
*/
void
fail_open_maybe_recover(struct fail_open *fo)
{
    if (fail_open_is_active(fo)
        && connmgr_is_any_controller_admitted(fo->connmgr)) {
        fail_open_recover(fo);
    }
}

/* Leaves fail-open mode unconditionally: logs the transition, resets
 * 'last_disconn_secs' to 0 and 'next_bogus_packet_in' to LLONG_MAX, then
 * deletes the catch-all flow at FAIL_OPEN_PRIORITY from fo->ofproto. */
static void
fail_open_recover(struct fail_open *fo)
{
    struct match match;

    VLOG_WARN("No longer in fail-open mode");
    fo->last_disconn_secs = 0;
    fo->next_bogus_packet_in = LLONG_MAX;

    match_init_catchall(&match);
    ofproto_delete_flow(fo->ofproto, &match, FAIL_OPEN_PRIORITY);
}

/* If a bogus packet-in is scheduled (that is, 'next_bogus_packet_in' is not
 * LLONG_MAX), passes that deadline to poll_timer_wait_until(). */
void
fail_open_wait(struct fail_open *fo)
{
    if (fo->next_bogus_packet_in != LLONG_MAX) {
        poll_timer_wait_until(fo->next_bogus_packet_in);
    }
}

/* If the controller connection has been down for at least
 * trigger_duration(fo) seconds, adds a catch-all flow at FAIL_OPEN_PRIORITY
 * whose single action outputs to OFPP_NORMAL.  Otherwise does nothing. */
void
fail_open_flushed(struct fail_open *fo)
    OVS_EXCLUDED(ofproto_mutex)
{
    int disconn_secs = connmgr_failure_duration(fo->connmgr);
    bool open = disconn_secs >= trigger_duration(fo);
    if (open) {
        struct ofpbuf ofpacts;
        struct match match;

        /* Set up a flow that matches every packet and directs them to
         * OFPP_NORMAL. */
        ofpbuf_init(&ofpacts, OFPACT_OUTPUT_SIZE);
        ofpact_put_OUTPUT(&ofpacts)->port = OFPP_NORMAL;
        ofpact_pad(&ofpacts);

        match_init_catchall(&match);
        ofproto_add_flow(fo->ofproto, &match, FAIL_OPEN_PRIORITY,
                         ofpacts.data, ofpacts.size);

        ofpbuf_uninit(&ofpacts);
    }
}

/* Creates and returns a new struct fail_open for 'ofproto' and 'mgr'.
 *
 * The new object starts out of fail-open mode ('last_disconn_secs' is 0) with
 * no bogus packet-in scheduled ('next_bogus_packet_in' is LLONG_MAX). */
struct fail_open *
fail_open_create(struct ofproto *ofproto, struct connmgr *mgr)
{
    struct fail_open *fo = xmalloc(sizeof *fo);
    fo->ofproto = ofproto;
    fo->connmgr = mgr;
    fo->last_disconn_secs = 0;
    fo->next_bogus_packet_in = LLONG_MAX;
    fo->bogus_packet_counter = rconn_packet_counter_create();
    return fo;
}

/* Destroys 'fo'.  Does nothing if 'fo' is null.  If 'fo' is still in
 * fail-open mode, it is first taken out of it via fail_open_recover(). */
void
fail_open_destroy(struct fail_open *fo)
{
    if (fo) {
        if (fail_open_is_active(fo)) {
            fail_open_recover(fo);
        }
        /* We don't own fo->connmgr. */
        rconn_packet_counter_destroy(fo->bogus_packet_counter);
        free(fo);
    }
}
openvswitch-2.0.1+git20140120/ofproto/fail-open.h000066400000000000000000000027211226605124000211670ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef FAIL_OPEN_H
#define FAIL_OPEN_H 1

#include #include #include "flow.h"

struct connmgr;
struct fail_open;
struct ofproto;

/* Priority of the rule added by the fail-open subsystem when a switch enters
 * fail-open mode.  This priority value uniquely identifies a fail-open flow
 * (OpenFlow priorities max out at 65535 and nothing else in Open vSwitch
 * creates flows with this priority).  And "f0" is mnemonic for "fail open"! */
#define FAIL_OPEN_PRIORITY 0xf0f0f0

struct fail_open *fail_open_create(struct ofproto *, struct connmgr *);
void fail_open_destroy(struct fail_open *);
void fail_open_wait(struct fail_open *);
bool fail_open_is_active(const struct fail_open *);
void fail_open_run(struct fail_open *);
void fail_open_maybe_recover(struct fail_open *);
void fail_open_flushed(struct fail_open *);

#endif /* fail-open.h */
openvswitch-2.0.1+git20140120/ofproto/in-band.c000066400000000000000000000406231226605124000206230ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include #include "in-band.h" #include #include #include #include #include #include #include #include "classifier.h" #include "dhcp.h" #include "flow.h" #include "netdev.h" #include "netlink.h" #include "odp-util.h" #include "ofp-actions.h" #include "ofproto.h" #include "ofpbuf.h" #include "ofproto-provider.h" #include "openflow/openflow.h" #include "packets.h" #include "poll-loop.h" #include "timeval.h" #include "vlog.h"

VLOG_DEFINE_THIS_MODULE(in_band);

/* Priorities used in classifier for in-band rules.  These values are higher
 * than any that may be set with OpenFlow, and "18" kind of looks like "IB".
 * The ordering of priorities is not important because all of the rules set up
 * by in-band control have the same action.  The only reason to use more than
 * one priority is to make the kind of flow easier to see during debugging.
 *
 * Only the first enumerator has an explicit value, so the remaining ones take
 * the consecutive values 180001 through 180008. */
enum {
    /* One set per bridge. */
    IBR_FROM_LOCAL_DHCP = 180000, /* (a) From local port, DHCP. */
    IBR_TO_LOCAL_ARP,             /* (b) To local port, ARP. */
    IBR_FROM_LOCAL_ARP,           /* (c) From local port, ARP. */

    /* One set per unique next-hop MAC. */
    IBR_TO_NEXT_HOP_ARP,          /* (d) To remote MAC, ARP. */
    IBR_FROM_NEXT_HOP_ARP,        /* (e) From remote MAC, ARP. */

    /* One set per unique remote IP address. */
    IBR_TO_REMOTE_ARP,            /* (f) To remote IP, ARP. */
    IBR_FROM_REMOTE_ARP,          /* (g) From remote IP, ARP. */

    /* One set per unique remote (IP,port) pair. */
    IBR_TO_REMOTE_TCP,            /* (h) To remote IP, TCP port. */
    IBR_FROM_REMOTE_TCP           /* (i) From remote IP, TCP port. */
};

/* Track one remote IP and next hop information.
*/ struct in_band_remote { struct sockaddr_in remote_addr; /* IP address, in network byte order. */ uint8_t remote_mac[ETH_ADDR_LEN]; /* Next-hop MAC, all-zeros if unknown. */ uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous nonzero next-hop MAC. */ struct netdev *remote_netdev; /* Device to send to next-hop MAC. */ }; /* What to do to an in_band_rule. */ enum in_band_op { ADD, /* Add the rule to ofproto's flow table. */ DELETE /* Delete the rule from ofproto's flow table. */ }; /* A rule to add to or delete from ofproto's flow table. */ struct in_band_rule { struct hmap_node hmap_node; /* In struct in_band's "rules" hmap. */ struct match match; unsigned int priority; enum in_band_op op; }; struct in_band { struct ofproto *ofproto; int queue_id; /* Remote information. */ time_t next_remote_refresh; /* Refresh timer. */ struct in_band_remote *remotes; size_t n_remotes; /* Local information. */ time_t next_local_refresh; /* Refresh timer. */ uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */ struct netdev *local_netdev; /* Local port's network device. */ /* Flow tracking. */ struct hmap rules; /* Contains "struct in_band_rule"s. */ }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); static int refresh_remote(struct in_band *ib, struct in_band_remote *r) { struct in_addr next_hop_inaddr; char *next_hop_dev; int retval; /* Find the next-hop IP address. */ memset(r->remote_mac, 0, sizeof r->remote_mac); retval = netdev_get_next_hop(ib->local_netdev, &r->remote_addr.sin_addr, &next_hop_inaddr, &next_hop_dev); if (retval) { VLOG_WARN("cannot find route for controller ("IP_FMT"): %s", IP_ARGS(r->remote_addr.sin_addr.s_addr), ovs_strerror(retval)); return 1; } if (!next_hop_inaddr.s_addr) { next_hop_inaddr = r->remote_addr.sin_addr; } /* Open the next-hop network device. 
*/ if (!r->remote_netdev || strcmp(netdev_get_name(r->remote_netdev), next_hop_dev)) { netdev_close(r->remote_netdev); retval = netdev_open(next_hop_dev, "system", &r->remote_netdev); if (retval) { VLOG_WARN_RL(&rl, "cannot open netdev %s (next hop " "to controller "IP_FMT"): %s", next_hop_dev, IP_ARGS(r->remote_addr.sin_addr.s_addr), ovs_strerror(retval)); free(next_hop_dev); return 1; } } free(next_hop_dev); /* Look up the MAC address of the next-hop IP address. */ retval = netdev_arp_lookup(r->remote_netdev, next_hop_inaddr.s_addr, r->remote_mac); if (retval) { VLOG_DBG_RL(&rl, "cannot look up remote MAC address ("IP_FMT"): %s", IP_ARGS(next_hop_inaddr.s_addr), ovs_strerror(retval)); } /* If we don't have a MAC address, then refresh quickly, since we probably * will get a MAC address soon (via ARP). Otherwise, we can afford to wait * a little while. */ return eth_addr_is_zero(r->remote_mac) ? 1 : 10; } static bool refresh_remotes(struct in_band *ib) { struct in_band_remote *r; bool any_changes; if (time_now() < ib->next_remote_refresh) { return false; } any_changes = false; ib->next_remote_refresh = TIME_MAX; for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) { uint8_t old_remote_mac[ETH_ADDR_LEN]; time_t next_refresh; /* Save old MAC. */ memcpy(old_remote_mac, r->remote_mac, ETH_ADDR_LEN); /* Refresh remote information. */ next_refresh = refresh_remote(ib, r) + time_now(); ib->next_remote_refresh = MIN(ib->next_remote_refresh, next_refresh); /* If the MAC changed, log the changes. 
*/ if (!eth_addr_equals(r->remote_mac, old_remote_mac)) { any_changes = true; if (!eth_addr_is_zero(r->remote_mac) && !eth_addr_equals(r->last_remote_mac, r->remote_mac)) { VLOG_DBG("remote MAC address changed from "ETH_ADDR_FMT " to "ETH_ADDR_FMT, ETH_ADDR_ARGS(r->last_remote_mac), ETH_ADDR_ARGS(r->remote_mac)); memcpy(r->last_remote_mac, r->remote_mac, ETH_ADDR_LEN); } } } return any_changes; } /* Refreshes the MAC address of the local port into ib->local_mac, if it is due * for a refresh. Returns true if anything changed, otherwise false. */ static bool refresh_local(struct in_band *ib) { uint8_t ea[ETH_ADDR_LEN]; time_t now; now = time_now(); if (now < ib->next_local_refresh) { return false; } ib->next_local_refresh = now + 1; if (netdev_get_etheraddr(ib->local_netdev, ea) || eth_addr_equals(ea, ib->local_mac)) { return false; } memcpy(ib->local_mac, ea, ETH_ADDR_LEN); return true; } /* Returns true if packets in 'flow' should be directed to the local port. * (This keeps the flow table from preventing DHCP replies from being seen by * the local port.) */ bool in_band_must_output_to_local_port(const struct flow *flow) { return (flow->dl_type == htons(ETH_TYPE_IP) && flow->nw_proto == IPPROTO_UDP && flow->tp_src == htons(DHCP_SERVER_PORT) && flow->tp_dst == htons(DHCP_CLIENT_PORT)); } static void add_rule(struct in_band *ib, const struct match *match, unsigned int priority) { uint32_t hash = match_hash(match, 0); struct in_band_rule *rule; HMAP_FOR_EACH_WITH_HASH (rule, hmap_node, hash, &ib->rules) { if (match_equal(&rule->match, match)) { rule->op = ADD; return; } } rule = xmalloc(sizeof *rule); rule->match = *match; rule->priority = priority; rule->op = ADD; hmap_insert(&ib->rules, &rule->hmap_node, hash); } static void update_rules(struct in_band *ib) { struct in_band_rule *ib_rule; struct in_band_remote *r; struct match match; /* Mark all the existing rules for deletion. (Afterward we will re-add any * rules that are still valid.) 
*/ HMAP_FOR_EACH (ib_rule, hmap_node, &ib->rules) { ib_rule->op = DELETE; } if (ib->n_remotes && !eth_addr_is_zero(ib->local_mac)) { /* (a) Allow DHCP requests sent from the local port. */ match_init_catchall(&match); match_set_in_port(&match, OFPP_LOCAL); match_set_dl_type(&match, htons(ETH_TYPE_IP)); match_set_dl_src(&match, ib->local_mac); match_set_nw_proto(&match, IPPROTO_UDP); match_set_tp_src(&match, htons(DHCP_CLIENT_PORT)); match_set_tp_dst(&match, htons(DHCP_SERVER_PORT)); add_rule(ib, &match, IBR_FROM_LOCAL_DHCP); /* (b) Allow ARP replies to the local port's MAC address. */ match_init_catchall(&match); match_set_dl_type(&match, htons(ETH_TYPE_ARP)); match_set_dl_dst(&match, ib->local_mac); match_set_nw_proto(&match, ARP_OP_REPLY); add_rule(ib, &match, IBR_TO_LOCAL_ARP); /* (c) Allow ARP requests from the local port's MAC address. */ match_init_catchall(&match); match_set_dl_type(&match, htons(ETH_TYPE_ARP)); match_set_dl_src(&match, ib->local_mac); match_set_nw_proto(&match, ARP_OP_REQUEST); add_rule(ib, &match, IBR_FROM_LOCAL_ARP); } for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) { const uint8_t *remote_mac = r->remote_mac; if (eth_addr_is_zero(remote_mac)) { continue; } /* (d) Allow ARP replies to the next hop's MAC address. */ match_init_catchall(&match); match_set_dl_type(&match, htons(ETH_TYPE_ARP)); match_set_dl_dst(&match, remote_mac); match_set_nw_proto(&match, ARP_OP_REPLY); add_rule(ib, &match, IBR_TO_NEXT_HOP_ARP); /* (e) Allow ARP requests from the next hop's MAC address. */ match_init_catchall(&match); match_set_dl_type(&match, htons(ETH_TYPE_ARP)); match_set_dl_src(&match, remote_mac); match_set_nw_proto(&match, ARP_OP_REQUEST); add_rule(ib, &match, IBR_FROM_NEXT_HOP_ARP); } for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) { const struct sockaddr_in *a = &r->remote_addr; /* (f) Allow ARP replies containing the remote's IP address as a * target. 
*/ match_init_catchall(&match); match_set_dl_type(&match, htons(ETH_TYPE_ARP)); match_set_nw_proto(&match, ARP_OP_REPLY); match_set_nw_dst(&match, a->sin_addr.s_addr); add_rule(ib, &match, IBR_TO_REMOTE_ARP); /* (g) Allow ARP requests containing the remote's IP address as a * source. */ match_init_catchall(&match); match_set_dl_type(&match, htons(ETH_TYPE_ARP)); match_set_nw_proto(&match, ARP_OP_REQUEST); match_set_nw_src(&match, a->sin_addr.s_addr); add_rule(ib, &match, IBR_FROM_REMOTE_ARP); /* (h) Allow TCP traffic to the remote's IP and port. */ match_init_catchall(&match); match_set_dl_type(&match, htons(ETH_TYPE_IP)); match_set_nw_proto(&match, IPPROTO_TCP); match_set_nw_dst(&match, a->sin_addr.s_addr); match_set_tp_dst(&match, a->sin_port); add_rule(ib, &match, IBR_TO_REMOTE_TCP); /* (i) Allow TCP traffic from the remote's IP and port. */ match_init_catchall(&match); match_set_dl_type(&match, htons(ETH_TYPE_IP)); match_set_nw_proto(&match, IPPROTO_TCP); match_set_nw_src(&match, a->sin_addr.s_addr); match_set_tp_src(&match, a->sin_port); add_rule(ib, &match, IBR_FROM_REMOTE_TCP); } } /* Updates the OpenFlow flow table for the current state of in-band control. * Returns true ordinarily. Returns false if no remotes are configured on 'ib' * and 'ib' doesn't have any rules left to remove from the OpenFlow flow * table. Thus, a false return value means that the caller can destroy 'ib' * without leaving extra flows hanging around in the flow table. 
*/
bool
in_band_run(struct in_band *ib)
{
    uint64_t ofpacts_stub[128 / 8];
    struct ofpbuf ofpacts;

    struct in_band_rule *rule, *next;

    /* Build the action list shared by every in-band flow: an optional
     * SET_QUEUE (only when a nonnegative queue is configured) followed by an
     * output to OFPP_NORMAL. */
    ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);

    if (ib->queue_id >= 0) {
        ofpact_put_SET_QUEUE(&ofpacts)->queue_id = ib->queue_id;
    }
    ofpact_put_OUTPUT(&ofpacts)->port = OFPP_NORMAL;

    /* Refresh local and remote addressing.  The return values are ignored:
     * the rule set is recomputed on every call regardless. */
    refresh_local(ib);
    refresh_remotes(ib);

    update_rules(ib);

    /* Apply each rule's pending operation.  The _SAFE iterator is needed
     * because DELETE may remove and free the current rule. */
    HMAP_FOR_EACH_SAFE (rule, next, hmap_node, &ib->rules) {
        switch (rule->op) {
        case ADD:
            ofproto_add_flow(ib->ofproto, &rule->match, rule->priority,
                             ofpacts.data, ofpacts.size);
            break;

        case DELETE:
            if (ofproto_delete_flow(ib->ofproto,
                                    &rule->match, rule->priority)) {
                /* ofproto doesn't have the rule anymore so there's no reason
                 * for us to track it any longer. */
                hmap_remove(&ib->rules, &rule->hmap_node);
                free(rule);
            }
            break;
        }
    }

    ofpbuf_uninit(&ofpacts);

    return ib->n_remotes || !hmap_is_empty(&ib->rules);
}

/* Takes the earlier of the remote and local refresh times, multiplies it by
 * 1000 and passes the result to poll_timer_wait_until(). */
void
in_band_wait(struct in_band *in_band)
{
    long long int wakeup
            = MIN(in_band->next_remote_refresh, in_band->next_local_refresh);
    poll_timer_wait_until(wakeup * 1000);
}

/* Creates an in-band control object for 'ofproto' whose local port is the
 * "internal" netdev named 'local_name'.
 *
 * On success stores the new object in '*in_bandp' and returns 0.  The object
 * starts with no remotes, no queue (queue_id -1) and both refresh timers set
 * to TIME_MIN.  If the netdev cannot be opened, logs an error, leaves
 * '*in_bandp' null and returns the error from netdev_open(). */
int
in_band_create(struct ofproto *ofproto, const char *local_name,
               struct in_band **in_bandp)
{
    struct in_band *in_band;
    struct netdev *local_netdev;
    int error;

    *in_bandp = NULL;
    error = netdev_open(local_name, "internal", &local_netdev);
    if (error) {
        VLOG_ERR("failed to initialize in-band control: cannot open "
                 "datapath local port %s (%s)",
                 local_name, ovs_strerror(error));
        return error;
    }

    in_band = xzalloc(sizeof *in_band);
    in_band->ofproto = ofproto;
    in_band->queue_id = -1;
    in_band->next_remote_refresh = TIME_MIN;
    in_band->next_local_refresh = TIME_MIN;
    in_band->local_netdev = local_netdev;
    hmap_init(&in_band->rules);

    *in_bandp = in_band;

    return 0;
}

/* Frees 'ib' and everything it owns: the tracked rules, the remotes (and
 * their netdevs, via in_band_set_remotes()) and the local netdev.  Does
 * nothing if 'ib' is null.
 *
 * The tracked rules are only freed here; this function makes no call that
 * removes flows from ofproto's flow table. */
void
in_band_destroy(struct in_band *ib)
{
    if (ib) {
        struct in_band_rule *rule, *next;

        HMAP_FOR_EACH_SAFE (rule, next, hmap_node, &ib->rules) {
            hmap_remove(&ib->rules, &rule->hmap_node);
            free(rule);
        }
        hmap_destroy(&ib->rules);
        in_band_set_remotes(ib, NULL, 0);
        netdev_close(ib->local_netdev);
        free(ib);
    }
}

/* Returns true if the 'n' entries in 'addresses' differ from the remotes
 * currently configured on 'ib', either in count or, comparing position by
 * position, in IP address or port.  Returns false if they are the same. */
static bool
any_addresses_changed(struct in_band *ib,
                      const struct sockaddr_in *addresses, size_t n)
{
    size_t i;

    if (n != ib->n_remotes) {
        return true;
    }

    for (i = 0; i < n; i++) {
        const struct sockaddr_in *old = &ib->remotes[i].remote_addr;
        const struct sockaddr_in *new = &addresses[i];

        if (old->sin_addr.s_addr != new->sin_addr.s_addr ||
            old->sin_port != new->sin_port) {
            return true;
        }
    }

    return false;
}

/* Replaces the remotes of 'ib' with the 'n' entries in 'addresses'.  Does
 * nothing if they are unchanged (see any_addresses_changed()).  Otherwise the
 * old remotes' netdevs are closed, a zeroed array is allocated for the new
 * ones (null when 'n' is 0) and the remote refresh timer is reset. */
void
in_band_set_remotes(struct in_band *ib,
                    const struct sockaddr_in *addresses, size_t n)
{
    size_t i;

    if (!any_addresses_changed(ib, addresses, n)) {
        return;
    }

    /* Clear old remotes. */
    for (i = 0; i < ib->n_remotes; i++) {
        netdev_close(ib->remotes[i].remote_netdev);
    }
    free(ib->remotes);

    /* Set up new remotes. */
    ib->remotes = n ? xzalloc(n * sizeof *ib->remotes) : NULL;
    ib->n_remotes = n;
    for (i = 0; i < n; i++) {
        ib->remotes[i].remote_addr = addresses[i];
    }

    /* Force refresh in next call to in_band_run(). */
    ib->next_remote_refresh = TIME_MIN;
}

/* Sets the OpenFlow queue used by flows set up by 'ib' to 'queue_id'.  If
 * 'queue_id' is negative, 'ib' will not set any queue (which is also the
 * default). */
void
in_band_set_queue(struct in_band *ib, int queue_id)
{
    ib->queue_id = queue_id;
}
openvswitch-2.0.1+git20140120/ofproto/in-band.h000066400000000000000000000024411226605124000206240ustar00rootroot00000000000000/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ #ifndef IN_BAND_H #define IN_BAND_H 1 #include #include #include #include #include "flow.h" struct flow; struct in_band; struct nlattr; struct ofpbuf; struct ofproto; int in_band_create(struct ofproto *, const char *local_name, struct in_band **); void in_band_destroy(struct in_band *); void in_band_set_queue(struct in_band *, int queue_id); void in_band_set_remotes(struct in_band *, const struct sockaddr_in *, size_t n); bool in_band_run(struct in_band *); void in_band_wait(struct in_band *); bool in_band_must_output_to_local_port(const struct flow *); #endif /* in-band.h */ openvswitch-2.0.1+git20140120/ofproto/ipfix-gen-entities000077500000000000000000000102221226605124000225750ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (C) 2012 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. import getopt import re import sys import xml.sax import xml.sax.handler class IpfixEntityHandler(xml.sax.handler.ContentHandler): RECORD_FIELDS = ['name', 'dataType', 'elementId', 'status'] # Cf. RFC 5101, Section 6. DATA_TYPE_SIZE = { 'unsigned8': 1, 'unsigned16': 2, 'unsigned32': 4, 'unsigned64': 8, 'signed8': 1, 'signed16': 2, 'signed32': 4, 'signed64': 8, 'float32': 4, 'float64': 8, 'boolean': 1, # Not clear. 'macAddress': 6, 'octetArray': 0, # Not clear. 'string': 0, # Not clear. 'dateTimeSeconds': 4, 'dateTimeMilliseconds': 8, 'dateTimeMicroseconds': 8, 'dateTimeNanoseconds': 8, 'ipv4Address': 4, 'ipv6Address': 16, } def __init__(self): self.current_field_name = None self.current_field_value = [] self.current_record = dict() def startDocument(self): print """\ /* IPFIX entities. 
*/ #ifndef IPFIX_ENTITY #define IPFIX_ENTITY(ENUM, ID, SIZE, NAME) #endif """ def endDocument(self): print """ #undef IPFIX_ENTITY""" def startElement(self, name, attrs): if name in self.RECORD_FIELDS: self.current_field_name = name else: self.current_field_name = None self.current_field_value = [] @staticmethod def camelcase_to_uppercase(s): return re.sub('(.)([A-Z]+)', r'\1_\2', s).upper() def endElement(self, name): if self.current_field_name is not None: self.current_record[self.current_field_name] = ''.join( self.current_field_value).strip() elif (name == 'record' and self.current_record.get('status') == 'current' and 'dataType' in self.current_record): self.current_record['enumName'] = self.camelcase_to_uppercase( self.current_record['name']) self.current_record['dataTypeSize'] = self.DATA_TYPE_SIZE.get( self.current_record['dataType'], 0) print 'IPFIX_ENTITY(%(enumName)s, %(elementId)s, ' \ '%(dataTypeSize)i, %(name)s)' % self.current_record self.current_record.clear() def characters(self, content): if self.current_field_name is not None: self.current_field_value.append(content) def print_ipfix_entity_macros(xml_file): xml.sax.parse(xml_file, IpfixEntityHandler()) def usage(name): print """\ %(name)s: IPFIX entity definition generator Prints C macros defining IPFIX entities from the standard IANA file at usage: %(name)s [OPTIONS] XML where XML is the standard IANA XML file defining IPFIX entities The following options are also available: -h, --help display this help message -V, --version display version information\ """ % {'name': name} sys.exit(0) if __name__ == '__main__': # try: try: options, args = getopt.gnu_getopt(sys.argv[1:], 'hV', ['help', 'version']) except getopt.GetoptError, geo: sys.stderr.write('%s: %s\n' % (sys.argv[0], geo.msg)) sys.exit(1) for key, value in options: if key in ['-h', '--help']: usage() elif key in ['-V', '--version']: print 'ipfix-gen-entities (Open vSwitch)' else: sys.exit(0) if len(args) != 1: sys.stderr.write('%s: exactly 
1 non-option arguments required ' '(use --help for help)\n' % sys.argv[0]) sys.exit(1) print_ipfix_entity_macros(args[0]) # except Exception, e: # sys.stderr.write('%s: %s\n' % (sys.argv[0], e)) # sys.exit(1) # Local variables: # mode: python # End: openvswitch-2.0.1+git20140120/ofproto/ipfix.xml000066400000000000000000011507501226605124000210140ustar00rootroot00000000000000 IP Flow Information Export (IPFIX) Entities 2007-05-10 2012-12-05 For the IPFIX schema, please see . IPFIX Information Elements Expert Review Primary expert - Nevil Brownlee and Secondary expert - Juergen Quittek Values 0-127: NFv9-compatible Reserved 0 current octetDeltaCount unsigned64 flowCounter deltaCounter 1 data current The number of octets since the previous report (if any) in incoming packets for this Flow at the Observation Point. The number of octets includes IP header(s) and IP payload. octets packetDeltaCount unsigned64 flowCounter deltaCounter 2 data current The number of incoming packets since the previous report (if any) for this Flow at the Observation Point. packets deltaFlowCount unsigned64 deltaCounter 3 current The conservative count of Original Flows contributing to this Aggregated Flow; may be distributed via any of the methods expressed by the valueDistributionMethod Information Element. protocolIdentifier unsigned8 ipHeader identifier 4 all current The value of the protocol number in the IP packet header. The protocol number identifies the IP packet payload type. Protocol numbers are defined in the IANA Protocol Numbers registry. In Internet Protocol version 4 (IPv4), this is carried in the Protocol field. In Internet Protocol version 6 (IPv6), this is carried in the Next Header field in the last extension header of the packet. See for the specification of the IPv4 protocol field. See for the specification of the IPv6 protocol field. See the list of protocol numbers assigned by IANA at . 
ipClassOfService unsigned8 ipHeader identifier 5 all current For IPv4 packets, this is the value of the TOS field in the IPv4 packet header. For IPv6 packets, this is the value of the Traffic Class field in the IPv6 packet header. See (Section 5.3.2) and for the definition of the IPv4 TOS field. See for the definition of the IPv6 Traffic Class field. tcpControlBits unsigned8 minMax flags 6 all current TCP control bits observed for packets of this Flow. The information is encoded in a set of bit fields. For each TCP control bit, there is a bit in this set. A bit is set to 1 if any observed packet of this Flow has the corresponding TCP control bit set to 1. A value of 0 for a bit indicates that the corresponding bit was not set in any of the observed packets of this Flow. 0 1 2 3 4 5 6 7 +-----+-----+-----+-----+-----+-----+-----+-----+ | Reserved | URG | ACK | PSH | RST | SYN | FIN | +-----+-----+-----+-----+-----+-----+-----+-----+ Reserved: Reserved for future use by TCP. Must be zero. URG: Urgent Pointer field significant ACK: Acknowledgment field significant PSH: Push Function RST: Reset the connection SYN: Synchronize sequence numbers FIN: No more data from sender See for the definition of the TCP control bits in the TCP header. sourceTransportPort unsigned16 transportHeader identifier 7 all current The source port identifier in the transport header. For the transport protocols UDP, TCP, and SCTP, this is the source port number given in the respective header. This field MAY also be used for future transport protocols that have 16-bit source port identifiers. See for the definition of the UDP source port field. See for the definition of the TCP source port field. See for the definition of SCTP. Additional information on defined UDP and TCP port numbers can be found at . sourceIPv4Address ipv4Address ipHeader identifier 8 all current The IPv4 source address in the IP packet header. See for the definition of the IPv4 source address field. 
sourceIPv4PrefixLength unsigned8 ipHeader 9 option current The number of contiguous bits that are relevant in the sourceIPv4Prefix Information Element. bits 0-32 ingressInterface unsigned32 scope identifier 10 all current The index of the IP interface where packets of this Flow are being received. The value matches the value of managed object 'ifIndex' as defined in RFC 2863. Note that ifIndex values are not assigned statically to an interface and that the interfaces may be renumbered every time the device's management system is re-initialized, as specified in RFC 2863. See for the definition of the ifIndex object. destinationTransportPort unsigned16 transportHeader identifier 11 all current The destination port identifier in the transport header. For the transport protocols UDP, TCP, and SCTP, this is the destination port number given in the respective header. This field MAY also be used for future transport protocols that have 16-bit destination port identifiers. See for the definition of the UDP destination port field. See for the definition of the TCP destination port field. See for the definition of SCTP. Additional information on defined UDP and TCP port numbers can be found at . destinationIPv4Address ipv4Address ipHeader identifier 12 all current The IPv4 destination address in the IP packet header. See for the definition of the IPv4 destination address field. destinationIPv4PrefixLength unsigned8 ipHeader 13 option current The number of contiguous bits that are relevant in the destinationIPv4Prefix Information Element. bits 0-32 egressInterface unsigned32 scope identifier 14 all current The index of the IP interface where packets of this Flow are being sent. The value matches the value of managed object 'ifIndex' as defined in RFC 2863. Note that ifIndex values are not assigned statically to an interface and that the interfaces may be renumbered every time the device's management system is re-initialized, as specified in RFC 2863. 
See for the definition of the ifIndex object. ipNextHopIPv4Address ipv4Address derived identifier 15 data current The IPv4 address of the next IPv4 hop. bgpSourceAsNumber unsigned32 derived identifier 16 all current The autonomous system (AS) number of the source IP address. If AS path information for this Flow is only available as an unordered AS set (and not as an ordered AS sequence), then the value of this Information Element is 0. See for a description of BGP-4, and see for the definition of the AS number. bgpDestinationAsNumber unsigned32 derived identifier 17 all current The autonomous system (AS) number of the destination IP address. If AS path information for this Flow is only available as an unordered AS set (and not as an ordered AS sequence), then the value of this Information Element is 0. See for a description of BGP-4, and see for the definition of the AS number. bgpNextHopIPv4Address ipv4Address derived identifier 18 all current The IPv4 address of the next (adjacent) BGP hop. See for a description of BGP-4. postMCastPacketDeltaCount unsigned64 flowCounter deltaCounter 19 data current The number of outgoing multicast packets since the previous report (if any) sent for packets of this Flow by a multicast daemon within the Observation Domain. This property cannot necessarily be observed at the Observation Point, but may be retrieved by other means. packets postMCastOctetDeltaCount unsigned64 flowCounter deltaCounter 20 data current The number of octets since the previous report (if any) in outgoing multicast packets sent for packets of this Flow by a multicast daemon within the Observation Domain. This property cannot necessarily be observed at the Observation Point, but may be retrieved by other means. The number of octets includes IP header(s) and IP payload. octets flowEndSysUpTime unsigned32 timestamp 21 data current The relative timestamp of the last packet of this Flow. 
It indicates the number of milliseconds since the last (re-)initialization of the IPFIX Device (sysUpTime). milliseconds flowStartSysUpTime unsigned32 timestamp 22 data current The relative timestamp of the first packet of this Flow. It indicates the number of milliseconds since the last (re-)initialization of the IPFIX Device (sysUpTime). milliseconds postOctetDeltaCount unsigned64 flowCounter deltaCounter 23 data current The definition of this Information Element is identical to the definition of Information Element 'octetDeltaCount', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. octets postPacketDeltaCount unsigned64 flowCounter deltaCounter 24 data current The definition of this Information Element is identical to the definition of Information Element 'packetDeltaCount', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. packets minimumIpTotalLength unsigned64 minMax 25 all current Length of the smallest packet observed for this Flow. The packet length includes the IP header(s) length and the IP payload length. octets See for the specification of the IPv4 total length. See for the specification of the IPv6 payload length. See for the specification of the IPv6 jumbo payload length. maximumIpTotalLength unsigned64 minMax 26 all current Length of the largest packet observed for this Flow. The packet length includes the IP header(s) length and the IP payload length. octets See for the specification of the IPv4 total length. See for the specification of the IPv6 payload length. See for the specification of the IPv6 jumbo payload length. sourceIPv6Address ipv6Address ipHeader identifier 27 all current The IPv6 source address in the IP packet header. See for the definition of the Source Address field in the IPv6 header. 
destinationIPv6Address ipv6Address ipHeader identifier 28 all current The IPv6 destination address in the IP packet header. See for the definition of the Destination Address field in the IPv6 header. sourceIPv6PrefixLength unsigned8 ipHeader 29 option current The number of contiguous bits that are relevant in the sourceIPv6Prefix Information Element. bits 0-128 destinationIPv6PrefixLength unsigned8 ipHeader 30 option current The number of contiguous bits that are relevant in the destinationIPv6Prefix Information Element. bits 0-128 flowLabelIPv6 unsigned32 ipHeader identifier 31 all current The value of the IPv6 Flow Label field in the IP packet header. See for the definition of the Flow Label field in the IPv6 packet header. icmpTypeCodeIPv4 unsigned16 transportHeader identifier 32 all current Type and Code of the IPv4 ICMP message. The combination of both values is reported as (ICMP type * 256) + ICMP code. See for the definition of the IPv4 ICMP type and code fields. igmpType unsigned8 transportHeader identifier 33 all current The type field of the IGMP message. See for the definition of the IGMP type field. 34-35 flowActiveTimeout unsigned16 misc 36 all current The number of seconds after which an active Flow is timed out anyway, even if there is still a continuous flow of packets. seconds flowIdleTimeout unsigned16 misc 37 all current A Flow is considered to be timed out if no packets belonging to the Flow have been observed for the number of seconds specified by this field. seconds 38-39 exportedOctetTotalCount unsigned64 processCounter totalCounter 40 data current The total number of octets that the Exporting Process has sent since the Exporting Process (re-)initialization to a particular Collecting Process. The value of this Information Element is calculated by summing up the IPFIX Message Header length values of all IPFIX Messages that were successfully sent to the Collecting Process. 
The reported number excludes octets in the IPFIX Message that carries the counter value. If this Information Element is sent to a particular Collecting Process, then by default it specifies the number of octets sent to this Collecting Process. octets exportedMessageTotalCount unsigned64 processCounter totalCounter 41 data current The total number of IPFIX Messages that the Exporting Process has sent since the Exporting Process (re-)initialization to a particular Collecting Process. The reported number excludes the IPFIX Message that carries the counter value. If this Information Element is sent to a particular Collecting Process, then by default it specifies the number of IPFIX Messages sent to this Collecting Process. messages exportedFlowRecordTotalCount unsigned64 processCounter totalCounter 42 data current The total number of Flow Records that the Exporting Process has sent as Data Records since the Exporting Process (re-)initialization to a particular Collecting Process. The reported number excludes Flow Records in the IPFIX Message that carries the counter value. If this Information Element is sent to a particular Collecting Process, then by default it specifies the number of Flow Records sent to this process. flows 43 sourceIPv4Prefix ipv4Address ipHeader 44 data current IPv4 source address prefix. destinationIPv4Prefix ipv4Address ipHeader 45 data current IPv4 destination address prefix. mplsTopLabelType unsigned8 derived identifier 46 data current This field identifies the control protocol that allocated the top-of-stack label. Values for this field are listed in the MPLS label type registry. See [RFC3031] for the MPLS label structure. See [RFC4364] for the association of MPLS labels with Virtual Private Networks (VPNs). See [RFC4271] for BGP and BGP routing. See [RFC5036] for Label Distribution Protocol (LDP). See the list of MPLS label types assigned by IANA at [http://www.iana.org/assignments/mpls-label-values].
mplsTopLabelIPv4Address ipv4Address derived identifier 47 data current The IPv4 address of the system that the MPLS top label will cause this Flow to be forwarded to. See for the association between MPLS labels and IP addresses. 48-51 minimumTTL unsigned8 minMax 52 data current Minimum TTL value observed for any packet in this Flow. hops See for the definition of the IPv4 Time to Live field. See for the definition of the IPv6 Hop Limit field. maximumTTL unsigned8 minMax 53 data current Maximum TTL value observed for any packet in this Flow. hops See for the definition of the IPv4 Time to Live field. See for the definition of the IPv6 Hop Limit field. fragmentIdentification unsigned32 ipHeader identifier 54 data current The value of the Identification field in the IPv4 packet header or in the IPv6 Fragment header, respectively. The value is 0 for IPv6 if there is no fragment header. See for the definition of the IPv4 Identification field. See for the definition of the Identification field in the IPv6 Fragment header. postIpClassOfService unsigned8 ipHeader identifier 55 all current The definition of this Information Element is identical to the definition of Information Element 'ipClassOfService', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. See for the definition of the IPv4 TOS field. See for the definition of the IPv6 Traffic Class field. See for the definition of middleboxes. sourceMacAddress macAddress subIpHeader identifier 56 data current The IEEE 802 source MAC address field. See IEEE.802-3.2002. postDestinationMacAddress macAddress subIpHeader identifier 57 data current The definition of this Information Element is identical to the definition of Information Element 'destinationMacAddress', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. See IEEE.802-3.2002. 
vlanId unsigned16 subIpHeader identifier 58 data current Virtual LAN identifier associated with ingress interface. For dot1q vlans, see 243 dot1qVlanId. See IEEE.802-1Q.2003. postVlanId unsigned16 subIpHeader identifier 59 data current Virtual LAN identifier associated with egress interface. For postdot1q vlans, see 254 postDot1qVlanId. See IEEE.802-1Q.2003. ipVersion unsigned8 ipHeader identifier 60 all current The IP version field in the IP packet header. See for the definition of the version field in the IPv4 packet header. See for the definition of the version field in the IPv6 packet header. Additional information on defined version numbers can be found at . flowDirection unsigned8 misc identifier 61 data current The direction of the Flow observed at the Observation Point. There are only two values defined. 0x00: ingress flow 0x01: egress flow ipNextHopIPv6Address ipv6Address derived identifier 62 data current The IPv6 address of the next IPv6 hop. bgpNextHopIPv6Address ipv6Address derived identifier 63 all current The IPv6 address of the next (adjacent) BGP hop. See for a description of BGP-4. ipv6ExtensionHeaders unsigned32 minMax flags 64 all current IPv6 extension headers observed in packets of this Flow. The information is encoded in a set of bit fields. For each IPv6 option header, there is a bit in this set. The bit is set to 1 if any observed packet of this Flow contains the corresponding IPv6 extension header. Otherwise, if no observed packet of this Flow contained the respective IPv6 extension header, the value of the corresponding bit is 0. 0 1 2 3 4 5 6 7 +-----+-----+-----+-----+-----+-----+-----+-----+ | DST | HOP | Res | UNK |FRA0 | RH |FRA1 | Res | ... +-----+-----+-----+-----+-----+-----+-----+-----+ 8 9 10 11 12 13 14 15 +-----+-----+-----+-----+-----+-----+-----+-----+ ... | Reserved | MOB | ESP | AH | PAY | ... +-----+-----+-----+-----+-----+-----+-----+-----+ 16 17 18 19 20 21 22 23 +-----+-----+-----+-----+-----+-----+-----+-----+ ... 
| Reserved | ... +-----+-----+-----+-----+-----+-----+-----+-----+ 24 25 26 27 28 29 30 31 +-----+-----+-----+-----+-----+-----+-----+-----+ ... | Reserved | +-----+-----+-----+-----+-----+-----+-----+-----+ Bit IPv6 Option Description 0, DST 60 Destination option header 1, HOP 0 Hop-by-hop option header 2, Res Reserved 3, UNK Unknown Layer 4 header (compressed, encrypted, not supported) 4, FRA0 44 Fragment header - first fragment 5, RH 43 Routing header 6, FRA1 44 Fragmentation header - not first fragment 7, Res Reserved 8 to 11 Reserved 12, MOB 135 IPv6 mobility [RFC3775] 13, ESP 50 Encrypted security payload 14, AH 51 Authentication Header 15, PAY 108 Payload compression header 16 to 31 Reserved See for the general definition of IPv6 extension headers and for the specification of the hop-by-hop options header, the routing header, the fragment header, and the destination options header. See for the specification of the authentication header. See for the specification of the encapsulating security payload. The diagram provided in is incorrect. The diagram in this registry is taken from Errata 1738. See 65-69 mplsTopLabelStackSection octetArray subIpHeader identifier 70 all current The Label, Exp, and S fields from the top MPLS label stack entry, i.e., from the last label that was pushed. The size of this Information Element is 3 octets. 0 1 2 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | Label | Exp |S| +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ Label: Label Value, 20 bits Exp: Experimental Use, 3 bits S: Bottom of Stack, 1 bit See . mplsLabelStackSection2 octetArray subIpHeader identifier 71 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsTopLabelStackSection. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See . 
mplsLabelStackSection3 octetArray subIpHeader identifier 72 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsLabelStackSection2. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See . mplsLabelStackSection4 octetArray subIpHeader identifier 73 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsLabelStackSection3. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See . mplsLabelStackSection5 octetArray subIpHeader identifier 74 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsLabelStackSection4. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See . mplsLabelStackSection6 octetArray subIpHeader identifier 75 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsLabelStackSection5. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See . mplsLabelStackSection7 octetArray subIpHeader identifier 76 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsLabelStackSection6. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See . 
mplsLabelStackSection8 octetArray subIpHeader identifier 77 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsLabelStackSection7. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See [RFC3032]. mplsLabelStackSection9 octetArray subIpHeader identifier 78 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsLabelStackSection8. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See [RFC3032]. mplsLabelStackSection10 octetArray subIpHeader identifier 79 all current The Label, Exp, and S fields from the label stack entry that was pushed immediately before the label stack entry that would be reported by mplsLabelStackSection9. See the definition of mplsTopLabelStackSection for further details. The size of this Information Element is 3 octets. See [RFC3032]. destinationMacAddress macAddress subIpHeader identifier 80 data current The IEEE 802 destination MAC address field. See IEEE.802-3.2002. postSourceMacAddress macAddress subIpHeader identifier 81 data current The definition of this Information Element is identical to the definition of Information Element 'sourceMacAddress', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. See IEEE.802-3.2002. interfaceName string 82 current A short name uniquely describing an interface, e.g., "Eth1/0". See [RFC2863] for the definition of the ifName object. interfaceDescription string 83 current The description of an interface, e.g., "FastEthernet 1/0" or "ISP connection". See [RFC2863] for the definition of the ifDescr object.
84 octetTotalCount unsigned64 flowCounter totalCounter 85 all current The total number of octets in incoming packets for this Flow at the Observation Point since the Metering Process (re-)initialization for this Observation Point. The number of octets includes IP header(s) and IP payload. octets packetTotalCount unsigned64 flowCounter totalCounter 86 all current The total number of incoming packets for this Flow at the Observation Point since the Metering Process (re-)initialization for this Observation Point. packets 87 fragmentOffset unsigned16 ipHeader identifier 88 all current The value of the IP fragment offset field in the IPv4 packet header or the IPv6 Fragment header, respectively. The value is 0 for IPv6 if there is no fragment header. See for the specification of the fragment offset in the IPv4 header. See for the specification of the fragment offset in the IPv6 Fragment header. 89 mplsVpnRouteDistinguisher octetArray derived identifier 90 all current The value of the VPN route distinguisher of a corresponding entry in a VPN routing and forwarding table. Route distinguisher ensures that the same address can be used in several different MPLS VPNs and that it is possible for BGP to carry several completely different routes to that address, one for each VPN. According to RFC 4364, the size of mplsVpnRouteDistinguisher is 8 octets. However, in RFC 4382 an octet string with flexible length was chosen for representing a VPN route distinguisher by object MplsL3VpnRouteDistinguisher. This choice was made in order to be open to future changes of the size. This idea was adopted when choosing octetArray as abstract data type for this Information Element. The maximum length of this Information Element is 256 octets. See for the specification of the route distinguisher. See for the specification of the MPLS/BGP Layer 3 Virtual Private Network (VPN) Management Information Base. 
mplsTopLabelPrefixLength unsigned8 identifier 91 current The prefix length of the subnet of the mplsTopLabelIPv4Address that the MPLS top label will cause the Flow to be forwarded to. bits 0-32 See [RFC3031] for the association between MPLS labels and prefix lengths. 92-93 applicationDescription string 94 current Specifies the description of an application. applicationId octetArray identifier 95 current Specifies an Application ID per [RFC6759]. See section 4 of [RFC6759] for the applicationId Information Element Specification. applicationName string 96 current Specifies the name of an application. 97 postIpDiffServCodePoint unsigned8 identifier 98 current The definition of this Information Element is identical to the definition of Information Element 'ipDiffServCodePoint', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. 0-63 See [RFC3260] for the definition of the Differentiated Services Field. See section 5.3.2 of [RFC1812] and [RFC791] for the definition of the IPv4 TOS field. See [RFC2460] for the definition of the IPv6 Traffic Class field. See the IPFIX Information Model [RFC5102] for the 'ipDiffServCodePoint' specification. multicastReplicationFactor unsigned32 quantity 99 current The amount of multicast replication that's applied to a traffic stream. See [RFC1112] for the specification of reserved IPv4 multicast addresses. See [RFC4291] for the specification of reserved IPv6 multicast addresses. 100 classificationEngineId unsigned8 identifier 101 current A unique identifier for the engine that determined the Selector ID. Thus, the Classification Engine ID defines the context for the Selector ID. The Classification Engine can be considered a specific registry for application assignments. Values for this field are listed in the Classification Engine IDs registry. 102-127 bgpNextAdjacentAsNumber unsigned32 derived identifier 128 all current The autonomous system (AS) number of the first AS in the AS path to the destination IP address.
The path is deduced by looking up the destination IP address of the Flow in the BGP routing information base. If AS path information for this Flow is only available as an unordered AS set (and not as an ordered AS sequence), then the value of this Information Element is 0. See for a description of BGP-4, and see for the definition of the AS number. bgpPrevAdjacentAsNumber unsigned32 derived identifier 129 all current The autonomous system (AS) number of the last AS in the AS path from the source IP address. The path is deduced by looking up the source IP address of the Flow in the BGP routing information base. If AS path information for this Flow is only available as an unordered AS set (and not as an ordered AS sequence), then the value of this Information Element is 0. In case of BGP asymmetry, the bgpPrevAdjacentAsNumber might not be able to report the correct value. See for a description of BGP-4, and see for the definition of the AS number. exporterIPv4Address ipv4Address config identifier 130 all current The IPv4 address used by the Exporting Process. This is used by the Collector to identify the Exporter in cases where the identity of the Exporter may have been obscured by the use of a proxy. exporterIPv6Address ipv6Address config identifier 131 all current The IPv6 address used by the Exporting Process. This is used by the Collector to identify the Exporter in cases where the identity of the Exporter may have been obscured by the use of a proxy. droppedOctetDeltaCount unsigned64 flowCounter deltaCounter 132 data current The number of octets since the previous report (if any) in packets of this Flow dropped by packet treatment. The number of octets includes IP header(s) and IP payload. octets droppedPacketDeltaCount unsigned64 flowCounter deltaCounter 133 data current The number of packets since the previous report (if any) of this Flow dropped by packet treatment. 
packets droppedOctetTotalCount unsigned64 flowCounter totalCounter 134 data current The total number of octets in packets of this Flow dropped by packet treatment since the Metering Process (re-)initialization for this Observation Point. The number of octets includes IP header(s) and IP payload. octets droppedPacketTotalCount unsigned64 flowCounter totalCounter 135 data current The number of packets of this Flow dropped by packet treatment since the Metering Process (re-)initialization for this Observation Point. packets flowEndReason unsigned8 misc identifier 136 data current The reason for Flow termination. The range of values includes the following: 0x01: idle timeout The Flow was terminated because it was considered to be idle. 0x02: active timeout The Flow was terminated for reporting purposes while it was still active, for example, after the maximum lifetime of unreported Flows was reached. 0x03: end of Flow detected The Flow was terminated because the Metering Process detected signals indicating the end of the Flow, for example, the TCP FIN flag. 0x04: forced end The Flow was terminated because of some external event, for example, a shutdown of the Metering Process initiated by a network management application. 0x05: lack of resources The Flow was terminated because of lack of resources available to the Metering Process and/or the Exporting Process. commonPropertiesId unsigned64 scope identifier 137 option current An identifier of a set of common properties that is unique per Observation Domain and Transport Session. Typically, this Information Element is used to link to information reported in separate Data Records. observationPointId unsigned32 scope identifier 138 option current An identifier of an Observation Point that is unique per Observation Domain. It is RECOMMENDED that this identifier is also unique per IPFIX Device. Typically, this Information Element is used for limiting the scope of other Information Elements. 
icmpTypeCodeIPv6 unsigned16 transportHeader identifier 139 all current Type and Code of the IPv6 ICMP message. The combination of both values is reported as (ICMP type * 256) + ICMP code. See for the definition of the IPv6 ICMP type and code fields. mplsTopLabelIPv6Address ipv6Address derived identifier 140 data current The IPv6 address of the system that the MPLS top label will cause this Flow to be forwarded to. See for the association between MPLS labels and IP addresses. lineCardId unsigned32 scope identifier 141 option current An identifier of a line card that is unique per IPFIX Device hosting an Observation Point. Typically, this Information Element is used for limiting the scope of other Information Elements. portId unsigned32 scope identifier 142 option current An identifier of a line port that is unique per IPFIX Device hosting an Observation Point. Typically, this Information Element is used for limiting the scope of other Information Elements. meteringProcessId unsigned32 scope identifier 143 option current An identifier of a Metering Process that is unique per IPFIX Device. Typically, this Information Element is used for limiting the scope of other Information Elements. Note that process identifiers are typically assigned dynamically. The Metering Process may be re-started with a different ID. exportingProcessId unsigned32 scope identifier 144 option current An identifier of an Exporting Process that is unique per IPFIX Device. Typically, this Information Element is used for limiting the scope of other Information Elements. Note that process identifiers are typically assigned dynamically. The Exporting Process may be re-started with a different ID. templateId unsigned16 scope identifier 145 option current An identifier of a Template that is locally unique within a combination of a Transport session and an Observation Domain. Template IDs 0-255 are reserved for Template Sets, Options Template Sets, and other reserved Sets yet to be created. 
Template IDs of Data Sets are numbered from 256 to 65535. Typically, this Information Element is used for limiting the scope of other Information Elements. Note that after a re-start of the Exporting Process Template identifiers may be re-assigned. wlanChannelId unsigned8 subIpHeader identifier 146 data current The identifier of the 802.11 (Wi-Fi) channel used. See IEEE.802-11.1999. wlanSSID string subIpHeader 147 data current The Service Set IDentifier (SSID) identifying an 802.11 (Wi-Fi) network used. According to IEEE.802-11.1999, the SSID is encoded into a string of up to 32 characters. See IEEE.802-11.1999. flowId unsigned64 scope identifier 148 option current An identifier of a Flow that is unique within an Observation Domain. This Information Element can be used to distinguish between different Flows if Flow Keys such as IP addresses and port numbers are not reported or are reported in separate records. observationDomainId unsigned32 scope identifier 149 option current An identifier of an Observation Domain that is locally unique to an Exporting Process. The Exporting Process uses the Observation Domain ID to uniquely identify to the Collecting Process the Observation Domain where Flows were metered. It is RECOMMENDED that this identifier is also unique per IPFIX Device. A value of 0 indicates that no specific Observation Domain is identified by this Information Element. Typically, this Information Element is used for limiting the scope of other Information Elements. flowStartSeconds dateTimeSeconds timestamp 150 data current The absolute timestamp of the first packet of this Flow. seconds flowEndSeconds dateTimeSeconds timestamp 151 data current The absolute timestamp of the last packet of this Flow. seconds flowStartMilliseconds dateTimeMilliseconds timestamp 152 data current The absolute timestamp of the first packet of this Flow. 
milliseconds flowEndMilliseconds dateTimeMilliseconds timestamp 153 data current The absolute timestamp of the last packet of this Flow. milliseconds flowStartMicroseconds dateTimeMicroseconds timestamp 154 data current The absolute timestamp of the first packet of this Flow. microseconds flowEndMicroseconds dateTimeMicroseconds timestamp 155 data current The absolute timestamp of the last packet of this Flow. microseconds flowStartNanoseconds dateTimeNanoseconds timestamp 156 data current The absolute timestamp of the first packet of this Flow. nanoseconds flowEndNanoseconds dateTimeNanoseconds timestamp 157 data current The absolute timestamp of the last packet of this Flow. nanoseconds flowStartDeltaMicroseconds unsigned32 timestamp 158 data current This is a relative timestamp only valid within the scope of a single IPFIX Message. It contains the negative time offset of the first observed packet of this Flow relative to the export time specified in the IPFIX Message Header. microseconds See the IPFIX protocol specification for the definition of the IPFIX Message Header. flowEndDeltaMicroseconds unsigned32 timestamp 159 data current This is a relative timestamp only valid within the scope of a single IPFIX Message. It contains the negative time offset of the last observed packet of this Flow relative to the export time specified in the IPFIX Message Header. microseconds See the IPFIX protocol specification for the definition of the IPFIX Message Header. systemInitTimeMilliseconds dateTimeMilliseconds timestamp 160 data current The absolute timestamp of the last (re-)initialization of the IPFIX Device. milliseconds flowDurationMilliseconds unsigned32 misc 161 data current The difference in time between the first observed packet of this Flow and the last observed packet of this Flow. milliseconds flowDurationMicroseconds unsigned32 misc 162 data current The difference in time between the first observed packet of this Flow and the last observed packet of this Flow. 
microseconds observedFlowTotalCount unsigned64 processCounter totalCounter 163 data current The total number of Flows observed in the Observation Domain since the Metering Process (re-)initialization for this Observation Point. flows ignoredPacketTotalCount unsigned64 processCounter totalCounter 164 data current The total number of observed IP packets that the Metering Process did not process since the (re-)initialization of the Metering Process. packets ignoredOctetTotalCount unsigned64 processCounter totalCounter 165 data current The total number of octets in observed IP packets (including the IP header) that the Metering Process did not process since the (re-)initialization of the Metering Process. octets notSentFlowTotalCount unsigned64 processCounter totalCounter 166 data current The total number of Flow Records that were generated by the Metering Process and dropped by the Metering Process or by the Exporting Process instead of being sent to the Collecting Process. There are several potential reasons for this including resource shortage and special Flow export policies. flows notSentPacketTotalCount unsigned64 processCounter totalCounter 167 data current The total number of packets in Flow Records that were generated by the Metering Process and dropped by the Metering Process or by the Exporting Process instead of being sent to the Collecting Process. There are several potential reasons for this including resource shortage and special Flow export policies. packets notSentOctetTotalCount unsigned64 processCounter totalCounter 168 data current The total number of octets in packets in Flow Records that were generated by the Metering Process and dropped by the Metering Process or by the Exporting Process instead of being sent to the Collecting Process. There are several potential reasons for this including resource shortage and special Flow export policies. octets destinationIPv6Prefix ipv6Address ipHeader 169 data current IPv6 destination address prefix. 
sourceIPv6Prefix ipv6Address ipHeader 170 data current IPv6 source address prefix. postOctetTotalCount unsigned64 flowCounter totalCounter 171 all current The definition of this Information Element is identical to the definition of Information Element 'octetTotalCount', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. octets postPacketTotalCount unsigned64 flowCounter totalCounter 172 all current The definition of this Information Element is identical to the definition of Information Element 'packetTotalCount', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. packets flowKeyIndicator unsigned64 config flags 173 all current This set of bit fields is used for marking the Information Elements of a Data Record that serve as Flow Key. Each bit represents an Information Element in the Data Record with the n-th bit representing the n-th Information Element. A bit set to value 1 indicates that the corresponding Information Element is a Flow Key of the reported Flow. A bit set to value 0 indicates that this is not the case. If the Data Record contains more than 64 Information Elements, the corresponding Template SHOULD be designed such that all Flow Keys are among the first 64 Information Elements, because the flowKeyIndicator only contains 64 bits. If the Data Record contains less than 64 Information Elements, then the bits in the flowKeyIndicator for which no corresponding Information Element exists MUST have the value 0. postMCastPacketTotalCount unsigned64 flowCounter totalCounter 174 data current The total number of outgoing multicast packets sent for packets of this Flow by a multicast daemon within the Observation Domain since the Metering Process (re-)initialization. This property cannot necessarily be observed at the Observation Point, but may be retrieved by other means. 
packets postMCastOctetTotalCount unsigned64 flowCounter totalCounter 175 data current The total number of octets in outgoing multicast packets sent for packets of this Flow by a multicast daemon in the Observation Domain since the Metering Process (re-)initialization. This property cannot necessarily be observed at the Observation Point, but may be retrieved by other means. The number of octets includes IP header(s) and IP payload. octets icmpTypeIPv4 unsigned8 transportHeader identifier 176 all current Type of the IPv4 ICMP message. See for the definition of the IPv4 ICMP type field. icmpCodeIPv4 unsigned8 transportHeader identifier 177 all current Code of the IPv4 ICMP message. See for the definition of the IPv4 ICMP code field. icmpTypeIPv6 unsigned8 transportHeader identifier 178 all current Type of the IPv6 ICMP message. See for the definition of the IPv6 ICMP type field. icmpCodeIPv6 unsigned8 transportHeader identifier 179 all current Code of the IPv6 ICMP message. See for the definition of the IPv6 ICMP code field. udpSourcePort unsigned16 transportHeader identifier 180 all current The source port identifier in the UDP header. See for the definition of the UDP source port field. Additional information on defined UDP port numbers can be found at . udpDestinationPort unsigned16 transportHeader identifier 181 all current The destination port identifier in the UDP header. See for the definition of the UDP destination port field. Additional information on defined UDP port numbers can be found at . tcpSourcePort unsigned16 transportHeader identifier 182 all current The source port identifier in the TCP header. See for the definition of the TCP source port field. Additional information on defined TCP port numbers can be found at . tcpDestinationPort unsigned16 transportHeader identifier 183 all current The destination port identifier in the TCP header. See for the definition of the TCP destination port field. 
Additional information on defined TCP port numbers can be found at [http://www.iana.org/assignments/port-numbers]. tcpSequenceNumber unsigned32 transportHeader 184 all current The sequence number in the TCP header. See [RFC793] for the definition of the TCP sequence number. tcpAcknowledgementNumber unsigned32 transportHeader 185 all current The acknowledgement number in the TCP header. See [RFC793] for the definition of the TCP acknowledgement number. tcpWindowSize unsigned16 transportHeader 186 all current The window field in the TCP header. If the TCP window scale is supported, then TCP window scale must be known to fully interpret the value of this information. See [RFC793] for the definition of the TCP window field. See [RFC1323] for the definition of the TCP window scale. tcpUrgentPointer unsigned16 transportHeader 187 all current The urgent pointer in the TCP header. See [RFC793] for the definition of the TCP urgent pointer. tcpHeaderLength unsigned8 transportHeader 188 all current The length of the TCP header. Note that the value of this Information Element is different from the value of the Data Offset field in the TCP header. The Data Offset field indicates the length of the TCP header in units of 4 octets. This Information Element specifies the length of the TCP header in units of octets. octets See [RFC793] for the definition of the TCP header. ipHeaderLength unsigned8 ipHeader 189 all current The length of the IP header. For IPv6, the value of this Information Element is 40. octets See [RFC791] for the definition of the IPv4 header. See [RFC2460] for the definition of the IPv6 header. totalLengthIPv4 unsigned16 ipHeader 190 all current The total length of the IPv4 packet. octets See [RFC791] for the specification of the IPv4 total length. payloadLengthIPv6 unsigned16 ipHeader 191 all current This Information Element reports the value of the Payload Length field in the IPv6 header. Note that IPv6 extension headers belong to the payload.
Also note that in case of a jumbo payload option the value of the Payload Length field in the IPv6 header is zero and so will be the value reported by this Information Element. octets See for the specification of the IPv6 payload length. See for the specification of the IPv6 jumbo payload option. ipTTL unsigned8 ipHeader 192 all current For IPv4, the value of the Information Element matches the value of the Time to Live (TTL) field in the IPv4 packet header. For IPv6, the value of the Information Element matches the value of the Hop Limit field in the IPv6 packet header. hops See for the definition of the IPv4 Time to Live field. See for the definition of the IPv6 Hop Limit field. nextHeaderIPv6 unsigned8 ipHeader 193 all current The value of the Next Header field of the IPv6 header. The value identifies the type of the following IPv6 extension header or of the following IP payload. Valid values are defined in the IANA Protocol Numbers registry. See for the definition of the IPv6 Next Header field. See the list of protocol numbers assigned by IANA at . mplsPayloadLength unsigned32 subIpHeader 194 all current The size of the MPLS packet without the label stack. octets See for the specification of MPLS packets. See for the specification of the MPLS label stack. ipDiffServCodePoint unsigned8 ipHeader identifier 195 all current The value of a Differentiated Services Code Point (DSCP) encoded in the Differentiated Services field. The Differentiated Services field spans the most significant 6 bits of the IPv4 TOS field or the IPv6 Traffic Class field, respectively. This Information Element encodes only the 6 bits of the Differentiated Services field. Therefore, its value may range from 0 to 63. 0-63 See for the definition of the Differentiated Services field. See (Section 5.3.2) and for the definition of the IPv4 TOS field. See for the definition of the IPv6 Traffic Class field. ipPrecedence unsigned8 ipHeader identifier 196 all current The value of the IP Precedence. 
The IP Precedence value is encoded in the first 3 bits of the IPv4 TOS field or the IPv6 Traffic Class field, respectively. This Information Element encodes only these 3 bits. Therefore, its value may range from 0 to 7. 0-7 See (Section 5.3.3) and for the definition of the IP Precedence. See (Section 5.3.2) and for the definition of the IPv4 TOS field. See for the definition of the IPv6 Traffic Class field. fragmentFlags unsigned8 ipHeader flags 197 all current Fragmentation properties indicated by flags in the IPv4 packet header or the IPv6 Fragment header, respectively. Bit 0: (RS) Reserved. The value of this bit MUST be 0 until specified otherwise. Bit 1: (DF) 0 = May Fragment, 1 = Don't Fragment. Corresponds to the value of the DF flag in the IPv4 header. Will always be 0 for IPv6 unless a "don't fragment" feature is introduced to IPv6. Bit 2: (MF) 0 = Last Fragment, 1 = More Fragments. Corresponds to the MF flag in the IPv4 header or to the M flag in the IPv6 Fragment header, respectively. The value is 0 for IPv6 if there is no fragment header. Bits 3-7: (DC) Don't Care. The values of these bits are irrelevant. 0 1 2 3 4 5 6 7 +---+---+---+---+---+---+---+---+ | R | D | M | D | D | D | D | D | | S | F | F | C | C | C | C | C | +---+---+---+---+---+---+---+---+ See for the specification of the IPv4 fragment flags. See for the specification of the IPv6 Fragment header. octetDeltaSumOfSquares unsigned64 flowCounter 198 data current The sum of the squared numbers of octets per incoming packet since the previous report (if any) for this Flow at the Observation Point. The number of octets includes IP header(s) and IP payload. octetTotalSumOfSquares unsigned64 flowCounter 199 all current The total sum of the squared numbers of octets in incoming packets for this Flow at the Observation Point since the Metering Process (re-)initialization for this Observation Point. The number of octets includes IP header(s) and IP payload. 
octets mplsTopLabelTTL unsigned8 subIpHeader 200 all current The TTL field from the top MPLS label stack entry, i.e., the last label that was pushed. hops See for the specification of the TTL field. mplsLabelStackLength unsigned32 subIpHeader 201 all current The length of the MPLS label stack in units of octets. octets See for the specification of the MPLS label stack. mplsLabelStackDepth unsigned32 subIpHeader 202 all current The number of labels in the MPLS label stack. label stack entries See for the specification of the MPLS label stack. mplsTopLabelExp unsigned8 subIpHeader flags 203 all current The Exp field from the top MPLS label stack entry, i.e., the last label that was pushed. Bits 0-4: Don't Care, value is irrelevant. Bits 5-7: MPLS Exp field. 0 1 2 3 4 5 6 7 +---+---+---+---+---+---+---+---+ | don't care | Exp | +---+---+---+---+---+---+---+---+ See for the specification of the Exp field. See for usage of the Exp field. ipPayloadLength unsigned32 derived 204 all current The effective length of the IP payload. For IPv4 packets, the value of this Information Element is the difference between the total length of the IPv4 packet (as reported by Information Element totalLengthIPv4) and the length of the IPv4 header (as reported by Information Element headerLengthIPv4). For IPv6, the value of the Payload Length field in the IPv6 header is reported except in the case that the value of this field is zero and that there is a valid jumbo payload option. In this case, the value of the Jumbo Payload Length field in the jumbo payload option is reported. octets See for the specification of IPv4 packets. See for the specification of the IPv6 payload length. See for the specification of the IPv6 jumbo payload length. udpMessageLength unsigned16 transportHeader 205 all current The value of the Length field in the UDP header. octets See for the specification of the UDP header. 
isMulticast unsigned8 ipHeader flags 206 data current If the IP destination address is not a reserved multicast address, then the value of all bits of the octet (including the reserved ones) is zero. The first bit of this octet is set to 1 if the Version field of the IP header has the value 4 and if the Destination Address field contains a reserved multicast address in the range from 224.0.0.0 to 239.255.255.255. Otherwise, this bit is set to 0. The second and third bits of this octet are reserved for future use. The remaining bits of the octet are only set to values other than zero if the IP Destination Address is a reserved IPv6 multicast address. Then the fourth bit of the octet is set to the value of the T flag in the IPv6 multicast address and the remaining four bits are set to the value of the scope field in the IPv6 multicast address. 0 1 2 3 4 5 6 7 +------+------+------+------+------+------+------+------+ | IPv6 multicast scope | T | RES. | RES. | MCv4 | +------+------+------+------+------+------+------+------+ Bits 0-3: set to value of multicast scope if IPv6 multicast Bit 4: set to value of T flag, if IPv6 multicast Bits 5-6: reserved for future use Bit 7: set to 1 if IPv4 multicast See for the specification of reserved IPv4 multicast addresses. See for the specification of reserved IPv6 multicast addresses and the definition of the T flag and the IPv6 multicast scope. The diagram provided in is incorrect. The diagram in this registry is taken from Errata 1736. See ipv4IHL unsigned8 ipHeader 207 all current The value of the Internet Header Length (IHL) field in the IPv4 header. It specifies the length of the header in units of 4 octets. Please note that its unit is different from most of the other Information Elements reporting length values. 4 octets See for the specification of the IPv4 header. ipv4Options unsigned32 minMax flags 208 all current IPv4 options in packets of this Flow. The information is encoded in a set of bit fields. 
For each valid IPv4 option type, there is a bit in this set. The bit is set to 1 if any observed packet of this Flow contains the corresponding IPv4 option type. Otherwise, if no observed packet of this Flow contained the respective IPv4 option type, the value of the corresponding bit is 0. The list of valid IPv4 options is maintained by IANA. Note that for identifying an option not just the 5-bit Option Number, but all 8 bits of the Option Type need to match one of the IPv4 options specified at http://www.iana.org/assignments/ip-parameters. Options are mapped to bits according to their option numbers. Option number X is mapped to bit X. The mapping is illustrated by the figure below. 0 1 2 3 4 5 6 7 +------+------+------+------+------+------+------+------+ ... | RR |CIPSO |E-SEC | TS | LSR | SEC | NOP | EOOL | +------+------+------+------+------+------+------+------+ 8 9 10 11 12 13 14 15 +------+------+------+------+------+------+------+------+ ... |ENCODE| VISA | FINN | MTUR | MTUP | ZSU | SSR | SID | ... +------+------+------+------+------+------+------+------+ 16 17 18 19 20 21 22 23 +------+------+------+------+------+------+------+------+ ... | DPS |NSAPA | SDB |RTRALT|ADDEXT| TR | EIP |IMITD | ... +------+------+------+------+------+------+------+------+ 24 25 26 27 28 29 30 31 +------+------+------+------+------+------+------+------+ | | EXP | to be assigned by IANA | QS | UMP | ... 
+------+------+------+------+------+------+------+------+ Type Option Bit Value Name Reference ---+-----+-------+------------------------------------ 0 7 RR Record Route, RFC 791 1 134 CIPSO Commercial Security 2 133 E-SEC Extended Security, RFC 1108 3 68 TS Time Stamp, RFC 791 4 131 LSR Loose Source Route, RFC 791 5 130 SEC Security, RFC 1108 6 1 NOP No Operation, RFC 791 7 0 EOOL End of Options List, RFC 791 8 15 ENCODE 9 142 VISA Experimental Access Control 10 205 FINN Experimental Flow Control 11 12 MTUR (obsoleted) MTU Reply, RFC 1191 12 11 MTUP (obsoleted) MTU Probe, RFC 1191 13 10 ZSU Experimental Measurement 14 137 SSR Strict Source Route, RFC 791 15 136 SID Stream ID, RFC 791 16 151 DPS Dynamic Packet State 17 150 NSAPA NSAP Address 18 149 SDB Selective Directed Broadcast 19 148 RTRALT Router Alert, RFC 2113 20 147 ADDEXT Address Extension 21 82 TR Traceroute, RFC 1393 22 145 EIP Extended Internet Protocol, RFC 1385 23 144 IMITD IMI Traffic Descriptor 25 30 EXP RFC3692-style Experiment 25 94 EXP RFC3692-style Experiment 25 158 EXP RFC3692-style Experiment 25 222 EXP RFC3692-style Experiment 30 25 QS Quick-Start 31 152 UMP Upstream Multicast Pkt. ... ... ... Further option numbers may be assigned by IANA See for the definition of IPv4 options. See the list of IPv4 option numbers assigned by IANA at . The diagram provided in is incorrect. The diagram in this registry is taken from Errata 1737. See tcpOptions unsigned64 minMax flags 209 all current TCP options in packets of this Flow. The information is encoded in a set of bit fields. For each TCP option, there is a bit in this set. The bit is set to 1 if any observed packet of this Flow contains the corresponding TCP option. Otherwise, if no observed packet of this Flow contained the respective TCP option, the value of the corresponding bit is 0. Options are mapped to bits according to their option numbers. Option number X is mapped to bit X. TCP option numbers are maintained by IANA.
0 1 2 3 4 5 6 7 +-----+-----+-----+-----+-----+-----+-----+-----+ | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | ... +-----+-----+-----+-----+-----+-----+-----+-----+ 8 9 10 11 12 13 14 15 +-----+-----+-----+-----+-----+-----+-----+-----+ ... | 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |... +-----+-----+-----+-----+-----+-----+-----+-----+ 16 17 18 19 20 21 22 23 +-----+-----+-----+-----+-----+-----+-----+-----+ ... | 23 | 22 | 21 | 20 | 19 | 18 | 17 | 16 |... +-----+-----+-----+-----+-----+-----+-----+-----+ . . . 56 57 58 59 60 61 62 63 +-----+-----+-----+-----+-----+-----+-----+-----+ ... | 63 | 62 | 61 | 60 | 59 | 58 | 57 | 56 | +-----+-----+-----+-----+-----+-----+-----+-----+ See for the definition of TCP options. See the list of TCP option numbers assigned by IANA at . The diagram provided in is incorrect. The diagram in this registry is taken from Errata 1739. See paddingOctets octetArray padding 210 option current The value of this Information Element is always a sequence of 0x00 values. collectorIPv4Address ipv4Address config identifier 211 all current An IPv4 address to which the Exporting Process sends Flow information. collectorIPv6Address ipv6Address config identifier 212 all current An IPv6 address to which the Exporting Process sends Flow information. exportInterface unsigned32 config identifier 213 all current The index of the interface from which IPFIX Messages sent by the Exporting Process to a Collector leave the IPFIX Device. The value matches the value of managed object 'ifIndex' as defined in RFC 2863. Note that ifIndex values are not assigned statically to an interface and that the interfaces may be renumbered every time the device's management system is re-initialized, as specified in RFC 2863. See for the definition of the ifIndex object. exportProtocolVersion unsigned8 config identifier 214 all current The protocol version used by the Exporting Process for sending Flow information. 
The protocol version is given by the value of the Version Number field in the Message Header. The protocol version is 10 for IPFIX and 9 for NetFlow version 9. A value of 0 indicates that no export protocol is in use. See the IPFIX protocol specification for the definition of the IPFIX Message Header. See for the definition of the NetFlow version 9 message header. exportTransportProtocol unsigned8 config identifier 215 all current The value of the protocol number used by the Exporting Process for sending Flow information. The protocol number identifies the IP packet payload type. Protocol numbers are defined in the IANA Protocol Numbers registry. In Internet Protocol version 4 (IPv4), this is carried in the Protocol field. In Internet Protocol version 6 (IPv6), this is carried in the Next Header field in the last extension header of the packet. See for the specification of the IPv4 protocol field. See for the specification of the IPv6 protocol field. See the list of protocol numbers assigned by IANA at . collectorTransportPort unsigned16 config identifier 216 all current The destination port identifier to which the Exporting Process sends Flow information. For the transport protocols UDP, TCP, and SCTP, this is the destination port number. This field MAY also be used for future transport protocols that have 16-bit source port identifiers. See for the definition of the UDP destination port field. See for the definition of the TCP destination port field. See for the definition of SCTP. Additional information on defined UDP and TCP port numbers can be found at . exporterTransportPort unsigned16 config identifier 217 all current The source port identifier from which the Exporting Process sends Flow information. For the transport protocols UDP, TCP, and SCTP, this is the source port number. This field MAY also be used for future transport protocols that have 16-bit source port identifiers. 
This field may be useful for distinguishing multiple Exporting Processes that use the same IP address. See for the definition of the UDP source port field. See for the definition of the TCP source port field. See for the definition of SCTP. Additional information on defined UDP and TCP port numbers can be found at . tcpSynTotalCount unsigned64 flowCounter totalCounter 218 data current The total number of packets of this Flow with TCP "Synchronize sequence numbers" (SYN) flag set. packets See for the definition of the TCP SYN flag. tcpFinTotalCount unsigned64 flowCounter totalCounter 219 data current The total number of packets of this Flow with TCP "No more data from sender" (FIN) flag set. packets See for the definition of the TCP FIN flag. tcpRstTotalCount unsigned64 flowCounter totalCounter 220 data current The total number of packets of this Flow with TCP "Reset the connection" (RST) flag set. packets See for the definition of the TCP RST flag. tcpPshTotalCount unsigned64 flowCounter totalCounter 221 data current The total number of packets of this Flow with TCP "Push Function" (PSH) flag set. packets See for the definition of the TCP PSH flag. tcpAckTotalCount unsigned64 flowCounter totalCounter 222 data current The total number of packets of this Flow with TCP "Acknowledgment field significant" (ACK) flag set. packets See for the definition of the TCP ACK flag. tcpUrgTotalCount unsigned64 flowCounter totalCounter 223 data current The total number of packets of this Flow with TCP "Urgent Pointer field significant" (URG) flag set. packets See for the definition of the TCP URG flag. ipTotalLength unsigned64 ipHeader 224 all current The total length of the IP packet. octets See for the specification of the IPv4 total length. See for the specification of the IPv6 payload length. See for the specification of the IPv6 jumbo payload length. 
postNATSourceIPv4Address ipv4Address identifier 225 current The definition of this Information Element is identical to the definition of Information Element 'sourceIPv4Address', except that it reports a modified value caused by a NAT middlebox function after the packet passed the Observation Point. See for the definition of the IPv4 source address field. See for the definition of NAT. See for the definition of middleboxes. postNATDestinationIPv4Address ipv4Address identifier 226 current The definition of this Information Element is identical to the definition of Information Element 'destinationIPv4Address', except that it reports a modified value caused by a NAT middlebox function after the packet passed the Observation Point. See for the definition of the IPv4 destination address field. See for the definition of NAT. See for the definition of middleboxes. postNAPTSourceTransportPort unsigned16 identifier 227 current The definition of this Information Element is identical to the definition of Information Element 'sourceTransportPort', except that it reports a modified value caused by a Network Address Port Translation (NAPT) middlebox function after the packet passed the Observation Point. See for the definition of the UDP source port field. See for the definition of the TCP source port field. See for the definition of SCTP. See for the definition of NAPT. See for the definition of middleboxes. Additional information on defined UDP and TCP port numbers can be found at http://www.iana.org/assignments/port-numbers. postNAPTDestinationTransportPort unsigned16 identifier 228 current The definition of this Information Element is identical to the definition of Information Element 'destinationTransportPort', except that it reports a modified value caused by a Network Address Port Translation (NAPT) middlebox function after the packet passed the Observation Point. See for the definition of the UDP source port field. See for the definition of the TCP source port field. 
See for the definition of SCTP. See for the definition of NAPT. See for the definition of middleboxes. Additional information on defined UDP and TCP port numbers can be found at http://www.iana.org/assignments/port-numbers. natOriginatingAddressRealm unsigned8 flags 229 current Indicates whether the session was created because traffic originated in the private or public address realm. postNATSourceIPv4Address, postNATDestinationIPv4Address, postNAPTSourceTransportPort, and postNAPTDestinationTransportPort are qualified with the address realm in perspective. The allowed values are: Private: 1 Public: 2 See for the definition of NAT. natEvent unsigned8 230 current Indicates a NAT event. The allowed values are: 1 - Create event. 2 - Delete event. 3 - Pool exhausted. A Create event is generated when a NAT translation is created, whether dynamically or statically. A Delete event is generated when a NAT translation is deleted. See for the definition of NAT. initiatorOctets unsigned64 231 current The total number of layer 4 payload bytes in a flow from the initiator. The initiator is the device which triggered the session creation, and remains the same for the life of the session. octets See #298, initiatorPackets. responderOctets unsigned64 232 current The total number of layer 4 payload bytes in a flow from the responder. The responder is the device which replies to the initiator, and remains the same for the life of the session. octets See #299, responderPackets. firewallEvent unsigned8 233 current Indicates a firewall event. The allowed values are: 0 - Ignore (invalid) 1 - Flow Created 2 - Flow Deleted 3 - Flow Denied 4 - Flow Alert 5 - Flow Update ingressVRFID unsigned32 234 current A unique identifier of the VRFname where the packets of this flow are being received. This identifier is unique per Metering Process. egressVRFID unsigned32 235 current A unique identifier of the VRFname where the packets of this flow are being sent.
This identifier is unique per Metering Process VRFname string 236 current The name of a VPN Routing and Forwarding table (VRF). See for the definition of VRF. postMplsTopLabelExp unsigned8 subIpHeader flags 237 all current The definition of this Information Element is identical to the definition of Information Element 'mplsTopLabelExp', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. See for the specification of the Exp field. See for usage of the Exp field. tcpWindowScale unsigned16 transportHeader 238 all current The scale of the window field in the TCP header. See for the definition of the TCP window scale. biflowDirection unsigned8 misc identifier 239 all current A description of the direction assignment method used to assign the Biflow Source and Destination. This Information Element MAY be present in a Flow Data Record, or applied to all flows exported from an Exporting Process or Observation Domain using IPFIX Options. If this Information Element is not present in a Flow Record or associated with a Biflow via scope, it is assumed that the configuration of the direction assignment method is done out-of-band. Note that when using IPFIX Options to apply this Information Element to all flows within an Observation Domain or from an Exporting Process, the Option SHOULD be sent reliably. If reliable transport is not available (i.e., when using UDP), this Information Element SHOULD appear in each Flow Record. This field may take the following values: +-------+------------------+----------------------------------------+ | Value | Name | Description | +-------+------------------+----------------------------------------+ | 0x00 | arbitrary | Direction was assigned arbitrarily. | | 0x01 | initiator | The Biflow Source is the flow | | | | initiator, as determined by the | | | | Metering Process' best effort to | | | | detect the initiator. 
| | 0x02 | reverseInitiator | The Biflow Destination is the flow | | | | initiator, as determined by the | | | | Metering Process' best effort to | | | | detect the initiator. This value is | | | | provided for the convenience of | | | | Exporting Processes to revise an | | | | initiator estimate without re-encoding | | | | the Biflow Record. | | 0x03 | perimeter | The Biflow Source is the endpoint | | | | outside of a defined perimeter. The | | | | perimeter's definition is implicit in | | | | the set of Biflow Source and Biflow | | | | Destination addresses exported in the | | | | Biflow Records. | +-------+------------------+----------------------------------------+ ethernetHeaderLength unsigned8 identifier 240 current The difference between the length of an Ethernet frame (minus the FCS) and the length of its MAC Client Data section (including any padding) as defined in section 3.1 of [IEEE.802-3.2005]. It does not include the Preamble, SFD and Extension field lengths. octets [IEEE.802-3.2005] ethernetPayloadLength unsigned16 identifier 241 current The length of the MAC Client Data section (including any padding) of a frame as defined in section 3.1 of [IEEE.802-3.2005]. octets [IEEE.802-3.2005] ethernetTotalLength unsigned16 identifier 242 current The total length of the Ethernet frame (excluding the Preamble, SFD, Extension and FCS fields) as described in section 3.1 of [IEEE.802-3.2005]. octets [IEEE.802-3.2005] dot1qVlanId unsigned16 identifier 243 current The value of the 12-bit VLAN Identifier portion of the Tag Control Information field of an Ethernet frame as described in section 3.5.5 of [IEEE.802-3.2005]. The structure and semantics within the Tag Control Information field are defined in IEEE P802.1Q. In case of a QinQ frame, it represents the outer tag's VLAN identifier and in case of an IEEE 802.1ad frame it represents the Service VLAN identifier in the S-TAG Tag Control Information (TCI) field as described in [IEEE.802-1ad.2005]. 
octets [IEEE.802-3.2005] dot1qPriority unsigned8 identifier 244 current The value of the 3-bit User Priority portion of the Tag Control Information field of an Ethernet frame as described in section 3.5.5 of [IEEE.802-3.2005]. The structure and semantics within the Tag Control Information field are defined in IEEE P802.1Q. In case of a QinQ frame, it represents the outer tag's 3-bit Class of Service (CoS) identifier and in case of an IEEE 802.1ad frame it represents the 3-bit Priority Code Point (PCP) portion of the S-TAG Tag Control Information (TCI) field as described in [IEEE.802-1ad.2005]. [IEEE.802-3.2005] [IEEE.802-1ad.2005] dot1qCustomerVlanId unsigned16 identifier 245 current In case of a QinQ frame, it represents the inner tag's (*) VLAN identifier and in case of an IEEE 802.1ad frame it represents the Customer VLAN identifier in the C-TAG Tag Control Information (TCI) field as described in [IEEE.802-1ad.2005]. (*) Note: the 802.1Q tag directly following the outer one. [IEEE.802-1ad.2005] [IEEE.802-1Q.2003] dot1qCustomerPriority unsigned8 identifier 246 current In case of a QinQ frame, it represents the inner tag's (*) Class of Service (CoS) identifier and in case of an IEEE 802.1ad frame it represents the 3-bit Priority Code Point (PCP) portion of the C-TAG Tag Control Information (TCI) field as described in [IEEE.802-1ad.2005]. (*) Note: the 802.1Q tag directly following the outer one. [IEEE.802-1ad.2005] [IEEE.802-1Q.2003] metroEvcId string 247 current The EVC Service Attribute which uniquely identifies the Ethernet Virtual Connection (EVC) within a Metro Ethernet Network, as defined in section 6.2 of MEF 10.1. The MetroEVCID is encoded in a string of up to 100 characters. MEF 10.1 (Ethernet Services Attributes Phase 2) MEF16 (Ethernet Local Management Interface) metroEvcType unsigned8 identifier 248 current The 3-bit EVC Service Attribute which identifies the type of service provided by an EVC.
MEF 10.1 (Ethernet Services Attributes Phase 2) MEF16 (Ethernet Local Management Interface) pseudoWireId unsigned32 identifier 249 current A 32-bit non-zero connection identifier, which together with the pseudoWireType, identifies the Pseudo Wire (PW) as defined in RFC 4447 [RFC4447]. See for pseudowire definitions. pseudoWireType unsigned16 identifier 250 current The value of this information element identifies the type of MPLS Pseudo Wire (PW) as defined in RFC 4446. See for the pseudowire type definition, and http://www.iana.org/assignments/pwe3-parameters for the IANA Pseudowire Types Registry. pseudoWireControlWord unsigned32 identifier 251 current The 32-bit Preferred Pseudo Wire (PW) MPLS Control Word as defined in Section 3 of . See for the Pseudo Wire Control Word definition. ingressPhysicalInterface unsigned32 identifier 252 current The index of a networking device's physical interface (example, a switch port) where packets of this flow are being received. See for the definition of the ifIndex object. egressPhysicalInterface unsigned32 identifier 253 current The index of a networking device's physical interface (example, a switch port) where packets of this flow are being sent. See for the definition of the ifIndex object. postDot1qVlanId unsigned16 identifier 254 current The definition of this Information Element is identical to the definition of Information Element 'dot1qVlanId', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. [IEEE.802-3.2005] [IEEE.802-1ad.2005] postDot1qCustomerVlanId unsigned16 identifier 255 current The definition of this Information Element is identical to the definition of Information Element 'dot1qCustomerVlanId', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. 
[IEEE.802-1ad.2005] [IEEE.802-1Q.2003] ethernetType unsigned16 identifier 256 current The Ethernet type field of an Ethernet frame that identifies the MAC client protocol carried in the payload as defined in paragraph 1.4.349 of [IEEE.802-3.2005]. [IEEE.802-3.2005] Ethertype registry available at http://standards.ieee.org/regauth/ethertype/eth.txt postIpPrecedence unsigned8 identifier 257 current The definition of this Information Element is identical to the definition of Information Element 'ipPrecedence', except that it reports a potentially modified value caused by a middlebox function after the packet passed the Observation Point. 0-7 See (Section 5.3.3) and for the definition of the IP Precedence. See (Section 5.3.2) and for the definition of the IPv4 TOS field. See for the definition of the IPv6 Traffic Class field. collectionTimeMilliseconds dateTimeMilliseconds 258 current The absolute timestamp at which the data within the scope containing this Information Element was received by a Collecting Process. This Information Element SHOULD be bound to its containing IPFIX Message via IPFIX Options and the messageScope Information Element, as defined below. exportSctpStreamId unsigned16 identifier 259 current The value of the SCTP Stream Identifier used by the Exporting Process for exporting IPFIX Message data. This is carried in the Stream Identifier field of the header of the SCTP DATA chunk containing the IPFIX Message(s). maxExportSeconds dateTimeSeconds 260 current The absolute Export Time of the latest IPFIX Message within the scope containing this Information Element. This Information Element SHOULD be bound to its containing IPFIX Transport Session via IPFIX Options and the sessionScope Information Element. seconds maxFlowEndSeconds dateTimeSeconds 261 current The latest absolute timestamp of the last packet within any Flow within the scope containing this Information Element, rounded up to the second if necessary. 
This Information Element SHOULD be bound to its containing IPFIX Transport Session via IPFIX Options and the sessionScope Information Element. seconds messageMD5Checksum octetArray 262 current The MD5 checksum of the IPFIX Message containing this record. This Information Element SHOULD be bound to its containing IPFIX Message via an options record and the messageScope Information Element, as defined below, and SHOULD appear only once in a given IPFIX Message. To calculate the value of this Information Element, first buffer the containing IPFIX Message, setting the value of this Information Element to all zeroes. Then calculate the MD5 checksum of the resulting buffer as defined in [RFC1321], place the resulting value in this Information Element, and export the buffered message. This Information Element is intended as a simple checksum only; therefore collision resistance and algorithm agility are not required, and MD5 is an appropriate message digest. This Information Element has a fixed length of 16 octets. messageScope unsigned8 263 current The presence of this Information Element as scope in an Options Template signifies that the options described by the Template apply to the IPFIX Message that contains them. It is defined for general purpose message scoping of options, and proposed specifically to allow the attachment of a checksum to a message via IPFIX Options. The value of this Information Element MUST be written as 0 by the File Writer or Exporting Process. The value of this Information Element MUST be ignored by the File Reader or the Collecting Process. minExportSeconds dateTimeSeconds 264 current The absolute Export Time of the earliest IPFIX Message within the scope containing this Information Element. This Information Element SHOULD be bound to its containing IPFIX Transport Session via an options record and the sessionScope Information Element.
seconds minFlowStartSeconds dateTimeSeconds 265 current The earliest absolute timestamp of the first packet within any Flow within the scope containing this Information Element, rounded down to the second if necessary. This Information Element SHOULD be bound to its containing IPFIX Transport Session via an options record and the sessionScope Information Element. seconds opaqueOctets octetArray 266 current This Information Element is used to encapsulate non-IPFIX data into an IPFIX Message stream, for the purpose of allowing a non-IPFIX data processor to store a data stream inline within an IPFIX File. A Collecting Process or File Writer MUST NOT try to interpret this binary data. This Information Element differs from paddingOctets as its contents are meaningful in some non-IPFIX context, while the contents of paddingOctets MUST be 0x00 and are intended only for Information Element alignment. sessionScope unsigned8 267 current The presence of this Information Element as scope in an Options Template signifies that the options described by the Template apply to the IPFIX Transport Session that contains them. Note that as all options are implicitly scoped to Transport Session and Observation Domain, this Information Element is equivalent to a "null" scope. It is defined for general purpose session scoping of options, and proposed specifically to allow the attachment of a time window to an IPFIX File via IPFIX Options. The value of this Information Element MUST be written as 0 by the File Writer or Exporting Process. The value of this Information Element MUST be ignored by the File Reader or the Collecting Process. maxFlowEndMicroseconds dateTimeMicroseconds 268 current The latest absolute timestamp of the last packet within any Flow within the scope containing this Information Element, rounded up to the microsecond if necessary. This Information Element SHOULD be bound to its containing IPFIX Transport Session via IPFIX Options and the sessionScope Information Element.
This Information Element SHOULD be used only in Transport Sessions containing Flow Records with microsecond- precision (or better) timestamp Information Elements. microseconds maxFlowEndMilliseconds dateTimeMilliseconds 269 current The latest absolute timestamp of the last packet within any Flow within the scope containing this Information Element, rounded up to the millisecond if necessary. This Information Element SHOULD be bound to its containing IPFIX Transport Session via IPFIX Options and the sessionScope Information Element. This Information Element SHOULD be used only in Transport Sessions containing Flow Records with millisecond- precision (or better) timestamp Information Elements. milliseconds maxFlowEndNanoseconds dateTimeNanoseconds 270 current The latest absolute timestamp of the last packet within any Flow within the scope containing this Information Element. This Information Element SHOULD be bound to its containing IPFIX Transport Session via IPFIX Options and the sessionScope Information Element. This Information Element SHOULD be used only in Transport Sessions containing Flow Records with nanosecond-precision timestamp Information Elements. nanoseconds minFlowStartMicroseconds dateTimeMicroseconds 271 current The earliest absolute timestamp of the first packet within any Flow within the scope containing this Information Element, rounded down to the microsecond if necessary. This Information Element SHOULD be bound to its containing IPFIX Transport Session via an options record and the sessionScope Information Element. This Information Element SHOULD be used only in Transport Sessions containing Flow Records with microsecond- precision (or better) timestamp Information Elements. microseconds minFlowStartMilliseconds dateTimeMilliseconds 272 current The earliest absolute timestamp of the first packet within any Flow within the scope containing this Information Element, rounded down to the millisecond if necessary. 
This Information Element SHOULD be bound to its containing IPFIX Transport Session via an options record and the sessionScope Information Element. This Information Element SHOULD be used only in Transport Sessions containing Flow Records with millisecond-precision (or better) timestamp Information Elements. milliseconds minFlowStartNanoseconds dateTimeNanoseconds 273 current The earliest absolute timestamp of the first packet within any Flow within the scope containing this Information Element. This Information Element SHOULD be bound to its containing IPFIX Transport Session via an options record and the sessionScope Information Element. This Information Element SHOULD be used only in Transport Sessions containing Flow Records with nanosecond-precision timestamp Information Elements. nanoseconds collectorCertificate octetArray 274 current The full X.509 certificate, encoded in ASN.1 DER format, used by the Collector when IPFIX Messages were transmitted using TLS or DTLS. This Information Element SHOULD be bound to its containing IPFIX Transport Session via an options record and the sessionScope Information Element, or to its containing IPFIX Message via an options record and the messageScope Information Element. exporterCertificate octetArray 275 current The full X.509 certificate, encoded in ASN.1 DER format, used by the Exporter when IPFIX Messages were transmitted using TLS or DTLS. This Information Element SHOULD be bound to its containing IPFIX Transport Session via an options record and the sessionScope Information Element, or to its containing IPFIX Message via an options record and the messageScope Information Element. dataRecordsReliability boolean identifier 276 current The export reliability of Data Records, within this SCTP stream, for the element(s) in the Options Template scope. A typical example of an element for which the export reliability will be reported is the templateID, as specified in the Data Records Reliability Options Template.
A value of 'True' means that the Exporting Process MUST send any Data Records associated with the element(s) reliably within this SCTP stream. A value of 'False' means that the Exporting Process MAY send any Data Records associated with the element(s) unreliably within this SCTP stream. observationPointType unsigned8 identifier 277 current Type of observation point. Values assigned to date are: 1. Physical port 2. Port channel 3. Vlan. connectionCountNew unsigned32 deltaCounter 278 current This information element counts the number of TCP or UDP connections which were opened during the observation period. The observation period may be specified by the flow start and end timestamps. connectionSumDuration unsigned64 279 current This information element aggregates the total time in seconds for all of the TCP or UDP connections which were in use during the observation period. For example if there are 5 concurrent connections each for 10 seconds, the value would be 50 s. connectionTransactionId unsigned64 identifier 280 current This information element identifies a transaction within a connection. A transaction is a meaningful exchange of application data between two network devices or a client and server. A transactionId is assigned the first time a flow is reported, so that later reports for the same flow will have the same transactionId. A different transactionId is used for each transaction within a TCP or UDP connection. The identifiers need not be sequential. postNATSourceIPv6Address ipv6Address 281 current The definition of this Information Element is identical to the definition of Information Element 'sourceIPv6Address', except that it reports a modified value caused by a NAT64 middlebox function after the packet passed the Observation Point. See [RFC2460] for the definition of the Source Address field in the IPv6 header. See [RFC3234] for the definition of middleboxes. 
See http://tools.ietf.org/html/draft-ietf-behave-v6v4-xlate-stateful-12 for nat64 specification. postNATDestinationIPv6Address ipv6Address 282 current The definition of this Information Element is identical to the definition of Information Element 'destinationIPv6Address', except that it reports a modified value caused by a NAT64 middlebox function after the packet passed the Observation Point. See [RFC2460] for the definition of the Destination Address field in the IPv6 header. See [RFC3234] for the definition of middleboxes. See http://tools.ietf.org/html/draft-ietf-behave-v6v4-xlate-stateful-12 for nat64 specification. natPoolId unsigned32 identifier 283 current Locally unique identifier of a NAT pool. natPoolName string 284 current The name of a NAT pool identified by a natPoolID. anonymizationFlags unsigned16 flags 285 current A flag word describing specialized modifications to the anonymization policy in effect for the anonymization technique applied to a referenced Information Element within a referenced Template. When flags are clear (0), the normal policy (as described by anonymizationTechnique) applies without modification. MSB 14 13 12 11 10 9 8 7 6 5 4 3 2 1 LSB +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ | Reserved |LOR|PmA| SC | +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ anonymizationFlags IE +--------+----------+-----------------------------------------------+ | bit(s) | name | description | | (LSB = | | | | 0) | | | +--------+----------+-----------------------------------------------+ | 0-1 | SC | Stability Class: see the Stability Class | | | | table below, and section Section 5.1. 
| | 2 | PmA | Perimeter Anonymization: when set (1), | | | | source- Information Elements as described in | | | | [RFC5103] are interpreted as external | | | | addresses, and destination- Information | | | | Elements as described in [RFC5103] are | | | | interpreted as internal addresses, for the | | | | purposes of associating | | | | anonymizationTechnique to Information | | | | Elements only; see Section 7.2.2 for details. | | | | This bit MUST NOT be set when associated with | | | | a non-endpoint (i.e., source- or | | | | destination-) Information Element. SHOULD be | | | | consistent within a record (i.e., if a | | | | source- Information Element has this flag | | | | set, the corresponding destination- element | | | | SHOULD have this flag set, and vice-versa.) | | 3 | LOR | Low-Order Unchanged: when set (1), the | | | | low-order bits of the anonymized Information | | | | Element contain real data. This modification | | | | is intended for the anonymization of | | | | network-level addresses while leaving | | | | host-level addresses intact in order to | | | | preserve host level-structure, which could | | | | otherwise be used to reverse anonymization. | | | | MUST NOT be set when associated with a | | | | truncation-based anonymizationTechnique. | | 4-15 | Reserved | Reserved for future use: SHOULD be cleared | | | | (0) by the Exporting Process and MUST be | | | | ignored by the Collecting Process. | +--------+----------+-----------------------------------------------+ The Stability Class portion of this flags word describes the stability class of the anonymization technique applied to a referenced Information Element within a referenced Template. Stability classes refer to the stability of the parameters of the anonymization technique, and therefore the comparability of the mapping between the real and anonymized values over time. This determines which anonymized datasets may be compared with each other. 
Values are as follows: +-----+-----+-------------------------------------------------------+ | Bit | Bit | Description | | 1 | 0 | | +-----+-----+-------------------------------------------------------+ | 0 | 0 | Undefined: the Exporting Process makes no | | | | representation as to how stable the mapping is, or | | | | over what time period values of this field will | | | | remain comparable; while the Collecting Process MAY | | | | assume Session level stability, Session level | | | | stability is not guaranteed. Processes SHOULD assume | | | | this is the case in the absence of stability class | | | | information; this is the default stability class. | | 0 | 1 | Session: the Exporting Process will ensure that the | | | | parameters of the anonymization technique are stable | | | | during the Transport Session. All the values of the | | | | described Information Element for each Record | | | | described by the referenced Template within the | | | | Transport Session are comparable. The Exporting | | | | Process SHOULD endeavour to ensure at least this | | | | stability class. | | 1 | 0 | Exporter-Collector Pair: the Exporting Process will | | | | ensure that the parameters of the anonymization | | | | technique are stable across Transport Sessions over | | | | time with the given Collecting Process, but may use | | | | different parameters for different Collecting | | | | Processes. Data exported to different Collecting | | | | Processes are not comparable. | | 1 | 1 | Stable: the Exporting Process will ensure that the | | | | parameters of the anonymization technique are stable | | | | across Transport Sessions over time, regardless of | | | | the Collecting Process to which it is sent. | +-----+-----+-------------------------------------------------------+ anonymizationTechnique unsigned16 identifier 286 current A description of the anonymization technique applied to a referenced Information Element within a referenced Template. 
Each technique may be applicable only to certain Information Elements and recommended only for certain Information Elements; these restrictions are noted in the table below. +-------+---------------------------+-----------------+-------------+ | Value | Description | Applicable to | Recommended | | | | | for | +-------+---------------------------+-----------------+-------------+ | 0 | Undefined: the Exporting | all | all | | | Process makes no | | | | | representation as to | | | | | whether the defined field | | | | | is anonymized or not. | | | | | While the Collecting | | | | | Process MAY assume that | | | | | the field is not | | | | | anonymized, it is not | | | | | guaranteed not to be. | | | | | This is the default | | | | | anonymization technique. | | | | 1 | None: the values exported | all | all | | | are real. | | | | 2 | Precision | all | all | | | Degradation/Truncation: | | | | | the values exported are | | | | | anonymized using simple | | | | | precision degradation or | | | | | truncation. The new | | | | | precision or number of | | | | | truncated bits is | | | | | implicit in the exported | | | | | data, and can be deduced | | | | | by the Collecting | | | | | Process. | | | | 3 | Binning: the values | all | all | | | exported are anonymized | | | | | into bins. | | | | 4 | Enumeration: the values | all | timestamps | | | exported are anonymized | | | | | by enumeration. | | | | 5 | Permutation: the values | all | identifiers | | | exported are anonymized | | | | | by permutation. | | | | 6 | Structured Permutation: | addresses | | | | the values exported are | | | | | anonymized by | | | | | permutation, preserving | | | | | bit-level structure as | | | | | appropriate; this | | | | | represents | | | | | prefix-preserving IP | | | | | address anonymization or | | | | | structured MAC address | | | | | anonymization.
| | | | 7 | Reverse Truncation: the | addresses | | | | values exported are | | | | | anonymized using reverse | | | | | truncation. The number | | | | | of truncated bits is | | | | | implicit in the exported | | | | | data, and can be deduced | | | | | by the Collecting | | | | | Process. | | | | 8 | Noise: the values | non-identifiers | counters | | | exported are anonymized | | | | | by adding random noise to | | | | | each value. | | | | 9 | Offset: the values | all | timestamps | | | exported are anonymized | | | | | by adding a single offset | | | | | to all values. | | | +-------+---------------------------+-----------------+-------------+ informationElementIndex unsigned16 identifier 287 current A zero-based index of an Information Element referenced by informationElementId within a Template referenced by templateId; used to disambiguate scope for templates containing multiple identical Information Elements. p2pTechnology string 288 current Specifies if the Application ID is based on peer-to-peer technology. Possible values are: { "yes", "y", 1 }, { "no", "n", 2 } and { "unassigned", "u", 0 }. tunnelTechnology string 289 current Specifies if the Application ID is used as a tunnel technology. Possible values are: { "yes", "y", 1 }, { "no", "n", 2 } and { "unassigned", "u", 0 }. encryptedTechnology string 290 current Specifies if the Application ID is an encrypted networking protocol. Possible values are: { "yes", "y", 1 }, { "no", "n", 2 } and { "unassigned", "u", 0 }. basicList basicList list 291 current Specifies a generic Information Element with a basicList abstract data type. For example, a list of port numbers, a list of interface indexes, etc. subTemplateList subTemplateList list 292 current Specifies a generic Information Element with a subTemplateList abstract data type. subTemplateMultiList subTemplateMultiList list 293 current Specifies a generic Information Element with a subTemplateMultiList abstract data type. 
bgpValidityState unsigned8 identifier 294 current This element describes the "validity state" of the BGP route correspondent source or destination IP address. If the "validity state" for this Flow is only available, then the value of this Information Element is 255. See for a description of BGP-4, for the definition of "validity states" and for the encoding of those "validity states". IPSecSPI unsigned32 identifier 295 current IPSec Security Parameters Index (SPI). 0x0-0xFFFFFFFF See for the definition of SPI. greKey unsigned32 identifier 296 current GRE key, which is used for identifying an individual traffic flow within a tunnel. 0x0-0xFFFFFFFF See for the definition of GRE and the GRE Key. natType unsigned8 identifier 297 current The type of NAT treatment: 0 unknown 1 NAT44 translated 2 NAT64 translated 3 NAT46 translated 4 IPv4-->IPv4 (no NAT) 5 NAT66 translated 6 IPv6-->IPv6 (no NAT) See for the definition of NAT. See for the definition of NAT44. See for the definition of NAT64. See for the definition of NAT46. See for the definition of NAT66. See for the definition of IPv4. See for the definition of IPv6. initiatorPackets unsigned64 identifier 298 current The total number of layer 4 packets in a flow from the initiator. The initiator is the device which triggered the session creation, and remains the same for the life of the session. packets See #231, initiatorOctets. responderPackets unsigned64 identifier 299 current The total number of layer 4 packets in a flow from the responder. The responder is the device which replies to the initiator, and remains the same for the life of the session. packets See #232, responderOctets. observationDomainName string 300 current The name of an observation domain identified by an observationDomainId. See #149, observationDomainId. selectionSequenceId unsigned64 identifier 301 current From all the packets observed at an Observation Point, a subset of the packets is selected by a sequence of one or more Selectors. 
The selectionSequenceId is a unique value per Observation Domain, specifying the Observation Point and the sequence of Selectors through which the packets are selected. selectorId unsigned64 identifier 302 current The Selector ID is the unique ID identifying a Primitive Selector. Each Primitive Selector must have a unique ID in the Observation Domain. informationElementId unsigned16 identifier 303 current This Information Element contains the ID of another Information Element. selectorAlgorithm unsigned16 identifier 304 current This Information Element identifies the packet selection methods (e.g., Filtering, Sampling) that are applied by the Selection Process. Most of these methods have parameters. Further Information Elements are needed to fully specify packet selection with these methods and all their parameters. The methods listed below are defined in [RFC5475]. For their parameters, Information Elements are defined in the information model document. The names of these Information Elements are listed for each method identifier. Further method identifiers may be added to the list below. It might be necessary to define new Information Elements to specify their parameters. The selectorAlgorithm registry is maintained by IANA. New assignments for the registry will be administered by IANA, and are subject to Expert Review [RFC5226]. The registry can be updated when specifications of the new method(s) and any new Information Elements are provided. The group of experts must double check the selectorAlgorithm definitions and Information Elements with already defined selectorAlgorithms and Information Elements for completeness, accuracy, and redundancy. Those experts will initially be drawn from the Working Group Chairs and document editors of the IPFIX and PSAMP Working Groups. 
The following packet selection method identifiers are defined here: http://www.iana.org/assignments/psamp-parameters/psamp-parameters.xhtml There is a broad variety of possible parameters that could be used for Property match Filtering (5) but currently there are no agreed parameters specified. samplingPacketInterval unsigned32 quantity 305 current This Information Element specifies the number of packets that are consecutively sampled. A value of 100 means that 100 consecutive packets are sampled. For example, this Information Element may be used to describe the configuration of a systematic count-based Sampling Selector. packets samplingPacketSpace unsigned32 quantity 306 current This Information Element specifies the number of packets between two "samplingPacketInterval"s. A value of 100 means that the next interval starts 100 packets (which are not sampled) after the current "samplingPacketInterval" is over. For example, this Information Element may be used to describe the configuration of a systematic count-based Sampling Selector. packets samplingTimeInterval unsigned32 quantity 307 current This Information Element specifies the time interval in microseconds during which all arriving packets are sampled. For example, this Information Element may be used to describe the configuration of a systematic time-based Sampling Selector. microseconds samplingTimeSpace unsigned32 quantity 308 current This Information Element specifies the time interval in microseconds between two "samplingTimeInterval"s. A value of 100 means that the next interval starts 100 microseconds (during which no packets are sampled) after the current "samplingTimeInterval" is over. For example, this Information Element may be used to describe the configuration of a systematic time-based Sampling Selector. microseconds samplingSize unsigned32 quantity 309 current This Information Element specifies the number of elements taken from the parent Population for random Sampling methods.
For example, this Information Element may be used to describe the configuration of a random n-out-of-N Sampling Selector. packets samplingPopulation unsigned32 quantity 310 current This Information Element specifies the number of elements in the parent Population for random Sampling methods. For example, this Information Element may be used to describe the configuration of a random n-out-of-N Sampling Selector. packets samplingProbability float64 quantity 311 current This Information Element specifies the probability that a packet is sampled, expressed as a value between 0 and 1. The probability is equal for every packet. A value of 0 means no packet was sampled since the probability is 0. For example, this Information Element may be used to describe the configuration of a uniform probabilistic Sampling Selector. dataLinkFrameSize unsigned16 312 current This Information Element specifies the length of the selected data link frame. The data link layer is defined in [ISO/IEC 7498-1:1994]. ipHeaderPacketSection octetArray 313 current This Information Element, which may have a variable length, carries a series of octets from the start of the IP header of a sampled packet. With sufficient length, this element also reports octets from the IP payload, subject to [RFC2804]. See the Security Considerations section. The size of the exported section may be constrained due to limitations in the IPFIX protocol. The data for this field MUST NOT be padded. ipPayloadPacketSection octetArray 314 current This Information Element, which may have a variable length, carries a series of octets from the start of the IP payload of a sampled packet. The IPv4 payload is that part of the packet that follows the IPv4 header and any options, which [RFC0791] refers to as "data" or "data octets". For example, see the examples in [RFC0791], APPENDIX A. The IPv6 payload is the rest of the packet following the 40 octet IPv6 header. 
Note that any extension headers present are considered part of the payload. See [RFC2460] for the IPv6 specification. The size of the exported section may be constrained due to limitations in the IPFIX protocol. The data for this field MUST NOT be padded. dataLinkFrameSection octetArray 315 current This Information Element carries n octets from the data link frame of a selected frame, starting sectionOffset octets into the frame. The sectionObservedOctets expresses how much data was observed, while the remainder is padding. When the sectionObservedOctets field corresponding to this Information Element exists, this Information Element MAY have a fixed length and MAY be padded, or MAY have a variable length. When the sectionObservedOctets field corresponding to this Information Element does not exist, this Information Element SHOULD have a variable length and MUST NOT be padded. In this case, the size of the exported section may be constrained due to limitations in the IPFIX protocol. Further Information Elements, i.e., dataLinkFrameType and dataLinkFrameSize are needed to specify the data link type and the size of the data link frame of this Information Element. A set of these Information Elements MAY be contained in a structured data type, as expressed in . Or a set of these Information Elements MAY be contained in one Flow Record as shown in Appendix C of . The data link layer is defined in [ISO/IEC 7498-1:1994]. mplsLabelStackSection octetArray 316 current This Information Element, which may have a variable length, carries the first n octets from the MPLS label stack of a sampled packet. With sufficient length, this element also reports octets from the MPLS payload, subject to [RFC2804]. See the Security Considerations section. See [RFC3031] for the specification of MPLS packets. See [RFC3032] for the specification of the MPLS label stack. The size of the exported section may be constrained due to limitations in the IPFIX protocol. 
The data for this field MUST NOT be padded. mplsPayloadPacketSection octetArray 317 current This Information Element, which may have a variable length, carries the first n octets from the MPLS payload of a sampled packet, being data that follows immediately after the MPLS label stack. See [RFC3031] for the specification of MPLS packets. See [RFC3032] for the specification of the MPLS label stack. The size of the exported section may be constrained due to limitations in the IPFIX protocol. The data for this field MUST NOT be padded. selectorIdTotalPktsObserved unsigned64 totalCounter 318 current This Information Element specifies the total number of packets observed by a Selector, for a specific value of SelectorId. This Information Element should be used in an Options Template scoped to the observation to which it refers. See Section 3.4.2.1 of the IPFIX protocol document [RFC5101]. packets selectorIdTotalPktsSelected unsigned64 totalCounter 319 current This Information Element specifies the total number of packets selected by a Selector, for a specific value of SelectorId. This Information Element should be used in an Options Template scoped to the observation to which it refers. See Section 3.4.2.1 of the IPFIX protocol document [RFC5101]. packets absoluteError float64 quantity 320 current This Information Element specifies the maximum possible measurement error of the reported value for a given Information Element. The absoluteError has the same unit as the Information Element with which it is associated. The real value of the metric can differ by absoluteError (positive or negative) from the measured value. This Information Element provides only the error for measured values. If an Information Element contains an estimated value (from Sampling), the confidence boundaries and confidence level have to be provided instead, using the upperCILimit, lowerCILimit, and confidenceLevel Information Elements. 
This Information Element should be used in an Options Template scoped to the observation to which it refers. See Section 3.4.2.1 of the IPFIX protocol document [RFC5101]. The units of the Information Element for which the error is specified. relativeError float64 quantity 321 current This Information Element specifies the maximum possible positive or negative error ratio for the reported value for a given Information Element as percentage of the measured value. The real value of the metric can differ by relativeError percent (positive or negative) from the measured value. This Information Element provides only the error for measured values. If an Information Element contains an estimated value (from Sampling), the confidence boundaries and confidence level have to be provided instead, using the upperCILimit, lowerCILimit, and confidenceLevel Information Elements. This Information Element should be used in an Options Template scoped to the observation to which it refers. See Section 3.4.2.1 of the IPFIX protocol document [RFC5101]. observationTimeSeconds dateTimeSeconds quantity 322 current This Information Element specifies the absolute time in seconds of an observation. seconds observationTimeMilliseconds dateTimeMilliseconds quantity 323 current This Information Element specifies the absolute time in milliseconds of an observation. milliseconds observationTimeMicroseconds dateTimeMicroseconds quantity 324 current This Information Element specifies the absolute time in microseconds of an observation. microseconds observationTimeNanoseconds dateTimeNanoseconds quantity 325 current This Information Element specifies the absolute time in nanoseconds of an observation. nanoseconds digestHashValue unsigned64 quantity 326 current This Information Element specifies the value from the digest hash function. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. 
hashIPPayloadOffset unsigned64 quantity 327 current This Information Element specifies the IP payload offset used by a Hash-based Selection Selector. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. hashIPPayloadSize unsigned64 quantity 328 current This Information Element specifies the IP payload size used by a Hash-based Selection Selector. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. hashOutputRangeMin unsigned64 quantity 329 current This Information Element specifies the value for the beginning of a hash function's potential output range. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. hashOutputRangeMax unsigned64 quantity 330 current This Information Element specifies the value for the end of a hash function's potential output range. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. hashSelectedRangeMin unsigned64 quantity 331 current This Information Element specifies the value for the beginning of a hash function's selected range. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. hashSelectedRangeMax unsigned64 quantity 332 current This Information Element specifies the value for the end of a hash function's selected range. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. hashDigestOutput boolean quantity 333 current This Information Element contains a boolean value that is TRUE if the output from this hash Selector has been configured to be included in the packet report as a packet digest, else FALSE. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. hashInitialiserValue unsigned64 quantity 334 current This Information Element specifies the initialiser value to the hash function. See also Sections 6.2, 3.8 and 7.1 of [RFC5475]. selectorName string 335 current The name of a selector identified by a selectorID. Globally unique per Metering Process. upperCILimit float64 quantity 336 current This Information Element specifies the upper limit of a confidence interval. It is used to provide an accuracy statement for an estimated value. 
The confidence limits define the range in which the real value is assumed to be with a certain probability p. Confidence limits always need to be associated with a confidence level that defines this probability p. Please note that a confidence interval only provides a probability that the real value lies within the limits. That means the real value can lie outside the confidence limits. The upperCILimit, lowerCILimit, and confidenceLevel Information Elements should all be used in an Options Template scoped to the observation to which they refer. See Section 3.4.2.1 of the IPFIX protocol document [RFC5101]. Note that the upperCILimit, lowerCILimit, and confidenceLevel are all required to specify confidence, and should be disregarded unless all three are specified together. lowerCILimit float64 quantity 337 current This Information Element specifies the lower limit of a confidence interval. For further information, see the description of upperCILimit. The upperCILimit, lowerCILimit, and confidenceLevel Information Elements should all be used in an Options Template scoped to the observation to which they refer. See Section 3.4.2.1 of the IPFIX protocol document [RFC5101]. Note that the upperCILimit, lowerCILimit, and confidenceLevel are all required to specify confidence, and should be disregarded unless all three are specified together. confidenceLevel float64 quantity 338 current This Information Element specifies the confidence level. It is used to provide an accuracy statement for estimated values. The confidence level provides the probability p with which the real value lies within a given range. A confidence level always needs to be associated with confidence limits that define the range in which the real value is assumed to be. The upperCILimit, lowerCILimit, and confidenceLevel Information Elements should all be used in an Options Template scoped to the observation to which they refer. See Section 3.4.2.1 of the IPFIX protocol document [RFC5101]. 
Note that the upperCILimit, lowerCILimit, and confidenceLevel are all required to specify confidence, and should be disregarded unless all three are specified together. informationElementDataType unsigned8 339 current A description of the abstract data type of an IPFIX information element. These are taken from the abstract data types defined in section 3.1 of the IPFIX Information Model [RFC5102]; see that section for more information on the types described in the informationElementDataType sub-registry. These types are registered in the IANA IPFIX Information Element Data Type subregistry. This subregistry is intended to assign numbers for type names, not to provide a mechanism for adding data types to the IPFIX Protocol, and as such requires a Standards Action [RFC5226] to modify. informationElementDescription string 340 current A UTF-8 [RFC3629] encoded Unicode string containing a human-readable description of an Information Element. The content of the informationElementDescription MAY be annotated with one or more language tags [RFC4646], encoded in-line [RFC2482] within the UTF-8 string, in order to specify the language in which the description is written. Description text in multiple languages MAY tag each section with its own language tag; in this case, the description information in each language SHOULD have equivalent meaning. In the absence of any language tag, the "i-default" [RFC2277] language SHOULD be assumed. See the Security Considerations section for notes on string handling for Information Element type records. informationElementName string 341 current A UTF-8 [RFC3629] encoded Unicode string containing the name of an Information Element, intended as a simple identifier. See the Security Considerations section for notes on string handling for Information Element type records. informationElementRangeBegin unsigned64 quantity 342 current Contains the inclusive low end of the range of acceptable values for an Information Element.
informationElementRangeEnd unsigned64 quantity 343 current Contains the inclusive high end of the range of acceptable values for an Information Element. informationElementSemantics unsigned8 344 current A description of the semantics of an IPFIX Information Element. These are taken from the data type semantics defined in section 3.2 of the IPFIX Information Model [RFC5102]; see that section for more information on the types defined in the informationElementSemantics sub-registry. This field may take the values in Table ; the special value 0x00 (default) is used to note that no semantics apply to the field; it cannot be manipulated by a Collecting Process or File Reader that does not understand it a priori. These semantics are registered in the IANA IPFIX Information Element Semantics subregistry. This subregistry is intended to assign numbers for semantics names, not to provide a mechanism for adding semantics to the IPFIX Protocol, and as such requires a Standards Action [RFC5226] to modify. informationElementUnits unsigned16 345 current A description of the units of an IPFIX Information Element. These correspond to the units implicitly defined in the Information Element definitions in section 5 of the IPFIX Information Model [RFC5102]; see that section for more information on the types described in the informationElementsUnits sub-registry. This field may take the values in Table 3 below; the special value 0x00 (none) is used to note that the field is unitless. These types are registered in the IANA IPFIX Information Element Units subregistry; new types may be added on a First Come First Served [RFC5226] basis. privateEnterpriseNumber unsigned32 identifier 346 current A private enterprise number, as assigned by IANA. Within the context of an Information Element Type record, this element can be used along with the informationElementId element to scope properties to a specific Information Element. 
To export type information about an IANA-assigned Information Element, set the privateEnterpriseNumber to 0, or do not export the privateEnterpriseNumber in the type record. To export type information about an enterprise-specific Information Element, export the enterprise number in privateEnterpriseNumber, and export the Information Element number with the Enterprise bit cleared in informationElementId. The Enterprise bit in the associated informationElementId Information Element MUST be ignored by the Collecting Process. virtualStationInterfaceId octetArray identifier 347 current Instance Identifier of the interface to a Virtual Station. A Virtual Station is an end station instance: it can be a virtual machine or a physical host. See IEEE 802.1Qbg for the definition of Virtual Station Interface ID. virtualStationInterfaceName string 348 current Name of the interface to a Virtual Station. A Virtual Station is an end station instance: it can be a virtual machine or a physical host. See IEEE 802.1Qbg for the definition of Virtual Station Interface. virtualStationUUID octetArray identifier 349 current Unique Identifier of a Virtual Station. A Virtual Station is an end station instance: it can be a virtual machine or a physical host. See IEEE 802.1Qbg for the definition of Virtual Station. virtualStationName string 350 current Name of a Virtual Station. A Virtual Station is an end station instance: it can be a virtual machine or a physical host. See IEEE 802.1Qbg for the definition of Virtual Station. layer2SegmentId unsigned64 identifier 351 current Identifier of a layer 2 network segment in an overlay network. The most significant byte identifies the layer 2 network overlay network encapsulation type: 0x00 reserved 0x01 VxLAN 0x02 NVGRE The three lowest significant bytes hold the value of the layer 2 overlay network segment identifier. 
For example: - a 24 bit segment ID VXLAN Network Identifier (VNI) - a 24 bit Tenant Network Identifier (TNI) for NVGRE See VxLAN RFC at http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-00 See NVGRE RFC at http://tools.ietf.org/html/draft-sridharan-virtualization-nvgre-00 layer2OctetDeltaCount unsigned64 deltaCounter 352 current The number of layer 2 octets since the previous report (if any) in incoming packets for this Flow at the Observation Point. The number of octets includes layer 2 header(s) and layer 2 payload. # memo: layer 2 version of octetDeltaCount (field #1) octets layer2OctetTotalCount unsigned64 totalCounter 353 current The total number of layer 2 octets in incoming packets for this Flow at the Observation Point since the Metering Process (re-)initialization for this Observation Point. The number of octets includes layer 2 header(s) and layer 2 payload. # memo: layer 2 version of octetTotalCount (field #85) octets ingressUnicastPacketTotalCount unsigned64 totalCounter 354 current The total number of incoming unicast packets metered at the Observation Point since the Metering Process (re-)initialization for this Observation Point. packets ingressMulticastPacketTotalCount unsigned64 totalCounter 355 current The total number of incoming multicast packets metered at the Observation Point since the Metering Process (re-)initialization for this Observation Point. packets ingressBroadcastPacketTotalCount unsigned64 totalCounter 356 current The total number of incoming broadcast packets metered at the Observation Point since the Metering Process (re-)initialization for this Observation Point. packets egressUnicastPacketTotalCount unsigned64 totalCounter 357 current The total number of outgoing unicast packets metered at the Observation Point since the Metering Process (re-)initialization for this Observation Point.
packets egressBroadcastPacketTotalCount unsigned64 totalCounter 358 current The total number of outgoing broadcast packets metered at the Observation Point since the Metering Process (re-)initialization for this Observation Point. packets monitoringIntervalStartMilliSeconds dateTimeMilliseconds 359 current The absolute timestamp at which the monitoring interval started. A Monitoring interval is the period of time during which the Metering Process is running. milliseconds monitoringIntervalEndMilliSeconds dateTimeMilliseconds 360 current The absolute timestamp at which the monitoring interval ended. A Monitoring interval is the period of time during which the Metering Process is running. milliseconds portRangeStart unsigned16 identifier 361 current The port number identifying the start of a range of ports. A value of zero indicates that the range start is not specified, i.e., the range is defined in some other way. Additional information on defined TCP port numbers can be found at http://www.iana.org/assignments/service-names-port-numbers. portRangeEnd unsigned16 identifier 362 current The port number identifying the end of a range of ports. A value of zero indicates that the range end is not specified, i.e., the range is defined in some other way. Additional information on defined TCP port numbers can be found at http://www.iana.org/assignments/service-names-port-numbers. portRangeStepSize unsigned16 identifier 363 current The step size in a port range. The default step size is 1, which indicates contiguous ports. A value of zero indicates that the step size is not specified, i.e., the range is defined in some other way. portRangeNumPorts unsigned16 identifier 364 current The number of ports in a port range. A value of zero indicates that the number of ports is not specified, i.e., the range is defined in some other way. staMacAddress macAddress identifier 365 current The IEEE 802 MAC address of a wireless station (STA). See section 1.4 of RFC5415 for the definition of STA.
staIPv4Address ipv4Address identifier 366 current The IPv4 address of a wireless station (STA). See section 1.4 of RFC5415 for the definition of STA. wtpMacAddress macAddress identifier 367 current The IEEE 802 MAC address of a wireless access point (WTP). See section 1.4 of RFC5415 for the definition of WTP. ingressInterfaceType unsigned32 identifier 368 current The type of interface where packets of this Flow are being received. The value matches the value of managed object 'ifType' as defined in http://www.iana.org/assignments/ianaiftype-mib/ianaiftype-mib http://www.iana.org/assignments/ianaiftype-mib/ianaiftype-mib egressInterfaceType unsigned32 identifier 369 current The type of interface where packets of this Flow are being sent. The value matches the value of managed object 'ifType' as defined in http://www.iana.org/assignments/ianaiftype-mib/ianaiftype-mib http://www.iana.org/assignments/ianaiftype-mib/ianaiftype-mib rtpSequenceNumber unsigned16 370 current The RTP sequence number per RFC3550. userName string 371 current User name associated with the flow. applicationCategoryName string 372 current An attribute that provides a first level categorization for each Application ID. applicationSubCategoryName string 373 current An attribute that provides a second level categorization for each Application ID. applicationGroupName string 374 current An attribute that groups multiple Application IDs that belong to the same networking application. originalFlowsPresent unsigned64 deltaCounter 375 current The non-conservative count of Original Flows contributing to this Aggregated Flow. Non-conservative counts need not sum to the original count on re-aggregation. originalFlowsInitiated unsigned64 deltaCounter 376 current The conservative count of Original Flows whose first packet is represented within this Aggregated Flow. Conservative counts must sum to the original count on re-aggregation. 
originalFlowsCompleted unsigned64 deltaCounter 377 current The conservative count of Original Flows whose last packet is represented within this Aggregated Flow. Conservative counts must sum to the original count on re-aggregation. distinctCountOfSourceIPAddress unsigned64 totalCounter 378 current The count of distinct source IP address values for Original Flows contributing to this Aggregated Flow, without regard to IP version. This Information Element is preferred to the IP-version-specific counters, unless it is important to separate the counts by version. distinctCountOfDestinationIPAddress unsigned64 totalCounter 379 current The count of distinct destination IP address values for Original Flows contributing to this Aggregated Flow, without regard to IP version. This Information Element is preferred to the version-specific counters below, unless it is important to separate the counts by version. distinctCountOfSourceIPv4Address unsigned32 totalCounter 380 current The count of distinct source IPv4 address values for Original Flows contributing to this Aggregated Flow. distinctCountOfDestinationIPv4Address unsigned32 totalCounter 381 current The count of distinct destination IPv4 address values for Original Flows contributing to this Aggregated Flow. distinctCountOfSourceIPv6Address unsigned64 totalCounter 382 current The count of distinct source IPv6 address values for Original Flows contributing to this Aggregated Flow. distinctCountOfDestinationIPv6Address unsigned64 totalCounter 383 current The count of distinct destination IPv6 address values for Original Flows contributing to this Aggregated Flow. valueDistributionMethod unsigned8 384 current A description of the method used to distribute the counters from Contributing Flows into the Aggregated Flow records described by an associated scope, generally a Template. 
The method is deemed to apply to all the non-key Information Elements in the referenced scope for which value distribution is a valid operation; if the originalFlowsInitiated and/or originalFlowsCompleted Information Elements appear in the Template, they are not subject to this distribution method, as they each infer their own distribution method. This is intended to be a complete set of possible value distribution methods; it is encoded as follows: +-------+-----------------------------------------------------------+ | Value | Description | +-------+-----------------------------------------------------------+ | 0 | Unspecified: The counters for an Original Flow are | | | explicitly not distributed according to any other method | | | defined for this Information Element; use for arbitrary | | | distribution, or distribution algorithms not described by | | | any other codepoint. | | | --------------------------------------------------------- | | | | | 1 | Start Interval: The counters for an Original Flow are | | | added to the counters of the appropriate Aggregated Flow | | | containing the start time of the Original Flow. This | | | should be assumed the default if value distribution | | | information is not available at a Collecting Process for | | | an Aggregated Flow. | | | --------------------------------------------------------- | | | | | 2 | End Interval: The counters for an Original Flow are added | | | to the counters of the appropriate Aggregated Flow | | | containing the end time of the Original Flow. | | | --------------------------------------------------------- | | | | | 3 | Mid Interval: The counters for an Original Flow are added | | | to the counters of a single appropriate Aggregated Flow | | | containing some timestamp between start and end time of | | | the Original Flow. 
| | | --------------------------------------------------------- | | | | | 4 | Simple Uniform Distribution: Each counter for an Original | | | Flow is divided by the number of time intervals the | | | Original Flow covers (i.e., of appropriate Aggregated | | | Flows sharing the same Flow Key), and this number is | | | added to each corresponding counter in each Aggregated | | | Flow. | | | --------------------------------------------------------- | | | | | 5 | Proportional Uniform Distribution: Each counter for an | | | Original Flow is divided by the number of time units the | | | Original Flow covers, to derive a mean count rate. This | | | mean count rate is then multiplied by the number of time | | | units in the intersection of the duration of the Original | | | Flow and the time interval of each Aggregated Flow. This | | | is like simple uniform distribution, but accounts for the | | | fractional portions of a time interval covered by an | | | Original Flow in the first and last time interval. | | | --------------------------------------------------------- | | | | | 6 | Simulated Process: Each counter of the Original Flow is | | | distributed among the intervals of the Aggregated Flows | | | according to some function the Intermediate Aggregation | | | Process uses based upon properties of Flows presumed to | | | be like the Original Flow. This is essentially an | | | assertion that the Intermediate Aggregation Process has | | | no direct packet timing information but is nevertheless | | | not using one of the other simpler distribution methods. | | | The Intermediate Aggregation Process specifically makes | | | no assertion as to the correctness of the simulation. | | | --------------------------------------------------------- | | | | | 7 | Direct: The Intermediate Aggregation Process has access | | | to the original packet timings from the packets making up | | | the Original Flow, and uses these to distribute or | | | recalculate the counters. 
| +-------+-----------------------------------------------------------+ rfc3550JitterMeanMilliseconds unsigned32 quantity 385 current Interarrival jitter as defined in section 6.4.1 of [RFC3550], measured in milliseconds. milliseconds rfc3550JitterMeanMicroseconds unsigned32 quantity 386 current Interarrival jitter as defined in section 6.4.1 of [RFC3550], measured in microseconds. microseconds rfc3550JitterMeanNanoseconds unsigned32 quantity 387 current Interarrival jitter as defined in section 6.4.1 of [RFC3550], measured in nanoseconds. nanoseconds 388-32767 IPFIX MPLS label type (Value 46) Expert Review Primary expert - Nevil Brownlee and Secondary expert - Juergen Quittek 1 TE-MIDPT: Any TE tunnel mid-point or tail label 2 Pseudowire: Any PWE3 or Cisco AToM based label 3 VPN: Any label associated with VPN 4 BGP: Any label associated with BGP or BGP routing 5 LDP: Any label associated with dynamically assigned labels using LDP 6-255 Unassigned Classification Engine IDs (Value 101) Expert Review Primary expert - Nevil Brownlee and Secondary expert - Juergen Quittek 0 Invalid. 1 IANA-L3: The Assigned Internet Protocol Number (layer 3 (L3)) is exported in the Selector ID. See http://www.iana.org/assignments/protocol-numbers. 1 2 PANA-L3: Proprietary layer 3 definition. An enterprise can export its own layer 3 protocol numbers. The Selector ID has a global significance for all devices from the same enterprise. 1 3 IANA-L4: The IANA layer 4 (L4) well-known port number is exported in the Selector ID. See [http://www.iana.org/assignments/service-names-port-numbers]. Note: as an IPFIX flow is unidirectional, it contains the destination port in a flow from the client to the server. 2 4 PANA-L4: Proprietary layer 4 definition. An enterprise can export its own layer 4 port numbers. The Selector ID has global significance for devices from the same enterprise. Example: IPFIX had the port 4739 pre-assigned in the IETF draft for years.
While waiting for the RFC and its associated IANA registration, the Selector ID 4739 was used with this PANA-L4. 2 5 Reserved 6 USER-Defined: The Selector ID represents applications defined by the user (using CLI, GUI, etc.) based on the methods described in section 2. The Selector ID has a local significance per device. 3 7 Reserved 8 Reserved 9 Reserved 10 Reserved 11 Reserved 12 PANA-L2: Proprietary layer 2 (L2) definition. An enterprise can export its own layer 2 identifiers. The Selector ID represents the enterprise's unique global layer 2 applications. The Selector ID has a global significance for all devices from the same enterprise. Examples include Cisco Subnetwork Access Protocol (SNAP). 5 13 PANA-L7: Proprietary layer 7 definition. The Selector ID represents the enterprise's unique global ID for the layer 7 applications. The Selector ID has a global significance for all devices from the same enterprise. This Classification Engine Id is used when the application registry is owned by the Exporter manufacturer (referred to as the "enterprise" in this document). 3 14 Reserved 15 Reserved 16 Reserved 17 Reserved 18 ETHERTYPE: The Selector ID represents the well- known Ethertype. See http://standards.ieee.org/develop/regauth/ethertype/eth.txt. Note that the Ethertype is usually expressed in hexadecimal. However, the corresponding decimal value is used in this Selector ID. 2 19 LLC: The Selector ID represents the well-known IEEE 802.2 Link Layer Control (LLC) Destination Service Access Point (DSAP). See http://standards.ieee.org/develop/regauth/llc/public.html. Note that LLC DSAP is usually expressed in hexadecimal. However, the corresponding decimal value is used in this Selector ID. 
1 20 PANA-L7-PEN: Proprietary layer 7 definition, including a Private Enterprise Number (PEN) [http://www.iana.org/assignments/enterprise-numbers] to identify that the application registry being used is not owned by the Exporter manufacturer or to identify the original enterprise in the case of a mediator or 3rd party device. The Selector ID represents the enterprise unique global ID for the layer 7 applications. The Selector ID has a global significance for all devices from the same enterprise. 3 IPFIX Version Numbers Standards Action 0 Reserved 1-8 Reserved (historic) 9 Cisco Systems NetFlow Version 9 (historic) 10 IPFIX as documented in RFC5101 11-65535 Unassigned IPFIX Set IDs Standards Action 0-1 Not used (historic) 2 Template Set 3 Option Template Set 4-255 Unassigned 256-65535 Reserved for Data Sets IPFIX Information Element Data Types Standards Action 0 octetArray 1 unsigned8 2 unsigned16 3 unsigned32 4 unsigned64 5 signed8 6 signed16 7 signed32 8 signed64 9 float32 10 float64 11 boolean 12 macAddress 13 string 14 dateTimeSeconds 15 dateTimeMilliseconds 16 dateTimeMicroseconds 17 dateTimeNanoseconds 18 ipv4Address 19 ipv6Address 20 basicList 21 subTemplateList 22 subTemplateMultiList 23-255 Unassigned IPFIX Information Element Semantics Standards Action 0 default 1 quantity 2 totalCounter 3 deltaCounter 4 identifier 5 flags 6 list 7-255 Unassigned IPFIX Information Element Units Expert Review 0 none 1 bits 2 octets 3 packets 4 flows 5 seconds 6 milliseconds 7 microseconds 8 nanoseconds 9 4-octet words for IPv4 header length 10 messages for reliability reporting 11 hops for TTL 12 entries for MPLS label stack 13-65535 Unassigned IPFIX Structured Data Types Semantics Standards Action 0x00 noneOf The "noneOf" structured data type semantic specifies that none of the elements are actual properties of the Data Record. 
0x01 exactlyOneOf The "exactlyOneOf" structured data type semantic specifies that only a single element from the structured data is an actual property of the Data Record. This is equivalent to a logical XOR operation. 0x02 oneOrMoreOf The "oneOrMoreOf" structured data type semantic specifies that one or more elements from the list in the structured data are actual properties of the Data Record. This is equivalent to a logical OR operation. 0x03 allOf The "allOf" structured data type semantic specifies that all of the list elements from the structured data are actual properties of the Data Record. 0x04 ordered The "ordered" structured data type semantic specifies that elements from the list in the structured data are ordered. 0x05-0xFE unassigned 0xFF undefined The "undefined" structured data type semantic specifies that the semantic of the list elements is not specified and that, if a semantic exists, then it is up to the Collecting Process to draw its own conclusions. The "undefined" structured data type semantic is the default structured data type semantic. ipfix-iana at cisco.com mailto:ipfix-iana&cisco.com 2012-09-20 openvswitch-2.0.1+git20140120/ofproto/names.c000066400000000000000000000021761226605124000204170ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ofproto/ofproto.h" #include "dpif.h" /* This function is in a separate file because it is the only ofproto function * required by ovs-ofctl, which allows ovs-ofctl to avoid linking in extra * baggage. */ /* Parses 'ofproto_name', which is of the form [type@]name into component * pieces that are suitable for passing to ofproto_create(). The caller must * free 'name' and 'type'. */ void ofproto_parse_name(const char *ofproto_name, char **name, char **type) { dp_parse_name(ofproto_name, name, type); } openvswitch-2.0.1+git20140120/ofproto/netflow.c000066400000000000000000000241051226605124000207660ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "netflow.h" #include #include #include #include #include "byte-order.h" #include "collectors.h" #include "flow.h" #include "lib/netflow.h" #include "ofpbuf.h" #include "ofproto.h" #include "ofproto/netflow.h" #include "packets.h" #include "poll-loop.h" #include "socket-util.h" #include "timeval.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(netflow); struct netflow { uint8_t engine_type; /* Value of engine_type to use. */ uint8_t engine_id; /* Value of engine_id to use. */ long long int boot_time; /* Time when netflow_create() was called. */ struct collectors *collectors; /* NetFlow collectors. 
*/ bool add_id_to_iface; /* Put the 7 least significiant bits of * 'engine_id' into the most significant * bits of the interface fields. */ uint32_t netflow_cnt; /* Flow sequence number for NetFlow. */ struct ofpbuf packet; /* NetFlow packet being accumulated. */ long long int active_timeout; /* Timeout for flows that are still active. */ long long int next_timeout; /* Next scheduled active timeout. */ long long int reconfig_time; /* When we reconfigured the timeouts. */ }; void netflow_mask_wc(struct flow *flow, struct flow_wildcards *wc) { if (flow->dl_type != htons(ETH_TYPE_IP)) { return; } memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src); memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst); memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src); memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst); wc->masks.nw_tos |= IP_DSCP_MASK; } static void gen_netflow_rec(struct netflow *nf, struct netflow_flow *nf_flow, struct ofexpired *expired, uint32_t packet_count, uint32_t byte_count) { struct netflow_v5_header *nf_hdr; struct netflow_v5_record *nf_rec; if (!nf->packet.size) { struct timespec now; time_wall_timespec(&now); nf_hdr = ofpbuf_put_zeros(&nf->packet, sizeof *nf_hdr); nf_hdr->version = htons(NETFLOW_V5_VERSION); nf_hdr->count = htons(0); nf_hdr->sysuptime = htonl(time_msec() - nf->boot_time); nf_hdr->unix_secs = htonl(now.tv_sec); nf_hdr->unix_nsecs = htonl(now.tv_nsec); nf_hdr->flow_seq = htonl(nf->netflow_cnt++); nf_hdr->engine_type = nf->engine_type; nf_hdr->engine_id = nf->engine_id; nf_hdr->sampling_interval = htons(0); } nf_hdr = nf->packet.data; nf_hdr->count = htons(ntohs(nf_hdr->count) + 1); nf_rec = ofpbuf_put_zeros(&nf->packet, sizeof *nf_rec); nf_rec->src_addr = expired->flow.nw_src; nf_rec->dst_addr = expired->flow.nw_dst; nf_rec->nexthop = htonl(0); if (nf->add_id_to_iface) { uint16_t iface = (nf->engine_id & 0x7f) << 9; nf_rec->input = htons(iface | 
(ofp_to_u16(expired->flow.in_port.ofp_port) & 0x1ff)); nf_rec->output = htons(iface | (ofp_to_u16(nf_flow->output_iface) & 0x1ff)); } else { nf_rec->input = htons(ofp_to_u16(expired->flow.in_port.ofp_port)); nf_rec->output = htons(ofp_to_u16(nf_flow->output_iface)); } nf_rec->packet_count = htonl(packet_count); nf_rec->byte_count = htonl(byte_count); nf_rec->init_time = htonl(nf_flow->created - nf->boot_time); nf_rec->used_time = htonl(MAX(nf_flow->created, expired->used) - nf->boot_time); if (expired->flow.nw_proto == IPPROTO_ICMP) { /* In NetFlow, the ICMP type and code are concatenated and * placed in the 'dst_port' field. */ uint8_t type = ntohs(expired->flow.tp_src); uint8_t code = ntohs(expired->flow.tp_dst); nf_rec->src_port = htons(0); nf_rec->dst_port = htons((type << 8) | code); } else { nf_rec->src_port = expired->flow.tp_src; nf_rec->dst_port = expired->flow.tp_dst; } nf_rec->tcp_flags = nf_flow->tcp_flags; nf_rec->ip_proto = expired->flow.nw_proto; nf_rec->ip_tos = expired->flow.nw_tos & IP_DSCP_MASK; /* NetFlow messages are limited to 30 records. */ if (ntohs(nf_hdr->count) >= 30) { netflow_run(nf); } } void netflow_expire(struct netflow *nf, struct netflow_flow *nf_flow, struct ofexpired *expired) { uint64_t pkt_delta = expired->packet_count - nf_flow->packet_count_off; uint64_t byte_delta = expired->byte_count - nf_flow->byte_count_off; nf_flow->last_expired += nf->active_timeout; /* NetFlow only reports on IP packets and we should only report flows * that actually have traffic. */ if (expired->flow.dl_type != htons(ETH_TYPE_IP) || pkt_delta == 0) { return; } if ((byte_delta >> 32) <= 175) { /* NetFlow v5 records are limited to 32-bit counters. If we've wrapped * a counter, send as multiple records so we don't lose track of any * traffic. We try to evenly distribute the packet and byte counters, * so that the bytes-per-packet lengths don't look wonky across the * records. 
*/ while (byte_delta) { /* Records still needed for the remaining bytes: 'byte_delta' * divided by UINT32_MAX, rounded up. */ int n_recs = (byte_delta + UINT32_MAX - 1) / UINT32_MAX; uint32_t pkt_count = pkt_delta / n_recs; uint32_t byte_count = byte_delta / n_recs; gen_netflow_rec(nf, nf_flow, expired, pkt_count, byte_count); /* What is left is shared among the remaining records. */ pkt_delta -= pkt_count; byte_delta -= byte_count; } } else { /* In 600 seconds, a 10GbE link can theoretically transmit 75 * 10**10 * == 175 * 2**32 bytes. The byte counter is bigger than that, so it's * probably a bug--for example, the netdev code uses UINT64_MAX to * report "unknown value", and perhaps that has leaked through to here. * * We wouldn't want to hit the loop above in this case, because it * would try to send up to UINT32_MAX netflow records, which would take * a long time. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_WARN_RL(&rl, "impossible byte counter %"PRIu64, byte_delta); } /* Update flow tracking data. */ nf_flow->created = 0; nf_flow->packet_count_off = expired->packet_count; nf_flow->byte_count_off = expired->byte_count; nf_flow->tcp_flags = 0; } /* Sends any NetFlow packet accumulated in 'nf' to its collectors. * * Returns true if it's time to send out a round of NetFlow active timeouts, * false otherwise. 
*/ bool netflow_run(struct netflow *nf) { /* Flush the accumulated packet, if any. */ if (nf->packet.size) { collectors_send(nf->collectors, nf->packet.data, nf->packet.size); nf->packet.size = 0; } if (nf->active_timeout && time_msec() >= nf->next_timeout) { /* Check for active timeouts again one second from now. */ nf->next_timeout = time_msec() + 1000; return true; } else { return false; } } /* Arranges for the poll loop to wake up when netflow_run() has work to do: at * 'next_timeout' if active timeouts are enabled, and immediately if a packet * is waiting to be sent. */ void netflow_wait(struct netflow *nf) { if (nf->active_timeout) { poll_timer_wait_until(nf->next_timeout); } if (nf->packet.size) { poll_immediate_wake(); } } /* Applies 'nf_options' to 'nf', replacing its collectors. A negative * 'active_timeout' selects NF_ACTIVE_TIMEOUT_DEFAULT. * * Always returns 0: the result of collectors_create() is not checked. */ int netflow_set_options(struct netflow *nf, const struct netflow_options *nf_options) { int error = 0; long long int old_timeout; nf->engine_type = nf_options->engine_type; nf->engine_id = nf_options->engine_id; nf->add_id_to_iface = nf_options->add_id_to_iface; collectors_destroy(nf->collectors); collectors_create(&nf_options->collectors, 0, &nf->collectors); old_timeout = nf->active_timeout; if (nf_options->active_timeout >= 0) { nf->active_timeout = nf_options->active_timeout; } else { nf->active_timeout = NF_ACTIVE_TIMEOUT_DEFAULT; } /* The option is in seconds; 'nf' keeps milliseconds. */ nf->active_timeout *= 1000; if (old_timeout != nf->active_timeout) { /* The timeout changed: note when, and make the next check due now. */ nf->reconfig_time = time_msec(); nf->next_timeout = time_msec(); } return error; } /* Creates and returns a new NetFlow exporter with no collectors. Its boot * time is the current time and its packet buffer is initialized with size * 1500. */ struct netflow * netflow_create(void) { struct netflow *nf = xzalloc(sizeof *nf); nf->engine_type = 0; nf->engine_id = 0; nf->boot_time = time_msec(); nf->collectors = NULL; nf->add_id_to_iface = false; nf->netflow_cnt = 0; ofpbuf_init(&nf->packet, 1500); return nf; } /* Frees 'nf' along with its packet buffer and collectors. Does nothing if * 'nf' is null. */ void netflow_destroy(struct netflow *nf) { if (nf) { ofpbuf_uninit(&nf->packet); collectors_destroy(nf->collectors); free(nf); } } /* Initializes a new 'nf_flow' given that the caller has already cleared it to * all-zero-bits. */ void netflow_flow_init(struct netflow_flow *nf_flow OVS_UNUSED) { /* Nothing to do. 
*/ } /* Resets 'nf_flow' to all-zero-bits except for 'output_iface', which is * preserved. */ void netflow_flow_clear(struct netflow_flow *nf_flow) { ofp_port_t output_iface = nf_flow->output_iface; memset(nf_flow, 0, sizeof *nf_flow); nf_flow->output_iface = output_iface; } /* Notes that 'nf_flow' was used at time 'used'. Sets the flow's creation * time if it is not yet set. Resets 'last_expired' to the current time if * 'nf' is null, active timeouts are disabled, 'last_expired' was never set, * or the timeouts were reconfigured after it was last set. */ void netflow_flow_update_time(struct netflow *nf, struct netflow_flow *nf_flow, long long int used) { if (!nf_flow->created) { nf_flow->created = used; } if (!nf || !nf->active_timeout || !nf_flow->last_expired || nf->reconfig_time > nf_flow->last_expired) { /* Keep the time updated to prevent a flood of expiration in * the future. */ nf_flow->last_expired = time_msec(); } } /* ORs 'tcp_flags' into the TCP flags accumulated in 'nf_flow'. */ void netflow_flow_update_flags(struct netflow_flow *nf_flow, uint8_t tcp_flags) { nf_flow->tcp_flags |= tcp_flags; } /* Returns true if active timeouts are enabled on 'nf' and more than * 'nf->active_timeout' milliseconds have passed since * 'nf_flow->last_expired', false otherwise. */ bool netflow_active_timeout_expired(struct netflow *nf, struct netflow_flow *nf_flow) { if (nf->active_timeout) { return time_msec() > nf_flow->last_expired + nf->active_timeout; } return false; } openvswitch-2.0.1+git20140120/ofproto/netflow.h000066400000000000000000000047621226605124000210020ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OFPROTO_NETFLOW_H #define OFPROTO_NETFLOW_H 1 #include #include "flow.h" #include "sset.h" /* Default active timeout interval, in seconds. * * (The active timeout interval is the interval at which NetFlow records are * sent for flows that do not expire, so that such flows are still * accounted.) 
*/ #define NF_ACTIVE_TIMEOUT_DEFAULT 600 struct ofexpired; /* Configuration passed to netflow_set_options(). */ struct netflow_options { struct sset collectors; /* Passed to collectors_create(). */ uint8_t engine_type; /* Copied into the NetFlow v5 header. */ uint8_t engine_id; /* Copied into the NetFlow v5 header. */ int active_timeout; /* In seconds; a negative value selects * NF_ACTIVE_TIMEOUT_DEFAULT. */ bool add_id_to_iface; /* Put the low 7 bits of 'engine_id' into the * record's interface fields. */ }; #define NF_OUT_FLOOD OFP_PORT_C(UINT16_MAX) #define NF_OUT_MULTI OFP_PORT_C(UINT16_MAX - 1) #define NF_OUT_DROP OFP_PORT_C(UINT16_MAX - 2) /* Per-flow NetFlow accounting state. */ struct netflow_flow { long long int last_expired; /* Time this flow last timed out. */ long long int created; /* Time the flow was first used since the last * time out, or 0 if not yet set. */ uint64_t packet_count_off; /* Packet count at last time out. */ uint64_t byte_count_off; /* Byte count at last time out. */ ofp_port_t output_iface; /* Output interface index. */ uint8_t tcp_flags; /* Bitwise-OR of all TCP flags seen. */ }; struct netflow *netflow_create(void); void netflow_destroy(struct netflow *); int netflow_set_options(struct netflow *, const struct netflow_options *); void netflow_expire(struct netflow *, struct netflow_flow *, struct ofexpired *); bool netflow_run(struct netflow *); void netflow_wait(struct netflow *); void netflow_mask_wc(struct flow *, struct flow_wildcards *); void netflow_flow_init(struct netflow_flow *); void netflow_flow_clear(struct netflow_flow *); void netflow_flow_update_time(struct netflow *, struct netflow_flow *, long long int used); void netflow_flow_update_flags(struct netflow_flow *, uint8_t tcp_flags); bool netflow_active_timeout_expired(struct netflow *, struct netflow_flow *); #endif /* netflow.h */ openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-governor.c000066400000000000000000000151211226605124000235550ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ofproto-dpif-governor.h" #include #include "coverage.h" #include "poll-loop.h" #include "random.h" #include "timeval.h" #include "util.h" #include "valgrind.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ofproto_dpif_governor); /* Minimum number of observed packets before setting up a flow. * * This value seems OK empirically. * * Per-flow counts are kept in 4-bit counters (two per table byte), so the * threshold must be less than 16. */ #define FLOW_SETUP_THRESHOLD 5 BUILD_ASSERT_DECL(FLOW_SETUP_THRESHOLD > 1); BUILD_ASSERT_DECL(FLOW_SETUP_THRESHOLD < 16); /* Minimum and maximum size of a governor, in bytes. */ enum { MIN_SIZE = 16 * 1024 }; enum { MAX_SIZE = 256 * 1024 }; BUILD_ASSERT_DECL(IS_POW2(MIN_SIZE)); BUILD_ASSERT_DECL(IS_POW2(MAX_SIZE)); /* Minimum and maximum time to process the number of packets that make up a * given generation. If a generation completes faster than the minimum time, * we double the table size (but no more than MAX_SIZE). If a generation takes * more than the maximum time to complete, we halve the table size (but no * smaller than MIN_SIZE). */ enum { MIN_ELAPSED = 1000 }; /* In milliseconds. */ enum { MAX_ELAPSED = 5000 }; /* In milliseconds. */ static void governor_new_generation(struct governor *, unsigned int size); /* Creates and returns a new governor whose table is MIN_SIZE bytes. */ struct governor * governor_create(void) { struct governor *g = xzalloc(sizeof *g); governor_new_generation(g, MIN_SIZE); return g; } /* Destroys 'g'. Does nothing if 'g' is null. */ void governor_destroy(struct governor *g) { if (g) { VLOG_INFO("disengaging"); free(g->table); free(g); } } /* Performs periodic maintenance work on 'g'. 
*/ void governor_run(struct governor *g) { if (time_msec() - g->start > MAX_ELAPSED) { if (g->size > MIN_SIZE) { /* The generation has run longer than MAX_ELAPSED: halve the * table. */ governor_new_generation(g, g->size / 2); } else { /* Don't start a new generation (we'd never go idle). */ } } } /* Arranges for the poll loop to wake up when 'g' needs to do some work. */ void governor_wait(struct governor *g) { if (g->size > MIN_SIZE) { poll_timer_wait_until(g->start + MAX_ELAPSED); } } /* Returns true if 'g' has been doing only a minimal amount of work and thus * the client should consider getting rid of it entirely. */ bool governor_is_idle(struct governor *g) { return g->size == MIN_SIZE && time_msec() - g->start > MAX_ELAPSED; } /* Tests whether a flow whose hash is 'hash' and for which 'n' packets have * just arrived should be set up in the datapath or just processed on a * packet-by-packet basis. Returns true to set up a datapath flow, false to * process the packets individually. * * One would expect 'n' to ordinarily be 1, but if batching leads multiple * packets to be processed at a time then it could be greater. */ bool governor_should_install_flow(struct governor *g, uint32_t hash, int n) { int old_count, new_count; bool install_flow; uint8_t *e; ovs_assert(n > 0); /* Count these packets and begin a new generation if necessary. */ g->n_packets += n; if (g->n_packets >= g->size / 4) { unsigned int new_size; long long elapsed; elapsed = time_msec() - g->start; /* A generation faster than MIN_ELAPSED doubles the table, one slower * than MAX_ELAPSED halves it, and otherwise the size is kept. */ new_size = (elapsed < MIN_ELAPSED && g->size < MAX_SIZE ? g->size * 2 : elapsed > MAX_ELAPSED && g->size > MIN_SIZE ? g->size / 2 : g->size); governor_new_generation(g, new_size); } /* If we've set up most of the flows we've seen, then we're wasting time * handling most packets one at a time, so in this case instead set up most * flows directly and use the remaining flows as a sample set to adjust our * criteria later. * * The definition of "most" is conservative, but the sample size is tuned * based on a few experiments with TCP_CRR mode in netperf. 
*/ if (g->n_setups >= g->n_flows - g->n_flows / 16 && g->n_flows >= 64 && hash & 0x3f) { /* Hashes whose low 6 bits are all zero (1 in 64) skip this shortcut * and go through the counters below as the sample set. */ g->n_shortcuts++; return true; } /* Do hash table processing. * * Odd-numbered hash values use high-order nibbles. * Even-numbered hash values use low-order nibbles. */ e = &g->table[(hash >> 1) & (g->size - 1)]; old_count = (hash & 1 ? *e >> 4 : *e & 0x0f); if (!old_count) { g->n_flows++; } new_count = n + old_count; if (new_count >= FLOW_SETUP_THRESHOLD) { /* Set up the flow and restart its counter from zero. */ g->n_setups++; install_flow = true; new_count = 0; } else { install_flow = false; } /* Store the new count in this hash's nibble, keeping the other one. */ *e = hash & 1 ? (new_count << 4) | (*e & 0x0f) : (*e & 0xf0) | new_count; return install_flow; } /* Starts a new generation in 'g' with a table size of 'size' bytes. 'size' * must be a power of two between MIN_SIZE and MAX_SIZE, inclusive. */ static void governor_new_generation(struct governor *g, unsigned int size) { ovs_assert(size >= MIN_SIZE && size <= MAX_SIZE); ovs_assert(is_pow2(size)); /* Allocate new table, if necessary. */ if (g->size != size) { if (!g->size) { VLOG_INFO("engaging governor with %u kB hash table", size / 1024); } else { VLOG_INFO("processed %u packets in %.2f s, " "%s hash table to %u kB " "(%u hashes, %u setups, %u shortcuts)", g->n_packets, (time_msec() - g->start) / 1000.0, size > g->size ? "enlarging" : "shrinking", size / 1024, g->n_flows, g->n_setups, g->n_shortcuts); } free(g->table); g->table = xmalloc(size * sizeof *g->table); g->size = size; } else { VLOG_DBG("processed %u packets in %.2f s with %u kB hash table " "(%u hashes, %u setups, %u shortcuts)", g->n_packets, (time_msec() - g->start) / 1000.0, size / 1024, g->n_flows, g->n_setups, g->n_shortcuts); } /* Clear data for next generation. 
*/ memset(g->table, 0, size * sizeof *g->table); g->start = time_msec(); g->n_packets = 0; /* The flow and setup counts are halved rather than zeroed, so part of the * previous generation's counts carries into the next one. */ g->n_flows /= 2; g->n_setups /= 2; g->n_shortcuts = 0; } openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-governor.h000066400000000000000000000043761226605124000235740ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OFPROTO_DPIF_GOVERNOR_H #define OFPROTO_DPIF_GOVERNOR_H 1 /* Flow setup rate limiter. * * A governor in an engine limits a vehicle's speed. This governor limits the * rate at which flows are set up in the datapath. The client provides as * input the hashes of observed packets. The governor keeps track of hashes * seen multiple times. When a given hash is seen often enough, the governor * indicates to its client that it should set up a facet and a subfacet and a * datapath flow for that flow. * * The same tracking could be done in terms of facets and subfacets directly, * but the governor code uses much less time and space to do the same job. */ #include #include struct governor { char *name; /* Name, for log messages. */ uint8_t *table; /* Table of counters, two per byte. */ unsigned int size; /* Table size in bytes. */ long long int start; /* Time when the table was last cleared. */ unsigned int n_packets; /* Number of packets processed. */ /* Statistics for skipping counters when most flows get set up. */ unsigned int n_flows; /* Number of unique flows seen. 
*/ unsigned int n_setups; /* Number of flows set up based on counters. */ unsigned int n_shortcuts; /* Number of flows set up based on history. */ }; struct governor *governor_create(void); void governor_destroy(struct governor *); void governor_run(struct governor *); void governor_wait(struct governor *); bool governor_is_idle(struct governor *); bool governor_should_install_flow(struct governor *, uint32_t hash, int n); #endif /* ofproto/ofproto-dpif-governor.h */ openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-ipfix.c000066400000000000000000001357041226605124000230450ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ofproto-dpif-ipfix.h" #include #include "byte-order.h" #include "collectors.h" #include "flow.h" #include "hash.h" #include "hmap.h" #include "list.h" #include "ofpbuf.h" #include "ofproto.h" #include "packets.h" #include "poll-loop.h" #include "sset.h" #include "util.h" #include "timeval.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ipfix); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; /* Cf. IETF RFC 5101 Section 10.3.4. */ #define IPFIX_DEFAULT_COLLECTOR_PORT 4739 struct dpif_ipfix_exporter { struct collectors *collectors; uint32_t seq_number; time_t last_template_set_time; struct hmap cache_flow_key_map; /* ipfix_flow_cache_entry. 
*/ struct list cache_flow_start_timestamp_list; /* ipfix_flow_cache_entry. */ uint32_t cache_active_timeout; /* In seconds. */ uint32_t cache_max_flows; }; struct dpif_ipfix_bridge_exporter { struct dpif_ipfix_exporter exporter; struct ofproto_ipfix_bridge_exporter_options *options; uint32_t probability; }; struct dpif_ipfix_flow_exporter { struct dpif_ipfix_exporter exporter; struct ofproto_ipfix_flow_exporter_options *options; }; struct dpif_ipfix_flow_exporter_map_node { struct hmap_node node; struct dpif_ipfix_flow_exporter exporter; }; struct dpif_ipfix { struct dpif_ipfix_bridge_exporter bridge_exporter; struct hmap flow_exporter_map; /* dpif_ipfix_flow_exporter_map_node. */ atomic_int ref_cnt; }; #define IPFIX_VERSION 0x000a /* When using UDP, IPFIX Template Records must be re-sent regularly. * The standard default interval is 10 minutes (600 seconds). * Cf. IETF RFC 5101 Section 10.3.6. */ #define IPFIX_TEMPLATE_INTERVAL 600 /* Cf. IETF RFC 5101 Section 3.1. */ OVS_PACKED( struct ipfix_header { ovs_be16 version; /* IPFIX_VERSION. */ ovs_be16 length; /* Length in bytes including this header. */ ovs_be32 export_time; /* Seconds since the epoch. */ ovs_be32 seq_number; /* Message sequence number. */ ovs_be32 obs_domain_id; /* Observation Domain ID. */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_header) == 16); #define IPFIX_SET_ID_TEMPLATE 2 #define IPFIX_SET_ID_OPTION_TEMPLATE 3 /* Cf. IETF RFC 5101 Section 3.3.2. */ OVS_PACKED( struct ipfix_set_header { ovs_be16 set_id; /* IPFIX_SET_ID_* or valid template ID for Data Sets. */ ovs_be16 length; /* Length of the set in bytes including header. */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_set_header) == 4); /* Alternatives for templates at each layer. A template is defined by * a combination of one value for each layer. */ enum ipfix_proto_l2 { IPFIX_PROTO_L2_ETH = 0, /* No VLAN. 
*/ IPFIX_PROTO_L2_VLAN, NUM_IPFIX_PROTO_L2 }; enum ipfix_proto_l3 { IPFIX_PROTO_L3_UNKNOWN = 0, IPFIX_PROTO_L3_IPV4, IPFIX_PROTO_L3_IPV6, NUM_IPFIX_PROTO_L3 }; enum ipfix_proto_l4 { IPFIX_PROTO_L4_UNKNOWN = 0, IPFIX_PROTO_L4_TCP_UDP, NUM_IPFIX_PROTO_L4 }; /* Any Template ID > 255 is usable for Template Records. */ #define IPFIX_TEMPLATE_ID_MIN 256 /* Cf. IETF RFC 5101 Section 3.4.1. */ OVS_PACKED( struct ipfix_template_record_header { ovs_be16 template_id; ovs_be16 field_count; }); BUILD_ASSERT_DECL(sizeof(struct ipfix_template_record_header) == 4); enum ipfix_entity_id { #define IPFIX_ENTITY(ENUM, ID, SIZE, NAME) IPFIX_ENTITY_ID_##ENUM = ID, #include "ofproto/ipfix-entities.def" }; enum ipfix_entity_size { #define IPFIX_ENTITY(ENUM, ID, SIZE, NAME) IPFIX_ENTITY_SIZE_##ENUM = SIZE, #include "ofproto/ipfix-entities.def" }; OVS_PACKED( struct ipfix_template_field_specifier { ovs_be16 element_id; /* IPFIX_ENTITY_ID_*. */ ovs_be16 field_length; /* Length of the field's value, in bytes. */ /* No Enterprise ID, since only standard element IDs are specified. */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_template_field_specifier) == 4); /* Part of data record flow key for common metadata and Ethernet entities. */ OVS_PACKED( struct ipfix_data_record_flow_key_common { ovs_be32 observation_point_id; /* OBSERVATION_POINT_ID */ uint8_t source_mac_address[6]; /* SOURCE_MAC_ADDRESS */ uint8_t destination_mac_address[6]; /* DESTINATION_MAC_ADDRESS */ ovs_be16 ethernet_type; /* ETHERNET_TYPE */ uint8_t ethernet_header_length; /* ETHERNET_HEADER_LENGTH */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_common) == 19); /* Part of data record flow key for VLAN entities. */ OVS_PACKED( struct ipfix_data_record_flow_key_vlan { ovs_be16 vlan_id; /* VLAN_ID */ ovs_be16 dot1q_vlan_id; /* DOT1Q_VLAN_ID */ uint8_t dot1q_priority; /* DOT1Q_PRIORITY */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_vlan) == 5); /* Part of data record flow key for IP entities. 
*/ /* XXX: Replace IP_TTL with MINIMUM_TTL and MAXIMUM_TTL? */ OVS_PACKED( struct ipfix_data_record_flow_key_ip { uint8_t ip_version; /* IP_VERSION */ uint8_t ip_ttl; /* IP_TTL */ uint8_t protocol_identifier; /* PROTOCOL_IDENTIFIER */ uint8_t ip_diff_serv_code_point; /* IP_DIFF_SERV_CODE_POINT */ uint8_t ip_precedence; /* IP_PRECEDENCE */ uint8_t ip_class_of_service; /* IP_CLASS_OF_SERVICE */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ip) == 6); /* Part of data record flow key for IPv4 entities. */ OVS_PACKED( struct ipfix_data_record_flow_key_ipv4 { ovs_be32 source_ipv4_address; /* SOURCE_IPV4_ADDRESS */ ovs_be32 destination_ipv4_address; /* DESTINATION_IPV4_ADDRESS */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ipv4) == 8); /* Part of data record flow key for IPv6 entities. */ OVS_PACKED( struct ipfix_data_record_flow_key_ipv6 { uint8_t source_ipv6_address[16]; /* SOURCE_IPV6_ADDRESS */ uint8_t destination_ipv6_address[16]; /* DESTINATION_IPV6_ADDRESS */ ovs_be32 flow_label_ipv6; /* FLOW_LABEL_IPV6 */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ipv6) == 36); /* Part of data record flow key for TCP/UDP entities. */ OVS_PACKED( struct ipfix_data_record_flow_key_tcpudp { ovs_be16 source_transport_port; /* SOURCE_TRANSPORT_PORT */ ovs_be16 destination_transport_port; /* DESTINATION_TRANSPORT_PORT */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_tcpudp) == 4); /* Cf. IETF RFC 5102 Section 5.11.3. */ enum ipfix_flow_end_reason { IDLE_TIMEOUT = 0x01, ACTIVE_TIMEOUT = 0x02, END_OF_FLOW_DETECTED = 0x03, FORCED_END = 0x04, LACK_OF_RESOURCES = 0x05 }; /* Part of data record for common aggregated elements. 
*/ OVS_PACKED( struct ipfix_data_record_aggregated_common { ovs_be32 flow_start_delta_microseconds; /* FLOW_START_DELTA_MICROSECONDS */ ovs_be32 flow_end_delta_microseconds; /* FLOW_END_DELTA_MICROSECONDS */ ovs_be64 packet_delta_count; /* PACKET_DELTA_COUNT */ ovs_be64 layer2_octet_delta_count; /* LAYER2_OCTET_DELTA_COUNT */ uint8_t flow_end_reason; /* FLOW_END_REASON */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_aggregated_common) == 25); /* Part of data record for IP aggregated elements. */ OVS_PACKED( struct ipfix_data_record_aggregated_ip { ovs_be64 octet_delta_sum_of_squares; /* OCTET_DELTA_SUM_OF_SQUARES */ ovs_be64 minimum_ip_total_length; /* MINIMUM_IP_TOTAL_LENGTH */ ovs_be64 maximum_ip_total_length; /* MAXIMUM_IP_TOTAL_LENGTH */ }); BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_aggregated_ip) == 24); /* Max length of a flow key: the common, VLAN and IP parts plus the IPv6 and * TCP/UDP parts. */ #define MAX_FLOW_KEY_LEN \ (sizeof(struct ipfix_data_record_flow_key_common) \ + sizeof(struct ipfix_data_record_flow_key_vlan) \ + sizeof(struct ipfix_data_record_flow_key_ip) \ + sizeof(struct ipfix_data_record_flow_key_ipv6) \ + sizeof(struct ipfix_data_record_flow_key_tcpudp)) #define MAX_DATA_RECORD_LEN \ (MAX_FLOW_KEY_LEN \ + sizeof(struct ipfix_data_record_aggregated_common) \ + sizeof(struct ipfix_data_record_aggregated_ip)) /* Max length of a data set. To simplify the implementation, each * data record is sent in a separate data set, so each data set * contains at most one data record. */ #define MAX_DATA_SET_LEN \ (sizeof(struct ipfix_set_header) \ + MAX_DATA_RECORD_LEN) /* Max length of an IPFIX message. Arbitrarily set to accommodate low * MTU. */ #define MAX_MESSAGE_LEN 1024 /* Cache structures. */ /* Flow key. */ struct ipfix_flow_key { uint32_t obs_domain_id; uint16_t template_id; size_t flow_key_msg_part_size; /* NOTE(review): presumably the number of * bytes used in 'flow_key_msg_part'; * confirm against its users. */ uint64_t flow_key_msg_part[DIV_ROUND_UP(MAX_FLOW_KEY_LEN, 8)]; /* Room for * MAX_FLOW_KEY_LEN bytes, rounded up to whole 64-bit words. */ }; /* Flow cache entry. 
*/ struct ipfix_flow_cache_entry { struct hmap_node flow_key_map_node; /* In the exporter's * 'cache_flow_key_map'. */ struct list cache_flow_start_timestamp_list_node; /* In the exporter's * 'cache_flow_start_timestamp_list'. */ struct ipfix_flow_key flow_key; /* Common aggregated elements. */ uint64_t flow_start_timestamp_usec; uint64_t flow_end_timestamp_usec; uint64_t packet_delta_count; uint64_t layer2_octet_delta_count; uint64_t octet_delta_sum_of_squares; /* 0 if not IP. */ uint16_t minimum_ip_total_length; /* 0 if not IP. */ uint16_t maximum_ip_total_length; /* 0 if not IP. */ }; static void dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *, bool, const uint64_t, const uint32_t); static void get_export_time_now(uint64_t *, uint32_t *); static void dpif_ipfix_cache_expire_now(struct dpif_ipfix_exporter *, bool); /* Returns true if 'a' and 'b' hold the same bridge exporter settings, * including equal sets of targets, false otherwise. */ static bool ofproto_ipfix_bridge_exporter_options_equal( const struct ofproto_ipfix_bridge_exporter_options *a, const struct ofproto_ipfix_bridge_exporter_options *b) { return (a->obs_domain_id == b->obs_domain_id && a->obs_point_id == b->obs_point_id && a->sampling_rate == b->sampling_rate && a->cache_active_timeout == b->cache_active_timeout && a->cache_max_flows == b->cache_max_flows && sset_equals(&a->targets, &b->targets)); } /* Returns a newly allocated copy of 'old' that has its own clone of the * 'targets' set. */ static struct ofproto_ipfix_bridge_exporter_options * ofproto_ipfix_bridge_exporter_options_clone( const struct ofproto_ipfix_bridge_exporter_options *old) { struct ofproto_ipfix_bridge_exporter_options *new = xmemdup(old, sizeof *old); sset_clone(&new->targets, &old->targets); return new; } /* Frees 'options' and its 'targets' set. Does nothing if 'options' is * null. */ static void ofproto_ipfix_bridge_exporter_options_destroy( struct ofproto_ipfix_bridge_exporter_options *options) { if (options) { sset_destroy(&options->targets); free(options); } } /* Returns true if 'a' and 'b' hold the same flow exporter settings, * including equal sets of targets, false otherwise. */ static bool ofproto_ipfix_flow_exporter_options_equal( const struct ofproto_ipfix_flow_exporter_options *a, const struct ofproto_ipfix_flow_exporter_options *b) { return (a->collector_set_id == b->collector_set_id && a->cache_active_timeout == b->cache_active_timeout && a->cache_max_flows == b->cache_max_flows && sset_equals(&a->targets, &b->targets)); } static 
struct ofproto_ipfix_flow_exporter_options * ofproto_ipfix_flow_exporter_options_clone( const struct ofproto_ipfix_flow_exporter_options *old) { struct ofproto_ipfix_flow_exporter_options *new = xmemdup(old, sizeof *old); sset_clone(&new->targets, &old->targets); return new; } static void ofproto_ipfix_flow_exporter_options_destroy( struct ofproto_ipfix_flow_exporter_options *options) { if (options) { sset_destroy(&options->targets); free(options); } } static void dpif_ipfix_exporter_init(struct dpif_ipfix_exporter *exporter) { exporter->collectors = NULL; exporter->seq_number = 1; exporter->last_template_set_time = TIME_MIN; hmap_init(&exporter->cache_flow_key_map); list_init(&exporter->cache_flow_start_timestamp_list); exporter->cache_active_timeout = 0; exporter->cache_max_flows = 0; } static void dpif_ipfix_exporter_clear(struct dpif_ipfix_exporter *exporter) { /* Flush the cache with flow end reason "forced end." */ dpif_ipfix_cache_expire_now(exporter, true); collectors_destroy(exporter->collectors); exporter->collectors = NULL; exporter->seq_number = 1; exporter->last_template_set_time = TIME_MIN; exporter->cache_active_timeout = 0; exporter->cache_max_flows = 0; } static void dpif_ipfix_exporter_destroy(struct dpif_ipfix_exporter *exporter) { dpif_ipfix_exporter_clear(exporter); hmap_destroy(&exporter->cache_flow_key_map); } static bool dpif_ipfix_exporter_set_options(struct dpif_ipfix_exporter *exporter, const struct sset *targets, const uint32_t cache_active_timeout, const uint32_t cache_max_flows) { collectors_destroy(exporter->collectors); collectors_create(targets, IPFIX_DEFAULT_COLLECTOR_PORT, &exporter->collectors); if (exporter->collectors == NULL) { VLOG_WARN_RL(&rl, "no collectors could be initialized, " "IPFIX exporter disabled"); dpif_ipfix_exporter_clear(exporter); return false; } exporter->cache_active_timeout = cache_active_timeout; exporter->cache_max_flows = cache_max_flows; return true; } static void dpif_ipfix_bridge_exporter_init(struct 
dpif_ipfix_bridge_exporter *exporter) { dpif_ipfix_exporter_init(&exporter->exporter); exporter->options = NULL; exporter->probability = 0; } static void dpif_ipfix_bridge_exporter_clear(struct dpif_ipfix_bridge_exporter *exporter) { dpif_ipfix_exporter_clear(&exporter->exporter); ofproto_ipfix_bridge_exporter_options_destroy(exporter->options); exporter->options = NULL; exporter->probability = 0; } static void dpif_ipfix_bridge_exporter_destroy(struct dpif_ipfix_bridge_exporter *exporter) { dpif_ipfix_bridge_exporter_clear(exporter); dpif_ipfix_exporter_destroy(&exporter->exporter); } static void dpif_ipfix_bridge_exporter_set_options( struct dpif_ipfix_bridge_exporter *exporter, const struct ofproto_ipfix_bridge_exporter_options *options) { bool options_changed; if (!options || sset_is_empty(&options->targets)) { /* No point in doing any work if there are no targets. */ dpif_ipfix_bridge_exporter_clear(exporter); return; } options_changed = ( !exporter->options || !ofproto_ipfix_bridge_exporter_options_equal( options, exporter->options)); /* Configure collectors if options have changed or if we're * shortchanged in collectors (which indicates that opening one or * more of the configured collectors failed, so that we should * retry). */ if (options_changed || collectors_count(exporter->exporter.collectors) < sset_count(&options->targets)) { if (!dpif_ipfix_exporter_set_options( &exporter->exporter, &options->targets, options->cache_active_timeout, options->cache_max_flows)) { return; } } /* Avoid reconfiguring if options didn't change. */ if (!options_changed) { return; } ofproto_ipfix_bridge_exporter_options_destroy(exporter->options); exporter->options = ofproto_ipfix_bridge_exporter_options_clone(options); exporter->probability = MAX(1, UINT32_MAX / exporter->options->sampling_rate); /* Run over the cache as some entries might have expired after * changing the timeouts. 
*/ dpif_ipfix_cache_expire_now(&exporter->exporter, false); } static struct dpif_ipfix_flow_exporter_map_node* dpif_ipfix_find_flow_exporter_map_node( const struct dpif_ipfix *di, const uint32_t collector_set_id) OVS_REQUIRES(mutex) { struct dpif_ipfix_flow_exporter_map_node *exporter_node; HMAP_FOR_EACH_WITH_HASH (exporter_node, node, hash_int(collector_set_id, 0), &di->flow_exporter_map) { if (exporter_node->exporter.options->collector_set_id == collector_set_id) { return exporter_node; } } return NULL; } static void dpif_ipfix_flow_exporter_init(struct dpif_ipfix_flow_exporter *exporter) { dpif_ipfix_exporter_init(&exporter->exporter); exporter->options = NULL; } static void dpif_ipfix_flow_exporter_clear(struct dpif_ipfix_flow_exporter *exporter) { dpif_ipfix_exporter_clear(&exporter->exporter); ofproto_ipfix_flow_exporter_options_destroy(exporter->options); exporter->options = NULL; } static void dpif_ipfix_flow_exporter_destroy(struct dpif_ipfix_flow_exporter *exporter) { dpif_ipfix_flow_exporter_clear(exporter); dpif_ipfix_exporter_destroy(&exporter->exporter); } static bool dpif_ipfix_flow_exporter_set_options( struct dpif_ipfix_flow_exporter *exporter, const struct ofproto_ipfix_flow_exporter_options *options) { bool options_changed; if (sset_is_empty(&options->targets)) { /* No point in doing any work if there are no targets. */ dpif_ipfix_flow_exporter_clear(exporter); return true; } options_changed = ( !exporter->options || !ofproto_ipfix_flow_exporter_options_equal( options, exporter->options)); /* Configure collectors if options have changed or if we're * shortchanged in collectors (which indicates that opening one or * more of the configured collectors failed, so that we should * retry). 
*/ if (options_changed || collectors_count(exporter->exporter.collectors) < sset_count(&options->targets)) { if (!dpif_ipfix_exporter_set_options( &exporter->exporter, &options->targets, options->cache_active_timeout, options->cache_max_flows)) { return false; } } /* Avoid reconfiguring if options didn't change. */ if (!options_changed) { return true; } ofproto_ipfix_flow_exporter_options_destroy(exporter->options); exporter->options = ofproto_ipfix_flow_exporter_options_clone(options); /* Run over the cache as some entries might have expired after * changing the timeouts. */ dpif_ipfix_cache_expire_now(&exporter->exporter, false); return true; } void dpif_ipfix_set_options( struct dpif_ipfix *di, const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options, const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options, size_t n_flow_exporters_options) OVS_EXCLUDED(mutex) { int i; struct ofproto_ipfix_flow_exporter_options *options; struct dpif_ipfix_flow_exporter_map_node *node, *next; size_t n_broken_flow_exporters_options = 0; ovs_mutex_lock(&mutex); dpif_ipfix_bridge_exporter_set_options(&di->bridge_exporter, bridge_exporter_options); /* Add new flow exporters and update current flow exporters. */ options = (struct ofproto_ipfix_flow_exporter_options *) flow_exporters_options; for (i = 0; i < n_flow_exporters_options; i++) { node = dpif_ipfix_find_flow_exporter_map_node( di, options->collector_set_id); if (!node) { node = xzalloc(sizeof *node); dpif_ipfix_flow_exporter_init(&node->exporter); hmap_insert(&di->flow_exporter_map, &node->node, hash_int(options->collector_set_id, 0)); } if (!dpif_ipfix_flow_exporter_set_options(&node->exporter, options)) { n_broken_flow_exporters_options++; } options++; } ovs_assert(hmap_count(&di->flow_exporter_map) >= (n_flow_exporters_options - n_broken_flow_exporters_options)); /* Remove dropped flow exporters, if any needs to be removed. 
*/ if (hmap_count(&di->flow_exporter_map) > n_flow_exporters_options) { HMAP_FOR_EACH_SAFE (node, next, node, &di->flow_exporter_map) { /* This is slow but doesn't take any extra memory, and * this table is not supposed to contain many rows anyway. */ options = (struct ofproto_ipfix_flow_exporter_options *) flow_exporters_options; for (i = 0; i < n_flow_exporters_options; i++) { if (node->exporter.options->collector_set_id == options->collector_set_id) { break; } options++; } if (i == n_flow_exporters_options) { // Not found. hmap_remove(&di->flow_exporter_map, &node->node); dpif_ipfix_flow_exporter_destroy(&node->exporter); free(node); } } } ovs_assert(hmap_count(&di->flow_exporter_map) == (n_flow_exporters_options - n_broken_flow_exporters_options)); ovs_mutex_unlock(&mutex); } struct dpif_ipfix * dpif_ipfix_create(void) { struct dpif_ipfix *di; di = xzalloc(sizeof *di); dpif_ipfix_bridge_exporter_init(&di->bridge_exporter); hmap_init(&di->flow_exporter_map); atomic_init(&di->ref_cnt, 1); return di; } struct dpif_ipfix * dpif_ipfix_ref(const struct dpif_ipfix *di_) { struct dpif_ipfix *di = CONST_CAST(struct dpif_ipfix *, di_); if (di) { int orig; atomic_add(&di->ref_cnt, 1, &orig); ovs_assert(orig > 0); } return di; } uint32_t dpif_ipfix_get_bridge_exporter_probability(const struct dpif_ipfix *di) OVS_EXCLUDED(mutex) { uint32_t ret; ovs_mutex_lock(&mutex); ret = di->bridge_exporter.probability; ovs_mutex_unlock(&mutex); return ret; } static void dpif_ipfix_clear(struct dpif_ipfix *di) OVS_REQUIRES(mutex) { struct dpif_ipfix_flow_exporter_map_node *exp_node, *exp_next; dpif_ipfix_bridge_exporter_clear(&di->bridge_exporter); HMAP_FOR_EACH_SAFE (exp_node, exp_next, node, &di->flow_exporter_map) { hmap_remove(&di->flow_exporter_map, &exp_node->node); dpif_ipfix_flow_exporter_destroy(&exp_node->exporter); free(exp_node); } } void dpif_ipfix_unref(struct dpif_ipfix *di) OVS_EXCLUDED(mutex) { int orig; if (!di) { return; } atomic_sub(&di->ref_cnt, 1, &orig); 
ovs_assert(orig > 0); if (orig == 1) { ovs_mutex_lock(&mutex); dpif_ipfix_clear(di); dpif_ipfix_bridge_exporter_destroy(&di->bridge_exporter); hmap_destroy(&di->flow_exporter_map); free(di); ovs_mutex_unlock(&mutex); } } static void ipfix_init_header(uint32_t export_time_sec, uint32_t seq_number, uint32_t obs_domain_id, struct ofpbuf *msg) { struct ipfix_header *hdr; hdr = ofpbuf_put_zeros(msg, sizeof *hdr); hdr->version = htons(IPFIX_VERSION); hdr->length = htons(sizeof *hdr); /* Updated in ipfix_send_msg. */ hdr->export_time = htonl(export_time_sec); hdr->seq_number = htonl(seq_number); hdr->obs_domain_id = htonl(obs_domain_id); } static void ipfix_send_msg(const struct collectors *collectors, struct ofpbuf *msg) { struct ipfix_header *hdr; /* Adjust the length in the header. */ hdr = msg->data; hdr->length = htons(msg->size); collectors_send(collectors, msg->data, msg->size); msg->size = 0; } static uint16_t ipfix_get_template_id(enum ipfix_proto_l2 l2, enum ipfix_proto_l3 l3, enum ipfix_proto_l4 l4) { uint16_t template_id; template_id = l2; template_id = template_id * NUM_IPFIX_PROTO_L3 + l3; template_id = template_id * NUM_IPFIX_PROTO_L4 + l4; return IPFIX_TEMPLATE_ID_MIN + template_id; } static void ipfix_define_template_entity(enum ipfix_entity_id id, enum ipfix_entity_size size, struct ofpbuf *msg) { struct ipfix_template_field_specifier *field; field = ofpbuf_put_zeros(msg, sizeof *field); field->element_id = htons(id); field->field_length = htons(size); } static uint16_t ipfix_define_template_fields(enum ipfix_proto_l2 l2, enum ipfix_proto_l3 l3, enum ipfix_proto_l4 l4, struct ofpbuf *msg) { uint16_t count = 0; #define DEF(ID) \ { \ ipfix_define_template_entity(IPFIX_ENTITY_ID_##ID, \ IPFIX_ENTITY_SIZE_##ID, msg); \ count++; \ } /* 1. Flow key. */ DEF(OBSERVATION_POINT_ID); /* Common Ethernet entities. 
*/ DEF(SOURCE_MAC_ADDRESS); DEF(DESTINATION_MAC_ADDRESS); DEF(ETHERNET_TYPE); DEF(ETHERNET_HEADER_LENGTH); if (l2 == IPFIX_PROTO_L2_VLAN) { DEF(VLAN_ID); DEF(DOT1Q_VLAN_ID); DEF(DOT1Q_PRIORITY); } if (l3 != IPFIX_PROTO_L3_UNKNOWN) { DEF(IP_VERSION); DEF(IP_TTL); DEF(PROTOCOL_IDENTIFIER); DEF(IP_DIFF_SERV_CODE_POINT); DEF(IP_PRECEDENCE); DEF(IP_CLASS_OF_SERVICE); if (l3 == IPFIX_PROTO_L3_IPV4) { DEF(SOURCE_IPV4_ADDRESS); DEF(DESTINATION_IPV4_ADDRESS); } else { /* l3 == IPFIX_PROTO_L3_IPV6 */ DEF(SOURCE_IPV6_ADDRESS); DEF(DESTINATION_IPV6_ADDRESS); DEF(FLOW_LABEL_IPV6); } } if (l4 != IPFIX_PROTO_L4_UNKNOWN) { DEF(SOURCE_TRANSPORT_PORT); DEF(DESTINATION_TRANSPORT_PORT); } /* 2. Flow aggregated data. */ DEF(FLOW_START_DELTA_MICROSECONDS); DEF(FLOW_END_DELTA_MICROSECONDS); DEF(PACKET_DELTA_COUNT); DEF(LAYER2_OCTET_DELTA_COUNT); DEF(FLOW_END_REASON); if (l3 != IPFIX_PROTO_L3_UNKNOWN) { DEF(OCTET_DELTA_SUM_OF_SQUARES); DEF(MINIMUM_IP_TOTAL_LENGTH); DEF(MAXIMUM_IP_TOTAL_LENGTH); } #undef DEF return count; } static void ipfix_send_template_msg(struct dpif_ipfix_exporter *exporter, uint32_t export_time_sec, uint32_t obs_domain_id) { uint64_t msg_stub[DIV_ROUND_UP(MAX_MESSAGE_LEN, 8)]; struct ofpbuf msg; size_t set_hdr_offset, tmpl_hdr_offset; struct ipfix_set_header *set_hdr; struct ipfix_template_record_header *tmpl_hdr; uint16_t field_count; enum ipfix_proto_l2 l2; enum ipfix_proto_l3 l3; enum ipfix_proto_l4 l4; ofpbuf_use_stub(&msg, msg_stub, sizeof msg_stub); ipfix_init_header(export_time_sec, exporter->seq_number, obs_domain_id, &msg); set_hdr_offset = msg.size; /* Add a Template Set. */ set_hdr = ofpbuf_put_zeros(&msg, sizeof *set_hdr); set_hdr->set_id = htons(IPFIX_SET_ID_TEMPLATE); /* Define one template for each possible combination of * protocols. 
*/ for (l2 = 0; l2 < NUM_IPFIX_PROTO_L2; l2++) { for (l3 = 0; l3 < NUM_IPFIX_PROTO_L3; l3++) { for (l4 = 0; l4 < NUM_IPFIX_PROTO_L4; l4++) { if (l3 == IPFIX_PROTO_L3_UNKNOWN && l4 != IPFIX_PROTO_L4_UNKNOWN) { continue; } tmpl_hdr_offset = msg.size; tmpl_hdr = ofpbuf_put_zeros(&msg, sizeof *tmpl_hdr); tmpl_hdr->template_id = htons( ipfix_get_template_id(l2, l3, l4)); field_count = ipfix_define_template_fields(l2, l3, l4, &msg); tmpl_hdr = (struct ipfix_template_record_header*) ((uint8_t*)msg.data + tmpl_hdr_offset); tmpl_hdr->field_count = htons(field_count); } } } set_hdr = (struct ipfix_set_header*)((uint8_t*)msg.data + set_hdr_offset); set_hdr->length = htons(msg.size - set_hdr_offset); /* XXX: Add Options Template Sets, at least to define a Flow Keys * Option Template. */ ipfix_send_msg(exporter->collectors, &msg); ofpbuf_uninit(&msg); } static inline uint32_t ipfix_hash_flow_key(const struct ipfix_flow_key *flow_key, uint32_t basis) { uint32_t hash; hash = hash_int(flow_key->obs_domain_id, basis); hash = hash_int(flow_key->template_id, hash); hash = hash_bytes(flow_key->flow_key_msg_part, flow_key->flow_key_msg_part_size, hash); return hash; } static bool ipfix_flow_key_equal(const struct ipfix_flow_key *a, const struct ipfix_flow_key *b) { /* The template ID determines the flow key size, so not need to * compare it. 
*/ return (a->obs_domain_id == b->obs_domain_id && a->template_id == b->template_id && memcmp(a->flow_key_msg_part, b->flow_key_msg_part, a->flow_key_msg_part_size) == 0); } static struct ipfix_flow_cache_entry* ipfix_cache_find_entry(const struct dpif_ipfix_exporter *exporter, const struct ipfix_flow_key *flow_key) { struct ipfix_flow_cache_entry *entry; HMAP_FOR_EACH_WITH_HASH (entry, flow_key_map_node, ipfix_hash_flow_key(flow_key, 0), &exporter->cache_flow_key_map) { if (ipfix_flow_key_equal(&entry->flow_key, flow_key)) { return entry; } } return NULL; } static bool ipfix_cache_next_timeout_msec(const struct dpif_ipfix_exporter *exporter, long long int *next_timeout_msec) { struct ipfix_flow_cache_entry *entry; LIST_FOR_EACH (entry, cache_flow_start_timestamp_list_node, &exporter->cache_flow_start_timestamp_list) { *next_timeout_msec = entry->flow_start_timestamp_usec / 1000LL + 1000LL * exporter->cache_active_timeout; return true; } return false; } static void ipfix_cache_aggregate_entries(struct ipfix_flow_cache_entry *from_entry, struct ipfix_flow_cache_entry *to_entry) { uint64_t *to_start, *to_end, *from_start, *from_end; uint16_t *to_min_len, *to_max_len, *from_min_len, *from_max_len; to_start = &to_entry->flow_start_timestamp_usec; to_end = &to_entry->flow_end_timestamp_usec; from_start = &from_entry->flow_start_timestamp_usec; from_end = &from_entry->flow_end_timestamp_usec; if (*to_start > *from_start) { *to_start = *from_start; } if (*to_end < *from_end) { *to_end = *from_end; } to_entry->packet_delta_count += from_entry->packet_delta_count; to_entry->layer2_octet_delta_count += from_entry->layer2_octet_delta_count; to_entry->octet_delta_sum_of_squares += from_entry->octet_delta_sum_of_squares; to_min_len = &to_entry->minimum_ip_total_length; to_max_len = &to_entry->maximum_ip_total_length; from_min_len = &from_entry->minimum_ip_total_length; from_max_len = &from_entry->maximum_ip_total_length; if (!*to_min_len || (*from_min_len && *to_min_len > 
*from_min_len)) { *to_min_len = *from_min_len; } if (*to_max_len < *from_max_len) { *to_max_len = *from_max_len; } } /* Add an entry into a flow cache. The entry is either aggregated into * an existing entry with the same flow key and free()d, or it is * inserted into the cache. */ static void ipfix_cache_update(struct dpif_ipfix_exporter *exporter, struct ipfix_flow_cache_entry *entry) { struct ipfix_flow_cache_entry *old_entry; old_entry = ipfix_cache_find_entry(exporter, &entry->flow_key); if (old_entry == NULL) { hmap_insert(&exporter->cache_flow_key_map, &entry->flow_key_map_node, ipfix_hash_flow_key(&entry->flow_key, 0)); /* As the latest entry added into the cache, it should * logically have the highest flow_start_timestamp_usec, so * append it at the tail. */ list_push_back(&exporter->cache_flow_start_timestamp_list, &entry->cache_flow_start_timestamp_list_node); /* Enforce exporter->cache_max_flows limit. */ if (hmap_count(&exporter->cache_flow_key_map) > exporter->cache_max_flows) { dpif_ipfix_cache_expire_now(exporter, false); } } else { ipfix_cache_aggregate_entries(entry, old_entry); free(entry); } } static void ipfix_cache_entry_init(struct ipfix_flow_cache_entry *entry, struct ofpbuf *packet, const struct flow *flow, uint64_t packet_delta_count, uint32_t obs_domain_id, uint32_t obs_point_id) { struct ipfix_flow_key *flow_key; struct ofpbuf msg; enum ipfix_proto_l2 l2; enum ipfix_proto_l3 l3; enum ipfix_proto_l4 l4; uint8_t ethernet_header_length; uint16_t ethernet_total_length; flow_key = &entry->flow_key; ofpbuf_use_stack(&msg, flow_key->flow_key_msg_part, sizeof flow_key->flow_key_msg_part); /* Choose the right template ID matching the protocols in the * sampled packet. */ l2 = (flow->vlan_tci == 0) ? 
IPFIX_PROTO_L2_ETH : IPFIX_PROTO_L2_VLAN; switch(ntohs(flow->dl_type)) { case ETH_TYPE_IP: l3 = IPFIX_PROTO_L3_IPV4; break; case ETH_TYPE_IPV6: l3 = IPFIX_PROTO_L3_IPV6; break; default: l3 = IPFIX_PROTO_L3_UNKNOWN; } l4 = IPFIX_PROTO_L4_UNKNOWN; if (l3 != IPFIX_PROTO_L3_UNKNOWN) { switch(flow->nw_proto) { case IPPROTO_TCP: /* TCP */ case IPPROTO_UDP: /* UDP */ l4 = IPFIX_PROTO_L4_TCP_UDP; break; } } flow_key->obs_domain_id = obs_domain_id; flow_key->template_id = ipfix_get_template_id(l2, l3, l4); /* The fields defined in the ipfix_data_record_* structs and sent * below must match exactly the templates defined in * ipfix_define_template_fields. */ ethernet_header_length = (l2 == IPFIX_PROTO_L2_VLAN) ? VLAN_ETH_HEADER_LEN : ETH_HEADER_LEN; ethernet_total_length = packet->size; /* Common Ethernet entities. */ { struct ipfix_data_record_flow_key_common *data_common; data_common = ofpbuf_put_zeros(&msg, sizeof *data_common); data_common->observation_point_id = htonl(obs_point_id); memcpy(data_common->source_mac_address, flow->dl_src, sizeof flow->dl_src); memcpy(data_common->destination_mac_address, flow->dl_dst, sizeof flow->dl_dst); data_common->ethernet_type = flow->dl_type; data_common->ethernet_header_length = ethernet_header_length; } if (l2 == IPFIX_PROTO_L2_VLAN) { struct ipfix_data_record_flow_key_vlan *data_vlan; uint16_t vlan_id = vlan_tci_to_vid(flow->vlan_tci); uint8_t priority = vlan_tci_to_pcp(flow->vlan_tci); data_vlan = ofpbuf_put_zeros(&msg, sizeof *data_vlan); data_vlan->vlan_id = htons(vlan_id); data_vlan->dot1q_vlan_id = htons(vlan_id); data_vlan->dot1q_priority = priority; } if (l3 != IPFIX_PROTO_L3_UNKNOWN) { struct ipfix_data_record_flow_key_ip *data_ip; data_ip = ofpbuf_put_zeros(&msg, sizeof *data_ip); data_ip->ip_version = (l3 == IPFIX_PROTO_L3_IPV4) ? 
4 : 6; data_ip->ip_ttl = flow->nw_ttl; data_ip->protocol_identifier = flow->nw_proto; data_ip->ip_diff_serv_code_point = flow->nw_tos >> 2; data_ip->ip_precedence = flow->nw_tos >> 5; data_ip->ip_class_of_service = flow->nw_tos; if (l3 == IPFIX_PROTO_L3_IPV4) { struct ipfix_data_record_flow_key_ipv4 *data_ipv4; data_ipv4 = ofpbuf_put_zeros(&msg, sizeof *data_ipv4); data_ipv4->source_ipv4_address = flow->nw_src; data_ipv4->destination_ipv4_address = flow->nw_dst; } else { /* l3 == IPFIX_PROTO_L3_IPV6 */ struct ipfix_data_record_flow_key_ipv6 *data_ipv6; data_ipv6 = ofpbuf_put_zeros(&msg, sizeof *data_ipv6); memcpy(data_ipv6->source_ipv6_address, &flow->ipv6_src, sizeof flow->ipv6_src); memcpy(data_ipv6->destination_ipv6_address, &flow->ipv6_dst, sizeof flow->ipv6_dst); data_ipv6->flow_label_ipv6 = flow->ipv6_label; } } if (l4 != IPFIX_PROTO_L4_UNKNOWN) { struct ipfix_data_record_flow_key_tcpudp *data_tcpudp; data_tcpudp = ofpbuf_put_zeros(&msg, sizeof *data_tcpudp); data_tcpudp->source_transport_port = flow->tp_src; data_tcpudp->destination_transport_port = flow->tp_dst; } flow_key->flow_key_msg_part_size = msg.size; { struct timeval now; uint64_t layer2_octet_delta_count; /* Calculate the total matched octet count by considering as * an approximation that all matched packets have the same * length. 
*/ layer2_octet_delta_count = packet_delta_count * ethernet_total_length; xgettimeofday(&now); entry->flow_end_timestamp_usec = now.tv_usec + 1000000LL * now.tv_sec; entry->flow_start_timestamp_usec = entry->flow_end_timestamp_usec; entry->packet_delta_count = packet_delta_count; entry->layer2_octet_delta_count = layer2_octet_delta_count; } if (l3 != IPFIX_PROTO_L3_UNKNOWN) { uint16_t ip_total_length = ethernet_total_length - ethernet_header_length; entry->octet_delta_sum_of_squares = packet_delta_count * ip_total_length * ip_total_length; entry->minimum_ip_total_length = ip_total_length; entry->maximum_ip_total_length = ip_total_length; } else { entry->octet_delta_sum_of_squares = 0; entry->minimum_ip_total_length = 0; entry->maximum_ip_total_length = 0; } } /* Send each single data record in its own data set, to simplify the * implementation by avoiding having to group record by template ID * before sending. */ static void ipfix_put_data_set(uint32_t export_time_sec, struct ipfix_flow_cache_entry *entry, enum ipfix_flow_end_reason flow_end_reason, struct ofpbuf *msg) { size_t set_hdr_offset; struct ipfix_set_header *set_hdr; set_hdr_offset = msg->size; /* Put a Data Set. */ set_hdr = ofpbuf_put_zeros(msg, sizeof *set_hdr); set_hdr->set_id = htons(entry->flow_key.template_id); /* Copy the flow key part of the data record. */ ofpbuf_put(msg, entry->flow_key.flow_key_msg_part, entry->flow_key.flow_key_msg_part_size); /* Put the non-key part of the data record. */ { struct ipfix_data_record_aggregated_common *data_aggregated_common; uint64_t export_time_usec, flow_start_delta_usec, flow_end_delta_usec; /* Calculate the negative deltas relative to the export time * in seconds sent in the header, not the exact export * time. 
*/ export_time_usec = 1000000LL * export_time_sec; flow_start_delta_usec = export_time_usec - entry->flow_start_timestamp_usec; flow_end_delta_usec = export_time_usec - entry->flow_end_timestamp_usec; data_aggregated_common = ofpbuf_put_zeros( msg, sizeof *data_aggregated_common); data_aggregated_common->flow_start_delta_microseconds = htonl( flow_start_delta_usec); data_aggregated_common->flow_end_delta_microseconds = htonl( flow_end_delta_usec); data_aggregated_common->packet_delta_count = htonll( entry->packet_delta_count); data_aggregated_common->layer2_octet_delta_count = htonll( entry->layer2_octet_delta_count); data_aggregated_common->flow_end_reason = flow_end_reason; } if (entry->octet_delta_sum_of_squares) { /* IP packet. */ struct ipfix_data_record_aggregated_ip *data_aggregated_ip; data_aggregated_ip = ofpbuf_put_zeros( msg, sizeof *data_aggregated_ip); data_aggregated_ip->octet_delta_sum_of_squares = htonll( entry->octet_delta_sum_of_squares); data_aggregated_ip->minimum_ip_total_length = htonll( entry->minimum_ip_total_length); data_aggregated_ip->maximum_ip_total_length = htonll( entry->maximum_ip_total_length); } set_hdr = (struct ipfix_set_header*)((uint8_t*)msg->data + set_hdr_offset); set_hdr->length = htons(msg->size - set_hdr_offset); } /* Send an IPFIX message with a single data record. 
*/ static void ipfix_send_data_msg(struct dpif_ipfix_exporter *exporter, uint32_t export_time_sec, struct ipfix_flow_cache_entry *entry, enum ipfix_flow_end_reason flow_end_reason) { uint64_t msg_stub[DIV_ROUND_UP(MAX_MESSAGE_LEN, 8)]; struct ofpbuf msg; ofpbuf_use_stub(&msg, msg_stub, sizeof msg_stub); ipfix_init_header(export_time_sec, exporter->seq_number++, entry->flow_key.obs_domain_id, &msg); ipfix_put_data_set(export_time_sec, entry, flow_end_reason, &msg); ipfix_send_msg(exporter->collectors, &msg); ofpbuf_uninit(&msg); } static void dpif_ipfix_sample(struct dpif_ipfix_exporter *exporter, struct ofpbuf *packet, const struct flow *flow, uint64_t packet_delta_count, uint32_t obs_domain_id, uint32_t obs_point_id) { struct ipfix_flow_cache_entry *entry; /* Create a flow cache entry from the sample. */ entry = xmalloc(sizeof *entry); ipfix_cache_entry_init(entry, packet, flow, packet_delta_count, obs_domain_id, obs_point_id); ipfix_cache_update(exporter, entry); } void dpif_ipfix_bridge_sample(struct dpif_ipfix *di, struct ofpbuf *packet, const struct flow *flow) OVS_EXCLUDED(mutex) { uint64_t packet_delta_count; ovs_mutex_lock(&mutex); /* Use the sampling probability as an approximation of the number * of matched packets. */ packet_delta_count = UINT32_MAX / di->bridge_exporter.probability; dpif_ipfix_sample(&di->bridge_exporter.exporter, packet, flow, packet_delta_count, di->bridge_exporter.options->obs_domain_id, di->bridge_exporter.options->obs_point_id); ovs_mutex_unlock(&mutex); } void dpif_ipfix_flow_sample(struct dpif_ipfix *di, struct ofpbuf *packet, const struct flow *flow, uint32_t collector_set_id, uint16_t probability, uint32_t obs_domain_id, uint32_t obs_point_id) OVS_EXCLUDED(mutex) { struct dpif_ipfix_flow_exporter_map_node *node; /* Use the sampling probability as an approximation of the number * of matched packets. 
*/ uint64_t packet_delta_count = USHRT_MAX / probability; ovs_mutex_lock(&mutex); node = dpif_ipfix_find_flow_exporter_map_node(di, collector_set_id); if (node) { dpif_ipfix_sample(&node->exporter.exporter, packet, flow, packet_delta_count, obs_domain_id, obs_point_id); } ovs_mutex_unlock(&mutex); } static void dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter, bool forced_end, const uint64_t export_time_usec, const uint32_t export_time_sec) { struct ipfix_flow_cache_entry *entry, *next_entry; uint64_t max_flow_start_timestamp_usec; bool template_msg_sent = false; enum ipfix_flow_end_reason flow_end_reason; if (list_is_empty(&exporter->cache_flow_start_timestamp_list)) { return; } max_flow_start_timestamp_usec = export_time_usec - 1000000LL * exporter->cache_active_timeout; LIST_FOR_EACH_SAFE (entry, next_entry, cache_flow_start_timestamp_list_node, &exporter->cache_flow_start_timestamp_list) { if (forced_end) { flow_end_reason = FORCED_END; } else if (entry->flow_start_timestamp_usec <= max_flow_start_timestamp_usec) { flow_end_reason = ACTIVE_TIMEOUT; } else if (hmap_count(&exporter->cache_flow_key_map) > exporter->cache_max_flows) { /* Enforce exporter->cache_max_flows. */ flow_end_reason = LACK_OF_RESOURCES; } else { /* Remaining flows haven't expired yet. */ break; } list_remove(&entry->cache_flow_start_timestamp_list_node); hmap_remove(&exporter->cache_flow_key_map, &entry->flow_key_map_node); if (!template_msg_sent && (exporter->last_template_set_time + IPFIX_TEMPLATE_INTERVAL) <= export_time_sec) { ipfix_send_template_msg(exporter, export_time_sec, entry->flow_key.obs_domain_id); exporter->last_template_set_time = export_time_sec; template_msg_sent = true; } /* XXX: Group multiple data records for the same obs domain id * into the same message. 
*/ ipfix_send_data_msg(exporter, export_time_sec, entry, flow_end_reason); free(entry); } } static void get_export_time_now(uint64_t *export_time_usec, uint32_t *export_time_sec) { struct timeval export_time; xgettimeofday(&export_time); *export_time_usec = export_time.tv_usec + 1000000LL * export_time.tv_sec; /* The IPFIX start and end deltas are negative deltas relative to * the export time, so set the export time 1 second off to * calculate those deltas. */ if (export_time.tv_usec == 0) { *export_time_sec = export_time.tv_sec; } else { *export_time_sec = export_time.tv_sec + 1; } } static void dpif_ipfix_cache_expire_now(struct dpif_ipfix_exporter *exporter, bool forced_end) { uint64_t export_time_usec; uint32_t export_time_sec; get_export_time_now(&export_time_usec, &export_time_sec); dpif_ipfix_cache_expire(exporter, forced_end, export_time_usec, export_time_sec); } void dpif_ipfix_run(struct dpif_ipfix *di) OVS_EXCLUDED(mutex) { uint64_t export_time_usec; uint32_t export_time_sec; struct dpif_ipfix_flow_exporter_map_node *flow_exporter_node; ovs_mutex_lock(&mutex); get_export_time_now(&export_time_usec, &export_time_sec); if (di->bridge_exporter.probability > 0) { /* Bridge exporter enabled. */ dpif_ipfix_cache_expire( &di->bridge_exporter.exporter, false, export_time_usec, export_time_sec); } HMAP_FOR_EACH (flow_exporter_node, node, &di->flow_exporter_map) { dpif_ipfix_cache_expire( &flow_exporter_node->exporter.exporter, false, export_time_usec, export_time_sec); } ovs_mutex_unlock(&mutex); } void dpif_ipfix_wait(struct dpif_ipfix *di) OVS_EXCLUDED(mutex) { long long int next_timeout_msec = LLONG_MAX; struct dpif_ipfix_flow_exporter_map_node *flow_exporter_node; ovs_mutex_lock(&mutex); if (di->bridge_exporter.probability > 0) { /* Bridge exporter enabled. 
*/ if (ipfix_cache_next_timeout_msec( &di->bridge_exporter.exporter, &next_timeout_msec)) { poll_timer_wait_until(next_timeout_msec); } } HMAP_FOR_EACH (flow_exporter_node, node, &di->flow_exporter_map) { if (ipfix_cache_next_timeout_msec( &flow_exporter_node->exporter.exporter, &next_timeout_msec)) { poll_timer_wait_until(next_timeout_msec); } } ovs_mutex_unlock(&mutex); } openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-ipfix.h000066400000000000000000000032011226605124000230340ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef OFPROTO_DPIF_IPFIX_H #define OFPROTO_DPIF_IPFIX_H 1 #include #include struct flow; struct ofpbuf; struct ofproto_ipfix_bridge_exporter_options; struct ofproto_ipfix_flow_exporter_options; struct dpif_ipfix *dpif_ipfix_create(void); struct dpif_ipfix *dpif_ipfix_ref(const struct dpif_ipfix *); void dpif_ipfix_unref(struct dpif_ipfix *); uint32_t dpif_ipfix_get_bridge_exporter_probability(const struct dpif_ipfix *); void dpif_ipfix_set_options( struct dpif_ipfix *, const struct ofproto_ipfix_bridge_exporter_options *, const struct ofproto_ipfix_flow_exporter_options *, size_t); void dpif_ipfix_bridge_sample(struct dpif_ipfix *, struct ofpbuf *, const struct flow *); void dpif_ipfix_flow_sample(struct dpif_ipfix *, struct ofpbuf *, const struct flow *, uint32_t, uint16_t, uint32_t, uint32_t); void dpif_ipfix_run(struct dpif_ipfix *); void dpif_ipfix_wait(struct dpif_ipfix *); #endif /* ofproto/ofproto-dpif-ipfix.h */ openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-mirror.c000066400000000000000000000324041226605124000232310ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ofproto-dpif-mirror.h" #include #include "hmap.h" #include "hmapx.h" #include "ofproto.h" #include "vlan-bitmap.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ofproto_dpif_mirror); #define MIRROR_MASK_C(X) UINT32_C(X) BUILD_ASSERT_DECL(sizeof(mirror_mask_t) * CHAR_BIT >= MAX_MIRRORS); struct mbridge { struct mirror *mirrors[MAX_MIRRORS]; struct hmap mbundles; bool need_revalidate; bool has_mirrors; int ref_cnt; }; struct mbundle { struct hmap_node hmap_node; /* In parent 'mbridge' map. */ struct ofbundle *ofbundle; mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ mirror_mask_t dst_mirrors; /* Mirrors triggered when packet sent. */ mirror_mask_t mirror_out; /* Mirrors that output to this mbundle. */ }; struct mirror { struct mbridge *mbridge; /* Owning ofproto. */ size_t idx; /* In ofproto's "mirrors" array. */ void *aux; /* Key supplied by ofproto's client. */ /* Selection criteria. */ struct hmapx srcs; /* Contains "struct mbundle*"s. */ struct hmapx dsts; /* Contains "struct mbundle*"s. */ unsigned long *vlans; /* Bitmap of chosen VLANs, NULL selects all. */ /* Output (exactly one of out == NULL and out_vlan == -1 is true). */ struct mbundle *out; /* Output port or NULL. */ int out_vlan; /* Output VLAN or -1. */ mirror_mask_t dup_mirrors; /* Bitmap of mirrors with the same output. */ /* Counters. */ int64_t packet_count; /* Number of packets sent. */ int64_t byte_count; /* Number of bytes sent. 
*/ }; static struct mirror *mirror_lookup(struct mbridge *, void *aux); static struct mbundle *mbundle_lookup(const struct mbridge *, struct ofbundle *); static void mbundle_lookup_multiple(const struct mbridge *, struct ofbundle **, size_t n_bundles, struct hmapx *mbundles); static int mirror_scan(struct mbridge *); static void mirror_update_dups(struct mbridge *); static int mirror_mask_ffs(mirror_mask_t); struct mbridge * mbridge_create(void) { struct mbridge *mbridge; mbridge = xzalloc(sizeof *mbridge); mbridge->ref_cnt = 1; hmap_init(&mbridge->mbundles); return mbridge; } struct mbridge * mbridge_ref(const struct mbridge *mbridge_) { struct mbridge *mbridge = CONST_CAST(struct mbridge *, mbridge_); if (mbridge) { ovs_assert(mbridge->ref_cnt > 0); mbridge->ref_cnt++; } return mbridge; } void mbridge_unref(struct mbridge *mbridge) { struct mbundle *mbundle, *next; size_t i; if (!mbridge) { return; } ovs_assert(mbridge->ref_cnt > 0); if (--mbridge->ref_cnt) { return; } for (i = 0; i < MAX_MIRRORS; i++) { if (mbridge->mirrors[i]) { mirror_destroy(mbridge, mbridge->mirrors[i]->aux); } } HMAP_FOR_EACH_SAFE (mbundle, next, hmap_node, &mbridge->mbundles) { mbridge_unregister_bundle(mbridge, mbundle->ofbundle); } hmap_destroy(&mbridge->mbundles); free(mbridge); } bool mbridge_has_mirrors(struct mbridge *mbridge) { return mbridge ? mbridge->has_mirrors : false; } /* Returns true if configurations changes in 'mbridge''s mirrors require * revalidation. 
*/ bool mbridge_need_revalidate(struct mbridge *mbridge) { return mbridge->need_revalidate; } void mbridge_register_bundle(struct mbridge *mbridge, struct ofbundle *ofbundle) { struct mbundle *mbundle; mbundle = xzalloc(sizeof *mbundle); mbundle->ofbundle = ofbundle; hmap_insert(&mbridge->mbundles, &mbundle->hmap_node, hash_pointer(ofbundle, 0)); } void mbridge_unregister_bundle(struct mbridge *mbridge, struct ofbundle *ofbundle) { struct mbundle *mbundle = mbundle_lookup(mbridge, ofbundle); size_t i; if (!mbundle) { return; } for (i = 0; i < MAX_MIRRORS; i++) { struct mirror *m = mbridge->mirrors[i]; if (m) { if (m->out == mbundle) { mirror_destroy(mbridge, m->aux); } else if (hmapx_find_and_delete(&m->srcs, mbundle) || hmapx_find_and_delete(&m->dsts, mbundle)) { mbridge->need_revalidate = true; } } } hmap_remove(&mbridge->mbundles, &mbundle->hmap_node); free(mbundle); } mirror_mask_t mirror_bundle_out(struct mbridge *mbridge, struct ofbundle *ofbundle) { struct mbundle *mbundle = mbundle_lookup(mbridge, ofbundle); return mbundle ? mbundle->mirror_out : 0; } mirror_mask_t mirror_bundle_src(struct mbridge *mbridge, struct ofbundle *ofbundle) { struct mbundle *mbundle = mbundle_lookup(mbridge, ofbundle); return mbundle ? mbundle->src_mirrors : 0; } mirror_mask_t mirror_bundle_dst(struct mbridge *mbridge, struct ofbundle *ofbundle) { struct mbundle *mbundle = mbundle_lookup(mbridge, ofbundle); return mbundle ? mbundle->dst_mirrors : 0; } int mirror_set(struct mbridge *mbridge, void *aux, const char *name, struct ofbundle **srcs, size_t n_srcs, struct ofbundle **dsts, size_t n_dsts, unsigned long *src_vlans, struct ofbundle *out_bundle, uint16_t out_vlan) { struct mbundle *mbundle, *out; mirror_mask_t mirror_bit; struct mirror *mirror; struct hmapx srcs_map; /* Contains "struct ofbundle *"s. */ struct hmapx dsts_map; /* Contains "struct ofbundle *"s. 
*/ mirror = mirror_lookup(mbridge, aux); if (!mirror) { int idx; idx = mirror_scan(mbridge); if (idx < 0) { VLOG_WARN("maximum of %d port mirrors reached, cannot create %s", MAX_MIRRORS, name); return EFBIG; } mirror = mbridge->mirrors[idx] = xzalloc(sizeof *mirror); mirror->mbridge = mbridge; mirror->idx = idx; mirror->aux = aux; mirror->out_vlan = -1; } /* Get the new configuration. */ if (out_bundle) { out = mbundle_lookup(mbridge, out_bundle); if (!out) { mirror_destroy(mbridge, mirror->aux); return EINVAL; } out_vlan = -1; } else { out = NULL; } mbundle_lookup_multiple(mbridge, srcs, n_srcs, &srcs_map); mbundle_lookup_multiple(mbridge, dsts, n_dsts, &dsts_map); /* If the configuration has not changed, do nothing. */ if (hmapx_equals(&srcs_map, &mirror->srcs) && hmapx_equals(&dsts_map, &mirror->dsts) && vlan_bitmap_equal(mirror->vlans, src_vlans) && mirror->out == out && mirror->out_vlan == out_vlan) { hmapx_destroy(&srcs_map); hmapx_destroy(&dsts_map); return 0; } hmapx_swap(&srcs_map, &mirror->srcs); hmapx_destroy(&srcs_map); hmapx_swap(&dsts_map, &mirror->dsts); hmapx_destroy(&dsts_map); free(mirror->vlans); mirror->vlans = vlan_bitmap_clone(src_vlans); mirror->out = out; mirror->out_vlan = out_vlan; /* Update mbundles. 
*/ mirror_bit = MIRROR_MASK_C(1) << mirror->idx; HMAP_FOR_EACH (mbundle, hmap_node, &mirror->mbridge->mbundles) { if (hmapx_contains(&mirror->srcs, mbundle)) { mbundle->src_mirrors |= mirror_bit; } else { mbundle->src_mirrors &= ~mirror_bit; } if (hmapx_contains(&mirror->dsts, mbundle)) { mbundle->dst_mirrors |= mirror_bit; } else { mbundle->dst_mirrors &= ~mirror_bit; } if (mirror->out == mbundle) { mbundle->mirror_out |= mirror_bit; } else { mbundle->mirror_out &= ~mirror_bit; } } mbridge->has_mirrors = true; mirror_update_dups(mbridge); return 0; } void mirror_destroy(struct mbridge *mbridge, void *aux) { struct mirror *mirror = mirror_lookup(mbridge, aux); mirror_mask_t mirror_bit; struct mbundle *mbundle; int i; if (!mirror) { return; } mirror_bit = MIRROR_MASK_C(1) << mirror->idx; HMAP_FOR_EACH (mbundle, hmap_node, &mbridge->mbundles) { mbundle->src_mirrors &= ~mirror_bit; mbundle->dst_mirrors &= ~mirror_bit; mbundle->mirror_out &= ~mirror_bit; } hmapx_destroy(&mirror->srcs); hmapx_destroy(&mirror->dsts); free(mirror->vlans); mbridge->mirrors[mirror->idx] = NULL; free(mirror); mirror_update_dups(mbridge); mbridge->has_mirrors = false; for (i = 0; i < MAX_MIRRORS; i++) { if (mbridge->mirrors[i]) { mbridge->has_mirrors = true; break; } } } int mirror_get_stats(struct mbridge *mbridge, void *aux, uint64_t *packets, uint64_t *bytes) { struct mirror *mirror = mirror_lookup(mbridge, aux); if (!mirror) { *packets = *bytes = UINT64_MAX; return 0; } *packets = mirror->packet_count; *bytes = mirror->byte_count; return 0; } void mirror_update_stats(struct mbridge *mbridge, mirror_mask_t mirrors, uint64_t packets, uint64_t bytes) { if (!mbridge || !mirrors) { return; } for (; mirrors; mirrors = zero_rightmost_1bit(mirrors)) { struct mirror *m; m = mbridge->mirrors[mirror_mask_ffs(mirrors) - 1]; if (!m) { /* In normal circumstances 'm' will not be NULL. However, * if mirrors are reconfigured, we can temporarily get out * of sync in facet_revalidate(). 
We could "correct" the * mirror list before reaching here, but doing that would * not properly account the traffic stats we've currently * accumulated for previous mirror configuration. */ continue; } m->packet_count += packets; m->byte_count += bytes; } } /* Retrieves the mirror in 'mbridge' represented by the first bet set of * 'mirrors'. Returns true if such a mirror exists, false otherwise. * The caller takes ownership of, and is expected to deallocate, 'vlans' */ bool mirror_get(struct mbridge *mbridge, int index, unsigned long **vlans, mirror_mask_t *dup_mirrors, struct ofbundle **out, int *out_vlan) { struct mirror *mirror; if (!mbridge) { return false; } mirror = mbridge->mirrors[index]; if (!mirror) { return false; } *vlans = vlan_bitmap_clone(mirror->vlans); *dup_mirrors = mirror->dup_mirrors; *out = mirror->out ? mirror->out->ofbundle : NULL; *out_vlan = mirror->out_vlan; return true; } /* Helpers. */ static struct mbundle * mbundle_lookup(const struct mbridge *mbridge, struct ofbundle *ofbundle) { struct mbundle *mbundle; HMAP_FOR_EACH_IN_BUCKET (mbundle, hmap_node, hash_pointer(ofbundle, 0), &mbridge->mbundles) { if (mbundle->ofbundle == ofbundle) { return mbundle; } } return NULL; } /* Looks up each of the 'n_ofbundlees' pointers in 'ofbundlees' as mbundles and * adds the ones that are found to 'mbundles'. 
*/ static void mbundle_lookup_multiple(const struct mbridge *mbridge, struct ofbundle **ofbundles, size_t n_ofbundles, struct hmapx *mbundles) { size_t i; hmapx_init(mbundles); for (i = 0; i < n_ofbundles; i++) { struct mbundle *mbundle = mbundle_lookup(mbridge, ofbundles[i]); if (mbundle) { hmapx_add(mbundles, mbundle); } } } static int mirror_scan(struct mbridge *mbridge) { int idx; for (idx = 0; idx < MAX_MIRRORS; idx++) { if (!mbridge->mirrors[idx]) { return idx; } } return -1; } static struct mirror * mirror_lookup(struct mbridge *mbridge, void *aux) { int i; for (i = 0; i < MAX_MIRRORS; i++) { struct mirror *mirror = mbridge->mirrors[i]; if (mirror && mirror->aux == aux) { return mirror; } } return NULL; } /* Update the 'dup_mirrors' member of each of the mirrors in 'ofproto'. */ static void mirror_update_dups(struct mbridge *mbridge) { int i; for (i = 0; i < MAX_MIRRORS; i++) { struct mirror *m = mbridge->mirrors[i]; if (m) { m->dup_mirrors = MIRROR_MASK_C(1) << i; } } for (i = 0; i < MAX_MIRRORS; i++) { struct mirror *m1 = mbridge->mirrors[i]; int j; if (!m1) { continue; } for (j = i + 1; j < MAX_MIRRORS; j++) { struct mirror *m2 = mbridge->mirrors[j]; if (m2 && m1->out == m2->out && m1->out_vlan == m2->out_vlan) { m1->dup_mirrors |= MIRROR_MASK_C(1) << j; m2->dup_mirrors |= m1->dup_mirrors; } } } } openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-mirror.h000066400000000000000000000043221226605124000232340ustar00rootroot00000000000000/* Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OFPROT_DPIF_MIRROR_H #define OFPROT_DPIF_MIRROR_H 1 #include #include "util.h" #define MAX_MIRRORS 32 typedef uint32_t mirror_mask_t; struct ofproto_dpif; struct ofbundle; struct mbridge *mbridge_create(void); struct mbridge *mbridge_ref(const struct mbridge *); void mbridge_unref(struct mbridge *); bool mbridge_has_mirrors(struct mbridge *); bool mbridge_need_revalidate(struct mbridge *); void mbridge_register_bundle(struct mbridge *, struct ofbundle *); void mbridge_unregister_bundle(struct mbridge *, struct ofbundle *); mirror_mask_t mirror_bundle_out(struct mbridge *, struct ofbundle *); mirror_mask_t mirror_bundle_src(struct mbridge *, struct ofbundle *); mirror_mask_t mirror_bundle_dst(struct mbridge *, struct ofbundle *); int mirror_set(struct mbridge *, void *aux, const char *name, struct ofbundle **srcs, size_t n_srcs, struct ofbundle **dsts, size_t n_dsts, unsigned long *src_vlans, struct ofbundle *out_bundle, uint16_t out_vlan); void mirror_destroy(struct mbridge *, void *aux); int mirror_get_stats(struct mbridge *, void *aux, uint64_t *packets, uint64_t *bytes); void mirror_update_stats(struct mbridge*, mirror_mask_t, uint64_t packets, uint64_t bytes); bool mirror_get(struct mbridge *, int index, unsigned long **vlans, mirror_mask_t *dup_mirrors, struct ofbundle **out, int *out_vlan); static inline int mirror_mask_ffs(mirror_mask_t mask) { BUILD_ASSERT_DECL(sizeof(unsigned int) >= sizeof(mask)); return ffs(mask); } #endif /* ofproto-dpif-mirror.h */ openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-sflow.c000066400000000000000000000475431226605124000230630ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * Copyright (c) 2009 InMon Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "ofproto-dpif-sflow.h" #include #include #include #include #include "collectors.h" #include "compiler.h" #include "dpif.h" #include "hash.h" #include "hmap.h" #include "netdev.h" #include "netlink.h" #include "ofpbuf.h" #include "ofproto.h" #include "packets.h" #include "poll-loop.h" #include "route-table.h" #include "sflow_api.h" #include "socket-util.h" #include "timeval.h" #include "vlog.h" #include "lib/odp-util.h" #include "ofproto-provider.h" VLOG_DEFINE_THIS_MODULE(sflow); static struct ovs_mutex mutex; struct dpif_sflow_port { struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */ SFLDataSource_instance dsi; /* sFlow library's notion of port number. */ struct ofport *ofport; /* To retrive port stats. */ odp_port_t odp_port; }; struct dpif_sflow { struct collectors *collectors; SFLAgent *sflow_agent; struct ofproto_sflow_options *options; time_t next_tick; size_t n_flood, n_all; struct hmap ports; /* Contains "struct dpif_sflow_port"s. */ uint32_t probability; atomic_int ref_cnt; }; static void dpif_sflow_del_port__(struct dpif_sflow *, struct dpif_sflow_port *); #define RECEIVER_INDEX 1 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); static bool nullable_string_is_equal(const char *a, const char *b) { return a ? 
b && !strcmp(a, b) : !b; } static bool ofproto_sflow_options_equal(const struct ofproto_sflow_options *a, const struct ofproto_sflow_options *b) { return (sset_equals(&a->targets, &b->targets) && a->sampling_rate == b->sampling_rate && a->polling_interval == b->polling_interval && a->header_len == b->header_len && a->sub_id == b->sub_id && nullable_string_is_equal(a->agent_device, b->agent_device) && nullable_string_is_equal(a->control_ip, b->control_ip)); } static struct ofproto_sflow_options * ofproto_sflow_options_clone(const struct ofproto_sflow_options *old) { struct ofproto_sflow_options *new = xmemdup(old, sizeof *old); sset_clone(&new->targets, &old->targets); new->agent_device = old->agent_device ? xstrdup(old->agent_device) : NULL; new->control_ip = old->control_ip ? xstrdup(old->control_ip) : NULL; return new; } static void ofproto_sflow_options_destroy(struct ofproto_sflow_options *options) { if (options) { sset_destroy(&options->targets); free(options->agent_device); free(options->control_ip); free(options); } } /* sFlow library callback to allocate memory. */ static void * sflow_agent_alloc_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED, size_t bytes) { return calloc(1, bytes); } /* sFlow library callback to free memory. */ static int sflow_agent_free_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED, void *obj) { free(obj); return 0; } /* sFlow library callback to report error. */ static void sflow_agent_error_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED, char *msg) { VLOG_WARN("sFlow agent error: %s", msg); } /* sFlow library callback to send datagram. 
*/ static void sflow_agent_send_packet_cb(void *ds_, SFLAgent *agent OVS_UNUSED, SFLReceiver *receiver OVS_UNUSED, u_char *pkt, uint32_t pktLen) { struct dpif_sflow *ds = ds_; collectors_send(ds->collectors, pkt, pktLen); } static struct dpif_sflow_port * dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port) OVS_REQUIRES(mutex) { struct dpif_sflow_port *dsp; HMAP_FOR_EACH_IN_BUCKET (dsp, hmap_node, hash_odp_port(odp_port), &ds->ports) { if (dsp->odp_port == odp_port) { return dsp; } } return NULL; } static void sflow_agent_get_counters(void *ds_, SFLPoller *poller, SFL_COUNTERS_SAMPLE_TYPE *cs) OVS_REQUIRES(mutex) { struct dpif_sflow *ds = ds_; SFLCounters_sample_element elem; enum netdev_features current; struct dpif_sflow_port *dsp; SFLIf_counters *counters; struct netdev_stats stats; enum netdev_flags flags; dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort)); if (!dsp) { return; } elem.tag = SFLCOUNTERS_GENERIC; counters = &elem.counterBlock.generic; counters->ifIndex = SFL_DS_INDEX(poller->dsi); counters->ifType = 6; if (!netdev_get_features(dsp->ofport->netdev, ¤t, NULL, NULL, NULL)) { /* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown, 1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */ counters->ifSpeed = netdev_features_to_bps(current, 0); counters->ifDirection = (netdev_features_is_full_duplex(current) ? 1 : 2); } else { counters->ifSpeed = 100000000; counters->ifDirection = 0; } if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP) { counters->ifStatus = 1; /* ifAdminStatus up. */ if (netdev_get_carrier(dsp->ofport->netdev)) { counters->ifStatus |= 2; /* ifOperStatus us. */ } } else { counters->ifStatus = 0; /* Down. */ } /* XXX 1. Is the multicast counter filled in? 2. Does the multicast counter include broadcasts? 3. Does the rx_packets counter include multicasts/broadcasts? 
*/ ofproto_port_get_stats(dsp->ofport, &stats); counters->ifInOctets = stats.rx_bytes; counters->ifInUcastPkts = stats.rx_packets; counters->ifInMulticastPkts = stats.multicast; counters->ifInBroadcastPkts = -1; counters->ifInDiscards = stats.rx_dropped; counters->ifInErrors = stats.rx_errors; counters->ifInUnknownProtos = -1; counters->ifOutOctets = stats.tx_bytes; counters->ifOutUcastPkts = stats.tx_packets; counters->ifOutMulticastPkts = -1; counters->ifOutBroadcastPkts = -1; counters->ifOutDiscards = stats.tx_dropped; counters->ifOutErrors = stats.tx_errors; counters->ifPromiscuousMode = 0; SFLADD_ELEMENT(cs, &elem); sfl_poller_writeCountersSample(poller, cs); } /* Obtains an address to use for the local sFlow agent and stores it into * '*agent_addr'. Returns true if successful, false on failure. * * The sFlow agent address should be a local IP address that is persistent and * reachable over the network, if possible. The IP address associated with * 'agent_device' is used if it has one, and otherwise 'control_ip', the IP * address used to talk to the controller. If the agent device is not * specified then it is figured out by taking a look at the routing table based * on 'targets'. 
*/ static bool sflow_choose_agent_address(const char *agent_device, const struct sset *targets, const char *control_ip, SFLAddress *agent_addr) { const char *target; struct in_addr in4; memset(agent_addr, 0, sizeof *agent_addr); agent_addr->type = SFLADDRESSTYPE_IP_V4; if (agent_device) { if (!netdev_get_in4_by_name(agent_device, &in4)) { goto success; } } SSET_FOR_EACH (target, targets) { struct sockaddr_in sin; char name[IFNAMSIZ]; if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &sin) && route_table_get_name(sin.sin_addr.s_addr, name) && !netdev_get_in4_by_name(name, &in4)) { goto success; } } if (control_ip && !lookup_ip(control_ip, &in4)) { goto success; } VLOG_ERR("could not determine IP address for sFlow agent"); return false; success: agent_addr->address.ip_v4.addr = (OVS_FORCE uint32_t) in4.s_addr; return true; } static void dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex) { if (ds->sflow_agent) { sfl_agent_release(ds->sflow_agent); free(ds->sflow_agent); ds->sflow_agent = NULL; } collectors_destroy(ds->collectors); ds->collectors = NULL; ofproto_sflow_options_destroy(ds->options); ds->options = NULL; /* Turn off sampling to save CPU cycles. 
*/ ds->probability = 0; } void dpif_sflow_clear(struct dpif_sflow *ds) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); dpif_sflow_clear__(ds); ovs_mutex_unlock(&mutex); } bool dpif_sflow_is_enabled(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex) { bool enabled; ovs_mutex_lock(&mutex); enabled = ds->collectors != NULL; ovs_mutex_unlock(&mutex); return enabled; } struct dpif_sflow * dpif_sflow_create(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; struct dpif_sflow *ds; if (ovsthread_once_start(&once)) { ovs_mutex_init_recursive(&mutex); ovsthread_once_done(&once); } ds = xcalloc(1, sizeof *ds); ds->next_tick = time_now() + 1; hmap_init(&ds->ports); ds->probability = 0; route_table_register(); atomic_init(&ds->ref_cnt, 1); return ds; } struct dpif_sflow * dpif_sflow_ref(const struct dpif_sflow *ds_) { struct dpif_sflow *ds = CONST_CAST(struct dpif_sflow *, ds_); if (ds) { int orig; atomic_add(&ds->ref_cnt, 1, &orig); ovs_assert(orig > 0); } return ds; } /* 32-bit fraction of packets to sample with. A value of 0 samples no packets, * a value of %UINT32_MAX samples all packets and intermediate values sample * intermediate fractions of packets. 
*/ uint32_t dpif_sflow_get_probability(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex) { uint32_t probability; ovs_mutex_lock(&mutex); probability = ds->probability; ovs_mutex_unlock(&mutex); return probability; } void dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex) { int orig; if (!ds) { return; } atomic_sub(&ds->ref_cnt, 1, &orig); ovs_assert(orig > 0); if (orig == 1) { struct dpif_sflow_port *dsp, *next; route_table_unregister(); dpif_sflow_clear(ds); HMAP_FOR_EACH_SAFE (dsp, next, hmap_node, &ds->ports) { dpif_sflow_del_port__(ds, dsp); } hmap_destroy(&ds->ports); free(ds); } } static void dpif_sflow_add_poller(struct dpif_sflow *ds, struct dpif_sflow_port *dsp) OVS_REQUIRES(mutex) { SFLPoller *poller = sfl_agent_addPoller(ds->sflow_agent, &dsp->dsi, ds, sflow_agent_get_counters); sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval); sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX); sfl_poller_set_bridgePort(poller, odp_to_u32(dsp->odp_port)); } void dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport, odp_port_t odp_port) OVS_EXCLUDED(mutex) { struct dpif_sflow_port *dsp; int ifindex; ovs_mutex_lock(&mutex); dpif_sflow_del_port(ds, odp_port); ifindex = netdev_get_ifindex(ofport->netdev); if (ifindex <= 0) { /* Not an ifindex port, so do not add a cross-reference to it here */ goto out; } /* Add to table of ports. */ dsp = xmalloc(sizeof *dsp); dsp->ofport = ofport; dsp->odp_port = odp_port; SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0); hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port)); /* Add poller. 
*/ if (ds->sflow_agent) { dpif_sflow_add_poller(ds, dsp); } out: ovs_mutex_unlock(&mutex); } static void dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp) OVS_REQUIRES(mutex) { if (ds->sflow_agent) { sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi); sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi); } hmap_remove(&ds->ports, &dsp->hmap_node); free(dsp); } void dpif_sflow_del_port(struct dpif_sflow *ds, odp_port_t odp_port) OVS_EXCLUDED(mutex) { struct dpif_sflow_port *dsp; ovs_mutex_lock(&mutex); dsp = dpif_sflow_find_port(ds, odp_port); if (dsp) { dpif_sflow_del_port__(ds, dsp); } ovs_mutex_unlock(&mutex); } void dpif_sflow_set_options(struct dpif_sflow *ds, const struct ofproto_sflow_options *options) OVS_EXCLUDED(mutex) { struct dpif_sflow_port *dsp; bool options_changed; SFLReceiver *receiver; SFLAddress agentIP; time_t now; SFLDataSource_instance dsi; uint32_t dsIndex; SFLSampler *sampler; ovs_mutex_lock(&mutex); if (sset_is_empty(&options->targets) || !options->sampling_rate) { /* No point in doing any work if there are no targets or nothing to * sample. */ dpif_sflow_clear__(ds); goto out; } options_changed = (!ds->options || !ofproto_sflow_options_equal(options, ds->options)); /* Configure collectors if options have changed or if we're shortchanged in * collectors (which indicates that opening one or more of the configured * collectors failed, so that we should retry). 
*/ if (options_changed || collectors_count(ds->collectors) < sset_count(&options->targets)) { collectors_destroy(ds->collectors); collectors_create(&options->targets, SFL_DEFAULT_COLLECTOR_PORT, &ds->collectors); if (ds->collectors == NULL) { VLOG_WARN_RL(&rl, "no collectors could be initialized, " "sFlow disabled"); dpif_sflow_clear__(ds); goto out; } } /* Choose agent IP address and agent device (if not yet setup) */ if (!sflow_choose_agent_address(options->agent_device, &options->targets, options->control_ip, &agentIP)) { dpif_sflow_clear__(ds); goto out; } /* Avoid reconfiguring if options didn't change. */ if (!options_changed) { goto out; } ofproto_sflow_options_destroy(ds->options); ds->options = ofproto_sflow_options_clone(options); /* Create agent. */ VLOG_INFO("creating sFlow agent %d", options->sub_id); if (ds->sflow_agent) { sfl_agent_release(ds->sflow_agent); } ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent); now = time_wall(); sfl_agent_init(ds->sflow_agent, &agentIP, options->sub_id, now, /* Boot time. */ now, /* Current time. */ ds, /* Pointer supplied to callbacks. */ sflow_agent_alloc_cb, sflow_agent_free_cb, sflow_agent_error_cb, sflow_agent_send_packet_cb); receiver = sfl_agent_addReceiver(ds->sflow_agent); sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow"); sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff); /* Set the sampling_rate down in the datapath. */ ds->probability = MAX(1, UINT32_MAX / ds->options->sampling_rate); /* Add a single sampler for the bridge. This appears as a PHYSICAL_ENTITY because it is associated with the hypervisor, and interacts with the server hardware directly. The sub_id is used to distinguish this sampler from others on other bridges within the same agent. 
*/ dsIndex = 1000 + options->sub_id; SFL_DS_SET(dsi, SFL_DSCLASS_PHYSICAL_ENTITY, dsIndex, 0); sampler = sfl_agent_addSampler(ds->sflow_agent, &dsi); sfl_sampler_set_sFlowFsPacketSamplingRate(sampler, ds->options->sampling_rate); sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len); sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX); /* Add pollers for the currently known ifindex-ports */ HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) { dpif_sflow_add_poller(ds, dsp); } out: ovs_mutex_unlock(&mutex); } int dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds, odp_port_t odp_port) OVS_EXCLUDED(mutex) { struct dpif_sflow_port *dsp; int ret; ovs_mutex_lock(&mutex); dsp = dpif_sflow_find_port(ds, odp_port); ret = dsp ? SFL_DS_INDEX(dsp->dsi) : 0; ovs_mutex_unlock(&mutex); return ret; } void dpif_sflow_received(struct dpif_sflow *ds, struct ofpbuf *packet, const struct flow *flow, odp_port_t odp_in_port, const union user_action_cookie *cookie) OVS_EXCLUDED(mutex) { SFL_FLOW_SAMPLE_TYPE fs; SFLFlow_sample_element hdrElem; SFLSampled_header *header; SFLFlow_sample_element switchElem; SFLSampler *sampler; struct dpif_sflow_port *in_dsp; ovs_be16 vlan_tci; ovs_mutex_lock(&mutex); sampler = ds->sflow_agent->samplers; if (!sampler) { goto out; } /* Build a flow sample. */ memset(&fs, 0, sizeof fs); /* Look up the input ifIndex if this port has one. Otherwise just * leave it as 0 (meaning 'unknown') and continue. */ in_dsp = dpif_sflow_find_port(ds, odp_in_port); if (in_dsp) { fs.input = SFL_DS_INDEX(in_dsp->dsi); } /* Make the assumption that the random number generator in the datapath converges * to the configured mean, and just increment the samplePool by the configured * sampling rate every time. */ sampler->samplePool += sfl_sampler_get_sFlowFsPacketSamplingRate(sampler); /* Sampled header. 
*/ memset(&hdrElem, 0, sizeof hdrElem); hdrElem.tag = SFLFLOW_HEADER; header = &hdrElem.flowType.header; header->header_protocol = SFLHEADER_ETHERNET_ISO8023; /* The frame_length should include the Ethernet FCS (4 bytes), * but it has already been stripped, so we need to add 4 here. */ header->frame_length = packet->size + 4; /* Ethernet FCS stripped off. */ header->stripped = 4; header->header_length = MIN(packet->size, sampler->sFlowFsMaximumHeaderSize); header->header_bytes = packet->data; /* Add extended switch element. */ memset(&switchElem, 0, sizeof(switchElem)); switchElem.tag = SFLFLOW_EX_SWITCH; switchElem.flowType.sw.src_vlan = vlan_tci_to_vid(flow->vlan_tci); switchElem.flowType.sw.src_priority = vlan_tci_to_pcp(flow->vlan_tci); /* Retrieve data from user_action_cookie. */ vlan_tci = cookie->sflow.vlan_tci; switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(vlan_tci); switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(vlan_tci); fs.output = cookie->sflow.output; /* Submit the flow sample to be encoded into the next datagram. */ SFLADD_ELEMENT(&fs, &hdrElem); SFLADD_ELEMENT(&fs, &switchElem); sfl_sampler_writeFlowSample(sampler, &fs); out: ovs_mutex_unlock(&mutex); } void dpif_sflow_run(struct dpif_sflow *ds) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); if (ds->collectors != NULL) { time_t now = time_now(); route_table_run(); if (now >= ds->next_tick) { sfl_agent_tick(ds->sflow_agent, time_wall()); ds->next_tick = now + 1; } } ovs_mutex_unlock(&mutex); } void dpif_sflow_wait(struct dpif_sflow *ds) OVS_EXCLUDED(mutex) { ovs_mutex_lock(&mutex); if (ds->collectors != NULL) { poll_timer_wait_until(ds->next_tick * 1000LL); } ovs_mutex_unlock(&mutex); } openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-sflow.h000066400000000000000000000036661226605124000230660ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010 InMon Corp. * Copyright (c) 2009, 2012 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OFPROTO_DPIF_SFLOW_H #define OFPROTO_DPIF_SFLOW_H 1 #include #include "svec.h" #include "lib/odp-util.h" struct dpif; struct dpif_upcall; struct flow; struct ofproto_sflow_options; struct ofport; struct dpif_sflow *dpif_sflow_create(void); struct dpif_sflow *dpif_sflow_ref(const struct dpif_sflow *); void dpif_sflow_unref(struct dpif_sflow *); uint32_t dpif_sflow_get_probability(const struct dpif_sflow *); void dpif_sflow_set_options(struct dpif_sflow *, const struct ofproto_sflow_options *); void dpif_sflow_clear(struct dpif_sflow *); bool dpif_sflow_is_enabled(const struct dpif_sflow *); void dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport, odp_port_t odp_port); void dpif_sflow_del_port(struct dpif_sflow *, odp_port_t odp_port); void dpif_sflow_run(struct dpif_sflow *); void dpif_sflow_wait(struct dpif_sflow *); void dpif_sflow_received(struct dpif_sflow *, struct ofpbuf *, const struct flow *, odp_port_t odp_port, const union user_action_cookie *); int dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *, odp_port_t odp_port); #endif /* ofproto/ofproto-dpif-sflow.h */ openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-unixctl.man000066400000000000000000000023231226605124000237330ustar00rootroot00000000000000.SS "DATAPATH COMMANDS" These commands manage logical datapaths. They are are similar to the equivalent \fBovs\-dpctl\fR commands. . 
.IP "\fBdpif/dump\-dps\fR" Prints the name of each configured datapath on a separate line. . .IP "\fBdpif/show\fR" Prints a summary of configured datapaths, including statistics and a list of connected ports. The port information includes the OpenFlow port number, datapath port number, and the type. (The local port is identified as OpenFlow port 65534.) . .IP "\fBdpif/dump\-flows \fIdp\fR" Prints to the console all flow entries in datapath \fIdp\fR's flow table. .IP This command is primarily useful for debugging Open vSwitch. The flow table entries that it displays are not OpenFlow flow entries. Instead, they are different and considerably simpler flows maintained by the datapath module. If you wish to see the OpenFlow flow entries, use \fBovs\-ofctl dump\-flows\fR. . .IP "\fBdpif/del\-flows \fIdp\fR" Deletes all flow entries from datapath \fIdp\fR's flow table and underlying datapath implementation (e.g., kernel datapath module). .IP This command is primarily useful for debugging Open vSwitch. As discussed in \fBdpif/dump\-flows\fR, these entries are not OpenFlow flow entries. openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-upcall.c000066400000000000000000000634351226605124000232070ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ofproto-dpif-upcall.h" #include #include #include #include "coverage.h" #include "dynamic-string.h" #include "dpif.h" #include "fail-open.h" #include "guarded-list.h" #include "latch.h" #include "seq.h" #include "list.h" #include "netlink.h" #include "ofpbuf.h" #include "ofproto-dpif.h" #include "packets.h" #include "poll-loop.h" #include "vlog.h" #define MAX_QUEUE_LENGTH 512 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall); COVERAGE_DEFINE(upcall_queue_overflow); COVERAGE_DEFINE(drop_queue_overflow); COVERAGE_DEFINE(miss_queue_overflow); COVERAGE_DEFINE(fmb_queue_overflow); COVERAGE_DEFINE(fmb_queue_revalidated); /* A thread that processes each upcall handed to it by the dispatcher thread, * forwards the upcall's packet, and then queues it to the main ofproto_dpif * to possibly set up a kernel flow as a cache. */ struct handler { struct udpif *udpif; /* Parent udpif. */ pthread_t thread; /* Thread ID. */ struct ovs_mutex mutex; /* Mutex guarding the following. */ /* Atomic queue of unprocessed miss upcalls. */ struct list upcalls OVS_GUARDED; size_t n_upcalls OVS_GUARDED; size_t n_new_upcalls; /* Only changed by the dispatcher. */ pthread_cond_t wake_cond; /* Wakes 'thread' while holding 'mutex'. */ }; /* An upcall handler for ofproto_dpif. * * udpif is implemented as a "dispatcher" thread that reads upcalls from the * kernel. It processes each upcall just enough to figure out its next * destination. For a "miss" upcall (MISS_UPCALL), this is one of several * "handler" threads (see struct handler). Other upcalls are queued to the * main ofproto_dpif. */ struct udpif { struct dpif *dpif; /* Datapath handle. */ struct dpif_backer *backer; /* Opaque dpif_backer pointer. */ uint32_t secret; /* Random seed for upcall hash. */ pthread_t dispatcher; /* Dispatcher thread ID. */ struct handler *handlers; /* Miss handlers. */ size_t n_handlers; /* Queues to pass up to ofproto-dpif. */ struct guarded_list drop_keys; /* "struct drop key"s. 
*/ struct guarded_list upcalls; /* "struct upcall"s. */ struct guarded_list fmbs; /* "struct flow_miss_batch"es. */ /* Number of times udpif_revalidate() has been called. */ atomic_uint reval_seq; struct seq *wait_seq; struct latch exit_latch; /* Tells child threads to exit. */ }; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); static void recv_upcalls(struct udpif *); static void handle_miss_upcalls(struct udpif *, struct list *upcalls); static void miss_destroy(struct flow_miss *); static void *udpif_dispatcher(void *); static void *udpif_miss_handler(void *); struct udpif * udpif_create(struct dpif_backer *backer, struct dpif *dpif) { struct udpif *udpif = xzalloc(sizeof *udpif); udpif->dpif = dpif; udpif->backer = backer; udpif->secret = random_uint32(); udpif->wait_seq = seq_create(); latch_init(&udpif->exit_latch); guarded_list_init(&udpif->drop_keys); guarded_list_init(&udpif->upcalls); guarded_list_init(&udpif->fmbs); atomic_init(&udpif->reval_seq, 0); return udpif; } void udpif_destroy(struct udpif *udpif) { struct flow_miss_batch *fmb; struct drop_key *drop_key; struct upcall *upcall; udpif_recv_set(udpif, 0, false); while ((drop_key = drop_key_next(udpif))) { drop_key_destroy(drop_key); } while ((upcall = upcall_next(udpif))) { upcall_destroy(upcall); } while ((fmb = flow_miss_batch_next(udpif))) { flow_miss_batch_destroy(fmb); } guarded_list_destroy(&udpif->drop_keys); guarded_list_destroy(&udpif->upcalls); guarded_list_destroy(&udpif->fmbs); latch_destroy(&udpif->exit_latch); seq_destroy(udpif->wait_seq); free(udpif); } /* Tells 'udpif' to begin or stop handling flow misses depending on the value * of 'enable'. 'n_handlers' is the number of miss_handler threads to create. * Passing 'n_handlers' as zero is equivalent to passing 'enable' as false. */ void udpif_recv_set(struct udpif *udpif, size_t n_handlers, bool enable) { n_handlers = enable ? n_handlers : 0; n_handlers = MIN(n_handlers, 64); /* Stop the old threads (if any). 
*/ if (udpif->handlers && udpif->n_handlers != n_handlers) { size_t i; latch_set(&udpif->exit_latch); /* Wake the handlers so they can exit. */ for (i = 0; i < udpif->n_handlers; i++) { struct handler *handler = &udpif->handlers[i]; ovs_mutex_lock(&handler->mutex); xpthread_cond_signal(&handler->wake_cond); ovs_mutex_unlock(&handler->mutex); } xpthread_join(udpif->dispatcher, NULL); for (i = 0; i < udpif->n_handlers; i++) { struct handler *handler = &udpif->handlers[i]; struct upcall *miss, *next; xpthread_join(handler->thread, NULL); ovs_mutex_lock(&handler->mutex); LIST_FOR_EACH_SAFE (miss, next, list_node, &handler->upcalls) { list_remove(&miss->list_node); upcall_destroy(miss); } ovs_mutex_unlock(&handler->mutex); ovs_mutex_destroy(&handler->mutex); xpthread_cond_destroy(&handler->wake_cond); } latch_poll(&udpif->exit_latch); free(udpif->handlers); udpif->handlers = NULL; udpif->n_handlers = 0; } /* Start new threads (if necessary). */ if (!udpif->handlers && n_handlers) { size_t i; udpif->n_handlers = n_handlers; udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers); for (i = 0; i < udpif->n_handlers; i++) { struct handler *handler = &udpif->handlers[i]; handler->udpif = udpif; list_init(&handler->upcalls); xpthread_cond_init(&handler->wake_cond, NULL); ovs_mutex_init(&handler->mutex); xpthread_create(&handler->thread, NULL, udpif_miss_handler, handler); } xpthread_create(&udpif->dispatcher, NULL, udpif_dispatcher, udpif); } } void udpif_wait(struct udpif *udpif) { uint64_t seq = seq_read(udpif->wait_seq); if (!guarded_list_is_empty(&udpif->drop_keys) || !guarded_list_is_empty(&udpif->upcalls) || !guarded_list_is_empty(&udpif->fmbs)) { poll_immediate_wake(); } else { seq_wait(udpif->wait_seq, seq); } } /* Notifies 'udpif' that something changed which may render previous * xlate_actions() results invalid. 
*/ void udpif_revalidate(struct udpif *udpif) { struct flow_miss_batch *fmb, *next_fmb; unsigned int junk; struct list fmbs; /* Since we remove each miss on revalidation, their statistics won't be * accounted to the appropriate 'facet's in the upper layer. In most * cases, this is alright because we've already pushed the stats to the * relevant rules. However, NetFlow requires absolute packet counts on * 'facet's which could now be incorrect. */ atomic_add(&udpif->reval_seq, 1, &junk); guarded_list_pop_all(&udpif->fmbs, &fmbs); LIST_FOR_EACH_SAFE (fmb, next_fmb, list_node, &fmbs) { list_remove(&fmb->list_node); flow_miss_batch_destroy(fmb); } udpif_drop_key_clear(udpif); } /* Retreives the next upcall which ofproto-dpif is responsible for handling. * The caller is responsible for destroying the returned upcall with * upcall_destroy(). */ struct upcall * upcall_next(struct udpif *udpif) { struct list *next = guarded_list_pop_front(&udpif->upcalls); return next ? CONTAINER_OF(next, struct upcall, list_node) : NULL; } /* Destroys and deallocates 'upcall'. */ void upcall_destroy(struct upcall *upcall) { if (upcall) { ofpbuf_uninit(&upcall->upcall_buf); free(upcall); } } /* Retreives the next batch of processed flow misses for 'udpif' to install. * The caller is responsible for destroying it with flow_miss_batch_destroy(). */ struct flow_miss_batch * flow_miss_batch_next(struct udpif *udpif) { int i; for (i = 0; i < 50; i++) { struct flow_miss_batch *next; unsigned int reval_seq; struct list *next_node; next_node = guarded_list_pop_front(&udpif->fmbs); if (!next_node) { break; } next = CONTAINER_OF(next_node, struct flow_miss_batch, list_node); atomic_read(&udpif->reval_seq, &reval_seq); if (next->reval_seq == reval_seq) { return next; } flow_miss_batch_destroy(next); } return NULL; } /* Destroys and deallocates 'fmb'. 
*/ void flow_miss_batch_destroy(struct flow_miss_batch *fmb) { struct flow_miss *miss, *next; if (!fmb) { return; } HMAP_FOR_EACH_SAFE (miss, next, hmap_node, &fmb->misses) { hmap_remove(&fmb->misses, &miss->hmap_node); miss_destroy(miss); } hmap_destroy(&fmb->misses); free(fmb); } /* Retreives the next drop key which ofproto-dpif needs to process. The caller * is responsible for destroying it with drop_key_destroy(). */ struct drop_key * drop_key_next(struct udpif *udpif) { struct list *next = guarded_list_pop_front(&udpif->drop_keys); return next ? CONTAINER_OF(next, struct drop_key, list_node) : NULL; } /* Destorys and deallocates 'drop_key'. */ void drop_key_destroy(struct drop_key *drop_key) { if (drop_key) { free(drop_key->key); free(drop_key); } } /* Clears all drop keys waiting to be processed by drop_key_next(). */ void udpif_drop_key_clear(struct udpif *udpif) { struct drop_key *drop_key, *next; struct list list; guarded_list_pop_all(&udpif->drop_keys, &list); LIST_FOR_EACH_SAFE (drop_key, next, list_node, &list) { list_remove(&drop_key->list_node); drop_key_destroy(drop_key); } } /* The dispatcher thread is responsible for receving upcalls from the kernel, * assigning the miss upcalls to a miss_handler thread, and assigning the more * complex ones to ofproto-dpif directly. */ static void * udpif_dispatcher(void *arg) { struct udpif *udpif = arg; set_subprogram_name("dispatcher"); while (!latch_is_set(&udpif->exit_latch)) { recv_upcalls(udpif); dpif_recv_wait(udpif->dpif); latch_wait(&udpif->exit_latch); poll_block(); } return NULL; } /* The miss handler thread is responsible for processing miss upcalls retreived * by the dispatcher thread. Once finished it passes the processed miss * upcalls to ofproto-dpif where they're installed in the datapath. 
*/ static void * udpif_miss_handler(void *arg) { struct list misses = LIST_INITIALIZER(&misses); struct handler *handler = arg; set_subprogram_name("miss_handler"); for (;;) { size_t i; ovs_mutex_lock(&handler->mutex); if (latch_is_set(&handler->udpif->exit_latch)) { ovs_mutex_unlock(&handler->mutex); return NULL; } if (!handler->n_upcalls) { ovs_mutex_cond_wait(&handler->wake_cond, &handler->mutex); } for (i = 0; i < FLOW_MISS_MAX_BATCH; i++) { if (handler->n_upcalls) { handler->n_upcalls--; list_push_back(&misses, list_pop_front(&handler->upcalls)); } else { break; } } ovs_mutex_unlock(&handler->mutex); handle_miss_upcalls(handler->udpif, &misses); } } static void miss_destroy(struct flow_miss *miss) { struct upcall *upcall, *next; LIST_FOR_EACH_SAFE (upcall, next, list_node, &miss->upcalls) { list_remove(&upcall->list_node); upcall_destroy(upcall); } xlate_out_uninit(&miss->xout); } static enum upcall_type classify_upcall(const struct upcall *upcall) { const struct dpif_upcall *dpif_upcall = &upcall->dpif_upcall; union user_action_cookie cookie; size_t userdata_len; /* First look at the upcall type. */ switch (dpif_upcall->type) { case DPIF_UC_ACTION: break; case DPIF_UC_MISS: return MISS_UPCALL; case DPIF_N_UC_TYPES: default: VLOG_WARN_RL(&rl, "upcall has unexpected type %"PRIu32, dpif_upcall->type); return BAD_UPCALL; } /* "action" upcalls need a closer look. 
*/ if (!dpif_upcall->userdata) { VLOG_WARN_RL(&rl, "action upcall missing cookie"); return BAD_UPCALL; } userdata_len = nl_attr_get_size(dpif_upcall->userdata); if (userdata_len < sizeof cookie.type || userdata_len > sizeof cookie) { VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %zu", userdata_len); return BAD_UPCALL; } memset(&cookie, 0, sizeof cookie); memcpy(&cookie, nl_attr_get(dpif_upcall->userdata), userdata_len); if (userdata_len == sizeof cookie.sflow && cookie.type == USER_ACTION_COOKIE_SFLOW) { return SFLOW_UPCALL; } else if (userdata_len == sizeof cookie.slow_path && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) { return MISS_UPCALL; } else if (userdata_len == sizeof cookie.flow_sample && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) { return FLOW_SAMPLE_UPCALL; } else if (userdata_len == sizeof cookie.ipfix && cookie.type == USER_ACTION_COOKIE_IPFIX) { return IPFIX_UPCALL; } else { VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16 " and size %zu", cookie.type, userdata_len); return BAD_UPCALL; } } static void recv_upcalls(struct udpif *udpif) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); size_t n_udpif_new_upcalls = 0; struct handler *handler; int n; for (;;) { struct upcall *upcall; int error; upcall = xmalloc(sizeof *upcall); ofpbuf_use_stub(&upcall->upcall_buf, upcall->upcall_stub, sizeof upcall->upcall_stub); error = dpif_recv(udpif->dpif, &upcall->dpif_upcall, &upcall->upcall_buf); if (error) { upcall_destroy(upcall); break; } upcall->type = classify_upcall(upcall); if (upcall->type == BAD_UPCALL) { upcall_destroy(upcall); } else if (upcall->type == MISS_UPCALL) { struct dpif_upcall *dupcall = &upcall->dpif_upcall; uint32_t hash = udpif->secret; struct nlattr *nla; size_t n_bytes, left; n_bytes = 0; NL_ATTR_FOR_EACH (nla, left, dupcall->key, dupcall->key_len) { enum ovs_key_attr type = nl_attr_type(nla); if (type == OVS_KEY_ATTR_IN_PORT || type == OVS_KEY_ATTR_TCP || type == OVS_KEY_ATTR_UDP) { if 
(nl_attr_get_size(nla) == 4) { ovs_be32 attr = nl_attr_get_be32(nla); hash = mhash_add(hash, (OVS_FORCE uint32_t) attr); n_bytes += 4; } else { VLOG_WARN("Netlink attribute with incorrect size."); } } } hash = mhash_finish(hash, n_bytes); handler = &udpif->handlers[hash % udpif->n_handlers]; ovs_mutex_lock(&handler->mutex); if (handler->n_upcalls < MAX_QUEUE_LENGTH) { list_push_back(&handler->upcalls, &upcall->list_node); handler->n_new_upcalls = ++handler->n_upcalls; if (handler->n_new_upcalls >= FLOW_MISS_MAX_BATCH) { xpthread_cond_signal(&handler->wake_cond); } ovs_mutex_unlock(&handler->mutex); if (!VLOG_DROP_DBG(&rl)) { struct ds ds = DS_EMPTY_INITIALIZER; odp_flow_key_format(upcall->dpif_upcall.key, upcall->dpif_upcall.key_len, &ds); VLOG_DBG("dispatcher: miss enqueue (%s)", ds_cstr(&ds)); ds_destroy(&ds); } } else { ovs_mutex_unlock(&handler->mutex); COVERAGE_INC(miss_queue_overflow); upcall_destroy(upcall); } } else { size_t len; len = guarded_list_push_back(&udpif->upcalls, &upcall->list_node, MAX_QUEUE_LENGTH); if (len > 0) { n_udpif_new_upcalls = len; if (n_udpif_new_upcalls >= FLOW_MISS_MAX_BATCH) { seq_change(udpif->wait_seq); } } else { COVERAGE_INC(upcall_queue_overflow); upcall_destroy(upcall); } } } for (n = 0; n < udpif->n_handlers; ++n) { handler = &udpif->handlers[n]; if (handler->n_new_upcalls) { handler->n_new_upcalls = 0; ovs_mutex_lock(&handler->mutex); xpthread_cond_signal(&handler->wake_cond); ovs_mutex_unlock(&handler->mutex); } } if (n_udpif_new_upcalls) { seq_change(udpif->wait_seq); } } static struct flow_miss * flow_miss_find(struct hmap *todo, const struct ofproto_dpif *ofproto, const struct flow *flow, uint32_t hash) { struct flow_miss *miss; HMAP_FOR_EACH_WITH_HASH (miss, hmap_node, hash, todo) { if (miss->ofproto == ofproto && flow_equal(&miss->flow, flow)) { return miss; } } return NULL; } /* Executes flow miss 'miss'. May add any required datapath operations * to 'ops', incrementing '*n_ops' for each new op. 
*/ static void execute_flow_miss(struct flow_miss *miss, struct dpif_op *ops, size_t *n_ops) { struct ofproto_dpif *ofproto = miss->ofproto; struct ofpbuf *packet; struct xlate_in xin; memset(&miss->stats, 0, sizeof miss->stats); miss->stats.used = time_msec(); LIST_FOR_EACH (packet, list_node, &miss->packets) { miss->stats.tcp_flags |= packet_get_tcp_flags(packet, &miss->flow); miss->stats.n_bytes += packet->size; miss->stats.n_packets++; } xlate_in_init(&xin, ofproto, &miss->flow, NULL, miss->stats.tcp_flags, NULL); xin.may_learn = true; xin.resubmit_stats = &miss->stats; xlate_actions(&xin, &miss->xout); if (miss->xout.fail_open) { LIST_FOR_EACH (packet, list_node, &miss->packets) { struct ofputil_packet_in *pin; /* Extra-special case for fail-open mode. * * We are in fail-open mode and the packet matched the fail-open * rule, but we are connected to a controller too. We should send * the packet up to the controller in the hope that it will try to * set up a flow and thereby allow us to exit fail-open. * * See the top-level comment in fail-open.c for more information. */ pin = xmalloc(sizeof(*pin)); pin->packet = xmemdup(packet->data, packet->size); pin->packet_len = packet->size; pin->reason = OFPR_NO_MATCH; pin->controller_id = 0; pin->table_id = 0; pin->cookie = 0; pin->send_len = 0; /* Not used for flow table misses. */ flow_get_metadata(&miss->flow, &pin->fmd); ofproto_dpif_send_packet_in(ofproto, pin); } } if (miss->xout.slow) { LIST_FOR_EACH (packet, list_node, &miss->packets) { struct xlate_in xin; xlate_in_init(&xin, miss->ofproto, &miss->flow, NULL, 0, packet); xlate_actions_for_side_effects(&xin); } } if (miss->xout.odp_actions.size) { LIST_FOR_EACH (packet, list_node, &miss->packets) { struct dpif_op *op = &ops[*n_ops]; struct dpif_execute *execute = &op->u.execute; if (miss->flow.in_port.ofp_port != vsp_realdev_to_vlandev(miss->ofproto, miss->flow.in_port.ofp_port, miss->flow.vlan_tci)) { /* This packet was received on a VLAN splinter port. 
We * added a VLAN to the packet to make the packet resemble * the flow, but the actions were composed assuming that * the packet contained no VLAN. So, we must remove the * VLAN header from the packet before trying to execute the * actions. */ eth_pop_vlan(packet); } op->type = DPIF_OP_EXECUTE; execute->key = miss->key; execute->key_len = miss->key_len; execute->packet = packet; execute->actions = miss->xout.odp_actions.data; execute->actions_len = miss->xout.odp_actions.size; (*n_ops)++; } } } static void handle_miss_upcalls(struct udpif *udpif, struct list *upcalls) { struct dpif_op *opsp[FLOW_MISS_MAX_BATCH]; struct dpif_op ops[FLOW_MISS_MAX_BATCH]; struct upcall *upcall, *next; struct flow_miss_batch *fmb; size_t n_upcalls, n_ops, i; struct flow_miss *miss; unsigned int reval_seq; /* Construct the to-do list. * * This just amounts to extracting the flow from each packet and sticking * the packets that have the same flow in the same "flow_miss" structure so * that we can process them together. */ fmb = xmalloc(sizeof *fmb); atomic_read(&udpif->reval_seq, &fmb->reval_seq); hmap_init(&fmb->misses); n_upcalls = 0; LIST_FOR_EACH_SAFE (upcall, next, list_node, upcalls) { struct dpif_upcall *dupcall = &upcall->dpif_upcall; struct flow_miss *miss = &fmb->miss_buf[n_upcalls]; struct flow_miss *existing_miss; struct ofproto_dpif *ofproto; odp_port_t odp_in_port; struct flow flow; uint32_t hash; int error; error = xlate_receive(udpif->backer, dupcall->packet, dupcall->key, dupcall->key_len, &flow, &miss->key_fitness, &ofproto, &odp_in_port); if (error == ENODEV) { struct drop_key *drop_key; /* Received packet on datapath port for which we couldn't * associate an ofproto. This can happen if a port is removed * while traffic is being received. Print a rate-limited message * in case it happens frequently. Install a drop flow so * that future packets of the flow are inexpensively dropped * in the kernel. 
*/ VLOG_INFO_RL(&rl, "received packet on unassociated datapath port " "%"PRIu32, odp_in_port); drop_key = xmalloc(sizeof *drop_key); drop_key->key = xmemdup(dupcall->key, dupcall->key_len); drop_key->key_len = dupcall->key_len; if (guarded_list_push_back(&udpif->drop_keys, &drop_key->list_node, MAX_QUEUE_LENGTH)) { seq_change(udpif->wait_seq); } else { COVERAGE_INC(drop_queue_overflow); drop_key_destroy(drop_key); } continue; } else if (error) { continue; } flow_extract(dupcall->packet, flow.skb_priority, flow.pkt_mark, &flow.tunnel, &flow.in_port, &miss->flow); /* Add other packets to a to-do list. */ hash = flow_hash(&miss->flow, 0); existing_miss = flow_miss_find(&fmb->misses, ofproto, &miss->flow, hash); if (!existing_miss) { hmap_insert(&fmb->misses, &miss->hmap_node, hash); miss->ofproto = ofproto; miss->key = dupcall->key; miss->key_len = dupcall->key_len; miss->upcall_type = dupcall->type; list_init(&miss->packets); list_init(&miss->upcalls); n_upcalls++; } else { miss = existing_miss; } list_push_back(&miss->packets, &dupcall->packet->list_node); list_remove(&upcall->list_node); list_push_back(&miss->upcalls, &upcall->list_node); } LIST_FOR_EACH_SAFE (upcall, next, list_node, upcalls) { list_remove(&upcall->list_node); upcall_destroy(upcall); } /* Process each element in the to-do list, constructing the set of * operations to batch. */ n_ops = 0; HMAP_FOR_EACH (miss, hmap_node, &fmb->misses) { execute_flow_miss(miss, ops, &n_ops); } ovs_assert(n_ops <= ARRAY_SIZE(ops)); /* Execute batch. 
*/ for (i = 0; i < n_ops; i++) { opsp[i] = &ops[i]; } dpif_operate(udpif->dpif, opsp, n_ops); atomic_read(&udpif->reval_seq, &reval_seq); if (reval_seq != fmb->reval_seq) { COVERAGE_INC(fmb_queue_revalidated); flow_miss_batch_destroy(fmb); } else if (!guarded_list_push_back(&udpif->fmbs, &fmb->list_node, MAX_QUEUE_LENGTH)) { COVERAGE_INC(fmb_queue_overflow); flow_miss_batch_destroy(fmb); } else { seq_change(udpif->wait_seq); } } openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-upcall.h000066400000000000000000000104031226605124000231770ustar00rootroot00000000000000/* Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OFPROTO_DPIF_UPCALL_H #define OFPROTO_DPIF_UPCALL_H #define FLOW_MISS_MAX_BATCH 50 #include "dpif.h" #include "flow.h" #include "hmap.h" #include "list.h" #include "odp-util.h" #include "ofpbuf.h" #include "ofproto-dpif-xlate.h" struct dpif; struct dpif_backer; /* udif is responsible for retrieving upcalls from the kernel, processing miss * upcalls, and handing more complex ones up to the main ofproto-dpif * module. */ struct udpif *udpif_create(struct dpif_backer *, struct dpif *); void udpif_recv_set(struct udpif *, size_t n_workers, bool enable); void udpif_destroy(struct udpif *); void udpif_wait(struct udpif *); void udpif_revalidate(struct udpif *); /* udpif can handle some upcalls on its own. Others need the main ofproto_dpif * code to handle them. 
This interface passes upcalls not handled by udpif up * to the ofproto_dpif main thread. */ /* Type of an upcall. */ enum upcall_type { /* Handled internally by udpif code. Not returned by upcall_next().*/ BAD_UPCALL, /* Some kind of bug somewhere. */ MISS_UPCALL, /* A flow miss. */ /* Require main thread's involvement. May be returned by upcall_next(). */ SFLOW_UPCALL, /* sFlow sample. */ FLOW_SAMPLE_UPCALL, /* Per-flow sampling. */ IPFIX_UPCALL /* Per-bridge sampling. */ }; /* An upcall. */ struct upcall { struct list list_node; /* For queuing upcalls. */ enum upcall_type type; /* Classification. */ /* Raw upcall plus data for keeping track of the memory backing it. */ struct dpif_upcall dpif_upcall; /* As returned by dpif_recv() */ struct ofpbuf upcall_buf; /* Owns some data in 'dpif_upcall'. */ uint64_t upcall_stub[512 / 8]; /* Buffer to reduce need for malloc(). */ }; struct upcall *upcall_next(struct udpif *); void upcall_destroy(struct upcall *); /* udpif figures out how to forward packets, and does forward them, but it * can't set up datapath flows on its own. This interface passes packet * forwarding data from udpif to the higher level ofproto_dpif to allow the * latter to set up datapath flows. */ /* Flow miss batching. * * Some dpifs implement operations faster when you hand them off in a batch. * To allow batching, "struct flow_miss" queues the dpif-related work needed * for a given flow. Each "struct flow_miss" corresponds to sending one or * more packets, plus possibly installing the flow in the dpif. 
*/ struct flow_miss { struct hmap_node hmap_node; struct ofproto_dpif *ofproto; struct flow flow; enum odp_key_fitness key_fitness; const struct nlattr *key; size_t key_len; struct list packets; enum dpif_upcall_type upcall_type; struct dpif_flow_stats stats; struct xlate_out xout; struct list upcalls; }; struct flow_miss_batch { struct list list_node; struct flow_miss miss_buf[FLOW_MISS_MAX_BATCH]; struct hmap misses; unsigned int reval_seq; }; struct flow_miss_batch *flow_miss_batch_next(struct udpif *); void flow_miss_batch_destroy(struct flow_miss_batch *); /* Drop keys are odp flow keys which have drop flows installed in the kernel. * These are datapath flows which have no associated ofproto, if they did we * would use facets. * * udpif can't install drop flows by itself. This interfaces allows udpif to * pass the drop flows up to ofproto_dpif to get it to install them. */ struct drop_key { struct hmap_node hmap_node; struct list list_node; struct nlattr *key; size_t key_len; }; struct drop_key *drop_key_next(struct udpif *); void drop_key_destroy(struct drop_key *); void udpif_drop_key_clear(struct udpif *); #endif /* ofproto-dpif-upcall.h */ openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-xlate.c000066400000000000000000002542311226605124000230400ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ofproto/ofproto-dpif-xlate.h" #include #include "bfd.h" #include "bitmap.h" #include "bond.h" #include "bundle.h" #include "byte-order.h" #include "cfm.h" #include "connmgr.h" #include "coverage.h" #include "dpif.h" #include "dynamic-string.h" #include "in-band.h" #include "lacp.h" #include "learn.h" #include "list.h" #include "mac-learning.h" #include "meta-flow.h" #include "multipath.h" #include "netdev-vport.h" #include "netlink.h" #include "nx-match.h" #include "odp-execute.h" #include "ofp-actions.h" #include "ofproto/ofproto-dpif-ipfix.h" #include "ofproto/ofproto-dpif-mirror.h" #include "ofproto/ofproto-dpif-sflow.h" #include "ofproto/ofproto-dpif.h" #include "ofproto/ofproto-provider.h" #include "tunnel.h" #include "vlog.h" COVERAGE_DEFINE(xlate_actions); VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate); /* Maximum depth of flow table recursion (due to resubmit actions) in a * flow translation. */ #define MAX_RESUBMIT_RECURSION 64 struct ovs_rwlock xlate_rwlock = OVS_RWLOCK_INITIALIZER; struct xbridge { struct hmap_node hmap_node; /* Node in global 'xbridges' map. */ struct ofproto_dpif *ofproto; /* Key in global 'xbridges' map. */ struct list xbundles; /* Owned xbundles. */ struct hmap xports; /* Indexed by ofp_port. */ char *name; /* Name used in log messages. */ struct dpif *dpif; /* Datapath interface. */ struct mac_learning *ml; /* Mac learning handle. */ struct mbridge *mbridge; /* Mirroring. */ struct dpif_sflow *sflow; /* SFlow handle, or null. */ struct dpif_ipfix *ipfix; /* Ipfix handle, or null. */ struct stp *stp; /* STP or null if disabled. */ /* Special rules installed by ofproto-dpif. */ struct rule_dpif *miss_rule; struct rule_dpif *no_packet_in_rule; enum ofp_config_flags frag; /* Fragmentation handling. */ bool has_netflow; /* Bridge runs netflow? */ bool has_in_band; /* Bridge has in band control? */ bool forward_bpdu; /* Bridge forwards STP BPDUs? 
*/ }; struct xbundle { struct hmap_node hmap_node; /* In global 'xbundles' map. */ struct ofbundle *ofbundle; /* Key in global 'xbundles' map. */ struct list list_node; /* In parent 'xbridges' list. */ struct xbridge *xbridge; /* Parent xbridge. */ struct list xports; /* Contains "struct xport"s. */ char *name; /* Name used in log messages. */ struct bond *bond; /* Nonnull iff more than one port. */ struct lacp *lacp; /* LACP handle or null. */ enum port_vlan_mode vlan_mode; /* VLAN mode. */ int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */ unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1. * NULL if all VLANs are trunked. */ bool use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */ bool floodable; /* No port has OFPUTIL_PC_NO_FLOOD set? */ }; struct xport { struct hmap_node hmap_node; /* Node in global 'xports' map. */ struct ofport_dpif *ofport; /* Key in global 'xports map. */ struct hmap_node ofp_node; /* Node in parent xbridge 'xports' map. */ ofp_port_t ofp_port; /* Key in parent xbridge 'xports' map. */ odp_port_t odp_port; /* Datapath port number or ODPP_NONE. */ struct list bundle_node; /* In parent xbundle (if it exists). */ struct xbundle *xbundle; /* Parent xbundle or null. */ struct netdev *netdev; /* 'ofport''s netdev. */ struct xbridge *xbridge; /* Parent bridge. */ struct xport *peer; /* Patch port peer or null. */ enum ofputil_port_config config; /* OpenFlow port configuration. */ int stp_port_no; /* STP port number or -1 if not in use. */ struct hmap skb_priorities; /* Map of 'skb_priority_to_dscp's. */ bool may_enable; /* May be enabled in bonds. */ bool is_tunnel; /* Is a tunnel port. */ struct cfm *cfm; /* CFM handle or null. */ struct bfd *bfd; /* BFD handle or null. */ }; struct xlate_ctx { struct xlate_in *xin; struct xlate_out *xout; const struct xbridge *xbridge; /* Flow at the last commit. */ struct flow base_flow; /* Tunnel IP destination address as received. 
This is stored separately * as the base_flow.tunnel is cleared on init to reflect the datapath * behavior. Used to make sure not to send tunneled output to ourselves, * which might lead to an infinite loop. This could happen easily * if a tunnel is marked as 'ip_remote=flow', and the flow does not * actually set the tun_dst field. */ ovs_be32 orig_tunnel_ip_dst; /* Stack for the push and pop actions. Each stack element is of type * "union mf_subvalue". */ union mf_subvalue init_stack[1024 / sizeof(union mf_subvalue)]; struct ofpbuf stack; /* The rule that we are currently translating, or NULL. */ struct rule_dpif *rule; int recurse; /* Recursion level, via xlate_table_action. */ uint32_t orig_skb_priority; /* Priority when packet arrived. */ uint8_t table_id; /* OpenFlow table ID where flow was found. */ uint32_t sflow_n_outputs; /* Number of output ports. */ odp_port_t sflow_odp_port; /* Output port for composing sFlow action. */ uint16_t user_cookie_offset;/* Used for user_action_cookie fixup. */ bool exit; /* No further actions should be processed. */ }; /* A controller may use OFPP_NONE as the ingress port to indicate that * it did not arrive on a "real" port. 'ofpp_none_bundle' exists for * when an input bundle is needed for validation (e.g., mirroring or * OFPP_NORMAL processing). It is not connected to an 'ofproto' or have * any 'port' structs, so care must be taken when dealing with it. * The bundle's name and vlan mode are initialized in lookup_input_bundle() */ static struct xbundle ofpp_none_bundle; /* Node in 'xport''s 'skb_priorities' map. Used to maintain a map from * 'priority' (the datapath's term for QoS queue) to the dscp bits which all * traffic egressing the 'ofport' with that priority should be marked with. */ struct skb_priority_to_dscp { struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'skb_priorities'. */ uint32_t skb_priority; /* Priority of this queue (see struct flow). */ uint8_t dscp; /* DSCP bits to mark outgoing traffic with. 
*/ }; static struct hmap xbridges = HMAP_INITIALIZER(&xbridges); static struct hmap xbundles = HMAP_INITIALIZER(&xbundles); static struct hmap xports = HMAP_INITIALIZER(&xports); static bool may_receive(const struct xport *, struct xlate_ctx *); static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len, struct xlate_ctx *); static void xlate_normal(struct xlate_ctx *); static void xlate_report(struct xlate_ctx *, const char *); static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port, uint8_t table_id, bool may_packet_in); static bool input_vid_is_valid(uint16_t vid, struct xbundle *, bool warn); static uint16_t input_vid_to_vlan(const struct xbundle *, uint16_t vid); static void output_normal(struct xlate_ctx *, const struct xbundle *, uint16_t vlan); static void compose_output_action(struct xlate_ctx *, ofp_port_t ofp_port); static struct xbridge *xbridge_lookup(const struct ofproto_dpif *); static struct xbundle *xbundle_lookup(const struct ofbundle *); static struct xport *xport_lookup(const struct ofport_dpif *); static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port); static struct skb_priority_to_dscp *get_skb_priority(const struct xport *, uint32_t skb_priority); static void clear_skb_priorities(struct xport *); static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority, uint8_t *dscp); void xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name, struct dpif *dpif, struct rule_dpif *miss_rule, struct rule_dpif *no_packet_in_rule, const struct mac_learning *ml, struct stp *stp, const struct mbridge *mbridge, const struct dpif_sflow *sflow, const struct dpif_ipfix *ipfix, enum ofp_config_flags frag, bool forward_bpdu, bool has_in_band, bool has_netflow) { struct xbridge *xbridge = xbridge_lookup(ofproto); if (!xbridge) { xbridge = xzalloc(sizeof *xbridge); xbridge->ofproto = ofproto; hmap_insert(&xbridges, &xbridge->hmap_node, hash_pointer(ofproto, 0)); hmap_init(&xbridge->xports); 
list_init(&xbridge->xbundles); } if (xbridge->ml != ml) { mac_learning_unref(xbridge->ml); xbridge->ml = mac_learning_ref(ml); } if (xbridge->mbridge != mbridge) { mbridge_unref(xbridge->mbridge); xbridge->mbridge = mbridge_ref(mbridge); } if (xbridge->sflow != sflow) { dpif_sflow_unref(xbridge->sflow); xbridge->sflow = dpif_sflow_ref(sflow); } if (xbridge->ipfix != ipfix) { dpif_ipfix_unref(xbridge->ipfix); xbridge->ipfix = dpif_ipfix_ref(ipfix); } if (xbridge->stp != stp) { stp_unref(xbridge->stp); xbridge->stp = stp_ref(stp); } free(xbridge->name); xbridge->name = xstrdup(name); xbridge->dpif = dpif; xbridge->forward_bpdu = forward_bpdu; xbridge->has_in_band = has_in_band; xbridge->has_netflow = has_netflow; xbridge->frag = frag; xbridge->miss_rule = miss_rule; xbridge->no_packet_in_rule = no_packet_in_rule; } void xlate_remove_ofproto(struct ofproto_dpif *ofproto) { struct xbridge *xbridge = xbridge_lookup(ofproto); struct xbundle *xbundle, *next_xbundle; struct xport *xport, *next_xport; if (!xbridge) { return; } HMAP_FOR_EACH_SAFE (xport, next_xport, ofp_node, &xbridge->xports) { xlate_ofport_remove(xport->ofport); } LIST_FOR_EACH_SAFE (xbundle, next_xbundle, list_node, &xbridge->xbundles) { xlate_bundle_remove(xbundle->ofbundle); } hmap_remove(&xbridges, &xbridge->hmap_node); mac_learning_unref(xbridge->ml); mbridge_unref(xbridge->mbridge); dpif_sflow_unref(xbridge->sflow); dpif_ipfix_unref(xbridge->ipfix); stp_unref(xbridge->stp); hmap_destroy(&xbridge->xports); free(xbridge->name); free(xbridge); } void xlate_bundle_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle, const char *name, enum port_vlan_mode vlan_mode, int vlan, unsigned long *trunks, bool use_priority_tags, const struct bond *bond, const struct lacp *lacp, bool floodable) { struct xbundle *xbundle = xbundle_lookup(ofbundle); if (!xbundle) { xbundle = xzalloc(sizeof *xbundle); xbundle->ofbundle = ofbundle; xbundle->xbridge = xbridge_lookup(ofproto); hmap_insert(&xbundles, 
&xbundle->hmap_node, hash_pointer(ofbundle, 0)); list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node); list_init(&xbundle->xports); } ovs_assert(xbundle->xbridge); free(xbundle->name); xbundle->name = xstrdup(name); xbundle->vlan_mode = vlan_mode; xbundle->vlan = vlan; xbundle->trunks = trunks; xbundle->use_priority_tags = use_priority_tags; xbundle->floodable = floodable; if (xbundle->bond != bond) { bond_unref(xbundle->bond); xbundle->bond = bond_ref(bond); } if (xbundle->lacp != lacp) { lacp_unref(xbundle->lacp); xbundle->lacp = lacp_ref(lacp); } } void xlate_bundle_remove(struct ofbundle *ofbundle) { struct xbundle *xbundle = xbundle_lookup(ofbundle); struct xport *xport, *next; if (!xbundle) { return; } LIST_FOR_EACH_SAFE (xport, next, bundle_node, &xbundle->xports) { list_remove(&xport->bundle_node); xport->xbundle = NULL; } hmap_remove(&xbundles, &xbundle->hmap_node); list_remove(&xbundle->list_node); bond_unref(xbundle->bond); lacp_unref(xbundle->lacp); free(xbundle->name); free(xbundle); } void xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle, struct ofport_dpif *ofport, ofp_port_t ofp_port, odp_port_t odp_port, const struct netdev *netdev, const struct cfm *cfm, const struct bfd *bfd, struct ofport_dpif *peer, int stp_port_no, const struct ofproto_port_queue *qdscp_list, size_t n_qdscp, enum ofputil_port_config config, bool is_tunnel, bool may_enable) { struct xport *xport = xport_lookup(ofport); size_t i; if (!xport) { xport = xzalloc(sizeof *xport); xport->ofport = ofport; xport->xbridge = xbridge_lookup(ofproto); xport->ofp_port = ofp_port; hmap_init(&xport->skb_priorities); hmap_insert(&xports, &xport->hmap_node, hash_pointer(ofport, 0)); hmap_insert(&xport->xbridge->xports, &xport->ofp_node, hash_ofp_port(xport->ofp_port)); } ovs_assert(xport->ofp_port == ofp_port); xport->config = config; xport->stp_port_no = stp_port_no; xport->is_tunnel = is_tunnel; xport->may_enable = may_enable; xport->odp_port = odp_port; if 
(xport->netdev != netdev) { netdev_close(xport->netdev); xport->netdev = netdev_ref(netdev); } if (xport->cfm != cfm) { cfm_unref(xport->cfm); xport->cfm = cfm_ref(cfm); } if (xport->bfd != bfd) { bfd_unref(xport->bfd); xport->bfd = bfd_ref(bfd); } if (xport->peer) { xport->peer->peer = NULL; } xport->peer = xport_lookup(peer); if (xport->peer) { xport->peer->peer = xport; } if (xport->xbundle) { list_remove(&xport->bundle_node); } xport->xbundle = xbundle_lookup(ofbundle); if (xport->xbundle) { list_insert(&xport->xbundle->xports, &xport->bundle_node); } clear_skb_priorities(xport); for (i = 0; i < n_qdscp; i++) { struct skb_priority_to_dscp *pdscp; uint32_t skb_priority; if (dpif_queue_to_priority(xport->xbridge->dpif, qdscp_list[i].queue, &skb_priority)) { continue; } pdscp = xmalloc(sizeof *pdscp); pdscp->skb_priority = skb_priority; pdscp->dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK; hmap_insert(&xport->skb_priorities, &pdscp->hmap_node, hash_int(pdscp->skb_priority, 0)); } } void xlate_ofport_remove(struct ofport_dpif *ofport) { struct xport *xport = xport_lookup(ofport); if (!xport) { return; } if (xport->peer) { xport->peer->peer = NULL; xport->peer = NULL; } if (xport->xbundle) { list_remove(&xport->bundle_node); } clear_skb_priorities(xport); hmap_destroy(&xport->skb_priorities); hmap_remove(&xports, &xport->hmap_node); hmap_remove(&xport->xbridge->xports, &xport->ofp_node); netdev_close(xport->netdev); cfm_unref(xport->cfm); bfd_unref(xport->bfd); free(xport); } /* Given a datpath, packet, and flow metadata ('backer', 'packet', and 'key' * respectively), populates 'flow' with the result of odp_flow_key_to_flow(). * Optionally, if nonnull, populates 'fitnessp' with the fitness of 'flow' as * returned by odp_flow_key_to_flow(). Also, optionally populates 'ofproto' * with the ofproto_dpif, and 'odp_in_port' with the datapath in_port, that * 'packet' ingressed. * * If 'ofproto' is nonnull, requires 'flow''s in_port to exist. 
Otherwise sets * 'flow''s in_port to OFPP_NONE. * * This function does post-processing on data returned from * odp_flow_key_to_flow() to help make VLAN splinters transparent to the rest * of the upcall processing logic. In particular, if the extracted in_port is * a VLAN splinter port, it replaces flow->in_port by the "real" port, sets * flow->vlan_tci correctly for the VLAN of the VLAN splinter port, and pushes * a VLAN header onto 'packet' (if it is nonnull). * * Similarly, this function also includes some logic to help with tunnels. It * may modify 'flow' as necessary to make the tunneling implementation * transparent to the upcall processing logic. * * Returns 0 if successful, ENODEV if the parsed flow has no associated ofport, * or some other positive errno if there are other problems. */ int xlate_receive(const struct dpif_backer *backer, struct ofpbuf *packet, const struct nlattr *key, size_t key_len, struct flow *flow, enum odp_key_fitness *fitnessp, struct ofproto_dpif **ofproto, odp_port_t *odp_in_port) { enum odp_key_fitness fitness; const struct xport *xport; int error = ENODEV; ovs_rwlock_rdlock(&xlate_rwlock); fitness = odp_flow_key_to_flow(key, key_len, flow); if (fitness == ODP_FIT_ERROR) { error = EINVAL; goto exit; } if (odp_in_port) { *odp_in_port = flow->in_port.odp_port; } xport = xport_lookup(tnl_port_should_receive(flow) ? tnl_port_receive(flow) : odp_port_to_ofport(backer, flow->in_port.odp_port)); flow->in_port.ofp_port = xport ? xport->ofp_port : OFPP_NONE; if (!xport) { goto exit; } if (vsp_adjust_flow(xport->xbridge->ofproto, flow)) { if (packet) { /* Make the packet resemble the flow, so that it gets sent to * an OpenFlow controller properly, so that it looks correct * for sFlow, and so that flow_extract() will get the correct * vlan_tci if it is called on 'packet'. 
* * The allocated space inside 'packet' probably also contains * 'key', that is, both 'packet' and 'key' are probably part of * a struct dpif_upcall (see the large comment on that * structure definition), so pushing data on 'packet' is in * general not a good idea since it could overwrite 'key' or * free it as a side effect. However, it's OK in this special * case because we know that 'packet' is inside a Netlink * attribute: pushing 4 bytes will just overwrite the 4-byte * "struct nlattr", which is fine since we don't need that * header anymore. */ eth_push_vlan(packet, flow->vlan_tci); } /* We can't reproduce 'key' from 'flow'. */ fitness = fitness == ODP_FIT_PERFECT ? ODP_FIT_TOO_MUCH : fitness; } error = 0; if (ofproto) { *ofproto = xport->xbridge->ofproto; } exit: if (fitnessp) { *fitnessp = fitness; } ovs_rwlock_unlock(&xlate_rwlock); return error; } static struct xbridge * xbridge_lookup(const struct ofproto_dpif *ofproto) { struct xbridge *xbridge; if (!ofproto) { return NULL; } HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0), &xbridges) { if (xbridge->ofproto == ofproto) { return xbridge; } } return NULL; } static struct xbundle * xbundle_lookup(const struct ofbundle *ofbundle) { struct xbundle *xbundle; if (!ofbundle) { return NULL; } HMAP_FOR_EACH_IN_BUCKET (xbundle, hmap_node, hash_pointer(ofbundle, 0), &xbundles) { if (xbundle->ofbundle == ofbundle) { return xbundle; } } return NULL; } static struct xport * xport_lookup(const struct ofport_dpif *ofport) { struct xport *xport; if (!ofport) { return NULL; } HMAP_FOR_EACH_IN_BUCKET (xport, hmap_node, hash_pointer(ofport, 0), &xports) { if (xport->ofport == ofport) { return xport; } } return NULL; } static struct stp_port * xport_get_stp_port(const struct xport *xport) { return xport->xbridge->stp && xport->stp_port_no != -1 ? 
stp_get_port(xport->xbridge->stp, xport->stp_port_no) : NULL; } static enum stp_state xport_stp_learn_state(const struct xport *xport) { struct stp_port *sp = xport_get_stp_port(xport); return stp_learn_in_state(sp ? stp_port_get_state(sp) : STP_DISABLED); } static bool xport_stp_forward_state(const struct xport *xport) { struct stp_port *sp = xport_get_stp_port(xport); return stp_forward_in_state(sp ? stp_port_get_state(sp) : STP_DISABLED); } /* Returns true if STP should process 'flow'. Sets fields in 'wc' that * were used to make the determination.*/ static bool stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc) { memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); return eth_addr_equals(flow->dl_dst, eth_addr_stp); } static void stp_process_packet(const struct xport *xport, const struct ofpbuf *packet) { struct stp_port *sp = xport_get_stp_port(xport); struct ofpbuf payload = *packet; struct eth_header *eth = payload.data; /* Sink packets on ports that have STP disabled when the bridge has * STP enabled. */ if (!sp || stp_port_get_state(sp) == STP_DISABLED) { return; } /* Trim off padding on payload. */ if (payload.size > ntohs(eth->eth_type) + ETH_HEADER_LEN) { payload.size = ntohs(eth->eth_type) + ETH_HEADER_LEN; } if (ofpbuf_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) { stp_received_bpdu(sp, payload.data, payload.size); } } static struct xport * get_ofp_port(const struct xbridge *xbridge, ofp_port_t ofp_port) { struct xport *xport; HMAP_FOR_EACH_IN_BUCKET (xport, ofp_node, hash_ofp_port(ofp_port), &xbridge->xports) { if (xport->ofp_port == ofp_port) { return xport; } } return NULL; } static odp_port_t ofp_port_to_odp_port(const struct xbridge *xbridge, ofp_port_t ofp_port) { const struct xport *xport = get_ofp_port(xbridge, ofp_port); return xport ? 
xport->odp_port : ODPP_NONE; } static bool xbundle_trunks_vlan(const struct xbundle *bundle, uint16_t vlan) { return (bundle->vlan_mode != PORT_VLAN_ACCESS && (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan))); } static bool xbundle_includes_vlan(const struct xbundle *xbundle, uint16_t vlan) { return vlan == xbundle->vlan || xbundle_trunks_vlan(xbundle, vlan); } static mirror_mask_t xbundle_mirror_out(const struct xbridge *xbridge, struct xbundle *xbundle) { return xbundle != &ofpp_none_bundle ? mirror_bundle_out(xbridge->mbridge, xbundle->ofbundle) : 0; } static mirror_mask_t xbundle_mirror_src(const struct xbridge *xbridge, struct xbundle *xbundle) { return xbundle != &ofpp_none_bundle ? mirror_bundle_src(xbridge->mbridge, xbundle->ofbundle) : 0; } static mirror_mask_t xbundle_mirror_dst(const struct xbridge *xbridge, struct xbundle *xbundle) { return xbundle != &ofpp_none_bundle ? mirror_bundle_dst(xbridge->mbridge, xbundle->ofbundle) : 0; } static struct xbundle * lookup_input_bundle(const struct xbridge *xbridge, ofp_port_t in_port, bool warn, struct xport **in_xportp) { struct xport *xport; /* Find the port and bundle for the received packet. */ xport = get_ofp_port(xbridge, in_port); if (in_xportp) { *in_xportp = xport; } if (xport && xport->xbundle) { return xport->xbundle; } /* Special-case OFPP_NONE, which a controller may use as the ingress * port for traffic that it is sourcing. */ if (in_port == OFPP_NONE) { ofpp_none_bundle.name = "OFPP_NONE"; ofpp_none_bundle.vlan_mode = PORT_VLAN_TRUNK; return &ofpp_none_bundle; } /* Odd. A few possible reasons here: * * - We deleted a port but there are still a few packets queued up * from it. * * - Someone externally added a port (e.g. "ovs-dpctl add-if") that * we don't know about. * * - The ofproto client didn't configure the port as part of a bundle. 
* This is particularly likely to happen if a packet was received on the * port after it was created, but before the client had a chance to * configure its bundle. */ if (warn) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown " "port %"PRIu16, xbridge->name, in_port); } return NULL; } static void add_mirror_actions(struct xlate_ctx *ctx, const struct flow *orig_flow) { const struct xbridge *xbridge = ctx->xbridge; mirror_mask_t mirrors; struct xbundle *in_xbundle; uint16_t vlan; uint16_t vid; mirrors = ctx->xout->mirrors; ctx->xout->mirrors = 0; in_xbundle = lookup_input_bundle(xbridge, orig_flow->in_port.ofp_port, ctx->xin->packet != NULL, NULL); if (!in_xbundle) { return; } mirrors |= xbundle_mirror_src(xbridge, in_xbundle); /* Drop frames on bundles reserved for mirroring. */ if (xbundle_mirror_out(xbridge, in_xbundle)) { if (ctx->xin->packet != NULL) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port " "%s, which is reserved exclusively for mirroring", ctx->xbridge->name, in_xbundle->name); } ofpbuf_clear(&ctx->xout->odp_actions); return; } /* Check VLAN. */ vid = vlan_tci_to_vid(orig_flow->vlan_tci); if (!input_vid_is_valid(vid, in_xbundle, ctx->xin->packet != NULL)) { return; } vlan = input_vid_to_vlan(in_xbundle, vid); if (!mirrors) { return; } /* Restore the original packet before adding the mirror actions. 
*/ ctx->xin->flow = *orig_flow; while (mirrors) { mirror_mask_t dup_mirrors; struct ofbundle *out; unsigned long *vlans; bool vlan_mirrored; bool has_mirror; int out_vlan; has_mirror = mirror_get(xbridge->mbridge, mirror_mask_ffs(mirrors) - 1, &vlans, &dup_mirrors, &out, &out_vlan); ovs_assert(has_mirror); if (vlans) { ctx->xout->wc.masks.vlan_tci |= htons(VLAN_CFI | VLAN_VID_MASK); } vlan_mirrored = !vlans || bitmap_is_set(vlans, vlan); free(vlans); if (!vlan_mirrored) { mirrors = zero_rightmost_1bit(mirrors); continue; } mirrors &= ~dup_mirrors; ctx->xout->mirrors |= dup_mirrors; if (out) { struct xbundle *out_xbundle = xbundle_lookup(out); if (out_xbundle) { output_normal(ctx, out_xbundle, vlan); } } else if (vlan != out_vlan && !eth_addr_is_reserved(orig_flow->dl_dst)) { struct xbundle *xbundle; LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) { if (xbundle_includes_vlan(xbundle, out_vlan) && !xbundle_mirror_out(xbridge, xbundle)) { output_normal(ctx, xbundle, out_vlan); } } } } } /* Given 'vid', the VID obtained from the 802.1Q header that was received as * part of a packet (specify 0 if there was no 802.1Q header), and 'in_xbundle', * the bundle on which the packet was received, returns the VLAN to which the * packet belongs. * * Both 'vid' and the return value are in the range 0...4095. */ static uint16_t input_vid_to_vlan(const struct xbundle *in_xbundle, uint16_t vid) { switch (in_xbundle->vlan_mode) { case PORT_VLAN_ACCESS: return in_xbundle->vlan; break; case PORT_VLAN_TRUNK: return vid; case PORT_VLAN_NATIVE_UNTAGGED: case PORT_VLAN_NATIVE_TAGGED: return vid ? vid : in_xbundle->vlan; default: NOT_REACHED(); } } /* Checks whether a packet with the given 'vid' may ingress on 'in_xbundle'. * If so, returns true. Otherwise, returns false and, if 'warn' is true, logs * a warning. * * 'vid' should be the VID obtained from the 802.1Q header that was received as * part of a packet (specify 0 if there was no 802.1Q header), in the range * 0...4095. 
*/ static bool input_vid_is_valid(uint16_t vid, struct xbundle *in_xbundle, bool warn) { /* Allow any VID on the OFPP_NONE port. */ if (in_xbundle == &ofpp_none_bundle) { return true; } switch (in_xbundle->vlan_mode) { case PORT_VLAN_ACCESS: if (vid) { if (warn) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" tagged " "packet received on port %s configured as VLAN " "%"PRIu16" access port", vid, in_xbundle->name, in_xbundle->vlan); } return false; } return true; case PORT_VLAN_NATIVE_UNTAGGED: case PORT_VLAN_NATIVE_TAGGED: if (!vid) { /* Port must always carry its native VLAN. */ return true; } /* Fall through. */ case PORT_VLAN_TRUNK: if (!xbundle_includes_vlan(in_xbundle, vid)) { if (warn) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" packet " "received on port %s not configured for trunking " "VLAN %"PRIu16, vid, in_xbundle->name, vid); } return false; } return true; default: NOT_REACHED(); } } /* Given 'vlan', the VLAN that a packet belongs to, and * 'out_xbundle', a bundle on which the packet is to be output, returns the VID * that should be included in the 802.1Q header. (If the return value is 0, * then the 802.1Q header should only be included in the packet if there is a * nonzero PCP.) * * Both 'vlan' and the return value are in the range 0...4095. */ static uint16_t output_vlan_to_vid(const struct xbundle *out_xbundle, uint16_t vlan) { switch (out_xbundle->vlan_mode) { case PORT_VLAN_ACCESS: return 0; case PORT_VLAN_TRUNK: case PORT_VLAN_NATIVE_TAGGED: return vlan; case PORT_VLAN_NATIVE_UNTAGGED: return vlan == out_xbundle->vlan ? 
0 : vlan; default: NOT_REACHED(); } } static void output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle, uint16_t vlan) { ovs_be16 *flow_tci = &ctx->xin->flow.vlan_tci; uint16_t vid; ovs_be16 tci, old_tci; struct xport *xport; vid = output_vlan_to_vid(out_xbundle, vlan); if (list_is_empty(&out_xbundle->xports)) { /* Partially configured bundle with no slaves. Drop the packet. */ return; } else if (!out_xbundle->bond) { xport = CONTAINER_OF(list_front(&out_xbundle->xports), struct xport, bundle_node); } else { struct ofport_dpif *ofport; ofport = bond_choose_output_slave(out_xbundle->bond, &ctx->xin->flow, &ctx->xout->wc, vid); xport = xport_lookup(ofport); if (!xport) { /* No slaves enabled, so drop packet. */ return; } } old_tci = *flow_tci; tci = htons(vid); if (tci || out_xbundle->use_priority_tags) { tci |= *flow_tci & htons(VLAN_PCP_MASK); if (tci) { tci |= htons(VLAN_CFI); } } *flow_tci = tci; compose_output_action(ctx, xport->ofp_port); *flow_tci = old_tci; } /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after * migration. Older Citrix-patched Linux DomU used gratuitous ARP replies to * indicate this; newer upstream kernels use gratuitous ARP requests. */ static bool is_gratuitous_arp(const struct flow *flow, struct flow_wildcards *wc) { if (flow->dl_type != htons(ETH_TYPE_ARP)) { return false; } memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); if (!eth_addr_is_broadcast(flow->dl_dst)) { return false; } memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); if (flow->nw_proto == ARP_OP_REPLY) { return true; } else if (flow->nw_proto == ARP_OP_REQUEST) { memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src); memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst); return flow->nw_src == flow->nw_dst; } else { return false; } } /* Checks whether a MAC learning update is necessary for MAC learning table * 'ml' given that a packet matching 'flow' was received on 'in_xbundle' in * 'vlan'. 
* * Most packets processed through the MAC learning table do not actually * change it in any way. This function requires only a read lock on the MAC * learning table, so it is much cheaper in this common case. * * Keep the code here synchronized with that in update_learning_table__() * below. */ static bool is_mac_learning_update_needed(const struct mac_learning *ml, const struct flow *flow, struct flow_wildcards *wc, int vlan, struct xbundle *in_xbundle) OVS_REQ_RDLOCK(ml->rwlock) { struct mac_entry *mac; if (!mac_learning_may_learn(ml, flow->dl_src, vlan)) { return false; } mac = mac_learning_lookup(ml, flow->dl_src, vlan); if (!mac || mac_entry_age(ml, mac)) { return true; } if (is_gratuitous_arp(flow, wc)) { /* We don't want to learn from gratuitous ARP packets that are * reflected back over bond slaves so we lock the learning table. */ if (!in_xbundle->bond) { return true; } else if (mac_entry_is_grat_arp_locked(mac)) { return false; } } return mac->port.p != in_xbundle->ofbundle; } /* Updates MAC learning table 'ml' given that a packet matching 'flow' was * received on 'in_xbundle' in 'vlan'. * * This code repeats all the checks in is_mac_learning_update_needed() because * the lock was released between there and here and thus the MAC learning state * could have changed. * * Keep the code here synchronized with that in is_mac_learning_update_needed() * above. */ static void update_learning_table__(const struct xbridge *xbridge, const struct flow *flow, struct flow_wildcards *wc, int vlan, struct xbundle *in_xbundle) OVS_REQ_WRLOCK(xbridge->ml->rwlock) { struct mac_entry *mac; if (!mac_learning_may_learn(xbridge->ml, flow->dl_src, vlan)) { return; } mac = mac_learning_insert(xbridge->ml, flow->dl_src, vlan); if (is_gratuitous_arp(flow, wc)) { /* We don't want to learn from gratuitous ARP packets that are * reflected back over bond slaves so we lock the learning table. 
*/ if (!in_xbundle->bond) { mac_entry_set_grat_arp_lock(mac); } else if (mac_entry_is_grat_arp_locked(mac)) { return; } } if (mac->port.p != in_xbundle->ofbundle) { /* The log messages here could actually be useful in debugging, * so keep the rate limit relatively high. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is " "on port %s in VLAN %d", xbridge->name, ETH_ADDR_ARGS(flow->dl_src), in_xbundle->name, vlan); mac->port.p = in_xbundle->ofbundle; mac_learning_changed(xbridge->ml); } } static void update_learning_table(const struct xbridge *xbridge, const struct flow *flow, struct flow_wildcards *wc, int vlan, struct xbundle *in_xbundle) { bool need_update; /* Don't learn the OFPP_NONE port. */ if (in_xbundle == &ofpp_none_bundle) { return; } /* First try the common case: no change to MAC learning table. */ ovs_rwlock_rdlock(&xbridge->ml->rwlock); need_update = is_mac_learning_update_needed(xbridge->ml, flow, wc, vlan, in_xbundle); ovs_rwlock_unlock(&xbridge->ml->rwlock); if (need_update) { /* Slow path: MAC learning table might need an update. */ ovs_rwlock_wrlock(&xbridge->ml->rwlock); update_learning_table__(xbridge, flow, wc, vlan, in_xbundle); ovs_rwlock_unlock(&xbridge->ml->rwlock); } } /* Determines whether packets in 'flow' within 'xbridge' should be forwarded or * dropped. Returns true if they may be forwarded, false if they should be * dropped. * * 'in_port' must be the xport that corresponds to flow->in_port. * 'in_port' must be part of a bundle (e.g. in_port->bundle must be nonnull). * * 'vlan' must be the VLAN that corresponds to flow->vlan_tci on 'in_port', as * returned by input_vid_to_vlan(). It must be a valid VLAN for 'in_port', as * checked by input_vid_is_valid(). * * May also add tags to '*tags', although the current implementation only does * so in one special case. 
*/
static bool
is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
              uint16_t vlan)
{
    struct xbundle *in_xbundle = in_port->xbundle;
    const struct xbridge *xbridge = ctx->xbridge;
    struct flow *flow = &ctx->xin->flow;

    /* Drop frames for reserved multicast addresses
     * only if forward_bpdu option is absent. */
    if (!xbridge->forward_bpdu && eth_addr_is_reserved(flow->dl_dst)) {
        xlate_report(ctx, "packet has reserved destination MAC, dropping");
        return false;
    }

    if (in_xbundle->bond) {
        struct mac_entry *mac;

        switch (bond_check_admissibility(in_xbundle->bond, in_port->ofport,
                                         flow->dl_dst)) {
        case BV_ACCEPT:
            break;

        case BV_DROP:
            xlate_report(ctx, "bonding refused admissibility, dropping");
            return false;

        case BV_DROP_IF_MOVED:
            /* Drop if the source MAC is already learned on a different
             * bundle, unless this is a gratuitous ARP and the learned entry
             * is not gratuitous-ARP locked.  The read lock is released on
             * both exits from this case. */
            ovs_rwlock_rdlock(&xbridge->ml->rwlock);
            mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
            if (mac && mac->port.p != in_xbundle->ofbundle &&
                (!is_gratuitous_arp(flow, &ctx->xout->wc)
                 || mac_entry_is_grat_arp_locked(mac))) {
                ovs_rwlock_unlock(&xbridge->ml->rwlock);
                xlate_report(ctx, "SLB bond thinks this packet looped back, "
                            "dropping");
                return false;
            }
            ovs_rwlock_unlock(&xbridge->ml->rwlock);
            break;
        }
    }

    return true;
}

/* Translates the flow in 'ctx' as a MAC-learning switch would forward it:
 * validates the input bundle and VLAN, optionally learns the source MAC, then
 * outputs to the bundle learned for the destination MAC or floods.
 *
 * Every drop decision is reported through xlate_report().  The fields that
 * the decision depends on are un-wildcarded in 'ctx->xout->wc'. */
static void
xlate_normal(struct xlate_ctx *ctx)
{
    struct flow_wildcards *wc = &ctx->xout->wc;
    struct flow *flow = &ctx->xin->flow;
    struct xbundle *in_xbundle;
    struct xport *in_port;
    struct mac_entry *mac;
    void *mac_port;
    uint16_t vlan;
    uint16_t vid;

    ctx->xout->has_normal = true;

    /* The result always depends on both MAC addresses and on the VLAN. */
    memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
    memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
    wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);

    in_xbundle = lookup_input_bundle(ctx->xbridge, flow->in_port.ofp_port,
                                     ctx->xin->packet != NULL, &in_port);
    if (!in_xbundle) {
        xlate_report(ctx, "no input bundle, dropping");
        return;
    }

    /* Drop malformed frames. */
    if (flow->dl_type == htons(ETH_TYPE_VLAN) &&
        !(flow->vlan_tci & htons(VLAN_CFI))) {
        if (ctx->xin->packet != NULL) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
            VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial "
                         "VLAN tag received on port %s",
                         ctx->xbridge->name, in_xbundle->name);
        }
        xlate_report(ctx, "partial VLAN tag, dropping");
        return;
    }

    /* Drop frames on bundles reserved for mirroring. */
    if (xbundle_mirror_out(ctx->xbridge, in_xbundle)) {
        if (ctx->xin->packet != NULL) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
            VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
                         "%s, which is reserved exclusively for mirroring",
                         ctx->xbridge->name, in_xbundle->name);
        }
        xlate_report(ctx, "input port is mirror output port, dropping");
        return;
    }

    /* Check VLAN. */
    vid = vlan_tci_to_vid(flow->vlan_tci);
    if (!input_vid_is_valid(vid, in_xbundle, ctx->xin->packet != NULL)) {
        xlate_report(ctx, "disallowed VLAN VID for this input port, dropping");
        return;
    }
    vlan = input_vid_to_vlan(in_xbundle, vid);

    /* Check other admissibility requirements.  'in_port' may be null here
     * (lookup_input_bundle() stores whatever get_ofp_port() found), in which
     * case the check is skipped. */
    if (in_port && !is_admissible(ctx, in_port, vlan)) {
        return;
    }

    /* Learn source MAC. */
    if (ctx->xin->may_learn) {
        update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
    }

    /* Determine output bundle.  Only the port pointer is copied out while
     * the read lock is held; 'mac' itself is not used after the unlock. */
    ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
    mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
    mac_port = mac ? mac->port.p : NULL;
    ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);

    if (mac_port) {
        struct xbundle *mac_xbundle = xbundle_lookup(mac_port);
        if (mac_xbundle && mac_xbundle != in_xbundle) {
            xlate_report(ctx, "forwarding to learned port");
            output_normal(ctx, mac_xbundle, vlan);
        } else if (!mac_xbundle) {
            xlate_report(ctx, "learned port is unknown, dropping");
        } else {
            xlate_report(ctx, "learned port is input port, dropping");
        }
    } else {
        struct xbundle *xbundle;

        xlate_report(ctx, "no learned MAC for destination, flooding");
        LIST_FOR_EACH (xbundle, list_node, &ctx->xbridge->xbundles) {
            if (xbundle != in_xbundle
                && xbundle_includes_vlan(xbundle, vlan)
                && xbundle->floodable
                && !xbundle_mirror_out(ctx->xbridge, xbundle)) {
                output_normal(ctx, xbundle, vlan);
            }
        }
        ctx->xout->nf_output_iface = NF_OUT_FLOOD;
    }
}

/* Compose SAMPLE action for sFlow or IPFIX.  The given probability is
 * the number of packets out of UINT32_MAX to sample.  The given
 * cookie is passed back in the callback for each sampled packet.
*/
static size_t
compose_sample_action(const struct xbridge *xbridge,
                      struct ofpbuf *odp_actions,
                      const struct flow *flow,
                      const uint32_t probability,
                      const union user_action_cookie *cookie,
                      const size_t cookie_size)
{
    size_t sample_offset, actions_offset;
    size_t cookie_offset;       /* Same type as the return value, so the
                                 * offset is never narrowed through 'int'. */
    odp_port_t odp_port;
    uint32_t pid;

    /* Layout appended to 'odp_actions':
     *
     *   SAMPLE { PROBABILITY, ACTIONS { userspace action carrying 'cookie' } }
     */
    sample_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_SAMPLE);

    nl_msg_put_u32(odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);

    actions_offset = nl_msg_start_nested(odp_actions, OVS_SAMPLE_ATTR_ACTIONS);

    /* The userspace action is addressed to the PID that the datapath reports
     * for the flow's input port. */
    odp_port = ofp_port_to_odp_port(xbridge, flow->in_port.ofp_port);
    pid = dpif_port_get_pid(xbridge->dpif, odp_port);
    cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size,
                                             odp_actions);

    nl_msg_end_nested(odp_actions, actions_offset);
    nl_msg_end_nested(odp_actions, sample_offset);

    /* Callers use this value to locate the cookie in 'odp_actions' later
     * (see fix_sflow_action()). */
    return cookie_offset;
}

/* Fills in 'cookie' as an sFlow cookie for a packet with VLAN TCI 'vlan_tci'
 * that is output to 'n_outputs' ports.  'odp_port' is only consulted when
 * 'n_outputs' is 1. */
static void
compose_sflow_cookie(const struct xbridge *xbridge, ovs_be16 vlan_tci,
                     odp_port_t odp_port, unsigned int n_outputs,
                     union user_action_cookie *cookie)
{
    int ifindex;

    cookie->type = USER_ACTION_COOKIE_SFLOW;
    cookie->sflow.vlan_tci = vlan_tci;

    /* See http://www.sflow.org/sflow_version_5.txt (search for "Input/output
     * port information") for the interpretation of cookie->output. */
    switch (n_outputs) {
    case 0:
        /* 0x40000000 | 256 means "packet dropped for unknown reason". */
        cookie->sflow.output = 0x40000000 | 256;
        break;

    case 1:
        ifindex = dpif_sflow_odp_port_to_ifindex(xbridge->sflow, odp_port);
        if (ifindex) {
            cookie->sflow.output = ifindex;
            break;
        }
        /* Fall through. */
    default:
        /* 0x80000000 means "multiple output ports". */
        cookie->sflow.output = 0x80000000 | n_outputs;
        break;
    }
}

/* Compose SAMPLE action for sFlow bridge sampling.
*/ static size_t compose_sflow_action(const struct xbridge *xbridge, struct ofpbuf *odp_actions, const struct flow *flow, odp_port_t odp_port) { uint32_t probability; union user_action_cookie cookie; if (!xbridge->sflow || flow->in_port.ofp_port == OFPP_NONE) { return 0; } probability = dpif_sflow_get_probability(xbridge->sflow); compose_sflow_cookie(xbridge, htons(0), odp_port, odp_port == ODPP_NONE ? 0 : 1, &cookie); return compose_sample_action(xbridge, odp_actions, flow, probability, &cookie, sizeof cookie.sflow); } static void compose_flow_sample_cookie(uint16_t probability, uint32_t collector_set_id, uint32_t obs_domain_id, uint32_t obs_point_id, union user_action_cookie *cookie) { cookie->type = USER_ACTION_COOKIE_FLOW_SAMPLE; cookie->flow_sample.probability = probability; cookie->flow_sample.collector_set_id = collector_set_id; cookie->flow_sample.obs_domain_id = obs_domain_id; cookie->flow_sample.obs_point_id = obs_point_id; } static void compose_ipfix_cookie(union user_action_cookie *cookie) { cookie->type = USER_ACTION_COOKIE_IPFIX; } /* Compose SAMPLE action for IPFIX bridge sampling. */ static void compose_ipfix_action(const struct xbridge *xbridge, struct ofpbuf *odp_actions, const struct flow *flow) { uint32_t probability; union user_action_cookie cookie; if (!xbridge->ipfix || flow->in_port.ofp_port == OFPP_NONE) { return; } probability = dpif_ipfix_get_bridge_exporter_probability(xbridge->ipfix); compose_ipfix_cookie(&cookie); compose_sample_action(xbridge, odp_actions, flow, probability, &cookie, sizeof cookie.ipfix); } /* SAMPLE action for sFlow must be first action in any given list of * actions. At this point we do not have all information required to * build it. So try to build sample action as complete as possible. 
*/ static void add_sflow_action(struct xlate_ctx *ctx) { ctx->user_cookie_offset = compose_sflow_action(ctx->xbridge, &ctx->xout->odp_actions, &ctx->xin->flow, ODPP_NONE); ctx->sflow_odp_port = 0; ctx->sflow_n_outputs = 0; } /* SAMPLE action for IPFIX must be 1st or 2nd action in any given list * of actions, eventually after the SAMPLE action for sFlow. */ static void add_ipfix_action(struct xlate_ctx *ctx) { compose_ipfix_action(ctx->xbridge, &ctx->xout->odp_actions, &ctx->xin->flow); } /* Fix SAMPLE action according to data collected while composing ODP actions. * We need to fix SAMPLE actions OVS_SAMPLE_ATTR_ACTIONS attribute, i.e. nested * USERSPACE action's user-cookie which is required for sflow. */ static void fix_sflow_action(struct xlate_ctx *ctx) { const struct flow *base = &ctx->base_flow; union user_action_cookie *cookie; if (!ctx->user_cookie_offset) { return; } cookie = ofpbuf_at(&ctx->xout->odp_actions, ctx->user_cookie_offset, sizeof cookie->sflow); ovs_assert(cookie->type == USER_ACTION_COOKIE_SFLOW); compose_sflow_cookie(ctx->xbridge, base->vlan_tci, ctx->sflow_odp_port, ctx->sflow_n_outputs, cookie); } static enum slow_path_reason process_special(struct xlate_ctx *ctx, const struct flow *flow, const struct xport *xport, const struct ofpbuf *packet) { struct flow_wildcards *wc = &ctx->xout->wc; const struct xbridge *xbridge = ctx->xbridge; if (!xport) { return 0; } else if (xport->cfm && cfm_should_process_flow(xport->cfm, flow, wc)) { if (packet) { cfm_process_heartbeat(xport->cfm, packet); } return SLOW_CFM; } else if (xport->bfd && bfd_should_process_flow(xport->bfd, flow, wc)) { if (packet) { bfd_process_packet(xport->bfd, flow, packet); } return SLOW_BFD; } else if (xport->xbundle && xport->xbundle->lacp && flow->dl_type == htons(ETH_TYPE_LACP)) { if (packet) { lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet); } return SLOW_LACP; } else if (xbridge->stp && stp_should_process_flow(flow, wc)) { if (packet) { 
stp_process_packet(xport, packet); } return SLOW_STP; } else { return 0; } } static void compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, bool check_stp) { const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port); struct flow_wildcards *wc = &ctx->xout->wc; struct flow *flow = &ctx->xin->flow; ovs_be16 flow_vlan_tci; uint32_t flow_pkt_mark; uint8_t flow_nw_tos; odp_port_t out_port, odp_port; uint8_t dscp; /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. */ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 20); if (!xport) { xlate_report(ctx, "Nonexistent output port"); return; } else if (xport->config & OFPUTIL_PC_NO_FWD) { xlate_report(ctx, "OFPPC_NO_FWD set, skipping output"); return; } else if (check_stp && !xport_stp_forward_state(xport)) { xlate_report(ctx, "STP not in forwarding state, skipping output"); return; } if (mbridge_has_mirrors(ctx->xbridge->mbridge) && xport->xbundle) { ctx->xout->mirrors |= xbundle_mirror_dst(xport->xbundle->xbridge, xport->xbundle); } if (xport->peer) { const struct xport *peer = xport->peer; struct flow old_flow = ctx->xin->flow; enum slow_path_reason special; ctx->xbridge = peer->xbridge; flow->in_port.ofp_port = peer->ofp_port; flow->metadata = htonll(0); memset(&flow->tunnel, 0, sizeof flow->tunnel); memset(flow->regs, 0, sizeof flow->regs); special = process_special(ctx, &ctx->xin->flow, peer, ctx->xin->packet); if (special) { ctx->xout->slow = special; } else if (may_receive(peer, ctx)) { if (xport_stp_forward_state(peer)) { xlate_table_action(ctx, flow->in_port.ofp_port, 0, true); } else { /* Forwarding is disabled by STP. Let OFPP_NORMAL and the * learning action look at the packet, then drop it. 
*/ struct flow old_base_flow = ctx->base_flow; size_t old_size = ctx->xout->odp_actions.size; mirror_mask_t old_mirrors = ctx->xout->mirrors; xlate_table_action(ctx, flow->in_port.ofp_port, 0, true); ctx->xout->mirrors = old_mirrors; ctx->base_flow = old_base_flow; ctx->xout->odp_actions.size = old_size; } } ctx->xin->flow = old_flow; ctx->xbridge = xport->xbridge; if (ctx->xin->resubmit_stats) { netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats); netdev_vport_inc_rx(peer->netdev, ctx->xin->resubmit_stats); } return; } flow_vlan_tci = flow->vlan_tci; flow_pkt_mark = flow->pkt_mark; flow_nw_tos = flow->nw_tos; if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) { wc->masks.nw_tos |= IP_ECN_MASK; flow->nw_tos &= ~IP_DSCP_MASK; flow->nw_tos |= dscp; } if (xport->is_tunnel) { /* Save tunnel metadata so that changes made due to * the Logical (tunnel) Port are not visible for any further * matches, while explicit set actions on tunnel metadata are. */ struct flow_tnl flow_tnl = flow->tunnel; odp_port = tnl_port_send(xport->ofport, flow, &ctx->xout->wc); if (odp_port == ODPP_NONE) { xlate_report(ctx, "Tunneling decided against output"); goto out; /* restore flow_nw_tos */ } if (flow->tunnel.ip_dst == ctx->orig_tunnel_ip_dst) { xlate_report(ctx, "Not tunneling to our own address"); goto out; /* restore flow_nw_tos */ } if (ctx->xin->resubmit_stats) { netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats); } out_port = odp_port; commit_odp_tunnel_action(flow, &ctx->base_flow, &ctx->xout->odp_actions); flow->tunnel = flow_tnl; /* Restore tunnel metadata */ } else { ofp_port_t vlandev_port; odp_port = xport->odp_port; if (ofproto_has_vlan_splinters(ctx->xbridge->ofproto)) { wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI); } vlandev_port = vsp_realdev_to_vlandev(ctx->xbridge->ofproto, ofp_port, flow->vlan_tci); if (vlandev_port == ofp_port) { out_port = odp_port; } else { out_port = ofp_port_to_odp_port(ctx->xbridge, vlandev_port); 
flow->vlan_tci = htons(0); } } if (out_port != ODPP_NONE) { commit_odp_actions(flow, &ctx->base_flow, &ctx->xout->odp_actions, &ctx->xout->wc); nl_msg_put_odp_port(&ctx->xout->odp_actions, OVS_ACTION_ATTR_OUTPUT, out_port); ctx->sflow_odp_port = odp_port; ctx->sflow_n_outputs++; ctx->xout->nf_output_iface = ofp_port; } out: /* Restore flow */ flow->vlan_tci = flow_vlan_tci; flow->pkt_mark = flow_pkt_mark; flow->nw_tos = flow_nw_tos; } static void compose_output_action(struct xlate_ctx *ctx, ofp_port_t ofp_port) { compose_output_action__(ctx, ofp_port, true); } static void xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule) { struct rule_dpif *old_rule = ctx->rule; struct rule_actions *actions; if (ctx->xin->resubmit_stats) { rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats); } ctx->recurse++; ctx->rule = rule; actions = rule_dpif_get_actions(rule); do_xlate_actions(actions->ofpacts, actions->ofpacts_len, ctx); rule_actions_unref(actions); ctx->rule = old_rule; ctx->recurse--; } static void xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, bool may_packet_in) { if (ctx->recurse < MAX_RESUBMIT_RECURSION) { struct rule_dpif *rule; ofp_port_t old_in_port = ctx->xin->flow.in_port.ofp_port; uint8_t old_table_id = ctx->table_id; ctx->table_id = table_id; /* Look up a flow with 'in_port' as the input port. Then restore the * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will * have surprising behavior). */ ctx->xin->flow.in_port.ofp_port = in_port; rule_dpif_lookup_in_table(ctx->xbridge->ofproto, &ctx->xin->flow, &ctx->xout->wc, table_id, &rule); ctx->xin->flow.in_port.ofp_port = old_in_port; if (ctx->xin->resubmit_hook) { ctx->xin->resubmit_hook(ctx->xin, rule, ctx->recurse); } if (!rule && may_packet_in) { struct xport *xport; /* XXX * check if table configuration flags * OFPTC_TABLE_MISS_CONTROLLER, default. * OFPTC_TABLE_MISS_CONTINUE, * OFPTC_TABLE_MISS_DROP * When OF1.0, OFPTC_TABLE_MISS_CONTINUE is used. 
What to do? */ xport = get_ofp_port(ctx->xbridge, ctx->xin->flow.in_port.ofp_port); choose_miss_rule(xport ? xport->config : 0, ctx->xbridge->miss_rule, ctx->xbridge->no_packet_in_rule, &rule); } if (rule) { xlate_recursively(ctx, rule); rule_dpif_unref(rule); } ctx->table_id = old_table_id; } else { static struct vlog_rate_limit recurse_rl = VLOG_RATE_LIMIT_INIT(1, 1); VLOG_ERR_RL(&recurse_rl, "resubmit actions recursed over %d times", MAX_RESUBMIT_RECURSION); } } static void xlate_ofpact_resubmit(struct xlate_ctx *ctx, const struct ofpact_resubmit *resubmit) { ofp_port_t in_port; uint8_t table_id; in_port = resubmit->in_port; if (in_port == OFPP_IN_PORT) { in_port = ctx->xin->flow.in_port.ofp_port; } table_id = resubmit->table_id; if (table_id == 255) { table_id = ctx->table_id; } xlate_table_action(ctx, in_port, table_id, false); } static void flood_packets(struct xlate_ctx *ctx, bool all) { const struct xport *xport; HMAP_FOR_EACH (xport, ofp_node, &ctx->xbridge->xports) { if (xport->ofp_port == ctx->xin->flow.in_port.ofp_port) { continue; } if (all) { compose_output_action__(ctx, xport->ofp_port, false); } else if (!(xport->config & OFPUTIL_PC_NO_FLOOD)) { compose_output_action(ctx, xport->ofp_port); } } ctx->xout->nf_output_iface = NF_OUT_FLOOD; } static void execute_controller_action(struct xlate_ctx *ctx, int len, enum ofp_packet_in_reason reason, uint16_t controller_id) { struct ofputil_packet_in *pin; struct ofpbuf *packet; struct flow key; ovs_assert(!ctx->xout->slow || ctx->xout->slow == SLOW_CONTROLLER); ctx->xout->slow = SLOW_CONTROLLER; if (!ctx->xin->packet) { return; } packet = ofpbuf_clone(ctx->xin->packet); key.skb_priority = 0; key.pkt_mark = 0; memset(&key.tunnel, 0, sizeof key.tunnel); commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, &ctx->xout->odp_actions, &ctx->xout->wc); odp_execute_actions(NULL, packet, &key, ctx->xout->odp_actions.data, ctx->xout->odp_actions.size, NULL, NULL); pin = xmalloc(sizeof *pin); pin->packet_len = 
packet->size; pin->packet = ofpbuf_steal_data(packet); pin->reason = reason; pin->controller_id = controller_id; pin->table_id = ctx->table_id; pin->cookie = ctx->rule ? rule_dpif_get_flow_cookie(ctx->rule) : 0; pin->send_len = len; flow_get_metadata(&ctx->xin->flow, &pin->fmd); ofproto_dpif_send_packet_in(ctx->xbridge->ofproto, pin); ofpbuf_delete(packet); } static void compose_mpls_push_action(struct xlate_ctx *ctx, ovs_be16 eth_type) { struct flow_wildcards *wc = &ctx->xout->wc; struct flow *flow = &ctx->xin->flow; ovs_assert(eth_type_mpls(eth_type)); memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse); memset(&wc->masks.mpls_depth, 0xff, sizeof wc->masks.mpls_depth); if (flow->mpls_depth) { flow->mpls_lse &= ~htonl(MPLS_BOS_MASK); flow->mpls_depth++; } else { ovs_be32 label; uint8_t tc, ttl; if (flow->dl_type == htons(ETH_TYPE_IPV6)) { label = htonl(0x2); /* IPV6 Explicit Null. */ } else { label = htonl(0x0); /* IPV4 Explicit Null. */ } wc->masks.nw_tos |= IP_DSCP_MASK; wc->masks.nw_ttl = 0xff; tc = (flow->nw_tos & IP_DSCP_MASK) >> 2; ttl = flow->nw_ttl ? 
flow->nw_ttl : 0x40; flow->mpls_lse = set_mpls_lse_values(ttl, tc, 1, label); flow->mpls_depth = 1; } flow->dl_type = eth_type; } static void compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type) { struct flow_wildcards *wc = &ctx->xout->wc; struct flow *flow = &ctx->xin->flow; ovs_assert(eth_type_mpls(ctx->xin->flow.dl_type)); ovs_assert(!eth_type_mpls(eth_type)); memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse); memset(&wc->masks.mpls_depth, 0xff, sizeof wc->masks.mpls_depth); if (flow->mpls_depth) { flow->mpls_depth--; flow->mpls_lse = htonl(0); if (!flow->mpls_depth) { flow->dl_type = eth_type; } } } static bool compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids) { struct flow *flow = &ctx->xin->flow; if (!is_ip_any(flow)) { return false; } ctx->xout->wc.masks.nw_ttl = 0xff; if (flow->nw_ttl > 1) { flow->nw_ttl--; return false; } else { size_t i; for (i = 0; i < ids->n_controllers; i++) { execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, ids->cnt_ids[i]); } /* Stop processing for current table. */ return true; } } static bool compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl) { if (!eth_type_mpls(ctx->xin->flow.dl_type)) { return true; } ctx->xout->wc.masks.mpls_lse |= htonl(MPLS_TTL_MASK); set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse, ttl); return false; } static bool compose_dec_mpls_ttl_action(struct xlate_ctx *ctx) { struct flow *flow = &ctx->xin->flow; uint8_t ttl = mpls_lse_to_ttl(flow->mpls_lse); struct flow_wildcards *wc = &ctx->xout->wc; memset(&wc->masks.mpls_lse, 0xff, sizeof wc->masks.mpls_lse); if (!eth_type_mpls(flow->dl_type)) { return false; } if (ttl > 1) { ttl--; set_mpls_lse_ttl(&flow->mpls_lse, ttl); return false; } else { execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0); /* Stop processing for current table. 
*/ return true; } } static void xlate_output_action(struct xlate_ctx *ctx, ofp_port_t port, uint16_t max_len, bool may_packet_in) { ofp_port_t prev_nf_output_iface = ctx->xout->nf_output_iface; ctx->xout->nf_output_iface = NF_OUT_DROP; switch (port) { case OFPP_IN_PORT: compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port); break; case OFPP_TABLE: xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port, 0, may_packet_in); break; case OFPP_NORMAL: xlate_normal(ctx); break; case OFPP_FLOOD: flood_packets(ctx, false); break; case OFPP_ALL: flood_packets(ctx, true); break; case OFPP_CONTROLLER: execute_controller_action(ctx, max_len, OFPR_ACTION, 0); break; case OFPP_NONE: break; case OFPP_LOCAL: default: if (port != ctx->xin->flow.in_port.ofp_port) { compose_output_action(ctx, port); } else { xlate_report(ctx, "skipping output to input port"); } break; } if (prev_nf_output_iface == NF_OUT_FLOOD) { ctx->xout->nf_output_iface = NF_OUT_FLOOD; } else if (ctx->xout->nf_output_iface == NF_OUT_DROP) { ctx->xout->nf_output_iface = prev_nf_output_iface; } else if (prev_nf_output_iface != NF_OUT_DROP && ctx->xout->nf_output_iface != NF_OUT_FLOOD) { ctx->xout->nf_output_iface = NF_OUT_MULTI; } } static void xlate_output_reg_action(struct xlate_ctx *ctx, const struct ofpact_output_reg *or) { uint64_t port = mf_get_subfield(&or->src, &ctx->xin->flow); if (port <= UINT16_MAX) { union mf_subvalue value; memset(&value, 0xff, sizeof value); mf_write_subfield_flow(&or->src, &value, &ctx->xout->wc.masks); xlate_output_action(ctx, u16_to_ofp(port), or->max_len, false); } } static void xlate_enqueue_action(struct xlate_ctx *ctx, const struct ofpact_enqueue *enqueue) { ofp_port_t ofp_port = enqueue->port; uint32_t queue_id = enqueue->queue; uint32_t flow_priority, priority; int error; /* Translate queue to priority. */ error = dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &priority); if (error) { /* Fall back to ordinary output action. 
*/ xlate_output_action(ctx, enqueue->port, 0, false); return; } /* Check output port. */ if (ofp_port == OFPP_IN_PORT) { ofp_port = ctx->xin->flow.in_port.ofp_port; } else if (ofp_port == ctx->xin->flow.in_port.ofp_port) { return; } /* Add datapath actions. */ flow_priority = ctx->xin->flow.skb_priority; ctx->xin->flow.skb_priority = priority; compose_output_action(ctx, ofp_port); ctx->xin->flow.skb_priority = flow_priority; /* Update NetFlow output port. */ if (ctx->xout->nf_output_iface == NF_OUT_DROP) { ctx->xout->nf_output_iface = ofp_port; } else if (ctx->xout->nf_output_iface != NF_OUT_FLOOD) { ctx->xout->nf_output_iface = NF_OUT_MULTI; } } static void xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id) { uint32_t skb_priority; if (!dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &skb_priority)) { ctx->xin->flow.skb_priority = skb_priority; } else { /* Couldn't translate queue to a priority. Nothing to do. A warning * has already been logged. */ } } static bool slave_enabled_cb(ofp_port_t ofp_port, void *xbridge_) { const struct xbridge *xbridge = xbridge_; struct xport *port; switch (ofp_port) { case OFPP_IN_PORT: case OFPP_TABLE: case OFPP_NORMAL: case OFPP_FLOOD: case OFPP_ALL: case OFPP_NONE: return true; case OFPP_CONTROLLER: /* Not supported by the bundle action. */ return false; default: port = get_ofp_port(xbridge, ofp_port); return port ? 
port->may_enable : false; } } static void xlate_bundle_action(struct xlate_ctx *ctx, const struct ofpact_bundle *bundle) { ofp_port_t port; port = bundle_execute(bundle, &ctx->xin->flow, &ctx->xout->wc, slave_enabled_cb, CONST_CAST(struct xbridge *, ctx->xbridge)); if (bundle->dst.field) { nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow, &ctx->xout->wc); } else { xlate_output_action(ctx, port, 0, false); } } static void xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn) { uint64_t ofpacts_stub[1024 / 8]; struct ofputil_flow_mod fm; struct ofpbuf ofpacts; ctx->xout->has_learn = true; learn_mask(learn, &ctx->xout->wc); if (!ctx->xin->may_learn) { return; } ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); learn_execute(learn, &ctx->xin->flow, &fm, &ofpacts); ofproto_dpif_flow_mod(ctx->xbridge->ofproto, &fm); ofpbuf_uninit(&ofpacts); } static void xlate_fin_timeout(struct xlate_ctx *ctx, const struct ofpact_fin_timeout *oft) { if (ctx->xin->tcp_flags & (TCP_FIN | TCP_RST) && ctx->rule) { rule_dpif_reduce_timeouts(ctx->rule, oft->fin_idle_timeout, oft->fin_hard_timeout); } } static void xlate_sample_action(struct xlate_ctx *ctx, const struct ofpact_sample *os) { union user_action_cookie cookie; /* Scale the probability from 16-bit to 32-bit while representing * the same percentage. */ uint32_t probability = (os->probability << 16) | os->probability; commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, &ctx->xout->odp_actions, &ctx->xout->wc); compose_flow_sample_cookie(os->probability, os->collector_set_id, os->obs_domain_id, os->obs_point_id, &cookie); compose_sample_action(ctx->xbridge, &ctx->xout->odp_actions, &ctx->xin->flow, probability, &cookie, sizeof cookie.flow_sample); } static bool may_receive(const struct xport *xport, struct xlate_ctx *ctx) { if (xport->config & (eth_addr_equals(ctx->xin->flow.dl_dst, eth_addr_stp) ? 
OFPUTIL_PC_NO_RECV_STP : OFPUTIL_PC_NO_RECV)) { return false; } /* Only drop packets here if both forwarding and learning are * disabled. If just learning is enabled, we need to have * OFPP_NORMAL and the learning action have a look at the packet * before we can drop it. */ if (!xport_stp_forward_state(xport) && !xport_stp_learn_state(xport)) { return false; } return true; } static void do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, struct xlate_ctx *ctx) { struct flow_wildcards *wc = &ctx->xout->wc; struct flow *flow = &ctx->xin->flow; const struct ofpact *a; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { struct ofpact_controller *controller; const struct ofpact_metadata *metadata; if (ctx->exit) { break; } switch (a->type) { case OFPACT_OUTPUT: xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port, ofpact_get_OUTPUT(a)->max_len, true); break; case OFPACT_CONTROLLER: controller = ofpact_get_CONTROLLER(a); execute_controller_action(ctx, controller->max_len, controller->reason, controller->controller_id); break; case OFPACT_ENQUEUE: xlate_enqueue_action(ctx, ofpact_get_ENQUEUE(a)); break; case OFPACT_SET_VLAN_VID: wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI); flow->vlan_tci &= ~htons(VLAN_VID_MASK); flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid) | htons(VLAN_CFI)); break; case OFPACT_SET_VLAN_PCP: wc->masks.vlan_tci |= htons(VLAN_PCP_MASK | VLAN_CFI); flow->vlan_tci &= ~htons(VLAN_PCP_MASK); flow->vlan_tci |= htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp << VLAN_PCP_SHIFT) | VLAN_CFI); break; case OFPACT_STRIP_VLAN: memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci); flow->vlan_tci = htons(0); break; case OFPACT_PUSH_VLAN: /* XXX 802.1AD(QinQ) */ memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci); flow->vlan_tci = htons(VLAN_CFI); break; case OFPACT_SET_ETH_SRC: memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src); memcpy(flow->dl_src, ofpact_get_SET_ETH_SRC(a)->mac, ETH_ADDR_LEN); break; case 
OFPACT_SET_ETH_DST: memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst); memcpy(flow->dl_dst, ofpact_get_SET_ETH_DST(a)->mac, ETH_ADDR_LEN); break; case OFPACT_SET_IPV4_SRC: memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src); if (flow->dl_type == htons(ETH_TYPE_IP)) { flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4; } break; case OFPACT_SET_IPV4_DST: memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst); if (flow->dl_type == htons(ETH_TYPE_IP)) { flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4; } break; case OFPACT_SET_IPV4_DSCP: wc->masks.nw_tos |= IP_DSCP_MASK; /* OpenFlow 1.0 only supports IPv4. */ if (flow->dl_type == htons(ETH_TYPE_IP)) { flow->nw_tos &= ~IP_DSCP_MASK; flow->nw_tos |= ofpact_get_SET_IPV4_DSCP(a)->dscp; } break; case OFPACT_SET_L4_SRC_PORT: memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src); if (is_ip_any(flow)) { flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port); } break; case OFPACT_SET_L4_DST_PORT: memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst); if (is_ip_any(flow)) { flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port); } break; case OFPACT_RESUBMIT: xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a)); break; case OFPACT_SET_TUNNEL: flow->tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id); break; case OFPACT_SET_QUEUE: xlate_set_queue_action(ctx, ofpact_get_SET_QUEUE(a)->queue_id); break; case OFPACT_POP_QUEUE: flow->skb_priority = ctx->orig_skb_priority; break; case OFPACT_REG_MOVE: nxm_execute_reg_move(ofpact_get_REG_MOVE(a), flow, wc); break; case OFPACT_REG_LOAD: nxm_execute_reg_load(ofpact_get_REG_LOAD(a), flow); break; case OFPACT_STACK_PUSH: nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc, &ctx->stack); break; case OFPACT_STACK_POP: nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc, &ctx->stack); break; case OFPACT_PUSH_MPLS: 
compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a)->ethertype); break; case OFPACT_POP_MPLS: compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype); break; case OFPACT_SET_MPLS_TTL: if (compose_set_mpls_ttl_action(ctx, ofpact_get_SET_MPLS_TTL(a)->ttl)) { return; } break; case OFPACT_DEC_MPLS_TTL: if (compose_dec_mpls_ttl_action(ctx)) { return; } break; case OFPACT_DEC_TTL: wc->masks.nw_ttl = 0xff; if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) { return; } break; case OFPACT_NOTE: /* Nothing to do. */ break; case OFPACT_MULTIPATH: multipath_execute(ofpact_get_MULTIPATH(a), flow, wc); break; case OFPACT_BUNDLE: xlate_bundle_action(ctx, ofpact_get_BUNDLE(a)); break; case OFPACT_OUTPUT_REG: xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a)); break; case OFPACT_LEARN: xlate_learn_action(ctx, ofpact_get_LEARN(a)); break; case OFPACT_EXIT: ctx->exit = true; break; case OFPACT_FIN_TIMEOUT: memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); ctx->xout->has_fin_timeout = true; xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a)); break; case OFPACT_CLEAR_ACTIONS: /* XXX * Nothing to do because writa-actions is not supported for now. * When writa-actions is supported, clear-actions also must * be supported at the same time. */ break; case OFPACT_WRITE_METADATA: metadata = ofpact_get_WRITE_METADATA(a); flow->metadata &= ~metadata->mask; flow->metadata |= metadata->metadata & metadata->mask; break; case OFPACT_METER: /* Not implemented yet. */ break; case OFPACT_GOTO_TABLE: { /* It is assumed that goto-table is the last action. 
*/ struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a); ovs_assert(ctx->table_id < ogt->table_id); xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port, ogt->table_id, true); break; } case OFPACT_SAMPLE: xlate_sample_action(ctx, ofpact_get_SAMPLE(a)); break; } } } void xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto, const struct flow *flow, struct rule_dpif *rule, uint8_t tcp_flags, const struct ofpbuf *packet) { xin->ofproto = ofproto; xin->flow = *flow; xin->packet = packet; xin->may_learn = packet != NULL; xin->rule = rule; xin->ofpacts = NULL; xin->ofpacts_len = 0; xin->tcp_flags = tcp_flags; xin->resubmit_hook = NULL; xin->report_hook = NULL; xin->resubmit_stats = NULL; } void xlate_out_uninit(struct xlate_out *xout) { if (xout) { ofpbuf_uninit(&xout->odp_actions); } } /* Translates the 'ofpacts_len' bytes of "struct ofpact"s starting at 'ofpacts' * into datapath actions, using 'ctx', and discards the datapath actions. */ void xlate_actions_for_side_effects(struct xlate_in *xin) { struct xlate_out xout; xlate_actions(xin, &xout); xlate_out_uninit(&xout); } static void xlate_report(struct xlate_ctx *ctx, const char *s) { if (ctx->xin->report_hook) { ctx->xin->report_hook(ctx->xin, s, ctx->recurse); } } void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src) { dst->wc = src->wc; dst->slow = src->slow; dst->has_learn = src->has_learn; dst->has_normal = src->has_normal; dst->has_fin_timeout = src->has_fin_timeout; dst->nf_output_iface = src->nf_output_iface; dst->mirrors = src->mirrors; ofpbuf_use_stub(&dst->odp_actions, dst->odp_actions_stub, sizeof dst->odp_actions_stub); ofpbuf_put(&dst->odp_actions, src->odp_actions.data, src->odp_actions.size); } static struct skb_priority_to_dscp * get_skb_priority(const struct xport *xport, uint32_t skb_priority) { struct skb_priority_to_dscp *pdscp; uint32_t hash; hash = hash_int(skb_priority, 0); HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &xport->skb_priorities) { if 
(pdscp->skb_priority == skb_priority) { return pdscp; } } return NULL; } static bool dscp_from_skb_priority(const struct xport *xport, uint32_t skb_priority, uint8_t *dscp) { struct skb_priority_to_dscp *pdscp = get_skb_priority(xport, skb_priority); *dscp = pdscp ? pdscp->dscp : 0; return pdscp != NULL; } static void clear_skb_priorities(struct xport *xport) { struct skb_priority_to_dscp *pdscp, *next; HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &xport->skb_priorities) { hmap_remove(&xport->skb_priorities, &pdscp->hmap_node); free(pdscp); } } static bool actions_output_to_local_port(const struct xlate_ctx *ctx) { odp_port_t local_odp_port = ofp_port_to_odp_port(ctx->xbridge, OFPP_LOCAL); const struct nlattr *a; unsigned int left; NL_ATTR_FOR_EACH_UNSAFE (a, left, ctx->xout->odp_actions.data, ctx->xout->odp_actions.size) { if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT && nl_attr_get_odp_port(a) == local_odp_port) { return true; } } return false; } /* Translates the 'ofpacts_len' bytes of "struct ofpacts" starting at 'ofpacts' * into datapath actions in 'odp_actions', using 'ctx'. * * The caller must take responsibility for eventually freeing 'xout', with * xlate_out_uninit(). */ void xlate_actions(struct xlate_in *xin, struct xlate_out *xout) { struct flow_wildcards *wc = &xout->wc; struct flow *flow = &xin->flow; struct rule_dpif *rule = NULL; struct rule_actions *actions = NULL; enum slow_path_reason special; const struct ofpact *ofpacts; struct xport *in_port; struct flow orig_flow; struct xlate_ctx ctx; size_t ofpacts_len; bool tnl_may_send; COVERAGE_INC(xlate_actions); ovs_rwlock_rdlock(&xlate_rwlock); /* Flow initialization rules: * - 'base_flow' must match the kernel's view of the packet at the * time that action processing starts. 'flow' represents any * transformations we wish to make through actions. * - By default 'base_flow' and 'flow' are the same since the input * packet matches the output before any actions are applied. 
* - When using VLAN splinters, 'base_flow''s VLAN is set to the value * of the received packet as seen by the kernel. If we later output * to another device without any modifications this will cause us to * insert a new tag since the original one was stripped off by the * VLAN device. * - Tunnel metadata as received is retained in 'flow'. This allows * tunnel metadata matching also in later tables. * Since a kernel action for setting the tunnel metadata will only be * generated with actual tunnel output, changing the tunnel metadata * values in 'flow' (such as tun_id) will only have effect with a later * tunnel output action. * - Tunnel 'base_flow' is completely cleared since that is what the * kernel does. If we wish to maintain the original values an action * needs to be generated. */ ctx.xin = xin; ctx.xout = xout; ctx.xout->slow = 0; ctx.xout->has_learn = false; ctx.xout->has_normal = false; ctx.xout->has_fin_timeout = false; ctx.xout->nf_output_iface = NF_OUT_DROP; ctx.xout->mirrors = 0; ofpbuf_use_stub(&ctx.xout->odp_actions, ctx.xout->odp_actions_stub, sizeof ctx.xout->odp_actions_stub); ofpbuf_reserve(&ctx.xout->odp_actions, NL_A_U32_SIZE); ctx.xbridge = xbridge_lookup(xin->ofproto); if (!ctx.xbridge) { goto out; } ctx.rule = xin->rule; ctx.base_flow = *flow; memset(&ctx.base_flow.tunnel, 0, sizeof ctx.base_flow.tunnel); ctx.orig_tunnel_ip_dst = flow->tunnel.ip_dst; flow_wildcards_init_catchall(wc); memset(&wc->masks.in_port, 0xff, sizeof wc->masks.in_port); memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority); memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type); wc->masks.nw_frag |= FLOW_NW_FRAG_MASK; tnl_may_send = tnl_xlate_init(&ctx.base_flow, flow, wc); if (ctx.xbridge->has_netflow) { netflow_mask_wc(flow, wc); } ctx.recurse = 0; ctx.orig_skb_priority = flow->skb_priority; ctx.table_id = 0; ctx.exit = false; if (!xin->ofpacts && !ctx.rule) { rule_dpif_lookup(ctx.xbridge->ofproto, flow, wc, &rule); if (ctx.xin->resubmit_stats) { 
rule_dpif_credit_stats(rule, ctx.xin->resubmit_stats); } ctx.rule = rule; } xout->fail_open = ctx.rule && rule_dpif_fail_open(ctx.rule); if (xin->ofpacts) { ofpacts = xin->ofpacts; ofpacts_len = xin->ofpacts_len; } else if (ctx.rule) { actions = rule_dpif_get_actions(ctx.rule); ofpacts = actions->ofpacts; ofpacts_len = actions->ofpacts_len; } else { NOT_REACHED(); } ofpbuf_use_stub(&ctx.stack, ctx.init_stack, sizeof ctx.init_stack); if (mbridge_has_mirrors(ctx.xbridge->mbridge)) { /* Do this conditionally because the copy is expensive enough that it * shows up in profiles. */ orig_flow = *flow; } if (flow->nw_frag & FLOW_NW_FRAG_ANY) { switch (ctx.xbridge->frag) { case OFPC_FRAG_NORMAL: /* We must pretend that transport ports are unavailable. */ flow->tp_src = ctx.base_flow.tp_src = htons(0); flow->tp_dst = ctx.base_flow.tp_dst = htons(0); break; case OFPC_FRAG_DROP: goto out; case OFPC_FRAG_REASM: NOT_REACHED(); case OFPC_FRAG_NX_MATCH: /* Nothing to do. */ break; case OFPC_INVALID_TTL_TO_CONTROLLER: NOT_REACHED(); } } in_port = get_ofp_port(ctx.xbridge, flow->in_port.ofp_port); special = process_special(&ctx, flow, in_port, ctx.xin->packet); if (special) { ctx.xout->slow = special; } else { size_t sample_actions_len; if (flow->in_port.ofp_port != vsp_realdev_to_vlandev(ctx.xbridge->ofproto, flow->in_port.ofp_port, flow->vlan_tci)) { ctx.base_flow.vlan_tci = 0; } add_sflow_action(&ctx); add_ipfix_action(&ctx); sample_actions_len = ctx.xout->odp_actions.size; if (tnl_may_send && (!in_port || may_receive(in_port, &ctx))) { do_xlate_actions(ofpacts, ofpacts_len, &ctx); /* We've let OFPP_NORMAL and the learning action look at the * packet, so drop it now if forwarding is disabled. 
*/
            if (in_port && !xport_stp_forward_state(in_port)) {
                ctx.xout->odp_actions.size = sample_actions_len;
            }
        }

        if (ctx.xbridge->has_in_band
            && in_band_must_output_to_local_port(flow)
            && !actions_output_to_local_port(&ctx)) {
            compose_output_action(&ctx, OFPP_LOCAL);
        }

        fix_sflow_action(&ctx);

        if (mbridge_has_mirrors(ctx.xbridge->mbridge)) {
            add_mirror_actions(&ctx, &orig_flow);
        }
    }

    ofpbuf_uninit(&ctx.stack);

    /* Clear the metadata and register wildcard masks, because we won't
     * use non-header fields as part of the cache. */
    flow_wildcards_clear_non_packet_fields(wc);

out:
    ovs_rwlock_unlock(&xlate_rwlock);

    rule_dpif_unref(rule);
    rule_actions_unref(actions);
}
openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif-xlate.h000066400000000000000000000153641226605124000230470ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#ifndef OFPROTO_DPIF_XLATE_H
#define OFPROTO_DPIF_XLATE_H 1

#include "flow.h"
#include "meta-flow.h"
#include "odp-util.h"
#include "ofpbuf.h"
#include "ofproto-dpif-mirror.h"
#include "ofproto-dpif.h"
#include "ofproto.h"
#include "stp.h"

struct bfd;
struct bond;
struct dpif;
struct lacp;
struct dpif_ipfix;
struct dpif_sflow;
struct mac_learning;

/* Result of a translation.  xlate_actions() resets 'slow', the 'has_*' flags,
 * 'nf_output_iface' and 'mirrors', and points 'odp_actions' at
 * 'odp_actions_stub', before it translates anything. */
struct xlate_out {
    /* Wildcards relevant in translation.  Any fields that were used to
     * calculate the action must be set for caching and kernel
     * wildcarding to work.  For example, if the flow lookup involved
     * performing the "normal" action on IPv4 and ARP packets, 'wc'
     * would have the 'in_port' (always set), 'dl_type' (flow match),
     * 'vlan_tci' (normal action), and 'dl_dst' (normal action) fields
     * set. */
    struct flow_wildcards wc;

    enum slow_path_reason slow; /* 0 if fast path may be used. */
    bool fail_open;             /* Initial rule is fail open? */
    bool has_learn;             /* Actions include NXAST_LEARN? */
    bool has_normal;            /* Actions output to OFPP_NORMAL? */
    bool has_fin_timeout;       /* Actions include NXAST_FIN_TIMEOUT? */
    ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
    mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */

    /* 'odp_actions_stub' is the initial storage that xlate_actions() hands to
     * ofpbuf_use_stub() for 'odp_actions'. */
    uint64_t odp_actions_stub[256 / 8];
    struct ofpbuf odp_actions;
};

/* Input to a translation, passed to xlate_actions() together with the
 * 'struct xlate_out' that receives the result. */
struct xlate_in {
    struct ofproto_dpif *ofproto;

    /* Flow to which the OpenFlow actions apply.  xlate_actions() will modify
     * this flow when actions change header fields. */
    struct flow flow;

    /* The packet corresponding to 'flow', or a null pointer if we are
     * revalidating without a packet to refer to. */
    const struct ofpbuf *packet;

    /* Should OFPP_NORMAL update the MAC learning table?  Should "learn"
     * actions update the flow table?
     *
     * We want to update these tables if we are actually processing a packet,
     * or if we are accounting for packets that the datapath has processed, but
     * not if we are just revalidating. */
    bool may_learn;

    /* The rule initiating translation or NULL.  If both 'rule' and 'ofpacts'
     * are NULL, xlate_actions() will do the initial rule lookup itself. */
    struct rule_dpif *rule;

    /* The actions to translate.  If 'rule' is not NULL, these may be NULL. */
    const struct ofpact *ofpacts;
    size_t ofpacts_len;

    /* Union of the set of TCP flags seen so far in this flow.  (Used only by
     * NXAST_FIN_TIMEOUT.  Set to zero to avoid updating rules'
     * timeouts.) */
    uint8_t tcp_flags;

    /* If nonnull, flow translation calls this function just before executing a
     * resubmit or OFPP_TABLE action.  In addition, disables logging of traces
     * when the recursion depth is exceeded.
     *
     * 'rule' is the rule being submitted into.  It will be null if the
     * resubmit or OFPP_TABLE action didn't find a matching rule.
     *
     * 'recurse' is the resubmit recursion depth at time of invocation.
     *
     * This is normally null so the client has to set it manually after
     * calling xlate_in_init(). */
    void (*resubmit_hook)(struct xlate_in *, struct rule_dpif *rule,
                          int recurse);

    /* If nonnull, flow translation calls this function to report some
     * significant decision, e.g. to explain why OFPP_NORMAL translation
     * dropped a packet.  'recurse' is the resubmit recursion depth at time of
     * invocation. */
    void (*report_hook)(struct xlate_in *, const char *s, int recurse);

    /* If nonnull, flow translation credits the specified statistics to each
     * rule reached through a resubmit or OFPP_TABLE action.
     *
     * This is normally null so the client has to set it manually after
     * calling xlate_in_init(). */
    const struct dpif_flow_stats *resubmit_stats;
};

extern struct ovs_rwlock xlate_rwlock;

/* Configuration setters and removers.  Each is annotated
 * OVS_REQ_WRLOCK(xlate_rwlock), i.e. the caller is expected to hold
 * 'xlate_rwlock' for writing around the call. */
void xlate_ofproto_set(struct ofproto_dpif *, const char *name,
                       struct dpif *, struct rule_dpif *miss_rule,
                       struct rule_dpif *no_packet_in_rule,
                       const struct mac_learning *, struct stp *,
                       const struct mbridge *, const struct dpif_sflow *,
                       const struct dpif_ipfix *, enum ofp_config_flags,
                       bool forward_bpdu, bool has_in_band, bool has_netflow)
    OVS_REQ_WRLOCK(xlate_rwlock);
void xlate_remove_ofproto(struct ofproto_dpif *) OVS_REQ_WRLOCK(xlate_rwlock);

void xlate_bundle_set(struct ofproto_dpif *, struct ofbundle *,
                      const char *name, enum port_vlan_mode, int vlan,
                      unsigned long *trunks, bool use_priority_tags,
                      const struct bond *, const struct lacp *,
                      bool floodable) OVS_REQ_WRLOCK(xlate_rwlock);
void xlate_bundle_remove(struct ofbundle *) OVS_REQ_WRLOCK(xlate_rwlock);

void xlate_ofport_set(struct ofproto_dpif *, struct ofbundle *,
                      struct ofport_dpif *, ofp_port_t, odp_port_t,
                      const struct netdev *, const struct cfm *,
                      const struct bfd *, struct ofport_dpif *peer,
                      int stp_port_no, const struct ofproto_port_queue *qdscp,
                      size_t n_qdscp, enum ofputil_port_config, bool is_tunnel,
                      bool may_enable) OVS_REQ_WRLOCK(xlate_rwlock);
void xlate_ofport_remove(struct ofport_dpif *) OVS_REQ_WRLOCK(xlate_rwlock);

/* Annotated OVS_EXCLUDED(xlate_rwlock): the caller must not already hold
 * 'xlate_rwlock' when calling these. */
int xlate_receive(const struct dpif_backer *, struct ofpbuf *packet,
                  const struct nlattr *key, size_t key_len,
                  struct flow *, enum odp_key_fitness *,
                  struct ofproto_dpif **, odp_port_t *odp_in_port)
    OVS_EXCLUDED(xlate_rwlock);

void xlate_actions(struct xlate_in *, struct xlate_out *)
    OVS_EXCLUDED(xlate_rwlock);
void xlate_in_init(struct xlate_in *, struct ofproto_dpif *,
                   const struct flow *, struct rule_dpif *,
                   uint8_t tcp_flags, const struct ofpbuf *packet);
void xlate_out_uninit(struct xlate_out *);
void xlate_actions_for_side_effects(struct xlate_in *);
void xlate_out_copy(struct xlate_out *dst, const struct xlate_out *src);

#endif /* ofproto-dpif-xlate.h */
openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif.c000066400000000000000000006163461226605124000217360ustar00rootroot00000000000000/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

#include /* NOTE(review): header name is missing in this copy -- confirm
          * against the upstream file before building. */

#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-provider.h"

#include /* NOTE(review): header name is missing in this copy -- confirm
          * against the upstream file before building. */

#include "bfd.h"
#include "bond.h"
#include "bundle.h"
#include "byte-order.h"
#include "connmgr.h"
#include "coverage.h"
#include "cfm.h"
#include "dpif.h"
#include "dynamic-string.h"
#include "fail-open.h"
#include "guarded-list.h"
#include "hmapx.h"
#include "lacp.h"
#include "learn.h"
#include "mac-learning.h"
#include "meta-flow.h"
#include "multipath.h"
#include "netdev-vport.h"
#include "netdev.h"
#include "netlink.h"
#include "nx-match.h"
#include "odp-util.h"
#include "odp-execute.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "ofp-actions.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "ofproto-dpif-governor.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-mirror.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-upcall.h"
#include "ofproto-dpif-xlate.h"
#include "poll-loop.h"
#include "simap.h"
#include "smap.h"
#include "timer.h"
#include "tunnel.h"
#include "unaligned.h"
#include "unixctl.h"
#include "vlan-bitmap.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ofproto_dpif);

COVERAGE_DEFINE(ofproto_dpif_expired);
COVERAGE_DEFINE(facet_changed_rule);
COVERAGE_DEFINE(facet_revalidate);
COVERAGE_DEFINE(facet_unexpected);
COVERAGE_DEFINE(facet_suppress);
COVERAGE_DEFINE(subfacet_install_fail);
COVERAGE_DEFINE(packet_in_overflow);
COVERAGE_DEFINE(flow_mod_overflow);

/* Number of implemented OpenFlow tables. */
enum { N_TABLES = 255 };
enum { TBL_INTERNAL = N_TABLES - 1 };    /* Used for internal hidden rules. */
BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);

struct flow_miss;
struct facet;

/* An OpenFlow rule as seen by ofproto-dpif: the generic 'struct rule' ('up')
 * plus packet and byte counters guarded by 'stats_mutex'. */
struct rule_dpif {
    struct rule up;

    /* These statistics:
     *
     *   - Do include packets and bytes from facets that have been deleted or
     *     whose own statistics have been folded into the rule.
     *
     *   - Do include packets and bytes sent "by hand" that were accounted to
     *     the rule without any facet being involved (this is a rare corner
     *     case in rule_execute()).
     *
     *   - Do not include packets or bytes that can be obtained from any
     *     facet's packet_count or byte_count member or that can be obtained
     *     from the datapath by, e.g., dpif_flow_get() for any subfacet.
     */
    struct ovs_mutex stats_mutex;
    uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
    uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
};

static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes);
static struct rule_dpif *rule_dpif_cast(const struct rule *);

/* A group of one or more ports ('ports') together with its VLAN, LACP and
 * bonding configuration. */
struct ofbundle {
    struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
    struct ofproto_dpif *ofproto; /* Owning ofproto. */
    void *aux;                  /* Key supplied by ofproto's client. */
    char *name;                 /* Identifier for log messages. */

    /* Configuration. */
    struct list ports;          /* Contains "struct ofport"s. */
    enum port_vlan_mode vlan_mode; /* VLAN mode */
    int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
    unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
                                 * NULL if all VLANs are trunked. */
    struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
    struct bond *bond;          /* Nonnull iff more than one port. */
    bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */

    /* Status. */
    bool floodable;          /* True if no port has OFPUTIL_PC_NO_FLOOD set. */
};

static void bundle_remove(struct ofport *);
static void bundle_update(struct ofbundle *);
static void bundle_destroy(struct ofbundle *);
static void bundle_del_port(struct ofport_dpif *);
static void bundle_run(struct ofbundle *);
static void bundle_wait(struct ofbundle *);

static void stp_run(struct ofproto_dpif *ofproto);
static void stp_wait(struct ofproto_dpif *ofproto);
static int set_stp_port(struct ofport *,
                        const struct ofproto_port_stp_settings *);

static void compose_slow_path(const struct ofproto_dpif *,
                              const struct flow *, enum slow_path_reason,
                              uint64_t *stub, size_t stub_size,
                              const struct nlattr **actionsp,
                              size_t *actions_lenp);

/* A subfacet (see "struct subfacet" below) has three possible installation
 * states:
 *
 *   - SF_NOT_INSTALLED: Not installed in the datapath.  This will only be the
 *     case just after the subfacet is created, just before the subfacet is
 *     destroyed, or if the datapath returns an error when we try to install a
 *     subfacet.
 *
 *   - SF_FAST_PATH: The subfacet's actions are installed in the datapath.
 *
 *   - SF_SLOW_PATH: An action that sends every packet for the subfacet through
 *     ofproto_dpif is installed in the datapath.
 */
enum subfacet_path {
    SF_NOT_INSTALLED,           /* No datapath flow for this subfacet. */
    SF_FAST_PATH,               /* Full actions are installed. */
    SF_SLOW_PATH,               /* Send-to-userspace action is installed. */
};

/* A dpif flow and actions associated with a facet.
 *
 * See also the large comment on struct facet. */
struct subfacet {
    /* Owners. */
    struct hmap_node hmap_node; /* In struct dpif_backer's 'subfacets' hmap. */
    struct list list_node;      /* In struct facet's 'subfacets' list. */
    struct facet *facet;        /* Owning facet. */
    struct dpif_backer *backer; /* Owning backer. */

    /* The flow key and its length; subfacet_find() takes a key, a key length
     * and a key hash to locate a subfacet. */
    enum odp_key_fitness key_fitness;
    struct nlattr *key;
    int key_len;

    long long int used;         /* Time last used; time created if not used. */
    long long int created;      /* Time created. */

    uint64_t dp_packet_count;   /* Last known packet count in the datapath. */
    uint64_t dp_byte_count;     /* Last known byte count in the datapath. */

    enum subfacet_path path;    /* Installed in datapath? */
};

#define SUBFACET_DESTROY_MAX_BATCH 50

static struct subfacet *subfacet_create(struct facet *, struct flow_miss *);
static struct subfacet *subfacet_find(struct dpif_backer *,
                                      const struct nlattr *key, size_t key_len,
                                      uint32_t key_hash);
static void subfacet_destroy(struct subfacet *);
static void subfacet_destroy__(struct subfacet *);
static void subfacet_destroy_batch(struct dpif_backer *,
                                   struct subfacet **, int n);
static void subfacet_reset_dp_stats(struct subfacet *,
                                    struct dpif_flow_stats *);
static void subfacet_update_stats(struct subfacet *,
                                  const struct dpif_flow_stats *);
static int subfacet_install(struct subfacet *,
                            const struct ofpbuf *odp_actions,
                            struct dpif_flow_stats *);
static void subfacet_uninstall(struct subfacet *);

/* A unique, non-overlapping instantiation of an OpenFlow flow.
 *
 * A facet associates a "struct flow", which represents the Open vSwitch
 * userspace idea of an exact-match flow, with one or more subfacets.
 * While the facet is created based on an exact-match flow, it is stored
 * within the ofproto based on the wildcards that could be expressed
 * based on the flow table and other configuration.  (See the 'wc'
 * description in "struct xlate_out" for more details.)
 *
 * Each subfacet tracks the datapath's idea of the flow equivalent to
 * the facet.  When the kernel module (or other dpif implementation) and
 * Open vSwitch userspace agree on the definition of a flow key, there
 * is exactly one subfacet per facet.  If the dpif implementation
 * supports more-specific flow matching than userspace, however, a facet
 * can have more than one subfacet.  Examples include the dpif
 * implementation not supporting the same wildcards as userspace or some
 * distinction in flow that userspace simply doesn't understand.
*
 * Flow expiration works in terms of subfacets, so a facet must have at
 * least one subfacet or it will never expire, leaking memory. */
struct facet {
    /* Owners. */
    struct hmap_node hmap_node;  /* In owning ofproto's 'facets' hmap. */
    struct ofproto_dpif *ofproto;

    /* Owned data. */
    struct list subfacets;
    long long int used;         /* Time last used; time created if not used. */

    /* Key. */
    struct flow flow;           /* Flow of the creating subfacet. */
    struct cls_rule cr;         /* In 'ofproto_dpif's facets classifier. */

    /* These statistics:
     *
     *   - Do include packets and bytes sent "by hand", e.g. with
     *     dpif_execute().
     *
     *   - Do include packets and bytes that were obtained from the datapath
     *     when a subfacet's statistics were reset (e.g. dpif_flow_put() with
     *     DPIF_FP_ZERO_STATS).
     *
     *   - Do not include packets or bytes that can be obtained from the
     *     datapath for any existing subfacet.
     */
    uint64_t packet_count;       /* Number of packets received. */
    uint64_t byte_count;         /* Number of bytes received. */

    /* Resubmit statistics. */
    uint64_t prev_packet_count;  /* Number of packets from last stats push. */
    uint64_t prev_byte_count;    /* Number of bytes from last stats push. */
    long long int prev_used;     /* Used time from last stats push. */

    /* Accounting. */
    uint64_t accounted_bytes;    /* Bytes processed by facet_account(). */
    struct netflow_flow nf_flow; /* Per-flow NetFlow tracking data. */
    uint8_t tcp_flags;           /* TCP flags seen for this 'rule'. */

    struct xlate_out xout;

    /* Storage for a single subfacet, to reduce malloc() time and space
     * overhead.  (A facet always has at least one subfacet and in the common
     * case has exactly one subfacet.  However, 'one_subfacet' may not
     * always be valid, since it could have been removed after newer
     * subfacets were pushed onto the 'subfacets' list.) */
    struct subfacet one_subfacet;

    long long int learn_rl;      /* Rate limiter for facet_learn(). */
};

static struct facet *facet_create(const struct flow_miss *);
static void facet_remove(struct facet *);
static void facet_free(struct facet *);

static struct facet *facet_find(struct ofproto_dpif *, const struct flow *);
static struct facet *facet_lookup_valid(struct ofproto_dpif *,
                                        const struct flow *);
static bool facet_revalidate(struct facet *);
static bool facet_check_consistency(struct facet *);

static void facet_flush_stats(struct facet *);

static void facet_reset_counters(struct facet *);
static void flow_push_stats(struct ofproto_dpif *, struct flow *,
                            struct dpif_flow_stats *, bool may_learn);
static void facet_push_stats(struct facet *, bool may_learn);
static void facet_learn(struct facet *);
static void facet_account(struct facet *);
static void push_all_stats(void);

static bool facet_is_controller_flow(struct facet *);

/* ofproto-dpif's per-port state.  The generic 'struct ofport' is embedded as
 * 'up'; ofport_dpif_cast() recovers this structure from a pointer to it. */
struct ofport_dpif {
    struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
    struct ofport up;

    odp_port_t odp_port;
    struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
    struct list bundle_node;    /* In struct ofbundle's "ports" list. */
    struct cfm *cfm;            /* Connectivity Fault Management, if any. */
    struct bfd *bfd;            /* BFD, if any. */
    bool may_enable;            /* May be enabled in bonds. */
    bool is_tunnel;             /* This port is a tunnel. */
    long long int carrier_seq;  /* Carrier status changes. */
    struct ofport_dpif *peer;   /* Peer if patch port. */

    /* Spanning tree. */
    struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
    enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
    long long int stp_state_entered;

    /* Queue to DSCP mapping. */
    struct ofproto_port_queue *qdscp;
    size_t n_qdscp;

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used.  When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    ofp_port_t realdev_ofp_port;
    int vlandev_vid;
};

/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
 *
 * This is deprecated.  It is only for compatibility with broken device drivers
 * in old versions of Linux that do not properly support VLANs when VLAN
 * devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces. */
struct vlan_splinter {
    struct hmap_node realdev_vid_node;
    struct hmap_node vlandev_node;
    ofp_port_t realdev_ofp_port;
    ofp_port_t vlandev_ofp_port;
    int vid;
};

static void vsp_remove(struct ofport_dpif *);
static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);

static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
                                       ofp_port_t);

static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *,
                                       odp_port_t);

/* Returns the 'struct ofport_dpif' that embeds 'ofport' as its 'up' member,
 * or NULL if 'ofport' is NULL. */
static struct ofport_dpif *
ofport_dpif_cast(const struct ofport *ofport)
{
    return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
}

static void port_run(struct ofport_dpif *);
static void port_run_fast(struct ofport_dpif *);
static void port_wait(struct ofport_dpif *);
static int set_bfd(struct ofport *, const struct smap *);
static int set_cfm(struct ofport *, const struct cfm_settings *);
static void ofport_update_peer(struct ofport_dpif *);
static void run_fast_rl(void);
static int run_fast(struct ofproto *);

struct dpif_completion {
    struct list list_node;
    struct ofoperation *op;
};

/* Reasons that we might need to revalidate every facet, and corresponding
 * coverage counters.
 *
 * A value of 0 means that there is no need to revalidate.
 *
 * It would be nice to have some cleaner way to integrate with coverage
 * counters, but with only a few reasons I guess this is good enough for
 * now. */
enum revalidate_reason {
    REV_RECONFIGURE = 1,       /* Switch configuration changed. */
    REV_STP,                   /* Spanning tree protocol port status change. */
    REV_BOND,                  /* Bonding changed. */
    REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ...*/
    REV_FLOW_TABLE,            /* Flow table changed. */
    REV_MAC_LEARNING,          /* Mac learning changed. */
    REV_INCONSISTENCY          /* Facet self-check failed. */
};
COVERAGE_DEFINE(rev_reconfigure);
COVERAGE_DEFINE(rev_stp);
COVERAGE_DEFINE(rev_bond);
COVERAGE_DEFINE(rev_port_toggled);
COVERAGE_DEFINE(rev_flow_table);
COVERAGE_DEFINE(rev_mac_learning);
COVERAGE_DEFINE(rev_inconsistency);

struct avg_subfacet_rates {
    double add_rate;   /* Moving average of new flows created per minute. */
    double del_rate;   /* Moving average of flows deleted per minute. */
};

/* All datapaths of a given type share a single dpif backer instance. */
struct dpif_backer {
    char *type;
    int refcount;
    struct dpif *dpif;
    struct udpif *udpif;
    struct timer next_expiration;

    struct ovs_rwlock odp_to_ofport_lock;
    struct hmap odp_to_ofport_map OVS_GUARDED; /* ODP port to ofport map. */

    struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */

    /* Facet revalidation flags applying to facets which use this backer. */
    enum revalidate_reason need_revalidate; /* Revalidate every facet. */

    struct hmap drop_keys; /* Set of dropped odp keys. */
    bool recv_set_enable; /* Enables or disables receiving packets. */

    struct hmap subfacets;
    struct governor *governor;

    /* Subfacet statistics.
     *
     * These keep track of the total number of subfacets added and deleted and
     * flow life span.  They are useful for computing the flow rates stats
     * exposed via "ovs-appctl dpif/show".  The goal is to learn about
     * traffic patterns in ways that we can use later to improve Open vSwitch
     * performance in new situations.  */
    long long int created;           /* Time when it is created. */
    unsigned max_n_subfacet;         /* Maximum number of flows */
    unsigned avg_n_subfacet;         /* Average number of flows. */
    long long int avg_subfacet_life; /* Average life span of subfacets. */

    /* The average number of subfacets... */
    struct avg_subfacet_rates hourly;   /* ...over the last hour. */
    struct avg_subfacet_rates daily;    /* ...over the last day. */
    struct avg_subfacet_rates lifetime; /* ...over the switch lifetime. */
    long long int last_minute;          /* Last time 'hourly' was updated. */

    /* Number of subfacets added or deleted since 'last_minute'. */
    unsigned subfacet_add_count;
    unsigned subfacet_del_count;

    /* Number of subfacets added or deleted from 'created' to 'last_minute.' */
    unsigned long long int total_subfacet_add_count;
    unsigned long long int total_subfacet_del_count;

    /* Number of upcall handling threads. */
    unsigned int n_handler_threads;
};

/* All existing dpif_backer instances, indexed by ofproto->up.type. */
static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);

static void drop_key_clear(struct dpif_backer *);
static void update_moving_averages(struct dpif_backer *backer);

/* ofproto-dpif's per-bridge state.  The generic 'struct ofproto' is embedded
 * as 'up'; ofproto_dpif_cast() recovers this structure from a pointer to
 * it. */
struct ofproto_dpif {
    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
    struct ofproto up;
    struct dpif_backer *backer;

    /* Special OpenFlow rules. */
    struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
    struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
    struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */

    /* Bridging. */
    struct netflow *netflow;
    struct dpif_sflow *sflow;
    struct dpif_ipfix *ipfix;
    struct hmap bundles;        /* Contains "struct ofbundle"s. */
    struct mac_learning *ml;
    bool has_bonded_bundles;
    struct mbridge *mbridge;

    /* Facets. */
    struct classifier facets;     /* Contains 'struct facet's. */
    long long int consistency_rl;

    struct netdev_stats stats; /* To account packets generated and consumed in
                                * userspace. */

    /* Spanning tree. */
    struct stp *stp;
    long long int stp_last_tick;

    /* VLAN splinters. */
    struct ovs_mutex vsp_mutex;
    struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
    struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */

    /* Ports. */
    struct sset ports;             /* Set of standard port names. */
    struct sset ghost_ports;       /* Ports with no datapath port. */
    struct sset port_poll_set;     /* Queued names for port_poll() reply. */
    int port_poll_errno;           /* Last errno for port_poll() reply. */

    /* Per ofproto's dpif stats. */
    uint64_t n_hit;
    uint64_t n_missed;

    /* Work queues. */
    struct guarded_list pins;      /* Contains "struct ofputil_packet_in"s. */
};

/* By default, flows in the datapath are wildcarded (megaflows).  They
 * may be disabled with the "ovs-appctl dpif/disable-megaflows" command. */
static bool enable_megaflows = true;

/* All existing ofproto_dpif instances, indexed by ->up.name. */
static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);

static void ofproto_dpif_unixctl_init(void);

/* Returns the 'struct ofproto_dpif' that embeds 'ofproto' as its 'up' member.
 * Asserts that 'ofproto' belongs to 'ofproto_dpif_class', so unlike
 * ofport_dpif_cast() it must not be passed a null pointer. */
static inline struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
{
    ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
    return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}

static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                        ofp_port_t ofp_port);
static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
                          const struct ofpbuf *packet, struct ds *);

/* Upcalls. */
static void handle_upcalls(struct dpif_backer *);

/* Flow expiration. */
static int expire(struct dpif_backer *);

/* NetFlow. */
static void send_netflow_active_timeouts(struct ofproto_dpif *);

/* Utilities. */
static int send_packet(const struct ofport_dpif *, struct ofpbuf *packet);

/* Global variables. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* Initial port-to-bridge mappings: the interface hints that init() copies,
 * keyed by the name under which each hint was supplied. */
static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);

/* Executes 'fm'.  The caller retains ownership of 'fm' and everything in
 * it. */
void
ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
                      struct ofputil_flow_mod *fm)
{
    ofproto_flow_mod(&ofproto->up, fm);
}

/* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
 * Takes ownership of 'pin' and pin->packet.
*/ void ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto, struct ofputil_packet_in *pin) { if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) { COVERAGE_INC(packet_in_overflow); free(CONST_CAST(void *, pin->packet)); free(pin); } } /* Factory functions. */ static void init(const struct shash *iface_hints) { struct shash_node *node; /* Make a local copy, since we don't own 'iface_hints' elements. */ SHASH_FOR_EACH(node, iface_hints) { const struct iface_hint *orig_hint = node->data; struct iface_hint *new_hint = xmalloc(sizeof *new_hint); new_hint->br_name = xstrdup(orig_hint->br_name); new_hint->br_type = xstrdup(orig_hint->br_type); new_hint->ofp_port = orig_hint->ofp_port; shash_add(&init_ofp_ports, node->name, new_hint); } } static void enumerate_types(struct sset *types) { dp_enumerate_types(types); } static int enumerate_names(const char *type, struct sset *names) { struct ofproto_dpif *ofproto; sset_clear(names); HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { if (strcmp(type, ofproto->up.type)) { continue; } sset_add(names, ofproto->up.name); } return 0; } static int del(const char *type, const char *name) { struct dpif *dpif; int error; error = dpif_open(name, type, &dpif); if (!error) { error = dpif_delete(dpif); dpif_close(dpif); } return error; } static const char * port_open_type(const char *datapath_type, const char *port_type) { return dpif_port_open_type(datapath_type, port_type); } /* Type functions. 
*/

static void process_dpif_port_changes(struct dpif_backer *);
static void process_dpif_all_ports_changed(struct dpif_backer *);
static void process_dpif_port_change(struct dpif_backer *,
                                     const char *devname);
static void process_dpif_port_error(struct dpif_backer *, int error);

/* Returns the first ofproto_dpif in 'all_ofproto_dpifs' whose 'ports' set
 * contains 'name', or NULL if there is none. */
static struct ofproto_dpif *
lookup_ofproto_dpif_by_port_name(const char *name)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (sset_contains(&ofproto->ports, name)) {
            return ofproto;
        }
    }

    return NULL;
}

/* Performs periodic work for the backer registered under 'type' in
 * 'all_dpif_backers':
 *
 *   - Runs the dpif and, at most once every 2000 ms, pushes facet statistics.
 *
 *   - Enables packet reception once flow-restore-wait is no longer in effect,
 *     and reconfigures the upcall handler threads when their number changes.
 *
 *   - If 'need_revalidate' is set, garbage-collects tunnel backing ports,
 *     pushes the current configuration into the xlate module, and revalidates
 *     every facet of every ofproto that uses the backer.
 *
 *   - Runs flow expiration when its timer fires, processes datapath port
 *     changes, and runs (and possibly destroys) the governor.
 *
 * Returns 0 if there is no such backer or on success, otherwise the error
 * from dpif_recv_set(). */
static int
type_run(const char *type)
{
    static long long int push_timer = LLONG_MIN;
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);
    if (!backer) {
        /* This is not necessarily a problem, since backers are only
         * created on demand. */
        return 0;
    }

    dpif_run(backer->dpif);

    /* The most natural place to push facet statistics is when they're pulled
     * from the datapath.  However, when there are many flows in the datapath,
     * this expensive operation can occur so frequently, that it reduces our
     * ability to quickly set up flows.  To reduce the cost, we push statistics
     * here instead. */
    if (time_msec() > push_timer) {
        push_timer = time_msec() + 2000;
        push_all_stats();
    }

    /* If vswitchd started with other_config:flow_restore_wait set as "true",
     * and the configuration has now changed to "false", enable receiving
     * packets from the datapath. */
    if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {
        int error;

        /* NOTE(review): 'recv_set_enable' is set before dpif_recv_set() is
         * known to have succeeded and is not reset on the error path below,
         * so this branch is not retried after a failure -- confirm that this
         * is intended. */
        backer->recv_set_enable = true;

        error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
        if (error) {
            udpif_recv_set(backer->udpif, 0, false);
            VLOG_ERR("Failed to enable receiving packets in dpif.");
            return error;
        }
        udpif_recv_set(backer->udpif, n_handler_threads,
                       backer->recv_set_enable);
        dpif_flow_flush(backer->dpif);
        backer->need_revalidate = REV_RECONFIGURE;
    }

    /* If the n_handler_threads is reconfigured, call udpif_recv_set()
     * to reset the handler threads. */
    if (backer->n_handler_threads != n_handler_threads) {
        udpif_recv_set(backer->udpif, n_handler_threads,
                       backer->recv_set_enable);
        backer->n_handler_threads = n_handler_threads;
    }

    if (backer->need_revalidate) {
        struct ofproto_dpif *ofproto;
        struct simap_node *node;
        struct simap tmp_backers;

        /* Handle tunnel garbage collection. */
        /* The previous set of backing ports is moved aside into
         * 'tmp_backers'.  Every entry still needed by some tunnel port is
         * moved back below; whatever is left over afterwards is deleted from
         * the datapath. */
        simap_init(&tmp_backers);
        simap_swap(&backer->tnl_backers, &tmp_backers);

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *iter;

            if (backer != ofproto->backer) {
                continue;
            }

            HMAP_FOR_EACH (iter, up.hmap_node, &ofproto->up.ports) {
                char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
                const char *dp_port;

                if (!iter->is_tunnel) {
                    continue;
                }

                dp_port = netdev_vport_get_dpif_port(iter->up.netdev,
                                                     namebuf, sizeof namebuf);
                node = simap_find(&tmp_backers, dp_port);
                if (node) {
                    /* Still in use: carry the old entry over. */
                    simap_put(&backer->tnl_backers, dp_port, node->data);
                    simap_delete(&tmp_backers, node);
                    node = simap_find(&backer->tnl_backers, dp_port);
                } else {
                    /* Either already carried over for an earlier port in
                     * this pass, or not known yet, in which case we try to
                     * add it to the datapath. */
                    node = simap_find(&backer->tnl_backers, dp_port);
                    if (!node) {
                        odp_port_t odp_port = ODPP_NONE;

                        if (!dpif_port_add(backer->dpif, iter->up.netdev,
                                           &odp_port)) {
                            simap_put(&backer->tnl_backers, dp_port,
                                      odp_to_u32(odp_port));
                            node = simap_find(&backer->tnl_backers, dp_port);
                        }
                    }
                }

                /* 'node' is still null here only if dpif_port_add() failed. */
                iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
                if (tnl_port_reconfigure(iter, iter->up.netdev,
                                         iter->odp_port)) {
                    backer->need_revalidate = REV_RECONFIGURE;
                }
            }
        }

        SIMAP_FOR_EACH (node, &tmp_backers) {
            dpif_port_del(backer->dpif, u32_to_odp(node->data));
        }
        simap_destroy(&tmp_backers);

        /* Count this revalidation under the coverage counter that matches
         * its reason. */
        switch (backer->need_revalidate) {
        case REV_RECONFIGURE:   COVERAGE_INC(rev_reconfigure);   break;
        case REV_STP:           COVERAGE_INC(rev_stp);           break;
        case REV_BOND:          COVERAGE_INC(rev_bond);          break;
        case REV_PORT_TOGGLED:  COVERAGE_INC(rev_port_toggled);  break;
        case REV_FLOW_TABLE:    COVERAGE_INC(rev_flow_table);    break;
        case REV_MAC_LEARNING:  COVERAGE_INC(rev_mac_learning);  break;
        case REV_INCONSISTENCY: COVERAGE_INC(rev_inconsistency); break;
        }
        backer->need_revalidate = 0;

        /* Clear the drop_keys in case we should now be accepting some
         * formerly dropped flows. */
        drop_key_clear(backer);

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct facet *facet, *next;
            struct ofport_dpif *ofport;
            struct cls_cursor cursor;
            struct ofbundle *bundle;

            if (ofproto->backer != backer) {
                continue;
            }

            /* Push the bridge, bundle and port configuration into the xlate
             * module while holding its lock for writing. */
            ovs_rwlock_wrlock(&xlate_rwlock);
            xlate_ofproto_set(ofproto, ofproto->up.name,
                              ofproto->backer->dpif, ofproto->miss_rule,
                              ofproto->no_packet_in_rule, ofproto->ml,
                              ofproto->stp, ofproto->mbridge,
                              ofproto->sflow, ofproto->ipfix,
                              ofproto->up.frag_handling,
                              ofproto->up.forward_bpdu,
                              connmgr_has_in_band(ofproto->up.connmgr),
                              ofproto->netflow != NULL);

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                xlate_bundle_set(ofproto, bundle, bundle->name,
                                 bundle->vlan_mode, bundle->vlan,
                                 bundle->trunks, bundle->use_priority_tags,
                                 bundle->bond, bundle->lacp,
                                 bundle->floodable);
            }

            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                /* -1 stands for "no STP port". */
                int stp_port = ofport->stp_port
                    ? stp_port_no(ofport->stp_port)
                    : -1;
                xlate_ofport_set(ofproto, ofport->bundle, ofport,
                                 ofport->up.ofp_port, ofport->odp_port,
                                 ofport->up.netdev, ofport->cfm,
                                 ofport->bfd, ofport->peer, stp_port,
                                 ofport->qdscp, ofport->n_qdscp,
                                 ofport->up.pp.config, ofport->is_tunnel,
                                 ofport->may_enable);
            }
            ovs_rwlock_unlock(&xlate_rwlock);

            /* Only ofproto-dpif cares about the facet classifier so we just
             * lock cls_cursor_init() to appease the thread safety analysis. */
            ovs_rwlock_rdlock(&ofproto->facets.rwlock);
            cls_cursor_init(&cursor, &ofproto->facets, NULL);
            ovs_rwlock_unlock(&ofproto->facets.rwlock);
            CLS_CURSOR_FOR_EACH_SAFE (facet, next, cr, &cursor) {
                facet_revalidate(facet);
                run_fast_rl();
            }
        }

        udpif_revalidate(backer->udpif);
    }

    if (!backer->recv_set_enable) {
        /* Wake up before a max of 1000ms. */
        timer_set_duration(&backer->next_expiration, 1000);
    } else if (timer_expired(&backer->next_expiration)) {
        /* expire() returns the duration to wait before the next run. */
        int delay = expire(backer);
        timer_set_duration(&backer->next_expiration, delay);
    }

    process_dpif_port_changes(backer);

    if (backer->governor) {
        size_t n_subfacets;

        governor_run(backer->governor);

        /* If the governor has shrunk to its minimum size and the number of
         * subfacets has dwindled, then drop the governor entirely.
         *
         * For hysteresis, the number of subfacets to drop the governor is
         * smaller than the number needed to trigger its creation. */
        n_subfacets = hmap_count(&backer->subfacets);
        if (n_subfacets * 4 < flow_eviction_threshold
            && governor_is_idle(backer->governor)) {
            governor_destroy(backer->governor);
            backer->governor = NULL;
        }
    }

    return 0;
}

/* Check for and handle port changes in 'backer''s dpif.
*/ static void process_dpif_port_changes(struct dpif_backer *backer) { for (;;) { char *devname; int error; error = dpif_port_poll(backer->dpif, &devname); switch (error) { case EAGAIN: return; case ENOBUFS: process_dpif_all_ports_changed(backer); break; case 0: process_dpif_port_change(backer, devname); free(devname); break; default: process_dpif_port_error(backer, error); break; } } } static void process_dpif_all_ports_changed(struct dpif_backer *backer) { struct ofproto_dpif *ofproto; struct dpif_port dpif_port; struct dpif_port_dump dump; struct sset devnames; const char *devname; sset_init(&devnames); HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { if (ofproto->backer == backer) { struct ofport *ofport; HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) { sset_add(&devnames, netdev_get_name(ofport->netdev)); } } } DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) { sset_add(&devnames, dpif_port.name); } SSET_FOR_EACH (devname, &devnames) { process_dpif_port_change(backer, devname); } sset_destroy(&devnames); } static void process_dpif_port_change(struct dpif_backer *backer, const char *devname) { struct ofproto_dpif *ofproto; struct dpif_port port; /* Don't report on the datapath's device. */ if (!strcmp(devname, dpif_base_name(backer->dpif))) { return; } HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { if (simap_contains(&ofproto->backer->tnl_backers, devname)) { return; } } ofproto = lookup_ofproto_dpif_by_port_name(devname); if (dpif_port_query_by_name(backer->dpif, devname, &port)) { /* The port was removed. If we know the datapath, * report it through poll_set(). If we don't, it may be * notifying us of a removal we initiated, so ignore it. * If there's a pending ENOBUFS, let it stand, since * everything will be reevaluated. 
*/ if (ofproto && ofproto->port_poll_errno != ENOBUFS) { sset_add(&ofproto->port_poll_set, devname); ofproto->port_poll_errno = 0; } } else if (!ofproto) { /* The port was added, but we don't know with which * ofproto we should associate it. Delete it. */ dpif_port_del(backer->dpif, port.port_no); } else { struct ofport_dpif *ofport; ofport = ofport_dpif_cast(shash_find_data( &ofproto->up.port_by_name, devname)); if (ofport && ofport->odp_port != port.port_no && !odp_port_to_ofport(backer, port.port_no)) { /* 'ofport''s datapath port number has changed from * 'ofport->odp_port' to 'port.port_no'. Update our internal data * structures to match. */ ovs_rwlock_wrlock(&backer->odp_to_ofport_lock); hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node); ofport->odp_port = port.port_no; hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node, hash_odp_port(port.port_no)); ovs_rwlock_unlock(&backer->odp_to_ofport_lock); backer->need_revalidate = REV_RECONFIGURE; } } dpif_port_destroy(&port); } /* Propagate 'error' to all ofprotos based on 'backer'. */ static void process_dpif_port_error(struct dpif_backer *backer, int error) { struct ofproto_dpif *ofproto; HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { if (ofproto->backer == backer) { sset_clear(&ofproto->port_poll_set); ofproto->port_poll_errno = error; } } } static int dpif_backer_run_fast(struct dpif_backer *backer) { handle_upcalls(backer); return 0; } static int type_run_fast(const char *type) { struct dpif_backer *backer; backer = shash_find_data(&all_dpif_backers, type); if (!backer) { /* This is not necessarily a problem, since backers are only * created on demand. 
*/ return 0; } return dpif_backer_run_fast(backer); } static void run_fast_rl(void) { static long long int port_rl = LLONG_MIN; if (time_msec() >= port_rl) { struct ofproto_dpif *ofproto; HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { run_fast(&ofproto->up); } port_rl = time_msec() + 200; } } static void type_wait(const char *type) { struct dpif_backer *backer; backer = shash_find_data(&all_dpif_backers, type); if (!backer) { /* This is not necessarily a problem, since backers are only * created on demand. */ return; } if (backer->governor) { governor_wait(backer->governor); } timer_wait(&backer->next_expiration); dpif_wait(backer->dpif); udpif_wait(backer->udpif); } /* Basic life-cycle. */ static int add_internal_flows(struct ofproto_dpif *); static struct ofproto * alloc(void) { struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto); return &ofproto->up; } static void dealloc(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); free(ofproto); } static void close_dpif_backer(struct dpif_backer *backer) { struct shash_node *node; ovs_assert(backer->refcount > 0); if (--backer->refcount) { return; } drop_key_clear(backer); hmap_destroy(&backer->drop_keys); udpif_destroy(backer->udpif); simap_destroy(&backer->tnl_backers); ovs_rwlock_destroy(&backer->odp_to_ofport_lock); hmap_destroy(&backer->odp_to_ofport_map); node = shash_find(&all_dpif_backers, backer->type); free(backer->type); shash_delete(&all_dpif_backers, node); dpif_close(backer->dpif); ovs_assert(hmap_is_empty(&backer->subfacets)); hmap_destroy(&backer->subfacets); governor_destroy(backer->governor); free(backer); } /* Datapath port slated for removal from datapath. 
*/ struct odp_garbage { struct list list_node; odp_port_t odp_port; }; static int open_dpif_backer(const char *type, struct dpif_backer **backerp) { struct dpif_backer *backer; struct dpif_port_dump port_dump; struct dpif_port port; struct shash_node *node; struct list garbage_list; struct odp_garbage *garbage, *next; struct sset names; char *backer_name; const char *name; int error; backer = shash_find_data(&all_dpif_backers, type); if (backer) { backer->refcount++; *backerp = backer; return 0; } backer_name = xasprintf("ovs-%s", type); /* Remove any existing datapaths, since we assume we're the only * userspace controlling the datapath. */ sset_init(&names); dp_enumerate_names(type, &names); SSET_FOR_EACH(name, &names) { struct dpif *old_dpif; /* Don't remove our backer if it exists. */ if (!strcmp(name, backer_name)) { continue; } if (dpif_open(name, type, &old_dpif)) { VLOG_WARN("couldn't open old datapath %s to remove it", name); } else { dpif_delete(old_dpif); dpif_close(old_dpif); } } sset_destroy(&names); backer = xmalloc(sizeof *backer); error = dpif_create_and_open(backer_name, type, &backer->dpif); free(backer_name); if (error) { VLOG_ERR("failed to open datapath of type %s: %s", type, ovs_strerror(error)); free(backer); return error; } backer->udpif = udpif_create(backer, backer->dpif); backer->type = xstrdup(type); backer->governor = NULL; backer->refcount = 1; hmap_init(&backer->odp_to_ofport_map); ovs_rwlock_init(&backer->odp_to_ofport_lock); hmap_init(&backer->drop_keys); hmap_init(&backer->subfacets); timer_set_duration(&backer->next_expiration, 1000); backer->need_revalidate = 0; simap_init(&backer->tnl_backers); backer->recv_set_enable = !ofproto_get_flow_restore_wait(); *backerp = backer; if (backer->recv_set_enable) { dpif_flow_flush(backer->dpif); } /* Loop through the ports already on the datapath and remove any * that we don't need anymore. 
*/ list_init(&garbage_list); dpif_port_dump_start(&port_dump, backer->dpif); while (dpif_port_dump_next(&port_dump, &port)) { node = shash_find(&init_ofp_ports, port.name); if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) { garbage = xmalloc(sizeof *garbage); garbage->odp_port = port.port_no; list_push_front(&garbage_list, &garbage->list_node); } } dpif_port_dump_done(&port_dump); LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) { dpif_port_del(backer->dpif, garbage->odp_port); list_remove(&garbage->list_node); free(garbage); } shash_add(&all_dpif_backers, type, backer); error = dpif_recv_set(backer->dpif, backer->recv_set_enable); if (error) { VLOG_ERR("failed to listen on datapath of type %s: %s", type, ovs_strerror(error)); close_dpif_backer(backer); return error; } udpif_recv_set(backer->udpif, n_handler_threads, backer->recv_set_enable); backer->n_handler_threads = n_handler_threads; backer->max_n_subfacet = 0; backer->created = time_msec(); backer->last_minute = backer->created; memset(&backer->hourly, 0, sizeof backer->hourly); memset(&backer->daily, 0, sizeof backer->daily); memset(&backer->lifetime, 0, sizeof backer->lifetime); backer->subfacet_add_count = 0; backer->subfacet_del_count = 0; backer->total_subfacet_add_count = 0; backer->total_subfacet_del_count = 0; backer->avg_n_subfacet = 0; backer->avg_subfacet_life = 0; return error; } static int construct(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct shash_node *node, *next; uint32_t max_ports; int error; error = open_dpif_backer(ofproto->up.type, &ofproto->backer); if (error) { return error; } max_ports = dpif_get_max_ports(ofproto->backer->dpif); ofproto_init_max_ports(ofproto_, MIN(max_ports, ofp_to_u16(OFPP_MAX))); ofproto->netflow = NULL; ofproto->sflow = NULL; ofproto->ipfix = NULL; ofproto->stp = NULL; hmap_init(&ofproto->bundles); ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME); ofproto->mbridge = 
mbridge_create(); ofproto->has_bonded_bundles = false; ovs_mutex_init(&ofproto->vsp_mutex); classifier_init(&ofproto->facets); ofproto->consistency_rl = LLONG_MIN; guarded_list_init(&ofproto->pins); ofproto_dpif_unixctl_init(); hmap_init(&ofproto->vlandev_map); hmap_init(&ofproto->realdev_vid_map); sset_init(&ofproto->ports); sset_init(&ofproto->ghost_ports); sset_init(&ofproto->port_poll_set); ofproto->port_poll_errno = 0; SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) { struct iface_hint *iface_hint = node->data; if (!strcmp(iface_hint->br_name, ofproto->up.name)) { /* Check if the datapath already has this port. */ if (dpif_port_exists(ofproto->backer->dpif, node->name)) { sset_add(&ofproto->ports, node->name); } free(iface_hint->br_name); free(iface_hint->br_type); free(iface_hint); shash_delete(&init_ofp_ports, node); } } hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node, hash_string(ofproto->up.name, 0)); memset(&ofproto->stats, 0, sizeof ofproto->stats); ofproto_init_tables(ofproto_, N_TABLES); error = add_internal_flows(ofproto); ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY; ofproto->n_hit = 0; ofproto->n_missed = 0; return error; } static int add_internal_flow(struct ofproto_dpif *ofproto, int id, const struct ofpbuf *ofpacts, struct rule_dpif **rulep) { struct ofputil_flow_mod fm; int error; match_init_catchall(&fm.match); fm.priority = 0; match_set_reg(&fm.match, 0, id); fm.new_cookie = htonll(0); fm.cookie = htonll(0); fm.cookie_mask = htonll(0); fm.modify_cookie = false; fm.table_id = TBL_INTERNAL; fm.command = OFPFC_ADD; fm.idle_timeout = 0; fm.hard_timeout = 0; fm.buffer_id = 0; fm.out_port = 0; fm.flags = 0; fm.ofpacts = ofpacts->data; fm.ofpacts_len = ofpacts->size; error = ofproto_flow_mod(&ofproto->up, &fm); if (error) { VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)", id, ofperr_to_string(error)); return error; } if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL, 
rulep)) { rule_dpif_unref(*rulep); } else { NOT_REACHED(); } return 0; } static int add_internal_flows(struct ofproto_dpif *ofproto) { struct ofpact_controller *controller; uint64_t ofpacts_stub[128 / 8]; struct ofpbuf ofpacts; int error; int id; ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); id = 1; controller = ofpact_put_CONTROLLER(&ofpacts); controller->max_len = UINT16_MAX; controller->controller_id = 0; controller->reason = OFPR_NO_MATCH; ofpact_pad(&ofpacts); error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule); if (error) { return error; } ofpbuf_clear(&ofpacts); error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->no_packet_in_rule); if (error) { return error; } error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->drop_frags_rule); return error; } static void destruct(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct rule_dpif *rule, *next_rule; struct ofputil_packet_in *pin, *next_pin; struct facet *facet, *next_facet; struct cls_cursor cursor; struct oftable *table; struct list pins; ovs_rwlock_rdlock(&ofproto->facets.rwlock); cls_cursor_init(&cursor, &ofproto->facets, NULL); ovs_rwlock_unlock(&ofproto->facets.rwlock); CLS_CURSOR_FOR_EACH_SAFE (facet, next_facet, cr, &cursor) { facet_remove(facet); } ofproto->backer->need_revalidate = REV_RECONFIGURE; ovs_rwlock_wrlock(&xlate_rwlock); xlate_remove_ofproto(ofproto); ovs_rwlock_unlock(&xlate_rwlock); /* Discard any flow_miss_batches queued up for 'ofproto', avoiding a * use-after-free error. 
*/ udpif_revalidate(ofproto->backer->udpif); hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node); OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { struct cls_cursor cursor; ovs_rwlock_rdlock(&table->cls.rwlock); cls_cursor_init(&cursor, &table->cls, NULL); ovs_rwlock_unlock(&table->cls.rwlock); CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) { ofproto_rule_delete(&ofproto->up, &rule->up); } } guarded_list_pop_all(&ofproto->pins, &pins); LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) { list_remove(&pin->list_node); free(CONST_CAST(void *, pin->packet)); free(pin); } guarded_list_destroy(&ofproto->pins); mbridge_unref(ofproto->mbridge); netflow_destroy(ofproto->netflow); dpif_sflow_unref(ofproto->sflow); hmap_destroy(&ofproto->bundles); mac_learning_unref(ofproto->ml); classifier_destroy(&ofproto->facets); hmap_destroy(&ofproto->vlandev_map); hmap_destroy(&ofproto->realdev_vid_map); sset_destroy(&ofproto->ports); sset_destroy(&ofproto->ghost_ports); sset_destroy(&ofproto->port_poll_set); ovs_mutex_destroy(&ofproto->vsp_mutex); close_dpif_backer(ofproto->backer); } static int run_fast(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct ofputil_packet_in *pin, *next_pin; struct ofport_dpif *ofport; struct list pins; /* Do not perform any periodic activity required by 'ofproto' while * waiting for flow restore to complete. 
*/ if (ofproto_get_flow_restore_wait()) { return 0; } guarded_list_pop_all(&ofproto->pins, &pins); LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) { connmgr_send_packet_in(ofproto->up.connmgr, pin); list_remove(&pin->list_node); free(CONST_CAST(void *, pin->packet)); free(pin); } HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { port_run_fast(ofport); } return 0; } static int run(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct ofport_dpif *ofport; struct ofbundle *bundle; int error; if (mbridge_need_revalidate(ofproto->mbridge)) { ofproto->backer->need_revalidate = REV_RECONFIGURE; ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_flush(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); } /* Do not perform any periodic activity below required by 'ofproto' while * waiting for flow restore to complete. */ if (ofproto_get_flow_restore_wait()) { return 0; } error = run_fast(ofproto_); if (error) { return error; } if (ofproto->netflow) { if (netflow_run(ofproto->netflow)) { send_netflow_active_timeouts(ofproto); } } if (ofproto->sflow) { dpif_sflow_run(ofproto->sflow); } if (ofproto->ipfix) { dpif_ipfix_run(ofproto->ipfix); } HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { port_run(ofport); } HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { bundle_run(bundle); } stp_run(ofproto); ovs_rwlock_wrlock(&ofproto->ml->rwlock); if (mac_learning_run(ofproto->ml)) { ofproto->backer->need_revalidate = REV_MAC_LEARNING; } ovs_rwlock_unlock(&ofproto->ml->rwlock); /* Check the consistency of a random facet, to aid debugging. 
*/ ovs_rwlock_rdlock(&ofproto->facets.rwlock); if (time_msec() >= ofproto->consistency_rl && !classifier_is_empty(&ofproto->facets) && !ofproto->backer->need_revalidate) { struct cls_table *table; struct cls_rule *cr; struct facet *facet; ofproto->consistency_rl = time_msec() + 250; table = CONTAINER_OF(hmap_random_node(&ofproto->facets.tables), struct cls_table, hmap_node); cr = CONTAINER_OF(hmap_random_node(&table->rules), struct cls_rule, hmap_node); facet = CONTAINER_OF(cr, struct facet, cr); if (!facet_check_consistency(facet)) { ofproto->backer->need_revalidate = REV_INCONSISTENCY; } } ovs_rwlock_unlock(&ofproto->facets.rwlock); return 0; } static void wait(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct ofport_dpif *ofport; struct ofbundle *bundle; if (ofproto_get_flow_restore_wait()) { return; } if (ofproto->sflow) { dpif_sflow_wait(ofproto->sflow); } if (ofproto->ipfix) { dpif_ipfix_wait(ofproto->ipfix); } HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { port_wait(ofport); } HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { bundle_wait(bundle); } if (ofproto->netflow) { netflow_wait(ofproto->netflow); } ovs_rwlock_rdlock(&ofproto->ml->rwlock); mac_learning_wait(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); stp_wait(ofproto); if (ofproto->backer->need_revalidate) { /* Shouldn't happen, but if it does just go around again. 
*/ VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()"); poll_immediate_wake(); } } static void get_memory_usage(const struct ofproto *ofproto_, struct simap *usage) { const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct cls_cursor cursor; size_t n_subfacets = 0; struct facet *facet; ovs_rwlock_rdlock(&ofproto->facets.rwlock); simap_increase(usage, "facets", classifier_count(&ofproto->facets)); ovs_rwlock_unlock(&ofproto->facets.rwlock); ovs_rwlock_rdlock(&ofproto->facets.rwlock); cls_cursor_init(&cursor, &ofproto->facets, NULL); CLS_CURSOR_FOR_EACH (facet, cr, &cursor) { n_subfacets += list_size(&facet->subfacets); } ovs_rwlock_unlock(&ofproto->facets.rwlock); simap_increase(usage, "subfacets", n_subfacets); } static void flush(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct subfacet *subfacet, *next_subfacet; struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH]; int n_batch; n_batch = 0; HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node, &ofproto->backer->subfacets) { if (subfacet->facet->ofproto != ofproto) { continue; } if (subfacet->path != SF_NOT_INSTALLED) { batch[n_batch++] = subfacet; if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) { subfacet_destroy_batch(ofproto->backer, batch, n_batch); n_batch = 0; } } else { subfacet_destroy(subfacet); } } if (n_batch > 0) { subfacet_destroy_batch(ofproto->backer, batch, n_batch); } } static void get_features(struct ofproto *ofproto_ OVS_UNUSED, bool *arp_match_ip, enum ofputil_action_bitmap *actions) { *arp_match_ip = true; *actions = (OFPUTIL_A_OUTPUT | OFPUTIL_A_SET_VLAN_VID | OFPUTIL_A_SET_VLAN_PCP | OFPUTIL_A_STRIP_VLAN | OFPUTIL_A_SET_DL_SRC | OFPUTIL_A_SET_DL_DST | OFPUTIL_A_SET_NW_SRC | OFPUTIL_A_SET_NW_DST | OFPUTIL_A_SET_NW_TOS | OFPUTIL_A_SET_TP_SRC | OFPUTIL_A_SET_TP_DST | OFPUTIL_A_ENQUEUE); } static void get_tables(struct ofproto *ofproto_, struct ofp12_table_stats *ots) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); 
struct dpif_dp_stats s; uint64_t n_miss, n_no_pkt_in, n_bytes, n_dropped_frags; uint64_t n_lookup; strcpy(ots->name, "classifier"); dpif_get_dp_stats(ofproto->backer->dpif, &s); rule_get_stats(&ofproto->miss_rule->up, &n_miss, &n_bytes); rule_get_stats(&ofproto->no_packet_in_rule->up, &n_no_pkt_in, &n_bytes); rule_get_stats(&ofproto->drop_frags_rule->up, &n_dropped_frags, &n_bytes); n_lookup = s.n_hit + s.n_missed - n_dropped_frags; ots->lookup_count = htonll(n_lookup); ots->matched_count = htonll(n_lookup - n_miss - n_no_pkt_in); } static struct ofport * port_alloc(void) { struct ofport_dpif *port = xmalloc(sizeof *port); return &port->up; } static void port_dealloc(struct ofport *port_) { struct ofport_dpif *port = ofport_dpif_cast(port_); free(port); } static int port_construct(struct ofport *port_) { struct ofport_dpif *port = ofport_dpif_cast(port_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); const struct netdev *netdev = port->up.netdev; char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; struct dpif_port dpif_port; int error; ofproto->backer->need_revalidate = REV_RECONFIGURE; port->bundle = NULL; port->cfm = NULL; port->bfd = NULL; port->may_enable = true; port->stp_port = NULL; port->stp_state = STP_DISABLED; port->is_tunnel = false; port->peer = NULL; port->qdscp = NULL; port->n_qdscp = 0; port->realdev_ofp_port = 0; port->vlandev_vid = 0; port->carrier_seq = netdev_get_carrier_resets(netdev); if (netdev_vport_is_patch(netdev)) { /* By bailing out here, we don't submit the port to the sFlow module * to be considered for counter polling export. This is correct * because the patch port represents an interface that sFlow considers * to be "internal" to the switch as a whole, and therefore not an * candidate for counter polling. 
*/ port->odp_port = ODPP_NONE; ofport_update_peer(port); return 0; } error = dpif_port_query_by_name(ofproto->backer->dpif, netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf), &dpif_port); if (error) { return error; } port->odp_port = dpif_port.port_no; if (netdev_get_tunnel_config(netdev)) { tnl_port_add(port, port->up.netdev, port->odp_port); port->is_tunnel = true; } else { /* Sanity-check that a mapping doesn't already exist. This * shouldn't happen for non-tunnel ports. */ if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) { VLOG_ERR("port %s already has an OpenFlow port number", dpif_port.name); dpif_port_destroy(&dpif_port); return EBUSY; } ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock); hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node, hash_odp_port(port->odp_port)); ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock); } dpif_port_destroy(&dpif_port); if (ofproto->sflow) { dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port); } return 0; } static void port_destruct(struct ofport *port_) { struct ofport_dpif *port = ofport_dpif_cast(port_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); const char *devname = netdev_get_name(port->up.netdev); char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; const char *dp_port_name; ofproto->backer->need_revalidate = REV_RECONFIGURE; ovs_rwlock_wrlock(&xlate_rwlock); xlate_ofport_remove(port); ovs_rwlock_unlock(&xlate_rwlock); dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf, sizeof namebuf); if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) { /* The underlying device is still there, so delete it. This * happens when the ofproto is being destroyed, since the caller * assumes that removal of attached ports will happen as part of * destruction. 
*/ if (!port->is_tunnel) { dpif_port_del(ofproto->backer->dpif, port->odp_port); } } if (port->peer) { port->peer->peer = NULL; port->peer = NULL; } if (port->odp_port != ODPP_NONE && !port->is_tunnel) { ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock); hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node); ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock); } tnl_port_del(port); sset_find_and_delete(&ofproto->ports, devname); sset_find_and_delete(&ofproto->ghost_ports, devname); bundle_remove(port_); set_cfm(port_, NULL); set_bfd(port_, NULL); if (ofproto->sflow) { dpif_sflow_del_port(ofproto->sflow, port->odp_port); } free(port->qdscp); } static void port_modified(struct ofport *port_) { struct ofport_dpif *port = ofport_dpif_cast(port_); if (port->bundle && port->bundle->bond) { bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev); } if (port->cfm) { cfm_set_netdev(port->cfm, port->up.netdev); } if (port->bfd) { bfd_set_netdev(port->bfd, port->up.netdev); } if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev, port->odp_port)) { ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate = REV_RECONFIGURE; } ofport_update_peer(port); } static void port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config) { struct ofport_dpif *port = ofport_dpif_cast(port_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); enum ofputil_port_config changed = old_config ^ port->up.pp.config; if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP | OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD | OFPUTIL_PC_NO_PACKET_IN)) { ofproto->backer->need_revalidate = REV_RECONFIGURE; if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) { bundle_update(port->bundle); } } } static int set_sflow(struct ofproto *ofproto_, const struct ofproto_sflow_options *sflow_options) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct dpif_sflow *ds = ofproto->sflow; if (sflow_options) { if (!ds) { 
struct ofport_dpif *ofport; ds = ofproto->sflow = dpif_sflow_create(); HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) { dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port); } ofproto->backer->need_revalidate = REV_RECONFIGURE; } dpif_sflow_set_options(ds, sflow_options); } else { if (ds) { dpif_sflow_unref(ds); ofproto->backer->need_revalidate = REV_RECONFIGURE; ofproto->sflow = NULL; } } return 0; } static int set_ipfix( struct ofproto *ofproto_, const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options, const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options, size_t n_flow_exporters_options) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct dpif_ipfix *di = ofproto->ipfix; bool has_options = bridge_exporter_options || flow_exporters_options; if (has_options && !di) { di = ofproto->ipfix = dpif_ipfix_create(); } if (di) { /* Call set_options in any case to cleanly flush the flow * caches in the last exporters that are to be destroyed. 
*/ dpif_ipfix_set_options( di, bridge_exporter_options, flow_exporters_options, n_flow_exporters_options); if (!has_options) { dpif_ipfix_unref(di); ofproto->ipfix = NULL; } } return 0; } static int set_cfm(struct ofport *ofport_, const struct cfm_settings *s) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); int error; if (!s) { error = 0; } else { if (!ofport->cfm) { struct ofproto_dpif *ofproto; ofproto = ofproto_dpif_cast(ofport->up.ofproto); ofproto->backer->need_revalidate = REV_RECONFIGURE; ofport->cfm = cfm_create(ofport->up.netdev); } if (cfm_configure(ofport->cfm, s)) { return 0; } error = EINVAL; } cfm_unref(ofport->cfm); ofport->cfm = NULL; return error; } static bool get_cfm_status(const struct ofport *ofport_, struct ofproto_cfm_status *status) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); if (ofport->cfm) { status->faults = cfm_get_fault(ofport->cfm); status->remote_opstate = cfm_get_opup(ofport->cfm); status->health = cfm_get_health(ofport->cfm); cfm_get_remote_mpids(ofport->cfm, &status->rmps, &status->n_rmps); return true; } else { return false; } } static int set_bfd(struct ofport *ofport_, const struct smap *cfg) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto); struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); struct bfd *old; old = ofport->bfd; ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev), cfg, ofport->up.netdev); if (ofport->bfd != old) { ofproto->backer->need_revalidate = REV_RECONFIGURE; } return 0; } static int get_bfd_status(struct ofport *ofport_, struct smap *smap) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); if (ofport->bfd) { bfd_get_status(ofport->bfd, smap); return 0; } else { return ENOENT; } } /* Spanning Tree. 
*/ static void send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_) { struct ofproto_dpif *ofproto = ofproto_; struct stp_port *sp = stp_get_port(ofproto->stp, port_num); struct ofport_dpif *ofport; ofport = stp_port_get_aux(sp); if (!ofport) { VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d", ofproto->up.name, port_num); } else { struct eth_header *eth = pkt->l2; netdev_get_etheraddr(ofport->up.netdev, eth->eth_src); if (eth_addr_is_zero(eth->eth_src)) { VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d " "with unknown MAC", ofproto->up.name, port_num); } else { send_packet(ofport, pkt); } } ofpbuf_delete(pkt); } /* Configures STP on 'ofproto_' using the settings defined in 's'. */ static int set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); /* Only revalidate flows if the configuration changed. */ if (!s != !ofproto->stp) { ofproto->backer->need_revalidate = REV_RECONFIGURE; } if (s) { if (!ofproto->stp) { ofproto->stp = stp_create(ofproto_->name, s->system_id, send_bpdu_cb, ofproto); ofproto->stp_last_tick = time_msec(); } stp_set_bridge_id(ofproto->stp, s->system_id); stp_set_bridge_priority(ofproto->stp, s->priority); stp_set_hello_time(ofproto->stp, s->hello_time); stp_set_max_age(ofproto->stp, s->max_age); stp_set_forward_delay(ofproto->stp, s->fwd_delay); } else { struct ofport *ofport; HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) { set_stp_port(ofport, NULL); } stp_unref(ofproto->stp); ofproto->stp = NULL; } return 0; } static int get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); if (ofproto->stp) { s->enabled = true; s->bridge_id = stp_get_bridge_id(ofproto->stp); s->designated_root = stp_get_designated_root(ofproto->stp); s->root_path_cost = stp_get_root_path_cost(ofproto->stp); } else { s->enabled = false; } return 0; } static void 
update_stp_port_state(struct ofport_dpif *ofport) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); enum stp_state state; /* Figure out new state. */ state = ofport->stp_port ? stp_port_get_state(ofport->stp_port) : STP_DISABLED; /* Update state. */ if (ofport->stp_state != state) { enum ofputil_port_state of_state; bool fwd_change; VLOG_DBG_RL(&rl, "port %s: STP state changed from %s to %s", netdev_get_name(ofport->up.netdev), stp_state_name(ofport->stp_state), stp_state_name(state)); if (stp_learn_in_state(ofport->stp_state) != stp_learn_in_state(state)) { /* xxx Learning action flows should also be flushed. */ ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_flush(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); } fwd_change = stp_forward_in_state(ofport->stp_state) != stp_forward_in_state(state); ofproto->backer->need_revalidate = REV_STP; ofport->stp_state = state; ofport->stp_state_entered = time_msec(); if (fwd_change && ofport->bundle) { bundle_update(ofport->bundle); } /* Update the STP state bits in the OpenFlow port description. */ of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK; of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD : state == STP_BLOCKING ? OFPUTIL_PS_STP_BLOCK : 0); ofproto_port_set_state(&ofport->up, of_state); } } /* Configures STP on 'ofport_' using the settings defined in 's'. The * caller is responsible for assigning STP port numbers and ensuring * there are no duplicates. 
*/ static int set_stp_port(struct ofport *ofport_, const struct ofproto_port_stp_settings *s) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); struct stp_port *sp = ofport->stp_port; if (!s || !s->enable) { if (sp) { ofport->stp_port = NULL; stp_port_disable(sp); update_stp_port_state(ofport); } return 0; } else if (sp && stp_port_no(sp) != s->port_num && ofport == stp_port_get_aux(sp)) { /* The port-id changed, so disable the old one if it's not * already in use by another port. */ stp_port_disable(sp); } sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num); stp_port_enable(sp); stp_port_set_aux(sp, ofport); stp_port_set_priority(sp, s->priority); stp_port_set_path_cost(sp, s->path_cost); update_stp_port_state(ofport); return 0; } static int get_stp_port_status(struct ofport *ofport_, struct ofproto_port_stp_status *s) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); struct stp_port *sp = ofport->stp_port; if (!ofproto->stp || !sp) { s->enabled = false; return 0; } s->enabled = true; s->port_id = stp_port_get_id(sp); s->state = stp_port_get_state(sp); s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000; s->role = stp_port_get_role(sp); stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count); return 0; } static void stp_run(struct ofproto_dpif *ofproto) { if (ofproto->stp) { long long int now = time_msec(); long long int elapsed = now - ofproto->stp_last_tick; struct stp_port *sp; if (elapsed > 0) { stp_tick(ofproto->stp, MIN(INT_MAX, elapsed)); ofproto->stp_last_tick = now; } while (stp_get_changed_port(ofproto->stp, &sp)) { struct ofport_dpif *ofport = stp_port_get_aux(sp); if (ofport) { update_stp_port_state(ofport); } } if (stp_check_and_reset_fdb_flush(ofproto->stp)) { ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_flush(ofproto->ml); 
ovs_rwlock_unlock(&ofproto->ml->rwlock); } } } static void stp_wait(struct ofproto_dpif *ofproto) { if (ofproto->stp) { poll_timer_wait(1000); } } static int set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp, size_t n_qdscp) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); if (ofport->n_qdscp != n_qdscp || (n_qdscp && memcmp(ofport->qdscp, qdscp, n_qdscp * sizeof *qdscp))) { ofproto->backer->need_revalidate = REV_RECONFIGURE; free(ofport->qdscp); ofport->qdscp = n_qdscp ? xmemdup(qdscp, n_qdscp * sizeof *qdscp) : NULL; ofport->n_qdscp = n_qdscp; } return 0; } /* Bundles. */ /* Expires all MAC learning entries associated with 'bundle' and forces its * ofproto to revalidate every flow. * * Normally MAC learning entries are removed only from the ofproto associated * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries * are removed from every ofproto. When patch ports and SLB bonds are in use * and a VM migration happens and the gratuitous ARPs are somehow lost, this * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate * with the host from which it migrated. 
*/ static void bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos) { struct ofproto_dpif *ofproto = bundle->ofproto; struct mac_learning *ml = ofproto->ml; struct mac_entry *mac, *next_mac; ofproto->backer->need_revalidate = REV_RECONFIGURE; ovs_rwlock_wrlock(&ml->rwlock); LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) { if (mac->port.p == bundle) { if (all_ofprotos) { struct ofproto_dpif *o; HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) { if (o != ofproto) { struct mac_entry *e; ovs_rwlock_wrlock(&o->ml->rwlock); e = mac_learning_lookup(o->ml, mac->mac, mac->vlan); if (e) { mac_learning_expire(o->ml, e); } ovs_rwlock_unlock(&o->ml->rwlock); } } } mac_learning_expire(ml, mac); } } ovs_rwlock_unlock(&ml->rwlock); } static struct ofbundle * bundle_lookup(const struct ofproto_dpif *ofproto, void *aux) { struct ofbundle *bundle; HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0), &ofproto->bundles) { if (bundle->aux == aux) { return bundle; } } return NULL; } static void bundle_update(struct ofbundle *bundle) { struct ofport_dpif *port; bundle->floodable = true; LIST_FOR_EACH (port, bundle_node, &bundle->ports) { if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD || !stp_forward_in_state(port->stp_state)) { bundle->floodable = false; break; } } } static void bundle_del_port(struct ofport_dpif *port) { struct ofbundle *bundle = port->bundle; bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE; list_remove(&port->bundle_node); port->bundle = NULL; if (bundle->lacp) { lacp_slave_unregister(bundle->lacp, port); } if (bundle->bond) { bond_slave_unregister(bundle->bond, port); } bundle_update(bundle); } static bool bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port, struct lacp_slave_settings *lacp) { struct ofport_dpif *port; port = get_ofp_port(bundle->ofproto, ofp_port); if (!port) { return false; } if (port->bundle != bundle) { bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE; if (port->bundle) { 
bundle_remove(&port->up); } port->bundle = bundle; list_push_back(&bundle->ports, &port->bundle_node); if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD || !stp_forward_in_state(port->stp_state)) { bundle->floodable = false; } } if (lacp) { bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE; lacp_slave_register(bundle->lacp, port, lacp); } return true; } static void bundle_destroy(struct ofbundle *bundle) { struct ofproto_dpif *ofproto; struct ofport_dpif *port, *next_port; if (!bundle) { return; } ofproto = bundle->ofproto; mbridge_unregister_bundle(ofproto->mbridge, bundle->aux); ovs_rwlock_wrlock(&xlate_rwlock); xlate_bundle_remove(bundle); ovs_rwlock_unlock(&xlate_rwlock); LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) { bundle_del_port(port); } bundle_flush_macs(bundle, true); hmap_remove(&ofproto->bundles, &bundle->hmap_node); free(bundle->name); free(bundle->trunks); lacp_unref(bundle->lacp); bond_unref(bundle->bond); free(bundle); } static int bundle_set(struct ofproto *ofproto_, void *aux, const struct ofproto_bundle_settings *s) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); bool need_flush = false; struct ofport_dpif *port; struct ofbundle *bundle; unsigned long *trunks; int vlan; size_t i; bool ok; if (!s) { bundle_destroy(bundle_lookup(ofproto, aux)); return 0; } ovs_assert(s->n_slaves == 1 || s->bond != NULL); ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL)); bundle = bundle_lookup(ofproto, aux); if (!bundle) { bundle = xmalloc(sizeof *bundle); bundle->ofproto = ofproto; hmap_insert(&ofproto->bundles, &bundle->hmap_node, hash_pointer(aux, 0)); bundle->aux = aux; bundle->name = NULL; list_init(&bundle->ports); bundle->vlan_mode = PORT_VLAN_TRUNK; bundle->vlan = -1; bundle->trunks = NULL; bundle->use_priority_tags = s->use_priority_tags; bundle->lacp = NULL; bundle->bond = NULL; bundle->floodable = true; mbridge_register_bundle(ofproto->mbridge, bundle); } if (!bundle->name || strcmp(s->name, bundle->name)) 
{ free(bundle->name); bundle->name = xstrdup(s->name); } /* LACP. */ if (s->lacp) { if (!bundle->lacp) { ofproto->backer->need_revalidate = REV_RECONFIGURE; bundle->lacp = lacp_create(); } lacp_configure(bundle->lacp, s->lacp); } else { lacp_unref(bundle->lacp); bundle->lacp = NULL; } /* Update set of ports. */ ok = true; for (i = 0; i < s->n_slaves; i++) { if (!bundle_add_port(bundle, s->slaves[i], s->lacp ? &s->lacp_slaves[i] : NULL)) { ok = false; } } if (!ok || list_size(&bundle->ports) != s->n_slaves) { struct ofport_dpif *next_port; LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) { for (i = 0; i < s->n_slaves; i++) { if (s->slaves[i] == port->up.ofp_port) { goto found; } } bundle_del_port(port); found: ; } } ovs_assert(list_size(&bundle->ports) <= s->n_slaves); if (list_is_empty(&bundle->ports)) { bundle_destroy(bundle); return EINVAL; } /* Set VLAN tagging mode */ if (s->vlan_mode != bundle->vlan_mode || s->use_priority_tags != bundle->use_priority_tags) { bundle->vlan_mode = s->vlan_mode; bundle->use_priority_tags = s->use_priority_tags; need_flush = true; } /* Set VLAN tag. */ vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1 : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan : 0); if (vlan != bundle->vlan) { bundle->vlan = vlan; need_flush = true; } /* Get trunked VLANs. */ switch (s->vlan_mode) { case PORT_VLAN_ACCESS: trunks = NULL; break; case PORT_VLAN_TRUNK: trunks = CONST_CAST(unsigned long *, s->trunks); break; case PORT_VLAN_NATIVE_UNTAGGED: case PORT_VLAN_NATIVE_TAGGED: if (vlan != 0 && (!s->trunks || !bitmap_is_set(s->trunks, vlan) || bitmap_is_set(s->trunks, 0))) { /* Force trunking the native VLAN and prohibit trunking VLAN 0. 
*/ if (s->trunks) { trunks = bitmap_clone(s->trunks, 4096); } else { trunks = bitmap_allocate1(4096); } bitmap_set1(trunks, vlan); bitmap_set0(trunks, 0); } else { trunks = CONST_CAST(unsigned long *, s->trunks); } break; default: NOT_REACHED(); } if (!vlan_bitmap_equal(trunks, bundle->trunks)) { free(bundle->trunks); if (trunks == s->trunks) { bundle->trunks = vlan_bitmap_clone(trunks); } else { bundle->trunks = trunks; trunks = NULL; } need_flush = true; } if (trunks != s->trunks) { free(trunks); } /* Bonding. */ if (!list_is_short(&bundle->ports)) { bundle->ofproto->has_bonded_bundles = true; if (bundle->bond) { if (bond_reconfigure(bundle->bond, s->bond)) { ofproto->backer->need_revalidate = REV_RECONFIGURE; } } else { bundle->bond = bond_create(s->bond); ofproto->backer->need_revalidate = REV_RECONFIGURE; } LIST_FOR_EACH (port, bundle_node, &bundle->ports) { bond_slave_register(bundle->bond, port, port->up.netdev); } } else { bond_unref(bundle->bond); bundle->bond = NULL; } /* If we changed something that would affect MAC learning, un-learn * everything on this port and force flow revalidation. 
*/ if (need_flush) { bundle_flush_macs(bundle, false); } return 0; } static void bundle_remove(struct ofport *port_) { struct ofport_dpif *port = ofport_dpif_cast(port_); struct ofbundle *bundle = port->bundle; if (bundle) { bundle_del_port(port); if (list_is_empty(&bundle->ports)) { bundle_destroy(bundle); } else if (list_is_short(&bundle->ports)) { bond_unref(bundle->bond); bundle->bond = NULL; } } } static void send_pdu_cb(void *port_, const void *pdu, size_t pdu_size) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10); struct ofport_dpif *port = port_; uint8_t ea[ETH_ADDR_LEN]; int error; error = netdev_get_etheraddr(port->up.netdev, ea); if (!error) { struct ofpbuf packet; void *packet_pdu; ofpbuf_init(&packet, 0); packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP, pdu_size); memcpy(packet_pdu, pdu, pdu_size); send_packet(port, &packet); ofpbuf_uninit(&packet); } else { VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface " "%s (%s)", port->bundle->name, netdev_get_name(port->up.netdev), ovs_strerror(error)); } } static void bundle_send_learning_packets(struct ofbundle *bundle) { struct ofproto_dpif *ofproto = bundle->ofproto; struct ofpbuf *learning_packet; int error, n_packets, n_errors; struct mac_entry *e; struct list packets; list_init(&packets); ovs_rwlock_rdlock(&ofproto->ml->rwlock); LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) { if (e->port.p != bundle) { void *port_void; learning_packet = bond_compose_learning_packet(bundle->bond, e->mac, e->vlan, &port_void); learning_packet->private_p = port_void; list_push_back(&packets, &learning_packet->list_node); } } ovs_rwlock_unlock(&ofproto->ml->rwlock); error = n_packets = n_errors = 0; LIST_FOR_EACH (learning_packet, list_node, &packets) { int ret; ret = send_packet(learning_packet->private_p, learning_packet); if (ret) { error = ret; n_errors++; } n_packets++; } ofpbuf_list_delete(&packets); if (n_errors) { static struct vlog_rate_limit rl = 
VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning " "packets, last error was: %s", bundle->name, n_errors, n_packets, ovs_strerror(error)); } else { VLOG_DBG("bond %s: sent %d gratuitous learning packets", bundle->name, n_packets); } } static void bundle_run(struct ofbundle *bundle) { if (bundle->lacp) { lacp_run(bundle->lacp, send_pdu_cb); } if (bundle->bond) { struct ofport_dpif *port; LIST_FOR_EACH (port, bundle_node, &bundle->ports) { bond_slave_set_may_enable(bundle->bond, port, port->may_enable); } if (bond_run(bundle->bond, lacp_status(bundle->lacp))) { bundle->ofproto->backer->need_revalidate = REV_BOND; } if (bond_should_send_learning_packets(bundle->bond)) { bundle_send_learning_packets(bundle); } } } static void bundle_wait(struct ofbundle *bundle) { if (bundle->lacp) { lacp_wait(bundle->lacp); } if (bundle->bond) { bond_wait(bundle->bond); } } /* Mirrors. */ static int mirror_set__(struct ofproto *ofproto_, void *aux, const struct ofproto_mirror_settings *s) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct ofbundle **srcs, **dsts; int error; size_t i; if (!s) { mirror_destroy(ofproto->mbridge, aux); return 0; } srcs = xmalloc(s->n_srcs * sizeof *srcs); dsts = xmalloc(s->n_dsts * sizeof *dsts); for (i = 0; i < s->n_srcs; i++) { srcs[i] = bundle_lookup(ofproto, s->srcs[i]); } for (i = 0; i < s->n_dsts; i++) { dsts[i] = bundle_lookup(ofproto, s->dsts[i]); } error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts, s->n_dsts, s->src_vlans, bundle_lookup(ofproto, s->out_bundle), s->out_vlan); free(srcs); free(dsts); return error; } static int mirror_get_stats__(struct ofproto *ofproto, void *aux, uint64_t *packets, uint64_t *bytes) { push_all_stats(); return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets, bytes); } static int set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); 
ovs_rwlock_wrlock(&ofproto->ml->rwlock); if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) { mac_learning_flush(ofproto->ml); } ovs_rwlock_unlock(&ofproto->ml->rwlock); return 0; } static bool is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct ofbundle *bundle = bundle_lookup(ofproto, aux); return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0; } static void forward_bpdu_changed(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); ofproto->backer->need_revalidate = REV_RECONFIGURE; } static void set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time, size_t max_entries) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_set_idle_time(ofproto->ml, idle_time); mac_learning_set_max_entries(ofproto->ml, max_entries); ovs_rwlock_unlock(&ofproto->ml->rwlock); } /* Ports. */ static struct ofport_dpif * get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port) { struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port); return ofport ? ofport_dpif_cast(ofport) : NULL; } static struct ofport_dpif * get_odp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port) { struct ofport_dpif *port = odp_port_to_ofport(ofproto->backer, odp_port); return port && &ofproto->up == port->up.ofproto ? 
port : NULL; } static void ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto, struct ofproto_port *ofproto_port, struct dpif_port *dpif_port) { ofproto_port->name = dpif_port->name; ofproto_port->type = dpif_port->type; ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no); } static void ofport_update_peer(struct ofport_dpif *ofport) { const struct ofproto_dpif *ofproto; struct dpif_backer *backer; char *peer_name; if (!netdev_vport_is_patch(ofport->up.netdev)) { return; } backer = ofproto_dpif_cast(ofport->up.ofproto)->backer; backer->need_revalidate = REV_RECONFIGURE; if (ofport->peer) { ofport->peer->peer = NULL; ofport->peer = NULL; } peer_name = netdev_vport_patch_peer(ofport->up.netdev); if (!peer_name) { return; } HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { struct ofport *peer_ofport; struct ofport_dpif *peer; char *peer_peer; if (ofproto->backer != backer) { continue; } peer_ofport = shash_find_data(&ofproto->up.port_by_name, peer_name); if (!peer_ofport) { continue; } peer = ofport_dpif_cast(peer_ofport); peer_peer = netdev_vport_patch_peer(peer->up.netdev); if (peer_peer && !strcmp(netdev_get_name(ofport->up.netdev), peer_peer)) { ofport->peer = peer; ofport->peer->peer = ofport; } free(peer_peer); break; } free(peer_name); } static void port_run_fast(struct ofport_dpif *ofport) { if (ofport->cfm && cfm_should_send_ccm(ofport->cfm)) { struct ofpbuf packet; ofpbuf_init(&packet, 0); cfm_compose_ccm(ofport->cfm, &packet, ofport->up.pp.hw_addr); send_packet(ofport, &packet); ofpbuf_uninit(&packet); } if (ofport->bfd && bfd_should_send_packet(ofport->bfd)) { struct ofpbuf packet; ofpbuf_init(&packet, 0); bfd_put_packet(ofport->bfd, &packet, ofport->up.pp.hw_addr); send_packet(ofport, &packet); ofpbuf_uninit(&packet); } } static void port_run(struct ofport_dpif *ofport) { long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev); bool carrier_changed = carrier_seq != ofport->carrier_seq; bool 
enable = netdev_get_carrier(ofport->up.netdev); bool cfm_enable = false; bool bfd_enable = false; ofport->carrier_seq = carrier_seq; port_run_fast(ofport); if (ofport->cfm) { int cfm_opup = cfm_get_opup(ofport->cfm); cfm_run(ofport->cfm); cfm_enable = !cfm_get_fault(ofport->cfm); if (cfm_opup >= 0) { cfm_enable = cfm_enable && cfm_opup; } } if (ofport->bfd) { bfd_run(ofport->bfd); bfd_enable = bfd_forwarding(ofport->bfd); } if (ofport->bfd || ofport->cfm) { enable = enable && (cfm_enable || bfd_enable); } if (ofport->bundle) { enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport); if (carrier_changed) { lacp_slave_carrier_changed(ofport->bundle->lacp, ofport); } } if (ofport->may_enable != enable) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); ofproto->backer->need_revalidate = REV_PORT_TOGGLED; } ofport->may_enable = enable; } static void port_wait(struct ofport_dpif *ofport) { if (ofport->cfm) { cfm_wait(ofport->cfm); } if (ofport->bfd) { bfd_wait(ofport->bfd); } } static int port_query_by_name(const struct ofproto *ofproto_, const char *devname, struct ofproto_port *ofproto_port) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct dpif_port dpif_port; int error; if (sset_contains(&ofproto->ghost_ports, devname)) { const char *type = netdev_get_type_from_name(devname); /* We may be called before ofproto->up.port_by_name is populated with * the appropriate ofport. For this reason, we must get the name and * type from the netdev layer directly. */ if (type) { const struct ofport *ofport; ofport = shash_find_data(&ofproto->up.port_by_name, devname); ofproto_port->ofp_port = ofport ? 
ofport->ofp_port : OFPP_NONE; ofproto_port->name = xstrdup(devname); ofproto_port->type = xstrdup(type); return 0; } return ENODEV; } if (!sset_contains(&ofproto->ports, devname)) { return ENODEV; } error = dpif_port_query_by_name(ofproto->backer->dpif, devname, &dpif_port); if (!error) { ofproto_port_from_dpif_port(ofproto, ofproto_port, &dpif_port); } return error; } static int port_add(struct ofproto *ofproto_, struct netdev *netdev) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); const char *devname = netdev_get_name(netdev); char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; const char *dp_port_name; if (netdev_vport_is_patch(netdev)) { sset_add(&ofproto->ghost_ports, netdev_get_name(netdev)); return 0; } dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name)) { odp_port_t port_no = ODPP_NONE; int error; error = dpif_port_add(ofproto->backer->dpif, netdev, &port_no); if (error) { return error; } if (netdev_get_tunnel_config(netdev)) { simap_put(&ofproto->backer->tnl_backers, dp_port_name, odp_to_u32(port_no)); } } if (netdev_get_tunnel_config(netdev)) { sset_add(&ofproto->ghost_ports, devname); } else { sset_add(&ofproto->ports, devname); } return 0; } static int port_del(struct ofproto *ofproto_, ofp_port_t ofp_port) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port); int error = 0; if (!ofport) { return 0; } sset_find_and_delete(&ofproto->ghost_ports, netdev_get_name(ofport->up.netdev)); ofproto->backer->need_revalidate = REV_RECONFIGURE; if (!ofport->is_tunnel && !netdev_vport_is_patch(ofport->up.netdev)) { error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port); if (!error) { /* The caller is going to close ofport->up.netdev. If this is a * bonded port, then the bond is using that netdev, so remove it * from the bond. 
The client will need to reconfigure everything * after deleting ports, so then the slave will get re-added. */ bundle_remove(&ofport->up); } } return error; } static int port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); int error; push_all_stats(); error = netdev_get_stats(ofport->up.netdev, stats); if (!error && ofport_->ofp_port == OFPP_LOCAL) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); /* ofproto->stats.tx_packets represents packets that we created * internally and sent to some port (e.g. packets sent with * send_packet()). Account for them as if they had come from * OFPP_LOCAL and got forwarded. */ if (stats->rx_packets != UINT64_MAX) { stats->rx_packets += ofproto->stats.tx_packets; } if (stats->rx_bytes != UINT64_MAX) { stats->rx_bytes += ofproto->stats.tx_bytes; } /* ofproto->stats.rx_packets represents packets that were received on * some port and we processed internally and dropped (e.g. STP). * Account for them as if they had been forwarded to OFPP_LOCAL. */ if (stats->tx_packets != UINT64_MAX) { stats->tx_packets += ofproto->stats.rx_packets; } if (stats->tx_bytes != UINT64_MAX) { stats->tx_bytes += ofproto->stats.rx_bytes; } } return error; } struct port_dump_state { uint32_t bucket; uint32_t offset; bool ghost; struct ofproto_port port; bool has_port; }; static int port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep) { *statep = xzalloc(sizeof(struct port_dump_state)); return 0; } static int port_dump_next(const struct ofproto *ofproto_, void *state_, struct ofproto_port *port) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct port_dump_state *state = state_; const struct sset *sset; struct sset_node *node; if (state->has_port) { ofproto_port_destroy(&state->port); state->has_port = false; } sset = state->ghost ? 
&ofproto->ghost_ports : &ofproto->ports; while ((node = sset_at_position(sset, &state->bucket, &state->offset))) { int error; error = port_query_by_name(ofproto_, node->name, &state->port); if (!error) { *port = state->port; state->has_port = true; return 0; } else if (error != ENODEV) { return error; } } if (!state->ghost) { state->ghost = true; state->bucket = 0; state->offset = 0; return port_dump_next(ofproto_, state_, port); } return EOF; } static int port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_) { struct port_dump_state *state = state_; if (state->has_port) { ofproto_port_destroy(&state->port); } free(state); return 0; } static int port_poll(const struct ofproto *ofproto_, char **devnamep) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); if (ofproto->port_poll_errno) { int error = ofproto->port_poll_errno; ofproto->port_poll_errno = 0; return error; } if (sset_is_empty(&ofproto->port_poll_set)) { return EAGAIN; } *devnamep = sset_pop(&ofproto->port_poll_set); return 0; } static void port_poll_wait(const struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); dpif_port_poll_wait(ofproto->backer->dpif); } static int port_is_lacp_current(const struct ofport *ofport_) { const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); return (ofport->bundle && ofport->bundle->lacp ? lacp_slave_is_current(ofport->bundle->lacp, ofport) : -1); } /* Upcall handling. */ struct flow_miss_op { struct dpif_op dpif_op; uint64_t slow_stub[128 / 8]; /* Buffer for compose_slow_path() */ struct xlate_out xout; bool xout_garbage; /* 'xout' needs to be uninitialized? */ struct ofpbuf mask; /* Flow mask for "put" ops. */ struct odputil_keybuf maskbuf; /* If this is a "put" op, then a pointer to the subfacet that should * be marked as uninstalled if the operation fails. 
*/ struct subfacet *subfacet; }; /* Figures out whether a flow that missed in 'ofproto', whose details are in * 'miss' masked by 'wc', is likely to be worth tracking in detail in userspace * and (usually) installing a datapath flow. The answer is usually "yes" (a * return value of true). However, for short flows the cost of bookkeeping is * much higher than the benefits, so when the datapath holds a large number of * flows we impose some heuristics to decide which flows are likely to be worth * tracking. */ static bool flow_miss_should_make_facet(struct flow_miss *miss) { struct dpif_backer *backer = miss->ofproto->backer; uint32_t hash; switch (flow_miss_model) { case OFPROTO_HANDLE_MISS_AUTO: break; case OFPROTO_HANDLE_MISS_WITH_FACETS: return true; case OFPROTO_HANDLE_MISS_WITHOUT_FACETS: return false; } if (!backer->governor) { size_t n_subfacets; n_subfacets = hmap_count(&backer->subfacets); if (n_subfacets * 2 <= flow_eviction_threshold) { return true; } backer->governor = governor_create(); } hash = flow_hash_in_wildcards(&miss->flow, &miss->xout.wc, 0); return governor_should_install_flow(backer->governor, hash, list_size(&miss->packets)); } /* Handles 'miss', which matches 'facet'. May add any required datapath * operations to 'ops', incrementing '*n_ops' for each new op. * * All of the packets in 'miss' are considered to have arrived at time * 'miss->stats.used'. This is really important only for new facets: if we * just called time_msec() here, then the new subfacet or its packets could * look (occasionally) as though it was used some time after the facet was * used. That can make a one-packet flow look like it has a nonzero duration, * which looks odd in e.g. NetFlow statistics. 
*/
static void
handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
                            struct flow_miss_op *ops, size_t *n_ops)
{
    enum subfacet_path target_path;
    struct dpif_flow_put *flow_put;
    struct flow_miss_op *miss_op;
    struct subfacet *sf;

    /* Credit the missed packets to the facet.  The "prev" counters advance in
     * step with the totals, so the delta that facet_push_stats() computes
     * from them is not changed by this update. */
    facet->packet_count += miss->stats.n_packets;
    facet->prev_packet_count += miss->stats.n_packets;
    facet->byte_count += miss->stats.n_bytes;
    facet->prev_byte_count += miss->stats.n_bytes;

    /* Don't install the flow if it's the result of the "userspace"
     * action for an already installed facet.  This can occur when a
     * datapath flow with wildcards has a "userspace" action and flows
     * sent to userspace result in a different subfacet, which will then
     * be rejected as overlapping by the datapath. */
    if (miss->upcall_type == DPIF_UC_ACTION
        && !list_is_empty(&facet->subfacets)) {
        return;
    }

    target_path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;

    sf = subfacet_create(facet, miss);
    if (sf->path == target_path) {
        /* Already on the path we want; no datapath operation is needed. */
        return;
    }

    /* Claim the next free slot in 'ops' and fill it in as a flow "put". */
    miss_op = &ops[(*n_ops)++];
    flow_put = &miss_op->dpif_op.u.flow_put;

    sf->path = target_path;

    /* The mask lives in the op's own stack buffer; it stays empty unless
     * megaflows are enabled. */
    ofpbuf_use_stack(&miss_op->mask, &miss_op->maskbuf,
                     sizeof miss_op->maskbuf);
    if (enable_megaflows) {
        mask_to_netlink_attr(&miss_op->mask, facet->ofproto,
                             &facet->xout.wc.masks, &miss->flow);
    }

    miss_op->xout_garbage = false;
    miss_op->dpif_op.type = DPIF_OP_FLOW_PUT;
    miss_op->subfacet = sf;

    flow_put->flags = DPIF_FP_CREATE;
    flow_put->key = miss->key;
    flow_put->key_len = miss->key_len;
    flow_put->mask = miss_op->mask.data;
    flow_put->mask_len = miss_op->mask.size;
    flow_put->stats = NULL;

    if (target_path == SF_FAST_PATH) {
        flow_put->actions = facet->xout.odp_actions.data;
        flow_put->actions_len = facet->xout.odp_actions.size;
    } else {
        compose_slow_path(facet->ofproto, &miss->flow, facet->xout.slow,
                          miss_op->slow_stub, sizeof miss_op->slow_stub,
                          &flow_put->actions, &flow_put->actions_len);
    }
}

/* Handles flow miss 'miss'.  May add any required datapath operations
 * to 'ops', incrementing '*n_ops' for each new op.
*/ static void handle_flow_miss(struct flow_miss *miss, struct flow_miss_op *ops, size_t *n_ops) { struct facet *facet; miss->ofproto->n_missed += list_size(&miss->packets); facet = facet_lookup_valid(miss->ofproto, &miss->flow); if (!facet) { /* There does not exist a bijection between 'struct flow' and datapath * flow keys with fitness ODP_FIT_TO_LITTLE. This breaks a fundamental * assumption used throughout the facet and subfacet handling code. * Since we have to handle these misses in userspace anyway, we simply * skip facet creation, avoiding the problem altogether. */ if (miss->key_fitness == ODP_FIT_TOO_LITTLE || !flow_miss_should_make_facet(miss)) { return; } facet = facet_create(miss); } handle_flow_miss_with_facet(miss, facet, ops, n_ops); } static struct drop_key * drop_key_lookup(const struct dpif_backer *backer, const struct nlattr *key, size_t key_len) { struct drop_key *drop_key; HMAP_FOR_EACH_WITH_HASH (drop_key, hmap_node, hash_bytes(key, key_len, 0), &backer->drop_keys) { if (drop_key->key_len == key_len && !memcmp(drop_key->key, key, key_len)) { return drop_key; } } return NULL; } static void drop_key_clear(struct dpif_backer *backer) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15); struct drop_key *drop_key, *next; HMAP_FOR_EACH_SAFE (drop_key, next, hmap_node, &backer->drop_keys) { int error; error = dpif_flow_del(backer->dpif, drop_key->key, drop_key->key_len, NULL); if (error && !VLOG_DROP_WARN(&rl)) { struct ds ds = DS_EMPTY_INITIALIZER; odp_flow_key_format(drop_key->key, drop_key->key_len, &ds); VLOG_WARN("Failed to delete drop key (%s) (%s)", ovs_strerror(error), ds_cstr(&ds)); ds_destroy(&ds); } hmap_remove(&backer->drop_keys, &drop_key->hmap_node); drop_key_destroy(drop_key); } udpif_drop_key_clear(backer->udpif); } static void handle_flow_misses(struct dpif_backer *backer, struct flow_miss_batch *fmb) { struct flow_miss_op flow_miss_ops[FLOW_MISS_MAX_BATCH]; struct dpif_op *dpif_ops[FLOW_MISS_MAX_BATCH]; struct 
flow_miss *miss; size_t n_ops, i; /* Process each element in the to-do list, constructing the set of * operations to batch. */ n_ops = 0; HMAP_FOR_EACH (miss, hmap_node, &fmb->misses) { handle_flow_miss(miss, flow_miss_ops, &n_ops); } ovs_assert(n_ops <= ARRAY_SIZE(flow_miss_ops)); /* Execute batch. */ for (i = 0; i < n_ops; i++) { dpif_ops[i] = &flow_miss_ops[i].dpif_op; } dpif_operate(backer->dpif, dpif_ops, n_ops); for (i = 0; i < n_ops; i++) { if (dpif_ops[i]->error != 0 && flow_miss_ops[i].dpif_op.type == DPIF_OP_FLOW_PUT && flow_miss_ops[i].subfacet) { struct subfacet *subfacet = flow_miss_ops[i].subfacet; COVERAGE_INC(subfacet_install_fail); /* Zero-out subfacet counters when installation failed, but * datapath reported hits. This should not happen and * indicates a bug, since if the datapath flow exists, we * should not be attempting to create a new subfacet. A * buggy datapath could trigger this, so just zero out the * counters and log an error. */ if (subfacet->dp_packet_count || subfacet->dp_byte_count) { VLOG_ERR_RL(&rl, "failed to install subfacet for which " "datapath reported hits"); subfacet->dp_packet_count = subfacet->dp_byte_count = 0; } subfacet->path = SF_NOT_INSTALLED; } } } static void handle_sflow_upcall(struct dpif_backer *backer, const struct dpif_upcall *upcall) { struct ofproto_dpif *ofproto; union user_action_cookie cookie; struct flow flow; odp_port_t odp_in_port; if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len, &flow, NULL, &ofproto, &odp_in_port) || !ofproto->sflow) { return; } memset(&cookie, 0, sizeof cookie); memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.sflow); dpif_sflow_received(ofproto->sflow, upcall->packet, &flow, odp_in_port, &cookie); } static void handle_flow_sample_upcall(struct dpif_backer *backer, const struct dpif_upcall *upcall) { struct ofproto_dpif *ofproto; union user_action_cookie cookie; struct flow flow; if (xlate_receive(backer, upcall->packet, upcall->key, 
upcall->key_len, &flow, NULL, &ofproto, NULL) || !ofproto->ipfix) { return; } memset(&cookie, 0, sizeof cookie); memcpy(&cookie, nl_attr_get(upcall->userdata), sizeof cookie.flow_sample); /* The flow reflects exactly the contents of the packet. Sample * the packet using it. */ dpif_ipfix_flow_sample(ofproto->ipfix, upcall->packet, &flow, cookie.flow_sample.collector_set_id, cookie.flow_sample.probability, cookie.flow_sample.obs_domain_id, cookie.flow_sample.obs_point_id); } static void handle_ipfix_upcall(struct dpif_backer *backer, const struct dpif_upcall *upcall) { struct ofproto_dpif *ofproto; struct flow flow; if (xlate_receive(backer, upcall->packet, upcall->key, upcall->key_len, &flow, NULL, &ofproto, NULL) || !ofproto->ipfix) { return; } /* The flow reflects exactly the contents of the packet. Sample * the packet using it. */ dpif_ipfix_bridge_sample(ofproto->ipfix, upcall->packet, &flow); } static void handle_upcalls(struct dpif_backer *backer) { struct flow_miss_batch *fmb; int n_processed; for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) { struct upcall *upcall = upcall_next(backer->udpif); if (!upcall) { break; } switch (upcall->type) { case SFLOW_UPCALL: handle_sflow_upcall(backer, &upcall->dpif_upcall); break; case FLOW_SAMPLE_UPCALL: handle_flow_sample_upcall(backer, &upcall->dpif_upcall); break; case IPFIX_UPCALL: handle_ipfix_upcall(backer, &upcall->dpif_upcall); break; case BAD_UPCALL: break; case MISS_UPCALL: NOT_REACHED(); } upcall_destroy(upcall); } for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) { struct drop_key *drop_key = drop_key_next(backer->udpif); if (!drop_key) { break; } if (!drop_key_lookup(backer, drop_key->key, drop_key->key_len)) { hmap_insert(&backer->drop_keys, &drop_key->hmap_node, hash_bytes(drop_key->key, drop_key->key_len, 0)); dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY, drop_key->key, drop_key->key_len, NULL, 0, NULL, 0, NULL); } else { 
drop_key_destroy(drop_key); } } fmb = flow_miss_batch_next(backer->udpif); if (fmb) { handle_flow_misses(backer, fmb); flow_miss_batch_destroy(fmb); } } /* Flow expiration. */ static int subfacet_max_idle(const struct dpif_backer *); static void update_stats(struct dpif_backer *); static void rule_expire(struct rule_dpif *) OVS_REQUIRES(ofproto_mutex); static void expire_subfacets(struct dpif_backer *, int dp_max_idle); /* This function is called periodically by run(). Its job is to collect * updates for the flows that have been installed into the datapath, most * importantly when they last were used, and then use that information to * expire flows that have not been used recently. * * Returns the number of milliseconds after which it should be called again. */ static int expire(struct dpif_backer *backer) { struct ofproto_dpif *ofproto; size_t n_subfacets; int max_idle; /* Periodically clear out the drop keys in an effort to keep them * relatively few. */ drop_key_clear(backer); /* Update stats for each flow in the backer. */ update_stats(backer); n_subfacets = hmap_count(&backer->subfacets); if (n_subfacets) { struct subfacet *subfacet; long long int total, now; total = 0; now = time_msec(); HMAP_FOR_EACH (subfacet, hmap_node, &backer->subfacets) { total += now - subfacet->created; } backer->avg_subfacet_life += total / n_subfacets; } backer->avg_subfacet_life /= 2; backer->avg_n_subfacet += n_subfacets; backer->avg_n_subfacet /= 2; backer->max_n_subfacet = MAX(backer->max_n_subfacet, n_subfacets); max_idle = subfacet_max_idle(backer); expire_subfacets(backer, max_idle); HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { struct rule *rule, *next_rule; if (ofproto->backer != backer) { continue; } /* Expire OpenFlow flows whose idle_timeout or hard_timeout * has passed. 
*/ ovs_mutex_lock(&ofproto_mutex); LIST_FOR_EACH_SAFE (rule, next_rule, expirable, &ofproto->up.expirable) { rule_expire(rule_dpif_cast(rule)); } ovs_mutex_unlock(&ofproto_mutex); /* All outstanding data in existing flows has been accounted, so it's a * good time to do bond rebalancing. */ if (ofproto->has_bonded_bundles) { struct ofbundle *bundle; HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { if (bundle->bond) { bond_rebalance(bundle->bond); } } } } return MIN(max_idle, 1000); } /* Updates flow table statistics given that the datapath just reported 'stats' * as 'subfacet''s statistics. */ static void update_subfacet_stats(struct subfacet *subfacet, const struct dpif_flow_stats *stats) { struct facet *facet = subfacet->facet; struct dpif_flow_stats diff; diff.tcp_flags = stats->tcp_flags; diff.used = stats->used; if (stats->n_packets >= subfacet->dp_packet_count) { diff.n_packets = stats->n_packets - subfacet->dp_packet_count; } else { VLOG_WARN_RL(&rl, "unexpected packet count from the datapath"); diff.n_packets = 0; } if (stats->n_bytes >= subfacet->dp_byte_count) { diff.n_bytes = stats->n_bytes - subfacet->dp_byte_count; } else { VLOG_WARN_RL(&rl, "unexpected byte count from datapath"); diff.n_bytes = 0; } facet->ofproto->n_hit += diff.n_packets; subfacet->dp_packet_count = stats->n_packets; subfacet->dp_byte_count = stats->n_bytes; subfacet_update_stats(subfacet, &diff); if (facet->accounted_bytes < facet->byte_count) { facet_learn(facet); facet_account(facet); facet->accounted_bytes = facet->byte_count; } } /* 'key' with length 'key_len' bytes is a flow in 'dpif' that we know nothing * about, or a flow that shouldn't be installed but was anyway. Delete it. 
*/ static void delete_unexpected_flow(struct dpif_backer *backer, const struct nlattr *key, size_t key_len) { if (!VLOG_DROP_WARN(&rl)) { struct ds s; ds_init(&s); odp_flow_key_format(key, key_len, &s); VLOG_WARN("unexpected flow: %s", ds_cstr(&s)); ds_destroy(&s); } COVERAGE_INC(facet_unexpected); dpif_flow_del(backer->dpif, key, key_len, NULL); } /* Update 'packet_count', 'byte_count', and 'used' members of installed facets. * * This function also pushes statistics updates to rules which each facet * resubmits into. Generally these statistics will be accurate. However, if a * facet changes the rule it resubmits into at some time in between * update_stats() runs, it is possible that statistics accrued to the * old rule will be incorrectly attributed to the new rule. This could be * avoided by calling update_stats() whenever rules are created or * deleted. However, the performance impact of making so many calls to the * datapath do not justify the benefit of having perfectly accurate statistics. * * In addition, this function maintains per ofproto flow hit counts. The patch * port is not treated specially. e.g. A packet ingress from br0 patched into * br1 will increase the hit count of br0 by 1, however, does not affect * the hit or miss counts of br1. */ static void update_stats(struct dpif_backer *backer) { const struct dpif_flow_stats *stats; struct dpif_flow_dump dump; const struct nlattr *key, *mask; size_t key_len, mask_len; dpif_flow_dump_start(&dump, backer->dpif); while (dpif_flow_dump_next(&dump, &key, &key_len, &mask, &mask_len, NULL, NULL, &stats)) { struct subfacet *subfacet; uint32_t key_hash; key_hash = odp_flow_key_hash(key, key_len); subfacet = subfacet_find(backer, key, key_len, key_hash); switch (subfacet ? subfacet->path : SF_NOT_INSTALLED) { case SF_FAST_PATH: update_subfacet_stats(subfacet, stats); break; case SF_SLOW_PATH: /* Stats are updated per-packet. 
*/ break; case SF_NOT_INSTALLED: default: delete_unexpected_flow(backer, key, key_len); break; } run_fast_rl(); } dpif_flow_dump_done(&dump); update_moving_averages(backer); } /* Calculates and returns the number of milliseconds of idle time after which * subfacets should expire from the datapath. When a subfacet expires, we fold * its statistics into its facet, and when a facet's last subfacet expires, we * fold its statistic into its rule. */ static int subfacet_max_idle(const struct dpif_backer *backer) { /* * Idle time histogram. * * Most of the time a switch has a relatively small number of subfacets. * When this is the case we might as well keep statistics for all of them * in userspace and to cache them in the kernel datapath for performance as * well. * * As the number of subfacets increases, the memory required to maintain * statistics about them in userspace and in the kernel becomes * significant. However, with a large number of subfacets it is likely * that only a few of them are "heavy hitters" that consume a large amount * of bandwidth. At this point, only heavy hitters are worth caching in * the kernel and maintaining in userspaces; other subfacets we can * discard. * * The technique used to compute the idle time is to build a histogram with * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each subfacet * that is installed in the kernel gets dropped in the appropriate bucket. * After the histogram has been built, we compute the cutoff so that only * the most-recently-used 1% of subfacets (but at least * flow_eviction_threshold flows) are kept cached. At least * the most-recently-used bucket of subfacets is kept, so actually an * arbitrary number of subfacets can be kept in any given expiration run * (though the next run will delete most of those unless they receive * additional data). 
* * This requires a second pass through the subfacets, in addition to the * pass made by update_stats(), because the former function never looks at * uninstallable subfacets. */ enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) }; enum { N_BUCKETS = 5000 / BUCKET_WIDTH }; int buckets[N_BUCKETS] = { 0 }; int total, subtotal, bucket; struct subfacet *subfacet; long long int now; int i; total = hmap_count(&backer->subfacets); if (total <= flow_eviction_threshold) { return N_BUCKETS * BUCKET_WIDTH; } /* Build histogram. */ now = time_msec(); HMAP_FOR_EACH (subfacet, hmap_node, &backer->subfacets) { long long int idle = now - subfacet->used; int bucket = (idle <= 0 ? 0 : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1 : (unsigned int) idle / BUCKET_WIDTH); buckets[bucket]++; } /* Find the first bucket whose flows should be expired. */ subtotal = bucket = 0; do { subtotal += buckets[bucket++]; } while (bucket < N_BUCKETS && subtotal < MAX(flow_eviction_threshold, total / 100)); if (VLOG_IS_DBG_ENABLED()) { struct ds s; ds_init(&s); ds_put_cstr(&s, "keep"); for (i = 0; i < N_BUCKETS; i++) { if (i == bucket) { ds_put_cstr(&s, ", drop"); } if (buckets[i]) { ds_put_format(&s, " %d:%d", i * BUCKET_WIDTH, buckets[i]); } } VLOG_INFO("%s (msec:count)", ds_cstr(&s)); ds_destroy(&s); } return bucket * BUCKET_WIDTH; } static void expire_subfacets(struct dpif_backer *backer, int dp_max_idle) { /* Cutoff time for most flows. */ long long int normal_cutoff = time_msec() - dp_max_idle; /* We really want to keep flows for special protocols around, so use a more * conservative cutoff. */ long long int special_cutoff = time_msec() - 10000; struct subfacet *subfacet, *next_subfacet; struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH]; int n_batch; n_batch = 0; HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node, &backer->subfacets) { long long int cutoff; cutoff = (subfacet->facet->xout.slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP) ? 
special_cutoff : normal_cutoff); if (subfacet->used < cutoff) { if (subfacet->path != SF_NOT_INSTALLED) { batch[n_batch++] = subfacet; if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) { subfacet_destroy_batch(backer, batch, n_batch); n_batch = 0; } } else { subfacet_destroy(subfacet); } } } if (n_batch > 0) { subfacet_destroy_batch(backer, batch, n_batch); } } /* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules, * then delete it entirely. */ static void rule_expire(struct rule_dpif *rule) OVS_REQUIRES(ofproto_mutex) { uint16_t idle_timeout, hard_timeout; long long int now = time_msec(); int reason; ovs_assert(!rule->up.pending); /* Has 'rule' expired? */ ovs_mutex_lock(&rule->up.mutex); hard_timeout = rule->up.hard_timeout; idle_timeout = rule->up.idle_timeout; if (hard_timeout && now > rule->up.modified + hard_timeout * 1000) { reason = OFPRR_HARD_TIMEOUT; } else if (idle_timeout && now > rule->up.used + idle_timeout * 1000) { reason = OFPRR_IDLE_TIMEOUT; } else { reason = -1; } ovs_mutex_unlock(&rule->up.mutex); if (reason >= 0) { COVERAGE_INC(ofproto_dpif_expired); ofproto_rule_expire(&rule->up, reason); } } /* Facets. */ /* Creates and returns a new facet based on 'miss'. * * The caller must already have determined that no facet with an identical * 'miss->flow' exists in 'miss->ofproto'. * * 'rule' and 'xout' must have been created based on 'miss'. * * 'facet'' statistics are initialized based on 'stats'. * * The facet will initially have no subfacets. The caller should create (at * least) one subfacet with subfacet_create(). 
*/ static struct facet * facet_create(const struct flow_miss *miss) { struct ofproto_dpif *ofproto = miss->ofproto; struct facet *facet; struct match match; facet = xzalloc(sizeof *facet); facet->ofproto = miss->ofproto; facet->used = miss->stats.used; facet->flow = miss->flow; facet->learn_rl = time_msec() + 500; list_init(&facet->subfacets); netflow_flow_init(&facet->nf_flow); netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used); xlate_out_copy(&facet->xout, &miss->xout); match_init(&match, &facet->flow, &facet->xout.wc); cls_rule_init(&facet->cr, &match, OFP_DEFAULT_PRIORITY); ovs_rwlock_wrlock(&ofproto->facets.rwlock); classifier_insert(&ofproto->facets, &facet->cr); ovs_rwlock_unlock(&ofproto->facets.rwlock); facet->nf_flow.output_iface = facet->xout.nf_output_iface; return facet; } static void facet_free(struct facet *facet) { if (facet) { xlate_out_uninit(&facet->xout); free(facet); } } /* Executes, within 'ofproto', the 'n_actions' actions in 'actions' on * 'packet', which arrived on 'in_port'. */ static bool execute_odp_actions(struct ofproto_dpif *ofproto, const struct flow *flow, const struct nlattr *odp_actions, size_t actions_len, struct ofpbuf *packet) { struct odputil_keybuf keybuf; struct ofpbuf key; int error; ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); odp_flow_key_from_flow(&key, flow, ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port)); error = dpif_execute(ofproto->backer->dpif, key.data, key.size, odp_actions, actions_len, packet); return !error; } /* Remove 'facet' from its ofproto and free up the associated memory: * * - If 'facet' was installed in the datapath, uninstalls it and updates its * rule's statistics, via subfacet_uninstall(). * * - Removes 'facet' from its rule and from ofproto->facets. */ static void facet_remove(struct facet *facet) { struct subfacet *subfacet, *next_subfacet; ovs_assert(!list_is_empty(&facet->subfacets)); /* First uninstall all of the subfacets to get final statistics. 
*/ LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { subfacet_uninstall(subfacet); } /* Flush the final stats to the rule. * * This might require us to have at least one subfacet around so that we * can use its actions for accounting in facet_account(), which is why we * have uninstalled but not yet destroyed the subfacets. */ facet_flush_stats(facet); /* Now we're really all done so destroy everything. */ LIST_FOR_EACH_SAFE (subfacet, next_subfacet, list_node, &facet->subfacets) { subfacet_destroy__(subfacet); } ovs_rwlock_wrlock(&facet->ofproto->facets.rwlock); classifier_remove(&facet->ofproto->facets, &facet->cr); ovs_rwlock_unlock(&facet->ofproto->facets.rwlock); cls_rule_destroy(&facet->cr); facet_free(facet); } /* Feed information from 'facet' back into the learning table to keep it in * sync with what is actually flowing through the datapath. */ static void facet_learn(struct facet *facet) { long long int now = time_msec(); if (!facet->xout.has_fin_timeout && now < facet->learn_rl) { return; } facet->learn_rl = now + 500; if (!facet->xout.has_learn && !facet->xout.has_normal && (!facet->xout.has_fin_timeout || !(facet->tcp_flags & (TCP_FIN | TCP_RST)))) { return; } facet_push_stats(facet, true); } static void facet_account(struct facet *facet) { const struct nlattr *a; unsigned int left; ovs_be16 vlan_tci; uint64_t n_bytes; if (!facet->xout.has_normal || !facet->ofproto->has_bonded_bundles) { return; } n_bytes = facet->byte_count - facet->accounted_bytes; /* This loop feeds byte counters to bond_account() for rebalancing to use * as a basis. We also need to track the actual VLAN on which the packet * is going to be sent to ensure that it matches the one passed to * bond_choose_output_slave(). (Otherwise, we will account to the wrong * hash bucket.) * * We use the actions from an arbitrary subfacet because they should all * be equally valid for our purpose. 
*/ vlan_tci = facet->flow.vlan_tci; NL_ATTR_FOR_EACH_UNSAFE (a, left, facet->xout.odp_actions.data, facet->xout.odp_actions.size) { const struct ovs_action_push_vlan *vlan; struct ofport_dpif *port; switch (nl_attr_type(a)) { case OVS_ACTION_ATTR_OUTPUT: port = get_odp_port(facet->ofproto, nl_attr_get_odp_port(a)); if (port && port->bundle && port->bundle->bond) { bond_account(port->bundle->bond, &facet->flow, vlan_tci_to_vid(vlan_tci), n_bytes); } break; case OVS_ACTION_ATTR_POP_VLAN: vlan_tci = htons(0); break; case OVS_ACTION_ATTR_PUSH_VLAN: vlan = nl_attr_get(a); vlan_tci = vlan->vlan_tci; break; } } } /* Returns true if the only action for 'facet' is to send to the controller. * (We don't report NetFlow expiration messages for such facets because they * are just part of the control logic for the network, not real traffic). */ static bool facet_is_controller_flow(struct facet *facet) { if (facet) { struct ofproto_dpif *ofproto = facet->ofproto; const struct ofpact *ofpacts; struct rule_actions *actions; struct rule_dpif *rule; size_t ofpacts_len; bool is_controller; rule_dpif_lookup(ofproto, &facet->flow, NULL, &rule); actions = rule_dpif_get_actions(rule); rule_dpif_unref(rule); ofpacts_len = actions->ofpacts_len; ofpacts = actions->ofpacts; is_controller = ofpacts_len > 0 && ofpacts->type == OFPACT_CONTROLLER && ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len); rule_actions_unref(actions); return is_controller; } return false; } /* Folds all of 'facet''s statistics into its rule. Also updates the * accounting ofhook and emits a NetFlow expiration if appropriate. All of * 'facet''s statistics in the datapath should have been zeroed and folded into * its packet and byte counts before this function is called. 
*/ static void facet_flush_stats(struct facet *facet) { struct ofproto_dpif *ofproto = facet->ofproto; struct subfacet *subfacet; LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { ovs_assert(!subfacet->dp_byte_count); ovs_assert(!subfacet->dp_packet_count); } facet_push_stats(facet, false); if (facet->accounted_bytes < facet->byte_count) { facet_account(facet); facet->accounted_bytes = facet->byte_count; } if (ofproto->netflow && !facet_is_controller_flow(facet)) { struct ofexpired expired; expired.flow = facet->flow; expired.packet_count = facet->packet_count; expired.byte_count = facet->byte_count; expired.used = facet->used; netflow_expire(ofproto->netflow, &facet->nf_flow, &expired); } /* Reset counters to prevent double counting if 'facet' ever gets * reinstalled. */ facet_reset_counters(facet); netflow_flow_clear(&facet->nf_flow); facet->tcp_flags = 0; } /* Searches 'ofproto''s table of facets for one which would be responsible for * 'flow'. Returns it if found, otherwise a null pointer. * * The returned facet might need revalidation; use facet_lookup_valid() * instead if that is important. */ static struct facet * facet_find(struct ofproto_dpif *ofproto, const struct flow *flow) { struct cls_rule *cr; ovs_rwlock_rdlock(&ofproto->facets.rwlock); cr = classifier_lookup(&ofproto->facets, flow, NULL); ovs_rwlock_unlock(&ofproto->facets.rwlock); return cr ? CONTAINER_OF(cr, struct facet, cr) : NULL; } /* Searches 'ofproto''s table of facets for one capable that covers * 'flow'. Returns it if found, otherwise a null pointer. * * The returned facet is guaranteed to be valid. 
*/ static struct facet * facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow) { struct facet *facet; facet = facet_find(ofproto, flow); if (facet && ofproto->backer->need_revalidate && !facet_revalidate(facet)) { return NULL; } return facet; } static bool facet_check_consistency(struct facet *facet) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15); struct xlate_out xout; struct xlate_in xin; bool ok; /* Check the datapath actions for consistency. */ xlate_in_init(&xin, facet->ofproto, &facet->flow, NULL, 0, NULL); xlate_actions(&xin, &xout); ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions) && facet->xout.slow == xout.slow; if (!ok && !VLOG_DROP_WARN(&rl)) { struct ds s = DS_EMPTY_INITIALIZER; flow_format(&s, &facet->flow); ds_put_cstr(&s, ": inconsistency in facet"); if (!ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)) { ds_put_cstr(&s, " (actions were: "); format_odp_actions(&s, facet->xout.odp_actions.data, facet->xout.odp_actions.size); ds_put_cstr(&s, ") (correct actions: "); format_odp_actions(&s, xout.odp_actions.data, xout.odp_actions.size); ds_put_char(&s, ')'); } if (facet->xout.slow != xout.slow) { ds_put_format(&s, " slow path incorrect. should be %d", xout.slow); } ds_destroy(&s); } xlate_out_uninit(&xout); return ok; } /* Re-searches the classifier for 'facet': * * - If the rule found is different from 'facet''s current rule, moves * 'facet' to the new rule and recompiles its actions. * * - If the rule found is the same as 'facet''s current rule, leaves 'facet' * where it is and recompiles its actions anyway. * * - If any of 'facet''s subfacets correspond to a new flow according to * xlate_receive(), 'facet' is removed. * * Returns true if 'facet' is still valid. False if 'facet' was removed. 
*/ static bool facet_revalidate(struct facet *facet) { struct ofproto_dpif *ofproto = facet->ofproto; struct rule_dpif *new_rule; struct subfacet *subfacet; struct flow_wildcards wc; struct xlate_out xout; struct xlate_in xin; COVERAGE_INC(facet_revalidate); /* Check that child subfacets still correspond to this facet. Tunnel * configuration changes could cause a subfacet's OpenFlow in_port to * change. */ LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { struct ofproto_dpif *recv_ofproto; struct flow recv_flow; int error; error = xlate_receive(ofproto->backer, NULL, subfacet->key, subfacet->key_len, &recv_flow, NULL, &recv_ofproto, NULL); if (error || recv_ofproto != ofproto || facet != facet_find(ofproto, &recv_flow)) { facet_remove(facet); return false; } } flow_wildcards_init_catchall(&wc); rule_dpif_lookup(ofproto, &facet->flow, &wc, &new_rule); /* Calculate new datapath actions. * * We do not modify any 'facet' state yet, because we might need to, e.g., * emit a NetFlow expiration and, if so, we need to have the old state * around to properly compose it. */ xlate_in_init(&xin, ofproto, &facet->flow, new_rule, 0, NULL); xlate_actions(&xin, &xout); flow_wildcards_or(&xout.wc, &xout.wc, &wc); /* Make sure non -packet fields are not masked. If not cleared, * the memcmp() below may fail, causing an otherwise valid facet * to be removed. */ flow_wildcards_clear_non_packet_fields(&xout.wc); /* A facet's slow path reason should only change under dramatic * circumstances. Rather than try to update everything, it's simpler to * remove the facet and start over. * * More importantly, if a facet's wildcards change, it will be relatively * difficult to figure out if its subfacets still belong to it, and if not * which facet they may belong to. Again, to avoid the complexity, we * simply give up instead. 
*/ if (facet->xout.slow != xout.slow || memcmp(&facet->xout.wc, &xout.wc, sizeof xout.wc)) { facet_remove(facet); xlate_out_uninit(&xout); rule_dpif_unref(new_rule); return false; } if (!ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)) { LIST_FOR_EACH(subfacet, list_node, &facet->subfacets) { if (subfacet->path == SF_FAST_PATH) { struct dpif_flow_stats stats; subfacet_install(subfacet, &xout.odp_actions, &stats); subfacet_update_stats(subfacet, &stats); } } facet_flush_stats(facet); ofpbuf_clear(&facet->xout.odp_actions); ofpbuf_put(&facet->xout.odp_actions, xout.odp_actions.data, xout.odp_actions.size); } /* Update 'facet' now that we've taken care of all the old state. */ facet->xout.slow = xout.slow; facet->xout.has_learn = xout.has_learn; facet->xout.has_normal = xout.has_normal; facet->xout.has_fin_timeout = xout.has_fin_timeout; facet->xout.nf_output_iface = xout.nf_output_iface; facet->xout.mirrors = xout.mirrors; facet->nf_flow.output_iface = facet->xout.nf_output_iface; ovs_mutex_lock(&new_rule->up.mutex); facet->used = MAX(facet->used, new_rule->up.created); ovs_mutex_unlock(&new_rule->up.mutex); xlate_out_uninit(&xout); rule_dpif_unref(new_rule); return true; } static void facet_reset_counters(struct facet *facet) { facet->packet_count = 0; facet->byte_count = 0; facet->prev_packet_count = 0; facet->prev_byte_count = 0; facet->accounted_bytes = 0; } static void flow_push_stats(struct ofproto_dpif *ofproto, struct flow *flow, struct dpif_flow_stats *stats, bool may_learn) { struct ofport_dpif *in_port; struct xlate_in xin; in_port = get_ofp_port(ofproto, flow->in_port.ofp_port); if (in_port && in_port->is_tunnel) { netdev_vport_inc_rx(in_port->up.netdev, stats); } xlate_in_init(&xin, ofproto, flow, NULL, stats->tcp_flags, NULL); xin.resubmit_stats = stats; xin.may_learn = may_learn; xlate_actions_for_side_effects(&xin); } static void facet_push_stats(struct facet *facet, bool may_learn) { struct dpif_flow_stats stats; 
ovs_assert(facet->packet_count >= facet->prev_packet_count); ovs_assert(facet->byte_count >= facet->prev_byte_count); ovs_assert(facet->used >= facet->prev_used); stats.n_packets = facet->packet_count - facet->prev_packet_count; stats.n_bytes = facet->byte_count - facet->prev_byte_count; stats.used = facet->used; stats.tcp_flags = facet->tcp_flags; if (may_learn || stats.n_packets || facet->used > facet->prev_used) { facet->prev_packet_count = facet->packet_count; facet->prev_byte_count = facet->byte_count; facet->prev_used = facet->used; netflow_flow_update_time(facet->ofproto->netflow, &facet->nf_flow, facet->used); netflow_flow_update_flags(&facet->nf_flow, facet->tcp_flags); mirror_update_stats(facet->ofproto->mbridge, facet->xout.mirrors, stats.n_packets, stats.n_bytes); flow_push_stats(facet->ofproto, &facet->flow, &stats, may_learn); } } static void push_all_stats__(bool run_fast) { static long long int rl = LLONG_MIN; struct ofproto_dpif *ofproto; if (time_msec() < rl) { return; } HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { struct cls_cursor cursor; struct facet *facet; ovs_rwlock_rdlock(&ofproto->facets.rwlock); cls_cursor_init(&cursor, &ofproto->facets, NULL); CLS_CURSOR_FOR_EACH (facet, cr, &cursor) { facet_push_stats(facet, false); if (run_fast) { run_fast_rl(); } } ovs_rwlock_unlock(&ofproto->facets.rwlock); } rl = time_msec() + 100; } static void push_all_stats(void) { push_all_stats__(true); } void rule_dpif_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats) { ovs_mutex_lock(&rule->stats_mutex); rule->packet_count += stats->n_packets; rule->byte_count += stats->n_bytes; rule->up.used = MAX(rule->up.used, stats->used); ovs_mutex_unlock(&rule->stats_mutex); } bool rule_dpif_fail_open(const struct rule_dpif *rule) { return rule->up.cr.priority == FAIL_OPEN_PRIORITY; } ovs_be64 rule_dpif_get_flow_cookie(const struct rule_dpif *rule) OVS_REQUIRES(rule->up.mutex) { return rule->up.flow_cookie; } void 
rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
                          uint16_t hard_timeout)
{
    /* Thin wrapper: forwards to the generic ofproto rule embedded in
     * 'rule'. */
    ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
}

/* Returns 'rule''s actions.  The caller owns a reference on the returned
 * actions and must eventually release it (with rule_actions_unref()) to avoid
 * a memory leak. */
struct rule_actions *
rule_dpif_get_actions(const struct rule_dpif *rule)
{
    return rule_get_actions(&rule->up);
}

/* Subfacets. */

/* Searches 'backer''s subfacet map for a subfacet whose datapath key is the
 * 'key_len' bytes at 'key'.  'key_hash' must be the hash under which such a
 * subfacet would have been inserted.  Returns the subfacet, or NULL if there
 * is none. */
static struct subfacet *
subfacet_find(struct dpif_backer *backer, const struct nlattr *key,
              size_t key_len, uint32_t key_hash)
{
    struct subfacet *subfacet;

    HMAP_FOR_EACH_WITH_HASH (subfacet, hmap_node, key_hash,
                             &backer->subfacets) {
        /* Hash buckets can collide, so compare length and bytes too. */
        if (subfacet->key_len == key_len
            && !memcmp(key, subfacet->key, key_len)) {
            return subfacet;
        }
    }

    return NULL;
}

/* Searches 'facet' (within 'ofproto') for a subfacet with the specified
 * 'key_fitness', 'key', and 'key_len' members in 'miss'.  Returns the
 * existing subfacet if there is one, otherwise creates and returns a
 * new subfacet. */
static struct subfacet *
subfacet_create(struct facet *facet, struct flow_miss *miss)
{
    struct dpif_backer *backer = miss->ofproto->backer;
    enum odp_key_fitness key_fitness = miss->key_fitness;
    const struct nlattr *key = miss->key;
    size_t key_len = miss->key_len;
    uint32_t key_hash;
    struct subfacet *subfacet;

    key_hash = odp_flow_key_hash(key, key_len);

    if (list_is_empty(&facet->subfacets)) {
        /* First subfacet: use the storage embedded in the facet instead of
         * allocating. */
        subfacet = &facet->one_subfacet;
    } else {
        subfacet = subfacet_find(backer, key, key_len, key_hash);
        if (subfacet) {
            if (subfacet->facet == facet) {
                return subfacet;
            }

            /* This shouldn't happen. */
            VLOG_ERR_RL(&rl, "subfacet with wrong facet");
            subfacet_destroy(subfacet);
        }

        subfacet = xmalloc(sizeof *subfacet);
    }

    hmap_insert(&backer->subfacets, &subfacet->hmap_node, key_hash);
    list_push_back(&facet->subfacets, &subfacet->list_node);
    subfacet->facet = facet;
    subfacet->key_fitness = key_fitness;
    /* The subfacet keeps its own copy of the key; subfacet_destroy__() frees
     * it. */
    subfacet->key = xmemdup(key, key_len);
    subfacet->key_len = key_len;
    subfacet->used = miss->stats.used;
    subfacet->created = subfacet->used;
    subfacet->dp_packet_count = 0;
    subfacet->dp_byte_count = 0;
    subfacet->path = SF_NOT_INSTALLED;
    subfacet->backer = backer;

    backer->subfacet_add_count++;
    return subfacet;
}

/* Uninstalls 'subfacet' from the datapath, if it is installed, removes it from
 * its facet within 'ofproto', and frees it. */
static void
subfacet_destroy__(struct subfacet *subfacet)
{
    struct facet *facet = subfacet->facet;
    struct ofproto_dpif *ofproto = facet->ofproto;

    /* Update ofproto stats before uninstall the subfacet. */
    ofproto->backer->subfacet_del_count++;

    subfacet_uninstall(subfacet);
    hmap_remove(&subfacet->backer->subfacets, &subfacet->hmap_node);
    list_remove(&subfacet->list_node);
    free(subfacet->key);
    /* 'one_subfacet' is embedded in the facet and was never malloc'ed. */
    if (subfacet != &facet->one_subfacet) {
        free(subfacet);
    }
}

/* Destroys 'subfacet', as with subfacet_destroy__(), and then if this was the
 * last remaining subfacet in its facet destroys the facet too. */
static void
subfacet_destroy(struct subfacet *subfacet)
{
    struct facet *facet = subfacet->facet;

    if (list_is_singleton(&facet->subfacets)) {
        /* facet_remove() needs at least one subfacet (it will remove it). */
        facet_remove(facet);
    } else {
        subfacet_destroy__(subfacet);
    }
}

/* Deletes the datapath flows of the 'n' subfacets in 'subfacets' with a single
 * dpif_operate() call on 'backer', then destroys each subfacet.
 *
 * The local operation arrays hold SUBFACET_DESTROY_MAX_BATCH entries and are
 * indexed up to 'n', so 'n' must not exceed that constant. */
static void
subfacet_destroy_batch(struct dpif_backer *backer,
                       struct subfacet **subfacets, int n)
{
    struct dpif_op ops[SUBFACET_DESTROY_MAX_BATCH];
    struct dpif_op *opsp[SUBFACET_DESTROY_MAX_BATCH];
    struct dpif_flow_stats stats[SUBFACET_DESTROY_MAX_BATCH];
    int i;

    for (i = 0; i < n; i++) {
        ops[i].type = DPIF_OP_FLOW_DEL;
        ops[i].u.flow_del.key = subfacets[i]->key;
        ops[i].u.flow_del.key_len = subfacets[i]->key_len;
        ops[i].u.flow_del.stats = &stats[i];
        opsp[i] = &ops[i];
    }

    dpif_operate(backer->dpif, opsp, n);
    for (i = 0; i < n; i++) {
        subfacet_reset_dp_stats(subfacets[i], &stats[i]);
        /* Mark as not installed so that subfacet_destroy() does not issue a
         * second datapath delete through subfacet_uninstall(). */
        subfacets[i]->path = SF_NOT_INSTALLED;
        subfacet_destroy(subfacets[i]);
        run_fast_rl();
    }
}

/* Updates 'subfacet''s datapath flow, setting its actions to 'actions_len'
 * bytes of actions in 'actions'.  If 'stats' is non-null, statistics counters
 * in the datapath will be zeroed and 'stats' will be updated with traffic new
 * since 'subfacet' was last updated.
 *
 * Returns 0 if successful, otherwise a positive errno value. */
static int
subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions,
                 struct dpif_flow_stats *stats)
{
    struct facet *facet = subfacet->facet;
    enum subfacet_path path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;
    const struct nlattr *actions = odp_actions->data;
    size_t actions_len = odp_actions->size;
    struct odputil_keybuf maskbuf;
    struct ofpbuf mask;

    uint64_t slow_path_stub[128 / 8];
    enum dpif_flow_put_flags flags;
    int ret;

    /* Create the flow on first install, modify it afterwards. */
    flags = subfacet->path == SF_NOT_INSTALLED ? DPIF_FP_CREATE
                                               : DPIF_FP_MODIFY;
    if (stats) {
        flags |= DPIF_FP_ZERO_STATS;
    }

    if (path == SF_SLOW_PATH) {
        /* Slow-path facets get a composed userspace action in place of the
         * caller's 'odp_actions'. */
        compose_slow_path(facet->ofproto, &facet->flow, facet->xout.slow,
                          slow_path_stub, sizeof slow_path_stub,
                          &actions, &actions_len);
    }

    /* 'mask' stays empty unless megaflows are enabled. */
    ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
    if (enable_megaflows) {
        mask_to_netlink_attr(&mask, facet->ofproto,
                             &facet->xout.wc.masks, &facet->flow);
    }

    ret = dpif_flow_put(subfacet->backer->dpif, flags, subfacet->key,
                        subfacet->key_len, mask.data, mask.size,
                        actions, actions_len, stats);

    if (stats) {
        subfacet_reset_dp_stats(subfacet, stats);
    }

    if (ret) {
        COVERAGE_INC(subfacet_install_fail);
    } else {
        /* Only record the new path once the datapath accepted the flow. */
        subfacet->path = path;
    }
    return ret;
}

/* If 'subfacet' is installed in the datapath, uninstalls it. */
static void
subfacet_uninstall(struct subfacet *subfacet)
{
    if (subfacet->path != SF_NOT_INSTALLED) {
        struct ofproto_dpif *ofproto = subfacet->facet->ofproto;
        struct dpif_flow_stats stats;
        int error;

        error = dpif_flow_del(ofproto->backer->dpif, subfacet->key,
                              subfacet->key_len, &stats);
        subfacet_reset_dp_stats(subfacet, &stats);
        /* Fold in the final statistics only if the delete succeeded. */
        if (!error) {
            subfacet_update_stats(subfacet, &stats);
        }
        subfacet->path = SF_NOT_INSTALLED;
    } else {
        /* A subfacet that is not installed must carry no datapath counts. */
        ovs_assert(subfacet->dp_packet_count == 0);
        ovs_assert(subfacet->dp_byte_count == 0);
    }
}

/* Resets 'subfacet''s datapath statistics counters.  This should be called
 * when 'subfacet''s statistics are cleared in the datapath.  If 'stats' is
 * non-null, it should contain the statistics returned by dpif when 'subfacet'
 * was reset in the datapath.  'stats' will be modified to include only
 * statistics new since 'subfacet' was last updated.
*/
static void
subfacet_reset_dp_stats(struct subfacet *subfacet,
                        struct dpif_flow_stats *stats)
{
    /* Subtract the already-accounted counts only when doing so cannot
     * underflow; otherwise 'stats' is left untouched. */
    if (stats
        && subfacet->dp_packet_count <= stats->n_packets
        && subfacet->dp_byte_count <= stats->n_bytes) {
        stats->n_packets -= subfacet->dp_packet_count;
        stats->n_bytes -= subfacet->dp_byte_count;
    }

    subfacet->dp_packet_count = 0;
    subfacet->dp_byte_count = 0;
}

/* Folds the statistics from 'stats' into the counters in 'subfacet'.
 *
 * Because of the meaning of a subfacet's counters, it only makes sense to do
 * this if 'stats' are not tracked in the datapath, that is, if 'stats'
 * represents a packet that was sent by hand or if it represents statistics
 * that have been cleared out of the datapath. */
static void
subfacet_update_stats(struct subfacet *subfacet,
                      const struct dpif_flow_stats *stats)
{
    if (stats->n_packets || stats->used > subfacet->used) {
        struct facet *facet = subfacet->facet;

        subfacet->used = MAX(subfacet->used, stats->used);
        /* Packet, byte and TCP-flag totals are kept on the owning facet. */
        facet->used = MAX(facet->used, stats->used);
        facet->packet_count += stats->n_packets;
        facet->byte_count += stats->n_bytes;
        facet->tcp_flags |= stats->tcp_flags;
    }
}

/* Rules. */

/* Lookup 'flow' in 'ofproto''s classifier.  If 'wc' is non-null, sets
 * the fields that were relevant as part of the lookup.
 *
 * Table 0 is searched first.  On a miss, one of 'ofproto''s miss rules is
 * chosen with choose_miss_rule() based on the input port's configuration (or
 * 0 if the port is unknown).  '*rule' is returned with a reference taken. */
void
rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
                 struct flow_wildcards *wc, struct rule_dpif **rule)
{
    struct ofport_dpif *port;

    if (rule_dpif_lookup_in_table(ofproto, flow, wc, 0, rule)) {
        return;
    }
    port = get_ofp_port(ofproto, flow->in_port.ofp_port);
    if (!port) {
        VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
                     flow->in_port.ofp_port);
    }

    choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
                     ofproto->no_packet_in_rule, rule);
}

/* Looks up 'flow' in table 'table_id' of 'ofproto' and stores the result in
 * '*rule', taking a reference on it if it is nonnull.  Returns true if a rule
 * was found, false otherwise (including when 'table_id' is out of range).
 *
 * If 'wc' is nonnull, 'dl_type' and the fragment bits are always marked as
 * consulted, in addition to whatever the classifier lookup records. */
bool
rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
                          const struct flow *flow, struct flow_wildcards *wc,
                          uint8_t table_id, struct rule_dpif **rule)
{
    const struct cls_rule *cls_rule;
    struct classifier *cls;
    bool frag;

    *rule = NULL;
    if (table_id >= N_TABLES) {
        return false;
    }

    if (wc) {
        memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
        wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
    }

    cls = &ofproto->up.tables[table_id].cls;
    ovs_rwlock_rdlock(&cls->rwlock);
    frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0;
    if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
        /* We must pretend that transport ports are unavailable. */
        struct flow ofpc_normal_flow = *flow;
        ofpc_normal_flow.tp_src = htons(0);
        ofpc_normal_flow.tp_dst = htons(0);
        cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc);
    } else if (frag && ofproto->up.frag_handling == OFPC_FRAG_DROP) {
        /* Fragments bypass the classifier and hit the dedicated drop rule;
         * the wildcards become an exact match. */
        cls_rule = &ofproto->drop_frags_rule->up.cr;
        if (wc) {
            flow_wildcards_init_exact(wc);
        }
    } else {
        cls_rule = classifier_lookup(cls, flow, wc);
    }

    *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
    /* Take the reference before releasing the classifier lock. */
    rule_dpif_ref(*rule);
    ovs_rwlock_unlock(&cls->rwlock);

    return *rule != NULL;
}

/* Given a port configuration (specified as zero if there's no port), chooses
 * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
 * flow table miss.  The chosen rule is stored in '*rule' with a reference
 * taken. */
void
choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
                 struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule)
{
    *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
    rule_dpif_ref(*rule);
}

/* Takes a reference on 'rule'.  Does nothing if 'rule' is NULL. */
void
rule_dpif_ref(struct rule_dpif *rule)
{
    if (rule) {
        ofproto_rule_ref(&rule->up);
    }
}

/* Releases a reference on 'rule'.  Does nothing if 'rule' is NULL. */
void
rule_dpif_unref(struct rule_dpif *rule)
{
    if (rule) {
        ofproto_rule_unref(&rule->up);
    }
}

/* Flags 'rule''s backer for revalidation (REV_FLOW_TABLE) and completes
 * 'rule''s pending operation with status 0. */
static void
complete_operation(struct rule_dpif *rule)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);

    ofproto->backer->need_revalidate = REV_FLOW_TABLE;
    ofoperation_complete(rule->up.pending, 0);
}

/* Converts 'rule' to the rule_dpif that contains it, or NULL if 'rule' is
 * NULL. */
static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
{
    return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
}

/* Allocates an uninitialized rule_dpif and returns its embedded rule. */
static struct rule *
rule_alloc(void)
{
    struct rule_dpif *rule = xmalloc(sizeof *rule);
    return &rule->up;
}

/* Frees the rule_dpif that contains 'rule_'. */
static void
rule_dealloc(struct rule *rule_)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    free(rule);
}

/* Initializes the dpif-specific part of 'rule_': its statistics mutex and
 * zeroed packet/byte counters.  Always returns 0. */
static enum ofperr
rule_construct(struct rule *rule_)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    ovs_mutex_init(&rule->stats_mutex);
    ovs_mutex_lock(&rule->stats_mutex);
    rule->packet_count = 0;
    rule->byte_count = 0;
    ovs_mutex_unlock(&rule->stats_mutex);
    return 0;
}

/* Rule insertion hook: just completes the pending operation. */
static void
rule_insert(struct rule *rule_)
    OVS_REQUIRES(ofproto_mutex)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    complete_operation(rule);
}

/* Rule deletion hook: just completes the pending operation. */
static void
rule_delete(struct rule *rule_)
    OVS_REQUIRES(ofproto_mutex)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    complete_operation(rule);
}

/* Destroys the statistics mutex created by rule_construct(). */
static void
rule_destruct(struct rule *rule_)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);
    ovs_mutex_destroy(&rule->stats_mutex);
}

/* Stores 'rule_''s packet and byte counts in '*packets' and '*bytes' after
 * first pushing outstanding facet statistics. */
static void
rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);

    /* push_all_stats() can handle flow misses which, when using the learn
     * action, can cause rules to be added and deleted.  This can corrupt our
     * caller's datastructures which assume that rule_get_stats() doesn't have
     * an impact on the flow table. To be safe, we disable miss handling. */
    push_all_stats__(false);

    /* Start from historical data for 'rule' itself that are no longer tracked
     * in facets.  This counts, for example, facets that have expired. */
    ovs_mutex_lock(&rule->stats_mutex);
    *packets = rule->packet_count;
    *bytes = rule->byte_count;
    ovs_mutex_unlock(&rule->stats_mutex);
}

/* Executes 'rule''s actions on 'packet', whose headers are described by
 * 'flow': credits the packet to 'rule''s statistics, translates the rule into
 * datapath actions, and executes those actions.  Does not free 'packet'. */
static void
rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
                  struct ofpbuf *packet)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
    struct dpif_flow_stats stats;
    struct xlate_out xout;
    struct xlate_in xin;

    dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
    rule_dpif_credit_stats(rule, &stats);

    xlate_in_init(&xin, ofproto, flow, rule, stats.tcp_flags, packet);
    xin.resubmit_stats = &stats;
    xlate_actions(&xin, &xout);

    execute_odp_actions(ofproto, flow, xout.odp_actions.data,
                        xout.odp_actions.size, packet);

    xlate_out_uninit(&xout);
}

/* Executes 'rule' on 'packet' as rule_dpif_execute() does, then deletes
 * 'packet'.  Always returns 0. */
static enum ofperr
rule_execute(struct rule *rule, const struct flow *flow,
             struct ofpbuf *packet)
{
    rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
    ofpbuf_delete(packet);
    return 0;
}

/* Action modification hook.  If 'reset_counters' is true, zeroes 'rule_''s
 * packet and byte counters under its statistics mutex.  In either case
 * completes the pending operation. */
static void
rule_modify_actions(struct rule *rule_, bool reset_counters)
    OVS_REQUIRES(ofproto_mutex)
{
    struct rule_dpif *rule = rule_dpif_cast(rule_);

    if (reset_counters) {
        ovs_mutex_lock(&rule->stats_mutex);
        rule->packet_count = 0;
        rule->byte_count = 0;
        ovs_mutex_unlock(&rule->stats_mutex);
    }

    complete_operation(rule);
}

/* Sends 'packet' out 'ofport'.
 * May modify 'packet'.
 * Returns 0 if successful, otherwise a positive errno value.
*/
static int
send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    struct dpif_flow_stats stats;
    struct odputil_keybuf keybuf;
    struct ofpact_output output;
    struct xlate_out xout;
    struct xlate_in xin;
    struct ofpbuf key;
    struct flow flow;
    union flow_in_port in_port_;
    int error;

    /* 'key' is backed by 'keybuf' on the stack, so it needs no uninit. */
    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);

    /* Use OFPP_NONE as the in_port to avoid special packet processing. */
    in_port_.ofp_port = OFPP_NONE;
    flow_extract(packet, 0, 0, NULL, &in_port_, &flow);
    /* The datapath key, in contrast, names the OFPP_LOCAL port. */
    odp_flow_key_from_flow(&key, &flow, ofp_port_to_odp_port(ofproto,
                                                             OFPP_LOCAL));
    dpif_flow_stats_extract(&flow, packet, time_msec(), &stats);

    /* Build a single OpenFlow "output" action directed at 'ofport'. */
    ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
    output.port = ofport->up.ofp_port;
    output.max_len = 0;

    /* Translate that one action (no rule) into datapath actions. */
    xlate_in_init(&xin, ofproto, &flow, NULL, 0, packet);
    xin.ofpacts_len = sizeof output;
    xin.ofpacts = &output.ofpact;
    xin.resubmit_stats = &stats;
    xlate_actions(&xin, &xout);

    error = dpif_execute(ofproto->backer->dpif,
                         key.data, key.size,
                         xout.odp_actions.data, xout.odp_actions.size,
                         packet);
    xlate_out_uninit(&xout);

    if (error) {
        VLOG_WARN_RL(&rl, "%s: failed to send packet on port %s (%s)",
                     ofproto->up.name, netdev_get_name(ofport->up.netdev),
                     ovs_strerror(error));
    }

    /* The transmit counters are bumped whether or not the send succeeded. */
    ofproto->stats.tx_packets++;
    ofproto->stats.tx_bytes += packet->size;
    return error;
}

/* Composes an ODP action for a "slow path" action for 'flow' within 'ofproto'.
 * The action will state 'slow' as the reason that the action is in the slow
 * path.  (This is purely informational: it allows a human viewing "ovs-dpctl
 * dump-flows" output to see why a flow is in the slow path.)
 *
 * The 'stub_size' bytes in 'stub' will be used to store the action.
 * 'stub_size' must be large enough for the action.
* * The action and its size will be stored in '*actionsp' and '*actions_lenp', * respectively. */ static void compose_slow_path(const struct ofproto_dpif *ofproto, const struct flow *flow, enum slow_path_reason slow, uint64_t *stub, size_t stub_size, const struct nlattr **actionsp, size_t *actions_lenp) { union user_action_cookie cookie; struct ofpbuf buf; cookie.type = USER_ACTION_COOKIE_SLOW_PATH; cookie.slow_path.unused = 0; cookie.slow_path.reason = slow; ofpbuf_use_stack(&buf, stub, stub_size); if (slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)) { uint32_t pid = dpif_port_get_pid(ofproto->backer->dpif, ODPP_NONE); odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf); } else { odp_port_t odp_port; uint32_t pid; odp_port = ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port); pid = dpif_port_get_pid(ofproto->backer->dpif, odp_port); odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf); } *actionsp = buf.data; *actions_lenp = buf.size; } static bool set_frag_handling(struct ofproto *ofproto_, enum ofp_config_flags frag_handling) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); if (frag_handling != OFPC_FRAG_REASM) { ofproto->backer->need_revalidate = REV_RECONFIGURE; return true; } else { return false; } } static enum ofperr packet_out(struct ofproto *ofproto_, struct ofpbuf *packet, const struct flow *flow, const struct ofpact *ofpacts, size_t ofpacts_len) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); struct odputil_keybuf keybuf; struct dpif_flow_stats stats; struct xlate_out xout; struct xlate_in xin; struct ofpbuf key; ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); odp_flow_key_from_flow(&key, flow, ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port)); dpif_flow_stats_extract(flow, packet, time_msec(), &stats); xlate_in_init(&xin, ofproto, flow, NULL, stats.tcp_flags, packet); xin.resubmit_stats = &stats; xin.ofpacts_len = ofpacts_len; xin.ofpacts = ofpacts; xlate_actions(&xin, &xout); 
dpif_execute(ofproto->backer->dpif, key.data, key.size, xout.odp_actions.data, xout.odp_actions.size, packet); xlate_out_uninit(&xout); return 0; } /* NetFlow. */ static int set_netflow(struct ofproto *ofproto_, const struct netflow_options *netflow_options) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); if (netflow_options) { if (!ofproto->netflow) { ofproto->netflow = netflow_create(); ofproto->backer->need_revalidate = REV_RECONFIGURE; } return netflow_set_options(ofproto->netflow, netflow_options); } else if (ofproto->netflow) { ofproto->backer->need_revalidate = REV_RECONFIGURE; netflow_destroy(ofproto->netflow); ofproto->netflow = NULL; } return 0; } static void get_netflow_ids(const struct ofproto *ofproto_, uint8_t *engine_type, uint8_t *engine_id) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); dpif_get_netflow_ids(ofproto->backer->dpif, engine_type, engine_id); } static void send_active_timeout(struct ofproto_dpif *ofproto, struct facet *facet) { if (!facet_is_controller_flow(facet) && netflow_active_timeout_expired(ofproto->netflow, &facet->nf_flow)) { struct subfacet *subfacet; struct ofexpired expired; LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) { if (subfacet->path == SF_FAST_PATH) { struct dpif_flow_stats stats; subfacet_install(subfacet, &facet->xout.odp_actions, &stats); subfacet_update_stats(subfacet, &stats); } } expired.flow = facet->flow; expired.packet_count = facet->packet_count; expired.byte_count = facet->byte_count; expired.used = facet->used; netflow_expire(ofproto->netflow, &facet->nf_flow, &expired); } } static void send_netflow_active_timeouts(struct ofproto_dpif *ofproto) { struct cls_cursor cursor; struct facet *facet; ovs_rwlock_rdlock(&ofproto->facets.rwlock); cls_cursor_init(&cursor, &ofproto->facets, NULL); CLS_CURSOR_FOR_EACH (facet, cr, &cursor) { send_active_timeout(ofproto, facet); } ovs_rwlock_unlock(&ofproto->facets.rwlock); } static struct ofproto_dpif * ofproto_dpif_lookup(const 
char *name) { struct ofproto_dpif *ofproto; HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node, hash_string(name, 0), &all_ofproto_dpifs) { if (!strcmp(ofproto->up.name, name)) { return ofproto; } } return NULL; } static void ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { struct ofproto_dpif *ofproto; if (argc > 1) { ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { unixctl_command_reply_error(conn, "no such bridge"); return; } ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_flush(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); } else { HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_flush(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); } } unixctl_command_reply(conn, "table successfully flushed"); } static struct ofport_dpif * ofbundle_get_a_port(const struct ofbundle *bundle) { return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif, bundle_node); } static void ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct ofproto_dpif *ofproto; const struct mac_entry *e; ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { unixctl_command_reply_error(conn, "no such bridge"); return; } ds_put_cstr(&ds, " port VLAN MAC Age\n"); ovs_rwlock_rdlock(&ofproto->ml->rwlock); LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) { struct ofbundle *bundle = e->port.p; char name[OFP_MAX_PORT_NAME_LEN]; ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port, name, sizeof name); ds_put_format(&ds, "%5s %4d "ETH_ADDR_FMT" %3d\n", name, e->vlan, ETH_ADDR_ARGS(e->mac), mac_entry_age(ofproto->ml, e)); } ovs_rwlock_unlock(&ofproto->ml->rwlock); unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } struct trace_ctx { struct xlate_out xout; struct xlate_in xin; struct flow flow; struct ds 
*result; }; static void trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule) { struct rule_actions *actions; ovs_be64 cookie; ds_put_char_multiple(result, '\t', level); if (!rule) { ds_put_cstr(result, "No match\n"); return; } ovs_mutex_lock(&rule->up.mutex); cookie = rule->up.flow_cookie; ovs_mutex_unlock(&rule->up.mutex); ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ", rule ? rule->up.table_id : 0, ntohll(cookie)); cls_rule_format(&rule->up.cr, result); ds_put_char(result, '\n'); actions = rule_dpif_get_actions(rule); ds_put_char_multiple(result, '\t', level); ds_put_cstr(result, "OpenFlow "); ofpacts_format(actions->ofpacts, actions->ofpacts_len, result); ds_put_char(result, '\n'); rule_actions_unref(actions); } static void trace_format_flow(struct ds *result, int level, const char *title, struct trace_ctx *trace) { ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); if (flow_equal(&trace->xin.flow, &trace->flow)) { ds_put_cstr(result, "unchanged"); } else { flow_format(result, &trace->xin.flow); trace->flow = trace->xin.flow; } ds_put_char(result, '\n'); } static void trace_format_regs(struct ds *result, int level, const char *title, struct trace_ctx *trace) { size_t i; ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s:", title); for (i = 0; i < FLOW_N_REGS; i++) { ds_put_format(result, " reg%zu=0x%"PRIx32, i, trace->flow.regs[i]); } ds_put_char(result, '\n'); } static void trace_format_odp(struct ds *result, int level, const char *title, struct trace_ctx *trace) { struct ofpbuf *odp_actions = &trace->xout.odp_actions; ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); format_odp_actions(result, odp_actions->data, odp_actions->size); ds_put_char(result, '\n'); } static void trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse) { struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin); struct ds *result = 
trace->result; ds_put_char(result, '\n'); trace_format_flow(result, recurse + 1, "Resubmitted flow", trace); trace_format_regs(result, recurse + 1, "Resubmitted regs", trace); trace_format_odp(result, recurse + 1, "Resubmitted odp", trace); trace_format_rule(result, recurse + 1, rule); } static void trace_report(struct xlate_in *xin, const char *s, int recurse) { struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin); struct ds *result = trace->result; ds_put_char_multiple(result, '\t', recurse); ds_put_cstr(result, s); ds_put_char(result, '\n'); } static void ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { const struct dpif_backer *backer; struct ofproto_dpif *ofproto; struct ofpbuf odp_key, odp_mask; struct ofpbuf *packet; struct ds result; struct flow flow; char *s; packet = NULL; backer = NULL; ds_init(&result); ofpbuf_init(&odp_key, 0); ofpbuf_init(&odp_mask, 0); /* Handle "-generate" or a hex string as the last argument. */ if (!strcmp(argv[argc - 1], "-generate")) { packet = ofpbuf_new(0); argc--; } else { const char *error = eth_from_hex(argv[argc - 1], &packet); if (!error) { argc--; } else if (argc == 4) { /* The 3-argument form must end in "-generate' or a hex string. */ unixctl_command_reply_error(conn, error); goto exit; } } /* Parse the flow and determine whether a datapath or * bridge is specified. If function odp_flow_key_from_string() * returns 0, the flow is a odp_flow. If function * parse_ofp_exact_flow() returns 0, the flow is a br_flow. */ if (!odp_flow_from_string(argv[argc - 1], NULL, &odp_key, &odp_mask)) { /* If the odp_flow is the second argument, * the datapath name is the first argument. 
*/ if (argc == 3) { const char *dp_type; if (!strncmp(argv[1], "ovs-", 4)) { dp_type = argv[1] + 4; } else { dp_type = argv[1]; } backer = shash_find_data(&all_dpif_backers, dp_type); if (!backer) { unixctl_command_reply_error(conn, "Cannot find datapath " "of this name"); goto exit; } } else { /* No datapath name specified, so there should be only one * datapath. */ struct shash_node *node; if (shash_count(&all_dpif_backers) != 1) { unixctl_command_reply_error(conn, "Must specify datapath " "name, there is more than one type of datapath"); goto exit; } node = shash_first(&all_dpif_backers); backer = node->data; } if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, &flow, NULL, &ofproto, NULL)) { unixctl_command_reply_error(conn, "Invalid datapath flow"); goto exit; } ds_put_format(&result, "Bridge: %s\n", ofproto->up.name); } else if (!parse_ofp_exact_flow(&flow, argv[argc - 1])) { if (argc != 3) { unixctl_command_reply_error(conn, "Must specify bridge name"); goto exit; } ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { unixctl_command_reply_error(conn, "Unknown bridge name"); goto exit; } } else { unixctl_command_reply_error(conn, "Bad flow syntax"); goto exit; } /* Generate a packet, if requested. */ if (packet) { if (!packet->size) { flow_compose(packet, &flow); } else { union flow_in_port in_port_; in_port_ = flow.in_port; ds_put_cstr(&result, "Packet: "); s = ofp_packet_to_string(packet->data, packet->size); ds_put_cstr(&result, s); free(s); /* Use the metadata from the flow and the packet argument * to reconstruct the flow. 
*/ flow_extract(packet, flow.skb_priority, flow.pkt_mark, NULL, &in_port_, &flow); } } ofproto_trace(ofproto, &flow, packet, &result); unixctl_command_reply(conn, ds_cstr(&result)); exit: ds_destroy(&result); ofpbuf_delete(packet); ofpbuf_uninit(&odp_key); ofpbuf_uninit(&odp_mask); } static void ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, const struct ofpbuf *packet, struct ds *ds) { struct rule_dpif *rule; struct flow_wildcards wc; ds_put_cstr(ds, "Flow: "); flow_format(ds, flow); ds_put_char(ds, '\n'); flow_wildcards_init_catchall(&wc); rule_dpif_lookup(ofproto, flow, &wc, &rule); trace_format_rule(ds, 0, rule); if (rule == ofproto->miss_rule) { ds_put_cstr(ds, "\nNo match, flow generates \"packet in\"s.\n"); } else if (rule == ofproto->no_packet_in_rule) { ds_put_cstr(ds, "\nNo match, packets dropped because " "OFPPC_NO_PACKET_IN is set on in_port.\n"); } else if (rule == ofproto->drop_frags_rule) { ds_put_cstr(ds, "\nPackets dropped because they are IP fragments " "and the fragment handling mode is \"drop\".\n"); } if (rule) { uint64_t odp_actions_stub[1024 / 8]; struct ofpbuf odp_actions; struct trace_ctx trace; struct match match; uint8_t tcp_flags; tcp_flags = packet ? 
packet_get_tcp_flags(packet, flow) : 0; trace.result = ds; trace.flow = *flow; ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub); xlate_in_init(&trace.xin, ofproto, flow, rule, tcp_flags, packet); trace.xin.resubmit_hook = trace_resubmit; trace.xin.report_hook = trace_report; xlate_actions(&trace.xin, &trace.xout); flow_wildcards_or(&trace.xout.wc, &trace.xout.wc, &wc); ds_put_char(ds, '\n'); trace_format_flow(ds, 0, "Final flow", &trace); match_init(&match, flow, &trace.xout.wc); ds_put_cstr(ds, "Relevant fields: "); match_format(&match, ds, OFP_DEFAULT_PRIORITY); ds_put_char(ds, '\n'); ds_put_cstr(ds, "Datapath actions: "); format_odp_actions(ds, trace.xout.odp_actions.data, trace.xout.odp_actions.size); if (trace.xout.slow) { ds_put_cstr(ds, "\nThis flow is handled by the userspace " "slow path because it:"); switch (trace.xout.slow) { case SLOW_CFM: ds_put_cstr(ds, "\n\t- Consists of CFM packets."); break; case SLOW_LACP: ds_put_cstr(ds, "\n\t- Consists of LACP packets."); break; case SLOW_STP: ds_put_cstr(ds, "\n\t- Consists of STP packets."); break; case SLOW_BFD: ds_put_cstr(ds, "\n\t- Consists of BFD packets."); break; case SLOW_CONTROLLER: ds_put_cstr(ds, "\n\t- Sends \"packet-in\" messages " "to the OpenFlow controller."); break; case __SLOW_MAX: NOT_REACHED(); } } xlate_out_uninit(&trace.xout); } rule_dpif_unref(rule); } /* Runs a self-check of flow translations in 'ofproto'. Appends a message to * 'reply' describing the results. 
*/
static void
ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
{
    struct cls_cursor iter;
    struct facet *f;
    int n_bad = 0;

    /* Count the facets that fail the consistency check, under the facet
     * classifier's read lock. */
    ovs_rwlock_rdlock(&ofproto->facets.rwlock);
    cls_cursor_init(&iter, &ofproto->facets, NULL);
    CLS_CURSOR_FOR_EACH (f, cr, &iter) {
        n_bad += !facet_check_consistency(f);
    }
    ovs_rwlock_unlock(&ofproto->facets.rwlock);

    if (!n_bad) {
        ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
        return;
    }

    /* Any inconsistency schedules a revalidation of the backer. */
    ofproto->backer->need_revalidate = REV_INCONSISTENCY;
    ds_put_format(reply, "%s: self-check failed (%d errors)\n",
                  ofproto->up.name, n_bad);
}

/* unixctl handler: self-checks the ofproto named in argv[1], or every ofproto
 * when no name is supplied, and replies with the accumulated report. */
static void
ofproto_dpif_self_check(struct unixctl_conn *conn,
                        int argc, const char *argv[], void *aux OVS_UNUSED)
{
    struct ds reply = DS_EMPTY_INITIALIZER;
    struct ofproto_dpif *ofproto;

    if (argc <= 1) {
        /* No name given: check them all. */
        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            ofproto_dpif_self_check__(ofproto, &reply);
        }
    } else {
        ofproto = ofproto_dpif_lookup(argv[1]);
        if (!ofproto) {
            unixctl_command_reply_error(conn, "Unknown ofproto (use "
                                        "ofproto/list for help)");
            return;
        }
        ofproto_dpif_self_check__(ofproto, &reply);
    }

    unixctl_command_reply(conn, ds_cstr(&reply));
    ds_destroy(&reply);
}

/* Store the current ofprotos in 'ofproto_shash'.  Returns a sorted list
 * of the 'ofproto_shash' nodes.  It is the responsibility of the caller
 * to destroy 'ofproto_shash' and free the returned value.
*/ static const struct shash_node ** get_ofprotos(struct shash *ofproto_shash) { const struct ofproto_dpif *ofproto; HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { char *name = xasprintf("%s@%s", ofproto->up.type, ofproto->up.name); shash_add_nocopy(ofproto_shash, name, ofproto); } return shash_sort(ofproto_shash); } static void ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; struct shash ofproto_shash; const struct shash_node **sorted_ofprotos; int i; shash_init(&ofproto_shash); sorted_ofprotos = get_ofprotos(&ofproto_shash); for (i = 0; i < shash_count(&ofproto_shash); i++) { const struct shash_node *node = sorted_ofprotos[i]; ds_put_format(&ds, "%s\n", node->name); } shash_destroy(&ofproto_shash); free(sorted_ofprotos); unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } static void show_dp_rates(struct ds *ds, const char *heading, const struct avg_subfacet_rates *rates) { ds_put_format(ds, "%s add rate: %5.3f/min, del rate: %5.3f/min\n", heading, rates->add_rate, rates->del_rate); } static void dpif_show_backer(const struct dpif_backer *backer, struct ds *ds) { const struct shash_node **ofprotos; struct ofproto_dpif *ofproto; struct shash ofproto_shash; uint64_t n_hit, n_missed; long long int minutes; size_t i; n_hit = n_missed = 0; HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { if (ofproto->backer == backer) { n_missed += ofproto->n_missed; n_hit += ofproto->n_hit; } } ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n", dpif_name(backer->dpif), n_hit, n_missed); ds_put_format(ds, "\tflows: cur: %zu, avg: %u, max: %u," " life span: %lldms\n", hmap_count(&backer->subfacets), backer->avg_n_subfacet, backer->max_n_subfacet, backer->avg_subfacet_life); minutes = (time_msec() - backer->created) / (1000 * 60); if (minutes >= 60) { show_dp_rates(ds, "\thourly avg:", &backer->hourly); } 
if (minutes >= 60 * 24) { show_dp_rates(ds, "\tdaily avg:", &backer->daily); } show_dp_rates(ds, "\toverall avg:", &backer->lifetime); shash_init(&ofproto_shash); ofprotos = get_ofprotos(&ofproto_shash); for (i = 0; i < shash_count(&ofproto_shash); i++) { struct ofproto_dpif *ofproto = ofprotos[i]->data; const struct shash_node **ports; size_t j; if (ofproto->backer != backer) { continue; } ds_put_format(ds, "\t%s: hit:%"PRIu64" missed:%"PRIu64"\n", ofproto->up.name, ofproto->n_hit, ofproto->n_missed); ports = shash_sort(&ofproto->up.port_by_name); for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) { const struct shash_node *node = ports[j]; struct ofport *ofport = node->data; struct smap config; odp_port_t odp_port; ds_put_format(ds, "\t\t%s %u/", netdev_get_name(ofport->netdev), ofport->ofp_port); odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port); if (odp_port != ODPP_NONE) { ds_put_format(ds, "%"PRIu32":", odp_port); } else { ds_put_cstr(ds, "none:"); } ds_put_format(ds, " (%s", netdev_get_type(ofport->netdev)); smap_init(&config); if (!netdev_get_config(ofport->netdev, &config)) { const struct smap_node **nodes; size_t i; nodes = smap_sort(&config); for (i = 0; i < smap_count(&config); i++) { const struct smap_node *node = nodes[i]; ds_put_format(ds, "%c %s=%s", i ? ',' : ':', node->key, node->value); } free(nodes); } smap_destroy(&config); ds_put_char(ds, ')'); ds_put_char(ds, '\n'); } free(ports); } shash_destroy(&ofproto_shash); free(ofprotos); } static void ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct shash_node **backers; int i; backers = shash_sort(&all_dpif_backers); for (i = 0; i < shash_count(&all_dpif_backers); i++) { dpif_show_backer(backers[i]->data, &ds); } free(backers); unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } /* Dump the megaflow (facet) cache. 
This is useful to check the * correctness of flow wildcarding, since the same mechanism is used for * both xlate caching and kernel wildcarding. * * It's important to note that in the output the flow description uses * OpenFlow (OFP) ports, but the actions use datapath (ODP) ports. * * This command is only needed for advanced debugging, so it's not * documented in the man page. */ static void ofproto_unixctl_dpif_dump_megaflows(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct ofproto_dpif *ofproto; long long int now = time_msec(); struct cls_cursor cursor; struct facet *facet; ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { unixctl_command_reply_error(conn, "no such bridge"); return; } ovs_rwlock_rdlock(&ofproto->facets.rwlock); cls_cursor_init(&cursor, &ofproto->facets, NULL); CLS_CURSOR_FOR_EACH (facet, cr, &cursor) { cls_rule_format(&facet->cr, &ds); ds_put_cstr(&ds, ", "); ds_put_format(&ds, "n_subfacets:%zu, ", list_size(&facet->subfacets)); ds_put_format(&ds, "used:%.3fs, ", (now - facet->used) / 1000.0); ds_put_cstr(&ds, "Datapath actions: "); if (facet->xout.slow) { uint64_t slow_path_stub[128 / 8]; const struct nlattr *actions; size_t actions_len; compose_slow_path(ofproto, &facet->flow, facet->xout.slow, slow_path_stub, sizeof slow_path_stub, &actions, &actions_len); format_odp_actions(&ds, actions, actions_len); } else { format_odp_actions(&ds, facet->xout.odp_actions.data, facet->xout.odp_actions.size); } ds_put_cstr(&ds, "\n"); } ovs_rwlock_unlock(&ofproto->facets.rwlock); ds_chomp(&ds, '\n'); unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } /* Disable using the megaflows. * * This command is only needed for advanced debugging, so it's not * documented in the man page. 
*/ static void ofproto_unixctl_dpif_disable_megaflows(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { struct ofproto_dpif *ofproto; enable_megaflows = false; HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { flush(&ofproto->up); } unixctl_command_reply(conn, "megaflows disabled"); } /* Re-enable using megaflows. * * This command is only needed for advanced debugging, so it's not * documented in the man page. */ static void ofproto_unixctl_dpif_enable_megaflows(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { struct ofproto_dpif *ofproto; enable_megaflows = true; HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { flush(&ofproto->up); } unixctl_command_reply(conn, "megaflows enabled"); } static void ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; const struct ofproto_dpif *ofproto; struct subfacet *subfacet; ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { unixctl_command_reply_error(conn, "no such bridge"); return; } update_stats(ofproto->backer); HMAP_FOR_EACH (subfacet, hmap_node, &ofproto->backer->subfacets) { struct facet *facet = subfacet->facet; struct odputil_keybuf maskbuf; struct ofpbuf mask; if (facet->ofproto != ofproto) { continue; } ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf); if (enable_megaflows) { mask_to_netlink_attr(&mask, facet->ofproto, &facet->xout.wc.masks, &facet->flow); } odp_flow_format(subfacet->key, subfacet->key_len, mask.data, mask.size, &ds, false); ds_put_format(&ds, ", packets:%"PRIu64", bytes:%"PRIu64", used:", subfacet->dp_packet_count, subfacet->dp_byte_count); if (subfacet->used) { ds_put_format(&ds, "%.3fs", (time_msec() - subfacet->used) / 1000.0); } else { ds_put_format(&ds, "never"); } if (subfacet->facet->tcp_flags) { ds_put_cstr(&ds, ", flags:"); 
packet_format_tcp_flags(&ds, subfacet->facet->tcp_flags); } ds_put_cstr(&ds, ", actions:"); if (facet->xout.slow) { uint64_t slow_path_stub[128 / 8]; const struct nlattr *actions; size_t actions_len; compose_slow_path(ofproto, &facet->flow, facet->xout.slow, slow_path_stub, sizeof slow_path_stub, &actions, &actions_len); format_odp_actions(&ds, actions, actions_len); } else { format_odp_actions(&ds, facet->xout.odp_actions.data, facet->xout.odp_actions.size); } ds_put_char(&ds, '\n'); } unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } static void ofproto_unixctl_dpif_del_flows(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; struct ofproto_dpif *ofproto; ofproto = ofproto_dpif_lookup(argv[1]); if (!ofproto) { unixctl_command_reply_error(conn, "no such bridge"); return; } flush(&ofproto->up); unixctl_command_reply(conn, ds_cstr(&ds)); ds_destroy(&ds); } static void ofproto_dpif_unixctl_init(void) { static bool registered; if (registered) { return; } registered = true; unixctl_command_register( "ofproto/trace", "[dp_name]|bridge odp_flow|br_flow [-generate|packet]", 1, 3, ofproto_unixctl_trace, NULL); unixctl_command_register("fdb/flush", "[bridge]", 0, 1, ofproto_unixctl_fdb_flush, NULL); unixctl_command_register("fdb/show", "bridge", 1, 1, ofproto_unixctl_fdb_show, NULL); unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1, ofproto_dpif_self_check, NULL); unixctl_command_register("dpif/dump-dps", "", 0, 0, ofproto_unixctl_dpif_dump_dps, NULL); unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show, NULL); unixctl_command_register("dpif/dump-flows", "bridge", 1, 1, ofproto_unixctl_dpif_dump_flows, NULL); unixctl_command_register("dpif/del-flows", "bridge", 1, 1, ofproto_unixctl_dpif_del_flows, NULL); unixctl_command_register("dpif/dump-megaflows", "bridge", 1, 1, ofproto_unixctl_dpif_dump_megaflows, NULL); 
unixctl_command_register("dpif/disable-megaflows", "", 0, 0, ofproto_unixctl_dpif_disable_megaflows, NULL); unixctl_command_register("dpif/enable-megaflows", "", 0, 0, ofproto_unixctl_dpif_enable_megaflows, NULL); } /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) * * This is deprecated. It is only for compatibility with broken device drivers * in old versions of Linux that do not properly support VLANs when VLAN * devices are not used. When broken device drivers are no longer in * widespread use, we will delete these interfaces. */ static int set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto); struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); if (realdev_ofp_port == ofport->realdev_ofp_port && vid == ofport->vlandev_vid) { return 0; } ofproto->backer->need_revalidate = REV_RECONFIGURE; if (ofport->realdev_ofp_port) { vsp_remove(ofport); } if (realdev_ofp_port && ofport->bundle) { /* vlandevs are enslaved to their realdevs, so they are not allowed to * themselves be part of a bundle. 
*/ bundle_set(ofport->up.ofproto, ofport->bundle, NULL); } ofport->realdev_ofp_port = realdev_ofp_port; ofport->vlandev_vid = vid; if (realdev_ofp_port) { vsp_add(ofport, realdev_ofp_port, vid); } return 0; } static uint32_t hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid) { return hash_2words(ofp_to_u16(realdev_ofp_port), vid); } bool ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto) OVS_EXCLUDED(ofproto->vsp_mutex) { bool ret; ovs_mutex_lock(&ofproto->vsp_mutex); ret = !hmap_is_empty(&ofproto->realdev_vid_map); ovs_mutex_unlock(&ofproto->vsp_mutex); return ret; } static ofp_port_t vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto, ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci) OVS_REQUIRES(ofproto->vsp_mutex) { if (!hmap_is_empty(&ofproto->realdev_vid_map)) { int vid = vlan_tci_to_vid(vlan_tci); const struct vlan_splinter *vsp; HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node, hash_realdev_vid(realdev_ofp_port, vid), &ofproto->realdev_vid_map) { if (vsp->realdev_ofp_port == realdev_ofp_port && vsp->vid == vid) { return vsp->vlandev_ofp_port; } } } return realdev_ofp_port; } /* Returns the OFP port number of the Linux VLAN device that corresponds to * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in * 'struct ofport_dpif'. For example, given 'realdev_ofp_port' of eth0 and * 'vlan_tci' 9, it would return the port number of eth0.9. * * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this * function just returns its 'realdev_ofp_port' argument. 
*/ ofp_port_t vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto, ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci) OVS_EXCLUDED(ofproto->vsp_mutex) { ofp_port_t ret; ovs_mutex_lock(&ofproto->vsp_mutex); ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci); ovs_mutex_unlock(&ofproto->vsp_mutex); return ret; } static struct vlan_splinter * vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port) { struct vlan_splinter *vsp; HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node, hash_ofp_port(vlandev_ofp_port), &ofproto->vlandev_map) { if (vsp->vlandev_ofp_port == vlandev_ofp_port) { return vsp; } } return NULL; } /* Returns the OpenFlow port number of the "real" device underlying the Linux * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the * VLAN VID of the Linux VLAN device in '*vid'. For example, given * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of * eth0 and store 9 in '*vid'. * * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux * VLAN device. Unless VLAN splinters are enabled, this is what this function * always does.*/ static ofp_port_t vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port, int *vid) OVS_REQUIRES(ofproto->vsp_mutex) { if (!hmap_is_empty(&ofproto->vlandev_map)) { const struct vlan_splinter *vsp; vsp = vlandev_find(ofproto, vlandev_ofp_port); if (vsp) { if (vid) { *vid = vsp->vid; } return vsp->realdev_ofp_port; } } return 0; } /* Given 'flow', a flow representing a packet received on 'ofproto', checks * whether 'flow->in_port' represents a Linux VLAN device. If so, changes * 'flow->in_port' to the "real" device backing the VLAN device, sets * 'flow->vlan_tci' to the VLAN VID, and returns true. Otherwise (which is * always the case unless VLAN splinters are enabled), returns false without * making any changes. 
*/ bool vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow) OVS_EXCLUDED(ofproto->vsp_mutex) { ofp_port_t realdev; int vid; ovs_mutex_lock(&ofproto->vsp_mutex); realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid); ovs_mutex_unlock(&ofproto->vsp_mutex); if (!realdev) { return false; } /* Cause the flow to be processed as if it came in on the real device with * the VLAN device's VLAN ID. */ flow->in_port.ofp_port = realdev; flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI); return true; } static void vsp_remove(struct ofport_dpif *port) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); struct vlan_splinter *vsp; ovs_mutex_lock(&ofproto->vsp_mutex); vsp = vlandev_find(ofproto, port->up.ofp_port); if (vsp) { hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node); hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node); free(vsp); port->realdev_ofp_port = 0; } else { VLOG_ERR("missing vlan device record"); } ovs_mutex_unlock(&ofproto->vsp_mutex); } static void vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); ovs_mutex_lock(&ofproto->vsp_mutex); if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL) && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid)) == realdev_ofp_port)) { struct vlan_splinter *vsp; vsp = xmalloc(sizeof *vsp); vsp->realdev_ofp_port = realdev_ofp_port; vsp->vlandev_ofp_port = port->up.ofp_port; vsp->vid = vid; port->realdev_ofp_port = realdev_ofp_port; hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node, hash_ofp_port(port->up.ofp_port)); hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node, hash_realdev_vid(realdev_ofp_port, vid)); } else { VLOG_ERR("duplicate vlan device record"); } ovs_mutex_unlock(&ofproto->vsp_mutex); } static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port) { const struct ofport_dpif *ofport = 
get_ofp_port(ofproto, ofp_port); return ofport ? ofport->odp_port : ODPP_NONE; } struct ofport_dpif * odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port) { struct ofport_dpif *port; ovs_rwlock_rdlock(&backer->odp_to_ofport_lock); HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port), &backer->odp_to_ofport_map) { if (port->odp_port == odp_port) { ovs_rwlock_unlock(&backer->odp_to_ofport_lock); return port; } } ovs_rwlock_unlock(&backer->odp_to_ofport_lock); return NULL; } static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port) { struct ofport_dpif *port; port = odp_port_to_ofport(ofproto->backer, odp_port); if (port && &ofproto->up == port->up.ofproto) { return port->up.ofp_port; } else { return OFPP_NONE; } } /* Compute exponentially weighted moving average, adding 'new' as the newest, * most heavily weighted element. 'base' designates the rate of decay: after * 'base' further updates, 'new''s weight in the EWMA decays to about 1/e * (about .37). */ static void exp_mavg(double *avg, int base, double new) { *avg = (*avg * (base - 1) + new) / base; } static void update_moving_averages(struct dpif_backer *backer) { const int min_ms = 60 * 1000; /* milliseconds in one minute. */ long long int minutes = (time_msec() - backer->created) / min_ms; if (minutes > 0) { backer->lifetime.add_rate = (double) backer->total_subfacet_add_count / minutes; backer->lifetime.del_rate = (double) backer->total_subfacet_del_count / minutes; } else { backer->lifetime.add_rate = 0.0; backer->lifetime.del_rate = 0.0; } /* Update hourly averages on the minute boundaries. */ if (time_msec() - backer->last_minute >= min_ms) { exp_mavg(&backer->hourly.add_rate, 60, backer->subfacet_add_count); exp_mavg(&backer->hourly.del_rate, 60, backer->subfacet_del_count); /* Update daily averages on the hour boundaries. 
*/ if ((backer->last_minute - backer->created) / min_ms % 60 == 59) { exp_mavg(&backer->daily.add_rate, 24, backer->hourly.add_rate); exp_mavg(&backer->daily.del_rate, 24, backer->hourly.del_rate); } backer->total_subfacet_add_count += backer->subfacet_add_count; backer->total_subfacet_del_count += backer->subfacet_del_count; backer->subfacet_add_count = 0; backer->subfacet_del_count = 0; backer->last_minute += min_ms; } } /* Appends a representation of 'mask' as OVS_KEY_ATTR_* attributes to 'buf'. * Ommit vlan mask if the flow's vlan was added by VLAN splinter. */ void mask_to_netlink_attr(struct ofpbuf *buf, const struct ofproto_dpif *ofproto, const struct flow *mask, const struct flow *flow) { ofp_port_t vlandev_port; struct flow orig_flow; vlandev_port = vsp_realdev_to_vlandev(ofproto, flow->in_port.ofp_port, flow->vlan_tci); if (vlandev_port != flow->in_port.ofp_port) { /* This flow was received from the VLAN port, do not * generate vlan masks. */ orig_flow = *flow; orig_flow.vlan_tci = 0; flow = &orig_flow; } odp_flow_key_from_mask(buf, mask, flow, UINT32_MAX); } const struct ofproto_class ofproto_dpif_class = { init, enumerate_types, enumerate_names, del, port_open_type, type_run, type_run_fast, type_wait, alloc, construct, destruct, dealloc, run, run_fast, wait, get_memory_usage, flush, get_features, get_tables, port_alloc, port_construct, port_destruct, port_dealloc, port_modified, port_reconfigured, port_query_by_name, port_add, port_del, port_get_stats, port_dump_start, port_dump_next, port_dump_done, port_poll, port_poll_wait, port_is_lacp_current, NULL, /* rule_choose_table */ rule_alloc, rule_construct, rule_insert, rule_delete, rule_destruct, rule_dealloc, rule_get_stats, rule_execute, rule_modify_actions, set_frag_handling, packet_out, set_netflow, get_netflow_ids, set_sflow, set_ipfix, set_cfm, get_cfm_status, set_bfd, get_bfd_status, set_stp, get_stp_status, set_stp_port, get_stp_port_status, set_queues, bundle_set, bundle_remove, mirror_set__, 
mirror_get_stats__,
    set_flood_vlans,
    is_mirror_output_bundle,
    forward_bpdu_changed,
    set_mac_table_config,
    set_realdev,
    NULL,                       /* meter_get_features */
    NULL,                       /* meter_set */
    NULL,                       /* meter_get */
    NULL,                       /* meter_del */
};
openvswitch-2.0.1+git20140120/ofproto/ofproto-dpif.h000066400000000000000000000101331226605124000217210ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. */

#ifndef OFPROTO_DPIF_H
#define OFPROTO_DPIF_H 1

#include /* NOTE(review): the header name is missing from this directive in this copy of the file; restore it. */
#include "hmapx.h"
#include "odp-util.h"
#include "ofp-util.h"
#include "ovs-thread.h"
#include "timer.h"
#include "util.h"
#include "ovs-thread.h"

union user_action_cookie;
struct dpif_flow_stats;
struct ofproto_dpif;
struct ofport_dpif;
struct dpif_backer;
struct OVS_LOCKABLE rule_dpif;

/* Ofproto-dpif -- DPIF based ofproto implementation.
 *
 * Ofproto-dpif provides an ofproto implementation for those platforms which
 * implement the netdev and dpif interface defined in netdev.h and dpif.h. The
 * most important of which is the Linux Kernel Module (dpif-linux), but
 * alternatives are supported such as a userspace only implementation
 * (dpif-netdev), and a dummy implementation used for unit testing.
 *
 * Ofproto-dpif is divided into three major chunks.
 *
 * - ofproto-dpif.c
 * The main ofproto-dpif module is responsible for implementing the
 * provider interface, installing and removing datapath flows, maintaining
 * packet statistics, running protocols (BFD, LACP, STP, etc), and
 * configuring relevant submodules.
 *
 * - ofproto-dpif-upcall.c
 * Ofproto-dpif-upcall is responsible for retrieving upcalls from the kernel,
 * processing miss upcalls, and handing more complex ones up to the main
 * ofproto-dpif module. Miss upcall processing boils down to figuring out
 * what each packet's actions are, executing them (i.e. asking the kernel to
 * forward it), and handing it up to ofproto-dpif to decide whether or not
 * to install a kernel flow.
 *
 * - ofproto-dpif-xlate.c
 * Ofproto-dpif-xlate is responsible for translating OpenFlow
 * actions into datapath actions. */

/* Rule lookup, reference counting and accessors for "struct rule_dpif". */
void rule_dpif_lookup(struct ofproto_dpif *, const struct flow *,
                      struct flow_wildcards *, struct rule_dpif **rule);

bool rule_dpif_lookup_in_table(struct ofproto_dpif *, const struct flow *,
                               struct flow_wildcards *, uint8_t table_id,
                               struct rule_dpif **rule);

void rule_dpif_ref(struct rule_dpif *);
void rule_dpif_unref(struct rule_dpif *);

void rule_dpif_credit_stats(struct rule_dpif *rule ,
                            const struct dpif_flow_stats *);

bool rule_dpif_fail_open(const struct rule_dpif *rule);

struct rule_actions *rule_dpif_get_actions(const struct rule_dpif *);

ovs_be64 rule_dpif_get_flow_cookie(const struct rule_dpif *rule);

void rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
                               uint16_t hard_timeout);

void choose_miss_rule(enum ofputil_port_config,
                      struct rule_dpif *miss_rule,
                      struct rule_dpif *no_packet_in_rule,
                      struct rule_dpif **rule);

/* VLAN splinter support. */
bool ofproto_has_vlan_splinters(const struct ofproto_dpif *);
ofp_port_t vsp_realdev_to_vlandev(const struct ofproto_dpif *,
                                  ofp_port_t realdev_ofp_port,
                                  ovs_be16 vlan_tci);
bool vsp_adjust_flow(const struct ofproto_dpif *, struct flow *);

void ofproto_dpif_send_packet_in(struct ofproto_dpif *,
                                 struct ofputil_packet_in *pin);
void ofproto_dpif_flow_mod(struct ofproto_dpif *, struct ofputil_flow_mod *);

/* Appends 'mask' to 'buf' as OVS_KEY_ATTR_* attributes, omitting the VLAN
 * mask for flows whose VLAN was added by VLAN splinters. */
void mask_to_netlink_attr(struct ofpbuf *buf, const struct ofproto_dpif *,
                          const struct flow *mask, const struct flow *flow);

/* Returns the port with the given datapath port number, or NULL. */
struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t);

#endif /* ofproto-dpif.h */
openvswitch-2.0.1+git20140120/ofproto/ofproto-provider.h000066400000000000000000002205561226605124000226450ustar00rootroot00000000000000/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OFPROTO_OFPROTO_PROVIDER_H
#define OFPROTO_OFPROTO_PROVIDER_H 1

/* Definitions for use within ofproto.
 *
 *
 * Thread-safety
 * =============
 *
 * Lots of ofproto data structures are only accessed from a single thread.
 * Those data structures are generally not thread-safe.
 *
 * The ofproto-dpif ofproto implementation accesses the flow table from
 * multiple threads, including modifying the flow table from multiple threads
 * via the "learn" action, so the flow table and various structures that index
 * it have been made thread-safe. Refer to comments on individual data
 * structures for details.
*/

#include "cfm.h"
#include "classifier.h"
#include "guarded-list.h"
#include "heap.h"
#include "hindex.h"
#include "list.h"
#include "ofp-errors.h"
#include "ofp-util.h"
#include "ofproto/ofproto.h"
#include "ovs-atomic.h"
#include "ovs-thread.h"
#include "shash.h"
#include "simap.h"
#include "timeval.h"

struct match;
struct ofpact;
struct ofputil_flow_mod;
struct bfd_cfg;
struct meter;

/* Mutex named by the OVS_GUARDED_BY(ofproto_mutex) annotations below. */
extern struct ovs_mutex ofproto_mutex;

/* An OpenFlow switch.
 *
 * With few exceptions, ofproto implementations may look at these fields but
 * should not modify them. */
struct ofproto {
    struct hmap_node hmap_node; /* In global 'all_ofprotos' hmap. */
    const struct ofproto_class *ofproto_class;
    char *type;                 /* Datapath type. */
    char *name;                 /* Datapath name. */

    /* Settings. */
    uint64_t fallback_dpid;     /* Datapath ID if no better choice found. */
    uint64_t datapath_id;       /* Datapath ID. */
    bool forward_bpdu;          /* Option to allow forwarding of BPDU frames
                                 * when NORMAL action is invoked. */
    char *mfr_desc;             /* Manufacturer (NULL for default). */
    char *hw_desc;              /* Hardware (NULL for default). */
    char *sw_desc;              /* Software version (NULL for default). */
    char *serial_desc;          /* Serial number (NULL for default). */
    char *dp_desc;              /* Datapath description (NULL for default). */
    enum ofp_config_flags frag_handling; /* One of OFPC_*. */

    /* Datapath. */
    struct hmap ports;          /* Contains "struct ofport"s. */
    struct shash port_by_name;
    unsigned long *ofp_port_ids;/* Bitmap of used OpenFlow port numbers. */
    struct simap ofp_requests;  /* OpenFlow port number requests. */
    uint16_t alloc_port_no;     /* Last allocated OpenFlow port number. */
    uint16_t max_ports;         /* Max possible OpenFlow port num, plus one. */

    /* Flow tables. */
    long long int eviction_group_timer; /* For rate limited reheapification. */
    struct oftable *tables;     /* Array of 'n_tables' tables. */
    int n_tables;               /* Number of elements in 'tables'. */

    /* Rules indexed on their cookie values, in all flow tables. */
    struct hindex cookies OVS_GUARDED_BY(ofproto_mutex);

    /* List of expirable flows, in all flow tables. */
    struct list expirable OVS_GUARDED_BY(ofproto_mutex);

    /* Meter table.
     * OpenFlow meters start at 1. To avoid confusion we leave the first
     * pointer in the array un-used, and index directly with the OpenFlow
     * meter_id. */
    struct ofputil_meter_features meter_features;
    struct meter **meters; /* 'meter_features.max_meter' + 1 pointers. */

    /* OpenFlow connections. */
    struct connmgr *connmgr;

    /* Flow table operation tracking.
     *
     * 'state' is meaningful only within ofproto.c, one of the enum
     * ofproto_state constants defined there.
     *
     * 'pending' is the list of "struct ofopgroup"s currently pending.
     *
     * 'n_pending' is the number of elements in 'pending'.
     *
     * 'deletions' contains pending ofoperations of type OFOPERATION_DELETE,
     * indexed on its rule's flow.*/
    int state;
    struct list pending OVS_GUARDED_BY(ofproto_mutex);
    unsigned int n_pending OVS_GUARDED_BY(ofproto_mutex);
    struct hmap deletions OVS_GUARDED_BY(ofproto_mutex);

    /* Delayed rule executions.
     *
     * We delay calls to ->ofproto_class->rule_execute() past releasing
     * ofproto_mutex during a flow_mod, because otherwise a "learn" action
     * triggered by executing the packet would try to recursively modify
     * the flow table and reacquire the global lock. */
    struct guarded_list rule_executes; /* Contains "struct rule_execute"s. */

    /* Flow table operation logging. */
    int n_add, n_delete, n_modify; /* Number of unreported ops of each kind. */
    long long int first_op, last_op; /* Range of times for unreported ops. */
    long long int next_op_report;    /* Time to report ops, or LLONG_MAX. */
    long long int op_backoff;        /* Earliest time to report ops again. */

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated. It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used. When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    unsigned long int *vlan_bitmap; /* 4096-bit bitmap of in-use VLANs. */
    bool vlans_changed;             /* True if new VLANs are in use. */
    int min_mtu;                    /* Current MTU of non-internal ports. */
};

void ofproto_init_tables(struct ofproto *, int n_tables);
void ofproto_init_max_ports(struct ofproto *, uint16_t max_ports);

struct ofproto *ofproto_lookup(const char *name);
struct ofport *ofproto_get_port(const struct ofproto *, ofp_port_t ofp_port);

/* An OpenFlow port within a "struct ofproto".
 *
 * With few exceptions, ofproto implementations may look at these fields but
 * should not modify them. */
struct ofport {
    struct hmap_node hmap_node; /* In struct ofproto's "ports" hmap. */
    struct ofproto *ofproto;    /* The ofproto that contains this port. */
    struct netdev *netdev;
    struct ofputil_phy_port pp;
    ofp_port_t ofp_port;        /* OpenFlow port number. */
    unsigned int change_seq;
    long long int created;      /* Time created, in msec. */
    int mtu;
};

void ofproto_port_set_state(struct ofport *, enum ofputil_port_state);

/* OpenFlow table flags:
 *
 * - "Hidden" tables are not included in OpenFlow operations that operate on
 * "all tables". For example, a request for flow stats on all tables will
 * omit flows in hidden tables, table stats requests will omit the table
 * entirely, and the switch features reply will not count the hidden table.
 *
 * However, operations that specifically name the particular table still
 * operate on it. For example, flow_mods and flow stats requests on a
 * hidden table work.
 *
 * To avoid gaps in table IDs (which have unclear validity in OpenFlow),
 * hidden tables must be the highest-numbered tables that a provider
 * implements.
 *
 * - "Read-only" tables can't be changed through OpenFlow operations. (At
 * the moment all flow table operations go effectively through OpenFlow, so
 * this means that read-only tables can't be changed at all after the
 * read-only flag is set.)
 *
 * The generic ofproto layer never sets these flags.
An ofproto provider can
 * set them if it is appropriate. */
enum oftable_flags {
    OFTABLE_HIDDEN = 1 << 0,   /* Hide from most OpenFlow operations. */
    OFTABLE_READONLY = 1 << 1  /* Don't allow OpenFlow to change this table. */
};

/* A flow table within a "struct ofproto".
 *
 *
 * Thread-safety
 * =============
 *
 * A cls->rwlock read-lock holder prevents rules from being added or deleted.
 *
 * Adding or removing rules requires holding ofproto_mutex AND the cls->rwlock
 * write-lock.
 *
 * cls->rwlock should be held only briefly. For extended access to a rule,
 * increment its ref_count with ofproto_rule_ref(). A rule will not be freed
 * until its ref_count reaches zero.
 *
 * Modifying a rule requires the rule's own mutex. Holding cls->rwlock (for
 * read or write) does not allow the holder to modify the rule.
 *
 * Freeing a rule requires ofproto_mutex and the cls->rwlock write-lock. After
 * removing the rule from the classifier, release a ref_count from the rule
 * ('cls''s reference to the rule).
 *
 * Refer to the thread-safety notes on struct rule for more information.*/
struct oftable {
    enum oftable_flags flags;   /* OFTABLE_* flag bits. */
    struct classifier cls;      /* Contains "struct rule"s. */
    char *name;                 /* Table name exposed via OpenFlow, or NULL. */

    /* Maximum number of flows or UINT_MAX if there is no limit besides any
     * limit imposed by resource limitations. */
    unsigned int max_flows;

    /* These members determine the handling of an attempt to add a flow that
     * would cause the table to have more than 'max_flows' flows.
     *
     * If 'eviction_fields' is NULL, overflows will be rejected with an error.
     *
     * If 'eviction_fields' is nonnull (regardless of whether n_eviction_fields
     * is nonzero), an overflow will cause a flow to be removed. The flow to
     * be removed is chosen to give fairness among groups distinguished by
     * different values for the subfields within 'groups'. */
    struct mf_subfield *eviction_fields;
    size_t n_eviction_fields;

    /* Eviction groups.
     *
     * When a flow is added that would cause the table to have more than
     * 'max_flows' flows, and 'eviction_fields' is nonnull, these groups are
     * used to decide which rule to evict: the rule is chosen from the eviction
     * group that contains the greatest number of rules.*/
    uint32_t eviction_group_id_basis;
    struct hmap eviction_groups_by_id;
    struct heap eviction_groups_by_size;
};

/* Assigns TABLE to each oftable, in turn, in OFPROTO.
 *
 * TABLE must be an assignable "struct oftable *"; OFPROTO must point to a
 * "struct ofproto" (its 'tables' and 'n_tables' members are used).
 *
 * All parameters are evaluated multiple times. */
#define OFPROTO_FOR_EACH_TABLE(TABLE, OFPROTO)              \
    for ((TABLE) = (OFPROTO)->tables;                       \
         (TABLE) < &(OFPROTO)->tables[(OFPROTO)->n_tables]; \
         (TABLE)++)

/* An OpenFlow flow within a "struct ofproto".
 *
 * With few exceptions, ofproto implementations may look at these fields but
 * should not modify them.
 *
 *
 * Thread-safety
 * =============
 *
 * Except near the beginning or ending of its lifespan, rule 'rule' belongs to
 * the classifier rule->ofproto->tables[rule->table_id].cls. The text below
 * calls this classifier 'cls'.
 *
 * Motivation
 * ----------
 *
 * The thread safety rules described here for "struct rule" are motivated by
 * two goals:
 *
 * - Prevent threads that read members of "struct rule" from reading bad
 * data due to changes by some thread concurrently modifying those
 * members.
 *
 * - Prevent two threads making changes to members of a given "struct rule"
 * from interfering with each other.
 *
 *
 * Rules
 * -----
 *
 * A rule 'rule' may be accessed without a risk of being freed by code that
 * holds a read-lock or write-lock on 'cls->rwlock' or that owns a reference to
 * 'rule->ref_count' (or both). Code that needs to hold onto a rule for a
 * while should take 'cls->rwlock', find the rule it needs, increment
 * 'rule->ref_count' with ofproto_rule_ref(), and drop 'cls->rwlock'.
 *
 * 'rule->ref_count' protects 'rule' from being freed.
It doesn't protect the * rule from being deleted from 'cls' (that's 'cls->rwlock') and it doesn't * protect members of 'rule' from modification (that's 'rule->rwlock'). * * 'rule->mutex' protects the members of 'rule' from modification. It doesn't * protect the rule from being deleted from 'cls' (that's 'cls->rwlock') and it * doesn't prevent the rule from being freed (that's 'rule->ref_count'). * * Regarding thread safety, the members of a rule fall into the following * categories: * * - Immutable. These members are marked 'const'. * * - Members that may be safely read or written only by code holding * ofproto_mutex. These are marked OVS_GUARDED_BY(ofproto_mutex). * * - Members that may be safely read only by code holding ofproto_mutex or * 'rule->mutex', and safely written only by coding holding ofproto_mutex * AND 'rule->mutex'. These are marked OVS_GUARDED. */ struct rule { /* Where this rule resides in an OpenFlow switch. * * These are immutable once the rule is constructed, hence 'const'. */ struct ofproto *const ofproto; /* The ofproto that contains this rule. */ const struct cls_rule cr; /* In owning ofproto's classifier. */ const uint8_t table_id; /* Index in ofproto's 'tables' array. */ /* Protects members marked OVS_GUARDED. * Readers only need to hold this mutex. * Writers must hold both this mutex AND ofproto_mutex. */ struct ovs_mutex mutex OVS_ACQ_AFTER(ofproto_mutex); /* Number of references. * The classifier owns one reference. * Any thread trying to keep a rule from being freed should hold its own * reference. */ atomic_uint ref_count; /* Operation now in progress, if nonnull. */ struct ofoperation *pending OVS_GUARDED_BY(ofproto_mutex); /* A "flow cookie" is the OpenFlow name for a 64-bit value associated with * a flow.. */ ovs_be64 flow_cookie OVS_GUARDED; struct hindex_node cookie_node OVS_GUARDED_BY(ofproto_mutex); /* Times. */ long long int created OVS_GUARDED; /* Creation time. 
*/ long long int modified OVS_GUARDED; /* Time of last modification. */ long long int used OVS_GUARDED; /* Last use; time created if never used. */ bool send_flow_removed; /* Send a flow removed message? */ /* Timeouts. */ uint16_t hard_timeout OVS_GUARDED; /* In seconds from ->modified. */ uint16_t idle_timeout OVS_GUARDED; /* In seconds from ->used. */ /* Eviction groups (see comment on struct eviction_group for explanation) . * * 'eviction_group' is this rule's eviction group, or NULL if it is not in * any eviction group. When 'eviction_group' is nonnull, 'evg_node' is in * the ->eviction_group->rules hmap. */ struct eviction_group *eviction_group OVS_GUARDED_BY(ofproto_mutex); struct heap_node evg_node OVS_GUARDED_BY(ofproto_mutex); /* OpenFlow actions. See struct rule_actions for more thread-safety * notes. */ struct rule_actions *actions OVS_GUARDED; /* In owning meter's 'rules' list. An empty list if there is no meter. */ struct list meter_list_node OVS_GUARDED_BY(ofproto_mutex); /* Flow monitors (e.g. for NXST_FLOW_MONITOR, related to struct ofmonitor). * * 'add_seqno' is the sequence number when this rule was created. * 'modify_seqno' is the sequence number when this rule was last modified. * See 'monitor_seqno' in connmgr.c for more information. */ enum nx_flow_monitor_flags monitor_flags OVS_GUARDED_BY(ofproto_mutex); uint64_t add_seqno OVS_GUARDED_BY(ofproto_mutex); uint64_t modify_seqno OVS_GUARDED_BY(ofproto_mutex); /* Optimisation for flow expiry. In ofproto's 'expirable' list if this * rule is expirable, otherwise empty. */ struct list expirable OVS_GUARDED_BY(ofproto_mutex); }; void ofproto_rule_ref(struct rule *); void ofproto_rule_unref(struct rule *); struct rule_actions *rule_get_actions(const struct rule *rule) OVS_EXCLUDED(rule->mutex); struct rule_actions *rule_get_actions__(const struct rule *rule) OVS_REQUIRES(rule->mutex); /* A set of actions within a "struct rule". 
* * * Thread-safety * ============= * * A struct rule_actions 'actions' may be accessed without a risk of being * freed by code that holds a read-lock or write-lock on 'rule->mutex' (where * 'rule' is the rule for which 'rule->actions == actions') or that owns a * reference to 'actions->ref_count' (or both). */ struct rule_actions { atomic_uint ref_count; /* These members are immutable: they do not change during the struct's * lifetime. */ struct ofpact *ofpacts; /* Sequence of "struct ofpacts". */ unsigned int ofpacts_len; /* Size of 'ofpacts', in bytes. */ uint32_t meter_id; /* Non-zero OF meter_id, or zero. */ }; struct rule_actions *rule_actions_create(const struct ofpact *, size_t); void rule_actions_ref(struct rule_actions *); void rule_actions_unref(struct rule_actions *); /* A set of rules to which an OpenFlow operation applies. */ struct rule_collection { struct rule **rules; /* The rules. */ size_t n; /* Number of rules collected. */ size_t capacity; /* Number of rules that will fit in 'rules'. */ struct rule *stub[64]; /* Preallocated rules to avoid malloc(). */ }; void rule_collection_init(struct rule_collection *); void rule_collection_add(struct rule_collection *, struct rule *); void rule_collection_ref(struct rule_collection *) OVS_REQUIRES(ofproto_mutex); void rule_collection_unref(struct rule_collection *); void rule_collection_destroy(struct rule_collection *); /* Threshold at which to begin flow table eviction. Only affects the * ofproto-dpif implementation */ extern unsigned flow_eviction_threshold; /* Number of upcall handler threads. Only affects the ofproto-dpif * implementation. */ extern unsigned n_handler_threads; /* Determines which model to use for handling misses in the ofproto-dpif * implementation */ extern enum ofproto_flow_miss_model flow_miss_model; static inline struct rule * rule_from_cls_rule(const struct cls_rule *cls_rule) { return cls_rule ? 
CONTAINER_OF(cls_rule, struct rule, cr) : NULL; } void ofproto_rule_expire(struct rule *rule, uint8_t reason) OVS_REQUIRES(ofproto_mutex); void ofproto_rule_delete(struct ofproto *, struct rule *) OVS_EXCLUDED(ofproto_mutex); void ofproto_rule_reduce_timeouts(struct rule *rule, uint16_t idle_timeout, uint16_t hard_timeout) OVS_EXCLUDED(ofproto_mutex); void ofoperation_complete(struct ofoperation *, enum ofperr); bool ofoperation_has_out_port(const struct ofoperation *, ofp_port_t out_port) OVS_REQUIRES(ofproto_mutex); /* ofproto class structure, to be defined by each ofproto implementation. * * * Data Structures * =============== * * These functions work primarily with three different kinds of data * structures: * * - "struct ofproto", which represents an OpenFlow switch. * * - "struct ofport", which represents a port within an ofproto. * * - "struct rule", which represents an OpenFlow flow within an ofproto. * * Each of these data structures contains all of the implementation-independent * generic state for the respective concept, called the "base" state. None of * them contains any extra space for ofproto implementations to use. Instead, * each implementation is expected to declare its own data structure that * contains an instance of the generic data structure plus additional * implementation-specific members, called the "derived" state. The * implementation can use casts or (preferably) the CONTAINER_OF macro to * obtain access to derived state given only a pointer to the embedded generic * data structure. * * * Life Cycle * ========== * * Four stylized functions accompany each of these data structures: * * "alloc" "construct" "destruct" "dealloc" * ------------ ---------------- --------------- -------------- * ofproto ->alloc ->construct ->destruct ->dealloc * ofport ->port_alloc ->port_construct ->port_destruct ->port_dealloc * rule ->rule_alloc ->rule_construct ->rule_destruct ->rule_dealloc * * "ofproto" and "ofport" have this exact life cycle. 
The "rule" data * structure also follows this life cycle with some additional elaborations * described under "Rule Life Cycle" below. * * Any instance of a given data structure goes through the following life * cycle: * * 1. The client calls the "alloc" function to obtain raw memory. If "alloc" * fails, skip all the other steps. * * 2. The client initializes all of the data structure's base state. If this * fails, skip to step 7. * * 3. The client calls the "construct" function. The implementation * initializes derived state. It may refer to the already-initialized * base state. If "construct" fails, skip to step 6. * * 4. The data structure is now initialized and in use. * * 5. When the data structure is no longer needed, the client calls the * "destruct" function. The implementation uninitializes derived state. * The base state has not been uninitialized yet, so the implementation * may still refer to it. * * 6. The client uninitializes all of the data structure's base state. * * 7. The client calls the "dealloc" function to free the raw memory. The * implementation must not refer to base or derived state in the data * structure, because it has already been uninitialized. * * Each "alloc" function allocates and returns a new instance of the respective * data structure. The "alloc" function is not given any information about the * use of the new data structure, so it cannot perform much initialization. * Its purpose is just to ensure that the new data structure has enough room * for base and derived state. It may return a null pointer if memory is not * available, in which case none of the other functions is called. * * Each "construct" function initializes derived state in its respective data * structure. When "construct" is called, all of the base state has already * been initialized, so the "construct" function may refer to it. The * "construct" function is allowed to fail, in which case the client calls the * "dealloc" function (but not the "destruct" function). 
* * Each "destruct" function uninitializes and frees derived state in its * respective data structure. When "destruct" is called, the base state has * not yet been uninitialized, so the "destruct" function may refer to it. The * "destruct" function is not allowed to fail. * * Each "dealloc" function frees raw memory that was allocated by the the * "alloc" function. The memory's base and derived members might not have ever * been initialized (but if "construct" returned successfully, then it has been * "destruct"ed already). The "dealloc" function is not allowed to fail. * * * Conventions * =========== * * Most of these functions return 0 if they are successful or a positive error * code on failure. Depending on the function, valid error codes are either * errno values or OFPERR_* OpenFlow error codes. * * Most of these functions are expected to execute synchronously, that is, to * block as necessary to obtain a result. Thus, these functions may return * EAGAIN (or EWOULDBLOCK or EINPROGRESS) only where the function descriptions * explicitly say those errors are a possibility. We may relax this * requirement in the future if and when we encounter performance problems. */ struct ofproto_class { /* ## ----------------- ## */ /* ## Factory Functions ## */ /* ## ----------------- ## */ /* Initializes provider. The caller may pass in 'iface_hints', * which contains an shash of "struct iface_hint" elements indexed * by the interface's name. The provider may use these hints to * describe the startup configuration in order to reinitialize its * state. The caller owns the provided data, so a provider must * make copies of anything required. An ofproto provider must * remove any existing state that is not described by the hint, and * may choose to remove it all. */ void (*init)(const struct shash *iface_hints); /* Enumerates the types of all support ofproto types into 'types'. 
The * caller has already initialized 'types' and other ofproto classes might * already have added names to it. */ void (*enumerate_types)(struct sset *types); /* Enumerates the names of all existing datapath of the specified 'type' * into 'names' 'all_dps'. The caller has already initialized 'names' as * an empty sset. * * 'type' is one of the types enumerated by ->enumerate_types(). * * Returns 0 if successful, otherwise a positive errno value. */ int (*enumerate_names)(const char *type, struct sset *names); /* Deletes the datapath with the specified 'type' and 'name'. The caller * should have closed any open ofproto with this 'type' and 'name'; this * function is allowed to fail if that is not the case. * * 'type' is one of the types enumerated by ->enumerate_types(). * 'name' is one of the names enumerated by ->enumerate_names() for 'type'. * * Returns 0 if successful, otherwise a positive errno value. */ int (*del)(const char *type, const char *name); /* Returns the type to pass to netdev_open() when a datapath of type * 'datapath_type' has a port of type 'port_type', for a few special * cases when a netdev type differs from a port type. For example, * when using the userspace datapath, a port of type "internal" * needs to be opened as "tap". * * Returns either 'type' itself or a string literal, which must not * be freed. */ const char *(*port_open_type)(const char *datapath_type, const char *port_type); /* ## ------------------------ ## */ /* ## Top-Level type Functions ## */ /* ## ------------------------ ## */ /* Performs any periodic activity required on ofprotos of type * 'type'. * * An ofproto provider may implement it or not, depending on whether * it needs type-level maintenance. * * Returns 0 if successful, otherwise a positive errno value. */ int (*type_run)(const char *type); /* Performs periodic activity required on ofprotos of type 'type' * that needs to be done with the least possible latency. * * This is run multiple times per main loop. 
An ofproto provider may * implement it or not, according to whether it provides a performance * boost for that ofproto implementation. * * Returns 0 if successful, otherwise a positive errno value. */ int (*type_run_fast)(const char *type); /* Causes the poll loop to wake up when a type 'type''s 'run' * function needs to be called, e.g. by calling the timer or fd * waiting functions in poll-loop.h. * * An ofproto provider may implement it or not, depending on whether * it needs type-level maintenance. */ void (*type_wait)(const char *type); /* ## --------------------------- ## */ /* ## Top-Level ofproto Functions ## */ /* ## --------------------------- ## */ /* Life-cycle functions for an "ofproto" (see "Life Cycle" above). * * * Construction * ============ * * ->construct() should not modify any base members of the ofproto. The * client will initialize the ofproto's 'ports' and 'tables' members after * construction is complete. * * When ->construct() is called, the client does not yet know how many flow * tables the datapath supports, so ofproto->n_tables will be 0 and * ofproto->tables will be NULL. ->construct() should call * ofproto_init_tables() to allocate and initialize ofproto->n_tables and * ofproto->tables. Each flow table will be initially empty, so * ->construct() should delete flows from the underlying datapath, if * necessary, rather than populating the tables. * * If the ofproto knows the maximum port number that the datapath can have, * then it can call ofproto_init_max_ports(). If it does so, then the * client will ensure that the actions it allows to be used through * OpenFlow do not refer to ports above that maximum number. * * Only one ofproto instance needs to be supported for any given datapath. * If a datapath is already open as part of one "ofproto", then another * attempt to "construct" the same datapath as part of another ofproto is * allowed to fail with an error. 
* * ->construct() returns 0 if successful, otherwise a positive errno * value. * * * Destruction * =========== * * If 'ofproto' has any pending asynchronous operations, ->destruct() * must complete all of them by calling ofoperation_complete(). * * ->destruct() must also destroy all remaining rules in the ofproto's * tables, by passing each remaining rule to ofproto_rule_delete(), and * then complete each of those deletions in turn by calling * ofoperation_complete(). * * (Thus, there is a multi-step process for any rule currently being * inserted or modified at the beginning of destruction: first * ofoperation_complete() that operation, then ofproto_rule_delete() the * rule, then ofoperation_complete() the deletion operation.) * * The client will destroy the flow tables themselves after ->destruct() * returns. */ struct ofproto *(*alloc)(void); int (*construct)(struct ofproto *ofproto); void (*destruct)(struct ofproto *ofproto); void (*dealloc)(struct ofproto *ofproto); /* Performs any periodic activity required by 'ofproto'. It should: * * - Call connmgr_send_packet_in() for each received packet that missed * in the OpenFlow flow table or that had a OFPP_CONTROLLER output * action. * * - Call ofproto_rule_expire() for each OpenFlow flow that has reached * its hard_timeout or idle_timeout, to expire the flow. * * (But rules that are part of a pending operation, e.g. rules for * which ->pending is true, may not expire.) * * Returns 0 if successful, otherwise a positive errno value. */ int (*run)(struct ofproto *ofproto); /* Performs periodic activity required by 'ofproto' that needs to be done * with the least possible latency. * * This is run multiple times per main loop. An ofproto provider may * implement it or not, according to whether it provides a performance * boost for that ofproto implementation. */ int (*run_fast)(struct ofproto *ofproto); /* Causes the poll loop to wake up when 'ofproto''s 'run' function needs to * be called, e.g. 
by calling the timer or fd waiting functions in * poll-loop.h. */ void (*wait)(struct ofproto *ofproto); /* Adds some memory usage statistics for the implementation of 'ofproto' * into 'usage', for use with memory_report(). * * This function is optional. */ void (*get_memory_usage)(const struct ofproto *ofproto, struct simap *usage); /* Every "struct rule" in 'ofproto' is about to be deleted, one by one. * This function may prepare for that, for example by clearing state in * advance. It should *not* actually delete any "struct rule"s from * 'ofproto', only prepare for it. * * This function is optional; it's really just for optimization in case * it's cheaper to delete all the flows from your hardware in a single pass * than to do it one by one. */ void (*flush)(struct ofproto *ofproto); /* Helper for the OpenFlow OFPT_FEATURES_REQUEST request. * * The implementation should store true in '*arp_match_ip' if the switch * supports matching IP addresses inside ARP requests and replies, false * otherwise. * * The implementation should store in '*actions' a bitmap of the supported * OpenFlow actions. Vendor actions are not included in '*actions'. */ void (*get_features)(struct ofproto *ofproto, bool *arp_match_ip, enum ofputil_action_bitmap *actions); /* Helper for the OpenFlow OFPST_TABLE statistics request. * * The 'ots' array contains 'ofproto->n_tables' elements. Each element is * initialized as: * * - 'table_id' to the array index. * * - 'name' to "table#" where # is the table ID. * * - 'match' and 'wildcards' to OFPXMT12_MASK. * * - 'write_actions' and 'apply_actions' to OFPAT12_OUTPUT. * * - 'write_setfields' and 'apply_setfields' to OFPXMT12_MASK. * * - 'metadata_match' and 'metadata_write' to UINT64_MAX. * * - 'instructions' to OFPIT11_ALL. * * - 'config' to OFPTC11_TABLE_MISS_MASK. * * - 'max_entries' to 1,000,000. * * - 'active_count' to the classifier_count() for the table. * * - 'lookup_count' and 'matched_count' to 0. 
* * The implementation should update any members in each element for which * it has better values: * * - 'name' to a more meaningful name. * * - 'wildcards' to the set of wildcards actually supported by the table * (if it doesn't support all OpenFlow wildcards). * * - 'instructions' to set the instructions actually supported by * the table. * * - 'write_actions' to set the write actions actually supported by * the table (if it doesn't support all OpenFlow actions). * * - 'apply_actions' to set the apply actions actually supported by * the table (if it doesn't support all OpenFlow actions). * * - 'write_setfields' to set the write setfields actually supported by * the table. * * - 'apply_setfields' to set the apply setfields actually supported by * the table. * * - 'max_entries' to the maximum number of flows actually supported by * the hardware. * * - 'lookup_count' to the number of packets looked up in this flow table * so far. * * - 'matched_count' to the number of packets looked up in this flow * table so far that matched one of the flow entries. * * All of the members of struct ofp12_table_stats are in network byte * order. */ void (*get_tables)(struct ofproto *ofproto, struct ofp12_table_stats *ots); /* ## ---------------- ## */ /* ## ofport Functions ## */ /* ## ---------------- ## */ /* Life-cycle functions for a "struct ofport" (see "Life Cycle" above). * * ->port_construct() should not modify any base members of the ofport. * An ofproto implementation should use the 'ofp_port' member of * "struct ofport" as the OpenFlow port number. * * ofports are managed by the base ofproto code. The ofproto * implementation should only create and destroy them in response to calls * to these functions. 
The base ofproto code will create and destroy * ofports in the following situations: * * - Just after the ->construct() function is called, the base ofproto * iterates over all of the implementation's ports, using * ->port_dump_start() and related functions, and constructs an ofport * for each dumped port. * * - If ->port_poll() reports that a specific port has changed, then the * base ofproto will query that port with ->port_query_by_name() and * construct or destruct ofports as necessary to reflect the updated * set of ports. * * - If ->port_poll() returns ENOBUFS to report an unspecified port set * change, then the base ofproto will iterate over all of the * implementation's ports, in the same way as at ofproto * initialization, and construct and destruct ofports to reflect all of * the changes. * * ->port_construct() returns 0 if successful, otherwise a positive errno * value. */ struct ofport *(*port_alloc)(void); int (*port_construct)(struct ofport *ofport); void (*port_destruct)(struct ofport *ofport); void (*port_dealloc)(struct ofport *ofport); /* Called after 'ofport->netdev' is replaced by a new netdev object. If * the ofproto implementation uses the ofport's netdev internally, then it * should switch to using the new one. The old one has been closed. * * An ofproto implementation that doesn't need to do anything in this * function may use a null pointer. */ void (*port_modified)(struct ofport *ofport); /* Called after an OpenFlow request changes a port's configuration. * 'ofport->pp.config' contains the new configuration. 'old_config' * contains the previous configuration. * * The caller implements OFPUTIL_PC_PORT_DOWN using netdev functions to * turn NETDEV_UP on and off, so this function doesn't have to do anything * for that bit (and it won't be called if that is the only bit that * changes). */ void (*port_reconfigured)(struct ofport *ofport, enum ofputil_port_config old_config); /* Looks up a port named 'devname' in 'ofproto'. 
On success, initializes * '*port' appropriately. * * The caller owns the data in 'port' and must free it with * ofproto_port_destroy() when it is no longer needed. */ int (*port_query_by_name)(const struct ofproto *ofproto, const char *devname, struct ofproto_port *port); /* Attempts to add 'netdev' as a port on 'ofproto'. Returns 0 if * successful, otherwise a positive errno value. The caller should * inform the implementation of the OpenFlow port through the * ->port_construct() method. * * It doesn't matter whether the new port will be returned by a later call * to ->port_poll(); the implementation may do whatever is more * convenient. */ int (*port_add)(struct ofproto *ofproto, struct netdev *netdev); /* Deletes port number 'ofp_port' from the datapath for 'ofproto'. Returns * 0 if successful, otherwise a positive errno value. * * It doesn't matter whether the new port will be returned by a later call * to ->port_poll(); the implementation may do whatever is more * convenient. */ int (*port_del)(struct ofproto *ofproto, ofp_port_t ofp_port); /* Get port stats */ int (*port_get_stats)(const struct ofport *port, struct netdev_stats *stats); /* Port iteration functions. * * The client might not be entirely in control of the ports within an * ofproto. Some hardware implementations, for example, might have a fixed * set of ports in a datapath. For this reason, the client needs a way to * iterate through all the ports that are actually in a datapath. These * functions provide that functionality. * * The 'state' pointer provides the implementation a place to * keep track of its position. Its format is opaque to the caller. * * The ofproto provider retains ownership of the data that it stores into * ->port_dump_next()'s 'port' argument. The data must remain valid until * at least the next call to ->port_dump_next() or ->port_dump_done() for * 'state'. The caller will not modify or free it. 
* * Details * ======= * * ->port_dump_start() attempts to begin dumping the ports in 'ofproto'. * On success, it should return 0 and initialize '*statep' with any data * needed for iteration. On failure, returns a positive errno value, and * the client will not call ->port_dump_next() or ->port_dump_done(). * * ->port_dump_next() attempts to retrieve another port from 'ofproto' for * 'state'. If there is another port, it should store the port's * information into 'port' and return 0. It should return EOF if all ports * have already been iterated. Otherwise, on error, it should return a * positive errno value. This function will not be called again once it * returns nonzero once for a given iteration (but the 'port_dump_done' * function will be called afterward). * * ->port_dump_done() allows the implementation to release resources used * for iteration. The caller might decide to stop iteration in the middle * by calling this function before ->port_dump_next() returns nonzero. * * Usage Example * ============= * * int error; * void *state; * * error = ofproto->ofproto_class->port_dump_start(ofproto, &state); * if (!error) { * for (;;) { * struct ofproto_port port; * * error = ofproto->ofproto_class->port_dump_next( * ofproto, state, &port); * if (error) { * break; * } * // Do something with 'port' here (without modifying or freeing * // any of its data). * } * ofproto->ofproto_class->port_dump_done(ofproto, state); * } * // 'error' is now EOF (success) or a positive errno value (failure). */ int (*port_dump_start)(const struct ofproto *ofproto, void **statep); int (*port_dump_next)(const struct ofproto *ofproto, void *state, struct ofproto_port *port); int (*port_dump_done)(const struct ofproto *ofproto, void *state); /* Polls for changes in the set of ports in 'ofproto'. 
If the set of ports * in 'ofproto' has changed, then this function should do one of the * following: * * - Preferably: store the name of the device that was added to or deleted * from 'ofproto' in '*devnamep' and return 0. The caller is responsible * for freeing '*devnamep' (with free()) when it no longer needs it. * * - Alternatively: return ENOBUFS, without indicating the device that was * added or deleted. * * Occasional 'false positives', in which the function returns 0 while * indicating a device that was not actually added or deleted or returns * ENOBUFS without any change, are acceptable. * * The purpose of 'port_poll' is to let 'ofproto' know about changes made * externally to the 'ofproto' object, e.g. by a system administrator via * ovs-dpctl. Therefore, it's OK, and even preferable, for port_poll() to * not report changes made through calls to 'port_add' or 'port_del' on the * same 'ofproto' object. (But it's OK for it to report them too, just * slightly less efficient.) * * If the set of ports in 'ofproto' has not changed, returns EAGAIN. May * also return other positive errno values to indicate that something has * gone wrong. * * If the set of ports in a datapath is fixed, or if the only way that the * set of ports in a datapath can change is through ->port_add() and * ->port_del(), then this function may be a null pointer. */ int (*port_poll)(const struct ofproto *ofproto, char **devnamep); /* Arranges for the poll loop to wake up when ->port_poll() will return a * value other than EAGAIN. * * If the set of ports in a datapath is fixed, or if the only way that the * set of ports in a datapath can change is through ->port_add() and * ->port_del(), or if the poll loop will always wake up anyway when * ->port_poll() will return a value other than EAGAIN, then this function * may be a null pointer. */ void (*port_poll_wait)(const struct ofproto *ofproto); /* Checks the status of LACP negotiation for 'port'. 
Returns 1 if LACP * partner information for 'port' is up-to-date, 0 if LACP partner * information is not current (generally indicating a connectivity * problem), or -1 if LACP is not enabled on 'port'. * * This function may be a null pointer if the ofproto implementation does * not support LACP. */ int (*port_is_lacp_current)(const struct ofport *port); /* ## ----------------------- ## */ /* ## OpenFlow Rule Functions ## */ /* ## ----------------------- ## */ /* Chooses an appropriate table for 'match' within 'ofproto'. On * success, stores the table ID into '*table_idp' and returns 0. On * failure, returns an OpenFlow error code. * * The choice of table should be a function of 'match' and 'ofproto''s * datapath capabilities. It should not depend on the flows already in * 'ofproto''s flow tables. Failure implies that an OpenFlow rule with * 'match' as its matching condition can never be inserted into 'ofproto', * even starting from an empty flow table. * * If multiple tables are candidates for inserting the flow, the function * should choose one arbitrarily (but deterministically). * * If this function is NULL then table 0 is always chosen. */ enum ofperr (*rule_choose_table)(const struct ofproto *ofproto, const struct match *match, uint8_t *table_idp); /* Life-cycle functions for a "struct rule". * * * Rule Life Cycle * =============== * * The life cycle of a struct rule is an elaboration of the basic life * cycle described above under "Life Cycle". * * After a rule is successfully constructed, it is then inserted. If * insertion completes successfully, then before it is later destructed, it * is deleted. * * You can think of a rule as having the following extra steps inserted * between "Life Cycle" steps 4 and 5: * * 4.1. The client inserts the rule into the flow table, making it * visible in flow table lookups. * * 4.2. The client calls "rule_insert". 
Immediately or eventually, the * implementation calls ofoperation_complete() to indicate that the * insertion completed. If the operation failed, skip to step 5. * * 4.3. The rule is now installed in the flow table. Eventually it will * be deleted. * * 4.4. The client removes the rule from the flow table. It is no longer * visible in flow table lookups. * * 4.5. The client calls "rule_delete". Immediately or eventually, the * implementation calls ofoperation_complete() to indicate that the * deletion completed. Deletion is not allowed to fail, so it must * be successful. * * * Asynchronous Operation Support * ============================== * * The "insert" and "delete" life-cycle operations on rules can operate * asynchronously, meaning that ->rule_insert() and ->rule_delete() only * need to initiate their respective operations and do not need to wait for * them to complete before they return. ->rule_modify_actions() also * operates asynchronously. * * An ofproto implementation reports the success or failure of an * asynchronous operation on a rule using the rule's 'pending' member, * which points to an opaque "struct ofoperation" that represents the * ongoing operation. When the operation completes, the ofproto * implementation calls ofoperation_complete(), passing the ofoperation and * an error indication. * * Only the following contexts may call ofoperation_complete(): * * - The function called to initiate the operation, e.g. ->rule_insert() * or ->rule_delete(). This is the best choice if the operation * completes quickly. * * - The implementation's ->run() function. * * - The implementation's ->destruct() function. * * The ofproto base code updates the flow table optimistically, assuming * that the operation will probably succeed: * * - ofproto adds the rule to the flow table before calling * ->rule_insert(). * * - ofproto updates the rule's actions and other properties before * calling ->rule_modify_actions(). 
* * - ofproto removes the rule before calling ->rule_delete(). * * With one exception, when an asynchronous operation completes with an * error, ofoperation_complete() backs out the already applied changes: * * - If adding a rule in the flow table fails, ofproto removes the new * rule. * * - If modifying a rule fails, ofproto restores the original actions * (and other properties). * * - Removing a rule is not allowed to fail. It must always succeed. * * The ofproto base code serializes operations: if any operation is in * progress on a given rule, ofproto postpones initiating any new operation * on that rule until the pending operation completes. Therefore, every * operation must eventually complete through a call to * ofoperation_complete() to avoid delaying new operations indefinitely * (including any OpenFlow request that affects the rule in question, even * just to query its statistics). * * * Construction * ============ * * When ->rule_construct() is called, 'rule' is a new rule that is not yet * inserted into a flow table. ->rule_construct() should initialize enough * of the rule's derived state for 'rule' to be suitable for inserting into * a flow table. ->rule_construct() should not modify any base members of * struct rule. * * If ->rule_construct() fails (as indicated by returning a nonzero * OpenFlow error code), the ofproto base code will uninitialize and * deallocate 'rule'. See "Rule Life Cycle" above for more details. * * ->rule_construct() may also: * * - Validate that the datapath supports the matching rule in 'rule->cr' * datapath. For example, if the rule's table does not support * registers, then it is an error if 'rule->cr' does not wildcard all * registers. * * - Validate that the datapath can correctly implement 'rule->ofpacts'. * * Some implementations might need to defer these tasks to ->rule_insert(), * which is also acceptable. 
* * * Insertion * ========= * * Following successful construction, the ofproto base code inserts 'rule' * into its flow table, then it calls ->rule_insert(). ->rule_insert() * should set in motion adding the new rule to the datapath flow table. It * must act as follows: * * - If it completes insertion, either by succeeding or failing, it must * call ofoperation_complete(). * * - If insertion is only partially complete, then it must return without * calling ofoperation_complete(). Later, when the insertion is * complete, the ->run() or ->destruct() function must call * ofoperation_complete() to report success or failure. * * If ->rule_insert() fails, the ofproto base code will remove 'rule' from * the flow table, destruct, uninitialize, and deallocate 'rule'. See * "Rule Life Cycle" above for more details. * * * Deletion * ======== * * The ofproto base code removes 'rule' from its flow table before it calls * ->rule_delete(). ->rule_delete() should set in motion removing 'rule' * from the datapath flow table. It must act as follows: * * - If it completes deletion, it must call ofoperation_complete(). * * - If deletion is only partially complete, then it must return without * calling ofoperation_complete(). Later, when the deletion is * complete, the ->run() or ->destruct() function must call * ofoperation_complete(). * * Rule deletion must not fail. * * * Destruction * =========== * * ->rule_destruct() must uninitialize derived state. * * Rule destruction must not fail.
*/ struct rule *(*rule_alloc)(void); enum ofperr (*rule_construct)(struct rule *rule) /* OVS_REQUIRES(ofproto_mutex) */; void (*rule_insert)(struct rule *rule) /* OVS_REQUIRES(ofproto_mutex) */; void (*rule_delete)(struct rule *rule) /* OVS_REQUIRES(ofproto_mutex) */; void (*rule_destruct)(struct rule *rule); void (*rule_dealloc)(struct rule *rule); /* Obtains statistics for 'rule', storing the number of packets that have * matched it in '*packet_count' and the number of bytes in those packets * in '*byte_count'. UINT64_MAX indicates that the packet count or byte * count is unknown. */ void (*rule_get_stats)(struct rule *rule, uint64_t *packet_count, uint64_t *byte_count) /* OVS_EXCLUDED(ofproto_mutex) */; /* Applies the actions in 'rule' to 'packet'. (This implements sending * buffered packets for OpenFlow OFPT_FLOW_MOD commands.) * * Takes ownership of 'packet' (so it should eventually free it, with * ofpbuf_delete()). * * 'flow' reflects the flow information for 'packet'. All of the * information in 'flow' is extracted from 'packet', except for * flow->tunnel and flow->in_port, which are assigned the correct values * for the incoming packet. The register values are zeroed. 'packet''s * header pointers (e.g. packet->l3) are appropriately initialized. * packet->l3 is aligned on a 32-bit boundary. * * The implementation should add the statistics for 'packet' into 'rule'. * * Returns 0 if successful, otherwise an OpenFlow error code. */ enum ofperr (*rule_execute)(struct rule *rule, const struct flow *flow, struct ofpbuf *packet); /* When ->rule_modify_actions() is called, the caller has already replaced * the OpenFlow actions in 'rule' by a new set. (The original actions are * in rule->pending->actions.) * * ->rule_modify_actions() should set the following in motion: * * - Validate that the datapath can correctly implement the actions now * in 'rule'. * * - Update the datapath flow table with the new actions. 
* * - Only if 'reset_counters' is true, reset any packet or byte counters * associated with the rule to zero, so that rule_get_stats() will no * longer count those packets or bytes. * * If the operation synchronously completes, ->rule_modify_actions() may * call ofoperation_complete() before it returns. Otherwise, ->run() * should call ofoperation_complete() later, after the operation does * complete. * * If the operation fails, then the base ofproto code will restore the * original 'actions' and 'n_actions' of 'rule'. * * ->rule_modify_actions() should not modify any base members of struct * rule. */ void (*rule_modify_actions)(struct rule *rule, bool reset_counters) /* OVS_REQUIRES(ofproto_mutex) */; /* Changes the OpenFlow IP fragment handling policy to 'frag_handling', * which takes one of the following values, with the corresponding * meanings: * * - OFPC_FRAG_NORMAL: The switch should treat IP fragments the same way * as other packets, omitting TCP and UDP port numbers (always setting * them to 0). * * - OFPC_FRAG_DROP: The switch should drop all IP fragments without * passing them through the flow table. * * - OFPC_FRAG_REASM: The switch should reassemble IP fragments before * passing packets through the flow table. * * - OFPC_FRAG_NX_MATCH (a Nicira extension): Similar to OFPC_FRAG_NORMAL, * except that TCP and UDP port numbers should be included in fragments * with offset 0. * * Implementations are not required to support every mode. * OFPC_FRAG_NORMAL is the default mode when an ofproto is created. * * At the time of the call to ->set_frag_handling(), the current mode is * available in 'ofproto->frag_handling'. ->set_frag_handling() returns * true if the requested mode was set, false if it is not supported. * * Upon successful return, the caller changes 'ofproto->frag_handling' to * reflect the new mode. */ bool (*set_frag_handling)(struct ofproto *ofproto, enum ofp_config_flags frag_handling); /* Implements the OpenFlow OFPT_PACKET_OUT command.
The datapath should * execute the 'ofpacts_len' bytes of "struct ofpacts" in 'ofpacts'. * * The caller retains ownership of 'packet' and of 'ofpacts', so * ->packet_out() should not modify or free them. * * This function must validate that it can correctly implement 'ofpacts'. * If not, then it should return an OpenFlow error code. * * 'flow' reflects the flow information for 'packet'. All of the * information in 'flow' is extracted from 'packet', except for * flow->in_port (see below). flow->tunnel and its register values are * zeroed. * * flow->in_port comes from the OpenFlow OFPT_PACKET_OUT message. The * implementation should reject invalid flow->in_port values by returning * OFPERR_OFPBRC_BAD_PORT. (If the implementation called * ofproto_init_max_ports(), then the client will reject these ports * itself.) For consistency, the implementation should consider valid for * flow->in_port any value that could possibly be seen in a packet that it * passes to connmgr_send_packet_in(). Ideally, even an implementation * that never generates packet-ins (e.g. due to hardware limitations) * should still allow flow->in_port values for every possible physical port * and OFPP_LOCAL. The only virtual ports (those above OFPP_MAX) that the * caller will ever pass in as flow->in_port, other than OFPP_LOCAL, are * OFPP_NONE and OFPP_CONTROLLER. The implementation should allow both of * these, treating each of them as packets generated by the controller as * opposed to packets originating from some switch port. * * (Ordinarily the only effect of flow->in_port is on output actions that * involve the input port, such as actions that output to OFPP_IN_PORT, * OFPP_FLOOD, or OFPP_ALL. flow->in_port can also affect Nicira extension * "resubmit" actions.) * * 'packet' is not matched against the OpenFlow flow table, so its * statistics should not be included in OpenFlow flow statistics. * * Returns 0 if successful, otherwise an OpenFlow error code. 
*/ enum ofperr (*packet_out)(struct ofproto *ofproto, struct ofpbuf *packet, const struct flow *flow, const struct ofpact *ofpacts, size_t ofpacts_len); /* ## ------------------------- ## */ /* ## OFPP_NORMAL configuration ## */ /* ## ------------------------- ## */ /* Configures NetFlow on 'ofproto' according to the options in * 'netflow_options', or turns off NetFlow if 'netflow_options' is NULL. * * EOPNOTSUPP as a return value indicates that 'ofproto' does not support * NetFlow, as does a null pointer. */ int (*set_netflow)(struct ofproto *ofproto, const struct netflow_options *netflow_options); void (*get_netflow_ids)(const struct ofproto *ofproto, uint8_t *engine_type, uint8_t *engine_id); /* Configures sFlow on 'ofproto' according to the options in * 'sflow_options', or turns off sFlow if 'sflow_options' is NULL. * * EOPNOTSUPP as a return value indicates that 'ofproto' does not support * sFlow, as does a null pointer. */ int (*set_sflow)(struct ofproto *ofproto, const struct ofproto_sflow_options *sflow_options); /* Configures IPFIX on 'ofproto' according to the options in * 'bridge_exporter_options' and the 'flow_exporters_options' * array, or turns off IPFIX if 'bridge_exporter_options' and * 'flow_exporters_options' is NULL. * * EOPNOTSUPP as a return value indicates that 'ofproto' does not support * IPFIX, as does a null pointer. */ int (*set_ipfix)( struct ofproto *ofproto, const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options, const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options, size_t n_flow_exporters_options); /* Configures connectivity fault management on 'ofport'. * * If 'cfm_settings' is nonnull, configures CFM according to its members. * * If 'cfm_settings' is null, removes any connectivity fault management * configuration from 'ofport'. * * EOPNOTSUPP as a return value indicates that this ofproto_class does not * support CFM, as does a null pointer. 
*/ int (*set_cfm)(struct ofport *ofport, const struct cfm_settings *s); /* Checks the status of CFM configured on 'ofport'. Returns true if the * port's CFM status was successfully stored into '*status'. Returns false * if the port did not have CFM configured, in which case '*status' is * indeterminate. * * The caller must provide and owns '*status', but it does not own and must * not modify or free the array returned in 'status->rmps'. */ bool (*get_cfm_status)(const struct ofport *ofport, struct ofproto_cfm_status *status); /* Configures BFD on 'ofport'. * * If 'cfg' is NULL, or 'cfg' does not contain the key value pair * "enable=true", removes BFD from 'ofport'. Otherwise, configures BFD * according to 'cfg'. * * EOPNOTSUPP as a return value indicates that this ofproto_class does not * support BFD, as does a null pointer. */ int (*set_bfd)(struct ofport *ofport, const struct smap *cfg); /* Populates 'smap' with the status of BFD on 'ofport'. Returns 0 on * success, or a positive errno. EOPNOTSUPP as a return value indicates * that this ofproto_class does not support BFD, as does a null pointer. */ int (*get_bfd_status)(struct ofport *ofport, struct smap *smap); /* Configures spanning tree protocol (STP) on 'ofproto' using the * settings defined in 's'. * * If 's' is nonnull, configures STP according to its members. * * If 's' is null, removes any STP configuration from 'ofproto'. * * EOPNOTSUPP as a return value indicates that this ofproto_class does not * support STP, as does a null pointer. */ int (*set_stp)(struct ofproto *ofproto, const struct ofproto_stp_settings *s); /* Retrieves state of spanning tree protocol (STP) on 'ofproto'. * * Stores STP state for 'ofproto' in 's'. If the 'enabled' member * is false, the other member values are not meaningful. * * EOPNOTSUPP as a return value indicates that this ofproto_class does not * support STP, as does a null pointer. 
*/ int (*get_stp_status)(struct ofproto *ofproto, struct ofproto_stp_status *s); /* Configures spanning tree protocol (STP) on 'ofport' using the * settings defined in 's'. * * If 's' is nonnull, configures STP according to its members. The * caller is responsible for assigning STP port numbers (using the * 'port_num' member in the range of 1 through 255, inclusive) and * ensuring there are no duplicates. * * If 's' is null, removes any STP configuration from 'ofport'. * * EOPNOTSUPP as a return value indicates that this ofproto_class does not * support STP, as does a null pointer. */ int (*set_stp_port)(struct ofport *ofport, const struct ofproto_port_stp_settings *s); /* Retrieves spanning tree protocol (STP) port status of 'ofport'. * * Stores STP state for 'ofport' in 's'. If the 'enabled' member is * false, the other member values are not meaningful. * * EOPNOTSUPP as a return value indicates that this ofproto_class does not * support STP, as does a null pointer. */ int (*get_stp_port_status)(struct ofport *ofport, struct ofproto_port_stp_status *s); /* Registers meta-data associated with the 'n_qdscp' Qualities of Service * 'queues' attached to 'ofport'. This data is not intended to be * sufficient to implement QoS. Instead, providers may use this * information to implement features which require knowledge of what queues * exist on a port, and some basic information about them. * * EOPNOTSUPP as a return value indicates that this ofproto_class does not * support QoS, as does a null pointer. */ int (*set_queues)(struct ofport *ofport, const struct ofproto_port_queue *queues, size_t n_qdscp); /* If 's' is nonnull, this function registers a "bundle" associated with * client data pointer 'aux' in 'ofproto'. A bundle is the same concept as * a Port in OVSDB, that is, it consists of one or more "slave" devices * (Interfaces, in OVSDB) along with VLAN and LACP configuration and, if * there is more than one slave, a bonding configuration. 
If 'aux' is * already registered then this function updates its configuration to 's'. * Otherwise, this function registers a new bundle. * * If 's' is NULL, this function unregisters the bundle registered on * 'ofproto' associated with client data pointer 'aux'. If no such bundle * has been registered, this has no effect. * * This function affects only the behavior of the NXAST_AUTOPATH action and * output to the OFPP_NORMAL port. An implementation that does not support * it at all may set it to NULL or return EOPNOTSUPP. An implementation * that supports only a subset of the functionality should implement what * it can and return 0. */ int (*bundle_set)(struct ofproto *ofproto, void *aux, const struct ofproto_bundle_settings *s); /* If 'port' is part of any bundle, removes it from that bundle. If the * bundle now has no ports, deletes the bundle. If the bundle now has only * one port, deconfigures the bundle's bonding configuration. */ void (*bundle_remove)(struct ofport *ofport); /* If 's' is nonnull, this function registers a mirror associated with * client data pointer 'aux' in 'ofproto'. A mirror is the same concept as * a Mirror in OVSDB. If 'aux' is already registered then this function * updates its configuration to 's'. Otherwise, this function registers a * new mirror. * * If 's' is NULL, this function unregisters the mirror registered on * 'ofproto' associated with client data pointer 'aux'. If no such mirror * has been registered, this has no effect. * * An implementation that does not support mirroring at all may set * it to NULL or return EOPNOTSUPP. An implementation that supports * only a subset of the functionality should implement what it can * and return 0. */ int (*mirror_set)(struct ofproto *ofproto, void *aux, const struct ofproto_mirror_settings *s); /* Retrieves statistics from mirror associated with client data * pointer 'aux' in 'ofproto'. Stores packet and byte counts in * 'packets' and 'bytes', respectively. 
If a particular counter is * not supported, the appropriate argument is set to UINT64_MAX. * * EOPNOTSUPP as a return value indicates that this ofproto_class does not * support retrieving mirror statistics. */ int (*mirror_get_stats)(struct ofproto *ofproto, void *aux, uint64_t *packets, uint64_t *bytes); /* Configures the VLANs whose bits are set to 1 in 'flood_vlans' as VLANs * on which all packets are flooded, instead of using MAC learning. If * 'flood_vlans' is NULL, then MAC learning applies to all VLANs. * * This function affects only the behavior of the OFPP_NORMAL action. An * implementation that does not support it may set it to NULL or return * EOPNOTSUPP. */ int (*set_flood_vlans)(struct ofproto *ofproto, unsigned long *flood_vlans); /* Returns true if 'aux' is a registered bundle that is currently in use as * the output for a mirror. */ bool (*is_mirror_output_bundle)(const struct ofproto *ofproto, void *aux); /* When the configuration option of forward_bpdu changes, this function * will be invoked. */ void (*forward_bpdu_changed)(struct ofproto *ofproto); /* Sets the MAC aging timeout for the OFPP_NORMAL action to 'idle_time', in * seconds, and the maximum number of MAC table entries to * 'max_entries'. * * An implementation that doesn't support configuring these features may * set this function to NULL or implement it as a no-op. */ void (*set_mac_table_config)(struct ofproto *ofproto, unsigned int idle_time, size_t max_entries); /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) * * This is deprecated. It is only for compatibility with broken device drivers * in old versions of Linux that do not properly support VLANs when VLAN * devices are not used. When broken device drivers are no longer in * widespread use, we will delete these interfaces. 
*/ /* If 'realdev_ofp_port' is nonzero, then this function configures 'ofport' * as a VLAN splinter port for VLAN 'vid', associated with the real device * that has OpenFlow port number 'realdev_ofp_port'. * * If 'realdev_ofp_port' is zero, then this function deconfigures 'ofport' * as a VLAN splinter port. * * This function should be NULL if an implementation does not support it. */ int (*set_realdev)(struct ofport *ofport, ofp_port_t realdev_ofp_port, int vid); /* ## ------------------------ ## */ /* ## OpenFlow meter functions ## */ /* ## ------------------------ ## */ /* These functions should be NULL if an implementation does not support * them. They must be all null or all non-null. */ /* Initializes 'features' to describe the metering features supported by * 'ofproto'. */ void (*meter_get_features)(const struct ofproto *ofproto, struct ofputil_meter_features *features); /* If '*id' is UINT32_MAX, adds a new meter with the given 'config'. On * success the function must store a provider meter ID other than * UINT32_MAX in '*id'. All further references to the meter will be made * with the returned provider meter id rather than the OpenFlow meter id. * The caller does not try to interpret the provider meter id, giving the * implementation the freedom to either use the OpenFlow meter_id value * provided in the meter configuration, or any other value suitable for the * implementation. * * If '*id' is a value other than UINT32_MAX, modifies the existing meter * with that meter provider ID to have configuration 'config'. On failure, * the existing meter configuration is left intact. Regardless of success, * any change to '*id' updates the provider meter id used for this * meter. */ enum ofperr (*meter_set)(struct ofproto *ofproto, ofproto_meter_id *id, const struct ofputil_meter_config *config); /* Gets the meter and meter band packet and byte counts for a maximum of * 'stats->n_bands' bands for the meter with provider ID 'id' within * 'ofproto'.
The caller fills in the other stats values. The band stats * are copied to memory at 'stats->bands' provided by the caller. The * number of returned band stats is returned in 'stats->n_bands'. */ enum ofperr (*meter_get)(const struct ofproto *ofproto, ofproto_meter_id id, struct ofputil_meter_stats *stats); /* Deletes a meter, making the 'ofproto_meter_id' invalid for any * further calls. */ void (*meter_del)(struct ofproto *, ofproto_meter_id); }; extern const struct ofproto_class ofproto_dpif_class; int ofproto_class_register(const struct ofproto_class *); int ofproto_class_unregister(const struct ofproto_class *); /* ofproto_flow_mod() returns this value if the flow_mod could not be processed * because it overlaps with an ongoing flow table operation that has not yet * completed. The caller should retry the operation later. * * ofproto.c also uses this value internally for additional (similar) purposes. * * This particular value is a good choice because it is large, so that it does * not collide with any errno value, but not large enough to collide with an * OFPERR_* value. */ enum { OFPROTO_POSTPONE = 1 << 16 }; BUILD_ASSERT_DECL(OFPROTO_POSTPONE < OFPERR_OFS); int ofproto_flow_mod(struct ofproto *, struct ofputil_flow_mod *) OVS_EXCLUDED(ofproto_mutex); void ofproto_add_flow(struct ofproto *, const struct match *, unsigned int priority, const struct ofpact *ofpacts, size_t ofpacts_len) OVS_EXCLUDED(ofproto_mutex); bool ofproto_delete_flow(struct ofproto *, const struct match *, unsigned int priority) OVS_EXCLUDED(ofproto_mutex); void ofproto_flush_flows(struct ofproto *); #endif /* ofproto/ofproto-provider.h */ openvswitch-2.0.1+git20140120/ofproto/ofproto-unixctl.man000066400000000000000000000104351226605124000230160ustar00rootroot00000000000000.SS "OFPROTO COMMANDS" These commands manage the core OpenFlow switch implementation (called \fBofproto\fR). . .IP "\fBofproto/list\fR" Lists the names of the running ofproto instances. 
These are the names that may be used on \fBofproto/trace\fR. . .IP "\fBofproto/trace\fR [\fIdpname\fR] \fIodp_flow\fR [\fB\-generate \fR| \ \fIpacket\fR]" .IQ "\fBofproto/trace\fR \fIbridge\fR \fIbr_flow\fR \ [\fB\-generate \fR| \fIpacket\fR]" Traces the path of an imaginary packet through \fIswitch\fR and reports the path that it took. The packet's headers (e.g. source and destination) and metadata (e.g. input port), together called its ``flow,'' are usually all that matter for this purpose. You can specify the flow in the following ways: . .RS .IP "\fIdpname\fR \fIodp_flow\fR" \fIodp_flow\fR is a flow in the form printed by \fBovs\-dpctl\fR(8)'s \fBdump\-flows\fR command. If all of your bridges have the same type, which is the common case, then you can omit \fIdpname\fR, but if you have bridges of different types (say, both \fBovs-netdev\fR and \fBovs-system\fR), then you need to specify a \fIdpname\fR to disambiguate. . .IP "\fIbridge\fR \fIbr_flow\fR" \fIbr_flow\fR is a flow in a form similar to that accepted by \fBovs\-ofctl\fR(8)'s \fBadd\-flow\fR command. (This is not an OpenFlow flow: besides other differences, it never contains wildcards.) \fIbridge\fR names the bridge through which \fIbr_flow\fR should be traced. .RE . .IP Most commonly, one specifies only a flow, using one of the forms above, but sometimes one might need to specify an actual packet instead of just a flow: . .RS .IP "Side effects." Some actions have side effects. For example, the \fBnormal\fR action can update the MAC learning table, and the \fBlearn\fR action can change OpenFlow tables. \fBofproto/trace\fR only performs side effects when a packet is specified. If you want side effects to take place, then you must supply a packet. . .IP (Output actions are obviously side effects too, but \fBofproto/trace\fR never executes them, even when one specifies a packet.) . .IP "Incomplete information."
Most of the time, Open vSwitch can figure out everything about the path of a packet using just the flow, but in some special circumstances it needs to look at parts of the packet that are not included in the flow. When this is the case, and you do not supply a packet, then \fBofproto/trace\fR will tell you it needs a packet. .RE . .IP If you wish to include a packet as part of the \fBofproto/trace\fR operation, there are two ways to do it: . .RS .IP \fB\-generate\fR This option, added to one of the ways to specify a flow already described, causes Open vSwitch to internally generate a packet with the flow described and then to use that packet. If your goal is to execute side effects, then \fB\-generate\fR is the easiest way to do it, but \fB\-generate\fR is not a good way to fill in incomplete information, because it generates packets based on only the flow information, which means that the packets really do not have any more information than the flow. . .IP \fIpacket\fR This form supplies an explicit \fIpacket\fR as a sequence of hex digits. An Ethernet frame is at least 14 bytes long, so there must be at least 28 hex digits. Obviously, it is inconvenient to type in the hex digits by hand, so the \fBovs\-pcap\fR(1) and \fBovs\-tcpundump\fR(1) utilities provide easier ways. .IP With this form, packet headers are extracted directly from \fIpacket\fR, so the \fIodp_flow\fR or \fIbr_flow\fR should specify only metadata. The metadata can be: .RS .IP \fIskb_priority\fR Packet QoS priority. .IP \fIpkt_mark\fR Mark of the packet. .IP \fItun_id\fR The tunnel ID on which the packet arrived. .IP \fIin_port\fR The port on which the packet arrived. .RE . The in_port value is the kernel datapath port number for the first format and the OpenFlow port number for the second format. The numbering of these two types of port usually differs and there is no relationship between them.
.RE .IP "\fBofproto/self\-check\fR [\fIswitch\fR]" Runs an internal consistency check on \fIswitch\fR, if specified, otherwise on all ofproto instances, and responds with a brief summary of the results. If the summary reports any errors, then the Open vSwitch logs should contain more detailed information. Please pass along errors reported by this command to the Open vSwitch developers as bugs. openvswitch-2.0.1+git20140120/ofproto/ofproto.c000066400000000000000000005743271226605124000210200ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * Copyright (c) 2010 Jean Tourrilhes - HP-Labs. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ofproto.h" #include #include #include #include #include #include "bitmap.h" #include "byte-order.h" #include "classifier.h" #include "connmgr.h" #include "coverage.h" #include "dynamic-string.h" #include "hash.h" #include "hmap.h" #include "meta-flow.h" #include "netdev.h" #include "nx-match.h" #include "ofp-actions.h" #include "ofp-errors.h" #include "ofp-msgs.h" #include "ofp-print.h" #include "ofp-util.h" #include "ofpbuf.h" #include "ofproto-provider.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "packets.h" #include "pinsched.h" #include "pktbuf.h" #include "poll-loop.h" #include "random.h" #include "shash.h" #include "simap.h" #include "sset.h" #include "timeval.h" #include "unaligned.h" #include "unixctl.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ofproto); COVERAGE_DEFINE(ofproto_error); COVERAGE_DEFINE(ofproto_flush); COVERAGE_DEFINE(ofproto_no_packet_in); COVERAGE_DEFINE(ofproto_packet_out); COVERAGE_DEFINE(ofproto_queue_req); COVERAGE_DEFINE(ofproto_recv_openflow); COVERAGE_DEFINE(ofproto_reinit_ports); COVERAGE_DEFINE(ofproto_uninstallable); COVERAGE_DEFINE(ofproto_update_port); enum ofproto_state { S_OPENFLOW, /* Processing OpenFlow commands. */ S_EVICT, /* Evicting flows from over-limit tables. */ S_FLUSH, /* Deleting all flow table rules. */ }; enum ofoperation_type { OFOPERATION_ADD, OFOPERATION_DELETE, OFOPERATION_MODIFY, OFOPERATION_REPLACE }; /* A single OpenFlow request can execute any number of operations. The * ofopgroup maintain OpenFlow state common to all of the operations, e.g. the * ofconn to which an error reply should be sent if necessary. * * ofproto initiates some operations internally. These operations are still * assigned to groups but will not have an associated ofconn. */ struct ofopgroup { struct ofproto *ofproto; /* Owning ofproto. */ struct list ofproto_node; /* In ofproto's "pending" list. */ struct list ops; /* List of "struct ofoperation"s. 
*/ int n_running; /* Number of ops still pending. */ /* Data needed to send OpenFlow reply on failure or to send a buffered * packet on success. * * If list_is_empty(ofconn_node) then this ofopgroup never had an * associated ofconn or its ofconn's connection dropped after it initiated * the operation. In the latter case 'ofconn' is a wild pointer that * refers to freed memory, so the 'ofconn' member must be used only if * !list_is_empty(ofconn_node). */ struct list ofconn_node; /* In ofconn's list of pending opgroups. */ struct ofconn *ofconn; /* ofconn for reply (but see note above). */ struct ofp_header *request; /* Original request (truncated at 64 bytes). */ uint32_t buffer_id; /* Buffer id from original request. */ }; static struct ofopgroup *ofopgroup_create_unattached(struct ofproto *); static struct ofopgroup *ofopgroup_create(struct ofproto *, struct ofconn *, const struct ofp_header *, uint32_t buffer_id); static void ofopgroup_submit(struct ofopgroup *); static void ofopgroup_complete(struct ofopgroup *); /* A single flow table operation. */ struct ofoperation { struct ofopgroup *group; /* Owning group. */ struct list group_node; /* In ofopgroup's "ops" list. */ struct hmap_node hmap_node; /* In ofproto's "deletions" hmap. */ struct rule *rule; /* Rule being operated upon. */ enum ofoperation_type type; /* Type of operation. */ /* OFOPERATION_MODIFY, OFOPERATION_REPLACE: The old actions, if the actions * are changing. */ struct rule_actions *actions; /* OFOPERATION_DELETE. */ enum ofp_flow_removed_reason reason; /* Reason flow was removed. */ ovs_be64 flow_cookie; /* Rule's old flow cookie. */ uint16_t idle_timeout; /* Rule's old idle timeout. */ uint16_t hard_timeout; /* Rule's old hard timeout. */ bool send_flow_removed; /* Rule's old 'send_flow_removed'. */ enum ofperr error; /* 0 if no error. 
*/ }; static struct ofoperation *ofoperation_create(struct ofopgroup *, struct rule *, enum ofoperation_type, enum ofp_flow_removed_reason); static void ofoperation_destroy(struct ofoperation *); /* oftable. */ static void oftable_init(struct oftable *); static void oftable_destroy(struct oftable *); static void oftable_set_name(struct oftable *, const char *name); static void oftable_disable_eviction(struct oftable *); static void oftable_enable_eviction(struct oftable *, const struct mf_subfield *fields, size_t n_fields); static void oftable_remove_rule(struct rule *rule) OVS_REQUIRES(ofproto_mutex); static void oftable_remove_rule__(struct ofproto *, struct rule *) OVS_REQUIRES(ofproto_mutex); static void oftable_insert_rule(struct rule *); /* A set of rules within a single OpenFlow table (oftable) that have the same * values for the oftable's eviction_fields. A rule to be evicted, when one is * needed, is taken from the eviction group that contains the greatest number * of rules. * * An oftable owns any number of eviction groups, each of which contains any * number of rules. * * Membership in an eviction group is imprecise, based on the hash of the * oftable's eviction_fields (in the eviction_group's id_node.hash member). * That is, if two rules have different eviction_fields, but those * eviction_fields hash to the same value, then they will belong to the same * eviction_group anyway. * * (When eviction is not enabled on an oftable, we don't track any eviction * groups, to save time and space.) */ struct eviction_group { struct hmap_node id_node; /* In oftable's "eviction_groups_by_id". */ struct heap_node size_node; /* In oftable's "eviction_groups_by_size". */ struct heap rules; /* Contains "struct rule"s. 
*/ }; static bool choose_rule_to_evict(struct oftable *table, struct rule **rulep); static void ofproto_evict(struct ofproto *) OVS_EXCLUDED(ofproto_mutex); static uint32_t rule_eviction_priority(struct rule *); static void eviction_group_add_rule(struct rule *); static void eviction_group_remove_rule(struct rule *); /* Criteria that flow_mod and other operations use for selecting rules on * which to operate. */ struct rule_criteria { /* An OpenFlow table or 255 for all tables. */ uint8_t table_id; /* OpenFlow matching criteria. Interpreted different in "loose" way by * collect_rules_loose() and "strict" way by collect_rules_strict(), as * defined in the OpenFlow spec. */ struct cls_rule cr; /* Matching criteria for the OpenFlow cookie. Consider a bit B in a rule's * cookie and the corresponding bits C in 'cookie' and M in 'cookie_mask'. * The rule will not be selected if M is 1 and B != C. */ ovs_be64 cookie; ovs_be64 cookie_mask; /* Selection based on actions within a rule: * * If out_port != OFPP_ANY, selects only rules that output to out_port. */ ofp_port_t out_port; }; static void rule_criteria_init(struct rule_criteria *, uint8_t table_id, const struct match *match, unsigned int priority, ovs_be64 cookie, ovs_be64 cookie_mask, ofp_port_t out_port); static void rule_criteria_destroy(struct rule_criteria *); /* A packet that needs to be passed to rule_execute(). * * (We can't do this immediately from ofopgroup_complete() because that holds * ofproto_mutex, which rule_execute() needs released.) */ struct rule_execute { struct list list_node; /* In struct ofproto's "rule_executes" list. */ struct rule *rule; /* Owns a reference to the rule. */ ofp_port_t in_port; struct ofpbuf *packet; /* Owns the packet. */ }; static void run_rule_executes(struct ofproto *) OVS_EXCLUDED(ofproto_mutex); static void destroy_rule_executes(struct ofproto *); /* ofport. 
*/
/* Prototypes for the static port helpers used by the code below.
 * OVS_EXCLUDED/OVS_REQUIRES are lock annotations naming 'ofproto_mutex'
 * (presumably Clang thread-safety attributes -- confirm against their
 * definition). */
static void ofport_destroy__(struct ofport *) OVS_EXCLUDED(ofproto_mutex);
static void ofport_destroy(struct ofport *);

static void update_port(struct ofproto *, const char *devname);
static int init_ports(struct ofproto *);
static void reinit_ports(struct ofproto *);

/* rule. */
static void ofproto_rule_destroy__(struct rule *);
static void ofproto_rule_send_removed(struct rule *, uint8_t reason);
static bool rule_is_modifiable(const struct rule *);

/* OpenFlow. */
static enum ofperr add_flow(struct ofproto *, struct ofconn *,
                            struct ofputil_flow_mod *,
                            const struct ofp_header *);
static enum ofperr modify_flows__(struct ofproto *, struct ofconn *,
                                  struct ofputil_flow_mod *,
                                  const struct ofp_header *,
                                  const struct rule_collection *);
static void delete_flow__(struct rule *rule, struct ofopgroup *,
                          enum ofp_flow_removed_reason)
    OVS_REQUIRES(ofproto_mutex);
static bool handle_openflow(struct ofconn *, const struct ofpbuf *);
static enum ofperr handle_flow_mod__(struct ofproto *, struct ofconn *,
                                     struct ofputil_flow_mod *,
                                     const struct ofp_header *)
    OVS_EXCLUDED(ofproto_mutex);
static void calc_duration(long long int start, long long int now,
                          uint32_t *sec, uint32_t *nsec);

/* ofproto. */
static uint64_t pick_datapath_id(const struct ofproto *);
static uint64_t pick_fallback_dpid(void);
static void ofproto_destroy__(struct ofproto *);
static void update_mtu(struct ofproto *, struct ofport *);
static void meter_delete(struct ofproto *, uint32_t first, uint32_t last);

/* unixctl. */
static void ofproto_unixctl_init(void);

/* All registered ofproto classes, in probe order.
 *
 * 'ofproto_classes' is a heap array grown on demand by
 * ofproto_class_register(): 'n_ofproto_classes' counts the entries in use and
 * 'allocated_ofproto_classes' is the array's current capacity. */
static const struct ofproto_class **ofproto_classes;
static size_t n_ofproto_classes;
static size_t allocated_ofproto_classes;

/* Global lock that protects all flow table operations. 
*/ struct ovs_mutex ofproto_mutex = OVS_MUTEX_INITIALIZER; unsigned flow_eviction_threshold = OFPROTO_FLOW_EVICTION_THRESHOLD_DEFAULT; unsigned n_handler_threads; enum ofproto_flow_miss_model flow_miss_model = OFPROTO_HANDLE_MISS_AUTO; /* Map from datapath name to struct ofproto, for use by unixctl commands. */ static struct hmap all_ofprotos = HMAP_INITIALIZER(&all_ofprotos); /* Initial mappings of port to OpenFlow number mappings. */ static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); /* The default value of true waits for flow restore. */ static bool flow_restore_wait = true; /* Must be called to initialize the ofproto library. * * The caller may pass in 'iface_hints', which contains an shash of * "iface_hint" elements indexed by the interface's name. The provider * may use these hints to describe the startup configuration in order to * reinitialize its state. The caller owns the provided data, so a * provider will make copies of anything required. An ofproto provider * will remove any existing state that is not described by the hint, and * may choose to remove it all. */ void ofproto_init(const struct shash *iface_hints) { struct shash_node *node; size_t i; ofproto_class_register(&ofproto_dpif_class); /* Make a local copy, since we don't own 'iface_hints' elements. */ SHASH_FOR_EACH(node, iface_hints) { const struct iface_hint *orig_hint = node->data; struct iface_hint *new_hint = xmalloc(sizeof *new_hint); const char *br_type = ofproto_normalize_type(orig_hint->br_type); new_hint->br_name = xstrdup(orig_hint->br_name); new_hint->br_type = xstrdup(br_type); new_hint->ofp_port = orig_hint->ofp_port; shash_add(&init_ofp_ports, node->name, new_hint); } for (i = 0; i < n_ofproto_classes; i++) { ofproto_classes[i]->init(&init_ofp_ports); } } /* 'type' should be a normalized datapath type, as returned by * ofproto_normalize_type(). 
Returns the corresponding ofproto_class * structure, or a null pointer if there is none registered for 'type'. */ static const struct ofproto_class * ofproto_class_find__(const char *type) { size_t i; for (i = 0; i < n_ofproto_classes; i++) { const struct ofproto_class *class = ofproto_classes[i]; struct sset types; bool found; sset_init(&types); class->enumerate_types(&types); found = sset_contains(&types, type); sset_destroy(&types); if (found) { return class; } } VLOG_WARN("unknown datapath type %s", type); return NULL; } /* Registers a new ofproto class. After successful registration, new ofprotos * of that type can be created using ofproto_create(). */ int ofproto_class_register(const struct ofproto_class *new_class) { size_t i; for (i = 0; i < n_ofproto_classes; i++) { if (ofproto_classes[i] == new_class) { return EEXIST; } } if (n_ofproto_classes >= allocated_ofproto_classes) { ofproto_classes = x2nrealloc(ofproto_classes, &allocated_ofproto_classes, sizeof *ofproto_classes); } ofproto_classes[n_ofproto_classes++] = new_class; return 0; } /* Unregisters a datapath provider. 'type' must have been previously * registered and not currently be in use by any ofprotos. After * unregistration new datapaths of that type cannot be opened using * ofproto_create(). */ int ofproto_class_unregister(const struct ofproto_class *class) { size_t i; for (i = 0; i < n_ofproto_classes; i++) { if (ofproto_classes[i] == class) { for (i++; i < n_ofproto_classes; i++) { ofproto_classes[i - 1] = ofproto_classes[i]; } n_ofproto_classes--; return 0; } } VLOG_WARN("attempted to unregister an ofproto class that is not " "registered"); return EAFNOSUPPORT; } /* Clears 'types' and enumerates all registered ofproto types into it. The * caller must first initialize the sset. 
*/ void ofproto_enumerate_types(struct sset *types) { size_t i; for (i = 0; i < n_ofproto_classes; i++) { ofproto_classes[i]->enumerate_types(types); } } /* Returns the fully spelled out name for the given ofproto 'type'. * * Normalized type string can be compared with strcmp(). Unnormalized type * string might be the same even if they have different spellings. */ const char * ofproto_normalize_type(const char *type) { return type && type[0] ? type : "system"; } /* Clears 'names' and enumerates the names of all known created ofprotos with * the given 'type'. The caller must first initialize the sset. Returns 0 if * successful, otherwise a positive errno value. * * Some kinds of datapaths might not be practically enumerable. This is not * considered an error. */ int ofproto_enumerate_names(const char *type, struct sset *names) { const struct ofproto_class *class = ofproto_class_find__(type); return class ? class->enumerate_names(type, names) : EAFNOSUPPORT; } int ofproto_create(const char *datapath_name, const char *datapath_type, struct ofproto **ofprotop) { const struct ofproto_class *class; struct ofproto *ofproto; int error; int i; *ofprotop = NULL; ofproto_unixctl_init(); datapath_type = ofproto_normalize_type(datapath_type); class = ofproto_class_find__(datapath_type); if (!class) { VLOG_WARN("could not create datapath %s of unknown type %s", datapath_name, datapath_type); return EAFNOSUPPORT; } ofproto = class->alloc(); if (!ofproto) { VLOG_ERR("failed to allocate datapath %s of type %s", datapath_name, datapath_type); return ENOMEM; } /* Initialize. 
*/ ovs_mutex_lock(&ofproto_mutex); memset(ofproto, 0, sizeof *ofproto); ofproto->ofproto_class = class; ofproto->name = xstrdup(datapath_name); ofproto->type = xstrdup(datapath_type); hmap_insert(&all_ofprotos, &ofproto->hmap_node, hash_string(ofproto->name, 0)); ofproto->datapath_id = 0; ofproto->forward_bpdu = false; ofproto->fallback_dpid = pick_fallback_dpid(); ofproto->mfr_desc = NULL; ofproto->hw_desc = NULL; ofproto->sw_desc = NULL; ofproto->serial_desc = NULL; ofproto->dp_desc = NULL; ofproto->frag_handling = OFPC_FRAG_NORMAL; hmap_init(&ofproto->ports); shash_init(&ofproto->port_by_name); simap_init(&ofproto->ofp_requests); ofproto->max_ports = ofp_to_u16(OFPP_MAX); ofproto->eviction_group_timer = LLONG_MIN; ofproto->tables = NULL; ofproto->n_tables = 0; hindex_init(&ofproto->cookies); list_init(&ofproto->expirable); ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name); ofproto->state = S_OPENFLOW; list_init(&ofproto->pending); ofproto->n_pending = 0; hmap_init(&ofproto->deletions); guarded_list_init(&ofproto->rule_executes); ofproto->n_add = ofproto->n_delete = ofproto->n_modify = 0; ofproto->first_op = ofproto->last_op = LLONG_MIN; ofproto->next_op_report = LLONG_MAX; ofproto->op_backoff = LLONG_MIN; ofproto->vlan_bitmap = NULL; ofproto->vlans_changed = false; ofproto->min_mtu = INT_MAX; ovs_mutex_unlock(&ofproto_mutex); error = ofproto->ofproto_class->construct(ofproto); if (error) { VLOG_ERR("failed to open datapath %s: %s", datapath_name, ovs_strerror(error)); ofproto_destroy__(ofproto); return error; } /* The "max_ports" member should have been set by ->construct(ofproto). * Port 0 is not a valid OpenFlow port, so mark that as unavailable. */ ofproto->ofp_port_ids = bitmap_allocate(ofproto->max_ports); bitmap_set1(ofproto->ofp_port_ids, 0); /* Check that hidden tables, if any, are at the end. 
*/ ovs_assert(ofproto->n_tables); for (i = 0; i + 1 < ofproto->n_tables; i++) { enum oftable_flags flags = ofproto->tables[i].flags; enum oftable_flags next_flags = ofproto->tables[i + 1].flags; ovs_assert(!(flags & OFTABLE_HIDDEN) || next_flags & OFTABLE_HIDDEN); } ofproto->datapath_id = pick_datapath_id(ofproto); init_ports(ofproto); /* Initialize meters table. */ if (ofproto->ofproto_class->meter_get_features) { ofproto->ofproto_class->meter_get_features(ofproto, &ofproto->meter_features); } else { memset(&ofproto->meter_features, 0, sizeof ofproto->meter_features); } ofproto->meters = xzalloc((ofproto->meter_features.max_meters + 1) * sizeof(struct meter *)); *ofprotop = ofproto; return 0; } /* Must be called (only) by an ofproto implementation in its constructor * function. See the large comment on 'construct' in struct ofproto_class for * details. */ void ofproto_init_tables(struct ofproto *ofproto, int n_tables) { struct oftable *table; ovs_assert(!ofproto->n_tables); ovs_assert(n_tables >= 1 && n_tables <= 255); ofproto->n_tables = n_tables; ofproto->tables = xmalloc(n_tables * sizeof *ofproto->tables); OFPROTO_FOR_EACH_TABLE (table, ofproto) { oftable_init(table); } } /* To be optionally called (only) by an ofproto implementation in its * constructor function. See the large comment on 'construct' in struct * ofproto_class for details. * * Sets the maximum number of ports to 'max_ports'. The ofproto generic layer * will then ensure that actions passed into the ofproto implementation will * not refer to OpenFlow ports numbered 'max_ports' or higher. If this * function is not called, there will be no such restriction. * * Reserved ports numbered OFPP_MAX and higher are special and not subject to * the 'max_ports' restriction. 
*/ void ofproto_init_max_ports(struct ofproto *ofproto, uint16_t max_ports) { ovs_assert(max_ports <= ofp_to_u16(OFPP_MAX)); ofproto->max_ports = max_ports; } uint64_t ofproto_get_datapath_id(const struct ofproto *ofproto) { return ofproto->datapath_id; } void ofproto_set_datapath_id(struct ofproto *p, uint64_t datapath_id) { uint64_t old_dpid = p->datapath_id; p->datapath_id = datapath_id ? datapath_id : pick_datapath_id(p); if (p->datapath_id != old_dpid) { /* Force all active connections to reconnect, since there is no way to * notify a controller that the datapath ID has changed. */ ofproto_reconnect_controllers(p); } } void ofproto_set_controllers(struct ofproto *p, const struct ofproto_controller *controllers, size_t n_controllers, uint32_t allowed_versions) { connmgr_set_controllers(p->connmgr, controllers, n_controllers, allowed_versions); } void ofproto_set_fail_mode(struct ofproto *p, enum ofproto_fail_mode fail_mode) { connmgr_set_fail_mode(p->connmgr, fail_mode); } /* Drops the connections between 'ofproto' and all of its controllers, forcing * them to reconnect. */ void ofproto_reconnect_controllers(struct ofproto *ofproto) { connmgr_reconnect(ofproto->connmgr); } /* Sets the 'n' TCP port addresses in 'extras' as ones to which 'ofproto''s * in-band control should guarantee access, in the same way that in-band * control guarantees access to OpenFlow controllers. */ void ofproto_set_extra_in_band_remotes(struct ofproto *ofproto, const struct sockaddr_in *extras, size_t n) { connmgr_set_extra_in_band_remotes(ofproto->connmgr, extras, n); } /* Sets the OpenFlow queue used by flows set up by in-band control on * 'ofproto' to 'queue_id'. If 'queue_id' is negative, then in-band control * flows will use the default queue. */ void ofproto_set_in_band_queue(struct ofproto *ofproto, int queue_id) { connmgr_set_in_band_queue(ofproto->connmgr, queue_id); } /* Sets the number of flows at which eviction from the kernel flow table * will occur. 
*/ void ofproto_set_flow_eviction_threshold(unsigned threshold) { flow_eviction_threshold = MAX(OFPROTO_FLOW_EVICTION_THRESHOLD_MIN, threshold); } /* Sets the path for handling flow misses. */ void ofproto_set_flow_miss_model(unsigned model) { flow_miss_model = model; } /* If forward_bpdu is true, the NORMAL action will forward frames with * reserved (e.g. STP) destination Ethernet addresses. if forward_bpdu is false, * the NORMAL action will drop these frames. */ void ofproto_set_forward_bpdu(struct ofproto *ofproto, bool forward_bpdu) { bool old_val = ofproto->forward_bpdu; ofproto->forward_bpdu = forward_bpdu; if (old_val != ofproto->forward_bpdu) { if (ofproto->ofproto_class->forward_bpdu_changed) { ofproto->ofproto_class->forward_bpdu_changed(ofproto); } } } /* Sets the MAC aging timeout for the OFPP_NORMAL action on 'ofproto' to * 'idle_time', in seconds, and the maximum number of MAC table entries to * 'max_entries'. */ void ofproto_set_mac_table_config(struct ofproto *ofproto, unsigned idle_time, size_t max_entries) { if (ofproto->ofproto_class->set_mac_table_config) { ofproto->ofproto_class->set_mac_table_config(ofproto, idle_time, max_entries); } } /* Sets number of upcall handler threads. The default is * (number of online cores - 2). */ void ofproto_set_n_handler_threads(unsigned limit) { if (limit) { n_handler_threads = limit; } else { int n_proc = sysconf(_SC_NPROCESSORS_ONLN); n_handler_threads = n_proc > 2 ? n_proc - 2 : 1; } } void ofproto_set_dp_desc(struct ofproto *p, const char *dp_desc) { free(p->dp_desc); p->dp_desc = dp_desc ? 
xstrdup(dp_desc) : NULL; } int ofproto_set_snoops(struct ofproto *ofproto, const struct sset *snoops) { return connmgr_set_snoops(ofproto->connmgr, snoops); } int ofproto_set_netflow(struct ofproto *ofproto, const struct netflow_options *nf_options) { if (nf_options && sset_is_empty(&nf_options->collectors)) { nf_options = NULL; } if (ofproto->ofproto_class->set_netflow) { return ofproto->ofproto_class->set_netflow(ofproto, nf_options); } else { return nf_options ? EOPNOTSUPP : 0; } } int ofproto_set_sflow(struct ofproto *ofproto, const struct ofproto_sflow_options *oso) { if (oso && sset_is_empty(&oso->targets)) { oso = NULL; } if (ofproto->ofproto_class->set_sflow) { return ofproto->ofproto_class->set_sflow(ofproto, oso); } else { return oso ? EOPNOTSUPP : 0; } } int ofproto_set_ipfix(struct ofproto *ofproto, const struct ofproto_ipfix_bridge_exporter_options *bo, const struct ofproto_ipfix_flow_exporter_options *fo, size_t n_fo) { if (ofproto->ofproto_class->set_ipfix) { return ofproto->ofproto_class->set_ipfix(ofproto, bo, fo, n_fo); } else { return (bo || fo) ? EOPNOTSUPP : 0; } } void ofproto_set_flow_restore_wait(bool flow_restore_wait_db) { flow_restore_wait = flow_restore_wait_db; } bool ofproto_get_flow_restore_wait(void) { return flow_restore_wait; } /* Spanning Tree Protocol (STP) configuration. */ /* Configures STP on 'ofproto' using the settings defined in 's'. If * 's' is NULL, disables STP. * * Returns 0 if successful, otherwise a positive errno value. */ int ofproto_set_stp(struct ofproto *ofproto, const struct ofproto_stp_settings *s) { return (ofproto->ofproto_class->set_stp ? ofproto->ofproto_class->set_stp(ofproto, s) : EOPNOTSUPP); } /* Retrieves STP status of 'ofproto' and stores it in 's'. If the * 'enabled' member of 's' is false, then the other members are not * meaningful. * * Returns 0 if successful, otherwise a positive errno value. 
*/
int
ofproto_get_stp_status(struct ofproto *ofproto,
                       struct ofproto_stp_status *s)
{
    if (!ofproto->ofproto_class->get_stp_status) {
        return EOPNOTSUPP;
    }
    return ofproto->ofproto_class->get_stp_status(ofproto, s);
}

/* Applies the per-port STP settings in 's' to port 'ofp_port' of 'ofproto'.
 * STP port numbers (the 'port_num' member, 1 through 255 inclusive) are
 * chosen by the caller, who must also keep them unique.  A null 's' turns
 * STP off for the port.
 *
 * Returns 0 if successful, otherwise a positive errno value: ENODEV if
 * 'ofp_port' does not exist, EOPNOTSUPP if the class cannot configure
 * per-port STP. */
int
ofproto_port_set_stp(struct ofproto *ofproto, ofp_port_t ofp_port,
                     const struct ofproto_port_stp_settings *s)
{
    struct ofport *port = ofproto_get_port(ofproto, ofp_port);

    if (!port) {
        VLOG_WARN("%s: cannot configure STP on nonexistent port %"PRIu16,
                  ofproto->name, ofp_port);
        return ENODEV;
    }

    if (!ofproto->ofproto_class->set_stp_port) {
        return EOPNOTSUPP;
    }
    return ofproto->ofproto_class->set_stp_port(port, s);
}

/* Fetches the STP status of port 'ofp_port' on 'ofproto' into 's'.  When the
 * 'enabled' member of 's' comes back false the remaining members carry no
 * meaning.
 *
 * Returns 0 if successful, otherwise a positive errno value: ENODEV if
 * 'ofp_port' does not exist, EOPNOTSUPP if the class cannot report per-port
 * STP status. */
int
ofproto_port_get_stp_status(struct ofproto *ofproto, ofp_port_t ofp_port,
                            struct ofproto_port_stp_status *s)
{
    struct ofport *port = ofproto_get_port(ofproto, ofp_port);

    if (!port) {
        VLOG_WARN_RL(&rl, "%s: cannot get STP status on nonexistent "
                     "port %"PRIu16, ofproto->name, ofp_port);
        return ENODEV;
    }

    if (!ofproto->ofproto_class->get_stp_port_status) {
        return EOPNOTSUPP;
    }
    return ofproto->ofproto_class->get_stp_port_status(port, s);
}

/* Queue DSCP configuration. */

/* Registers meta-data associated with the 'n_queues' Qualities of Service
 * 'queues' attached to 'ofp_port'.  This data is not intended to be sufficient
 * to implement QoS. 
Instead, it is used to implement features which require * knowledge of what queues exist on a port, and some basic information about * them. * * Returns 0 if successful, otherwise a positive errno value. */ int ofproto_port_set_queues(struct ofproto *ofproto, ofp_port_t ofp_port, const struct ofproto_port_queue *queues, size_t n_queues) { struct ofport *ofport = ofproto_get_port(ofproto, ofp_port); if (!ofport) { VLOG_WARN("%s: cannot set queues on nonexistent port %"PRIu16, ofproto->name, ofp_port); return ENODEV; } return (ofproto->ofproto_class->set_queues ? ofproto->ofproto_class->set_queues(ofport, queues, n_queues) : EOPNOTSUPP); } /* Connectivity Fault Management configuration. */ /* Clears the CFM configuration from 'ofp_port' on 'ofproto'. */ void ofproto_port_clear_cfm(struct ofproto *ofproto, ofp_port_t ofp_port) { struct ofport *ofport = ofproto_get_port(ofproto, ofp_port); if (ofport && ofproto->ofproto_class->set_cfm) { ofproto->ofproto_class->set_cfm(ofport, NULL); } } /* Configures connectivity fault management on 'ofp_port' in 'ofproto'. Takes * basic configuration from the configuration members in 'cfm', and the remote * maintenance point ID from remote_mpid. Ignores the statistics members of * 'cfm'. * * This function has no effect if 'ofproto' does not have a port 'ofp_port'. */ void ofproto_port_set_cfm(struct ofproto *ofproto, ofp_port_t ofp_port, const struct cfm_settings *s) { struct ofport *ofport; int error; ofport = ofproto_get_port(ofproto, ofp_port); if (!ofport) { VLOG_WARN("%s: cannot configure CFM on nonexistent port %"PRIu16, ofproto->name, ofp_port); return; } /* XXX: For configuration simplicity, we only support one remote_mpid * outside of the CFM module. It's not clear if this is the correct long * term solution or not. */ error = (ofproto->ofproto_class->set_cfm ? 
ofproto->ofproto_class->set_cfm(ofport, s) : EOPNOTSUPP); if (error) { VLOG_WARN("%s: CFM configuration on port %"PRIu16" (%s) failed (%s)", ofproto->name, ofp_port, netdev_get_name(ofport->netdev), ovs_strerror(error)); } } /* Configures BFD on 'ofp_port' in 'ofproto'. This function has no effect if * 'ofproto' does not have a port 'ofp_port'. */ void ofproto_port_set_bfd(struct ofproto *ofproto, ofp_port_t ofp_port, const struct smap *cfg) { struct ofport *ofport; int error; ofport = ofproto_get_port(ofproto, ofp_port); if (!ofport) { VLOG_WARN("%s: cannot configure bfd on nonexistent port %"PRIu16, ofproto->name, ofp_port); return; } error = (ofproto->ofproto_class->set_bfd ? ofproto->ofproto_class->set_bfd(ofport, cfg) : EOPNOTSUPP); if (error) { VLOG_WARN("%s: bfd configuration on port %"PRIu16" (%s) failed (%s)", ofproto->name, ofp_port, netdev_get_name(ofport->netdev), ovs_strerror(error)); } } /* Populates 'status' with key value pairs indicating the status of the BFD * session on 'ofp_port'. This information is intended to be populated in the * OVS database. Has no effect if 'ofp_port' is not na OpenFlow port in * 'ofproto'. */ int ofproto_port_get_bfd_status(struct ofproto *ofproto, ofp_port_t ofp_port, struct smap *status) { struct ofport *ofport = ofproto_get_port(ofproto, ofp_port); return (ofport && ofproto->ofproto_class->get_bfd_status ? ofproto->ofproto_class->get_bfd_status(ofport, status) : EOPNOTSUPP); } /* Checks the status of LACP negotiation for 'ofp_port' within ofproto. * Returns 1 if LACP partner information for 'ofp_port' is up-to-date, * 0 if LACP partner information is not current (generally indicating a * connectivity problem), or -1 if LACP is not enabled on 'ofp_port'. */ int ofproto_port_is_lacp_current(struct ofproto *ofproto, ofp_port_t ofp_port) { struct ofport *ofport = ofproto_get_port(ofproto, ofp_port); return (ofport && ofproto->ofproto_class->port_is_lacp_current ? 
ofproto->ofproto_class->port_is_lacp_current(ofport) : -1); } /* Bundles. */ /* Registers a "bundle" associated with client data pointer 'aux' in 'ofproto'. * A bundle is the same concept as a Port in OVSDB, that is, it consists of one * or more "slave" devices (Interfaces, in OVSDB) along with a VLAN * configuration plus, if there is more than one slave, a bonding * configuration. * * If 'aux' is already registered then this function updates its configuration * to 's'. Otherwise, this function registers a new bundle. * * Bundles only affect the NXAST_AUTOPATH action and output to the OFPP_NORMAL * port. */ int ofproto_bundle_register(struct ofproto *ofproto, void *aux, const struct ofproto_bundle_settings *s) { return (ofproto->ofproto_class->bundle_set ? ofproto->ofproto_class->bundle_set(ofproto, aux, s) : EOPNOTSUPP); } /* Unregisters the bundle registered on 'ofproto' with auxiliary data 'aux'. * If no such bundle has been registered, this has no effect. */ int ofproto_bundle_unregister(struct ofproto *ofproto, void *aux) { return ofproto_bundle_register(ofproto, aux, NULL); } /* Registers a mirror associated with client data pointer 'aux' in 'ofproto'. * If 'aux' is already registered then this function updates its configuration * to 's'. Otherwise, this function registers a new mirror. */ int ofproto_mirror_register(struct ofproto *ofproto, void *aux, const struct ofproto_mirror_settings *s) { return (ofproto->ofproto_class->mirror_set ? ofproto->ofproto_class->mirror_set(ofproto, aux, s) : EOPNOTSUPP); } /* Unregisters the mirror registered on 'ofproto' with auxiliary data 'aux'. * If no mirror has been registered, this has no effect. */ int ofproto_mirror_unregister(struct ofproto *ofproto, void *aux) { return ofproto_mirror_register(ofproto, aux, NULL); } /* Retrieves statistics from mirror associated with client data pointer * 'aux' in 'ofproto'. Stores packet and byte counts in 'packets' and * 'bytes', respectively. 
If a particular counters is not supported, * the appropriate argument is set to UINT64_MAX. */ int ofproto_mirror_get_stats(struct ofproto *ofproto, void *aux, uint64_t *packets, uint64_t *bytes) { if (!ofproto->ofproto_class->mirror_get_stats) { *packets = *bytes = UINT64_MAX; return EOPNOTSUPP; } return ofproto->ofproto_class->mirror_get_stats(ofproto, aux, packets, bytes); } /* Configures the VLANs whose bits are set to 1 in 'flood_vlans' as VLANs on * which all packets are flooded, instead of using MAC learning. If * 'flood_vlans' is NULL, then MAC learning applies to all VLANs. * * Flood VLANs affect only the treatment of packets output to the OFPP_NORMAL * port. */ int ofproto_set_flood_vlans(struct ofproto *ofproto, unsigned long *flood_vlans) { return (ofproto->ofproto_class->set_flood_vlans ? ofproto->ofproto_class->set_flood_vlans(ofproto, flood_vlans) : EOPNOTSUPP); } /* Returns true if 'aux' is a registered bundle that is currently in use as the * output for a mirror. */ bool ofproto_is_mirror_output_bundle(const struct ofproto *ofproto, void *aux) { return (ofproto->ofproto_class->is_mirror_output_bundle ? ofproto->ofproto_class->is_mirror_output_bundle(ofproto, aux) : false); } /* Configuration of OpenFlow tables. */ /* Returns the number of OpenFlow tables in 'ofproto'. */ int ofproto_get_n_tables(const struct ofproto *ofproto) { return ofproto->n_tables; } /* Configures the OpenFlow table in 'ofproto' with id 'table_id' with the * settings from 's'. 'table_id' must be in the range 0 through the number of * OpenFlow tables in 'ofproto' minus 1, inclusive. * * For read-only tables, only the name may be configured. 
*/ void ofproto_configure_table(struct ofproto *ofproto, int table_id, const struct ofproto_table_settings *s) { struct oftable *table; ovs_assert(table_id >= 0 && table_id < ofproto->n_tables); table = &ofproto->tables[table_id]; oftable_set_name(table, s->name); if (table->flags & OFTABLE_READONLY) { return; } if (s->groups) { oftable_enable_eviction(table, s->groups, s->n_groups); } else { oftable_disable_eviction(table); } table->max_flows = s->max_flows; ovs_rwlock_rdlock(&table->cls.rwlock); if (classifier_count(&table->cls) > table->max_flows && table->eviction_fields) { /* 'table' contains more flows than allowed. We might not be able to * evict them right away because of the asynchronous nature of flow * table changes. Schedule eviction for later. */ switch (ofproto->state) { case S_OPENFLOW: ofproto->state = S_EVICT; break; case S_EVICT: case S_FLUSH: /* We're already deleting flows, nothing more to do. */ break; } } ovs_rwlock_unlock(&table->cls.rwlock); } bool ofproto_has_snoops(const struct ofproto *ofproto) { return connmgr_has_snoops(ofproto->connmgr); } void ofproto_get_snoops(const struct ofproto *ofproto, struct sset *snoops) { connmgr_get_snoops(ofproto->connmgr, snoops); } static void ofproto_rule_delete__(struct ofproto *ofproto, struct rule *rule, uint8_t reason) OVS_REQUIRES(ofproto_mutex) { struct ofopgroup *group; ovs_assert(!rule->pending); group = ofopgroup_create_unattached(ofproto); delete_flow__(rule, group, reason); ofopgroup_submit(group); } /* Deletes 'rule' from 'cls' within 'ofproto'. * * Within an ofproto implementation, this function allows an ofproto * implementation to destroy any rules that remain when its ->destruct() * function is called. This function is not suitable for use elsewhere in an * ofproto implementation. * * This function implements steps 4.4 and 4.5 in the section titled "Rule Life * Cycle" in ofproto-provider.h. 
*/ void ofproto_rule_delete(struct ofproto *ofproto, struct rule *rule) OVS_EXCLUDED(ofproto_mutex) { struct ofopgroup *group; ovs_mutex_lock(&ofproto_mutex); ovs_assert(!rule->pending); group = ofopgroup_create_unattached(ofproto); ofoperation_create(group, rule, OFOPERATION_DELETE, OFPRR_DELETE); oftable_remove_rule__(ofproto, rule); ofproto->ofproto_class->rule_delete(rule); ofopgroup_submit(group); ovs_mutex_unlock(&ofproto_mutex); } static void ofproto_flush__(struct ofproto *ofproto) OVS_EXCLUDED(ofproto_mutex) { struct oftable *table; if (ofproto->ofproto_class->flush) { ofproto->ofproto_class->flush(ofproto); } ovs_mutex_lock(&ofproto_mutex); OFPROTO_FOR_EACH_TABLE (table, ofproto) { struct rule *rule, *next_rule; struct cls_cursor cursor; if (table->flags & OFTABLE_HIDDEN) { continue; } ovs_rwlock_rdlock(&table->cls.rwlock); cls_cursor_init(&cursor, &table->cls, NULL); ovs_rwlock_unlock(&table->cls.rwlock); CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, cr, &cursor) { if (!rule->pending) { ofproto_rule_delete__(ofproto, rule, OFPRR_DELETE); } } } ovs_mutex_unlock(&ofproto_mutex); } static void ofproto_destroy__(struct ofproto *ofproto) OVS_EXCLUDED(ofproto_mutex) { struct oftable *table; ovs_assert(list_is_empty(&ofproto->pending)); destroy_rule_executes(ofproto); guarded_list_destroy(&ofproto->rule_executes); connmgr_destroy(ofproto->connmgr); hmap_remove(&all_ofprotos, &ofproto->hmap_node); free(ofproto->name); free(ofproto->type); free(ofproto->mfr_desc); free(ofproto->hw_desc); free(ofproto->sw_desc); free(ofproto->serial_desc); free(ofproto->dp_desc); hmap_destroy(&ofproto->ports); shash_destroy(&ofproto->port_by_name); bitmap_free(ofproto->ofp_port_ids); simap_destroy(&ofproto->ofp_requests); OFPROTO_FOR_EACH_TABLE (table, ofproto) { oftable_destroy(table); } free(ofproto->tables); hmap_destroy(&ofproto->deletions); free(ofproto->vlan_bitmap); ofproto->ofproto_class->dealloc(ofproto); } void ofproto_destroy(struct ofproto *p) OVS_EXCLUDED(ofproto_mutex) 
{ struct ofport *ofport, *next_ofport; if (!p) { return; } if (p->meters) { meter_delete(p, 1, p->meter_features.max_meters); p->meter_features.max_meters = 0; free(p->meters); p->meters = NULL; } ofproto_flush__(p); HMAP_FOR_EACH_SAFE (ofport, next_ofport, hmap_node, &p->ports) { ofport_destroy(ofport); } p->ofproto_class->destruct(p); ofproto_destroy__(p); } /* Destroys the datapath with the respective 'name' and 'type'. With the Linux * kernel datapath, for example, this destroys the datapath in the kernel, and * with the netdev-based datapath, it tears down the data structures that * represent the datapath. * * The datapath should not be currently open as an ofproto. */ int ofproto_delete(const char *name, const char *type) { const struct ofproto_class *class = ofproto_class_find__(type); return (!class ? EAFNOSUPPORT : !class->del ? EACCES : class->del(type, name)); } static void process_port_change(struct ofproto *ofproto, int error, char *devname) { if (error == ENOBUFS) { reinit_ports(ofproto); } else if (!error) { update_port(ofproto, devname); free(devname); } } int ofproto_type_run(const char *datapath_type) { const struct ofproto_class *class; int error; datapath_type = ofproto_normalize_type(datapath_type); class = ofproto_class_find__(datapath_type); error = class->type_run ? class->type_run(datapath_type) : 0; if (error && error != EAGAIN) { VLOG_ERR_RL(&rl, "%s: type_run failed (%s)", datapath_type, ovs_strerror(error)); } return error; } int ofproto_type_run_fast(const char *datapath_type) { const struct ofproto_class *class; int error; datapath_type = ofproto_normalize_type(datapath_type); class = ofproto_class_find__(datapath_type); error = class->type_run_fast ? 
class->type_run_fast(datapath_type) : 0; if (error && error != EAGAIN) { VLOG_ERR_RL(&rl, "%s: type_run_fast failed (%s)", datapath_type, ovs_strerror(error)); } return error; } void ofproto_type_wait(const char *datapath_type) { const struct ofproto_class *class; datapath_type = ofproto_normalize_type(datapath_type); class = ofproto_class_find__(datapath_type); if (class->type_wait) { class->type_wait(datapath_type); } } static bool any_pending_ops(const struct ofproto *p) OVS_EXCLUDED(ofproto_mutex) { bool b; ovs_mutex_lock(&ofproto_mutex); b = !list_is_empty(&p->pending); ovs_mutex_unlock(&ofproto_mutex); return b; } int ofproto_run(struct ofproto *p) { struct sset changed_netdevs; const char *changed_netdev; struct ofport *ofport; int error; error = p->ofproto_class->run(p); if (error && error != EAGAIN) { VLOG_ERR_RL(&rl, "%s: run failed (%s)", p->name, ovs_strerror(error)); } run_rule_executes(p); /* Restore the eviction group heap invariant occasionally. */ if (p->eviction_group_timer < time_msec()) { size_t i; p->eviction_group_timer = time_msec() + 1000; for (i = 0; i < p->n_tables; i++) { struct oftable *table = &p->tables[i]; struct eviction_group *evg; struct cls_cursor cursor; struct rule *rule; if (!table->eviction_fields) { continue; } ovs_mutex_lock(&ofproto_mutex); HEAP_FOR_EACH (evg, size_node, &table->eviction_groups_by_size) { heap_rebuild(&evg->rules); } ovs_rwlock_rdlock(&table->cls.rwlock); cls_cursor_init(&cursor, &table->cls, NULL); CLS_CURSOR_FOR_EACH (rule, cr, &cursor) { if (!rule->eviction_group && (rule->idle_timeout || rule->hard_timeout)) { eviction_group_add_rule(rule); } } ovs_rwlock_unlock(&table->cls.rwlock); ovs_mutex_unlock(&ofproto_mutex); } } if (p->ofproto_class->port_poll) { char *devname; while ((error = p->ofproto_class->port_poll(p, &devname)) != EAGAIN) { process_port_change(p, error, devname); } } /* Update OpenFlow port status for any port whose netdev has changed. 
* * Refreshing a given 'ofport' can cause an arbitrary ofport to be * destroyed, so it's not safe to update ports directly from the * HMAP_FOR_EACH loop, or even to use HMAP_FOR_EACH_SAFE. Instead, we * need this two-phase approach. */ sset_init(&changed_netdevs); HMAP_FOR_EACH (ofport, hmap_node, &p->ports) { unsigned int change_seq = netdev_change_seq(ofport->netdev); if (ofport->change_seq != change_seq) { ofport->change_seq = change_seq; sset_add(&changed_netdevs, netdev_get_name(ofport->netdev)); } } SSET_FOR_EACH (changed_netdev, &changed_netdevs) { update_port(p, changed_netdev); } sset_destroy(&changed_netdevs); switch (p->state) { case S_OPENFLOW: connmgr_run(p->connmgr, handle_openflow); break; case S_EVICT: connmgr_run(p->connmgr, NULL); ofproto_evict(p); if (!any_pending_ops(p)) { p->state = S_OPENFLOW; } break; case S_FLUSH: connmgr_run(p->connmgr, NULL); ofproto_flush__(p); if (!any_pending_ops(p)) { connmgr_flushed(p->connmgr); p->state = S_OPENFLOW; } break; default: NOT_REACHED(); } if (time_msec() >= p->next_op_report) { long long int ago = (time_msec() - p->first_op) / 1000; long long int interval = (p->last_op - p->first_op) / 1000; struct ds s; ds_init(&s); ds_put_format(&s, "%d flow_mods ", p->n_add + p->n_delete + p->n_modify); if (interval == ago) { ds_put_format(&s, "in the last %lld s", ago); } else if (interval) { ds_put_format(&s, "in the %lld s starting %lld s ago", interval, ago); } else { ds_put_format(&s, "%lld s ago", ago); } ds_put_cstr(&s, " ("); if (p->n_add) { ds_put_format(&s, "%d adds, ", p->n_add); } if (p->n_delete) { ds_put_format(&s, "%d deletes, ", p->n_delete); } if (p->n_modify) { ds_put_format(&s, "%d modifications, ", p->n_modify); } s.length -= 2; ds_put_char(&s, ')'); VLOG_INFO("%s: %s", p->name, ds_cstr(&s)); ds_destroy(&s); p->n_add = p->n_delete = p->n_modify = 0; p->next_op_report = LLONG_MAX; } return error; } /* Performs periodic activity required by 'ofproto' that needs to be done * with the least possible 
latency. * * It makes sense to call this function a couple of times per poll loop, to * provide a significant performance boost on some benchmarks with the * ofproto-dpif implementation. */ int ofproto_run_fast(struct ofproto *p) { int error; error = p->ofproto_class->run_fast ? p->ofproto_class->run_fast(p) : 0; if (error && error != EAGAIN) { VLOG_ERR_RL(&rl, "%s: fastpath run failed (%s)", p->name, ovs_strerror(error)); } return error; } void ofproto_wait(struct ofproto *p) { struct ofport *ofport; p->ofproto_class->wait(p); if (p->ofproto_class->port_poll_wait) { p->ofproto_class->port_poll_wait(p); } HMAP_FOR_EACH (ofport, hmap_node, &p->ports) { if (ofport->change_seq != netdev_change_seq(ofport->netdev)) { poll_immediate_wake(); } } switch (p->state) { case S_OPENFLOW: connmgr_wait(p->connmgr, true); break; case S_EVICT: case S_FLUSH: connmgr_wait(p->connmgr, false); if (!any_pending_ops(p)) { poll_immediate_wake(); } break; } } bool ofproto_is_alive(const struct ofproto *p) { return connmgr_has_controllers(p->connmgr); } /* Adds some memory usage statistics for 'ofproto' into 'usage', for use with * memory_report(). 
*/ void ofproto_get_memory_usage(const struct ofproto *ofproto, struct simap *usage) { const struct oftable *table; unsigned int n_rules; simap_increase(usage, "ports", hmap_count(&ofproto->ports)); ovs_mutex_lock(&ofproto_mutex); simap_increase(usage, "ops", ofproto->n_pending + hmap_count(&ofproto->deletions)); ovs_mutex_unlock(&ofproto_mutex); n_rules = 0; OFPROTO_FOR_EACH_TABLE (table, ofproto) { ovs_rwlock_rdlock(&table->cls.rwlock); n_rules += classifier_count(&table->cls); ovs_rwlock_unlock(&table->cls.rwlock); } simap_increase(usage, "rules", n_rules); if (ofproto->ofproto_class->get_memory_usage) { ofproto->ofproto_class->get_memory_usage(ofproto, usage); } connmgr_get_memory_usage(ofproto->connmgr, usage); } void ofproto_get_ofproto_controller_info(const struct ofproto *ofproto, struct shash *info) { connmgr_get_controller_info(ofproto->connmgr, info); } void ofproto_free_ofproto_controller_info(struct shash *info) { connmgr_free_controller_info(info); } /* Makes a deep copy of 'old' into 'port'. */ void ofproto_port_clone(struct ofproto_port *port, const struct ofproto_port *old) { port->name = xstrdup(old->name); port->type = xstrdup(old->type); port->ofp_port = old->ofp_port; } /* Frees memory allocated to members of 'ofproto_port'. * * Do not call this function on an ofproto_port obtained from * ofproto_port_dump_next(): that function retains ownership of the data in the * ofproto_port. */ void ofproto_port_destroy(struct ofproto_port *ofproto_port) { free(ofproto_port->name); free(ofproto_port->type); } /* Initializes 'dump' to begin dumping the ports in an ofproto. * * This function provides no status indication. An error status for the entire * dump operation is provided when it is completed by calling * ofproto_port_dump_done(). 
*/ void ofproto_port_dump_start(struct ofproto_port_dump *dump, const struct ofproto *ofproto) { dump->ofproto = ofproto; dump->error = ofproto->ofproto_class->port_dump_start(ofproto, &dump->state); } /* Attempts to retrieve another port from 'dump', which must have been created * with ofproto_port_dump_start(). On success, stores a new ofproto_port into * 'port' and returns true. On failure, returns false. * * Failure might indicate an actual error or merely that the last port has been * dumped. An error status for the entire dump operation is provided when it * is completed by calling ofproto_port_dump_done(). * * The ofproto owns the data stored in 'port'. It will remain valid until at * least the next time 'dump' is passed to ofproto_port_dump_next() or * ofproto_port_dump_done(). */ bool ofproto_port_dump_next(struct ofproto_port_dump *dump, struct ofproto_port *port) { const struct ofproto *ofproto = dump->ofproto; if (dump->error) { return false; } dump->error = ofproto->ofproto_class->port_dump_next(ofproto, dump->state, port); if (dump->error) { ofproto->ofproto_class->port_dump_done(ofproto, dump->state); return false; } return true; } /* Completes port table dump operation 'dump', which must have been created * with ofproto_port_dump_start(). Returns 0 if the dump operation was * error-free, otherwise a positive errno value describing the problem. */ int ofproto_port_dump_done(struct ofproto_port_dump *dump) { const struct ofproto *ofproto = dump->ofproto; if (!dump->error) { dump->error = ofproto->ofproto_class->port_dump_done(ofproto, dump->state); } return dump->error == EOF ? 0 : dump->error; } /* Returns the type to pass to netdev_open() when a datapath of type * 'datapath_type' has a port of type 'port_type', for a few special * cases when a netdev type differs from a port type. For example, when * using the userspace datapath, a port of type "internal" needs to be * opened as "tap". 
*
 * Returns either 'port_type' itself or a string literal, which must not be
 * freed. */
const char *
ofproto_port_open_type(const char *datapath_type, const char *port_type)
{
    const struct ofproto_class *class;

    datapath_type = ofproto_normalize_type(datapath_type);
    class = ofproto_class_find__(datapath_type);
    if (!class) {
        return port_type;
    }

    return (class->port_open_type
            ? class->port_open_type(datapath_type, port_type)
            : port_type);
}

/* Attempts to add 'netdev' as a port on 'ofproto'.  If 'ofp_portp' is
 * non-null and '*ofp_portp' is not OFPP_NONE, attempts to use that as
 * the port's OpenFlow port number.
 *
 * If successful, returns 0 and sets '*ofp_portp' to the new port's
 * OpenFlow port number (if 'ofp_portp' is non-null).  On failure,
 * returns a positive errno value and sets '*ofp_portp' to OFPP_NONE (if
 * 'ofp_portp' is non-null).
 *
 * When 'ofp_portp' is non-null, failing to look the new port up again after
 * adding it counts as a failure: the lookup's error is returned. */
int
ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev,
                 ofp_port_t *ofp_portp)
{
    ofp_port_t ofp_port = ofp_portp ? *ofp_portp : OFPP_NONE;
    int error;

    error = ofproto->ofproto_class->port_add(ofproto, netdev);
    if (!error) {
        const char *netdev_name = netdev_get_name(netdev);

        /* Record the requested port number before update_port() creates the
         * ofport for the new device. */
        simap_put(&ofproto->ofp_requests, netdev_name,
                  ofp_to_u16(ofp_port));
        update_port(ofproto, netdev_name);
    }
    if (ofp_portp) {
        *ofp_portp = OFPP_NONE;
        if (!error) {
            struct ofproto_port ofproto_port;

            /* Only trust 'ofproto_port' if the query succeeded; on failure
             * ofproto_port_query_by_name() zeroes it, and a zero port number
             * must not be reported as the result. */
            error = ofproto_port_query_by_name(ofproto,
                                               netdev_get_name(netdev),
                                               &ofproto_port);
            if (!error) {
                *ofp_portp = ofproto_port.ofp_port;
                ofproto_port_destroy(&ofproto_port);
            }
        }
    }
    return error;
}

/* Looks up a port named 'devname' in 'ofproto'.  On success, returns 0 and
 * initializes '*port' appropriately; on failure, returns a positive errno
 * value.
 *
 * The caller owns the data in 'ofproto_port' and must free it with
 * ofproto_port_destroy() when it is no longer needed.
*/ int ofproto_port_query_by_name(const struct ofproto *ofproto, const char *devname, struct ofproto_port *port) { int error; error = ofproto->ofproto_class->port_query_by_name(ofproto, devname, port); if (error) { memset(port, 0, sizeof *port); } return error; } /* Deletes port number 'ofp_port' from the datapath for 'ofproto'. * Returns 0 if successful, otherwise a positive errno. */ int ofproto_port_del(struct ofproto *ofproto, ofp_port_t ofp_port) { struct ofport *ofport = ofproto_get_port(ofproto, ofp_port); const char *name = ofport ? netdev_get_name(ofport->netdev) : ""; struct simap_node *ofp_request_node; int error; ofp_request_node = simap_find(&ofproto->ofp_requests, name); if (ofp_request_node) { simap_delete(&ofproto->ofp_requests, ofp_request_node); } error = ofproto->ofproto_class->port_del(ofproto, ofp_port); if (!error && ofport) { /* 'name' is the netdev's name and update_port() is going to close the * netdev. Just in case update_port() refers to 'name' after it * destroys 'ofport', make a copy of it around the update_port() * call. */ char *devname = xstrdup(name); update_port(ofproto, devname); free(devname); } return error; } static int simple_flow_mod(struct ofproto *ofproto, const struct match *match, unsigned int priority, const struct ofpact *ofpacts, size_t ofpacts_len, enum ofp_flow_mod_command command) { struct ofputil_flow_mod fm; memset(&fm, 0, sizeof fm); fm.match = *match; fm.priority = priority; fm.cookie = 0; fm.new_cookie = 0; fm.modify_cookie = false; fm.table_id = 0; fm.command = command; fm.idle_timeout = 0; fm.hard_timeout = 0; fm.buffer_id = UINT32_MAX; fm.out_port = OFPP_ANY; fm.flags = 0; fm.ofpacts = CONST_CAST(struct ofpact *, ofpacts); fm.ofpacts_len = ofpacts_len; return handle_flow_mod__(ofproto, NULL, &fm, NULL); } /* Adds a flow to OpenFlow flow table 0 in 'p' that matches 'cls_rule' and * performs the 'n_actions' actions in 'actions'. The new flow will not * timeout. 
* * If cls_rule->priority is in the range of priorities supported by OpenFlow * (0...65535, inclusive) then the flow will be visible to OpenFlow * controllers; otherwise, it will be hidden. * * The caller retains ownership of 'cls_rule' and 'ofpacts'. * * This is a helper function for in-band control and fail-open. */ void ofproto_add_flow(struct ofproto *ofproto, const struct match *match, unsigned int priority, const struct ofpact *ofpacts, size_t ofpacts_len) OVS_EXCLUDED(ofproto_mutex) { const struct rule *rule; bool must_add; /* First do a cheap check whether the rule we're looking for already exists * with the actions that we want. If it does, then we're done. */ ovs_rwlock_rdlock(&ofproto->tables[0].cls.rwlock); rule = rule_from_cls_rule(classifier_find_match_exactly( &ofproto->tables[0].cls, match, priority)); if (rule) { ovs_mutex_lock(&rule->mutex); must_add = !ofpacts_equal(rule->actions->ofpacts, rule->actions->ofpacts_len, ofpacts, ofpacts_len); ovs_mutex_unlock(&rule->mutex); } else { must_add = true; } ovs_rwlock_unlock(&ofproto->tables[0].cls.rwlock); /* If there's no such rule or the rule doesn't have the actions we want, * fall back to a executing a full flow mod. We can't optimize this at * all because we didn't take enough locks above to ensure that the flow * table didn't already change beneath us. */ if (must_add) { simple_flow_mod(ofproto, match, priority, ofpacts, ofpacts_len, OFPFC_MODIFY_STRICT); } } /* Executes the flow modification specified in 'fm'. Returns 0 on success, an * OFPERR_* OpenFlow error code on failure, or OFPROTO_POSTPONE if the * operation cannot be initiated now but may be retried later. * * This is a helper function for in-band control and fail-open and the "learn" * action. 
*/ int ofproto_flow_mod(struct ofproto *ofproto, struct ofputil_flow_mod *fm) OVS_EXCLUDED(ofproto_mutex) { return handle_flow_mod__(ofproto, NULL, fm, NULL); } /* Searches for a rule with matching criteria exactly equal to 'target' in * ofproto's table 0 and, if it finds one, deletes it. * * This is a helper function for in-band control and fail-open. */ bool ofproto_delete_flow(struct ofproto *ofproto, const struct match *target, unsigned int priority) OVS_EXCLUDED(ofproto_mutex) { struct classifier *cls = &ofproto->tables[0].cls; struct rule *rule; /* First do a cheap check whether the rule we're looking for has already * been deleted. If so, then we're done. */ ovs_rwlock_rdlock(&cls->rwlock); rule = rule_from_cls_rule(classifier_find_match_exactly(cls, target, priority)); ovs_rwlock_unlock(&cls->rwlock); if (!rule) { return true; } /* Fall back to a executing a full flow mod. We can't optimize this at all * because we didn't take enough locks above to ensure that the flow table * didn't already change beneath us. */ return simple_flow_mod(ofproto, target, priority, NULL, 0, OFPFC_DELETE_STRICT) != OFPROTO_POSTPONE; } /* Starts the process of deleting all of the flows from all of ofproto's flow * tables and then reintroducing the flows required by in-band control and * fail-open. The process will complete in a later call to ofproto_run(). 
*/ void ofproto_flush_flows(struct ofproto *ofproto) { COVERAGE_INC(ofproto_flush); ofproto->state = S_FLUSH; } static void reinit_ports(struct ofproto *p) { struct ofproto_port_dump dump; struct sset devnames; struct ofport *ofport; struct ofproto_port ofproto_port; const char *devname; COVERAGE_INC(ofproto_reinit_ports); sset_init(&devnames); HMAP_FOR_EACH (ofport, hmap_node, &p->ports) { sset_add(&devnames, netdev_get_name(ofport->netdev)); } OFPROTO_PORT_FOR_EACH (&ofproto_port, &dump, p) { sset_add(&devnames, ofproto_port.name); } SSET_FOR_EACH (devname, &devnames) { update_port(p, devname); } sset_destroy(&devnames); } static ofp_port_t alloc_ofp_port(struct ofproto *ofproto, const char *netdev_name) { uint16_t port_idx; port_idx = simap_get(&ofproto->ofp_requests, netdev_name); port_idx = port_idx ? port_idx : UINT16_MAX; if (port_idx >= ofproto->max_ports || bitmap_is_set(ofproto->ofp_port_ids, port_idx)) { uint16_t end_port_no = ofproto->alloc_port_no; /* Search for a free OpenFlow port number. We try not to * immediately reuse them to prevent problems due to old * flows. */ for (;;) { if (++ofproto->alloc_port_no >= ofproto->max_ports) { ofproto->alloc_port_no = 0; } if (!bitmap_is_set(ofproto->ofp_port_ids, ofproto->alloc_port_no)) { port_idx = ofproto->alloc_port_no; break; } if (ofproto->alloc_port_no == end_port_no) { return OFPP_NONE; } } } bitmap_set1(ofproto->ofp_port_ids, port_idx); return u16_to_ofp(port_idx); } static void dealloc_ofp_port(const struct ofproto *ofproto, ofp_port_t ofp_port) { if (ofp_to_u16(ofp_port) < ofproto->max_ports) { bitmap_set0(ofproto->ofp_port_ids, ofp_to_u16(ofp_port)); } } /* Opens and returns a netdev for 'ofproto_port' in 'ofproto', or a null * pointer if the netdev cannot be opened. On success, also fills in * 'opp'. 
*/ static struct netdev * ofport_open(struct ofproto *ofproto, struct ofproto_port *ofproto_port, struct ofputil_phy_port *pp) { enum netdev_flags flags; struct netdev *netdev; int error; error = netdev_open(ofproto_port->name, ofproto_port->type, &netdev); if (error) { VLOG_WARN_RL(&rl, "%s: ignoring port %s (%"PRIu16") because netdev %s " "cannot be opened (%s)", ofproto->name, ofproto_port->name, ofproto_port->ofp_port, ofproto_port->name, ovs_strerror(error)); return NULL; } if (ofproto_port->ofp_port == OFPP_NONE) { if (!strcmp(ofproto->name, ofproto_port->name)) { ofproto_port->ofp_port = OFPP_LOCAL; } else { ofproto_port->ofp_port = alloc_ofp_port(ofproto, ofproto_port->name); } } pp->port_no = ofproto_port->ofp_port; netdev_get_etheraddr(netdev, pp->hw_addr); ovs_strlcpy(pp->name, ofproto_port->name, sizeof pp->name); netdev_get_flags(netdev, &flags); pp->config = flags & NETDEV_UP ? 0 : OFPUTIL_PC_PORT_DOWN; pp->state = netdev_get_carrier(netdev) ? 0 : OFPUTIL_PS_LINK_DOWN; netdev_get_features(netdev, &pp->curr, &pp->advertised, &pp->supported, &pp->peer); pp->curr_speed = netdev_features_to_bps(pp->curr, 0) / 1000; pp->max_speed = netdev_features_to_bps(pp->supported, 0) / 1000; return netdev; } /* Returns true if most fields of 'a' and 'b' are equal. Differences in name, * port number, and 'config' bits other than OFPUTIL_PS_LINK_DOWN are * disregarded. */ static bool ofport_equal(const struct ofputil_phy_port *a, const struct ofputil_phy_port *b) { return (eth_addr_equals(a->hw_addr, b->hw_addr) && a->state == b->state && !((a->config ^ b->config) & OFPUTIL_PC_PORT_DOWN) && a->curr == b->curr && a->advertised == b->advertised && a->supported == b->supported && a->peer == b->peer && a->curr_speed == b->curr_speed && a->max_speed == b->max_speed); } /* Adds an ofport to 'p' initialized based on the given 'netdev' and 'opp'. * The caller must ensure that 'p' does not have a conflicting ofport (that is, * one with the same name or port number). 
*/ static void ofport_install(struct ofproto *p, struct netdev *netdev, const struct ofputil_phy_port *pp) { const char *netdev_name = netdev_get_name(netdev); struct ofport *ofport; int error; /* Create ofport. */ ofport = p->ofproto_class->port_alloc(); if (!ofport) { error = ENOMEM; goto error; } ofport->ofproto = p; ofport->netdev = netdev; ofport->change_seq = netdev_change_seq(netdev); ofport->pp = *pp; ofport->ofp_port = pp->port_no; ofport->created = time_msec(); /* Add port to 'p'. */ hmap_insert(&p->ports, &ofport->hmap_node, hash_ofp_port(ofport->ofp_port)); shash_add(&p->port_by_name, netdev_name, ofport); update_mtu(p, ofport); /* Let the ofproto_class initialize its private data. */ error = p->ofproto_class->port_construct(ofport); if (error) { goto error; } connmgr_send_port_status(p->connmgr, pp, OFPPR_ADD); return; error: VLOG_WARN_RL(&rl, "%s: could not add port %s (%s)", p->name, netdev_name, ovs_strerror(error)); if (ofport) { ofport_destroy__(ofport); } else { netdev_close(netdev); } } /* Removes 'ofport' from 'p' and destroys it. */ static void ofport_remove(struct ofport *ofport) { connmgr_send_port_status(ofport->ofproto->connmgr, &ofport->pp, OFPPR_DELETE); ofport_destroy(ofport); } /* If 'ofproto' contains an ofport named 'name', removes it from 'ofproto' and * destroys it. */ static void ofport_remove_with_name(struct ofproto *ofproto, const char *name) { struct ofport *port = shash_find_data(&ofproto->port_by_name, name); if (port) { ofport_remove(port); } } /* Updates 'port' with new 'pp' description. * * Does not handle a name or port number change. The caller must implement * such a change as a delete followed by an add. 
*/ static void ofport_modified(struct ofport *port, struct ofputil_phy_port *pp) { memcpy(port->pp.hw_addr, pp->hw_addr, ETH_ADDR_LEN); port->pp.config = ((port->pp.config & ~OFPUTIL_PC_PORT_DOWN) | (pp->config & OFPUTIL_PC_PORT_DOWN)); port->pp.state = pp->state; port->pp.curr = pp->curr; port->pp.advertised = pp->advertised; port->pp.supported = pp->supported; port->pp.peer = pp->peer; port->pp.curr_speed = pp->curr_speed; port->pp.max_speed = pp->max_speed; connmgr_send_port_status(port->ofproto->connmgr, &port->pp, OFPPR_MODIFY); } /* Update OpenFlow 'state' in 'port' and notify controller. */ void ofproto_port_set_state(struct ofport *port, enum ofputil_port_state state) { if (port->pp.state != state) { port->pp.state = state; connmgr_send_port_status(port->ofproto->connmgr, &port->pp, OFPPR_MODIFY); } } void ofproto_port_unregister(struct ofproto *ofproto, ofp_port_t ofp_port) { struct ofport *port = ofproto_get_port(ofproto, ofp_port); if (port) { if (port->ofproto->ofproto_class->set_realdev) { port->ofproto->ofproto_class->set_realdev(port, 0, 0); } if (port->ofproto->ofproto_class->set_stp_port) { port->ofproto->ofproto_class->set_stp_port(port, NULL); } if (port->ofproto->ofproto_class->set_cfm) { port->ofproto->ofproto_class->set_cfm(port, NULL); } if (port->ofproto->ofproto_class->bundle_remove) { port->ofproto->ofproto_class->bundle_remove(port); } } } static void ofport_destroy__(struct ofport *port) { struct ofproto *ofproto = port->ofproto; const char *name = netdev_get_name(port->netdev); hmap_remove(&ofproto->ports, &port->hmap_node); shash_delete(&ofproto->port_by_name, shash_find(&ofproto->port_by_name, name)); netdev_close(port->netdev); ofproto->ofproto_class->port_dealloc(port); } static void ofport_destroy(struct ofport *port) { if (port) { dealloc_ofp_port(port->ofproto, port->ofp_port); port->ofproto->ofproto_class->port_destruct(port); ofport_destroy__(port); } } struct ofport * ofproto_get_port(const struct ofproto *ofproto, ofp_port_t 
ofp_port) { struct ofport *port; HMAP_FOR_EACH_IN_BUCKET (port, hmap_node, hash_ofp_port(ofp_port), &ofproto->ports) { if (port->ofp_port == ofp_port) { return port; } } return NULL; } int ofproto_port_get_stats(const struct ofport *port, struct netdev_stats *stats) { struct ofproto *ofproto = port->ofproto; int error; if (ofproto->ofproto_class->port_get_stats) { error = ofproto->ofproto_class->port_get_stats(port, stats); } else { error = EOPNOTSUPP; } return error; } static void update_port(struct ofproto *ofproto, const char *name) { struct ofproto_port ofproto_port; struct ofputil_phy_port pp; struct netdev *netdev; struct ofport *port; COVERAGE_INC(ofproto_update_port); /* Fetch 'name''s location and properties from the datapath. */ netdev = (!ofproto_port_query_by_name(ofproto, name, &ofproto_port) ? ofport_open(ofproto, &ofproto_port, &pp) : NULL); if (netdev) { port = ofproto_get_port(ofproto, ofproto_port.ofp_port); if (port && !strcmp(netdev_get_name(port->netdev), name)) { struct netdev *old_netdev = port->netdev; /* 'name' hasn't changed location. Any properties changed? */ if (!ofport_equal(&port->pp, &pp)) { ofport_modified(port, &pp); } update_mtu(ofproto, port); /* Install the newly opened netdev in case it has changed. * Don't close the old netdev yet in case port_modified has to * remove a retained reference to it.*/ port->netdev = netdev; port->change_seq = netdev_change_seq(netdev); if (port->ofproto->ofproto_class->port_modified) { port->ofproto->ofproto_class->port_modified(port); } netdev_close(old_netdev); } else { /* If 'port' is nonnull then its name differs from 'name' and thus * we should delete it. If we think there's a port named 'name' * then its port number must be wrong now so delete it too. */ if (port) { ofport_remove(port); } ofport_remove_with_name(ofproto, name); ofport_install(ofproto, netdev, &pp); } } else { /* Any port named 'name' is gone now. 
*/ ofport_remove_with_name(ofproto, name); } ofproto_port_destroy(&ofproto_port); } static int init_ports(struct ofproto *p) { struct ofproto_port_dump dump; struct ofproto_port ofproto_port; struct shash_node *node, *next; OFPROTO_PORT_FOR_EACH (&ofproto_port, &dump, p) { const char *name = ofproto_port.name; if (shash_find(&p->port_by_name, name)) { VLOG_WARN_RL(&rl, "%s: ignoring duplicate device %s in datapath", p->name, name); } else { struct ofputil_phy_port pp; struct netdev *netdev; /* Check if an OpenFlow port number had been requested. */ node = shash_find(&init_ofp_ports, name); if (node) { const struct iface_hint *iface_hint = node->data; simap_put(&p->ofp_requests, name, ofp_to_u16(iface_hint->ofp_port)); } netdev = ofport_open(p, &ofproto_port, &pp); if (netdev) { ofport_install(p, netdev, &pp); if (ofp_to_u16(ofproto_port.ofp_port) < p->max_ports) { p->alloc_port_no = MAX(p->alloc_port_no, ofp_to_u16(ofproto_port.ofp_port)); } } } } SHASH_FOR_EACH_SAFE(node, next, &init_ofp_ports) { struct iface_hint *iface_hint = node->data; if (!strcmp(iface_hint->br_name, p->name)) { free(iface_hint->br_name); free(iface_hint->br_type); free(iface_hint); shash_delete(&init_ofp_ports, node); } } return 0; } /* Find the minimum MTU of all non-datapath devices attached to 'p'. * Returns ETH_PAYLOAD_MAX or the minimum of the ports. */ static int find_min_mtu(struct ofproto *p) { struct ofport *ofport; int mtu = 0; HMAP_FOR_EACH (ofport, hmap_node, &p->ports) { struct netdev *netdev = ofport->netdev; int dev_mtu; /* Skip any internal ports, since that's what we're trying to * set. */ if (!strcmp(netdev_get_type(netdev), "internal")) { continue; } if (netdev_get_mtu(netdev, &dev_mtu)) { continue; } if (!mtu || dev_mtu < mtu) { mtu = dev_mtu; } } return mtu ? mtu: ETH_PAYLOAD_MAX; } /* Update MTU of all datapath devices on 'p' to the minimum of the * non-datapath ports in event of 'port' added or changed. 
*/ static void update_mtu(struct ofproto *p, struct ofport *port) { struct ofport *ofport; struct netdev *netdev = port->netdev; int dev_mtu, old_min; if (netdev_get_mtu(netdev, &dev_mtu)) { port->mtu = 0; return; } if (!strcmp(netdev_get_type(port->netdev), "internal")) { if (dev_mtu > p->min_mtu) { if (!netdev_set_mtu(port->netdev, p->min_mtu)) { dev_mtu = p->min_mtu; } } port->mtu = dev_mtu; return; } /* For non-internal port find new min mtu. */ old_min = p->min_mtu; port->mtu = dev_mtu; p->min_mtu = find_min_mtu(p); if (p->min_mtu == old_min) { return; } HMAP_FOR_EACH (ofport, hmap_node, &p->ports) { struct netdev *netdev = ofport->netdev; if (!strcmp(netdev_get_type(netdev), "internal")) { if (!netdev_set_mtu(netdev, p->min_mtu)) { ofport->mtu = p->min_mtu; } } } } void ofproto_rule_ref(struct rule *rule) { if (rule) { unsigned int orig; atomic_add(&rule->ref_count, 1, &orig); ovs_assert(orig != 0); } } void ofproto_rule_unref(struct rule *rule) { if (rule) { unsigned int orig; atomic_sub(&rule->ref_count, 1, &orig); if (orig == 1) { rule->ofproto->ofproto_class->rule_destruct(rule); ofproto_rule_destroy__(rule); } else { ovs_assert(orig != 0); } } } struct rule_actions * rule_get_actions(const struct rule *rule) OVS_EXCLUDED(rule->mutex) { struct rule_actions *actions; ovs_mutex_lock(&rule->mutex); actions = rule_get_actions__(rule); ovs_mutex_unlock(&rule->mutex); return actions; } struct rule_actions * rule_get_actions__(const struct rule *rule) OVS_REQUIRES(rule->mutex) { rule_actions_ref(rule->actions); return rule->actions; } static void ofproto_rule_destroy__(struct rule *rule) OVS_NO_THREAD_SAFETY_ANALYSIS { cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr)); rule_actions_unref(rule->actions); ovs_mutex_destroy(&rule->mutex); rule->ofproto->ofproto_class->rule_dealloc(rule); } /* Creates and returns a new 'struct rule_actions', with a ref_count of 1, * whose actions are a copy of from the 'ofpacts_len' bytes of 'ofpacts'. 
*/ struct rule_actions * rule_actions_create(const struct ofpact *ofpacts, size_t ofpacts_len) { struct rule_actions *actions; actions = xmalloc(sizeof *actions); atomic_init(&actions->ref_count, 1); actions->ofpacts = xmemdup(ofpacts, ofpacts_len); actions->ofpacts_len = ofpacts_len; actions->meter_id = ofpacts_get_meter(ofpacts, ofpacts_len); return actions; } /* Increments 'actions''s ref_count. */ void rule_actions_ref(struct rule_actions *actions) { if (actions) { unsigned int orig; atomic_add(&actions->ref_count, 1, &orig); ovs_assert(orig != 0); } } /* Decrements 'actions''s ref_count and frees 'actions' if the ref_count * reaches 0. */ void rule_actions_unref(struct rule_actions *actions) { if (actions) { unsigned int orig; atomic_sub(&actions->ref_count, 1, &orig); if (orig == 1) { free(actions->ofpacts); free(actions); } else { ovs_assert(orig != 0); } } } /* Returns true if 'rule' has an OpenFlow OFPAT_OUTPUT or OFPAT_ENQUEUE action * that outputs to 'port' (output to OFPP_FLOOD and OFPP_ALL doesn't count). */ static bool ofproto_rule_has_out_port(const struct rule *rule, ofp_port_t port) OVS_REQUIRES(ofproto_mutex) { return (port == OFPP_ANY || ofpacts_output_to_port(rule->actions->ofpacts, rule->actions->ofpacts_len, port)); } /* Returns true if a rule related to 'op' has an OpenFlow OFPAT_OUTPUT or * OFPAT_ENQUEUE action that outputs to 'out_port'. 
*/ bool ofoperation_has_out_port(const struct ofoperation *op, ofp_port_t out_port) OVS_REQUIRES(ofproto_mutex) { if (ofproto_rule_has_out_port(op->rule, out_port)) { return true; } switch (op->type) { case OFOPERATION_ADD: case OFOPERATION_DELETE: return false; case OFOPERATION_MODIFY: case OFOPERATION_REPLACE: return ofpacts_output_to_port(op->actions->ofpacts, op->actions->ofpacts_len, out_port); } NOT_REACHED(); } static void rule_execute_destroy(struct rule_execute *e) { ofproto_rule_unref(e->rule); list_remove(&e->list_node); free(e); } /* Executes all "rule_execute" operations queued up in ofproto->rule_executes, * by passing them to the ofproto provider. */ static void run_rule_executes(struct ofproto *ofproto) OVS_EXCLUDED(ofproto_mutex) { struct rule_execute *e, *next; struct list executes; guarded_list_pop_all(&ofproto->rule_executes, &executes); LIST_FOR_EACH_SAFE (e, next, list_node, &executes) { union flow_in_port in_port_; struct flow flow; in_port_.ofp_port = e->in_port; flow_extract(e->packet, 0, 0, NULL, &in_port_, &flow); ofproto->ofproto_class->rule_execute(e->rule, &flow, e->packet); rule_execute_destroy(e); } } /* Destroys and discards all "rule_execute" operations queued up in * ofproto->rule_executes. */ static void destroy_rule_executes(struct ofproto *ofproto) { struct rule_execute *e, *next; struct list executes; guarded_list_pop_all(&ofproto->rule_executes, &executes); LIST_FOR_EACH_SAFE (e, next, list_node, &executes) { ofpbuf_delete(e->packet); rule_execute_destroy(e); } } /* Returns true if 'rule' should be hidden from the controller. * * Rules with priority higher than UINT16_MAX are set up by ofproto itself * (e.g. by in-band control) and are intentionally hidden from the * controller. 
*/ static bool ofproto_rule_is_hidden(const struct rule *rule) { return rule->cr.priority > UINT16_MAX; } static enum oftable_flags rule_get_flags(const struct rule *rule) { return rule->ofproto->tables[rule->table_id].flags; } static bool rule_is_modifiable(const struct rule *rule) { return !(rule_get_flags(rule) & OFTABLE_READONLY); } static enum ofperr handle_echo_request(struct ofconn *ofconn, const struct ofp_header *oh) { ofconn_send_reply(ofconn, make_echo_reply(oh)); return 0; } static enum ofperr handle_features_request(struct ofconn *ofconn, const struct ofp_header *oh) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); struct ofputil_switch_features features; struct ofport *port; bool arp_match_ip; struct ofpbuf *b; int n_tables; int i; ofproto->ofproto_class->get_features(ofproto, &arp_match_ip, &features.actions); ovs_assert(features.actions & OFPUTIL_A_OUTPUT); /* sanity check */ /* Count only non-hidden tables in the number of tables. (Hidden tables, * if present, are always at the end.) 
*/ n_tables = ofproto->n_tables; for (i = 0; i < ofproto->n_tables; i++) { if (ofproto->tables[i].flags & OFTABLE_HIDDEN) { n_tables = i; break; } } features.datapath_id = ofproto->datapath_id; features.n_buffers = pktbuf_capacity(); features.n_tables = n_tables; features.capabilities = (OFPUTIL_C_FLOW_STATS | OFPUTIL_C_TABLE_STATS | OFPUTIL_C_PORT_STATS | OFPUTIL_C_QUEUE_STATS); if (arp_match_ip) { features.capabilities |= OFPUTIL_C_ARP_MATCH_IP; } /* FIXME: Fill in proper features.auxiliary_id for auxiliary connections */ features.auxiliary_id = 0; b = ofputil_encode_switch_features(&features, ofconn_get_protocol(ofconn), oh->xid); HMAP_FOR_EACH (port, hmap_node, &ofproto->ports) { ofputil_put_switch_features_port(&port->pp, b); } ofconn_send_reply(ofconn, b); return 0; } static enum ofperr handle_get_config_request(struct ofconn *ofconn, const struct ofp_header *oh) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); struct ofp_switch_config *osc; enum ofp_config_flags flags; struct ofpbuf *buf; /* Send reply. 
*/
    buf = ofpraw_alloc_reply(OFPRAW_OFPT_GET_CONFIG_REPLY, oh, 0);
    osc = ofpbuf_put_uninit(buf, sizeof *osc);
    flags = ofproto->frag_handling;
    /* OFPC_INVALID_TTL_TO_CONTROLLER is deprecated in OF 1.3 */
    if (oh->version < OFP13_VERSION
        && ofconn_get_invalid_ttl_to_controller(ofconn)) {
        flags |= OFPC_INVALID_TTL_TO_CONTROLLER;
    }
    osc->flags = htons(flags);
    osc->miss_send_len = htons(ofconn_get_miss_send_len(ofconn));
    ofconn_send_reply(ofconn, buf);

    return 0;
}

/* Handles a set-config message 'oh' received on 'ofconn'.
 *
 * Unless 'ofconn' is a primary connection whose role is slave, applies the
 * requested fragment handling mode to the ofproto; a mode that the provider
 * rejects is only logged, not reported as an error.  The per-connection
 * invalid-TTL-to-controller flag and miss_send_len are always updated.
 *
 * Always returns 0. */
static enum ofperr
handle_set_config(struct ofconn *ofconn, const struct ofp_header *oh)
{
    const struct ofp_switch_config *osc = ofpmsg_body(oh);
    struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
    uint16_t flags = ntohs(osc->flags);

    if (ofconn_get_type(ofconn) != OFCONN_PRIMARY
        || ofconn_get_role(ofconn) != OFPCR12_ROLE_SLAVE) {
        enum ofp_config_flags cur = ofproto->frag_handling;
        enum ofp_config_flags next = flags & OFPC_FRAG_MASK;

        /* The stored mode must never carry bits outside the fragment mask. */
        ovs_assert((cur & OFPC_FRAG_MASK) == cur);
        if (cur != next) {
            if (ofproto->ofproto_class->set_frag_handling(ofproto, next)) {
                ofproto->frag_handling = next;
            } else {
                VLOG_WARN_RL(&rl, "%s: unsupported fragment handling mode %s",
                             ofproto->name,
                             ofputil_frag_handling_to_string(next));
            }
        }
    }
    /* OFPC_INVALID_TTL_TO_CONTROLLER is deprecated in OF 1.3 */
    ofconn_set_invalid_ttl_to_controller(ofconn,
             (oh->version < OFP13_VERSION
              && flags & OFPC_INVALID_TTL_TO_CONTROLLER));

    ofconn_set_miss_send_len(ofconn, ntohs(osc->miss_send_len));

    return 0;
}

/* Checks whether 'ofconn' is a slave controller, that is, a primary
 * connection whose role is slave.  If so, returns an OpenFlow error message
 * code for the caller to propagate upward.  Otherwise, returns 0.
*/ static enum ofperr reject_slave_controller(struct ofconn *ofconn) { if (ofconn_get_type(ofconn) == OFCONN_PRIMARY && ofconn_get_role(ofconn) == OFPCR12_ROLE_SLAVE) { return OFPERR_OFPBRC_EPERM; } else { return 0; } } /* Checks that the 'ofpacts_len' bytes of actions in 'ofpacts' are appropriate * for a packet with the prerequisites satisfied by 'flow' in table 'table_id'. * 'flow' may be temporarily modified, but is restored at return. */ static enum ofperr ofproto_check_ofpacts(struct ofproto *ofproto, const struct ofpact ofpacts[], size_t ofpacts_len, struct flow *flow, uint8_t table_id) { enum ofperr error; uint32_t mid; error = ofpacts_check(ofpacts, ofpacts_len, flow, u16_to_ofp(ofproto->max_ports), table_id); if (error) { return error; } mid = ofpacts_get_meter(ofpacts, ofpacts_len); if (mid && ofproto_get_provider_meter_id(ofproto, mid) == UINT32_MAX) { return OFPERR_OFPMMFC_INVALID_METER; } return 0; } static enum ofperr handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) { struct ofproto *p = ofconn_get_ofproto(ofconn); struct ofputil_packet_out po; struct ofpbuf *payload; uint64_t ofpacts_stub[1024 / 8]; struct ofpbuf ofpacts; struct flow flow; union flow_in_port in_port_; enum ofperr error; COVERAGE_INC(ofproto_packet_out); error = reject_slave_controller(ofconn); if (error) { goto exit; } /* Decode message. */ ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); error = ofputil_decode_packet_out(&po, oh, &ofpacts); if (error) { goto exit_free_ofpacts; } if (ofp_to_u16(po.in_port) >= p->max_ports && ofp_to_u16(po.in_port) < ofp_to_u16(OFPP_MAX)) { error = OFPERR_OFPBRC_BAD_PORT; goto exit_free_ofpacts; } /* Get payload. */ if (po.buffer_id != UINT32_MAX) { error = ofconn_pktbuf_retrieve(ofconn, po.buffer_id, &payload, NULL); if (error || !payload) { goto exit_free_ofpacts; } } else { /* Ensure that the L3 header is 32-bit aligned. 
*/ payload = ofpbuf_clone_data_with_headroom(po.packet, po.packet_len, 2); } /* Verify actions against packet, then send packet if successful. */ in_port_.ofp_port = po.in_port; flow_extract(payload, 0, 0, NULL, &in_port_, &flow); error = ofproto_check_ofpacts(p, po.ofpacts, po.ofpacts_len, &flow, 0); if (!error) { error = p->ofproto_class->packet_out(p, payload, &flow, po.ofpacts, po.ofpacts_len); } ofpbuf_delete(payload); exit_free_ofpacts: ofpbuf_uninit(&ofpacts); exit: return error; } static void update_port_config(struct ofport *port, enum ofputil_port_config config, enum ofputil_port_config mask) { enum ofputil_port_config old_config = port->pp.config; enum ofputil_port_config toggle; toggle = (config ^ port->pp.config) & mask; if (toggle & OFPUTIL_PC_PORT_DOWN) { if (config & OFPUTIL_PC_PORT_DOWN) { netdev_turn_flags_off(port->netdev, NETDEV_UP, NULL); } else { netdev_turn_flags_on(port->netdev, NETDEV_UP, NULL); } toggle &= ~OFPUTIL_PC_PORT_DOWN; } port->pp.config ^= toggle; if (port->pp.config != old_config) { port->ofproto->ofproto_class->port_reconfigured(port, old_config); } } static enum ofperr handle_port_mod(struct ofconn *ofconn, const struct ofp_header *oh) { struct ofproto *p = ofconn_get_ofproto(ofconn); struct ofputil_port_mod pm; struct ofport *port; enum ofperr error; error = reject_slave_controller(ofconn); if (error) { return error; } error = ofputil_decode_port_mod(oh, &pm); if (error) { return error; } port = ofproto_get_port(p, pm.port_no); if (!port) { return OFPERR_OFPPMFC_BAD_PORT; } else if (!eth_addr_equals(port->pp.hw_addr, pm.hw_addr)) { return OFPERR_OFPPMFC_BAD_HW_ADDR; } else { update_port_config(port, pm.config, pm.mask); if (pm.advertise) { netdev_set_advertisements(port->netdev, pm.advertise); } } return 0; } static enum ofperr handle_desc_stats_request(struct ofconn *ofconn, const struct ofp_header *request) { static const char *default_mfr_desc = "Nicira, Inc."; static const char *default_hw_desc = "Open vSwitch"; static 
const char *default_sw_desc = VERSION; static const char *default_serial_desc = "None"; static const char *default_dp_desc = "None"; struct ofproto *p = ofconn_get_ofproto(ofconn); struct ofp_desc_stats *ods; struct ofpbuf *msg; msg = ofpraw_alloc_stats_reply(request, 0); ods = ofpbuf_put_zeros(msg, sizeof *ods); ovs_strlcpy(ods->mfr_desc, p->mfr_desc ? p->mfr_desc : default_mfr_desc, sizeof ods->mfr_desc); ovs_strlcpy(ods->hw_desc, p->hw_desc ? p->hw_desc : default_hw_desc, sizeof ods->hw_desc); ovs_strlcpy(ods->sw_desc, p->sw_desc ? p->sw_desc : default_sw_desc, sizeof ods->sw_desc); ovs_strlcpy(ods->serial_num, p->serial_desc ? p->serial_desc : default_serial_desc, sizeof ods->serial_num); ovs_strlcpy(ods->dp_desc, p->dp_desc ? p->dp_desc : default_dp_desc, sizeof ods->dp_desc); ofconn_send_reply(ofconn, msg); return 0; } static enum ofperr handle_table_stats_request(struct ofconn *ofconn, const struct ofp_header *request) { struct ofproto *p = ofconn_get_ofproto(ofconn); struct ofp12_table_stats *ots; struct ofpbuf *msg; int n_tables; size_t i; /* Set up default values. * * ofp12_table_stats is used as a generic structure as * it is able to hold all the fields for ofp10_table_stats * and ofp11_table_stats (and of course itself). */ ots = xcalloc(p->n_tables, sizeof *ots); for (i = 0; i < p->n_tables; i++) { ots[i].table_id = i; sprintf(ots[i].name, "table%zu", i); ots[i].match = htonll(OFPXMT12_MASK); ots[i].wildcards = htonll(OFPXMT12_MASK); ots[i].write_actions = htonl(OFPAT11_OUTPUT); ots[i].apply_actions = htonl(OFPAT11_OUTPUT); ots[i].write_setfields = htonll(OFPXMT12_MASK); ots[i].apply_setfields = htonll(OFPXMT12_MASK); ots[i].metadata_match = htonll(UINT64_MAX); ots[i].metadata_write = htonll(UINT64_MAX); ots[i].instructions = htonl(OFPIT11_ALL); ots[i].config = htonl(OFPTC11_TABLE_MISS_MASK); ots[i].max_entries = htonl(1000000); /* An arbitrary big number. 
*/ ovs_rwlock_rdlock(&p->tables[i].cls.rwlock); ots[i].active_count = htonl(classifier_count(&p->tables[i].cls)); ovs_rwlock_unlock(&p->tables[i].cls.rwlock); } p->ofproto_class->get_tables(p, ots); /* Post-process the tables, dropping hidden tables. */ n_tables = p->n_tables; for (i = 0; i < p->n_tables; i++) { const struct oftable *table = &p->tables[i]; if (table->flags & OFTABLE_HIDDEN) { n_tables = i; break; } if (table->name) { ovs_strzcpy(ots[i].name, table->name, sizeof ots[i].name); } if (table->max_flows < ntohl(ots[i].max_entries)) { ots[i].max_entries = htonl(table->max_flows); } } msg = ofputil_encode_table_stats_reply(ots, n_tables, request); ofconn_send_reply(ofconn, msg); free(ots); return 0; } static void append_port_stat(struct ofport *port, struct list *replies) { struct ofputil_port_stats ops = { .port_no = port->pp.port_no }; calc_duration(port->created, time_msec(), &ops.duration_sec, &ops.duration_nsec); /* Intentionally ignore return value, since errors will set * 'stats' to all-1s, which is correct for OpenFlow, and * netdev_get_stats() will log errors. 
*/ ofproto_port_get_stats(port, &ops.stats); ofputil_append_port_stat(replies, &ops); } static enum ofperr handle_port_stats_request(struct ofconn *ofconn, const struct ofp_header *request) { struct ofproto *p = ofconn_get_ofproto(ofconn); struct ofport *port; struct list replies; ofp_port_t port_no; enum ofperr error; error = ofputil_decode_port_stats_request(request, &port_no); if (error) { return error; } ofpmp_init(&replies, request); if (port_no != OFPP_ANY) { port = ofproto_get_port(p, port_no); if (port) { append_port_stat(port, &replies); } } else { HMAP_FOR_EACH (port, hmap_node, &p->ports) { append_port_stat(port, &replies); } } ofconn_send_replies(ofconn, &replies); return 0; } static enum ofperr handle_port_desc_stats_request(struct ofconn *ofconn, const struct ofp_header *request) { struct ofproto *p = ofconn_get_ofproto(ofconn); enum ofp_version version; struct ofport *port; struct list replies; ofpmp_init(&replies, request); version = ofputil_protocol_to_ofp_version(ofconn_get_protocol(ofconn)); HMAP_FOR_EACH (port, hmap_node, &p->ports) { ofputil_append_port_desc_stats_reply(version, &port->pp, &replies); } ofconn_send_replies(ofconn, &replies); return 0; } static uint32_t hash_cookie(ovs_be64 cookie) { return hash_2words((OVS_FORCE uint64_t)cookie >> 32, (OVS_FORCE uint64_t)cookie); } static void cookies_insert(struct ofproto *ofproto, struct rule *rule) OVS_REQUIRES(ofproto_mutex) { hindex_insert(&ofproto->cookies, &rule->cookie_node, hash_cookie(rule->flow_cookie)); } static void cookies_remove(struct ofproto *ofproto, struct rule *rule) OVS_REQUIRES(ofproto_mutex) { hindex_remove(&ofproto->cookies, &rule->cookie_node); } static void ofproto_rule_change_cookie(struct ofproto *ofproto, struct rule *rule, ovs_be64 new_cookie) OVS_REQUIRES(ofproto_mutex) { if (new_cookie != rule->flow_cookie) { cookies_remove(ofproto, rule); ovs_mutex_lock(&rule->mutex); rule->flow_cookie = new_cookie; ovs_mutex_unlock(&rule->mutex); cookies_insert(ofproto, rule); } 
} static void calc_duration(long long int start, long long int now, uint32_t *sec, uint32_t *nsec) { long long int msecs = now - start; *sec = msecs / 1000; *nsec = (msecs % 1000) * (1000 * 1000); } /* Checks whether 'table_id' is 0xff or a valid table ID in 'ofproto'. Returns * 0 if 'table_id' is OK, otherwise an OpenFlow error code. */ static enum ofperr check_table_id(const struct ofproto *ofproto, uint8_t table_id) { return (table_id == 0xff || table_id < ofproto->n_tables ? 0 : OFPERR_OFPBRC_BAD_TABLE_ID); } static struct oftable * next_visible_table(const struct ofproto *ofproto, uint8_t table_id) { struct oftable *table; for (table = &ofproto->tables[table_id]; table < &ofproto->tables[ofproto->n_tables]; table++) { if (!(table->flags & OFTABLE_HIDDEN)) { return table; } } return NULL; } static struct oftable * first_matching_table(const struct ofproto *ofproto, uint8_t table_id) { if (table_id == 0xff) { return next_visible_table(ofproto, 0); } else if (table_id < ofproto->n_tables) { return &ofproto->tables[table_id]; } else { return NULL; } } static struct oftable * next_matching_table(const struct ofproto *ofproto, const struct oftable *table, uint8_t table_id) { return (table_id == 0xff ? next_visible_table(ofproto, (table - ofproto->tables) + 1) : NULL); } /* Assigns TABLE to each oftable, in turn, that matches TABLE_ID in OFPROTO: * * - If TABLE_ID is 0xff, this iterates over every classifier table in * OFPROTO, skipping tables marked OFTABLE_HIDDEN. * * - If TABLE_ID is the number of a table in OFPROTO, then the loop iterates * only once, for that table. (This can be used to access tables marked * OFTABLE_HIDDEN.) * * - Otherwise, TABLE_ID isn't valid for OFPROTO, so the loop won't be * entered at all. (Perhaps you should have validated TABLE_ID with * check_table_id().) * * All parameters are evaluated multiple times. 
*/ #define FOR_EACH_MATCHING_TABLE(TABLE, TABLE_ID, OFPROTO) \ for ((TABLE) = first_matching_table(OFPROTO, TABLE_ID); \ (TABLE) != NULL; \ (TABLE) = next_matching_table(OFPROTO, TABLE, TABLE_ID)) /* Initializes 'criteria' in a straightforward way based on the other * parameters. * * For "loose" matching, the 'priority' parameter is unimportant and may be * supplied as 0. */ static void rule_criteria_init(struct rule_criteria *criteria, uint8_t table_id, const struct match *match, unsigned int priority, ovs_be64 cookie, ovs_be64 cookie_mask, ofp_port_t out_port) { criteria->table_id = table_id; cls_rule_init(&criteria->cr, match, priority); criteria->cookie = cookie; criteria->cookie_mask = cookie_mask; criteria->out_port = out_port; } static void rule_criteria_destroy(struct rule_criteria *criteria) { cls_rule_destroy(&criteria->cr); } void rule_collection_init(struct rule_collection *rules) { rules->rules = rules->stub; rules->n = 0; rules->capacity = ARRAY_SIZE(rules->stub); } void rule_collection_add(struct rule_collection *rules, struct rule *rule) { if (rules->n >= rules->capacity) { size_t old_size, new_size; old_size = rules->capacity * sizeof *rules->rules; rules->capacity *= 2; new_size = rules->capacity * sizeof *rules->rules; if (rules->rules == rules->stub) { rules->rules = xmalloc(new_size); memcpy(rules->rules, rules->stub, old_size); } else { rules->rules = xrealloc(rules->rules, new_size); } } rules->rules[rules->n++] = rule; } void rule_collection_ref(struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { size_t i; for (i = 0; i < rules->n; i++) { ofproto_rule_ref(rules->rules[i]); } } void rule_collection_unref(struct rule_collection *rules) { size_t i; for (i = 0; i < rules->n; i++) { ofproto_rule_unref(rules->rules[i]); } } void rule_collection_destroy(struct rule_collection *rules) { if (rules->rules != rules->stub) { free(rules->rules); } } static enum ofperr collect_rule(struct rule *rule, const struct rule_criteria *c, struct 
rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { /* We ordinarily want to skip hidden rules, but there has to be a way for * code internal to OVS to modify and delete them, so if the criteria * specify a priority that can only be for a hidden flow, then allow hidden * rules to be selected. (This doesn't allow OpenFlow clients to meddle * with hidden flows because OpenFlow uses only a 16-bit field to specify * priority.) */ if (ofproto_rule_is_hidden(rule) && c->cr.priority <= UINT16_MAX) { return 0; } else if (rule->pending) { return OFPROTO_POSTPONE; } else { if ((c->table_id == rule->table_id || c->table_id == 0xff) && ofproto_rule_has_out_port(rule, c->out_port) && !((rule->flow_cookie ^ c->cookie) & c->cookie_mask)) { rule_collection_add(rules, rule); } return 0; } } /* Searches 'ofproto' for rules that match the criteria in 'criteria'. Matches * on classifiers rules are done in the "loose" way required for OpenFlow * OFPFC_MODIFY and OFPFC_DELETE requests. Puts the selected rules on list * 'rules'. * * Hidden rules are always omitted. * * Returns 0 on success, otherwise an OpenFlow error code. 
*/ static enum ofperr collect_rules_loose(struct ofproto *ofproto, const struct rule_criteria *criteria, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { struct oftable *table; enum ofperr error; rule_collection_init(rules); error = check_table_id(ofproto, criteria->table_id); if (error) { goto exit; } if (criteria->cookie_mask == htonll(UINT64_MAX)) { struct rule *rule; HINDEX_FOR_EACH_WITH_HASH (rule, cookie_node, hash_cookie(criteria->cookie), &ofproto->cookies) { if (cls_rule_is_loose_match(&rule->cr, &criteria->cr.match)) { error = collect_rule(rule, criteria, rules); if (error) { break; } } } } else { FOR_EACH_MATCHING_TABLE (table, criteria->table_id, ofproto) { struct cls_cursor cursor; struct rule *rule; ovs_rwlock_rdlock(&table->cls.rwlock); cls_cursor_init(&cursor, &table->cls, &criteria->cr); CLS_CURSOR_FOR_EACH (rule, cr, &cursor) { error = collect_rule(rule, criteria, rules); if (error) { break; } } ovs_rwlock_unlock(&table->cls.rwlock); } } exit: if (error) { rule_collection_destroy(rules); } return error; } /* Searches 'ofproto' for rules that match the criteria in 'criteria'. Matches * on classifiers rules are done in the "strict" way required for OpenFlow * OFPFC_MODIFY_STRICT and OFPFC_DELETE_STRICT requests. Puts the selected * rules on list 'rules'. * * Hidden rules are always omitted. * * Returns 0 on success, otherwise an OpenFlow error code. 
*/ static enum ofperr collect_rules_strict(struct ofproto *ofproto, const struct rule_criteria *criteria, struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { struct oftable *table; int error; rule_collection_init(rules); error = check_table_id(ofproto, criteria->table_id); if (error) { goto exit; } if (criteria->cookie_mask == htonll(UINT64_MAX)) { struct rule *rule; HINDEX_FOR_EACH_WITH_HASH (rule, cookie_node, hash_cookie(criteria->cookie), &ofproto->cookies) { if (cls_rule_equal(&rule->cr, &criteria->cr)) { error = collect_rule(rule, criteria, rules); if (error) { break; } } } } else { FOR_EACH_MATCHING_TABLE (table, criteria->table_id, ofproto) { struct rule *rule; ovs_rwlock_rdlock(&table->cls.rwlock); rule = rule_from_cls_rule(classifier_find_rule_exactly( &table->cls, &criteria->cr)); ovs_rwlock_unlock(&table->cls.rwlock); if (rule) { error = collect_rule(rule, criteria, rules); if (error) { break; } } } } exit: if (error) { rule_collection_destroy(rules); } return error; } /* Returns 'age_ms' (a duration in milliseconds), converted to seconds and * forced into the range of a uint16_t. */ static int age_secs(long long int age_ms) { return (age_ms < 0 ? 0 : age_ms >= UINT16_MAX * 1000 ? 
UINT16_MAX : (unsigned int) age_ms / 1000); } static enum ofperr handle_flow_stats_request(struct ofconn *ofconn, const struct ofp_header *request) OVS_EXCLUDED(ofproto_mutex) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); struct ofputil_flow_stats_request fsr; struct rule_criteria criteria; struct rule_collection rules; struct list replies; enum ofperr error; size_t i; error = ofputil_decode_flow_stats_request(&fsr, request); if (error) { return error; } rule_criteria_init(&criteria, fsr.table_id, &fsr.match, 0, fsr.cookie, fsr.cookie_mask, fsr.out_port); ovs_mutex_lock(&ofproto_mutex); error = collect_rules_loose(ofproto, &criteria, &rules); rule_criteria_destroy(&criteria); if (!error) { rule_collection_ref(&rules); } ovs_mutex_unlock(&ofproto_mutex); if (error) { return error; } ofpmp_init(&replies, request); for (i = 0; i < rules.n; i++) { struct rule *rule = rules.rules[i]; long long int now = time_msec(); struct ofputil_flow_stats fs; long long int created, used, modified; struct rule_actions *actions; bool send_flow_removed; ovs_mutex_lock(&rule->mutex); fs.cookie = rule->flow_cookie; fs.idle_timeout = rule->idle_timeout; fs.hard_timeout = rule->hard_timeout; created = rule->created; used = rule->used; modified = rule->modified; actions = rule_get_actions__(rule); send_flow_removed = rule->send_flow_removed; ovs_mutex_unlock(&rule->mutex); minimatch_expand(&rule->cr.match, &fs.match); fs.table_id = rule->table_id; calc_duration(created, now, &fs.duration_sec, &fs.duration_nsec); fs.priority = rule->cr.priority; fs.idle_age = age_secs(now - used); fs.hard_age = age_secs(now - modified); ofproto->ofproto_class->rule_get_stats(rule, &fs.packet_count, &fs.byte_count); fs.ofpacts = actions->ofpacts; fs.ofpacts_len = actions->ofpacts_len; fs.flags = 0; if (send_flow_removed) { fs.flags |= OFPUTIL_FF_SEND_FLOW_REM; /* FIXME: Implement OFPUTIL_FF_NO_PKT_COUNTS and OFPUTIL_FF_NO_BYT_COUNTS. 
*/ } ofputil_append_flow_stats_reply(&fs, &replies); rule_actions_unref(actions); } rule_collection_unref(&rules); rule_collection_destroy(&rules); ofconn_send_replies(ofconn, &replies); return 0; } static void flow_stats_ds(struct rule *rule, struct ds *results) { uint64_t packet_count, byte_count; struct rule_actions *actions; long long int created; rule->ofproto->ofproto_class->rule_get_stats(rule, &packet_count, &byte_count); ovs_mutex_lock(&rule->mutex); actions = rule_get_actions__(rule); created = rule->created; ovs_mutex_unlock(&rule->mutex); if (rule->table_id != 0) { ds_put_format(results, "table_id=%"PRIu8", ", rule->table_id); } ds_put_format(results, "duration=%llds, ", (time_msec() - created) / 1000); ds_put_format(results, "priority=%u, ", rule->cr.priority); ds_put_format(results, "n_packets=%"PRIu64", ", packet_count); ds_put_format(results, "n_bytes=%"PRIu64", ", byte_count); cls_rule_format(&rule->cr, results); ds_put_char(results, ','); ofpacts_format(actions->ofpacts, actions->ofpacts_len, results); ds_put_cstr(results, "\n"); rule_actions_unref(actions); } /* Adds a pretty-printed description of all flows to 'results', including * hidden flows (e.g., set up by in-band control). */ void ofproto_get_all_flows(struct ofproto *p, struct ds *results) { struct oftable *table; OFPROTO_FOR_EACH_TABLE (table, p) { struct cls_cursor cursor; struct rule *rule; ovs_rwlock_rdlock(&table->cls.rwlock); cls_cursor_init(&cursor, &table->cls, NULL); CLS_CURSOR_FOR_EACH (rule, cr, &cursor) { flow_stats_ds(rule, results); } ovs_rwlock_unlock(&table->cls.rwlock); } } /* Obtains the NetFlow engine type and engine ID for 'ofproto' into * '*engine_type' and '*engine_id', respectively. */ void ofproto_get_netflow_ids(const struct ofproto *ofproto, uint8_t *engine_type, uint8_t *engine_id) { ofproto->ofproto_class->get_netflow_ids(ofproto, engine_type, engine_id); } /* Checks the status of CFM configured on 'ofp_port' within 'ofproto'. 
Returns * true if the port's CFM status was successfully stored into '*status'. * Returns false if the port did not have CFM configured, in which case * '*status' is indeterminate. * * The caller must provide and owns '*status', and must free 'status->rmps'. */ bool ofproto_port_get_cfm_status(const struct ofproto *ofproto, ofp_port_t ofp_port, struct ofproto_cfm_status *status) { struct ofport *ofport = ofproto_get_port(ofproto, ofp_port); return (ofport && ofproto->ofproto_class->get_cfm_status && ofproto->ofproto_class->get_cfm_status(ofport, status)); } static enum ofperr handle_aggregate_stats_request(struct ofconn *ofconn, const struct ofp_header *oh) OVS_EXCLUDED(ofproto_mutex) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); struct ofputil_flow_stats_request request; struct ofputil_aggregate_stats stats; bool unknown_packets, unknown_bytes; struct rule_criteria criteria; struct rule_collection rules; struct ofpbuf *reply; enum ofperr error; size_t i; error = ofputil_decode_flow_stats_request(&request, oh); if (error) { return error; } rule_criteria_init(&criteria, request.table_id, &request.match, 0, request.cookie, request.cookie_mask, request.out_port); ovs_mutex_lock(&ofproto_mutex); error = collect_rules_loose(ofproto, &criteria, &rules); rule_criteria_destroy(&criteria); if (!error) { rule_collection_ref(&rules); } ovs_mutex_unlock(&ofproto_mutex); if (error) { return error; } memset(&stats, 0, sizeof stats); unknown_packets = unknown_bytes = false; for (i = 0; i < rules.n; i++) { struct rule *rule = rules.rules[i]; uint64_t packet_count; uint64_t byte_count; ofproto->ofproto_class->rule_get_stats(rule, &packet_count, &byte_count); if (packet_count == UINT64_MAX) { unknown_packets = true; } else { stats.packet_count += packet_count; } if (byte_count == UINT64_MAX) { unknown_bytes = true; } else { stats.byte_count += byte_count; } stats.flow_count++; } if (unknown_packets) { stats.packet_count = UINT64_MAX; } if (unknown_bytes) { stats.byte_count 
= UINT64_MAX; } rule_collection_unref(&rules); rule_collection_destroy(&rules); reply = ofputil_encode_aggregate_stats_reply(&stats, oh); ofconn_send_reply(ofconn, reply); return 0; } struct queue_stats_cbdata { struct ofport *ofport; struct list replies; long long int now; }; static void put_queue_stats(struct queue_stats_cbdata *cbdata, uint32_t queue_id, const struct netdev_queue_stats *stats) { struct ofputil_queue_stats oqs; oqs.port_no = cbdata->ofport->pp.port_no; oqs.queue_id = queue_id; oqs.tx_bytes = stats->tx_bytes; oqs.tx_packets = stats->tx_packets; oqs.tx_errors = stats->tx_errors; if (stats->created != LLONG_MIN) { calc_duration(stats->created, cbdata->now, &oqs.duration_sec, &oqs.duration_nsec); } else { oqs.duration_sec = oqs.duration_nsec = UINT32_MAX; } ofputil_append_queue_stat(&cbdata->replies, &oqs); } static void handle_queue_stats_dump_cb(uint32_t queue_id, struct netdev_queue_stats *stats, void *cbdata_) { struct queue_stats_cbdata *cbdata = cbdata_; put_queue_stats(cbdata, queue_id, stats); } static enum ofperr handle_queue_stats_for_port(struct ofport *port, uint32_t queue_id, struct queue_stats_cbdata *cbdata) { cbdata->ofport = port; if (queue_id == OFPQ_ALL) { netdev_dump_queue_stats(port->netdev, handle_queue_stats_dump_cb, cbdata); } else { struct netdev_queue_stats stats; if (!netdev_get_queue_stats(port->netdev, queue_id, &stats)) { put_queue_stats(cbdata, queue_id, &stats); } else { return OFPERR_OFPQOFC_BAD_QUEUE; } } return 0; } static enum ofperr handle_queue_stats_request(struct ofconn *ofconn, const struct ofp_header *rq) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); struct queue_stats_cbdata cbdata; struct ofport *port; enum ofperr error; struct ofputil_queue_stats_request oqsr; COVERAGE_INC(ofproto_queue_req); ofpmp_init(&cbdata.replies, rq); cbdata.now = time_msec(); error = ofputil_decode_queue_stats_request(rq, &oqsr); if (error) { return error; } if (oqsr.port_no == OFPP_ANY) { error = 
OFPERR_OFPQOFC_BAD_QUEUE; HMAP_FOR_EACH (port, hmap_node, &ofproto->ports) { if (!handle_queue_stats_for_port(port, oqsr.queue_id, &cbdata)) { error = 0; } } } else { port = ofproto_get_port(ofproto, oqsr.port_no); error = (port ? handle_queue_stats_for_port(port, oqsr.queue_id, &cbdata) : OFPERR_OFPQOFC_BAD_PORT); } if (!error) { ofconn_send_replies(ofconn, &cbdata.replies); } else { ofpbuf_list_delete(&cbdata.replies); } return error; } static bool is_flow_deletion_pending(const struct ofproto *ofproto, const struct cls_rule *cls_rule, uint8_t table_id) OVS_REQUIRES(ofproto_mutex) { if (!hmap_is_empty(&ofproto->deletions)) { struct ofoperation *op; HMAP_FOR_EACH_WITH_HASH (op, hmap_node, cls_rule_hash(cls_rule, table_id), &ofproto->deletions) { if (cls_rule_equal(cls_rule, &op->rule->cr)) { return true; } } } return false; } static bool should_evict_a_rule(struct oftable *table, unsigned int extra_space) OVS_REQUIRES(ofproto_mutex) OVS_NO_THREAD_SAFETY_ANALYSIS { return classifier_count(&table->cls) + extra_space > table->max_flows; } static enum ofperr evict_rules_from_table(struct ofproto *ofproto, struct oftable *table, unsigned int extra_space) OVS_REQUIRES(ofproto_mutex) { while (should_evict_a_rule(table, extra_space)) { struct rule *rule; if (!choose_rule_to_evict(table, &rule)) { return OFPERR_OFPFMFC_TABLE_FULL; } else if (rule->pending) { return OFPROTO_POSTPONE; } else { struct ofopgroup *group = ofopgroup_create_unattached(ofproto); delete_flow__(rule, group, OFPRR_EVICTION); ofopgroup_submit(group); } } return 0; } /* Implements OFPFC_ADD and the cases for OFPFC_MODIFY and OFPFC_MODIFY_STRICT * in which no matching flow already exists in the flow table. * * Adds the flow specified by 'ofm', which is followed by 'n_actions' * ofp_actions, to the ofproto's flow table. Returns 0 on success, an OpenFlow * error code on failure, or OFPROTO_POSTPONE if the operation cannot be * initiated now but may be retried later. 
* * The caller retains ownership of 'fm->ofpacts'. * * 'ofconn' is used to retrieve the packet buffer specified in ofm->buffer_id, * if any. */ static enum ofperr add_flow(struct ofproto *ofproto, struct ofconn *ofconn, struct ofputil_flow_mod *fm, const struct ofp_header *request) OVS_REQUIRES(ofproto_mutex) { struct oftable *table; struct ofopgroup *group; struct cls_rule cr; struct rule *rule; uint8_t table_id; int error; error = check_table_id(ofproto, fm->table_id); if (error) { return error; } /* Pick table. */ if (fm->table_id == 0xff) { if (ofproto->ofproto_class->rule_choose_table) { error = ofproto->ofproto_class->rule_choose_table(ofproto, &fm->match, &table_id); if (error) { return error; } ovs_assert(table_id < ofproto->n_tables); } else { table_id = 0; } } else if (fm->table_id < ofproto->n_tables) { table_id = fm->table_id; } else { return OFPERR_OFPBRC_BAD_TABLE_ID; } table = &ofproto->tables[table_id]; if (table->flags & OFTABLE_READONLY) { return OFPERR_OFPBRC_EPERM; } cls_rule_init(&cr, &fm->match, fm->priority); /* Transform "add" into "modify" if there's an existing identical flow. */ ovs_rwlock_rdlock(&table->cls.rwlock); rule = rule_from_cls_rule(classifier_find_rule_exactly(&table->cls, &cr)); ovs_rwlock_unlock(&table->cls.rwlock); if (rule) { cls_rule_destroy(&cr); if (!rule_is_modifiable(rule)) { return OFPERR_OFPBRC_EPERM; } else if (rule->pending) { return OFPROTO_POSTPONE; } else { struct rule_collection rules; rule_collection_init(&rules); rule_collection_add(&rules, rule); fm->modify_cookie = true; error = modify_flows__(ofproto, ofconn, fm, request, &rules); rule_collection_destroy(&rules); return error; } } /* Verify actions. */ error = ofproto_check_ofpacts(ofproto, fm->ofpacts, fm->ofpacts_len, &fm->match.flow, table_id); if (error) { cls_rule_destroy(&cr); return error; } /* Serialize against pending deletion. 
*/ if (is_flow_deletion_pending(ofproto, &cr, table_id)) { cls_rule_destroy(&cr); return OFPROTO_POSTPONE; } /* Check for overlap, if requested. */ if (fm->flags & OFPUTIL_FF_CHECK_OVERLAP) { bool overlaps; ovs_rwlock_rdlock(&table->cls.rwlock); overlaps = classifier_rule_overlaps(&table->cls, &cr); ovs_rwlock_unlock(&table->cls.rwlock); if (overlaps) { cls_rule_destroy(&cr); return OFPERR_OFPFMFC_OVERLAP; } } /* If necessary, evict an existing rule to clear out space. */ error = evict_rules_from_table(ofproto, table, 1); if (error) { cls_rule_destroy(&cr); return error; } /* Allocate new rule. */ rule = ofproto->ofproto_class->rule_alloc(); if (!rule) { cls_rule_destroy(&cr); VLOG_WARN_RL(&rl, "%s: failed to create rule (%s)", ofproto->name, ovs_strerror(error)); return ENOMEM; } /* Initialize base state. */ *CONST_CAST(struct ofproto **, &rule->ofproto) = ofproto; cls_rule_move(CONST_CAST(struct cls_rule *, &rule->cr), &cr); atomic_init(&rule->ref_count, 1); rule->pending = NULL; rule->flow_cookie = fm->new_cookie; rule->created = rule->modified = rule->used = time_msec(); ovs_mutex_init(&rule->mutex); ovs_mutex_lock(&rule->mutex); rule->idle_timeout = fm->idle_timeout; rule->hard_timeout = fm->hard_timeout; ovs_mutex_unlock(&rule->mutex); *CONST_CAST(uint8_t *, &rule->table_id) = table - ofproto->tables; rule->send_flow_removed = (fm->flags & OFPUTIL_FF_SEND_FLOW_REM) != 0; rule->actions = rule_actions_create(fm->ofpacts, fm->ofpacts_len); list_init(&rule->meter_list_node); rule->eviction_group = NULL; list_init(&rule->expirable); rule->monitor_flags = 0; rule->add_seqno = 0; rule->modify_seqno = 0; /* Construct rule, initializing derived state. */ error = ofproto->ofproto_class->rule_construct(rule); if (error) { ofproto_rule_destroy__(rule); return error; } /* Insert rule. 
*/ oftable_insert_rule(rule); group = ofopgroup_create(ofproto, ofconn, request, fm->buffer_id); ofoperation_create(group, rule, OFOPERATION_ADD, 0); ofproto->ofproto_class->rule_insert(rule); ofopgroup_submit(group); return error; } /* OFPFC_MODIFY and OFPFC_MODIFY_STRICT. */ /* Modifies the rules listed in 'rules', changing their actions to match those * in 'fm'. * * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id, * if any. * * Returns 0 on success, otherwise an OpenFlow error code. */ static enum ofperr modify_flows__(struct ofproto *ofproto, struct ofconn *ofconn, struct ofputil_flow_mod *fm, const struct ofp_header *request, const struct rule_collection *rules) OVS_REQUIRES(ofproto_mutex) { enum ofoperation_type type; struct ofopgroup *group; enum ofperr error; size_t i; type = fm->command == OFPFC_ADD ? OFOPERATION_REPLACE : OFOPERATION_MODIFY; group = ofopgroup_create(ofproto, ofconn, request, fm->buffer_id); error = OFPERR_OFPBRC_EPERM; for (i = 0; i < rules->n; i++) { struct rule *rule = rules->rules[i]; struct ofoperation *op; bool actions_changed; bool reset_counters; /* FIXME: Implement OFPFUTIL_FF_RESET_COUNTS */ if (rule_is_modifiable(rule)) { /* At least one rule is modifiable, don't report EPERM error. */ error = 0; } else { continue; } /* Verify actions. 
*/ error = ofproto_check_ofpacts(ofproto, fm->ofpacts, fm->ofpacts_len, &fm->match.flow, rule->table_id); if (error) { return error; } actions_changed = !ofpacts_equal(fm->ofpacts, fm->ofpacts_len, rule->actions->ofpacts, rule->actions->ofpacts_len); op = ofoperation_create(group, rule, type, 0); if (fm->modify_cookie && fm->new_cookie != htonll(UINT64_MAX)) { ofproto_rule_change_cookie(ofproto, rule, fm->new_cookie); } if (type == OFOPERATION_REPLACE) { ovs_mutex_lock(&rule->mutex); rule->idle_timeout = fm->idle_timeout; rule->hard_timeout = fm->hard_timeout; ovs_mutex_unlock(&rule->mutex); rule->send_flow_removed = (fm->flags & OFPUTIL_FF_SEND_FLOW_REM) != 0; if (fm->idle_timeout || fm->hard_timeout) { if (!rule->eviction_group) { eviction_group_add_rule(rule); } } else { eviction_group_remove_rule(rule); } } reset_counters = (fm->flags & OFPUTIL_FF_RESET_COUNTS) != 0; if (actions_changed || reset_counters) { struct rule_actions *new_actions; op->actions = rule->actions; new_actions = rule_actions_create(fm->ofpacts, fm->ofpacts_len); ovs_mutex_lock(&rule->mutex); rule->actions = new_actions; ovs_mutex_unlock(&rule->mutex); rule->ofproto->ofproto_class->rule_modify_actions(rule, reset_counters); } else { ofoperation_complete(op, 0); } } ofopgroup_submit(group); return error; } static enum ofperr modify_flows_add(struct ofproto *ofproto, struct ofconn *ofconn, struct ofputil_flow_mod *fm, const struct ofp_header *request) OVS_REQUIRES(ofproto_mutex) { if (fm->cookie_mask != htonll(0) || fm->new_cookie == htonll(UINT64_MAX)) { return 0; } return add_flow(ofproto, ofconn, fm, request); } /* Implements OFPFC_MODIFY. Returns 0 on success or an OpenFlow error code on * failure. * * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id, * if any. 
*/ static enum ofperr modify_flows_loose(struct ofproto *ofproto, struct ofconn *ofconn, struct ofputil_flow_mod *fm, const struct ofp_header *request) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; struct rule_collection rules; int error; rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, fm->cookie, fm->cookie_mask, OFPP_ANY); error = collect_rules_loose(ofproto, &criteria, &rules); rule_criteria_destroy(&criteria); if (!error) { error = (rules.n > 0 ? modify_flows__(ofproto, ofconn, fm, request, &rules) : modify_flows_add(ofproto, ofconn, fm, request)); } rule_collection_destroy(&rules); return error; } /* Implements OFPFC_MODIFY_STRICT. Returns 0 on success or an OpenFlow error * code on failure. * * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id, * if any. */ static enum ofperr modify_flow_strict(struct ofproto *ofproto, struct ofconn *ofconn, struct ofputil_flow_mod *fm, const struct ofp_header *request) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; struct rule_collection rules; int error; rule_criteria_init(&criteria, fm->table_id, &fm->match, fm->priority, fm->cookie, fm->cookie_mask, OFPP_ANY); error = collect_rules_strict(ofproto, &criteria, &rules); rule_criteria_destroy(&criteria); if (!error) { if (rules.n == 0) { error = modify_flows_add(ofproto, ofconn, fm, request); } else if (rules.n == 1) { error = modify_flows__(ofproto, ofconn, fm, request, &rules); } } rule_collection_destroy(&rules); return error; } /* OFPFC_DELETE implementation. */ static void delete_flow__(struct rule *rule, struct ofopgroup *group, enum ofp_flow_removed_reason reason) OVS_REQUIRES(ofproto_mutex) { struct ofproto *ofproto = rule->ofproto; ofproto_rule_send_removed(rule, reason); ofoperation_create(group, rule, OFOPERATION_DELETE, reason); oftable_remove_rule(rule); ofproto->ofproto_class->rule_delete(rule); } /* Deletes the rules listed in 'rules'. * * Returns 0 on success, otherwise an OpenFlow error code. 
*/ static enum ofperr delete_flows__(struct ofproto *ofproto, struct ofconn *ofconn, const struct ofp_header *request, const struct rule_collection *rules, enum ofp_flow_removed_reason reason) OVS_REQUIRES(ofproto_mutex) { struct ofopgroup *group; size_t i; group = ofopgroup_create(ofproto, ofconn, request, UINT32_MAX); for (i = 0; i < rules->n; i++) { delete_flow__(rules->rules[i], group, reason); } ofopgroup_submit(group); return 0; } /* Implements OFPFC_DELETE. */ static enum ofperr delete_flows_loose(struct ofproto *ofproto, struct ofconn *ofconn, const struct ofputil_flow_mod *fm, const struct ofp_header *request) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; struct rule_collection rules; enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, 0, fm->cookie, fm->cookie_mask, fm->out_port); error = collect_rules_loose(ofproto, &criteria, &rules); rule_criteria_destroy(&criteria); if (!error && rules.n > 0) { error = delete_flows__(ofproto, ofconn, request, &rules, OFPRR_DELETE); } rule_collection_destroy(&rules); return error; } /* Implements OFPFC_DELETE_STRICT. 
*/ static enum ofperr delete_flow_strict(struct ofproto *ofproto, struct ofconn *ofconn, const struct ofputil_flow_mod *fm, const struct ofp_header *request) OVS_REQUIRES(ofproto_mutex) { struct rule_criteria criteria; struct rule_collection rules; enum ofperr error; rule_criteria_init(&criteria, fm->table_id, &fm->match, fm->priority, fm->cookie, fm->cookie_mask, fm->out_port); error = collect_rules_strict(ofproto, &criteria, &rules); rule_criteria_destroy(&criteria); if (!error && rules.n > 0) { error = delete_flows__(ofproto, ofconn, request, &rules, OFPRR_DELETE); } rule_collection_destroy(&rules); return error; } static void ofproto_rule_send_removed(struct rule *rule, uint8_t reason) OVS_REQUIRES(ofproto_mutex) { struct ofputil_flow_removed fr; if (ofproto_rule_is_hidden(rule) || !rule->send_flow_removed) { return; } minimatch_expand(&rule->cr.match, &fr.match); fr.priority = rule->cr.priority; fr.cookie = rule->flow_cookie; fr.reason = reason; fr.table_id = rule->table_id; calc_duration(rule->created, time_msec(), &fr.duration_sec, &fr.duration_nsec); ovs_mutex_lock(&rule->mutex); fr.idle_timeout = rule->idle_timeout; fr.hard_timeout = rule->hard_timeout; ovs_mutex_unlock(&rule->mutex); rule->ofproto->ofproto_class->rule_get_stats(rule, &fr.packet_count, &fr.byte_count); connmgr_send_flow_removed(rule->ofproto->connmgr, &fr); } /* Sends an OpenFlow "flow removed" message with the given 'reason' (either * OFPRR_HARD_TIMEOUT or OFPRR_IDLE_TIMEOUT), and then removes 'rule' from its * ofproto. * * 'rule' must not have a pending operation (that is, 'rule->pending' must be * NULL). * * ofproto implementation ->run() functions should use this function to expire * OpenFlow flows. 
*/
void
ofproto_rule_expire(struct rule *rule, uint8_t reason)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofproto *ofproto = rule->ofproto;

    /* NOTE(review): OFPRR_DELETE is accepted here in addition to the two
     * timeout reasons. */
    ovs_assert(reason == OFPRR_HARD_TIMEOUT || reason == OFPRR_IDLE_TIMEOUT
               || reason == OFPRR_DELETE);
    ofproto_rule_delete__(ofproto, rule, reason);
}

/* Reduces '*timeout' to no more than 'max'.  A value of zero in either case
 * means "infinite". */
static void
reduce_timeout(uint16_t max, uint16_t *timeout)
{
    if (max && (!*timeout || *timeout > max)) {
        *timeout = max;
    }
}

/* If 'idle_timeout' is nonzero, and 'rule' has no idle timeout or an idle
 * timeout greater than 'idle_timeout', lowers 'rule''s idle timeout to
 * 'idle_timeout' seconds.  Similarly for 'hard_timeout'.
 *
 * Suitable for implementing OFPACT_FIN_TIMEOUT. */
void
ofproto_rule_reduce_timeouts(struct rule *rule,
                             uint16_t idle_timeout, uint16_t hard_timeout)
    OVS_EXCLUDED(ofproto_mutex, rule->mutex)
{
    if (!idle_timeout && !hard_timeout) {
        return;
    }

    /* Put the rule on its ofproto's 'expirable' list if it is not already on
     * a list, under the global ofproto_mutex. */
    ovs_mutex_lock(&ofproto_mutex);
    if (list_is_empty(&rule->expirable)) {
        list_insert(&rule->ofproto->expirable, &rule->expirable);
    }
    ovs_mutex_unlock(&ofproto_mutex);

    /* The timeouts themselves are protected by the per-rule mutex. */
    ovs_mutex_lock(&rule->mutex);
    reduce_timeout(idle_timeout, &rule->idle_timeout);
    reduce_timeout(hard_timeout, &rule->hard_timeout);
    ovs_mutex_unlock(&rule->mutex);
}

/* Handles a flow_mod message 'oh' received on 'ofconn': rejects it for slave
 * controllers, decodes it, executes it through handle_flow_mod__() and, on
 * success, updates the per-ofproto add/modify/delete counters and the
 * timestamps used for the summary report.
 *
 * Returns 0 on success, otherwise the error from whichever step failed. */
static enum ofperr
handle_flow_mod(struct ofconn *ofconn, const struct ofp_header *oh)
    OVS_EXCLUDED(ofproto_mutex)
{
    struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
    struct ofputil_flow_mod fm;
    uint64_t ofpacts_stub[1024 / 8];
    struct ofpbuf ofpacts;
    enum ofperr error;
    long long int now;

    error = reject_slave_controller(ofconn);
    if (error) {
        /* 'ofpacts' has not been set up yet, so skip its cleanup. */
        goto exit;
    }

    ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
    error = ofputil_decode_flow_mod(&fm, oh, ofconn_get_protocol(ofconn),
                                    &ofpacts);
    if (!error) {
        error = handle_flow_mod__(ofproto, ofconn, &fm, oh);
    }
    if (error) {
        goto exit_free_ofpacts;
    }

    /* Record the operation for logging a summary report. */
    switch (fm.command) {
    case OFPFC_ADD:
        ofproto->n_add++;
        break;

    case OFPFC_MODIFY:
    case OFPFC_MODIFY_STRICT:
        ofproto->n_modify++;
        break;

    case OFPFC_DELETE:
    case OFPFC_DELETE_STRICT:
        ofproto->n_delete++;
        break;
    }

    now = time_msec();
    if (ofproto->next_op_report == LLONG_MAX) {
        /* No report is scheduled (LLONG_MAX): schedule one and push the
         * backoff deadline 60 s beyond it. */
        ofproto->first_op = now;
        ofproto->next_op_report = MAX(now + 10 * 1000,
                                      ofproto->op_backoff);
        ofproto->op_backoff = ofproto->next_op_report + 60 * 1000;
    }
    ofproto->last_op = now;

exit_free_ofpacts:
    ofpbuf_uninit(&ofpacts);
exit:
    return error;
}

/* Executes the already-decoded flow_mod 'fm' on 'ofproto', dispatching on
 * 'fm->command' while holding ofproto_mutex.
 *
 * If 50 or more operation groups are already pending the request is not
 * attempted and OFPROTO_POSTPONE is returned.  An unrecognized command yields
 * OFPERR_OFPFMFC_BAD_COMMAND.  run_rule_executes() is called after the mutex
 * has been released, whatever the outcome. */
static enum ofperr
handle_flow_mod__(struct ofproto *ofproto, struct ofconn *ofconn,
                  struct ofputil_flow_mod *fm, const struct ofp_header *oh)
    OVS_EXCLUDED(ofproto_mutex)
{
    enum ofperr error;

    ovs_mutex_lock(&ofproto_mutex);
    if (ofproto->n_pending < 50) {
        switch (fm->command) {
        case OFPFC_ADD:
            error = add_flow(ofproto, ofconn, fm, oh);
            break;

        case OFPFC_MODIFY:
            error = modify_flows_loose(ofproto, ofconn, fm, oh);
            break;

        case OFPFC_MODIFY_STRICT:
            error = modify_flow_strict(ofproto, ofconn, fm, oh);
            break;

        case OFPFC_DELETE:
            error = delete_flows_loose(ofproto, ofconn, fm, oh);
            break;

        case OFPFC_DELETE_STRICT:
            error = delete_flow_strict(ofproto, ofconn, fm, oh);
            break;

        default:
            if (fm->command > 0xff) {
                VLOG_WARN_RL(&rl, "%s: flow_mod has explicit table_id but "
                             "flow_mod_table_id extension is not enabled",
                             ofproto->name);
            }
            error = OFPERR_OFPFMFC_BAD_COMMAND;
            break;
        }
    } else {
        ovs_assert(!list_is_empty(&ofproto->pending));
        error = OFPROTO_POSTPONE;
    }
    ovs_mutex_unlock(&ofproto_mutex);

    run_rule_executes(ofproto);
    return error;
}

/* Handles a role request 'oh' on 'ofconn' and sends a role reply.
 *
 * A request that would change the role is postponed while 'ofconn' has
 * pending operation groups, and is rejected with OFPERR_OFPRRFC_STALE when
 * it carries a generation id that ofconn_set_master_election_id() refuses.
 * OFPCR12_ROLE_NOCHANGE only queries the current role. */
static enum ofperr
handle_role_request(struct ofconn *ofconn, const struct ofp_header *oh)
{
    struct ofputil_role_request request;
    struct ofputil_role_request reply;
    struct ofpbuf *buf;
    enum ofperr error;

    error = ofputil_decode_role_message(oh, &request);
    if (error) {
        return error;
    }

    if (request.role != OFPCR12_ROLE_NOCHANGE) {
        if (ofconn_get_role(ofconn) != request.role
            && ofconn_has_pending_opgroups(ofconn)) {
            return OFPROTO_POSTPONE;
        }

        if (request.have_generation_id
            && !ofconn_set_master_election_id(ofconn,
                                              request.generation_id)) {
                return OFPERR_OFPRRFC_STALE;
        }

        ofconn_set_role(ofconn, request.role);
    }

    reply.role = ofconn_get_role(ofconn);
    reply.have_generation_id = ofconn_get_master_election_id(
        ofconn, &reply.generation_id);
    buf = ofputil_encode_role_reply(oh, &reply);
    ofconn_send_reply(ofconn, buf);

    return 0;
}

/* Handles the Nicira flow_mod_table_id message: updates 'ofconn''s protocol
 * according to whether 'msg->set' is nonzero.  Always returns 0. */
static enum ofperr
handle_nxt_flow_mod_table_id(struct ofconn *ofconn,
                             const struct ofp_header *oh)
{
    const struct nx_flow_mod_table_id *msg = ofpmsg_body(oh);
    enum ofputil_protocol cur, next;

    cur = ofconn_get_protocol(ofconn);
    next = ofputil_protocol_set_tid(cur, msg->set != 0);
    ofconn_set_protocol(ofconn, next);

    return 0;
}

/* Handles the Nicira set_flow_format message: switches 'ofconn' to the base
 * protocol named by 'msg->format'.  Returns OFPERR_OFPBRC_EPERM for a format
 * that maps to no protocol, and OFPROTO_POSTPONE if the protocol would change
 * while operation groups are pending on 'ofconn'. */
static enum ofperr
handle_nxt_set_flow_format(struct ofconn *ofconn, const struct ofp_header *oh)
{
    const struct nx_set_flow_format *msg = ofpmsg_body(oh);
    enum ofputil_protocol cur, next;
    enum ofputil_protocol next_base;

    next_base = ofputil_nx_flow_format_to_protocol(ntohl(msg->format));
    if (!next_base) {
        return OFPERR_OFPBRC_EPERM;
    }

    cur = ofconn_get_protocol(ofconn);
    next = ofputil_protocol_set_base(cur, next_base);
    if (cur != next && ofconn_has_pending_opgroups(ofconn)) {
        /* Avoid sending async messages in surprising protocol. */
        return OFPROTO_POSTPONE;
    }

    ofconn_set_protocol(ofconn, next);
    return 0;
}

/* Handles the Nicira set_packet_in_format message.  Only NXPIF_OPENFLOW10 and
 * NXPIF_NXM are accepted (anything else is OFPERR_OFPBRC_EPERM); a change of
 * format is postponed while operation groups are pending on 'ofconn'. */
static enum ofperr
handle_nxt_set_packet_in_format(struct ofconn *ofconn,
                                const struct ofp_header *oh)
{
    const struct nx_set_packet_in_format *msg = ofpmsg_body(oh);
    uint32_t format;

    format = ntohl(msg->format);
    if (format != NXPIF_OPENFLOW10 && format != NXPIF_NXM) {
        return OFPERR_OFPBRC_EPERM;
    }

    if (format != ofconn_get_packet_in_format(ofconn)
        && ofconn_has_pending_opgroups(ofconn)) {
        /* Avoid sending async message in surprising packet in format. */
        return OFPROTO_POSTPONE;
    }

    ofconn_set_packet_in_format(ofconn, format);
    return 0;
}

/* Handles the Nicira set_async_config message: element [0] of each mask in
 * the message goes into the 'master' configuration and element [1] into the
 * 'slave' configuration.  Always returns 0. */
static enum ofperr
handle_nxt_set_async_config(struct ofconn *ofconn, const struct ofp_header *oh)
{
    const struct nx_async_config *msg = ofpmsg_body(oh);
    uint32_t master[OAM_N_TYPES];
    uint32_t slave[OAM_N_TYPES];

    master[OAM_PACKET_IN] = ntohl(msg->packet_in_mask[0]);
    master[OAM_PORT_STATUS] = ntohl(msg->port_status_mask[0]);
    master[OAM_FLOW_REMOVED] = ntohl(msg->flow_removed_mask[0]);

    slave[OAM_PACKET_IN] = ntohl(msg->packet_in_mask[1]);
    slave[OAM_PORT_STATUS] = ntohl(msg->port_status_mask[1]);
    slave[OAM_FLOW_REMOVED] = ntohl(msg->flow_removed_mask[1]);

    ofconn_set_async_config(ofconn, master, slave);
    /* A service connection whose miss_send_len is still 0 is given the
     * default value. */
    if (ofconn_get_type(ofconn) == OFCONN_SERVICE &&
        !ofconn_get_miss_send_len(ofconn)) {
        ofconn_set_miss_send_len(ofconn, OFP_DEFAULT_MISS_SEND_LEN);
    }

    return 0;
}

/* Handles the Nicira set_controller_id message.  The padding in the message
 * must be all zeros, otherwise OFPERR_NXBRC_MUST_BE_ZERO is returned. */
static enum ofperr
handle_nxt_set_controller_id(struct ofconn *ofconn,
                             const struct ofp_header *oh)
{
    const struct nx_controller_id *nci = ofpmsg_body(oh);

    if (!is_all_zeros(nci->zero, sizeof nci->zero)) {
        return OFPERR_NXBRC_MUST_BE_ZERO;
    }

    ofconn_set_controller_id(ofconn, ntohs(nci->controller_id));
    return 0;
}

/* Handles a barrier request: replies immediately unless 'ofconn' still has
 * pending operation groups, in which case the request is postponed.  The
 * reply type depends on whether the request is OpenFlow 1.0. */
static enum ofperr
handle_barrier_request(struct ofconn *ofconn, const struct ofp_header *oh)
{
    struct ofpbuf *buf;

    if (ofconn_has_pending_opgroups(ofconn)) {
        return OFPROTO_POSTPONE;
    }

    buf = ofpraw_alloc_reply((oh->version == OFP10_VERSION
                              ? OFPRAW_OFPT10_BARRIER_REPLY
                              : OFPRAW_OFPT11_BARRIER_REPLY), oh, 0);
    ofconn_send_reply(ofconn, buf);
    return 0;
}

/* Appends to 'msgs' a flow update describing 'rule', as selected by the
 * monitor 'flags'.  Nothing is appended for a rule whose pending operation is
 * an add.  'msgs' is started with ofputil_start_flow_update() if it is still
 * empty. */
static void
ofproto_compose_flow_refresh_update(const struct rule *rule,
                                    enum nx_flow_monitor_flags flags,
                                    struct list *msgs)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofoperation *op = rule->pending;
    const struct rule_actions *actions;
    struct ofputil_flow_update fu;
    struct match match;

    if (op && op->type == OFOPERATION_ADD) {
        /* We'll report the final flow when the operation completes.  Reporting
         * it now would cause a duplicate report later. */
        return;
    }

    fu.event = (flags & (NXFMF_INITIAL | NXFMF_ADD)
                ? NXFME_ADDED : NXFME_MODIFIED);
    fu.reason = 0;
    ovs_mutex_lock(&rule->mutex);
    fu.idle_timeout = rule->idle_timeout;
    fu.hard_timeout = rule->hard_timeout;
    ovs_mutex_unlock(&rule->mutex);
    fu.table_id = rule->table_id;
    fu.cookie = rule->flow_cookie;
    minimatch_expand(&rule->cr.match, &match);
    fu.match = &match;
    fu.priority = rule->cr.priority;

    if (!(flags & NXFMF_ACTIONS)) {
        /* The monitor did not ask for actions. */
        actions = NULL;
    } else if (!op) {
        actions = rule->actions;
    } else {
        /* An operation is in progress.  Use the previous version of the flow's
         * actions, so that when the operation commits we report the change. */
        switch (op->type) {
        case OFOPERATION_ADD:
            NOT_REACHED();

        case OFOPERATION_MODIFY:
        case OFOPERATION_REPLACE:
            actions = op->actions ? op->actions : rule->actions;
            break;

        case OFOPERATION_DELETE:
            actions = rule->actions;
            break;

        default:
            NOT_REACHED();
        }
    }
    fu.ofpacts = actions ? actions->ofpacts : NULL;
    fu.ofpacts_len = actions ? actions->ofpacts_len : 0;

    if (list_is_empty(msgs)) {
        ofputil_start_flow_update(msgs);
    }
    ofputil_append_flow_update(&fu, msgs);
}

/* Composes a flow update into 'msgs' for every rule in 'rules', using the
 * flags accumulated in each rule's 'monitor_flags', which are reset to 0 as
 * they are consumed. */
void
ofmonitor_compose_refresh_updates(struct rule_collection *rules,
                                  struct list *msgs)
    OVS_REQUIRES(ofproto_mutex)
{
    size_t i;

    for (i = 0; i < rules->n; i++) {
        struct rule *rule = rules->rules[i];
        enum nx_flow_monitor_flags flags = rule->monitor_flags;
        rule->monitor_flags = 0;

        ofproto_compose_flow_refresh_update(rule, flags, msgs);
    }
}

/* Decides whether monitor 'm' wants an update about 'rule' and, if so, adds
 * 'rule' to 'rules' (once) and ORs the relevant flags into
 * 'rule->monitor_flags'.
 *
 * With a nonzero 'seqno' only rules added or modified after 'seqno' qualify,
 * and only if 'm' asked for that kind of event; with 'seqno' of 0 the rule is
 * reported as NXFMF_INITIAL.  Hidden rules and rules that do not output to
 * 'm->out_port' are ignored. */
static void
ofproto_collect_ofmonitor_refresh_rule(const struct ofmonitor *m,
                                       struct rule *rule, uint64_t seqno,
                                       struct rule_collection *rules)
    OVS_REQUIRES(ofproto_mutex)
{
    enum nx_flow_monitor_flags update;

    if (ofproto_rule_is_hidden(rule)) {
        return;
    }

    /* For a rule with a pending operation the out_port test is made against
     * the operation rather than the rule. */
    if (!(rule->pending
          ? ofoperation_has_out_port(rule->pending, m->out_port)
          : ofproto_rule_has_out_port(rule, m->out_port))) {
        return;
    }

    if (seqno) {
        if (rule->add_seqno > seqno) {
            update = NXFMF_ADD | NXFMF_MODIFY;
        } else if (rule->modify_seqno > seqno) {
            update = NXFMF_MODIFY;
        } else {
            return;
        }

        if (!(m->flags & update)) {
            return;
        }
    } else {
        update = NXFMF_INITIAL;
    }

    /* Nonzero 'monitor_flags' means the rule is already in 'rules'. */
    if (!rule->monitor_flags) {
        rule_collection_add(rules, rule);
    }
    rule->monitor_flags |= update | (m->flags & NXFMF_ACTIONS);
}

/* Collects into 'rules' every rule that monitor 'm' should report given
 * 'seqno': first the rules in the matching tables' classifiers, then the
 * rules of the operations in 'ofproto->deletions' that fall in a table 'm'
 * covers and loosely match 'm''s match. */
static void
ofproto_collect_ofmonitor_refresh_rules(const struct ofmonitor *m,
                                        uint64_t seqno,
                                        struct rule_collection *rules)
    OVS_REQUIRES(ofproto_mutex)
{
    const struct ofproto *ofproto = ofconn_get_ofproto(m->ofconn);
    const struct ofoperation *op;
    const struct oftable *table;
    struct cls_rule target;

    cls_rule_init_from_minimatch(&target, &m->match, 0);
    FOR_EACH_MATCHING_TABLE (table, m->table_id, ofproto) {
        struct cls_cursor cursor;
        struct rule *rule;

        ovs_rwlock_rdlock(&table->cls.rwlock);
        cls_cursor_init(&cursor, &table->cls, &target);
        CLS_CURSOR_FOR_EACH (rule, cr, &cursor) {
            ovs_assert(!rule->pending); /* XXX */
            ofproto_collect_ofmonitor_refresh_rule(m, rule, seqno, rules);
        }
        ovs_rwlock_unlock(&table->cls.rwlock);
    }

    HMAP_FOR_EACH (op, hmap_node, &ofproto->deletions) {
        struct rule *rule = op->rule;

        /* Table id 0xff selects every table not flagged OFTABLE_HIDDEN;
         * otherwise the rule's table must be exactly 'm->table_id'. */
        if (((m->table_id == 0xff
              ? !(ofproto->tables[rule->table_id].flags & OFTABLE_HIDDEN)
              : m->table_id == rule->table_id))
            && cls_rule_is_loose_match(&rule->cr, &target.match)) {
            ofproto_collect_ofmonitor_refresh_rule(m, rule, seqno, rules);
        }
    }
    cls_rule_destroy(&target);
}

/* Collects the initial set of rules for monitor 'm' into 'rules', but only if
 * 'm' was created with NXFMF_INITIAL. */
static void
ofproto_collect_ofmonitor_initial_rules(struct ofmonitor *m,
                                        struct rule_collection *rules)
    OVS_REQUIRES(ofproto_mutex)
{
    if (m->flags & NXFMF_INITIAL) {
        ofproto_collect_ofmonitor_refresh_rules(m, 0, rules);
    }
}

/* Collects into 'rules' the rules that monitor 'm' must report for changes
 * made after 'seqno'. */
void
ofmonitor_collect_resume_rules(struct ofmonitor *m,
                               uint64_t seqno, struct rule_collection *rules)
    OVS_REQUIRES(ofproto_mutex)
{
    ofproto_collect_ofmonitor_refresh_rules(m, seqno, rules);
}

/* Handles a flow monitor request 'oh', which may carry several individual
 * monitor requests.  All of them are decoded and created first; if any one
 * fails, the monitors created so far are destroyed and the error is returned.
 * Otherwise the initial flow updates are composed and sent as the reply. */
static enum ofperr
handle_flow_monitor_request(struct ofconn *ofconn, const struct ofp_header *oh)
    OVS_EXCLUDED(ofproto_mutex)
{
    struct ofproto *ofproto = ofconn_get_ofproto(ofconn);
    struct ofmonitor **monitors;
    size_t n_monitors, allocated_monitors;
    struct rule_collection rules;
    struct list replies;
    enum ofperr error;
    struct ofpbuf b;
    size_t i;

    error = 0;
    ofpbuf_use_const(&b, oh, ntohs(oh->length));
    monitors = NULL;
    n_monitors = allocated_monitors = 0;

    ovs_mutex_lock(&ofproto_mutex);
    for (;;) {
        struct ofputil_flow_monitor_request request;
        struct ofmonitor *m;
        int retval;

        /* EOF marks the normal end of the request list. */
        retval = ofputil_decode_flow_monitor_request(&request, &b);
        if (retval == EOF) {
            break;
        } else if (retval) {
            error = retval;
            goto error;
        }

        if (request.table_id != 0xff
            && request.table_id >= ofproto->n_tables) {
            error = OFPERR_OFPBRC_BAD_TABLE_ID;
            goto error;
        }

        error = ofmonitor_create(&request, ofconn, &m);
        if (error) {
            goto error;
        }

        if (n_monitors >= allocated_monitors) {
            monitors = x2nrealloc(monitors, &allocated_monitors,
                                  sizeof *monitors);
        }
        monitors[n_monitors++] = m;
    }

    rule_collection_init(&rules);
    for (i = 0; i < n_monitors; i++) {
        ofproto_collect_ofmonitor_initial_rules(monitors[i], &rules);
    }

    ofpmp_init(&replies, oh);
    ofmonitor_compose_refresh_updates(&rules, &replies);
    ovs_mutex_unlock(&ofproto_mutex);

    rule_collection_destroy(&rules);

    ofconn_send_replies(ofconn, &replies);
    /* Only the array is freed here; the monitors themselves stay alive. */
    free(monitors);

    return 0;

error:
    for (i = 0; i < n_monitors; i++) {
        ofmonitor_destroy(monitors[i]);
    }
    free(monitors);
    ovs_mutex_unlock(&ofproto_mutex);

    return error;
}

/* Handles a flow monitor cancel request: destroys the monitor on 'ofconn'
 * with the id carried in 'oh', or returns OFPERR_NXBRC_FM_BAD_ID if there is
 * no such monitor. */
static enum ofperr
handle_flow_monitor_cancel(struct ofconn *ofconn, const struct ofp_header *oh)
    OVS_EXCLUDED(ofproto_mutex)
{
    struct ofmonitor *m;
    enum ofperr error;
    uint32_t id;

    id = ofputil_decode_flow_monitor_cancel(oh);

    ovs_mutex_lock(&ofproto_mutex);
    m = ofmonitor_lookup(ofconn, id);
    if (m) {
        ofmonitor_destroy(m);
        error = 0;
    } else {
        error = OFPERR_NXBRC_FM_BAD_ID;
    }
    ovs_mutex_unlock(&ofproto_mutex);

    return error;
}

/* Meters implementation.
 *
 * Meter table entry, indexed by the OpenFlow meter_id.
 * These are always dynamically allocated to allocate enough space for
 * the bands.
 * 'created' is used to compute the duration for meter stats.
 * 'list rules' is needed so that we can delete the dependent rules when the
 * meter table entry is deleted.
 * 'provider_meter_id' is for the provider's private use.
 */
struct meter {
    long long int created;      /* Time created. */
    struct list rules;          /* List of "struct rule_dpif"s. */
    ofproto_meter_id provider_meter_id;
    uint16_t flags;             /* Meter flags. */
    uint16_t n_bands;           /* Number of meter bands. */
    struct ofputil_meter_band *bands;
};

/*
 * This is used in instruction validation at flow set-up time,
 * as flows may not use non-existing meters.
 * This is also used by ofproto-providers to translate OpenFlow meter_ids
 * in METER instructions to the corresponding provider meter IDs.
 * Return value of UINT32_MAX signifies an invalid meter.
*/ uint32_t ofproto_get_provider_meter_id(const struct ofproto * ofproto, uint32_t of_meter_id) { if (of_meter_id && of_meter_id <= ofproto->meter_features.max_meters) { const struct meter *meter = ofproto->meters[of_meter_id]; if (meter) { return meter->provider_meter_id.uint32; } } return UINT32_MAX; } static void meter_update(struct meter *meter, const struct ofputil_meter_config *config) { free(meter->bands); meter->flags = config->flags; meter->n_bands = config->n_bands; meter->bands = xmemdup(config->bands, config->n_bands * sizeof *meter->bands); } static struct meter * meter_create(const struct ofputil_meter_config *config, ofproto_meter_id provider_meter_id) { struct meter *meter; meter = xzalloc(sizeof *meter); meter->provider_meter_id = provider_meter_id; meter->created = time_msec(); list_init(&meter->rules); meter_update(meter, config); return meter; } static void meter_delete(struct ofproto *ofproto, uint32_t first, uint32_t last) OVS_REQUIRES(ofproto_mutex) { uint32_t mid; for (mid = first; mid <= last; ++mid) { struct meter *meter = ofproto->meters[mid]; if (meter) { ofproto->meters[mid] = NULL; ofproto->ofproto_class->meter_del(ofproto, meter->provider_meter_id); free(meter->bands); free(meter); } } } static enum ofperr handle_add_meter(struct ofproto *ofproto, struct ofputil_meter_mod *mm) { ofproto_meter_id provider_meter_id = { UINT32_MAX }; struct meter **meterp = &ofproto->meters[mm->meter.meter_id]; enum ofperr error; if (*meterp) { return OFPERR_OFPMMFC_METER_EXISTS; } error = ofproto->ofproto_class->meter_set(ofproto, &provider_meter_id, &mm->meter); if (!error) { ovs_assert(provider_meter_id.uint32 != UINT32_MAX); *meterp = meter_create(&mm->meter, provider_meter_id); } return 0; } static enum ofperr handle_modify_meter(struct ofproto *ofproto, struct ofputil_meter_mod *mm) { struct meter *meter = ofproto->meters[mm->meter.meter_id]; enum ofperr error; if (!meter) { return OFPERR_OFPMMFC_UNKNOWN_METER; } error = 
ofproto->ofproto_class->meter_set(ofproto, &meter->provider_meter_id, &mm->meter); ovs_assert(meter->provider_meter_id.uint32 != UINT32_MAX); if (!error) { meter_update(meter, &mm->meter); } return error; } static enum ofperr handle_delete_meter(struct ofconn *ofconn, const struct ofp_header *oh, struct ofputil_meter_mod *mm) OVS_EXCLUDED(ofproto_mutex) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); uint32_t meter_id = mm->meter.meter_id; struct rule_collection rules; enum ofperr error = 0; uint32_t first, last; if (meter_id == OFPM13_ALL) { first = 1; last = ofproto->meter_features.max_meters; } else { if (!meter_id || meter_id > ofproto->meter_features.max_meters) { return 0; } first = last = meter_id; } /* First delete the rules that use this meter. If any of those rules are * currently being modified, postpone the whole operation until later. */ rule_collection_init(&rules); ovs_mutex_lock(&ofproto_mutex); for (meter_id = first; meter_id <= last; ++meter_id) { struct meter *meter = ofproto->meters[meter_id]; if (meter && !list_is_empty(&meter->rules)) { struct rule *rule; LIST_FOR_EACH (rule, meter_list_node, &meter->rules) { if (rule->pending) { error = OFPROTO_POSTPONE; goto exit; } rule_collection_add(&rules, rule); } } } if (rules.n > 0) { delete_flows__(ofproto, ofconn, oh, &rules, OFPRR_METER_DELETE); } /* Delete the meters. 
*/ meter_delete(ofproto, first, last); exit: ovs_mutex_unlock(&ofproto_mutex); rule_collection_destroy(&rules); return error; } static enum ofperr handle_meter_mod(struct ofconn *ofconn, const struct ofp_header *oh) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); struct ofputil_meter_mod mm; uint64_t bands_stub[256 / 8]; struct ofpbuf bands; uint32_t meter_id; enum ofperr error; error = reject_slave_controller(ofconn); if (error) { return error; } ofpbuf_use_stub(&bands, bands_stub, sizeof bands_stub); error = ofputil_decode_meter_mod(oh, &mm, &bands); if (error) { goto exit_free_bands; } meter_id = mm.meter.meter_id; if (mm.command != OFPMC13_DELETE) { /* Fails also when meters are not implemented by the provider. */ if (meter_id == 0 || meter_id > OFPM13_MAX) { error = OFPERR_OFPMMFC_INVALID_METER; goto exit_free_bands; } else if (meter_id > ofproto->meter_features.max_meters) { error = OFPERR_OFPMMFC_OUT_OF_METERS; goto exit_free_bands; } if (mm.meter.n_bands > ofproto->meter_features.max_bands) { error = OFPERR_OFPMMFC_OUT_OF_BANDS; goto exit_free_bands; } } switch (mm.command) { case OFPMC13_ADD: error = handle_add_meter(ofproto, &mm); break; case OFPMC13_MODIFY: error = handle_modify_meter(ofproto, &mm); break; case OFPMC13_DELETE: error = handle_delete_meter(ofconn, oh, &mm); break; default: error = OFPERR_OFPMMFC_BAD_COMMAND; break; } exit_free_bands: ofpbuf_uninit(&bands); return error; } static enum ofperr handle_meter_features_request(struct ofconn *ofconn, const struct ofp_header *request) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); struct ofputil_meter_features features; struct ofpbuf *b; if (ofproto->ofproto_class->meter_get_features) { ofproto->ofproto_class->meter_get_features(ofproto, &features); } else { memset(&features, 0, sizeof features); } b = ofputil_encode_meter_features_reply(&features, request); ofconn_send_reply(ofconn, b); return 0; } static enum ofperr handle_meter_request(struct ofconn *ofconn, const struct 
ofp_header *request, enum ofptype type) { struct ofproto *ofproto = ofconn_get_ofproto(ofconn); struct list replies; uint64_t bands_stub[256 / 8]; struct ofpbuf bands; uint32_t meter_id, first, last; ofputil_decode_meter_request(request, &meter_id); if (meter_id == OFPM13_ALL) { first = 1; last = ofproto->meter_features.max_meters; } else { if (!meter_id || meter_id > ofproto->meter_features.max_meters || !ofproto->meters[meter_id]) { return OFPERR_OFPMMFC_UNKNOWN_METER; } first = last = meter_id; } ofpbuf_use_stub(&bands, bands_stub, sizeof bands_stub); ofpmp_init(&replies, request); for (meter_id = first; meter_id <= last; ++meter_id) { struct meter *meter = ofproto->meters[meter_id]; if (!meter) { continue; /* Skip non-existing meters. */ } if (type == OFPTYPE_METER_STATS_REQUEST) { struct ofputil_meter_stats stats; stats.meter_id = meter_id; /* Provider sets the packet and byte counts, we do the rest. */ stats.flow_count = list_size(&meter->rules); calc_duration(meter->created, time_msec(), &stats.duration_sec, &stats.duration_nsec); stats.n_bands = meter->n_bands; ofpbuf_clear(&bands); stats.bands = ofpbuf_put_uninit(&bands, meter->n_bands * sizeof *stats.bands); if (!ofproto->ofproto_class->meter_get(ofproto, meter->provider_meter_id, &stats)) { ofputil_append_meter_stats(&replies, &stats); } } else { /* type == OFPTYPE_METER_CONFIG_REQUEST */ struct ofputil_meter_config config; config.meter_id = meter_id; config.flags = meter->flags; config.n_bands = meter->n_bands; config.bands = meter->bands; ofputil_append_meter_config(&replies, &config); } } ofconn_send_replies(ofconn, &replies); ofpbuf_uninit(&bands); return 0; } static enum ofperr handle_openflow__(struct ofconn *ofconn, const struct ofpbuf *msg) OVS_EXCLUDED(ofproto_mutex) { const struct ofp_header *oh = msg->data; enum ofptype type; enum ofperr error; error = ofptype_decode(&type, oh); if (error) { return error; } switch (type) { /* OpenFlow requests. 
*/ case OFPTYPE_ECHO_REQUEST: return handle_echo_request(ofconn, oh); case OFPTYPE_FEATURES_REQUEST: return handle_features_request(ofconn, oh); case OFPTYPE_GET_CONFIG_REQUEST: return handle_get_config_request(ofconn, oh); case OFPTYPE_SET_CONFIG: return handle_set_config(ofconn, oh); case OFPTYPE_PACKET_OUT: return handle_packet_out(ofconn, oh); case OFPTYPE_PORT_MOD: return handle_port_mod(ofconn, oh); case OFPTYPE_FLOW_MOD: return handle_flow_mod(ofconn, oh); case OFPTYPE_METER_MOD: return handle_meter_mod(ofconn, oh); case OFPTYPE_BARRIER_REQUEST: return handle_barrier_request(ofconn, oh); case OFPTYPE_ROLE_REQUEST: return handle_role_request(ofconn, oh); /* OpenFlow replies. */ case OFPTYPE_ECHO_REPLY: return 0; /* Nicira extension requests. */ case OFPTYPE_FLOW_MOD_TABLE_ID: return handle_nxt_flow_mod_table_id(ofconn, oh); case OFPTYPE_SET_FLOW_FORMAT: return handle_nxt_set_flow_format(ofconn, oh); case OFPTYPE_SET_PACKET_IN_FORMAT: return handle_nxt_set_packet_in_format(ofconn, oh); case OFPTYPE_SET_CONTROLLER_ID: return handle_nxt_set_controller_id(ofconn, oh); case OFPTYPE_FLOW_AGE: /* Nothing to do. */ return 0; case OFPTYPE_FLOW_MONITOR_CANCEL: return handle_flow_monitor_cancel(ofconn, oh); case OFPTYPE_SET_ASYNC_CONFIG: return handle_nxt_set_async_config(ofconn, oh); /* Statistics requests. 
*/ case OFPTYPE_DESC_STATS_REQUEST: return handle_desc_stats_request(ofconn, oh); case OFPTYPE_FLOW_STATS_REQUEST: return handle_flow_stats_request(ofconn, oh); case OFPTYPE_AGGREGATE_STATS_REQUEST: return handle_aggregate_stats_request(ofconn, oh); case OFPTYPE_TABLE_STATS_REQUEST: return handle_table_stats_request(ofconn, oh); case OFPTYPE_PORT_STATS_REQUEST: return handle_port_stats_request(ofconn, oh); case OFPTYPE_QUEUE_STATS_REQUEST: return handle_queue_stats_request(ofconn, oh); case OFPTYPE_PORT_DESC_STATS_REQUEST: return handle_port_desc_stats_request(ofconn, oh); case OFPTYPE_FLOW_MONITOR_STATS_REQUEST: return handle_flow_monitor_request(ofconn, oh); case OFPTYPE_METER_STATS_REQUEST: case OFPTYPE_METER_CONFIG_STATS_REQUEST: return handle_meter_request(ofconn, oh, type); case OFPTYPE_METER_FEATURES_STATS_REQUEST: return handle_meter_features_request(ofconn, oh); /* FIXME: Change the following once they are implemented: */ case OFPTYPE_QUEUE_GET_CONFIG_REQUEST: case OFPTYPE_GET_ASYNC_REQUEST: case OFPTYPE_GROUP_STATS_REQUEST: case OFPTYPE_GROUP_DESC_STATS_REQUEST: case OFPTYPE_GROUP_FEATURES_STATS_REQUEST: case OFPTYPE_TABLE_FEATURES_STATS_REQUEST: return OFPERR_OFPBRC_BAD_TYPE; case OFPTYPE_HELLO: case OFPTYPE_ERROR: case OFPTYPE_FEATURES_REPLY: case OFPTYPE_GET_CONFIG_REPLY: case OFPTYPE_PACKET_IN: case OFPTYPE_FLOW_REMOVED: case OFPTYPE_PORT_STATUS: case OFPTYPE_BARRIER_REPLY: case OFPTYPE_QUEUE_GET_CONFIG_REPLY: case OFPTYPE_DESC_STATS_REPLY: case OFPTYPE_FLOW_STATS_REPLY: case OFPTYPE_QUEUE_STATS_REPLY: case OFPTYPE_PORT_STATS_REPLY: case OFPTYPE_TABLE_STATS_REPLY: case OFPTYPE_AGGREGATE_STATS_REPLY: case OFPTYPE_PORT_DESC_STATS_REPLY: case OFPTYPE_ROLE_REPLY: case OFPTYPE_FLOW_MONITOR_PAUSED: case OFPTYPE_FLOW_MONITOR_RESUMED: case OFPTYPE_FLOW_MONITOR_STATS_REPLY: case OFPTYPE_GET_ASYNC_REPLY: case OFPTYPE_GROUP_STATS_REPLY: case OFPTYPE_GROUP_DESC_STATS_REPLY: case OFPTYPE_GROUP_FEATURES_STATS_REPLY: case OFPTYPE_METER_STATS_REPLY: case 
OFPTYPE_METER_CONFIG_STATS_REPLY:
    case OFPTYPE_METER_FEATURES_STATS_REPLY:
    case OFPTYPE_TABLE_FEATURES_STATS_REPLY:
    default:
        return OFPERR_OFPBRC_BAD_TYPE;
    }
}

/* Passes 'ofp_msg', received on 'ofconn', to handle_openflow__().
 *
 * If that returns a nonzero error other than OFPROTO_POSTPONE, the error is
 * sent back on 'ofconn' together with the offending message.
 *
 * Returns false if handle_openflow__() returned OFPROTO_POSTPONE, otherwise
 * true.  (Presumably a false return tells the caller to offer the message
 * again later -- confirm against the caller.) */
static bool
handle_openflow(struct ofconn *ofconn, const struct ofpbuf *ofp_msg)
    OVS_EXCLUDED(ofproto_mutex)
{
    int error = handle_openflow__(ofconn, ofp_msg);
    if (error && error != OFPROTO_POSTPONE) {
        ofconn_send_error(ofconn, ofp_msg->data, error);
    }
    /* Counted for every call, whether or not handling succeeded. */
    COVERAGE_INC(ofproto_recv_openflow);
    return error != OFPROTO_POSTPONE;
}

/* Asynchronous operations. */

/* Creates and returns a new ofopgroup that is not associated with any
 * OpenFlow connection.
 *
 * The caller should add operations to the returned group with
 * ofoperation_create() and then submit it with ofopgroup_submit(). */
static struct ofopgroup *
ofopgroup_create_unattached(struct ofproto *ofproto)
    OVS_REQUIRES(ofproto_mutex)
{
    /* xzalloc() zero-fills, so every member not set below starts out 0. */
    struct ofopgroup *group = xzalloc(sizeof *group);
    group->ofproto = ofproto;
    /* Initialized as empty lists so that list_is_empty() on these nodes can
     * later tell whether the group was linked into the corresponding list. */
    list_init(&group->ofproto_node);
    list_init(&group->ops);
    list_init(&group->ofconn_node);
    return group;
}

/* Creates and returns a new ofopgroup for 'ofproto'.
 *
 * If 'ofconn' is NULL, the new ofopgroup is not associated with any OpenFlow
 * connection.  The 'request' and 'buffer_id' arguments are ignored.
 *
 * If 'ofconn' is nonnull, then the new ofopgroup is associated with 'ofconn'.
 * If the ofopgroup eventually fails, then the error reply will include
 * 'request'.  If the ofopgroup eventually succeeds, then the packet with
 * buffer id 'buffer_id' on 'ofconn' will be sent by 'ofconn''s ofproto.
 *
 * The caller should add operations to the returned group with
 * ofoperation_create() and then submit it with ofopgroup_submit().
*/
static struct ofopgroup *
ofopgroup_create(struct ofproto *ofproto, struct ofconn *ofconn,
                 const struct ofp_header *request, uint32_t buffer_id)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofopgroup *group = ofopgroup_create_unattached(ofproto);
    if (ofconn) {
        size_t request_len = ntohs(request->length);

        ovs_assert(ofconn_get_ofproto(ofconn) == ofproto);

        ofconn_add_opgroup(ofconn, &group->ofconn_node);
        group->ofconn = ofconn;
        /* Keep a private copy of at most the first 64 bytes of 'request'.
         * ofopgroup_complete() passes it to ofconn_send_error() if the group
         * fails, and frees it. */
        group->request = xmemdup(request, MIN(request_len, 64));
        group->buffer_id = buffer_id;
    }
    return group;
}

/* Submits 'group' for processing.
 *
 * If 'group' contains no operations (e.g. none were ever added, or all of the
 * ones that were added completed synchronously), then it is destroyed
 * immediately.  Otherwise it is added to the ofproto's list of pending
 * groups. */
static void
ofopgroup_submit(struct ofopgroup *group)
    OVS_REQUIRES(ofproto_mutex)
{
    if (!group->n_running) {
        ofopgroup_complete(group);
    } else {
        list_push_back(&group->ofproto->pending, &group->ofproto_node);
        group->ofproto->n_pending++;
    }
}

/* Finalizes 'group', all of whose operations must have completed
 * ('group->n_running' must be 0):
 *
 *   - The group's error status is that of the first operation, in list
 *     order, that failed (0 if none failed).
 *
 *   - If nothing failed and the group has an OpenFlow connection and a
 *     buffer ID other than UINT32_MAX, the buffered packet is retrieved and
 *     queued on 'ofproto->rule_executes' for the rule of the first operation
 *     that is not a delete.
 *
 *   - Each operation is reported to flow monitors (with exceptions described
 *     below), then committed or rolled back, then destroyed.
 *
 *   - The group is unlinked from the ofproto's pending list and from its
 *     connection (if it was on them), any error is sent to the connection,
 *     and the group and its copy of the request are freed. */
static void
ofopgroup_complete(struct ofopgroup *group)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofproto *ofproto = group->ofproto;

    struct ofconn *abbrev_ofconn;
    ovs_be32 abbrev_xid;

    struct ofoperation *op, *next_op;
    int error;

    ovs_assert(!group->n_running);

    /* Find the first failed operation, if any. */
    error = 0;
    LIST_FOR_EACH (op, group_node, &group->ops) {
        if (op->error) {
            error = op->error;
            break;
        }
    }

    if (!error && group->ofconn && group->buffer_id != UINT32_MAX) {
        LIST_FOR_EACH (op, group_node, &group->ops) {
            if (op->type != OFOPERATION_DELETE) {
                struct ofpbuf *packet;
                ofp_port_t in_port;

                /* A retrieval failure becomes the group's error and is
                 * reported to the connection below. */
                error = ofconn_pktbuf_retrieve(group->ofconn, group->buffer_id,
                                               &packet, &in_port);
                if (packet) {
                    struct rule_execute *re;

                    ovs_assert(!error);

                    /* The queued entry holds its own reference to the
                     * rule. */
                    ofproto_rule_ref(op->rule);

                    re = xmalloc(sizeof *re);
                    re->rule = op->rule;
                    re->in_port = in_port;
                    re->packet = packet;

                    /* If the push is refused, undo the reference and drop
                     * the packet.  (1024 is presumably the maximum queue
                     * length -- confirm in guarded_list_push_back().) */
                    if (!guarded_list_push_back(&ofproto->rule_executes,
                                                &re->list_node, 1024)) {
                        ofproto_rule_unref(op->rule);
                        ofpbuf_delete(re->packet);
                        free(re);
                    }
                }
                /* Only the first non-delete operation is considered. */
                break;
            }
        }
    }

    /* 'abbrev_ofconn' and 'abbrev_xid' are handed to ofmonitor_report()
     * below.  They identify the requesting connection and transaction only
     * when the group succeeded and is still attached to a connection. */
    if (!error && !list_is_empty(&group->ofconn_node)) {
        abbrev_ofconn = group->ofconn;
        abbrev_xid = group->request->xid;
    } else {
        abbrev_ofconn = NULL;
        abbrev_xid = htonl(0);
    }
    LIST_FOR_EACH_SAFE (op, next_op, group_node, &group->ops) {
        struct rule *rule = op->rule;

        /* We generally want to report the change to active OpenFlow flow
           monitors (e.g. NXST_FLOW_MONITOR).  There are three exceptions:

              - The operation failed.

              - The affected rule is not visible to controllers.

              - The operation's only effect was to update rule->modified.

           NOTE(review): the third exception tests 'op->actions' being
           nonnull.  The rollback code below restores rule->actions from
           op->actions only when it is nonnull, which suggests that a nonnull
           op->actions means the actions WERE changed.  Confirm against the
           code that sets op->actions whether this test is inverted. */
        if (!(op->error
              || ofproto_rule_is_hidden(rule)
              || (op->type == OFOPERATION_MODIFY
                  && op->actions
                  && rule->flow_cookie == op->flow_cookie))) {
            /* Translate the operation type into the flow monitor event to
             * report. */
            enum nx_flow_update_event event_type;

            switch (op->type) {
            case OFOPERATION_ADD:
            case OFOPERATION_REPLACE:
                event_type = NXFME_ADDED;
                break;

            case OFOPERATION_DELETE:
                event_type = NXFME_DELETED;
                break;

            case OFOPERATION_MODIFY:
                event_type = NXFME_MODIFIED;
                break;

            default:
                NOT_REACHED();
            }

            ofmonitor_report(ofproto->connmgr, rule, event_type,
                             op->reason, abbrev_ofconn, abbrev_xid);
        }

        rule->pending = NULL;

        switch (op->type) {
        case OFOPERATION_ADD:
            if (!op->error) {
                uint16_t vid_mask;

                /* A new rule that matches on the whole VLAN ID: record the
                 * VID in 'ofproto->vlan_bitmap' and flag the change if the
                 * VID was not already set.  With no bitmap allocated, the
                 * change is always flagged. */
                vid_mask = minimask_get_vid_mask(&rule->cr.match.mask);
                if (vid_mask == VLAN_VID_MASK) {
                    if (ofproto->vlan_bitmap) {
                        uint16_t vid = miniflow_get_vid(&rule->cr.match.flow);
                        if (!bitmap_is_set(ofproto->vlan_bitmap, vid)) {
                            bitmap_set1(ofproto->vlan_bitmap, vid);
                            ofproto->vlans_changed = true;
                        }
                    } else {
                        ofproto->vlans_changed = true;
                    }
                }
            } else {
                /* Roll back the add: take the rule out of its table and drop
                 * a reference to it. */
                oftable_remove_rule(rule);
                ofproto_rule_unref(rule);
            }
            break;

        case OFOPERATION_DELETE:
            ovs_assert(!op->error);
            ofproto_rule_unref(rule);
            /* Cleared so that ofoperation_destroy() does not touch the rule
             * after the reference has been dropped. */
            op->rule = NULL;
            break;

        case OFOPERATION_MODIFY:
        case OFOPERATION_REPLACE:
            if (!op->error) {
                long long int now = time_msec();

                rule->modified = now;
                if (op->type == OFOPERATION_REPLACE) {
                    rule->created = rule->used = now;
                }
            } else {
                /* Roll back: restore the cookie, timeouts and
                 * send_flow_removed flag that ofoperation_create() saved in
                 * 'op', plus the saved actions if there are any. */
                ofproto_rule_change_cookie(ofproto, rule, op->flow_cookie);
                ovs_mutex_lock(&rule->mutex);
                rule->idle_timeout = op->idle_timeout;
                rule->hard_timeout = op->hard_timeout;
                ovs_mutex_unlock(&rule->mutex);
                if (op->actions) {
                    struct rule_actions *old_actions;

                    ovs_mutex_lock(&rule->mutex);
                    old_actions = rule->actions;
                    rule->actions = op->actions;
                    ovs_mutex_unlock(&rule->mutex);

                    /* Ownership moved to the rule; clear the pointer so that
                     * ofoperation_destroy() does not unref these actions. */
                    op->actions = NULL;
                    rule_actions_unref(old_actions);
                }
                rule->send_flow_removed = op->send_flow_removed;
            }
            break;

        default:
            NOT_REACHED();
        }

        ofoperation_destroy(op);
    }

    ofmonitor_flush(ofproto->connmgr);

    /* The group is on the pending list only if ofopgroup_submit() put it
     * there. */
    if (!list_is_empty(&group->ofproto_node)) {
        ovs_assert(ofproto->n_pending > 0);
        ofproto->n_pending--;
        list_remove(&group->ofproto_node);
    }
    if (!list_is_empty(&group->ofconn_node)) {
        list_remove(&group->ofconn_node);
        if (error) {
            ofconn_send_error(group->ofconn, group->request, error);
        }
        connmgr_retry(ofproto->connmgr);
    }
    free(group->request);
    free(group);
}

/* Initiates a new operation on 'rule', of the specified 'type', within
 * 'group'.  Prior to calling, 'rule' must not have any pending operation.
 *
 * For a 'type' of OFOPERATION_DELETE, 'reason' should specify the reason that
 * the flow is being deleted.  For other 'type's, 'reason' is ignored (use 0).
 *
 * Returns the newly created ofoperation (which is also available as
 * rule->pending).
*/ static struct ofoperation * ofoperation_create(struct ofopgroup *group, struct rule *rule, enum ofoperation_type type, enum ofp_flow_removed_reason reason) OVS_REQUIRES(ofproto_mutex) { struct ofproto *ofproto = group->ofproto; struct ofoperation *op; ovs_assert(!rule->pending); op = rule->pending = xzalloc(sizeof *op); op->group = group; list_push_back(&group->ops, &op->group_node); op->rule = rule; op->type = type; op->reason = reason; op->flow_cookie = rule->flow_cookie; ovs_mutex_lock(&rule->mutex); op->idle_timeout = rule->idle_timeout; op->hard_timeout = rule->hard_timeout; ovs_mutex_unlock(&rule->mutex); op->send_flow_removed = rule->send_flow_removed; group->n_running++; if (type == OFOPERATION_DELETE) { hmap_insert(&ofproto->deletions, &op->hmap_node, cls_rule_hash(&rule->cr, rule->table_id)); } return op; } static void ofoperation_destroy(struct ofoperation *op) OVS_REQUIRES(ofproto_mutex) { struct ofopgroup *group = op->group; if (op->rule) { op->rule->pending = NULL; } if (op->type == OFOPERATION_DELETE) { hmap_remove(&group->ofproto->deletions, &op->hmap_node); } list_remove(&op->group_node); rule_actions_unref(op->actions); free(op); } /* Indicates that 'op' completed with status 'error', which is either 0 to * indicate success or an OpenFlow error code on failure. * * If 'error' is 0, indicating success, the operation will be committed * permanently to the flow table. * * If 'error' is nonzero, then generally the operation will be rolled back: * * - If 'op' is an "add flow" operation, ofproto removes the new rule or * restores the original rule. The caller must have uninitialized any * derived state in the new rule, as in step 5 of in the "Life Cycle" in * ofproto/ofproto-provider.h. ofoperation_complete() performs steps 6 and * and 7 for the new rule, calling its ->rule_dealloc() function. * * - If 'op' is a "modify flow" operation, ofproto restores the original * actions. * * - 'op' must not be a "delete flow" operation. 
Removing a rule is not * allowed to fail. It must always succeed. * * Please see the large comment in ofproto/ofproto-provider.h titled * "Asynchronous Operation Support" for more information. */ void ofoperation_complete(struct ofoperation *op, enum ofperr error) { struct ofopgroup *group = op->group; ovs_assert(group->n_running > 0); ovs_assert(!error || op->type != OFOPERATION_DELETE); op->error = error; if (!--group->n_running && !list_is_empty(&group->ofproto_node)) { /* This function can be called from ->rule_construct(), in which case * ofproto_mutex is held, or it can be called from ->run(), in which * case ofproto_mutex is not held. But only in the latter case can we * arrive here, so we can safely take ofproto_mutex now. */ ovs_mutex_lock(&ofproto_mutex); ovs_assert(op->rule->pending == op); ofopgroup_complete(group); ovs_mutex_unlock(&ofproto_mutex); } } static uint64_t pick_datapath_id(const struct ofproto *ofproto) { const struct ofport *port; port = ofproto_get_port(ofproto, OFPP_LOCAL); if (port) { uint8_t ea[ETH_ADDR_LEN]; int error; error = netdev_get_etheraddr(port->netdev, ea); if (!error) { return eth_addr_to_uint64(ea); } VLOG_WARN("%s: could not get MAC address for %s (%s)", ofproto->name, netdev_get_name(port->netdev), ovs_strerror(error)); } return ofproto->fallback_dpid; } static uint64_t pick_fallback_dpid(void) { uint8_t ea[ETH_ADDR_LEN]; eth_addr_nicira_random(ea); return eth_addr_to_uint64(ea); } /* Table overflow policy. */ /* Chooses and updates 'rulep' with a rule to evict from 'table'. Sets 'rulep' * to NULL if the table is not configured to evict rules or if the table * contains no evictable rules. (Rules with a readlock on their evict rwlock, * or with no timeouts are not evictable.) 
*/ static bool choose_rule_to_evict(struct oftable *table, struct rule **rulep) OVS_REQUIRES(ofproto_mutex) { struct eviction_group *evg; *rulep = NULL; if (!table->eviction_fields) { return false; } /* In the common case, the outer and inner loops here will each be entered * exactly once: * * - The inner loop normally "return"s in its first iteration. If the * eviction group has any evictable rules, then it always returns in * some iteration. * * - The outer loop only iterates more than once if the largest eviction * group has no evictable rules. * * - The outer loop can exit only if table's 'max_flows' is all filled up * by unevictable rules. */ HEAP_FOR_EACH (evg, size_node, &table->eviction_groups_by_size) { struct rule *rule; HEAP_FOR_EACH (rule, evg_node, &evg->rules) { *rulep = rule; return true; } } return false; } /* Searches 'ofproto' for tables that have more flows than their configured * maximum and that have flow eviction enabled, and evicts as many flows as * necessary and currently feasible from them. * * This triggers only when an OpenFlow table has N flows in it and then the * client configures a maximum number of flows less than N. */ static void ofproto_evict(struct ofproto *ofproto) { struct oftable *table; ovs_mutex_lock(&ofproto_mutex); OFPROTO_FOR_EACH_TABLE (table, ofproto) { evict_rules_from_table(ofproto, table, 0); } ovs_mutex_unlock(&ofproto_mutex); } /* Eviction groups. */ /* Returns the priority to use for an eviction_group that contains 'n_rules' * rules. The priority contains low-order random bits to ensure that eviction * groups with the same number of rules are prioritized randomly. */ static uint32_t eviction_group_priority(size_t n_rules) { uint16_t size = MIN(UINT16_MAX, n_rules); return (size << 16) | random_uint16(); } /* Updates 'evg', an eviction_group within 'table', following a change that * adds or removes rules in 'evg'. 
*/ static void eviction_group_resized(struct oftable *table, struct eviction_group *evg) OVS_REQUIRES(ofproto_mutex) { heap_change(&table->eviction_groups_by_size, &evg->size_node, eviction_group_priority(heap_count(&evg->rules))); } /* Destroys 'evg', an eviction_group within 'table': * * - Removes all the rules, if any, from 'evg'. (It doesn't destroy the * rules themselves, just removes them from the eviction group.) * * - Removes 'evg' from 'table'. * * - Frees 'evg'. */ static void eviction_group_destroy(struct oftable *table, struct eviction_group *evg) OVS_REQUIRES(ofproto_mutex) { while (!heap_is_empty(&evg->rules)) { struct rule *rule; rule = CONTAINER_OF(heap_pop(&evg->rules), struct rule, evg_node); rule->eviction_group = NULL; } hmap_remove(&table->eviction_groups_by_id, &evg->id_node); heap_remove(&table->eviction_groups_by_size, &evg->size_node); heap_destroy(&evg->rules); free(evg); } /* Removes 'rule' from its eviction group, if any. */ static void eviction_group_remove_rule(struct rule *rule) OVS_REQUIRES(ofproto_mutex) { if (rule->eviction_group) { struct oftable *table = &rule->ofproto->tables[rule->table_id]; struct eviction_group *evg = rule->eviction_group; rule->eviction_group = NULL; heap_remove(&evg->rules, &rule->evg_node); if (heap_is_empty(&evg->rules)) { eviction_group_destroy(table, evg); } else { eviction_group_resized(table, evg); } } } /* Hashes the 'rule''s values for the eviction_fields of 'rule''s table, and * returns the hash value. 
*/
static uint32_t
eviction_group_hash_rule(struct rule *rule)
    OVS_REQUIRES(ofproto_mutex)
{
    struct oftable *table = &rule->ofproto->tables[rule->table_id];
    const struct mf_subfield *sf;
    struct flow flow;
    uint32_t hash;

    /* Seed with the table's basis, then fold in each eviction subfield in
     * order. */
    hash = table->eviction_group_id_basis;
    miniflow_expand(&rule->cr.match.flow, &flow);
    for (sf = table->eviction_fields;
         sf < &table->eviction_fields[table->n_eviction_fields];
         sf++)
    {
        if (mf_are_prereqs_ok(sf->field, &flow)) {
            union mf_value value;

            mf_get_value(sf->field, &flow, &value);
            /* Clear the bits of the field that lie below the subfield... */
            if (sf->ofs) {
                bitwise_zero(&value, sf->field->n_bytes, 0, sf->ofs);
            }
            /* ...and those that lie above it, so that only the subfield's
             * own 'n_bits' bits contribute to the hash. */
            if (sf->ofs + sf->n_bits < sf->field->n_bytes * 8) {
                unsigned int start = sf->ofs + sf->n_bits;
                bitwise_zero(&value, sf->field->n_bytes, start,
                             sf->field->n_bytes * 8 - start);
            }
            hash = hash_bytes(&value, sf->field->n_bytes, hash);
        } else {
            /* The field's prerequisites are not met for this flow: mix in a
             * constant in place of a field value. */
            hash = hash_int(hash, 0);
        }
    }

    return hash;
}

/* Returns an eviction group within 'table' with the given 'id', creating one
 * if necessary.
 *
 * 'id' is used directly as the hash in 'table->eviction_groups_by_id'; the
 * first group found under that hash is returned without further
 * comparison. */
static struct eviction_group *
eviction_group_find(struct oftable *table, uint32_t id)
    OVS_REQUIRES(ofproto_mutex)
{
    struct eviction_group *evg;

    HMAP_FOR_EACH_WITH_HASH (evg, id_node, id, &table->eviction_groups_by_id) {
        return evg;
    }

    /* No such group yet: create an empty one and index it by ID and by
     * size (with the priority of a group that has no rules). */
    evg = xmalloc(sizeof *evg);
    hmap_insert(&table->eviction_groups_by_id, &evg->id_node, id);
    heap_insert(&table->eviction_groups_by_size, &evg->size_node,
                eviction_group_priority(0));
    heap_init(&evg->rules);

    return evg;
}

/* Returns an eviction priority for 'rule'.  The return value should be
 * interpreted so that higher priorities make a rule a more attractive
 * candidate for eviction.
 *
 * Returns 0 for a rule that has neither a hard nor an idle timeout. */
static uint32_t
rule_eviction_priority(struct rule *rule)
    OVS_REQUIRES(ofproto_mutex)
{
    long long int hard_expiration;
    long long int idle_expiration;
    long long int expiration;
    uint32_t expiration_offset;

    /* Calculate time of expiration. */
    ovs_mutex_lock(&rule->mutex);
    /* Timeouts are multiplied by 1000 before being added to the 'modified'
     * and 'used' timestamps; a timeout of 0 means that kind of expiration
     * never happens. */
    hard_expiration = (rule->hard_timeout
                       ? rule->modified + rule->hard_timeout * 1000
                       : LLONG_MAX);
    idle_expiration = (rule->idle_timeout
                       ? rule->used + rule->idle_timeout * 1000
                       : LLONG_MAX);
    expiration = MIN(hard_expiration, idle_expiration);
    ovs_mutex_unlock(&rule->mutex);
    if (expiration == LLONG_MAX) {
        return 0;
    }

    /* Calculate the time of expiration as a number of (approximate) seconds
     * after program startup.
     *
     * This should work OK for program runs that last UINT32_MAX seconds or
     * less.  Therefore, please restart OVS at least once every 136 years. */
    expiration_offset = (expiration >> 10) - (time_boot_msec() >> 10);

    /* Invert the expiration offset because we're using a max-heap. */
    return UINT32_MAX - expiration_offset;
}

/* Adds 'rule' to an appropriate eviction group for its oftable's
 * configuration.  Does nothing if 'rule''s oftable doesn't have eviction
 * enabled, or if 'rule' is a permanent rule (one that will never expire on its
 * own).
 *
 * The caller must ensure that 'rule' is not already in an eviction group. */
static void
eviction_group_add_rule(struct rule *rule)
    OVS_REQUIRES(ofproto_mutex)
{
    struct ofproto *ofproto = rule->ofproto;
    struct oftable *table = &ofproto->tables[rule->table_id];
    bool has_timeout;

    /* The timeouts are read under the rule's own mutex. */
    ovs_mutex_lock(&rule->mutex);
    has_timeout = rule->hard_timeout || rule->idle_timeout;
    ovs_mutex_unlock(&rule->mutex);

    if (table->eviction_fields && has_timeout) {
        struct eviction_group *evg;

        evg = eviction_group_find(table, eviction_group_hash_rule(rule));

        rule->eviction_group = evg;
        heap_insert(&evg->rules, &rule->evg_node,
                    rule_eviction_priority(rule));
        /* The group grew, so its position in the by-size heap must be
         * refreshed. */
        eviction_group_resized(table, evg);
    }
}

/* oftables. */

/* Initializes 'table'. */
static void
oftable_init(struct oftable *table)
{
    /* Everything starts zeroed (so, for example, 'name' and
     * 'eviction_fields' are null); only the classifier and the flow limit
     * need explicit setup. */
    memset(table, 0, sizeof *table);
    classifier_init(&table->cls);
    table->max_flows = UINT_MAX;
}

/* Destroys 'table', including its classifier and eviction groups.
 *
 * The caller is responsible for freeing 'table' itself.
*/ static void oftable_destroy(struct oftable *table) { ovs_rwlock_rdlock(&table->cls.rwlock); ovs_assert(classifier_is_empty(&table->cls)); ovs_rwlock_unlock(&table->cls.rwlock); oftable_disable_eviction(table); classifier_destroy(&table->cls); free(table->name); } /* Changes the name of 'table' to 'name'. If 'name' is NULL or the empty * string, then 'table' will use its default name. * * This only affects the name exposed for a table exposed through the OpenFlow * OFPST_TABLE (as printed by "ovs-ofctl dump-tables"). */ static void oftable_set_name(struct oftable *table, const char *name) { if (name && name[0]) { int len = strnlen(name, OFP_MAX_TABLE_NAME_LEN); if (!table->name || strncmp(name, table->name, len)) { free(table->name); table->name = xmemdup0(name, len); } } else { free(table->name); table->name = NULL; } } /* oftables support a choice of two policies when adding a rule would cause the * number of flows in the table to exceed the configured maximum number: either * they can refuse to add the new flow or they can evict some existing flow. * This function configures the former policy on 'table'. */ static void oftable_disable_eviction(struct oftable *table) OVS_REQUIRES(ofproto_mutex) { if (table->eviction_fields) { struct eviction_group *evg, *next; HMAP_FOR_EACH_SAFE (evg, next, id_node, &table->eviction_groups_by_id) { eviction_group_destroy(table, evg); } hmap_destroy(&table->eviction_groups_by_id); heap_destroy(&table->eviction_groups_by_size); free(table->eviction_fields); table->eviction_fields = NULL; table->n_eviction_fields = 0; } } /* oftables support a choice of two policies when adding a rule would cause the * number of flows in the table to exceed the configured maximum number: either * they can refuse to add the new flow or they can evict some existing flow. * This function configures the latter policy on 'table', with fairness based * on the values of the 'n_fields' fields specified in 'fields'. 
(Specifying * 'n_fields' as 0 disables fairness.) */ static void oftable_enable_eviction(struct oftable *table, const struct mf_subfield *fields, size_t n_fields) OVS_REQUIRES(ofproto_mutex) { struct cls_cursor cursor; struct rule *rule; if (table->eviction_fields && n_fields == table->n_eviction_fields && (!n_fields || !memcmp(fields, table->eviction_fields, n_fields * sizeof *fields))) { /* No change. */ return; } oftable_disable_eviction(table); table->n_eviction_fields = n_fields; table->eviction_fields = xmemdup(fields, n_fields * sizeof *fields); table->eviction_group_id_basis = random_uint32(); hmap_init(&table->eviction_groups_by_id); heap_init(&table->eviction_groups_by_size); ovs_rwlock_rdlock(&table->cls.rwlock); cls_cursor_init(&cursor, &table->cls, NULL); CLS_CURSOR_FOR_EACH (rule, cr, &cursor) { eviction_group_add_rule(rule); } ovs_rwlock_unlock(&table->cls.rwlock); } /* Removes 'rule' from the oftable that contains it. */ static void oftable_remove_rule__(struct ofproto *ofproto, struct rule *rule) OVS_REQUIRES(ofproto_mutex) { struct classifier *cls = &ofproto->tables[rule->table_id].cls; ovs_rwlock_wrlock(&cls->rwlock); classifier_remove(cls, CONST_CAST(struct cls_rule *, &rule->cr)); ovs_rwlock_unlock(&cls->rwlock); cookies_remove(ofproto, rule); eviction_group_remove_rule(rule); if (!list_is_empty(&rule->expirable)) { list_remove(&rule->expirable); } if (!list_is_empty(&rule->meter_list_node)) { list_remove(&rule->meter_list_node); list_init(&rule->meter_list_node); } } static void oftable_remove_rule(struct rule *rule) OVS_REQUIRES(ofproto_mutex) { oftable_remove_rule__(rule->ofproto, rule); } /* Inserts 'rule' into its oftable, which must not already contain any rule for * the same cls_rule. 
*/ static void oftable_insert_rule(struct rule *rule) OVS_REQUIRES(ofproto_mutex) { struct ofproto *ofproto = rule->ofproto; struct oftable *table = &ofproto->tables[rule->table_id]; bool may_expire; ovs_mutex_lock(&rule->mutex); may_expire = rule->hard_timeout || rule->idle_timeout; ovs_mutex_unlock(&rule->mutex); if (may_expire) { list_insert(&ofproto->expirable, &rule->expirable); } cookies_insert(ofproto, rule); if (rule->actions->meter_id) { struct meter *meter = ofproto->meters[rule->actions->meter_id]; list_insert(&meter->rules, &rule->meter_list_node); } ovs_rwlock_wrlock(&table->cls.rwlock); classifier_insert(&table->cls, CONST_CAST(struct cls_rule *, &rule->cr)); ovs_rwlock_unlock(&table->cls.rwlock); eviction_group_add_rule(rule); } /* unixctl commands. */ struct ofproto * ofproto_lookup(const char *name) { struct ofproto *ofproto; HMAP_FOR_EACH_WITH_HASH (ofproto, hmap_node, hash_string(name, 0), &all_ofprotos) { if (!strcmp(ofproto->name, name)) { return ofproto; } } return NULL; } static void ofproto_unixctl_list(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { struct ofproto *ofproto; struct ds results; ds_init(&results); HMAP_FOR_EACH (ofproto, hmap_node, &all_ofprotos) { ds_put_format(&results, "%s\n", ofproto->name); } unixctl_command_reply(conn, ds_cstr(&results)); ds_destroy(&results); } static void ofproto_unixctl_init(void) { static bool registered; if (registered) { return; } registered = true; unixctl_command_register("ofproto/list", "", 0, 0, ofproto_unixctl_list, NULL); } /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) * * This is deprecated. It is only for compatibility with broken device drivers * in old versions of Linux that do not properly support VLANs when VLAN * devices are not used. When broken device drivers are no longer in * widespread use, we will delete these interfaces. 
*/

/* For every VLAN ID that some OpenFlow rule in 'ofproto' matches exactly,
 * sets the corresponding bit in the caller's 4096-bit 'vlan_bitmap'.
 *
 * As a side effect, 'ofproto''s own VLAN bitmap is replaced by a freshly
 * allocated one holding the same bits, and its "VLANs changed" flag is
 * cleared. */
void
ofproto_get_vlan_usage(struct ofproto *ofproto, unsigned long int *vlan_bitmap)
{
    const struct oftable *oft;

    free(ofproto->vlan_bitmap);
    ofproto->vlan_bitmap = bitmap_allocate(4096);
    ofproto->vlans_changed = false;

    OFPROTO_FOR_EACH_TABLE (oft, ofproto) {
        const struct cls_table *subtable;

        ovs_rwlock_rdlock(&oft->cls.rwlock);
        HMAP_FOR_EACH (subtable, hmap_node, &oft->cls.tables) {
            const struct cls_rule *cr;

            /* Only classifier tables whose mask covers the whole VLAN ID
             * are of interest. */
            if (minimask_get_vid_mask(&subtable->mask) != VLAN_VID_MASK) {
                continue;
            }

            HMAP_FOR_EACH (cr, hmap_node, &subtable->rules) {
                uint16_t vid = miniflow_get_vid(&cr->match.flow);

                bitmap_set1(vlan_bitmap, vid);
                bitmap_set1(ofproto->vlan_bitmap, vid);
            }
        }
        ovs_rwlock_unlock(&oft->cls.rwlock);
    }
}

/* Reports whether the flow table has started using any new VLAN since
 * ofproto_get_vlan_usage() was last called.
 *
 * VLANs that fall out of use are not tracked. */
bool
ofproto_has_vlan_usage_changed(const struct ofproto *ofproto)
{
    return ofproto->vlans_changed;
}

/* Configures a VLAN splinter binding between the ports identified by OpenFlow
 * port numbers 'vlandev_ofp_port' and 'realdev_ofp_port'.  If
 * 'realdev_ofp_port' is nonzero, then the VLAN device is enslaved to the real
 * device as a VLAN splinter for VLAN ID 'vid'.  If 'realdev_ofp_port' is zero,
 * then the VLAN device is un-enslaved.
*/
/* Returns 0 if successful, EINVAL if 'ofproto' has no port numbered
 * 'vlandev_ofp_port', EOPNOTSUPP if a binding is requested but the ofproto
 * implementation has no ->set_realdev(), and otherwise the error returned by
 * ->set_realdev().  Every failure is also logged as a warning.
 *
 * 'vlandev_ofp_port' and 'realdev_ofp_port' must differ. */
int
ofproto_port_set_realdev(struct ofproto *ofproto, ofp_port_t vlandev_ofp_port,
                         ofp_port_t realdev_ofp_port, int vid)
{
    struct ofport *ofport;
    int error;

    ovs_assert(vlandev_ofp_port != realdev_ofp_port);

    ofport = ofproto_get_port(ofproto, vlandev_ofp_port);
    if (!ofport) {
        VLOG_WARN("%s: cannot set realdev on nonexistent port %"PRIu16,
                  ofproto->name, vlandev_ofp_port);
        return EINVAL;
    }

    if (!ofproto->ofproto_class->set_realdev) {
        /* Without splinter support no port can be enslaved, so a request to
         * un-enslave (a zero 'realdev_ofp_port') is trivially satisfied.
         * This previously tested 'vlandev_ofp_port', which does not match
         * the contract stated above for a zero 'realdev_ofp_port'. */
        if (!realdev_ofp_port) {
            return 0;
        }

        VLOG_WARN("%s: vlan splinters not supported", ofproto->name);
        return EOPNOTSUPP;
    }

    error = ofproto->ofproto_class->set_realdev(ofport, realdev_ofp_port, vid);
    if (error) {
        VLOG_WARN("%s: setting realdev on port %"PRIu16" (%s) failed (%s)",
                  ofproto->name, vlandev_ofp_port,
                  netdev_get_name(ofport->netdev), ovs_strerror(error));
    }
    return error;
}
openvswitch-2.0.1+git20140120/ofproto/ofproto.h000066400000000000000000000414031226605124000210050ustar00rootroot00000000000000/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ #ifndef OFPROTO_H #define OFPROTO_H 1 #include #include #include #include #include #include "cfm.h" #include "flow.h" #include "netflow.h" #include "sset.h" #include "stp.h" #ifdef __cplusplus extern "C" { #endif struct bfd_cfg; struct cfm_settings; struct cls_rule; struct netdev; struct ofproto; struct ofport; struct shash; struct simap; struct smap; struct netdev_stats; struct ofproto_controller_info { bool is_connected; enum ofp12_controller_role role; struct { const char *keys[4]; const char *values[4]; size_t n; } pairs; }; struct ofexpired { struct flow flow; uint64_t packet_count; /* Packets from subrules. */ uint64_t byte_count; /* Bytes from subrules. */ long long int used; /* Last-used time (0 if never used). */ }; struct ofproto_sflow_options { struct sset targets; uint32_t sampling_rate; uint32_t polling_interval; uint32_t header_len; uint32_t sub_id; char *agent_device; char *control_ip; }; struct ofproto_ipfix_bridge_exporter_options { struct sset targets; uint32_t sampling_rate; uint32_t obs_domain_id; /* Bridge-wide Observation Domain ID. */ uint32_t obs_point_id; /* Bridge-wide Observation Point ID. */ uint32_t cache_active_timeout; uint32_t cache_max_flows; }; struct ofproto_ipfix_flow_exporter_options { uint32_t collector_set_id; struct sset targets; uint32_t cache_active_timeout; uint32_t cache_max_flows; }; struct ofproto_stp_settings { stp_identifier system_id; uint16_t priority; uint16_t hello_time; uint16_t max_age; uint16_t fwd_delay; }; struct ofproto_stp_status { bool enabled; /* If false, ignore other members. */ stp_identifier bridge_id; stp_identifier designated_root; int root_path_cost; }; struct ofproto_port_stp_settings { bool enable; uint8_t port_num; /* In the range 1-255, inclusive. */ uint8_t priority; uint16_t path_cost; }; struct ofproto_port_stp_status { bool enabled; /* If false, ignore other members. 
*/ int port_id; enum stp_state state; unsigned int sec_in_state; enum stp_role role; int tx_count; /* Number of BPDUs transmitted. */ int rx_count; /* Number of valid BPDUs received. */ int error_count; /* Number of bad BPDUs received. */ }; struct ofproto_port_queue { uint32_t queue; /* Queue ID. */ uint8_t dscp; /* DSCP bits (e.g. [0, 63]). */ }; /* How the switch should act if the controller cannot be contacted. */ enum ofproto_fail_mode { OFPROTO_FAIL_SECURE, /* Preserve flow table. */ OFPROTO_FAIL_STANDALONE /* Act as a standalone switch. */ }; enum ofproto_band { OFPROTO_IN_BAND, /* In-band connection to controller. */ OFPROTO_OUT_OF_BAND /* Out-of-band connection to controller. */ }; struct ofproto_controller { char *target; /* e.g. "tcp:127.0.0.1" */ int max_backoff; /* Maximum reconnection backoff, in seconds. */ int probe_interval; /* Max idle time before probing, in seconds. */ enum ofproto_band band; /* In-band or out-of-band? */ bool enable_async_msgs; /* Initially enable asynchronous messages? */ /* OpenFlow packet-in rate-limiting. */ int rate_limit; /* Max packet-in rate in packets per second. */ int burst_limit; /* Limit on accumulating packet credits. */ uint8_t dscp; /* DSCP value for controller connection. */ }; void ofproto_enumerate_types(struct sset *types); const char *ofproto_normalize_type(const char *); int ofproto_enumerate_names(const char *type, struct sset *names); void ofproto_parse_name(const char *name, char **dp_name, char **dp_type); /* An interface hint element, which is used by ofproto_init() to * describe the caller's understanding of the startup state. */ struct iface_hint { char *br_name; /* Name of owning bridge. */ char *br_type; /* Type of owning bridge. */ ofp_port_t ofp_port; /* OpenFlow port number. 
*/ }; void ofproto_init(const struct shash *iface_hints); int ofproto_type_run(const char *datapath_type); int ofproto_type_run_fast(const char *datapath_type); void ofproto_type_wait(const char *datapath_type); int ofproto_create(const char *datapath, const char *datapath_type, struct ofproto **ofprotop); void ofproto_destroy(struct ofproto *); int ofproto_delete(const char *name, const char *type); int ofproto_run(struct ofproto *); int ofproto_run_fast(struct ofproto *); void ofproto_wait(struct ofproto *); bool ofproto_is_alive(const struct ofproto *); void ofproto_get_memory_usage(const struct ofproto *, struct simap *); /* A port within an OpenFlow switch. * * 'name' and 'type' are suitable for passing to netdev_open(). */ struct ofproto_port { char *name; /* Network device name, e.g. "eth0". */ char *type; /* Network device type, e.g. "system". */ ofp_port_t ofp_port; /* OpenFlow port number. */ }; void ofproto_port_clone(struct ofproto_port *, const struct ofproto_port *); void ofproto_port_destroy(struct ofproto_port *); struct ofproto_port_dump { const struct ofproto *ofproto; int error; void *state; }; void ofproto_port_dump_start(struct ofproto_port_dump *, const struct ofproto *); bool ofproto_port_dump_next(struct ofproto_port_dump *, struct ofproto_port *); int ofproto_port_dump_done(struct ofproto_port_dump *); /* Iterates through each OFPROTO_PORT in OFPROTO, using DUMP as state. * * Arguments all have pointer type. * * If you break out of the loop, then you need to free the dump structure by * hand using ofproto_port_dump_done(). */ #define OFPROTO_PORT_FOR_EACH(OFPROTO_PORT, DUMP, OFPROTO) \ for (ofproto_port_dump_start(DUMP, OFPROTO); \ (ofproto_port_dump_next(DUMP, OFPROTO_PORT) \ ? 
true \ : (ofproto_port_dump_done(DUMP), false)); \ ) #define OFPROTO_FLOW_EVICTION_THRESHOLD_DEFAULT 2500 #define OFPROTO_FLOW_EVICTION_THRESHOLD_MIN 100 /* How flow misses should be handled in ofproto-dpif */ enum ofproto_flow_miss_model { OFPROTO_HANDLE_MISS_AUTO, /* Based on flow eviction threshold. */ OFPROTO_HANDLE_MISS_WITH_FACETS, /* Always create facets. */ OFPROTO_HANDLE_MISS_WITHOUT_FACETS /* Always handle without facets.*/ }; const char *ofproto_port_open_type(const char *datapath_type, const char *port_type); int ofproto_port_add(struct ofproto *, struct netdev *, ofp_port_t *ofp_portp); int ofproto_port_del(struct ofproto *, ofp_port_t ofp_port); int ofproto_port_get_stats(const struct ofport *, struct netdev_stats *stats); int ofproto_port_query_by_name(const struct ofproto *, const char *devname, struct ofproto_port *); /* Top-level configuration. */ uint64_t ofproto_get_datapath_id(const struct ofproto *); void ofproto_set_datapath_id(struct ofproto *, uint64_t datapath_id); void ofproto_set_controllers(struct ofproto *, const struct ofproto_controller *, size_t n, uint32_t allowed_versions); void ofproto_set_fail_mode(struct ofproto *, enum ofproto_fail_mode fail_mode); void ofproto_reconnect_controllers(struct ofproto *); void ofproto_set_extra_in_band_remotes(struct ofproto *, const struct sockaddr_in *, size_t n); void ofproto_set_in_band_queue(struct ofproto *, int queue_id); void ofproto_set_flow_eviction_threshold(unsigned threshold); void ofproto_set_flow_miss_model(unsigned model); void ofproto_set_forward_bpdu(struct ofproto *, bool forward_bpdu); void ofproto_set_mac_table_config(struct ofproto *, unsigned idle_time, size_t max_entries); void ofproto_set_n_handler_threads(unsigned limit); void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc); int ofproto_set_snoops(struct ofproto *, const struct sset *snoops); int ofproto_set_netflow(struct ofproto *, const struct netflow_options *nf_options); int ofproto_set_sflow(struct 
ofproto *, const struct ofproto_sflow_options *); int ofproto_set_ipfix(struct ofproto *, const struct ofproto_ipfix_bridge_exporter_options *, const struct ofproto_ipfix_flow_exporter_options *, size_t); void ofproto_set_flow_restore_wait(bool flow_restore_wait_db); bool ofproto_get_flow_restore_wait(void); int ofproto_set_stp(struct ofproto *, const struct ofproto_stp_settings *); int ofproto_get_stp_status(struct ofproto *, struct ofproto_stp_status *); /* Configuration of ports. */ void ofproto_port_unregister(struct ofproto *, ofp_port_t ofp_port); void ofproto_port_clear_cfm(struct ofproto *, ofp_port_t ofp_port); void ofproto_port_set_cfm(struct ofproto *, ofp_port_t ofp_port, const struct cfm_settings *); void ofproto_port_set_bfd(struct ofproto *, ofp_port_t ofp_port, const struct smap *cfg); int ofproto_port_get_bfd_status(struct ofproto *, ofp_port_t ofp_port, struct smap *); int ofproto_port_is_lacp_current(struct ofproto *, ofp_port_t ofp_port); int ofproto_port_set_stp(struct ofproto *, ofp_port_t ofp_port, const struct ofproto_port_stp_settings *); int ofproto_port_get_stp_status(struct ofproto *, ofp_port_t ofp_port, struct ofproto_port_stp_status *); int ofproto_port_set_queues(struct ofproto *, ofp_port_t ofp_port, const struct ofproto_port_queue *, size_t n_queues); /* The behaviour of the port regarding VLAN handling */ enum port_vlan_mode { /* This port is an access port. 'vlan' is the VLAN ID. 'trunks' is * ignored. */ PORT_VLAN_ACCESS, /* This port is a trunk. 'trunks' is the set of trunks. 'vlan' is * ignored. */ PORT_VLAN_TRUNK, /* Untagged incoming packets are part of 'vlan', as are incoming packets * tagged with 'vlan'. Outgoing packets tagged with 'vlan' stay tagged. * Other VLANs in 'trunks' are trunked. */ PORT_VLAN_NATIVE_TAGGED, /* Untagged incoming packets are part of 'vlan', as are incoming packets * tagged with 'vlan'. Outgoing packets tagged with 'vlan' are untagged. * Other VLANs in 'trunks' are trunked. 
*/
    PORT_VLAN_NATIVE_UNTAGGED
};

/* Configuration of bundles. */
struct ofproto_bundle_settings {
    char *name;                 /* For use in log messages. */

    ofp_port_t *slaves;         /* OpenFlow port numbers for slaves. */
    size_t n_slaves;            /* Number of elements in 'slaves'. */

    enum port_vlan_mode vlan_mode; /* Selects mode for vlan and trunks */
    int vlan;                   /* VLAN VID, except for PORT_VLAN_TRUNK. */
    unsigned long *trunks;      /* vlan_bitmap, except for PORT_VLAN_ACCESS. */
    bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */

    struct bond_settings *bond; /* Must be nonnull iff n_slaves > 1. */

    struct lacp_settings *lacp;              /* Nonnull to enable LACP. */
    struct lacp_slave_settings *lacp_slaves; /* Array of n_slaves elements. */

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used.  When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    ofp_port_t realdev_ofp_port;/* OpenFlow port number of real device. */
};

int ofproto_bundle_register(struct ofproto *, void *aux,
                            const struct ofproto_bundle_settings *);
int ofproto_bundle_unregister(struct ofproto *, void *aux);

/* Configuration of mirrors. */
struct ofproto_mirror_settings {
    /* Name for log messages. */
    char *name;

    /* Bundles that select packets for mirroring upon ingress.  */
    void **srcs;                /* A set of registered ofbundle handles. */
    size_t n_srcs;

    /* Bundles that select packets for mirroring upon egress.  */
    void **dsts;                /* A set of registered ofbundle handles. */
    size_t n_dsts;

    /* VLANs of packets to select for mirroring. */
    unsigned long *src_vlans;   /* vlan_bitmap, NULL selects all VLANs. */

    /* Output (mutually exclusive). */
    void *out_bundle;           /* A registered ofbundle handle or NULL. */
    uint16_t out_vlan;          /* Output VLAN, only if out_bundle is NULL.
*/ }; int ofproto_mirror_register(struct ofproto *, void *aux, const struct ofproto_mirror_settings *); int ofproto_mirror_unregister(struct ofproto *, void *aux); int ofproto_mirror_get_stats(struct ofproto *, void *aux, uint64_t *packets, uint64_t *bytes); int ofproto_set_flood_vlans(struct ofproto *, unsigned long *flood_vlans); bool ofproto_is_mirror_output_bundle(const struct ofproto *, void *aux); /* Configuration of OpenFlow tables. */ struct ofproto_table_settings { char *name; /* Name exported via OpenFlow or NULL. */ unsigned int max_flows; /* Maximum number of flows or UINT_MAX. */ /* These members determine the handling of an attempt to add a flow that * would cause the table to have more than 'max_flows' flows. * * If 'groups' is NULL, overflows will be rejected with an error. * * If 'groups' is nonnull, an overflow will cause a flow to be removed. * The flow to be removed is chosen to give fairness among groups * distinguished by different values for the subfields within 'groups'. */ struct mf_subfield *groups; size_t n_groups; }; int ofproto_get_n_tables(const struct ofproto *); void ofproto_configure_table(struct ofproto *, int table_id, const struct ofproto_table_settings *); /* Configuration querying. */ bool ofproto_has_snoops(const struct ofproto *); void ofproto_get_snoops(const struct ofproto *, struct sset *); void ofproto_get_all_flows(struct ofproto *p, struct ds *); void ofproto_get_netflow_ids(const struct ofproto *, uint8_t *engine_type, uint8_t *engine_id); void ofproto_get_ofproto_controller_info(const struct ofproto *, struct shash *); void ofproto_free_ofproto_controller_info(struct shash *); /* CFM status query. */ struct ofproto_cfm_status { /* 0 if not faulted, otherwise a combination of one or more reasons. */ enum cfm_fault_reason faults; /* 0 if the remote CFM endpoint is operationally down, * 1 if the remote CFM endpoint is operationally up, * -1 if we don't know because the remote CFM endpoint is not in extended * mode. 
*/ int remote_opstate; /* Ordinarily a "health status" in the range 0...100 inclusive, with 0 * being worst and 100 being best, or -1 if the health status is not * well-defined. */ int health; /* MPIDs of remote maintenance points whose CCMs have been received. */ uint64_t *rmps; size_t n_rmps; }; bool ofproto_port_get_cfm_status(const struct ofproto *, ofp_port_t ofp_port, struct ofproto_cfm_status *); /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) * * This is deprecated. It is only for compatibility with broken device drivers * in old versions of Linux that do not properly support VLANs when VLAN * devices are not used. When broken device drivers are no longer in * widespread use, we will delete these interfaces. */ void ofproto_get_vlan_usage(struct ofproto *, unsigned long int *vlan_bitmap); bool ofproto_has_vlan_usage_changed(const struct ofproto *); int ofproto_port_set_realdev(struct ofproto *, ofp_port_t vlandev_ofp_port, ofp_port_t realdev_ofp_port, int vid); uint32_t ofproto_get_provider_meter_id(const struct ofproto *, uint32_t of_meter_id); #ifdef __cplusplus } #endif #endif /* ofproto.h */ openvswitch-2.0.1+git20140120/ofproto/pinsched.c000066400000000000000000000200451226605124000211040ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "pinsched.h" #include #include #include #include #include #include "flow.h" #include "hash.h" #include "hmap.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "poll-loop.h" #include "random.h" #include "rconn.h" #include "sat-math.h" #include "timeval.h" #include "token-bucket.h" #include "vconn.h" struct pinqueue { struct hmap_node node; /* In struct pinsched's 'queues' hmap. */ ofp_port_t port_no; /* Port number. */ struct list packets; /* Contains "struct ofpbuf"s. */ int n; /* Number of packets in 'packets'. */ }; struct pinsched { struct token_bucket token_bucket; /* One queue per physical port. */ struct hmap queues; /* Contains "struct pinqueue"s. */ int n_queued; /* Sum over queues[*].n. */ struct pinqueue *next_txq; /* Next pinqueue check in round-robin. */ /* Transmission queue. */ int n_txq; /* No. of packets waiting in rconn for tx. */ /* Statistics reporting. */ unsigned long long n_normal; /* # txed w/o rate limit queuing. */ unsigned long long n_limited; /* # queued for rate limiting. */ unsigned long long n_queue_dropped; /* # dropped due to queue overflow. */ }; static void advance_txq(struct pinsched *ps) { struct hmap_node *next; next = (ps->next_txq ? hmap_next(&ps->queues, &ps->next_txq->node) : hmap_first(&ps->queues)); ps->next_txq = next ? CONTAINER_OF(next, struct pinqueue, node) : NULL; } static struct ofpbuf * dequeue_packet(struct pinsched *ps, struct pinqueue *q) { struct ofpbuf *packet = ofpbuf_from_list(list_pop_front(&q->packets)); q->n--; ps->n_queued--; return packet; } static void adjust_limits(int *rate_limit, int *burst_limit) { if (*rate_limit <= 0) { *rate_limit = 1000; } if (*burst_limit <= 0) { *burst_limit = *rate_limit / 4; } if (*burst_limit < 1) { *burst_limit = 1; } } /* Destroys 'q' and removes it from 'ps''s set of queues. * (The caller must ensure that 'q' is empty.) 
*/ static void pinqueue_destroy(struct pinsched *ps, struct pinqueue *q) { hmap_remove(&ps->queues, &q->node); free(q); } static struct pinqueue * pinqueue_get(struct pinsched *ps, ofp_port_t port_no) { uint32_t hash = hash_ofp_port(port_no); struct pinqueue *q; HMAP_FOR_EACH_IN_BUCKET (q, node, hash, &ps->queues) { if (port_no == q->port_no) { return q; } } q = xmalloc(sizeof *q); hmap_insert(&ps->queues, &q->node, hash); q->port_no = port_no; list_init(&q->packets); q->n = 0; return q; } /* Drop a packet from the longest queue in 'ps'. */ static void drop_packet(struct pinsched *ps) { struct pinqueue *longest; /* Queue currently selected as longest. */ int n_longest = 0; /* # of queues of same length as 'longest'. */ struct pinqueue *q; ps->n_queue_dropped++; longest = NULL; HMAP_FOR_EACH (q, node, &ps->queues) { if (!longest || longest->n < q->n) { longest = q; n_longest = 1; } else if (longest->n == q->n) { n_longest++; /* Randomly select one of the longest queues, with a uniform * distribution (Knuth algorithm 3.4.2R). */ if (!random_range(n_longest)) { longest = q; } } } /* FIXME: do we want to pop the tail instead? */ ofpbuf_delete(dequeue_packet(ps, longest)); if (longest->n == 0) { pinqueue_destroy(ps, longest); } } /* Remove and return the next packet to transmit (in round-robin order). */ static struct ofpbuf * get_tx_packet(struct pinsched *ps) { struct ofpbuf *packet; struct pinqueue *q; if (!ps->next_txq) { advance_txq(ps); } q = ps->next_txq; packet = dequeue_packet(ps, q); advance_txq(ps); if (q->n == 0) { pinqueue_destroy(ps, q); } return packet; } /* Attempts to remove enough tokens from 'ps' to transmit a packet. Returns * true if successful, false otherwise. (In the latter case no tokens are * removed.) 
*/ static bool get_token(struct pinsched *ps) { return token_bucket_withdraw(&ps->token_bucket, 1000); } void pinsched_send(struct pinsched *ps, ofp_port_t port_no, struct ofpbuf *packet, pinsched_tx_cb *cb, void *aux) { if (!ps) { cb(packet, aux); } else if (!ps->n_queued && get_token(ps)) { /* In the common case where we are not constrained by the rate limit, * let the packet take the normal path. */ ps->n_normal++; cb(packet, aux); } else { /* Otherwise queue it up for the periodic callback to drain out. */ struct pinqueue *q; /* We might be called with a buffer obtained from dpif_recv() that has * much more allocated space than actual content most of the time. * Since we're going to store the packet for some time, free up that * otherwise wasted space. */ ofpbuf_trim(packet); if (ps->n_queued * 1000 >= ps->token_bucket.burst) { drop_packet(ps); } q = pinqueue_get(ps, port_no); list_push_back(&q->packets, &packet->list_node); q->n++; ps->n_queued++; ps->n_limited++; } } void pinsched_run(struct pinsched *ps, pinsched_tx_cb *cb, void *aux) { if (ps) { int i; /* Drain some packets out of the bucket if possible, but limit the * number of iterations to allow other code to get work done too. */ for (i = 0; ps->n_queued && get_token(ps) && i < 50; i++) { cb(get_tx_packet(ps), aux); } } } void pinsched_wait(struct pinsched *ps) { if (ps && ps->n_queued) { token_bucket_wait(&ps->token_bucket, 1000); } } /* Creates and returns a scheduler for sending packet-in messages. 
*/ struct pinsched * pinsched_create(int rate_limit, int burst_limit) { struct pinsched *ps; ps = xzalloc(sizeof *ps); adjust_limits(&rate_limit, &burst_limit); token_bucket_init(&ps->token_bucket, rate_limit, sat_mul(burst_limit, 1000)); hmap_init(&ps->queues); ps->n_queued = 0; ps->next_txq = NULL; ps->n_txq = 0; ps->n_normal = 0; ps->n_limited = 0; ps->n_queue_dropped = 0; return ps; } void pinsched_destroy(struct pinsched *ps) { if (ps) { struct pinqueue *q, *next; HMAP_FOR_EACH_SAFE (q, next, node, &ps->queues) { hmap_remove(&ps->queues, &q->node); ofpbuf_list_delete(&q->packets); free(q); } hmap_destroy(&ps->queues); free(ps); } } void pinsched_get_limits(const struct pinsched *ps, int *rate_limit, int *burst_limit) { *rate_limit = ps->token_bucket.rate; *burst_limit = ps->token_bucket.burst / 1000; } void pinsched_set_limits(struct pinsched *ps, int rate_limit, int burst_limit) { adjust_limits(&rate_limit, &burst_limit); token_bucket_set(&ps->token_bucket, rate_limit, sat_mul(burst_limit, 1000)); while (ps->n_queued > burst_limit) { drop_packet(ps); } } /* Returns the number of packets scheduled to be sent eventually by 'ps'. * Returns 0 if 'ps' is null. */ unsigned int pinsched_count_txqlen(const struct pinsched *ps) { return ps ? ps->n_txq : 0; } openvswitch-2.0.1+git20140120/ofproto/pinsched.h000066400000000000000000000025641226605124000211170ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PINSCHED_H #define PINSCHED_H_H 1 #include #include "flow.h" struct ofpbuf; typedef void pinsched_tx_cb(struct ofpbuf *, void *aux); struct pinsched *pinsched_create(int rate_limit, int burst_limit); void pinsched_get_limits(const struct pinsched *, int *rate_limit, int *burst_limit); void pinsched_set_limits(struct pinsched *, int rate_limit, int burst_limit); void pinsched_destroy(struct pinsched *); void pinsched_send(struct pinsched *, ofp_port_t port_no, struct ofpbuf *, pinsched_tx_cb *, void *aux); void pinsched_run(struct pinsched *, pinsched_tx_cb *, void *aux); void pinsched_wait(struct pinsched *); unsigned int pinsched_count_txqlen(const struct pinsched *); #endif /* pinsched.h */ openvswitch-2.0.1+git20140120/ofproto/pktbuf.c000066400000000000000000000170261226605124000206070ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "pktbuf.h" #include #include #include "coverage.h" #include "ofp-util.h" #include "ofpbuf.h" #include "timeval.h" #include "util.h" #include "vconn.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(pktbuf); COVERAGE_DEFINE(pktbuf_buffer_unknown); COVERAGE_DEFINE(pktbuf_null_cookie); COVERAGE_DEFINE(pktbuf_retrieved); COVERAGE_DEFINE(pktbuf_reuse_error); /* Buffers are identified by a 32-bit opaque ID. 
We divide the ID * into a buffer number (low bits) and a cookie (high bits). The buffer number * is an index into an array of buffers. The cookie distinguishes between * different packets that have occupied a single buffer. Thus, the more * buffers we have, the lower-quality the cookie... */ #define PKTBUF_BITS 8 #define PKTBUF_MASK (PKTBUF_CNT - 1) #define PKTBUF_CNT (1u << PKTBUF_BITS) #define COOKIE_BITS (32 - PKTBUF_BITS) #define COOKIE_MAX ((1u << COOKIE_BITS) - 1) #define OVERWRITE_MSECS 5000 struct packet { struct ofpbuf *buffer; uint32_t cookie; long long int timeout; ofp_port_t in_port; }; struct pktbuf { struct packet packets[PKTBUF_CNT]; unsigned int buffer_idx; unsigned int null_idx; }; int pktbuf_capacity(void) { return PKTBUF_CNT; } struct pktbuf * pktbuf_create(void) { return xzalloc(sizeof *pktbuf_create()); } void pktbuf_destroy(struct pktbuf *pb) { if (pb) { size_t i; for (i = 0; i < PKTBUF_CNT; i++) { ofpbuf_delete(pb->packets[i].buffer); } free(pb); } } static unsigned int make_id(unsigned int buffer_idx, unsigned int cookie) { return buffer_idx | (cookie << PKTBUF_BITS); } /* Attempts to allocate an OpenFlow packet buffer id within 'pb'. The packet * buffer will store a copy of 'buffer_size' bytes in 'buffer' and the port * number 'in_port', which should be the OpenFlow port number on which 'buffer' * was received. * * If successful, returns the packet buffer id (a number other than * UINT32_MAX). pktbuf_retrieve() can later be used to retrieve the buffer and * its input port number (buffers do expire after a time, so this is not * guaranteed to be true forever). On failure, returns UINT32_MAX. * * The caller retains ownership of 'buffer'. 
*/ uint32_t pktbuf_save(struct pktbuf *pb, const void *buffer, size_t buffer_size, ofp_port_t in_port) { struct packet *p = &pb->packets[pb->buffer_idx]; pb->buffer_idx = (pb->buffer_idx + 1) & PKTBUF_MASK; if (p->buffer) { if (time_msec() < p->timeout) { return UINT32_MAX; } ofpbuf_delete(p->buffer); } /* Don't use maximum cookie value since all-1-bits ID is special. */ if (++p->cookie >= COOKIE_MAX) { p->cookie = 0; } /* Use 2 bytes of headroom to 32-bit align the L3 header. */ p->buffer = ofpbuf_clone_data_with_headroom(buffer, buffer_size, 2); p->timeout = time_msec() + OVERWRITE_MSECS; p->in_port = in_port; return make_id(p - pb->packets, p->cookie); } /* * Allocates and returns a "null" packet buffer id. The returned packet buffer * id is considered valid by pktbuf_retrieve(), but it is not associated with * actual buffered data. * * This function is always successful. * * This is useful in one special case: with the current OpenFlow design, the * "fail-open" code cannot always know whether a connection to a controller is * actually valid until it receives a OFPT_PACKET_OUT or OFPT_FLOW_MOD request, * but at that point the packet in question has already been forwarded (since * we are still in "fail-open" mode). If the packet was buffered in the usual * way, then the OFPT_PACKET_OUT or OFPT_FLOW_MOD would cause a duplicate * packet in the network. Null packet buffer ids identify such a packet that * has already been forwarded, so that Open vSwitch can quietly ignore the * request to re-send it. (After that happens, the switch exits fail-open * mode.) * * See the top-level comment in fail-open.c for an overview. */ uint32_t pktbuf_get_null(void) { return make_id(0, COOKIE_MAX); } /* Attempts to retrieve a saved packet with the given 'id' from 'pb'. Returns * 0 if successful, otherwise an OpenFlow error code. * * On success, ordinarily stores the buffered packet in '*bufferp' and the * OpenFlow port number on which the packet was received in '*in_port'. 
The * caller becomes responsible for freeing the buffer. However, if 'id' * identifies a "null" packet buffer (created with pktbuf_get_null()), stores * NULL in '*bufferp' and OFPP_NONE in '*in_port'. * * 'in_port' may be NULL if the input port is not of interest. * * The L3 header of a returned packet will be 32-bit aligned. * * On failure, stores NULL in in '*bufferp' and UINT16_MAX in '*in_port'. */ enum ofperr pktbuf_retrieve(struct pktbuf *pb, uint32_t id, struct ofpbuf **bufferp, ofp_port_t *in_port) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 20); struct packet *p; enum ofperr error; if (id == UINT32_MAX) { error = 0; goto error; } if (!pb) { VLOG_WARN_RL(&rl, "attempt to send buffered packet via connection " "without buffers"); return OFPERR_OFPBRC_BUFFER_UNKNOWN; } p = &pb->packets[id & PKTBUF_MASK]; if (p->cookie == id >> PKTBUF_BITS) { struct ofpbuf *buffer = p->buffer; if (buffer) { *bufferp = buffer; if (in_port) { *in_port = p->in_port; } p->buffer = NULL; COVERAGE_INC(pktbuf_retrieved); return 0; } else { COVERAGE_INC(pktbuf_reuse_error); VLOG_WARN_RL(&rl, "attempt to reuse buffer %08"PRIx32, id); error = OFPERR_OFPBRC_BUFFER_EMPTY; } } else if (id >> PKTBUF_BITS != COOKIE_MAX) { COVERAGE_INC(pktbuf_buffer_unknown); VLOG_WARN_RL(&rl, "cookie mismatch: %08"PRIx32" != %08"PRIx32, id, (id & PKTBUF_MASK) | (p->cookie << PKTBUF_BITS)); error = OFPERR_OFPBRC_BUFFER_UNKNOWN; } else { COVERAGE_INC(pktbuf_null_cookie); VLOG_INFO_RL(&rl, "Received null cookie %08"PRIx32" (this is normal " "if the switch was recently in fail-open mode)", id); error = 0; } error: *bufferp = NULL; if (in_port) { *in_port = OFPP_NONE; } return error; } void pktbuf_discard(struct pktbuf *pb, uint32_t id) { struct packet *p = &pb->packets[id & PKTBUF_MASK]; if (p->cookie == id >> PKTBUF_BITS) { ofpbuf_delete(p->buffer); p->buffer = NULL; } } /* Returns the number of packets buffered in 'pb'. Returns 0 if 'pb' is * null. 
*/ unsigned int pktbuf_count_packets(const struct pktbuf *pb) { int n = 0; if (pb) { int i; for (i = 0; i < PKTBUF_CNT; i++) { if (pb->packets[i].buffer) { n++; } } } return n; } openvswitch-2.0.1+git20140120/ofproto/pktbuf.h000066400000000000000000000023721226605124000206120ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef PKTBUF_H #define PKTBUF_H 1 #include #include #include "ofp-errors.h" struct pktbuf; struct ofpbuf; int pktbuf_capacity(void); struct pktbuf *pktbuf_create(void); void pktbuf_destroy(struct pktbuf *); uint32_t pktbuf_save(struct pktbuf *, const void *buffer, size_t buffer_size, ofp_port_t in_port); uint32_t pktbuf_get_null(void); enum ofperr pktbuf_retrieve(struct pktbuf *, uint32_t id, struct ofpbuf **bufferp, ofp_port_t *in_port); void pktbuf_discard(struct pktbuf *, uint32_t id); unsigned int pktbuf_count_packets(const struct pktbuf *); #endif /* pktbuf.h */ openvswitch-2.0.1+git20140120/ofproto/tunnel.c000066400000000000000000000377131226605124000206260ustar00rootroot00000000000000/* Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "tunnel.h" #include #include "byte-order.h" #include "dynamic-string.h" #include "hash.h" #include "hmap.h" #include "netdev.h" #include "odp-util.h" #include "packets.h" #include "smap.h" #include "socket-util.h" #include "tunnel.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(tunnel); /* skb mark used for IPsec tunnel packets */ #define IPSEC_MARK 1 struct tnl_match { ovs_be64 in_key; ovs_be32 ip_src; ovs_be32 ip_dst; odp_port_t odp_port; uint32_t pkt_mark; bool in_key_flow; bool ip_src_flow; bool ip_dst_flow; }; struct tnl_port { struct hmap_node ofport_node; struct hmap_node match_node; const struct ofport_dpif *ofport; unsigned int netdev_seq; struct netdev *netdev; struct tnl_match match; }; static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER; static struct hmap tnl_match_map__ = HMAP_INITIALIZER(&tnl_match_map__); static struct hmap *tnl_match_map OVS_GUARDED_BY(rwlock) = &tnl_match_map__; static struct hmap ofport_map__ = HMAP_INITIALIZER(&ofport_map__); static struct hmap *ofport_map OVS_GUARDED_BY(rwlock) = &ofport_map__; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); static struct vlog_rate_limit dbg_rl = VLOG_RATE_LIMIT_INIT(60, 60); static struct tnl_port *tnl_find(struct tnl_match *) OVS_REQ_RDLOCK(rwlock); static struct tnl_port *tnl_find_exact(struct tnl_match *) OVS_REQ_RDLOCK(rwlock); static struct tnl_port *tnl_find_ofport(const struct ofport_dpif *) OVS_REQ_RDLOCK(rwlock); static uint32_t tnl_hash(struct tnl_match *); static void tnl_match_fmt(const struct tnl_match *, struct ds *); static char 
*tnl_port_fmt(const struct tnl_port *) OVS_REQ_RDLOCK(rwlock); static void tnl_port_mod_log(const struct tnl_port *, const char *action) OVS_REQ_RDLOCK(rwlock); static const char *tnl_port_get_name(const struct tnl_port *) OVS_REQ_RDLOCK(rwlock); static void tnl_port_del__(const struct ofport_dpif *) OVS_REQ_WRLOCK(rwlock); static bool tnl_port_add__(const struct ofport_dpif *ofport, const struct netdev *netdev, odp_port_t odp_port, bool warn) OVS_REQ_WRLOCK(rwlock) { const struct netdev_tunnel_config *cfg; struct tnl_port *existing_port; struct tnl_port *tnl_port; cfg = netdev_get_tunnel_config(netdev); ovs_assert(cfg); tnl_port = xzalloc(sizeof *tnl_port); tnl_port->ofport = ofport; tnl_port->netdev = netdev_ref(netdev); tnl_port->netdev_seq = netdev_change_seq(tnl_port->netdev); tnl_port->match.in_key = cfg->in_key; tnl_port->match.ip_src = cfg->ip_src; tnl_port->match.ip_dst = cfg->ip_dst; tnl_port->match.ip_src_flow = cfg->ip_src_flow; tnl_port->match.ip_dst_flow = cfg->ip_dst_flow; tnl_port->match.pkt_mark = cfg->ipsec ? IPSEC_MARK : 0; tnl_port->match.in_key_flow = cfg->in_key_flow; tnl_port->match.odp_port = odp_port; existing_port = tnl_find_exact(&tnl_port->match); if (existing_port) { if (warn) { struct ds ds = DS_EMPTY_INITIALIZER; tnl_match_fmt(&tnl_port->match, &ds); VLOG_WARN("%s: attempting to add tunnel port with same config as " "port '%s' (%s)", tnl_port_get_name(tnl_port), tnl_port_get_name(existing_port), ds_cstr(&ds)); ds_destroy(&ds); free(tnl_port); } return false; } hmap_insert(ofport_map, &tnl_port->ofport_node, hash_pointer(ofport, 0)); hmap_insert(tnl_match_map, &tnl_port->match_node, tnl_hash(&tnl_port->match)); tnl_port_mod_log(tnl_port, "adding"); return true; } /* Adds 'ofport' to the module with datapath port number 'odp_port'. 'ofport's * must be added before they can be used by the module. 'ofport' must be a * tunnel. 
*/ void tnl_port_add(const struct ofport_dpif *ofport, const struct netdev *netdev, odp_port_t odp_port) OVS_EXCLUDED(rwlock) { ovs_rwlock_wrlock(&rwlock); tnl_port_add__(ofport, netdev, odp_port, true); ovs_rwlock_unlock(&rwlock); } /* Checks if the tunnel represented by 'ofport' reconfiguration due to changes * in its netdev_tunnel_config. If it does, returns true. Otherwise, returns * false. 'ofport' and 'odp_port' should be the same as would be passed to * tnl_port_add(). */ bool tnl_port_reconfigure(const struct ofport_dpif *ofport, const struct netdev *netdev, odp_port_t odp_port) OVS_EXCLUDED(rwlock) { struct tnl_port *tnl_port; bool changed = false; ovs_rwlock_wrlock(&rwlock); tnl_port = tnl_find_ofport(ofport); if (!tnl_port) { changed = tnl_port_add__(ofport, netdev, odp_port, false); } else if (tnl_port->netdev != netdev || tnl_port->match.odp_port != odp_port || tnl_port->netdev_seq != netdev_change_seq(netdev)) { VLOG_DBG("reconfiguring %s", tnl_port_get_name(tnl_port)); tnl_port_del__(ofport); tnl_port_add__(ofport, netdev, odp_port, true); changed = true; } ovs_rwlock_unlock(&rwlock); return changed; } static void tnl_port_del__(const struct ofport_dpif *ofport) OVS_REQ_WRLOCK(rwlock) { struct tnl_port *tnl_port; if (!ofport) { return; } tnl_port = tnl_find_ofport(ofport); if (tnl_port) { tnl_port_mod_log(tnl_port, "removing"); hmap_remove(tnl_match_map, &tnl_port->match_node); hmap_remove(ofport_map, &tnl_port->ofport_node); netdev_close(tnl_port->netdev); free(tnl_port); } } /* Removes 'ofport' from the module. */ void tnl_port_del(const struct ofport_dpif *ofport) OVS_EXCLUDED(rwlock) { ovs_rwlock_wrlock(&rwlock); tnl_port_del__(ofport); ovs_rwlock_unlock(&rwlock); } /* Looks in the table of tunnels for a tunnel matching the metadata in 'flow'. * Returns the 'ofport' corresponding to the new in_port, or a null pointer if * none is found. 
* * Callers should verify that 'flow' needs to be received by calling * tnl_port_should_receive() before this function. */ const struct ofport_dpif * tnl_port_receive(const struct flow *flow) OVS_EXCLUDED(rwlock) { char *pre_flow_str = NULL; const struct ofport_dpif *ofport; struct tnl_port *tnl_port; struct tnl_match match; memset(&match, 0, sizeof match); match.odp_port = flow->in_port.odp_port; match.ip_src = flow->tunnel.ip_dst; match.ip_dst = flow->tunnel.ip_src; match.in_key = flow->tunnel.tun_id; match.pkt_mark = flow->pkt_mark; ovs_rwlock_rdlock(&rwlock); tnl_port = tnl_find(&match); ofport = tnl_port ? tnl_port->ofport : NULL; if (!tnl_port) { struct ds ds = DS_EMPTY_INITIALIZER; tnl_match_fmt(&match, &ds); VLOG_WARN_RL(&rl, "receive tunnel port not found (%s)", ds_cstr(&ds)); ds_destroy(&ds); goto out; } if (!VLOG_DROP_DBG(&dbg_rl)) { pre_flow_str = flow_to_string(flow); } if (pre_flow_str) { char *post_flow_str = flow_to_string(flow); char *tnl_str = tnl_port_fmt(tnl_port); VLOG_DBG("flow received\n" "%s" " pre: %s\n" "post: %s", tnl_str, pre_flow_str, post_flow_str); free(tnl_str); free(pre_flow_str); free(post_flow_str); } out: ovs_rwlock_unlock(&rwlock); return ofport; } static bool tnl_ecn_ok(const struct flow *base_flow, struct flow *flow) { if (is_ip_any(base_flow) && (flow->tunnel.ip_tos & IP_ECN_MASK) == IP_ECN_CE) { if ((base_flow->nw_tos & IP_ECN_MASK) == IP_ECN_NOT_ECT) { VLOG_WARN_RL(&rl, "dropping tunnel packet marked ECN CE" " but is not ECN capable"); return false; } else { /* Set the ECN CE value in the tunneled packet. */ flow->nw_tos |= IP_ECN_CE; } } return true; } /* Should be called at the beginning of action translation to initialize * wildcards and perform any actions based on receiving on tunnel port. * * Returns false if the packet must be dropped. 
*/ bool tnl_xlate_init(const struct flow *base_flow, struct flow *flow, struct flow_wildcards *wc) { if (tnl_port_should_receive(flow)) { memset(&wc->masks.tunnel, 0xff, sizeof wc->masks.tunnel); memset(&wc->masks.pkt_mark, 0xff, sizeof wc->masks.pkt_mark); if (!tnl_ecn_ok(base_flow, flow)) { return false; } flow->pkt_mark &= ~IPSEC_MARK; } return true; } /* Given that 'flow' should be output to the ofport corresponding to * 'tnl_port', updates 'flow''s tunnel headers and returns the actual datapath * port that the output should happen on. May return ODPP_NONE if the output * shouldn't occur. */ odp_port_t tnl_port_send(const struct ofport_dpif *ofport, struct flow *flow, struct flow_wildcards *wc) OVS_EXCLUDED(rwlock) { const struct netdev_tunnel_config *cfg; struct tnl_port *tnl_port; char *pre_flow_str = NULL; odp_port_t out_port; ovs_rwlock_rdlock(&rwlock); tnl_port = tnl_find_ofport(ofport); out_port = tnl_port ? tnl_port->match.odp_port : ODPP_NONE; if (!tnl_port) { goto out; } cfg = netdev_get_tunnel_config(tnl_port->netdev); ovs_assert(cfg); if (!VLOG_DROP_DBG(&dbg_rl)) { pre_flow_str = flow_to_string(flow); } if (!cfg->ip_src_flow) { flow->tunnel.ip_src = tnl_port->match.ip_src; } if (!cfg->ip_dst_flow) { flow->tunnel.ip_dst = tnl_port->match.ip_dst; } flow->pkt_mark = tnl_port->match.pkt_mark; if (!cfg->out_key_flow) { flow->tunnel.tun_id = cfg->out_key; } if (cfg->ttl_inherit && is_ip_any(flow)) { wc->masks.nw_ttl = 0xff; flow->tunnel.ip_ttl = flow->nw_ttl; } else { flow->tunnel.ip_ttl = cfg->ttl; } if (cfg->tos_inherit && is_ip_any(flow)) { wc->masks.nw_tos = 0xff; flow->tunnel.ip_tos = flow->nw_tos & IP_DSCP_MASK; } else { flow->tunnel.ip_tos = cfg->tos; } /* ECN fields are always inherited. */ if (is_ip_any(flow)) { wc->masks.nw_tos |= IP_ECN_MASK; } if ((flow->nw_tos & IP_ECN_MASK) == IP_ECN_CE) { flow->tunnel.ip_tos |= IP_ECN_ECT_0; } else { flow->tunnel.ip_tos |= flow->nw_tos & IP_ECN_MASK; } flow->tunnel.flags = (cfg->dont_fragment ? 
FLOW_TNL_F_DONT_FRAGMENT : 0) | (cfg->csum ? FLOW_TNL_F_CSUM : 0) | (cfg->out_key_present ? FLOW_TNL_F_KEY : 0); if (pre_flow_str) { char *post_flow_str = flow_to_string(flow); char *tnl_str = tnl_port_fmt(tnl_port); VLOG_DBG("flow sent\n" "%s" " pre: %s\n" "post: %s", tnl_str, pre_flow_str, post_flow_str); free(tnl_str); free(pre_flow_str); free(post_flow_str); } out: ovs_rwlock_unlock(&rwlock); return out_port; } static uint32_t tnl_hash(struct tnl_match *match) { BUILD_ASSERT_DECL(sizeof *match % sizeof(uint32_t) == 0); return hash_words((uint32_t *) match, sizeof *match / sizeof(uint32_t), 0); } static struct tnl_port * tnl_find_ofport(const struct ofport_dpif *ofport) OVS_REQ_RDLOCK(rwlock) { struct tnl_port *tnl_port; HMAP_FOR_EACH_IN_BUCKET (tnl_port, ofport_node, hash_pointer(ofport, 0), ofport_map) { if (tnl_port->ofport == ofport) { return tnl_port; } } return NULL; } static struct tnl_port * tnl_find_exact(struct tnl_match *match) OVS_REQ_RDLOCK(rwlock) { struct tnl_port *tnl_port; HMAP_FOR_EACH_WITH_HASH (tnl_port, match_node, tnl_hash(match), tnl_match_map) { if (!memcmp(match, &tnl_port->match, sizeof *match)) { return tnl_port; } } return NULL; } static struct tnl_port * tnl_find(struct tnl_match *match_) OVS_REQ_RDLOCK(rwlock) { struct tnl_match match = *match_; struct tnl_port *tnl_port; /* remote_ip, local_ip, in_key */ tnl_port = tnl_find_exact(&match); if (tnl_port) { return tnl_port; } /* remote_ip, in_key */ match.ip_src = 0; tnl_port = tnl_find_exact(&match); if (tnl_port) { return tnl_port; } match.ip_src = match_->ip_src; /* remote_ip, local_ip */ match.in_key = 0; match.in_key_flow = true; tnl_port = tnl_find_exact(&match); if (tnl_port) { return tnl_port; } /* remote_ip */ match.ip_src = 0; tnl_port = tnl_find_exact(&match); if (tnl_port) { return tnl_port; } /* Flow-based remote */ match.ip_dst = 0; match.ip_dst_flow = true; tnl_port = tnl_find_exact(&match); if (tnl_port) { return tnl_port; } /* Flow-based everything */ 
match.ip_src_flow = true; tnl_port = tnl_find_exact(&match); if (tnl_port) { return tnl_port; } return NULL; } static void tnl_match_fmt(const struct tnl_match *match, struct ds *ds) OVS_REQ_RDLOCK(rwlock) { if (!match->ip_dst_flow) { ds_put_format(ds, IP_FMT"->"IP_FMT, IP_ARGS(match->ip_src), IP_ARGS(match->ip_dst)); } else if (!match->ip_src_flow) { ds_put_format(ds, IP_FMT"->flow", IP_ARGS(match->ip_src)); } else { ds_put_cstr(ds, "flow->flow"); } if (match->in_key_flow) { ds_put_cstr(ds, ", key=flow"); } else { ds_put_format(ds, ", key=%#"PRIx64, ntohll(match->in_key)); } ds_put_format(ds, ", dp port=%"PRIu32, match->odp_port); ds_put_format(ds, ", pkt mark=%"PRIu32, match->pkt_mark); } static void tnl_port_mod_log(const struct tnl_port *tnl_port, const char *action) OVS_REQ_RDLOCK(rwlock) { if (VLOG_IS_DBG_ENABLED()) { struct ds ds = DS_EMPTY_INITIALIZER; tnl_match_fmt(&tnl_port->match, &ds); VLOG_INFO("%s tunnel port %s (%s)", action, tnl_port_get_name(tnl_port), ds_cstr(&ds)); ds_destroy(&ds); } } static char * tnl_port_fmt(const struct tnl_port *tnl_port) OVS_REQ_RDLOCK(rwlock) { const struct netdev_tunnel_config *cfg = netdev_get_tunnel_config(tnl_port->netdev); struct ds ds = DS_EMPTY_INITIALIZER; ds_put_format(&ds, "port %"PRIu32": %s (%s: ", tnl_port->match.odp_port, tnl_port_get_name(tnl_port), netdev_get_type(tnl_port->netdev)); tnl_match_fmt(&tnl_port->match, &ds); if (cfg->out_key != cfg->in_key || cfg->out_key_present != cfg->in_key_present || cfg->out_key_flow != cfg->in_key_flow) { ds_put_cstr(&ds, ", out_key="); if (!cfg->out_key_present) { ds_put_cstr(&ds, "none"); } else if (cfg->out_key_flow) { ds_put_cstr(&ds, "flow"); } else { ds_put_format(&ds, "%#"PRIx64, ntohll(cfg->out_key)); } } if (cfg->ttl_inherit) { ds_put_cstr(&ds, ", ttl=inherit"); } else { ds_put_format(&ds, ", ttl=%"PRIu8, cfg->ttl); } if (cfg->tos_inherit) { ds_put_cstr(&ds, ", tos=inherit"); } else if (cfg->tos) { ds_put_format(&ds, ", tos=%#"PRIx8, cfg->tos); } if 
(!cfg->dont_fragment) { ds_put_cstr(&ds, ", df=false"); } if (cfg->csum) { ds_put_cstr(&ds, ", csum=true"); } ds_put_cstr(&ds, ")\n"); return ds_steal_cstr(&ds); } static const char * tnl_port_get_name(const struct tnl_port *tnl_port) OVS_REQ_RDLOCK(rwlock) { return netdev_get_name(tnl_port->netdev); } openvswitch-2.0.1+git20140120/ofproto/tunnel.h000066400000000000000000000031711226605124000206220ustar00rootroot00000000000000/* Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef TUNNEL_H #define TUNNEL_H 1 #include #include #include "flow.h" /* Tunnel port emulation layer. * * These functions emulate tunnel virtual ports based on the outer * header information from the kernel. */ struct ofport_dpif; struct netdev; bool tnl_port_reconfigure(const struct ofport_dpif *, const struct netdev *, odp_port_t); void tnl_port_add(const struct ofport_dpif *, const struct netdev *, odp_port_t odp_port); void tnl_port_del(const struct ofport_dpif *); const struct ofport_dpif *tnl_port_receive(const struct flow *); bool tnl_xlate_init(const struct flow *base_flow, struct flow *flow, struct flow_wildcards *); odp_port_t tnl_port_send(const struct ofport_dpif *, struct flow *, struct flow_wildcards *wc); /* Returns true if 'flow' should be submitted to tnl_port_receive(). 
*/ static inline bool tnl_port_should_receive(const struct flow *flow) { return flow->tunnel.ip_dst != 0; } #endif /* tunnel.h */ openvswitch-2.0.1+git20140120/ovsdb/000077500000000000000000000000001226605124000165675ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/ovsdb/.gitignore000066400000000000000000000001701226605124000205550ustar00rootroot00000000000000/ovsdb-client /ovsdb-client.1 /ovsdb-doc /ovsdb-dot /ovsdb-idlc /ovsdb-server /ovsdb-server.1 /ovsdb-tool /ovsdb-tool.1 openvswitch-2.0.1+git20140120/ovsdb/SPECS000066400000000000000000001301741226605124000173750ustar00rootroot00000000000000 =================================================== Open vSwitch Configuration Database Specification =================================================== Basic Notation -------------- OVSDB uses JSON, as defined by RFC 4627, for its schema format and its wire protocol format. The JSON implementation in Open vSwitch has the following limitations: - Null bytes (\u0000) are not allowed in strings. - Only UTF-8 encoding is supported. (RFC 4627 also mentions UTF-16BE, UTF-16LE, and UTF-32.) - RFC 4627 says that names within a JSON object should be unique. The Open vSwitch JSON parser discards all but the last value for a name that is specified more than once. The descriptions below use the following shorthand notations for JSON values. Additional notation is presented later. A JSON string. Any Unicode string is allowed, as specified by RFC 4627. Implementations may disallow null bytes. A JSON string matching [a-zA-Z_][a-zA-Z0-9_]*. s that begin with _ are reserved to the implementation and may not be used by the user. A JSON string that contains a version number that matches [0-9]+\.[0-9]+\.[0-9]+ A JSON true or false value. A JSON number. A JSON number with an integer value, within a certain range (currently -2**63...+2**63-1). Any JSON value. Any JSON value except null. 
A JSON object with the following members: "error": required "details": optional The value of the "error" member is a short string, specified in this document, that broadly indicates the class of the error. Most "error" strings are specific to contexts described elsewhere in this document, but the following "error" strings may appear in any context where an is permitted: "error": "resources exhausted" The operation requires more resources (memory, disk, CPU, etc.) than are currently available to the database server. "error": "I/O error" Problems accessing the disk, network, or other required resources prevented the operation from completing. Database implementations may use "error" strings not specified in this document to indicate errors that do not fit into any of the specified categories. Optionally, an may include a "details" member, whose value is a string that describes the error in more detail for the benefit of a human user or administrator. This document does not specify the format or content of the "details" string. An may also have other members that describe the error in more detail. This document does not specify the names or values of these members. Schema Format ------------- An Open vSwitch configuration database consists of a set of tables, each of which has a number of columns and zero or more rows. A schema is represented by , as described below. A JSON object with the following members: "name": required "version": required "cksum": optional "tables": {: , ...} required The "name" identifies the database as a whole. It must be provided to most JSON-RPC requests to identify the database being operated on. The value of "tables" is a JSON object whose names are table names and whose values are s. The "version" reports the version of the database schema. Because this is a recent addition to the schema format, OVSDB permits it to be omitted, but future versions of OVSDB will require it to be present. 
Open vSwitch semantics for "version" are described in ovs-vswitchd.conf.db(5). The "cksum" optionally reports an implementation-defined checksum for the database schema. A JSON object with the following members: "columns": {: , ...} required "maxRows": optional "isRoot": optional "indexes": [*] optional The value of "columns" is a JSON object whose names are column names and whose values are s. Every table has the following columns whose definitions are not included in the schema: "_uuid": This column, which contains exactly one UUID value, is initialized to a random value by the database engine when it creates a row. It is read-only, and its value never changes during the lifetime of a row. "_version": Like "_uuid", this column contains exactly one UUID value, initialized to a random value by the database engine when it creates a row, and it is read-only. However, its value changes to a new random value whenever any other field in the row changes. Furthermore, its value is ephemeral: when the database is closed and reopened, or when the database process is stopped and then started again, each "_version" also changes to a new random value. If "isRoot" is omitted or specified as false, then any given row in the table may exist only when there is at least one reference to it, with refType "strong", from a different row (in the same table or a different table). This is a "deferred" action: unreferenced rows in the table are deleted just before transaction commit. If "isRoot" is specified as true, then rows in the table exist independent of any references (they can be thought of as part of the "root set" in a garbage collector). For compatibility with schemas created before "isRoot" was introduced, if "isRoot" is omitted or false in every in a given , then every table is part of the root set. If "maxRows" is specified, as a positive integer, it limits the maximum number of rows that may be present in the table. 
This is a "deferred" constraint, enforced only at transaction commit time (see the "transact" request below). If "maxRows" is not specified, the size of the table is limited only by the resources available to the database server. "maxRows" constraints are enforced after unreferenced rows are deleted from tables with a false "isRoot". If "indexes" is specified, it must be an array of zero or more s. A is an array of one or more strings, each of which names a column. Each is a set of columns whose values, taken together within any given row, must be unique within the table. This is a "deferred" constraint, enforced only at transaction commit time, after unreferenced rows are deleted and dangling weak references are removed. Ephemeral columns may not be part of indexes. A JSON object with the following members: "type": required "ephemeral": optional "mutable": optional The "type" specifies the type of data stored in this column. If "ephemeral" is specified as true, then this column's values are not guaranteed to be durable; they may be lost when the database restarts. A column whose type (either key or value) is a strong reference to a table that is not part of the root set is always durable, regardless of this value. (Otherwise, restarting the database could lose entire rows.) If "mutable" is specified as false, then this column's values may not be modified after they are initially set with the "insert" operation. The type of a database column. Either an or a JSON object that describes the type of a database column, with the following members: "key": required "value": optional "min": optional "max": or "unlimited" optional If "min" or "max" is not specified, each defaults to 1. If "max" is specified as "unlimited", then there is no specified maximum number of elements, although the implementation will enforce some limit. After considering defaults, "min" must be exactly 0 or exactly 1, "max" must be at least 1, and "max" must be greater than or equal to "min". 
If "min" and "max" are both 1 and "value" is not specified, the type is the scalar type specified by "key". If "min" is not 1 or "max" is not 1, or both, and "value" is not specified, the type is a set of scalar type "key". If "value" is specified, the type is a map from type "key" to type "value". The type of a key or value in a database column. Either an or a JSON object with the following members: "type": required "enum": optional "minInteger": optional, integers only "maxInteger": optional, integers only "minReal": optional, reals only "maxReal": optional, reals only "minLength": optional, strings only "maxLength": optional, strings only "refTable": optional, uuids only "refType": "strong" or "weak" optional, only with "refTable" An by itself is equivalent to a JSON object with a single member "type" whose value is the . "enum" may be specified as a whose type is a set of one or more values specified for the member "type". If "enum" is specified, then the valid values of the are limited to those in the . "enum" is mutually exclusive with the following constraints. If "type" is "integer", then "minInteger" or "maxInteger" or both may also be specified, restricting the valid integer range. If both are specified, then the maxInteger must be greater than or equal to minInteger. If "type" is "real", then "minReal" or "maxReal" or both may also be specified, restricting the valid real range. If both are specified, then the maxReal must be greater than or equal to minReal. If "type" is "string", then "minLength" and "maxLength" or both may be specified, restricting the valid length of value strings. If both are specified, then maxLength must be greater than or equal to minLength. String length is measured in characters (not bytes or UTF-16 code units). If "type" is "uuid", then "refTable", if present, must be the name of a table within this database. If "refTable" is specified, then "refType" may also be specified. 
If "refTable" is set, the effect depends on "refType": - If "refType" is "strong" or if "refType" is omitted, the allowed UUIDs are limited to UUIDs for rows in the named table. - If "refType" is "weak", then any UUIDs are allowed, but UUIDs that do not correspond to rows in the named table will be automatically deleted. "refTable" constraints are "deferred" constraints: they are enforced only at transaction commit time (see the "transact" request below). The other constraints on <base-type> are "immediate", enforced immediately by each operation. <atomic-type> One of the strings "integer", "real", "boolean", "string", or "uuid", representing the specified scalar type. Wire Protocol ------------- The database wire protocol is implemented in JSON-RPC 1.0. We encourage use of JSON-RPC over stream connections instead of JSON-RPC over HTTP, for these reasons: * JSON-RPC is a peer-to-peer protocol, but HTTP is a client-server protocol, which is a poor match. Thus, JSON-RPC over HTTP requires the client to periodically poll the server to receive server requests. * HTTP is more complicated than stream connections and doesn't provide any corresponding advantage. * The JSON-RPC specification for HTTP transport is incomplete. We are using TCP port 6632 for the database JSON-RPC connection. The database wire protocol consists of the following JSON-RPC methods: list_dbs ........ Request object members: "method": "list_dbs" required "params": [] required "id": <nonnull-json-value> required Response object members: "result": [<db-name>, ...] "error": null "id": same "id" as request This operation retrieves an array whose elements are <db-name>s that name the databases that can be accessed over this JSON-RPC connection. get_schema .......... Request object members: "method": "get_schema" required "params": [<db-name>] required "id": <nonnull-json-value> required Response object members: "result": <database-schema> "error": null "id": same "id" as request This operation retrieves a <database-schema> that describes hosted database <db-name>. transact ........
Request object members: "method": "transact" required "params": [, *] required "id": required Response object members: "result": [*] "error": null "id": same "id" as request The "params" array for this method consists of a that identifies the database to which the transaction applies, followed by zero or more JSON objects, each of which represents a single database operation. The "Operations" section below describes the valid operations. The value of "id" must be unique among all in-flight transactions within the current JSON-RPC session. Otherwise, the server may return a JSON-RPC error. The database server executes each of the specified operations in the specified order, except that if an operation fails, then the remaining operations are not executed. The set of operations is executed as a single atomic, consistent, isolated transaction. The transaction is committed only if every operation succeeds. Durability of the commit is not guaranteed unless the "commit" operation, with "durable" set to true, is included in the operation set (see below). Regardless of whether errors occur, the response is always a JSON-RPC response with null "error" and a "result" member that is an array with the same number of elements as "params". Each element of the "result" array corresponds to the same element of the "params" array. The "result" array elements may be interpreted as follows: - A JSON object that does not contain an "error" member indicates that the operation completed successfully. The specific members of the object are specified below in the descriptions of individual operations. Some operations do not produce any results, in which case the object will have no members. - An , which indicates that the operation completed with an error. - A JSON null value indicates that the operation was not attempted because a prior operation failed. 
In general, "result" contains some number of successful results, possibly followed by an error, in turn followed by enough JSON null values to match the number of elements in "params". There is one exception: if all of the operations succeed, but the results cannot be committed, then "result" will have one more element than "params", with the additional element an . The possible "error" strings include at least the following: "error": "referential integrity violation" When the commit was attempted, a column's value referenced the UUID for a row that did not exist in the table named by the column's key or value "refTable" that has a "refType" of "strong". (This can be caused by inserting a row that references a nonexistent row, by deleting a row that is still referenced by another row, by specifying the UUID for a row in the wrong table, and other ways.) "error": "constraint violation" A column with a key or value "refTable" whose "refType" is "weak" became empty due to deletion(s) caused because the rows that it referenced were deleted (or never existed, if the column's row was inserted within the transaction), and this column is not allowed to be empty because its has a "min" of 1. "error": "constraint violation" The number of rows in a table exceeds the maximum number permitted by the table's "maxRows" value (see ). "error": "constraint violation" Two or more rows in a table had the same values in the columns that comprise an index. "error": "resources exhausted" "error": "I/O error" As described in the definition of above. If "params" contains one or more "wait" operations, then the transaction may take an arbitrary amount of time to complete. The database implementation must be capable of accepting, executing, and replying to other transactions and other JSON-RPC requests while a transaction or transactions containing "wait" operations are outstanding on the same or different JSON-RPC sessions. 
The section "Notation for the Wire Protocol" below describes additional notation for use with the wire protocol. After that, the "Operations" section describes each operation. cancel ...... Request object members: "method": "cancel" required "params": [the "id" for an outstanding request] required "id": null required Response object members: This JSON-RPC notification instructs the database server to immediately complete or cancel the "transact" request whose "id" is the same as the notification's "params" value. If the "transact" request can be completed immediately, then the server sends a response in the form described for "transact", above. Otherwise, the server sends a JSON-RPC error response of the following form: "result": null "error": "canceled" "id": the request "id" member The "cancel" notification itself has no reply. monitor ....... Request object members: "method": "monitor" required "params": [, , ] required "id": required is an object that maps from a table name to an array of objects. For backward compatibility, a single may be used instead of an array; it is treated as a single-element array. Each is an object with the following members: "columns": [*] optional "select": optional is an object with the following members: "initial": optional "insert": optional "delete": optional "modify": optional Response object members: "result": "error": null "id": same "id" as request This JSON-RPC request enables a client to replicate tables or subsets of tables within database . Each element of specifies a table to be replicated. The JSON-RPC response to the "monitor" includes the initial contents of each table, unless disabled (see below). Afterward, when changes to those tables are committed, the changes are automatically sent to the client using the "update" monitor notification. This monitoring persists until the JSON-RPC session terminates or until the client sends a "monitor_cancel" JSON-RPC request. 
Each <monitor-request> describes how to monitor columns in a table: The circumstances in which an "update" notification is sent for a row within the table are determined by <monitor-select>: If "initial" is omitted or true, every row in the table is sent as part of the reply to the "monitor" request. If "insert" is omitted or true, "update" notifications are sent for rows newly inserted into the table. If "delete" is omitted or true, "update" notifications are sent for rows deleted from the table. If "modify" is omitted or true, "update" notifications are sent whenever a row in the table is modified. The "columns" member specifies the columns whose values are monitored. It must not contain duplicates. If "columns" is omitted, all columns in the table, except for "_uuid", are monitored. If there is more than one <monitor-request> in an array of them, then each <monitor-request> in the array should specify both "columns" and "select", and the "columns" must be non-overlapping sets. The "result" in the JSON-RPC response to the "monitor" request is a <table-updates> object (see below) that contains the contents of the tables for which "initial" rows are selected. If no tables' initial contents are requested, then "result" is an empty object. update ...... Notification object members: "method": "update" "params": [<json-value>, <table-updates>] "id": null The <json-value> in "params" is the same as the value passed as the <json-value> in "params" for the "monitor" request. <table-updates> is an object that maps from a table name to a <table-update>. A <table-update> is an object that maps from the row's UUID (as a 36-byte string) to a <row-update> object. A <row-update> is an object with the following members: "old": <row> present for "delete" and "modify" updates "new": <row> present for "initial", "insert", and "modify" updates This JSON-RPC notification is sent from the server to the client to tell it about changes to a monitored table (or the initial state of a monitored table). Each table in which one or more rows has changed (or whose initial view is being presented) is represented in "updates".
Each row that has changed (or whose initial view is being presented) is represented in its as a member with its name taken from the row's _uuid member. The corresponding value is a : The "old" member is present for "delete" and "modify" updates. For "delete" updates, each monitored column is included. For "modify" updates, the prior value of each monitored column whose value has changed is included (monitored columns that have not changed are represented in "new"). The "new" member is present for "initial", "insert", and "modify" updates. For "initial" and "insert" updates, each monitored column is included. For "modify" updates, the new value of each monitored column is included. monitor_cancel .............. Request object members: "method": "monitor_cancel" required "params": [] required "id": required Response object members: "result": {} "error": null "id": the request "id" member Cancels the ongoing table monitor request, identified by the in "params" matching the in "params" for an ongoing "monitor" request. No more "update" messages will be sent for this table monitor. lock operations ............... Request object members: "method": "lock", "steal", or "unlock" required "params": [] required "id": required Response object members: "result": {"locked": } for "lock" "result": {"locked": true} for "steal" "result": {} for "unlock" "error": null "id": same "id" as request Performs an operation on a "lock" object. The database server supports an arbitrary number of locks, each of which is identified by a client-defined id (given in "params"). At any given time, each lock may have at most one owner. The locking operation depends on "method": - "lock": The database will assign this client ownership of the lock as soon as it becomes available. When multiple clients request the same lock, they will receive it in first-come, first served order. - "steal": The database immediately assigns this client ownership of the lock. 
If there is an existing owner, it loses ownership. - "unlock": If the client owns the lock, releases it. If the client is waiting to obtain the lock, cancels the request and stops waiting. (Closing or otherwise disconnecting a database client connection unlocks all of its locks.) For any given lock, the client must alternate "lock" or "steal" operations with "unlock" operations. That is, if the previous operation on a lock was "lock" or "steal", it must be followed by an "unlock" operation, and vice versa. For a "lock" operation, the "locked" member in the response object is true if the lock has already been acquired, false if another client holds the lock and the client's request for it was queued. In the latter case, the client will be notified later with a "locked" message when acquisition succeeds. These requests complete and send a response quickly, without waiting. The "locked" and "stolen" notifications (see below) report asynchronous changes to ownership. The scope of a lock is a database server, not a database hosted by that server. A naming convention, such as "__", can effectively limit the scope of a lock to a particular database. locked ...... Notification object members: "method": "locked" "params": [] "id": null Notifies the client that a "lock" operation that it previously requested has succeeded. The client now owns the lock named in "params". The database server sends this notification after the reply to the corresponding "lock" request (but only if the "locked" member of the response was false), and before the reply to the client's subsequent "unlock" request. stolen ...... Notification object members: "method": "stolen" "params": [] "id": null Notifies the client that owns a lock that another database client has stolen ownership of the lock. The client no longer owns the lock named in "params". The client must still issue an "unlock" request before performing any subsequent "lock" or "steal" operation on the lock. 
If the client originally obtained the lock through a "lock" request, then it will automatically regain the lock later after the client that stole it releases it. (The database server will send the client a "locked" notification at that point to let it know.) If the client originally obtained the lock through a "steal" request, the database server won't automatically reassign it ownership of the lock when it later becomes available. To regain ownership, the client must "unlock" and then "lock" or "steal" the lock again. echo .... Request object members: "method": "echo" required "params": JSON array with any contents required "id": <json-value> required Response object members: "result": same as "params" "error": null "id": the request "id" member Both the JSON-RPC client and the server must implement this request. This JSON-RPC request and response can be used to implement connection keepalives, by allowing the server to check that the client is still there or vice versa. Notation for the Wire Protocol ------------------------------ <db-name> An <id> that names a database. The valid <db-name>s can be obtained using a "list_dbs" request. The <db-name> is taken from the "name" member of <database-schema>. <table> An <id> that names a table. <column> An <id> that names a table column. <row> A JSON object that describes a table row or a subset of a table row. Each member is the name of a table column paired with the <value> of that column. <value> A JSON value that represents the value of a column in a table row, one of <atom>, a <set>, or a <map>. <atom> A JSON value that represents a scalar value for a column, one of <string>, <number>, <boolean>, <uuid>, <named-uuid>. <set> Either an <atom>, representing a set with exactly one element, or a 2-element JSON array that represents a database set value. The first element of the array must be the string "set" and the second element must be an array of zero or more <atom>s giving the values in the set. All of the <atom>s must have the same type. <map> A 2-element JSON array that represents a database map value.
The first element of the array must be the string "map" and the second element must be an array of zero or more s giving the values in the map. All of the s must have the same key and value types. (JSON objects are not used to represent because JSON only allows string names in an object.) A 2-element JSON array that represents a pair within a database map. The first element is an that represents the key, the second element is an that represents the value. A 2-element JSON array that represents a UUID. The first element of the array must be the string "uuid" and the second element must be a 36-character string giving the UUID in the format described by RFC 4122. For example, the following represents the UUID 550e8400-e29b-41d4-a716-446655440000: ["uuid", "550e8400-e29b-41d4-a716-446655440000"] A 2-element JSON array that represents the UUID of a row inserted in an "insert" operation within the same transaction. The first element of the array must be the string "named-uuid" and the second element should be the specified as the "uuid-name" for an "insert" operation within the same transaction. For example, if an "insert" operation within this transaction specifies a "uuid-name" of "myrow", the following represents the UUID created by that operation: ["named-uuid", "myrow"] A may be used anywhere a is valid. A 3-element JSON array of the form [, , ] that represents a test on a column value. Except as otherwise specified below, must have the same type as . The meaning depends on the type of : integer real must be "<", "<=", "==", "!=", ">=", ">", "includes", or "excludes". The test is true if the column's value satisfies the relation , e.g. if the column has value 1 and is 2, the test is true if is "<", "<=" or "!=", but not otherwise. "includes" is equivalent to "=="; "excludes" is equivalent to "!=". boolean string uuid must be "!=", "==", "includes", or "excludes". If is "==" or "includes", the test is true if the column's value equals . 
If <function> is "!=" or "excludes", the test is inverted. set map <function> must be "!=", "==", "includes", or "excludes". If <function> is "==", the test is true if the column's value contains exactly the same values (for sets) or pairs (for maps). If <function> is "!=", the test is inverted. If <function> is "includes", the test is true if the column's value contains all of the values (for sets) or pairs (for maps) in <value>. The column's value may also contain other values or pairs. If <function> is "excludes", the test is true if the column's value does not contain any of the values (for sets) or pairs (for maps) in <value>. The column's value may contain other values or pairs not in <value>. If <function> is "includes" or "excludes", then the required type of <value> is slightly relaxed, in that it may have fewer than the minimum number of elements specified by the column's type. If <function> is "excludes", then the required type is additionally relaxed in that <value> may have more than the maximum number of elements specified by the column's type. <function> One of "<", "<=", "==", "!=", ">=", ">", "includes", "excludes". <mutation> A 3-element JSON array of the form [<column>, <mutator>, <value>] that represents a change to a column value. Except as otherwise specified below, <value> must have the same type as <column>. The meaning depends on the type of <column>: integer real <mutator> must be "+=", "-=", "*=", "/=" or (integer only) "%=". The value of <column> is changed to the sum, difference, product, quotient, or remainder, respectively, of <column> and <value>. Constraints on <column> are ignored when parsing <value>. boolean string uuid No valid <mutator>s are currently defined for these types. set Any <mutator> valid for the set's element type may be applied to the set, in which case the mutation is applied to each member of the set individually. <value> must be a scalar value of the same type as the set's element type, except that constraints are ignored. If <mutator> is "insert", then each of the values in the set in <value> is added to <column> if it is not already present. The required type of <value> is slightly relaxed, in that it may have fewer than the minimum number of elements specified by the column's type.
If is "delete", then each of the values in the set in is removed from if it is present there. The required type is slightly relaxed in that may have more or less than the maximum number of elements specified by the column's type. map must be "insert" or "delete". If is "insert", then each of the key-value pairs in the map in is added to only if its key is not already present. The required type of is slightly relaxed, in that it may have fewer than the minimum number of elements specified by the column's type. If is "delete", then may have the same type as (a map type) or it may be a set whose element type is the same as 's key type: - If is a map, the mutation deletes each key-value pair in whose key and value equal one of the key-value pairs in . - If is a set, the mutation deletes each key-value pair in whose key equals one of the values in . For "delete", may have any number of elements, regardless of restrictions on the number of elements in . One of "+=", "-=", "*=", "/=", "%=", "insert", "delete". Operations ---------- Each of the available operations is described below. insert ...... Request object members: "op": "insert" required "table":
required "row": required "uuid-name": optional Result object members: "uuid": Semantics: Inserts "row" into "table". If "row" does not specify values for all the columns in "table", those columns receive default values. The default value for a column depends on its type. The default for a column whose specifies a "min" of 0 is an empty set or empty map. Otherwise, the default is a single value or a single key-value pair, whose value(s) depend on its : - "integer" or "real": 0 - "boolean": false - "string": "" (the empty string) - "uuid": 00000000-0000-0000-0000-000000000000 The new row receives a new, randomly generated UUID. If "uuid-name" is supplied, then it is an error if is not unique among the "uuid-name"s supplied on all the "insert" operations within this transaction. The UUID for the new row is returned as the "uuid" member of the result. Errors: "error": "duplicate uuid-name" The same "uuid-name" appears on another "insert" operation within this transaction. "error": "constraint violation" One of the values in "row" does not satisfy the immediate constraints for its column's . This error will occur for columns that are not explicitly set by "row" if the default value does not satisfy the column's constraints. select ...... Request object members: "op": "select" required "table":
required "where": [*] required "columns": [*] optional Result object members: "rows": [*] Semantics: Searches "table" for rows that match all the conditions specified in "where". If "where" is an empty array, every row in "table" is selected. The "rows" member of the result is an array of objects. Each object corresponds to a matching row, with each column specified in "columns" as a member, the column's name as the member name and its value as the member value. If "columns" is not specified, all the table's columns are included. If two rows of the result have the same values for all included columns, only one copy of that row is included in "rows". Specifying "_uuid" within "columns" will avoid dropping duplicates, since every row has a unique UUID. The ordering of rows within "rows" is unspecified. update ...... Request object members: "op": "update" required "table":
required "where": [*] required "row": required Result object members: "count": Semantics: Updates rows in a table. Searches "table" for rows that match all the conditions specified in "where". For each matching row, changes the value of each column specified in "row" to the value for that column specified in "row". The "_uuid" and "_version" columns of a table may not be directly updated with this operation. Columns designated read-only in the schema also may not be updated. The "count" member of the result specifies the number of rows that matched. Errors: "error": "constraint violation" One of the values in "row" does not satisfy the immediate constraints for its column's . mutate ...... Request object members: "op": "mutate" required "table":
required "where": [*] required "mutations": [*] required Result object members: "count": Semantics: Mutates rows in a table. Searches "table" for rows that match all the conditions specified in "where". For each matching row, mutates its columns as specified by each in "mutations", in the order specified. The "_uuid" and "_version" columns of a table may not be directly modified with this operation. Columns designated read-only in the schema also may not be updated. The "count" member of the result specifies the number of rows that matched. Errors: "error": "domain error" The result of the mutation is not mathematically defined, e.g. division by zero. "error": "range error" The result of the mutation is not representable within the database's format, e.g. an integer result outside the range INT64_MIN...INT64_MAX or a real result outside the range -DBL_MAX...DBL_MAX. "error": "constraint violation" The mutation caused the column's value to violate a constraint, e.g. it caused a column to have more or fewer values than are allowed, an arithmetic operation caused a set or map to have duplicate elements, or it violated a constraint specified by a column's . delete ...... Request object members: "op": "delete" required "table":
required "where": [*] required Result object members: "count": Semantics: Deletes all the rows from "table" that match all the conditions specified in "where". The "count" member of the result specifies the number of deleted rows. wait .... Request object members: "op": "wait" required "timeout": optional "table":
required "where": [*] required "columns": [*] required "until": "==" or "!=" required "rows": [*] required Result object members: none Semantics: Waits until a condition becomes true. If "until" is "==", checks whether the query on "table" specified by "where" and "columns", which is evaluated in the same way as specified for "select", returns the result set specified by "rows". If it does, then the operation completes successfully. Otherwise, the entire transaction rolls back. It is automatically restarted later, after a change in the database makes it possible for the operation to succeed. The client will not receive a response until the operation permanently succeeds or fails. If "until" is "!=", the sense of the test is negated. That is, as long as the query on "table" specified by "where" and "columns" returns "rows", the transaction will be rolled back and restarted later. If "timeout" is specified, then the transaction aborts after the specified number of milliseconds. The transaction is guaranteed to be attempted at least once before it aborts. A "timeout" of 0 will abort the transaction on the first mismatch. Errors: "error": "not supported" One or more of the columns in this table do not support triggers. This error will not occur if "timeout" is 0. "error": "timed out" The "timeout" was reached before the transaction was able to complete. commit ...... Request object members: "op": "commit" required "durable": required Result object members: none Semantics: If "durable" is specified as true, then the transaction, if it commits, will be stored durably (to disk) before the reply is sent to the client. Errors: "error": "not supported" When "durable" is true, this database implementation does not support durable commits. abort ..... Request object members: "op": "abort" required Result object members: (never succeeds) Semantics: Aborts the transaction with an error. This may be useful for testing. 
Errors: "error": "aborted" This operation always fails with this error. comment ....... Request object members: "op": "comment" required "comment": required Result object members: none Semantics: Provides information to a database administrator on the purpose of a transaction. The OVSDB server, for example, adds comments in transactions that modify the database to the database journal. assert ...... Request object members: "op": "assert" required "lock": required Result object members: none Semantics: If the client does not own the lock named , aborts the transaction. Errors: "error": "not owner" The client does not own the named lock. openvswitch-2.0.1+git20140120/ovsdb/automake.mk000066400000000000000000000053231226605124000207310ustar00rootroot00000000000000# libovsdb noinst_LIBRARIES += ovsdb/libovsdb.a ovsdb_libovsdb_a_SOURCES = \ ovsdb/column.c \ ovsdb/column.h \ ovsdb/condition.c \ ovsdb/condition.h \ ovsdb/execution.c \ ovsdb/file.c \ ovsdb/file.h \ ovsdb/jsonrpc-server.c \ ovsdb/jsonrpc-server.h \ ovsdb/log.c \ ovsdb/log.h \ ovsdb/mutation.c \ ovsdb/mutation.h \ ovsdb/ovsdb-server.c \ ovsdb/ovsdb.c \ ovsdb/ovsdb.h \ ovsdb/query.c \ ovsdb/query.h \ ovsdb/row.c \ ovsdb/row.h \ ovsdb/server.c \ ovsdb/server.h \ ovsdb/table.c \ ovsdb/table.h \ ovsdb/trigger.c \ ovsdb/trigger.h \ ovsdb/transaction.c \ ovsdb/transaction.h MAN_FRAGMENTS += \ ovsdb/remote-active.man \ ovsdb/remote-passive.man # ovsdb-tool bin_PROGRAMS += ovsdb/ovsdb-tool ovsdb_ovsdb_tool_SOURCES = ovsdb/ovsdb-tool.c ovsdb_ovsdb_tool_LDADD = ovsdb/libovsdb.a lib/libopenvswitch.a $(SSL_LIBS) # ovsdb-tool.1 man_MANS += ovsdb/ovsdb-tool.1 DISTCLEANFILES += ovsdb/ovsdb-tool.1 MAN_ROOTS += ovsdb/ovsdb-tool.1.in # ovsdb-client bin_PROGRAMS += ovsdb/ovsdb-client ovsdb_ovsdb_client_SOURCES = ovsdb/ovsdb-client.c ovsdb_ovsdb_client_LDADD = ovsdb/libovsdb.a lib/libopenvswitch.a $(SSL_LIBS) # ovsdb-client.1 man_MANS += ovsdb/ovsdb-client.1 DISTCLEANFILES += ovsdb/ovsdb-client.1 MAN_ROOTS += 
ovsdb/ovsdb-client.1.in # ovsdb-server sbin_PROGRAMS += ovsdb/ovsdb-server ovsdb_ovsdb_server_SOURCES = ovsdb/ovsdb-server.c ovsdb_ovsdb_server_LDADD = ovsdb/libovsdb.a lib/libopenvswitch.a $(SSL_LIBS) # ovsdb-server.1 man_MANS += ovsdb/ovsdb-server.1 DISTCLEANFILES += ovsdb/ovsdb-server.1 MAN_ROOTS += ovsdb/ovsdb-server.1.in # ovsdb-idlc EXTRA_DIST += ovsdb/SPECS noinst_SCRIPTS += ovsdb/ovsdb-idlc EXTRA_DIST += ovsdb/ovsdb-idlc.in MAN_ROOTS += ovsdb/ovsdb-idlc.1 DISTCLEANFILES += ovsdb/ovsdb-idlc SUFFIXES += .ovsidl .ovsschema OVSDB_IDLC = $(run_python) $(srcdir)/ovsdb/ovsdb-idlc.in .ovsidl.c: $(OVSDB_IDLC) c-idl-source $< > $@.tmp mv $@.tmp $@ .ovsidl.h: $(OVSDB_IDLC) c-idl-header $< > $@.tmp mv $@.tmp $@ EXTRA_DIST += $(OVSIDL_BUILT) BUILT_SOURCES += $(OVSIDL_BUILT) # This must be done late: macros in targets are expanded when the # target line is read, so if this file were to be included before some # other file that added to OVSIDL_BUILT, then those files wouldn't get # the dependency. # # However, current versions of Automake seem to output all variable # assignments before any targets, so it doesn't seem to be a problem, # at least for now. $(OVSIDL_BUILT): ovsdb/ovsdb-idlc.in # ovsdb-doc EXTRA_DIST += ovsdb/ovsdb-doc OVSDB_DOC = $(run_python) $(srcdir)/ovsdb/ovsdb-doc # ovsdb-dot EXTRA_DIST += ovsdb/ovsdb-dot.in ovsdb/dot2pic noinst_SCRIPTS += ovsdb/ovsdb-dot DISTCLEANFILES += ovsdb/ovsdb-dot OVSDB_DOT = $(run_python) $(srcdir)/ovsdb/ovsdb-dot.in include ovsdb/ovsdbmonitor/automake.mk openvswitch-2.0.1+git20140120/ovsdb/column.c000066400000000000000000000175741226605124000202460ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): the two bare "#include" directives below have lost their
 * header names (angle-bracket text appears to have been stripped when this
 * file was extracted).  Restore them from the upstream file before building. */
#include

#include "ovsdb/column.h"

#include

#include "column.h"
#include "dynamic-string.h"
#include "json.h"
#include "ovsdb-error.h"
#include "ovsdb-parser.h"
#include "table.h"
#include "util.h"

/* Allocates and returns a new column named 'name' with the given 'mutable'
 * and 'persistent' flags.  'name' and 'type' are copied, so the caller keeps
 * ownership of its arguments.  Free the result with ovsdb_column_destroy(). */
struct ovsdb_column *
ovsdb_column_create(const char *name,
                    bool mutable, bool persistent,
                    const struct ovsdb_type *type)
{
    /* Doesn't set the new column's 'index': the caller must do that. */
    struct ovsdb_column *column;

    column = xzalloc(sizeof *column);
    column->name = xstrdup(name);
    column->mutable = mutable;
    column->persistent = persistent;
    ovsdb_type_clone(&column->type, type);

    return column;
}

/* Returns a new column with the same name, flags, and type as 'old'. */
struct ovsdb_column *
ovsdb_column_clone(const struct ovsdb_column *old)
{
    /* Doesn't copy the column's 'index': the caller must do that. */
    return ovsdb_column_create(old->name,
                               old->mutable, old->persistent,
                               &old->type);
}

/* Frees 'column' together with its owned name and type. */
void
ovsdb_column_destroy(struct ovsdb_column *column)
{
    ovsdb_type_destroy(&column->type);
    free(column->name);
    free(column);
}

/* Parses 'json' as the schema for a column named 'name'.
 *
 * Recognized members: optional boolean "mutable" (default true), optional
 * boolean "ephemeral" (default false; the column is persistent unless
 * "ephemeral" is true), and required "type" (a string or an object).
 *
 * On success, stores the new column in '*columnp' and returns NULL.  On
 * failure, returns the error and leaves '*columnp' set to NULL. */
struct ovsdb_error *
ovsdb_column_from_json(const struct json *json, const char *name,
                       struct ovsdb_column **columnp)
{
    const struct json *mutable, *ephemeral, *type_json;
    struct ovsdb_error *error;
    struct ovsdb_type type;
    struct ovsdb_parser parser;
    bool persistent;

    *columnp = NULL;

    ovsdb_parser_init(&parser, json, "schema for column %s", name);
    mutable = ovsdb_parser_member(&parser, "mutable",
                                  OP_TRUE | OP_FALSE | OP_OPTIONAL);
    ephemeral = ovsdb_parser_member(&parser, "ephemeral",
                                    OP_TRUE | OP_FALSE | OP_OPTIONAL);
    type_json = ovsdb_parser_member(&parser, "type", OP_STRING | OP_OBJECT);
    error = ovsdb_parser_finish(&parser);
    if (error) {
        return error;
    }

    error = ovsdb_type_from_json(&type, type_json);
    if (error) {
        return error;
    }

    persistent = ephemeral ? !json_boolean(ephemeral) : true;
    *columnp = ovsdb_column_create(name,
                                   mutable ? json_boolean(mutable) : true,
                                   persistent, &type);

    /* ovsdb_column_create() cloned 'type', so the local copy is released. */
    ovsdb_type_destroy(&type);

    return NULL;
}

/* Returns a new JSON object describing 'column'.  "mutable" and "ephemeral"
 * are emitted only when they differ from the defaults that
 * ovsdb_column_from_json() assumes; "type" is always emitted. */
struct json *
ovsdb_column_to_json(const struct ovsdb_column *column)
{
    struct json *json = json_object_create();
    if (!column->mutable) {
        json_object_put(json, "mutable", json_boolean_create(false));
    }
    if (!column->persistent) {
        json_object_put(json, "ephemeral", json_boolean_create(true));
    }
    json_object_put(json, "type", ovsdb_type_to_json(&column->type));
    return json;
}

/* Initializes 'set' as an empty column set. */
void
ovsdb_column_set_init(struct ovsdb_column_set *set)
{
    set->columns = NULL;
    set->n_columns = set->allocated_columns = 0;
}

/* Frees the array owned by 'set'.  The columns it points to are not freed. */
void
ovsdb_column_set_destroy(struct ovsdb_column_set *set)
{
    free(set->columns);
}

/* Initializes 'new' as a copy of 'old'.  Only the array of column pointers is
 * duplicated; both sets refer to the same column objects. */
void
ovsdb_column_set_clone(struct ovsdb_column_set *new,
                       const struct ovsdb_column_set *old)
{
    new->columns = xmemdup(old->columns,
                           old->n_columns * sizeof *old->columns);
    new->n_columns = new->allocated_columns = old->n_columns;
}

/* Initializes 'set' from 'json'.
 *
 * If 'json' is NULL, 'set' receives every column in 'schema'.  Otherwise
 * 'json' must be an array of distinct strings, each naming a column in
 * 'schema'.
 *
 * Returns NULL on success.  On failure, returns the error and leaves 'set'
 * initialized but empty. */
struct ovsdb_error *
ovsdb_column_set_from_json(const struct json *json,
                           const struct ovsdb_table_schema *schema,
                           struct ovsdb_column_set *set)
{
    ovsdb_column_set_init(set);
    if (!json) {
        struct shash_node *node;

        SHASH_FOR_EACH (node, &schema->columns) {
            const struct ovsdb_column *column = node->data;
            ovsdb_column_set_add(set, column);
        }

        return NULL;
    } else {
        /* Stays NULL for the generic failures; the cleanup path below then
         * substitutes the "array of distinct column names" message. */
        struct ovsdb_error *error = NULL;
        size_t i;

        if (json->type != JSON_ARRAY) {
            goto error;
        }

        /* XXX this is O(n**2) */
        for (i = 0; i < json->u.array.n; i++) {
            const struct ovsdb_column *column;
            const char *s;

            if (json->u.array.elems[i]->type != JSON_STRING) {
                goto error;
            }

            s = json->u.array.elems[i]->u.string;
            column = shash_find_data(&schema->columns, s);
            if (!column) {
                error = ovsdb_syntax_error(json, NULL, "%s is not a valid "
                                           "column name", s);
                goto error;
            } else if (ovsdb_column_set_contains(set, column->index)) {
                /* Duplicate column name. */
                goto error;
            }
            ovsdb_column_set_add(set, column);
        }
        return NULL;

    error:
        ovsdb_column_set_destroy(set);
        ovsdb_column_set_init(set);
        if (!error) {
            error = ovsdb_syntax_error(json, NULL, "array of distinct column "
                                       "names expected");
        }
        return error;
    }
}

/* Returns a new JSON array holding the name of each column in 'set', in the
 * set's order. */
struct json *
ovsdb_column_set_to_json(const struct ovsdb_column_set *set)
{
    struct json *json;
    size_t i;

    json = json_array_create_empty();
    for (i = 0; i < set->n_columns; i++) {
        json_array_add(json, json_string_create(set->columns[i]->name));
    }
    return json;
}

/* Returns an English string listing the contents of 'set', e.g. "columns
 * \"a\", \"b\", and \"c\"".  The caller must free the string. */
char *
ovsdb_column_set_to_string(const struct ovsdb_column_set *set)
{
    if (!set->n_columns) {
        return xstrdup("no columns");
    } else {
        struct ds s;
        size_t i;

        ds_init(&s);
        ds_put_format(&s, "column%s ", set->n_columns > 1 ? "s" : "");
        for (i = 0; i < set->n_columns; i++) {
            const char *delimiter = english_list_delimiter(i, set->n_columns);
            ds_put_format(&s, "%s\"%s\"", delimiter, set->columns[i]->name);
        }
        return ds_steal_cstr(&s);
    }
}

/* Appends 'column' to 'set', growing the array when it is full.  No check for
 * duplicates is made here; callers that need distinct columns must test with
 * ovsdb_column_set_contains() first. */
void
ovsdb_column_set_add(struct ovsdb_column_set *set,
                     const struct ovsdb_column *column)
{
    if (set->n_columns >= set->allocated_columns) {
        set->columns = x2nrealloc(set->columns, &set->allocated_columns,
                                  sizeof *set->columns);
    }
    set->columns[set->n_columns++] = column;
}

/* Appends every column in the schema of 'table' to 'set'. */
void
ovsdb_column_set_add_all(struct ovsdb_column_set *set,
                         const struct ovsdb_table *table)
{
    struct shash_node *node;

    SHASH_FOR_EACH (node, &table->schema->columns) {
        const struct ovsdb_column *column = node->data;
        ovsdb_column_set_add(set, column);
    }
}

/* Returns true if 'set' holds a column whose 'index' equals 'column_index'.
 * This is a linear scan of the set. */
bool
ovsdb_column_set_contains(const struct ovsdb_column_set *set,
                          unsigned int column_index)
{
    size_t i;

    for (i = 0; i < set->n_columns; i++) {
        if (set->columns[i]->index == column_index) {
            return true;
        }
    }
    return false;
}

/* This comparison is sensitive to ordering of columns within a set, but that's
 * good: the only existing caller wants to make sure that hash values are
 * comparable, which is only true if column ordering is the same.
*/
bool
ovsdb_column_set_equals(const struct ovsdb_column_set *a,
                        const struct ovsdb_column_set *b)
{
    size_t i;

    if (a->n_columns != b->n_columns) {
        return false;
    }
    for (i = 0; i < a->n_columns; i++) {
        /* Pointer comparison: same column objects at the same positions. */
        if (a->columns[i] != b->columns[i]) {
            return false;
        }
    }
    return true;
}
openvswitch-2.0.1+git20140120/ovsdb/column.h000066400000000000000000000060441226605124000202410ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVSDB_COLUMN_H
#define OVSDB_COLUMN_H 1

/* NOTE(review): the bare "#include" below has lost its header name (angle-
 * bracket text appears to have been stripped during extraction). */
#include
#include "compiler.h"
#include "ovsdb-types.h"

struct ovsdb_table;
struct ovsdb_table_schema;

/* A column or a column schema (currently there is no distinction). */
struct ovsdb_column {
    unsigned int index;         /* Not set by ovsdb_column_create() or
                                 * ovsdb_column_clone(); the caller assigns
                                 * it. */
    char *name;                 /* Owned by the column. */

    bool mutable;               /* Serialized as "mutable"; default true. */
    bool persistent;            /* Inverse of the schema's "ephemeral". */
    struct ovsdb_type type;     /* Owned by the column. */
};

/* A few columns appear in every table with standardized column indexes.
 * These macros define those columns' indexes.
 *
 * Don't change these values, because ovsdb_query() depends on OVSDB_COL_UUID
 * having value 0. */
enum {
    OVSDB_COL_UUID = 0,         /* UUID for the row. */
    OVSDB_COL_VERSION = 1,      /* Version number for the row. */
    OVSDB_N_STD_COLUMNS
};

struct ovsdb_column *ovsdb_column_create(
    const char *name, bool mutable, bool persistent,
    const struct ovsdb_type *);
struct ovsdb_column *ovsdb_column_clone(const struct ovsdb_column *);
void ovsdb_column_destroy(struct ovsdb_column *);

struct ovsdb_error *ovsdb_column_from_json(const struct json *,
                                           const char *name,
                                           struct ovsdb_column **)
    WARN_UNUSED_RESULT;
struct json *ovsdb_column_to_json(const struct ovsdb_column *);

/* An unordered set of distinct columns.
 *
 * The set stores pointers to columns it does not own.  Element order is still
 * observable: ovsdb_column_set_equals() compares position by position. */
struct ovsdb_column_set {
    const struct ovsdb_column **columns;
    size_t n_columns, allocated_columns;
};

#define OVSDB_COLUMN_SET_INITIALIZER { NULL, 0, 0 }

void ovsdb_column_set_init(struct ovsdb_column_set *);
void ovsdb_column_set_destroy(struct ovsdb_column_set *);
void ovsdb_column_set_clone(struct ovsdb_column_set *,
                            const struct ovsdb_column_set *);
struct ovsdb_error *ovsdb_column_set_from_json(
    const struct json *, const struct ovsdb_table_schema *,
    struct ovsdb_column_set *);
struct json *ovsdb_column_set_to_json(const struct ovsdb_column_set *);
char *ovsdb_column_set_to_string(const struct ovsdb_column_set *);

void ovsdb_column_set_add(struct ovsdb_column_set *,
                          const struct ovsdb_column *);
void ovsdb_column_set_add_all(struct ovsdb_column_set *,
                              const struct ovsdb_table *);
bool ovsdb_column_set_contains(const struct ovsdb_column_set *,
                               unsigned int column_index);
bool ovsdb_column_set_equals(const struct ovsdb_column_set *,
                             const struct ovsdb_column_set *);

#endif /* column.h */
openvswitch-2.0.1+git20140120/ovsdb/condition.c000066400000000000000000000213041226605124000207210ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* NOTE(review): the two bare "#include" directives below have lost their
 * header names (angle-bracket text appears to have been stripped when this
 * file was extracted).  Restore them from the upstream file before building. */
#include

#include "condition.h"

#include

#include "column.h"
#include "json.h"
#include "ovsdb-error.h"
#include "row.h"
#include "table.h"

/* Looks up the condition function whose string form is 'name' (e.g. "==").
 * On success, stores it in '*function' and returns NULL; otherwise returns an
 * "unknown function" syntax error. */
struct ovsdb_error *
ovsdb_function_from_string(const char *name, enum ovsdb_function *function)
{
    /* Expands to one strcmp() test per entry of OVSDB_FUNCTIONS. */
#define OVSDB_FUNCTION(ENUM, NAME)              \
    if (!strcmp(name, NAME)) {                  \
        *function = ENUM;                       \
        return NULL;                            \
    }
    OVSDB_FUNCTIONS;
#undef OVSDB_FUNCTION

    return ovsdb_syntax_error(NULL, "unknown function",
                              "No function named %s.", name);
}

/* Returns the string form of 'function', or NULL if 'function' is not one of
 * the values listed in OVSDB_FUNCTIONS. */
const char *
ovsdb_function_to_string(enum ovsdb_function function)
{
    switch (function) {
#define OVSDB_FUNCTION(ENUM, NAME) case ENUM: return NAME;
        OVSDB_FUNCTIONS;
#undef OVSDB_FUNCTION
    }

    return NULL;
}

/* Parses 'json', a 3-element array [column name, function name, argument],
 * into 'clause', validating it against table schema 'ts'.  'symtab' is passed
 * through to ovsdb_datum_from_json() for parsing the argument.
 *
 * Returns NULL on success, otherwise an error. */
static WARN_UNUSED_RESULT struct ovsdb_error *
ovsdb_clause_from_json(const struct ovsdb_table_schema *ts,
                       const struct json *json,
                       struct ovsdb_symbol_table *symtab,
                       struct ovsdb_clause *clause)
{
    const struct json_array *array;
    struct ovsdb_error *error;
    const char *function_name;
    const char *column_name;
    struct ovsdb_type type;

    if (json->type != JSON_ARRAY
        || json->u.array.n != 3
        || json->u.array.elems[0]->type != JSON_STRING
        || json->u.array.elems[1]->type != JSON_STRING) {
        return ovsdb_syntax_error(json, NULL, "Parse error in condition.");
    }
    array = json_array(json);

    column_name = json_string(array->elems[0]);
    clause->column = ovsdb_table_schema_get_column(ts, column_name);
    if (!clause->column) {
        return ovsdb_syntax_error(json, "unknown column",
                                  "No column %s in table %s.",
                                  column_name, ts->name);
    }

    /* Local copy of the column's type, so that the relaxations applied below
     * affect only how the argument is parsed, not the column itself. */
    type = clause->column->type;

    function_name = json_string(array->elems[1]);
    error = ovsdb_function_from_string(function_name, &clause->function);
    if (error) {
        return error;
    }

    /* Type-check and relax restrictions on 'type' if appropriate.  */
    switch (clause->function) {
    case OVSDB_F_LT:
    case OVSDB_F_LE:
    case OVSDB_F_GT:
    case OVSDB_F_GE:
        /* XXX should we also allow these operators for types with n_min == 0,
         * n_max == 1?  (They would always be "false" if the value was
         * missing.) */
        if (!ovsdb_type_is_scalar(&type)
            || (type.key.type != OVSDB_TYPE_INTEGER
                && type.key.type != OVSDB_TYPE_REAL)) {
            char *s = ovsdb_type_to_english(&type);
            error = ovsdb_syntax_error(
                json, NULL, "Type mismatch: \"%s\" operator may not be "
                "applied to column %s of type %s.",
                ovsdb_function_to_string(clause->function),
                clause->column->name, s);
            free(s);
            return error;
        }
        break;

    case OVSDB_F_EQ:
    case OVSDB_F_NE:
        break;

    case OVSDB_F_EXCLUDES:
        /* For non-scalar columns, the argument may have any number of
         * elements. */
        if (!ovsdb_type_is_scalar(&type)) {
            type.n_min = 0;
            type.n_max = UINT_MAX;
        }
        break;

    case OVSDB_F_INCLUDES:
        /* For non-scalar columns, the argument may have fewer elements than
         * the column's minimum. */
        if (!ovsdb_type_is_scalar(&type)) {
            type.n_min = 0;
        }
        break;
    }
    return ovsdb_datum_from_json(&clause->arg, &type, array->elems[2], symtab);
}

/* Releases the argument datum owned by 'clause'. */
static void
ovsdb_clause_free(struct ovsdb_clause *clause)
{
    ovsdb_datum_destroy(&clause->arg, &clause->column->type);
}

/* qsort() comparator for "struct ovsdb_clause": orders by function first, then
 * by column. */
static int
compare_clauses_3way(const void *a_, const void *b_)
{
    const struct ovsdb_clause *a = a_;
    const struct ovsdb_clause *b = b_;

    if (a->function != b->function) {
        /* Bring functions to the front based on the fraction of table rows
         * that they are (heuristically) expected to leave in the query
         * results.  Note that "enum ovsdb_function" is intentionally ordered
         * to make this trivial. */
        return a->function < b->function ? -1 : 1;
    } else if (a->column->index != b->column->index) {
        if (a->column->index < OVSDB_N_STD_COLUMNS
            || b->column->index < OVSDB_N_STD_COLUMNS) {
            /* Bring the standard columns and in particular the UUID column
             * (since OVSDB_COL_UUID has value 0) to the front.  We have an
             * index on the UUID column, so that makes our queries cheaper. */
            return a->column->index < b->column->index ? -1 : 1;
        } else {
            /* Order clauses predictably to make testing easier. */
            return strcmp(a->column->name, b->column->name);
        }
    } else {
        return 0;
    }
}

/* Parses 'json', which must already be a JSON array (json_array() is applied
 * to it directly), as a list of clauses for table schema 'ts' and stores the
 * sorted result in 'cnd'.
 *
 * Returns NULL on success.  On failure, returns the error and leaves 'cnd'
 * with no clauses ('clauses' NULL, 'n_clauses' 0). */
struct ovsdb_error *
ovsdb_condition_from_json(const struct ovsdb_table_schema *ts,
                          const struct json *json,
                          struct ovsdb_symbol_table *symtab,
                          struct ovsdb_condition *cnd)
{
    const struct json_array *array = json_array(json);
    size_t i;

    cnd->clauses = xmalloc(array->n * sizeof *cnd->clauses);
    cnd->n_clauses = 0;
    for (i = 0; i < array->n; i++) {
        struct ovsdb_error *error;
        error = ovsdb_clause_from_json(ts, array->elems[i], symtab,
                                       &cnd->clauses[i]);
        if (error) {
            /* 'n_clauses' counts only the clauses parsed so far, so this
             * frees exactly those plus the array. */
            ovsdb_condition_destroy(cnd);
            cnd->clauses = NULL;
            cnd->n_clauses = 0;
            return error;
        }
        cnd->n_clauses++;
    }

    /* A real database would have a query optimizer here. */
    qsort(cnd->clauses, cnd->n_clauses, sizeof *cnd->clauses,
          compare_clauses_3way);

    return NULL;
}

/* Returns 'clause' as a new 3-element JSON array [column, function, arg]. */
static struct json *
ovsdb_clause_to_json(const struct ovsdb_clause *clause)
{
    return json_array_create_3(
        json_string_create(clause->column->name),
        json_string_create(ovsdb_function_to_string(clause->function)),
        ovsdb_datum_to_json(&clause->arg, &clause->column->type));
}

/* Returns 'cnd' as a new JSON array with one element per clause. */
struct json *
ovsdb_condition_to_json(const struct ovsdb_condition *cnd)
{
    struct json **clauses;
    size_t i;

    clauses = xmalloc(cnd->n_clauses * sizeof *clauses);
    for (i = 0; i < cnd->n_clauses; i++) {
        clauses[i] = ovsdb_clause_to_json(&cnd->clauses[i]);
    }
    return json_array_create(clauses, cnd->n_clauses);
}

/* Returns true if 'row' satisfies clause 'c'.
 *
 * Scalar columns are compared atom to atom, with "includes" treated as "=="
 * and "excludes" as "!=".  Non-scalar columns support only "==", "!=",
 * "includes", and "excludes"; the ordering functions are not reachable for
 * them. */
static bool
ovsdb_clause_evaluate(const struct ovsdb_row *row,
                      const struct ovsdb_clause *c)
{
    const struct ovsdb_datum *field = &row->fields[c->column->index];
    const struct ovsdb_datum *arg = &c->arg;
    const struct ovsdb_type *type = &c->column->type;

    if (ovsdb_type_is_scalar(type)) {
        int cmp = ovsdb_atom_compare_3way(&field->keys[0], &arg->keys[0],
                                          type->key.type);
        switch (c->function) {
        case OVSDB_F_LT:
            return cmp < 0;
        case OVSDB_F_LE:
            return cmp <= 0;
        case OVSDB_F_EQ:
        case OVSDB_F_INCLUDES:
            return cmp == 0;
        case OVSDB_F_NE:
        case OVSDB_F_EXCLUDES:
            return cmp != 0;
        case OVSDB_F_GE:
            return cmp >= 0;
        case OVSDB_F_GT:
            return cmp > 0;
        }
    } else {
        switch (c->function) {
        case OVSDB_F_EQ:
            return ovsdb_datum_equals(field, arg, type);
        case OVSDB_F_NE:
            return !ovsdb_datum_equals(field, arg, type);
        case OVSDB_F_INCLUDES:
            return ovsdb_datum_includes_all(arg, field, type);
        case OVSDB_F_EXCLUDES:
            return ovsdb_datum_excludes_all(arg, field, type);
        case OVSDB_F_LT:
        case OVSDB_F_LE:
        case OVSDB_F_GE:
        case OVSDB_F_GT:
            NOT_REACHED();
        }
    }

    NOT_REACHED();
}

/* Returns true if 'row' satisfies every clause in 'cnd' (and therefore true
 * for a condition with no clauses). */
bool
ovsdb_condition_evaluate(const struct ovsdb_row *row,
                         const struct ovsdb_condition *cnd)
{
    size_t i;

    for (i = 0; i < cnd->n_clauses; i++) {
        if (!ovsdb_clause_evaluate(row, &cnd->clauses[i])) {
            return false;
        }
    }

    return true;
}

/* Frees every clause in 'cnd' and the clause array itself.  'cnd' itself is
 * not freed. */
void
ovsdb_condition_destroy(struct ovsdb_condition *cnd)
{
    size_t i;

    for (i = 0; i < cnd->n_clauses; i++) {
        ovsdb_clause_free(&cnd->clauses[i]);
    }
    free(cnd->clauses);
}
openvswitch-2.0.1+git20140120/ovsdb/condition.h000066400000000000000000000046401226605124000207320ustar00rootroot00000000000000/* Copyright (c) 2009, 2010 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVSDB_CONDITION_H
#define OVSDB_CONDITION_H 1

/* NOTE(review): the bare "#include" below has lost its header name (angle-
 * bracket text appears to have been stripped during extraction). */
#include
#include "compiler.h"
#include "ovsdb-data.h"

struct json;
struct ovsdb_table_schema;
struct ovsdb_row;

/* This list is ordered in ascending order of the fraction of table rows that
 * they are (heuristically) expected to leave in query results.
*/
#define OVSDB_FUNCTIONS                         \
    OVSDB_FUNCTION(OVSDB_F_EQ, "==")                  \
    OVSDB_FUNCTION(OVSDB_F_INCLUDES, "includes")      \
    OVSDB_FUNCTION(OVSDB_F_LE, "<=")                  \
    OVSDB_FUNCTION(OVSDB_F_LT, "<")                   \
    OVSDB_FUNCTION(OVSDB_F_GE, ">=")                  \
    OVSDB_FUNCTION(OVSDB_F_GT, ">")                   \
    OVSDB_FUNCTION(OVSDB_F_EXCLUDES, "excludes")      \
    OVSDB_FUNCTION(OVSDB_F_NE, "!=")

/* One enumerator per OVSDB_FUNCTIONS entry, in the same order.  The numeric
 * order is significant: clauses are sorted by it when a condition is
 * parsed. */
enum ovsdb_function {
#define OVSDB_FUNCTION(ENUM, NAME) ENUM,
    OVSDB_FUNCTIONS
#undef OVSDB_FUNCTION
};

struct ovsdb_error *ovsdb_function_from_string(const char *,
                                               enum ovsdb_function *)
    WARN_UNUSED_RESULT;
const char *ovsdb_function_to_string(enum ovsdb_function);

/* A single test of the form "<column> <function> <arg>". */
struct ovsdb_clause {
    enum ovsdb_function function;
    const struct ovsdb_column *column;  /* Not owned by the clause. */
    struct ovsdb_datum arg;             /* Owned by the clause. */
};

/* A conjunction of clauses: a row matches only if it satisfies all of them. */
struct ovsdb_condition {
    struct ovsdb_clause *clauses;
    size_t n_clauses;
};

#define OVSDB_CONDITION_INITIALIZER { NULL, 0 }

struct ovsdb_error *ovsdb_condition_from_json(
    const struct ovsdb_table_schema *,
    const struct json *, struct ovsdb_symbol_table *,
    struct ovsdb_condition *) WARN_UNUSED_RESULT;
struct json *ovsdb_condition_to_json(const struct ovsdb_condition *);
void ovsdb_condition_destroy(struct ovsdb_condition *);
bool ovsdb_condition_evaluate(const struct ovsdb_row *,
                              const struct ovsdb_condition *);

#endif /* ovsdb/condition.h */
openvswitch-2.0.1+git20140120/ovsdb/dot2pic000077500000000000000000000044561226605124000200720ustar00rootroot00000000000000#! /usr/bin/perl

# Copyright (c) 2009, 2010, 2011 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict; use warnings; my ($scale) = 1; print ".PS\n"; print "linethick = 1;\n"; while (<>) { if (/^graph/) { (undef, $scale) = split; } elsif (/^node/) { my (undef, $name, $x, $y, $width, $height, $label, $style, $shape, $color, $fillcolor) = split; $x *= $scale; $y *= $scale; $width *= $scale; $height *= $scale; print "linethick = ", ($style eq 'bold' ? 0.5 : 1.0), ";\n"; print "box at $x,$y wid $width height $height \"$name\"\n"; if ($style eq 'bold') { my $inset = 2.0 / 72.0; $width -= $inset * 2; $height -= $inset * 2; print "box at $x,$y wid $width height $height\n"; } } elsif (/edge/) { my (undef, $tail, $head, $n, $rest) = split(' ', $_, 5); my @xy; for (1...$n) { my ($x, $y); ($x, $y, $rest) = split(' ', $rest, 3); push(@xy, [$x * $scale, $y * $scale]); } my ($label, $xl, $yl); if (scalar(my @junk = split(' ', $rest)) > 2) { if ($rest =~ s/^"([^"]*)"\s+//) { $label = $1; } else { ($label, $rest) = split(' ', $rest, 2); } ($xl, $yl, $rest) = split(' ', $rest, 3); $xl *= $scale; $yl *= $scale; } my ($style, $color) = split(' ', $rest); print "linethick = ", ($style eq 'dotted' ? 0.5 : 1), ";\n"; print "spline -> from $xy[0][0],$xy[0][1]"; for (my ($i) = 0; $i <= $#xy; $i++) { print " to $xy[$i][0],$xy[$i][1]"; } print "\n"; print "\"$label\" at $xl,$yl\n" if defined($label); } } print ".PE\n"; openvswitch-2.0.1+git20140120/ovsdb/execution.c000066400000000000000000000574241226605124000207520ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include "column.h" #include "condition.h" #include "file.h" #include "json.h" #include "mutation.h" #include "ovsdb-data.h" #include "ovsdb-error.h" #include "ovsdb-parser.h" #include "ovsdb.h" #include "query.h" #include "row.h" #include "server.h" #include "table.h" #include "timeval.h" #include "transaction.h" struct ovsdb_execution { struct ovsdb *db; const struct ovsdb_session *session; struct ovsdb_txn *txn; struct ovsdb_symbol_table *symtab; bool durable; /* Triggers. */ long long int elapsed_msec; long long int timeout_msec; }; typedef struct ovsdb_error *ovsdb_operation_executor(struct ovsdb_execution *, struct ovsdb_parser *, struct json *result); static ovsdb_operation_executor ovsdb_execute_insert; static ovsdb_operation_executor ovsdb_execute_select; static ovsdb_operation_executor ovsdb_execute_update; static ovsdb_operation_executor ovsdb_execute_mutate; static ovsdb_operation_executor ovsdb_execute_delete; static ovsdb_operation_executor ovsdb_execute_wait; static ovsdb_operation_executor ovsdb_execute_commit; static ovsdb_operation_executor ovsdb_execute_abort; static ovsdb_operation_executor ovsdb_execute_comment; static ovsdb_operation_executor ovsdb_execute_assert; static ovsdb_operation_executor * lookup_executor(const char *name) { struct ovsdb_operation { const char *name; ovsdb_operation_executor *executor; }; static const struct ovsdb_operation operations[] = { { "insert", ovsdb_execute_insert }, { "select", ovsdb_execute_select }, { "update", ovsdb_execute_update }, { "mutate", ovsdb_execute_mutate }, { "delete", ovsdb_execute_delete }, { "wait", ovsdb_execute_wait }, { "commit", ovsdb_execute_commit }, { "abort", ovsdb_execute_abort }, { "comment", ovsdb_execute_comment }, { "assert", ovsdb_execute_assert }, }; size_t i; for (i = 0; i < ARRAY_SIZE(operations); i++) { const struct ovsdb_operation *c = &operations[i]; 
if (!strcmp(c->name, name)) { return c->executor; } } return NULL; } struct json * ovsdb_execute(struct ovsdb *db, const struct ovsdb_session *session, const struct json *params, long long int elapsed_msec, long long int *timeout_msec) { struct ovsdb_execution x; struct ovsdb_error *error; struct json *results; size_t n_operations; size_t i; if (params->type != JSON_ARRAY || !params->u.array.n || params->u.array.elems[0]->type != JSON_STRING || strcmp(params->u.array.elems[0]->u.string, db->schema->name)) { if (params->type != JSON_ARRAY) { error = ovsdb_syntax_error(params, NULL, "array expected"); } else { error = ovsdb_syntax_error(params, NULL, "database name expected " "as first parameter"); } results = ovsdb_error_to_json(error); ovsdb_error_destroy(error); return results; } x.db = db; x.session = session; x.txn = ovsdb_txn_create(db); x.symtab = ovsdb_symbol_table_create(); x.durable = false; x.elapsed_msec = elapsed_msec; x.timeout_msec = LLONG_MAX; results = NULL; results = json_array_create_empty(); n_operations = params->u.array.n - 1; error = NULL; for (i = 1; i <= n_operations; i++) { struct json *operation = params->u.array.elems[i]; struct ovsdb_error *parse_error; struct ovsdb_parser parser; struct json *result; const struct json *op; /* Parse and execute operation. */ ovsdb_parser_init(&parser, operation, "ovsdb operation %zu of %zu", i, n_operations); op = ovsdb_parser_member(&parser, "op", OP_ID); result = json_object_create(); if (op) { const char *op_name = json_string(op); ovsdb_operation_executor *executor = lookup_executor(op_name); if (executor) { error = executor(&x, &parser, result); } else { ovsdb_parser_raise_error(&parser, "No operation \"%s\"", op_name); } } else { ovs_assert(ovsdb_parser_has_error(&parser)); } /* A parse error overrides any other error. * An error overrides any other result. 
*/ parse_error = ovsdb_parser_finish(&parser); if (parse_error) { ovsdb_error_destroy(error); error = parse_error; } if (error) { json_destroy(result); result = ovsdb_error_to_json(error); } if (error && !strcmp(ovsdb_error_get_tag(error), "not supported") && timeout_msec) { ovsdb_txn_abort(x.txn); *timeout_msec = x.timeout_msec; json_destroy(result); json_destroy(results); results = NULL; goto exit; } /* Add result to array. */ json_array_add(results, result); if (error) { break; } } if (!error) { error = ovsdb_txn_commit(x.txn, x.durable); if (error) { json_array_add(results, ovsdb_error_to_json(error)); } } else { ovsdb_txn_abort(x.txn); } while (json_array(results)->n < n_operations) { json_array_add(results, json_null_create()); } exit: ovsdb_error_destroy(error); ovsdb_symbol_table_destroy(x.symtab); return results; } static struct ovsdb_error * ovsdb_execute_commit(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result OVS_UNUSED) { const struct json *durable; durable = ovsdb_parser_member(parser, "durable", OP_BOOLEAN); if (durable && json_boolean(durable)) { x->durable = true; } return NULL; } static struct ovsdb_error * ovsdb_execute_abort(struct ovsdb_execution *x OVS_UNUSED, struct ovsdb_parser *parser OVS_UNUSED, struct json *result OVS_UNUSED) { return ovsdb_error("aborted", "aborted by request"); } static struct ovsdb_table * parse_table(struct ovsdb_execution *x, struct ovsdb_parser *parser, const char *member) { struct ovsdb_table *table; const char *table_name; const struct json *json; json = ovsdb_parser_member(parser, member, OP_ID); if (!json) { return NULL; } table_name = json_string(json); table = shash_find_data(&x->db->tables, table_name); if (!table) { ovsdb_parser_raise_error(parser, "No table named %s.", table_name); } return table; } static WARN_UNUSED_RESULT struct ovsdb_error * parse_row(const struct json *json, const struct ovsdb_table *table, struct ovsdb_symbol_table *symtab, struct ovsdb_row **rowp, struct 
ovsdb_column_set *columns) { struct ovsdb_error *error; struct ovsdb_row *row; *rowp = NULL; if (!table) { return OVSDB_BUG("null table"); } if (!json) { return OVSDB_BUG("null row"); } row = ovsdb_row_create(table); error = ovsdb_row_from_json(row, json, symtab, columns); if (error) { ovsdb_row_destroy(row); return error; } else { *rowp = row; return NULL; } } static struct ovsdb_error * ovsdb_execute_insert(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result) { struct ovsdb_table *table; struct ovsdb_row *row = NULL; const struct json *uuid_name, *row_json; struct ovsdb_error *error; struct uuid row_uuid; table = parse_table(x, parser, "table"); uuid_name = ovsdb_parser_member(parser, "uuid-name", OP_ID | OP_OPTIONAL); row_json = ovsdb_parser_member(parser, "row", OP_OBJECT); error = ovsdb_parser_get_error(parser); if (error) { return error; } if (uuid_name) { struct ovsdb_symbol *symbol; symbol = ovsdb_symbol_table_insert(x->symtab, json_string(uuid_name)); if (symbol->created) { return ovsdb_syntax_error(uuid_name, "duplicate uuid-name", "This \"uuid-name\" appeared on an " "earlier \"insert\" operation."); } row_uuid = symbol->uuid; symbol->created = true; } else { uuid_generate(&row_uuid); } if (!error) { error = parse_row(row_json, table, x->symtab, &row, NULL); } if (!error) { /* Check constraints for columns not included in "row", in case the * default values do not satisfy the constraints. We could check only * the columns that have their default values by supplying an * ovsdb_column_set to parse_row() above, but I suspect that this is * cheaper. */ const struct shash_node *node; SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; const struct ovsdb_datum *datum = &row->fields[column->index]; /* If there are 0 keys or pairs, there's nothing to check. * If there is 1, it might be a default value. * If there are more, it can't be a default value, so the value has * already been checked. 
*/ if (datum->n == 1) { error = ovsdb_datum_check_constraints(datum, &column->type); if (error) { ovsdb_row_destroy(row); break; } } } } if (!error) { *ovsdb_row_get_uuid_rw(row) = row_uuid; ovsdb_txn_row_insert(x->txn, row); json_object_put(result, "uuid", ovsdb_datum_to_json(&row->fields[OVSDB_COL_UUID], &ovsdb_type_uuid)); } return error; } static struct ovsdb_error * ovsdb_execute_select(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result) { struct ovsdb_table *table; const struct json *where, *columns_json, *sort_json; struct ovsdb_condition condition = OVSDB_CONDITION_INITIALIZER; struct ovsdb_column_set columns = OVSDB_COLUMN_SET_INITIALIZER; struct ovsdb_column_set sort = OVSDB_COLUMN_SET_INITIALIZER; struct ovsdb_error *error; table = parse_table(x, parser, "table"); where = ovsdb_parser_member(parser, "where", OP_ARRAY); columns_json = ovsdb_parser_member(parser, "columns", OP_ARRAY | OP_OPTIONAL); sort_json = ovsdb_parser_member(parser, "sort", OP_ARRAY | OP_OPTIONAL); error = ovsdb_parser_get_error(parser); if (!error) { error = ovsdb_condition_from_json(table->schema, where, x->symtab, &condition); } if (!error) { error = ovsdb_column_set_from_json(columns_json, table->schema, &columns); } if (!error) { error = ovsdb_column_set_from_json(sort_json, table->schema, &sort); } if (!error) { struct ovsdb_row_set rows = OVSDB_ROW_SET_INITIALIZER; ovsdb_query_distinct(table, &condition, &columns, &rows); ovsdb_row_set_sort(&rows, &sort); json_object_put(result, "rows", ovsdb_row_set_to_json(&rows, &columns)); ovsdb_row_set_destroy(&rows); } ovsdb_column_set_destroy(&columns); ovsdb_column_set_destroy(&sort); ovsdb_condition_destroy(&condition); return error; } struct update_row_cbdata { size_t n_matches; struct ovsdb_txn *txn; const struct ovsdb_row *row; const struct ovsdb_column_set *columns; }; static bool update_row_cb(const struct ovsdb_row *row, void *ur_) { struct update_row_cbdata *ur = ur_; ur->n_matches++; if 
(!ovsdb_row_equal_columns(row, ur->row, ur->columns)) { ovsdb_row_update_columns(ovsdb_txn_row_modify(ur->txn, row), ur->row, ur->columns); } return true; } static struct ovsdb_error * ovsdb_execute_update(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result) { struct ovsdb_table *table; const struct json *where, *row_json; struct ovsdb_condition condition = OVSDB_CONDITION_INITIALIZER; struct ovsdb_column_set columns = OVSDB_COLUMN_SET_INITIALIZER; struct ovsdb_row *row = NULL; struct update_row_cbdata ur; struct ovsdb_error *error; table = parse_table(x, parser, "table"); where = ovsdb_parser_member(parser, "where", OP_ARRAY); row_json = ovsdb_parser_member(parser, "row", OP_OBJECT); error = ovsdb_parser_get_error(parser); if (!error) { error = parse_row(row_json, table, x->symtab, &row, &columns); } if (!error) { size_t i; for (i = 0; i < columns.n_columns; i++) { const struct ovsdb_column *column = columns.columns[i]; if (!column->mutable) { error = ovsdb_syntax_error(parser->json, "constraint violation", "Cannot update immutable column %s " "in table %s.", column->name, table->schema->name); break; } } } if (!error) { error = ovsdb_condition_from_json(table->schema, where, x->symtab, &condition); } if (!error) { ur.n_matches = 0; ur.txn = x->txn; ur.row = row; ur.columns = &columns; ovsdb_query(table, &condition, update_row_cb, &ur); json_object_put(result, "count", json_integer_create(ur.n_matches)); } ovsdb_row_destroy(row); ovsdb_column_set_destroy(&columns); ovsdb_condition_destroy(&condition); return error; } struct mutate_row_cbdata { size_t n_matches; struct ovsdb_txn *txn; const struct ovsdb_mutation_set *mutations; struct ovsdb_error **error; }; static bool mutate_row_cb(const struct ovsdb_row *row, void *mr_) { struct mutate_row_cbdata *mr = mr_; mr->n_matches++; *mr->error = ovsdb_mutation_set_execute(ovsdb_txn_row_modify(mr->txn, row), mr->mutations); return *mr->error == NULL; } static struct ovsdb_error * 
ovsdb_execute_mutate(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result) { struct ovsdb_table *table; const struct json *where; const struct json *mutations_json; struct ovsdb_condition condition = OVSDB_CONDITION_INITIALIZER; struct ovsdb_mutation_set mutations = OVSDB_MUTATION_SET_INITIALIZER; struct ovsdb_row *row = NULL; struct mutate_row_cbdata mr; struct ovsdb_error *error; table = parse_table(x, parser, "table"); where = ovsdb_parser_member(parser, "where", OP_ARRAY); mutations_json = ovsdb_parser_member(parser, "mutations", OP_ARRAY); error = ovsdb_parser_get_error(parser); if (!error) { error = ovsdb_mutation_set_from_json(table->schema, mutations_json, x->symtab, &mutations); } if (!error) { error = ovsdb_condition_from_json(table->schema, where, x->symtab, &condition); } if (!error) { mr.n_matches = 0; mr.txn = x->txn; mr.mutations = &mutations; mr.error = &error; ovsdb_query(table, &condition, mutate_row_cb, &mr); json_object_put(result, "count", json_integer_create(mr.n_matches)); } ovsdb_row_destroy(row); ovsdb_mutation_set_destroy(&mutations); ovsdb_condition_destroy(&condition); return error; } struct delete_row_cbdata { size_t n_matches; const struct ovsdb_table *table; struct ovsdb_txn *txn; }; static bool delete_row_cb(const struct ovsdb_row *row, void *dr_) { struct delete_row_cbdata *dr = dr_; dr->n_matches++; ovsdb_txn_row_delete(dr->txn, row); return true; } static struct ovsdb_error * ovsdb_execute_delete(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result) { struct ovsdb_table *table; const struct json *where; struct ovsdb_condition condition = OVSDB_CONDITION_INITIALIZER; struct ovsdb_error *error; where = ovsdb_parser_member(parser, "where", OP_ARRAY); table = parse_table(x, parser, "table"); error = ovsdb_parser_get_error(parser); if (!error) { error = ovsdb_condition_from_json(table->schema, where, x->symtab, &condition); } if (!error) { struct delete_row_cbdata dr; dr.n_matches = 0; 
dr.table = table; dr.txn = x->txn; ovsdb_query(table, &condition, delete_row_cb, &dr); json_object_put(result, "count", json_integer_create(dr.n_matches)); } ovsdb_condition_destroy(&condition); return error; } struct wait_auxdata { struct ovsdb_row_hash *actual; struct ovsdb_row_hash *expected; bool *equal; }; static bool ovsdb_execute_wait_query_cb(const struct ovsdb_row *row, void *aux_) { struct wait_auxdata *aux = aux_; if (ovsdb_row_hash_contains(aux->expected, row)) { ovsdb_row_hash_insert(aux->actual, row); return true; } else { /* The query row isn't in the expected result set, so the actual and * expected results sets definitely differ and we can short-circuit the * rest of the query. */ *aux->equal = false; return false; } } static struct ovsdb_error * ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result OVS_UNUSED) { struct ovsdb_table *table; const struct json *timeout, *where, *columns_json, *until, *rows; struct ovsdb_condition condition = OVSDB_CONDITION_INITIALIZER; struct ovsdb_column_set columns = OVSDB_COLUMN_SET_INITIALIZER; struct ovsdb_row_hash expected = OVSDB_ROW_HASH_INITIALIZER(expected); struct ovsdb_row_hash actual = OVSDB_ROW_HASH_INITIALIZER(actual); struct ovsdb_error *error; struct wait_auxdata aux; long long int timeout_msec = 0; size_t i; timeout = ovsdb_parser_member(parser, "timeout", OP_NUMBER | OP_OPTIONAL); where = ovsdb_parser_member(parser, "where", OP_ARRAY); columns_json = ovsdb_parser_member(parser, "columns", OP_ARRAY | OP_OPTIONAL); until = ovsdb_parser_member(parser, "until", OP_STRING); rows = ovsdb_parser_member(parser, "rows", OP_ARRAY); table = parse_table(x, parser, "table"); error = ovsdb_parser_get_error(parser); if (!error) { error = ovsdb_condition_from_json(table->schema, where, x->symtab, &condition); } if (!error) { error = ovsdb_column_set_from_json(columns_json, table->schema, &columns); } if (!error) { if (timeout) { timeout_msec = MIN(LLONG_MAX, 
json_real(timeout)); if (timeout_msec < 0) { error = ovsdb_syntax_error(timeout, NULL, "timeout must be nonnegative"); } else if (timeout_msec < x->timeout_msec) { x->timeout_msec = timeout_msec; } } else { timeout_msec = LLONG_MAX; } if (strcmp(json_string(until), "==") && strcmp(json_string(until), "!=")) { error = ovsdb_syntax_error(until, NULL, "\"until\" must be \"==\" or \"!=\""); } } if (!error) { /* Parse "rows" into 'expected'. */ ovsdb_row_hash_init(&expected, &columns); for (i = 0; i < rows->u.array.n; i++) { struct ovsdb_row *row; row = ovsdb_row_create(table); error = ovsdb_row_from_json(row, rows->u.array.elems[i], x->symtab, NULL); if (error) { break; } if (!ovsdb_row_hash_insert(&expected, row)) { /* XXX Perhaps we should abort with an error or log a * warning. */ ovsdb_row_destroy(row); } } } if (!error) { /* Execute query. */ bool equal = true; ovsdb_row_hash_init(&actual, &columns); aux.actual = &actual; aux.expected = &expected; aux.equal = &equal; ovsdb_query(table, &condition, ovsdb_execute_wait_query_cb, &aux); if (equal) { /* We know that every row in 'actual' is also in 'expected'. We * also know that all of the rows in 'actual' are distinct and that * all of the rows in 'expected' are distinct. Therefore, if * 'actual' and 'expected' have the same number of rows, then they * have the same content. */ size_t n_actual = ovsdb_row_hash_count(&actual); size_t n_expected = ovsdb_row_hash_count(&expected); equal = n_actual == n_expected; } if (!strcmp(json_string(until), "==") != equal) { if (timeout && x->elapsed_msec >= timeout_msec) { if (x->elapsed_msec) { error = ovsdb_error("timed out", "\"wait\" timed out after %lld ms", x->elapsed_msec); } else { error = ovsdb_error("timed out", "\"wait\" timed out"); } } else { /* ovsdb_execute() will change this, if triggers really are * supported. 
*/ error = ovsdb_error("not supported", "triggers not supported"); } } } ovsdb_row_hash_destroy(&expected, true); ovsdb_row_hash_destroy(&actual, false); ovsdb_column_set_destroy(&columns); ovsdb_condition_destroy(&condition); return error; } static struct ovsdb_error * ovsdb_execute_comment(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result OVS_UNUSED) { const struct json *comment; comment = ovsdb_parser_member(parser, "comment", OP_STRING); if (!comment) { return NULL; } ovsdb_txn_add_comment(x->txn, json_string(comment)); return NULL; } static struct ovsdb_error * ovsdb_execute_assert(struct ovsdb_execution *x, struct ovsdb_parser *parser, struct json *result OVS_UNUSED) { const struct json *lock_name; lock_name = ovsdb_parser_member(parser, "lock", OP_ID); if (!lock_name) { return NULL; } if (x->session) { const struct ovsdb_lock_waiter *waiter; waiter = ovsdb_session_get_lock_waiter(x->session, json_string(lock_name)); if (waiter && ovsdb_lock_waiter_is_owner(waiter)) { return NULL; } } return ovsdb_error("not owner", "Asserted lock %s not held.", json_string(lock_name)); } openvswitch-2.0.1+git20140120/ovsdb/file.c000066400000000000000000000600341226605124000176550ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "file.h" #include #include #include #include "bitmap.h" #include "column.h" #include "log.h" #include "json.h" #include "lockfile.h" #include "ovsdb.h" #include "ovsdb-error.h" #include "row.h" #include "socket-util.h" #include "table.h" #include "timeval.h" #include "transaction.h" #include "uuid.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ovsdb_file); /* Minimum number of milliseconds between database compactions. */ #define COMPACT_MIN_MSEC (10 * 60 * 1000) /* 10 minutes. */ /* Minimum number of milliseconds between trying to compact the database if * compacting fails. */ #define COMPACT_RETRY_MSEC (60 * 1000) /* 1 minute. */ /* A transaction being converted to JSON for writing to a file. */ struct ovsdb_file_txn { struct json *json; /* JSON for the whole transaction. */ struct json *table_json; /* JSON for 'table''s transaction. */ struct ovsdb_table *table; /* Table described in 'table_json'. */ }; static void ovsdb_file_txn_init(struct ovsdb_file_txn *); static void ovsdb_file_txn_add_row(struct ovsdb_file_txn *, const struct ovsdb_row *old, const struct ovsdb_row *new, const unsigned long int *changed); static struct ovsdb_error *ovsdb_file_txn_commit(struct json *, const char *comment, bool durable, struct ovsdb_log *); static struct ovsdb_error *ovsdb_file_open__(const char *file_name, const struct ovsdb_schema *, bool read_only, struct ovsdb **, struct ovsdb_file **); static struct ovsdb_error *ovsdb_file_txn_from_json( struct ovsdb *, const struct json *, bool converting, struct ovsdb_txn **); static struct ovsdb_error *ovsdb_file_create(struct ovsdb *, struct ovsdb_log *, const char *file_name, unsigned int n_transactions, struct ovsdb_file **filep); /* Opens database 'file_name' and stores a pointer to the new database in * '*dbp'. If 'read_only' is false, then the database will be locked and * changes to the database will be written to disk. 
If 'read_only' is true, * the database will not be locked and changes to the database will persist * only as long as the "struct ovsdb". * * If 'filep' is nonnull and 'read_only' is false, then on success sets * '*filep' to an ovsdb_file that represents the open file. This ovsdb_file * persists until '*dbp' is destroyed. * * On success, returns NULL. On failure, returns an ovsdb_error (which the * caller must destroy) and sets '*dbp' and '*filep' to NULL. */ struct ovsdb_error * ovsdb_file_open(const char *file_name, bool read_only, struct ovsdb **dbp, struct ovsdb_file **filep) { return ovsdb_file_open__(file_name, NULL, read_only, dbp, filep); } /* Opens database 'file_name' with an alternate schema. The specified 'schema' * is used to interpret the data in 'file_name', ignoring the schema actually * stored in the file. Data in the file for tables or columns that do not * exist in 'schema' are ignored, but the ovsdb file format must otherwise be * observed, including column constraints. * * This function can be useful for upgrading or downgrading databases to * "almost-compatible" formats. * * The database will not be locked. Changes to the database will persist only * as long as the "struct ovsdb". * * On success, stores a pointer to the new database in '*dbp' and returns a * null pointer. On failure, returns an ovsdb_error (which the caller must * destroy) and sets '*dbp' to NULL. 
*/ struct ovsdb_error * ovsdb_file_open_as_schema(const char *file_name, const struct ovsdb_schema *schema, struct ovsdb **dbp) { return ovsdb_file_open__(file_name, schema, true, dbp, NULL); } static struct ovsdb_error * ovsdb_file_open_log(const char *file_name, enum ovsdb_log_open_mode open_mode, struct ovsdb_log **logp, struct ovsdb_schema **schemap) { struct ovsdb_schema *schema = NULL; struct ovsdb_log *log = NULL; struct ovsdb_error *error; struct json *json = NULL; ovs_assert(logp || schemap); error = ovsdb_log_open(file_name, open_mode, -1, &log); if (error) { goto error; } error = ovsdb_log_read(log, &json); if (error) { goto error; } else if (!json) { error = ovsdb_io_error(EOF, "%s: database file contains no schema", file_name); goto error; } if (schemap) { error = ovsdb_schema_from_json(json, &schema); if (error) { error = ovsdb_wrap_error(error, "failed to parse \"%s\" as ovsdb schema", file_name); goto error; } } json_destroy(json); if (logp) { *logp = log; } else { ovsdb_log_close(log); } if (schemap) { *schemap = schema; } return NULL; error: ovsdb_log_close(log); json_destroy(json); if (logp) { *logp = NULL; } if (schemap) { *schemap = NULL; } return error; } static struct ovsdb_error * ovsdb_file_open__(const char *file_name, const struct ovsdb_schema *alternate_schema, bool read_only, struct ovsdb **dbp, struct ovsdb_file **filep) { enum ovsdb_log_open_mode open_mode; unsigned int n_transactions; struct ovsdb_schema *schema = NULL; struct ovsdb_error *error; struct ovsdb_log *log; struct json *json; struct ovsdb *db = NULL; /* In read-only mode there is no ovsdb_file so 'filep' must be null. */ ovs_assert(!(read_only && filep)); open_mode = read_only ? OVSDB_LOG_READ_ONLY : OVSDB_LOG_READ_WRITE; error = ovsdb_file_open_log(file_name, open_mode, &log, alternate_schema ? NULL : &schema); if (error) { goto error; } db = ovsdb_create(schema ? 
schema : ovsdb_schema_clone(alternate_schema)); n_transactions = 0; while ((error = ovsdb_log_read(log, &json)) == NULL && json) { struct ovsdb_txn *txn; error = ovsdb_file_txn_from_json(db, json, alternate_schema != NULL, &txn); json_destroy(json); if (error) { ovsdb_log_unread(log); break; } n_transactions++; error = ovsdb_txn_commit(txn, false); if (error) { ovsdb_log_unread(log); break; } } if (error) { /* Log error but otherwise ignore it. Probably the database just got * truncated due to power failure etc. and we should use its current * contents. */ char *msg = ovsdb_error_to_string(error); VLOG_ERR("%s", msg); free(msg); ovsdb_error_destroy(error); } if (!read_only) { struct ovsdb_file *file; error = ovsdb_file_create(db, log, file_name, n_transactions, &file); if (error) { goto error; } if (filep) { *filep = file; } } else { ovsdb_log_close(log); } *dbp = db; return NULL; error: *dbp = NULL; if (filep) { *filep = NULL; } ovsdb_destroy(db); ovsdb_log_close(log); return error; } static struct ovsdb_error * ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting, const struct json *json) { struct ovsdb_table_schema *schema = row->table->schema; struct ovsdb_error *error; struct shash_node *node; if (json->type != JSON_OBJECT) { return ovsdb_syntax_error(json, NULL, "row must be JSON object"); } SHASH_FOR_EACH (node, json_object(json)) { const char *column_name = node->name; const struct ovsdb_column *column; struct ovsdb_datum datum; column = ovsdb_table_schema_get_column(schema, column_name); if (!column) { if (converting) { continue; } return ovsdb_syntax_error(json, "unknown column", "No column %s in table %s.", column_name, schema->name); } error = ovsdb_datum_from_json(&datum, &column->type, node->data, NULL); if (error) { return error; } ovsdb_datum_swap(&row->fields[column->index], &datum); ovsdb_datum_destroy(&datum, &column->type); } return NULL; } static struct ovsdb_error * ovsdb_file_txn_row_from_json(struct ovsdb_txn *txn, struct 
ovsdb_table *table, bool converting, const struct uuid *row_uuid, struct json *json) { const struct ovsdb_row *row = ovsdb_table_get_row(table, row_uuid); if (json->type == JSON_NULL) { if (!row) { return ovsdb_syntax_error(NULL, NULL, "transaction deletes " "row "UUID_FMT" that does not exist", UUID_ARGS(row_uuid)); } ovsdb_txn_row_delete(txn, row); return NULL; } else if (row) { return ovsdb_file_update_row_from_json(ovsdb_txn_row_modify(txn, row), converting, json); } else { struct ovsdb_error *error; struct ovsdb_row *new; new = ovsdb_row_create(table); *ovsdb_row_get_uuid_rw(new) = *row_uuid; error = ovsdb_file_update_row_from_json(new, converting, json); if (error) { ovsdb_row_destroy(new); } else { ovsdb_txn_row_insert(txn, new); } return error; } } static struct ovsdb_error * ovsdb_file_txn_table_from_json(struct ovsdb_txn *txn, struct ovsdb_table *table, bool converting, struct json *json) { struct shash_node *node; if (json->type != JSON_OBJECT) { return ovsdb_syntax_error(json, NULL, "object expected"); } SHASH_FOR_EACH (node, json->u.object) { const char *uuid_string = node->name; struct json *txn_row_json = node->data; struct ovsdb_error *error; struct uuid row_uuid; if (!uuid_from_string(&row_uuid, uuid_string)) { return ovsdb_syntax_error(json, NULL, "\"%s\" is not a valid UUID", uuid_string); } error = ovsdb_file_txn_row_from_json(txn, table, converting, &row_uuid, txn_row_json); if (error) { return error; } } return NULL; } /* Converts 'json' to an ovsdb_txn for 'db', storing the new transaction in * '*txnp'. Returns NULL if successful, otherwise an error. * * If 'converting' is true, then unknown table and column names are ignored * (which can ease upgrading and downgrading schemas); otherwise, they are * treated as errors. 
*/
static struct ovsdb_error *
ovsdb_file_txn_from_json(struct ovsdb *db, const struct json *json,
                         bool converting, struct ovsdb_txn **txnp)
{
    struct ovsdb_error *error;
    struct shash_node *node;
    struct ovsdb_txn *txn;

    /* '*txnp' is only set to a transaction on the success path below. */
    *txnp = NULL;

    if (json->type != JSON_OBJECT) {
        return ovsdb_syntax_error(json, NULL, "object expected");
    }

    txn = ovsdb_txn_create(db);
    SHASH_FOR_EACH (node, json->u.object) {
        const char *table_name = node->name;
        struct json *node_json = node->data;
        struct ovsdb_table *table;

        table = shash_find_data(&db->tables, table_name);
        if (!table) {
            /* Members that do not name a table in 'db': an integer "_date"
             * and any "_comment" are skipped, and so is every unknown name
             * when 'converting' is true.  Anything else is an error. */
            if (!strcmp(table_name, "_date")
                && node_json->type == JSON_INTEGER) {
                continue;
            } else if (!strcmp(table_name, "_comment") || converting) {
                continue;
            }

            error = ovsdb_syntax_error(json, "unknown table",
                                       "No table named %s.", table_name);
            goto error;
        }

        error = ovsdb_file_txn_table_from_json(txn, table, converting,
                                               node_json);
        if (error) {
            goto error;
        }
    }
    *txnp = txn;
    return NULL;

error:
    /* Discard whatever was accumulated in 'txn' before the failure. */
    ovsdb_txn_abort(txn);
    return error;
}

/* Creates log 'file_name' (opened with OVSDB_LOG_CREATE and 'locking') and
 * writes to it 'db''s schema followed by a single transaction that contains
 * every row of every table, annotated with 'comment'.
 *
 * If 'logp' is nonnull then, once the log has been opened, '*logp' receives
 * the still-open log on success and NULL on failure.  If opening the log
 * itself fails, the error is returned immediately and '*logp' is not
 * written.  If 'logp' is null the log is closed before returning.
 *
 * On any failure after the log was opened, 'file_name' is removed.
 *
 * Returns NULL on success, otherwise an ovsdb_error. */
static struct ovsdb_error *
ovsdb_file_save_copy__(const char *file_name, int locking,
                       const char *comment, const struct ovsdb *db,
                       struct ovsdb_log **logp)
{
    const struct shash_node *node;
    struct ovsdb_file_txn ftxn;
    struct ovsdb_error *error;
    struct ovsdb_log *log;
    struct json *json;

    error = ovsdb_log_open(file_name, OVSDB_LOG_CREATE, locking, &log);
    if (error) {
        return error;
    }

    /* Write schema. */
    json = ovsdb_schema_to_json(db->schema);
    error = ovsdb_log_write(log, json);
    json_destroy(json);
    if (error) {
        goto exit;
    }

    /* Write data.
     */
    ovsdb_file_txn_init(&ftxn);
    SHASH_FOR_EACH (node, &db->tables) {
        const struct ovsdb_table *table = node->data;
        const struct ovsdb_row *row;

        HMAP_FOR_EACH (row, hmap_node, &table->rows) {
            /* 'old' is NULL, so each row is recorded as a new row. */
            ovsdb_file_txn_add_row(&ftxn, NULL, row, NULL);
        }
    }
    error = ovsdb_file_txn_commit(ftxn.json, comment, true, log);

exit:
    if (logp) {
        if (!error) {
            /* Hand the open log to the caller; clearing 'log' means the
             * ovsdb_log_close() call below is passed NULL instead. */
            *logp = log;
            log = NULL;
        } else {
            *logp = NULL;
        }
    }
    ovsdb_log_close(log);
    if (error) {
        remove(file_name);
    }
    return error;
}

/* Saves a snapshot of 'db''s current contents as 'file_name'.  If 'comment' is
 * nonnull, then it is added along with the data contents and can be viewed
 * with "ovsdb-tool show-log".
 *
 * 'locking' is passed along to ovsdb_log_open() untouched. */
struct ovsdb_error *
ovsdb_file_save_copy(const char *file_name, int locking,
                     const char *comment, const struct ovsdb *db)
{
    return ovsdb_file_save_copy__(file_name, locking, comment, db, NULL);
}

/* Opens database 'file_name', reads its schema, and closes it.  On success,
 * stores the schema into '*schemap' and returns NULL; the caller then owns the
 * schema.  On failure, returns an ovsdb_error (which the caller must destroy)
 * and sets '*schemap' to NULL. */
struct ovsdb_error *
ovsdb_file_read_schema(const char *file_name, struct ovsdb_schema **schemap)
{
    ovs_assert(schemap != NULL);
    return ovsdb_file_open_log(file_name, OVSDB_LOG_READ_ONLY, NULL, schemap);
}

/* Replica implementation. */

struct ovsdb_file {
    struct ovsdb_replica replica;   /* Embedded base; see ovsdb_file_cast(). */
    struct ovsdb *db;               /* Database this file backs. */
    struct ovsdb_log *log;          /* Log that transactions are written to. */
    char *file_name;                /* Absolute file name, owned (freed in
                                     * ovsdb_file_destroy()). */
    long long int last_compact;     /* time_msec() at creation or at the last
                                     * successful compaction. */
    long long int next_compact;     /* Earliest time_msec() at which
                                     * ovsdb_file_commit() compacts again. */
    unsigned int n_transactions;    /* Transaction count; incremented on each
                                     * committed transaction. */
};

static const struct ovsdb_replica_class ovsdb_file_class;

/* Creates an ovsdb_file that ties 'db' to 'log', registers it as a replica of
 * 'db', and stores it in '*filep'.  'file_name' is resolved through symlinks
 * and made absolute; 'n_transactions' seeds the transaction counter.
 *
 * Returns NULL on success.  If the absolute name cannot be determined, sets
 * '*filep' to NULL and returns an I/O error. */
static struct ovsdb_error *
ovsdb_file_create(struct ovsdb *db, struct ovsdb_log *log,
                  const char *file_name,
                  unsigned int n_transactions,
                  struct ovsdb_file **filep)
{
    struct ovsdb_file *file;
    char *deref_name;
    char *abs_name;

    /* Use the absolute name of the file because ovsdb-server opens its
     * database before daemonize() chdirs to "/".
     */
    deref_name = follow_symlinks(file_name);
    abs_name = abs_file_name(NULL, deref_name);
    free(deref_name);
    if (!abs_name) {
        *filep = NULL;
        return ovsdb_io_error(0, "could not determine current "
                              "working directory");
    }

    file = xmalloc(sizeof *file);
    ovsdb_replica_init(&file->replica, &ovsdb_file_class);
    file->db = db;
    file->log = log;
    file->file_name = abs_name;
    file->last_compact = time_msec();
    file->next_compact = file->last_compact + COMPACT_MIN_MSEC;
    file->n_transactions = n_transactions;
    ovsdb_add_replica(db, &file->replica);

    *filep = file;
    return NULL;
}

/* Converts 'replica', which must have class ovsdb_file_class, back to the
 * struct ovsdb_file that embeds it. */
static struct ovsdb_file *
ovsdb_file_cast(struct ovsdb_replica *replica)
{
    ovs_assert(replica->class == &ovsdb_file_class);
    return CONTAINER_OF(replica, struct ovsdb_file, replica);
}

/* Callback for ovsdb_txn_for_each_change(): records one row change in the
 * struct ovsdb_file_txn passed as 'ftxn_'.  Always returns true. */
static bool
ovsdb_file_change_cb(const struct ovsdb_row *old,
                     const struct ovsdb_row *new,
                     const unsigned long int *changed,
                     void *ftxn_)
{
    struct ovsdb_file_txn *ftxn = ftxn_;
    ovsdb_file_txn_add_row(ftxn, old, new, changed);
    return true;
}

/* Replica "commit" callback: serializes the changes in 'txn' and appends
 * them to the file's log, committing the log as well when 'durable' is true.
 * A transaction that yields no JSON is skipped and counts as success. */
static struct ovsdb_error *
ovsdb_file_commit(struct ovsdb_replica *replica,
                  const struct ovsdb_txn *txn, bool durable)
{
    struct ovsdb_file *file = ovsdb_file_cast(replica);
    struct ovsdb_file_txn ftxn;
    struct ovsdb_error *error;

    ovsdb_file_txn_init(&ftxn);
    ovsdb_txn_for_each_change(txn, ovsdb_file_change_cb, &ftxn);
    if (!ftxn.json) {
        /* Nothing to commit. */
        return NULL;
    }

    error = ovsdb_file_txn_commit(ftxn.json, ovsdb_txn_get_comment(txn),
                                  durable, file->log);
    if (error) {
        return error;
    }
    file->n_transactions++;

    /* If it has been at least COMPACT_MIN_MSEC ms since the last time we
     * compacted (or at least COMPACT_RETRY_MSEC ms since the last time we
     * tried), and if there are at least 100 transactions in the database, and
     * if the database is at least 10 MB, then compact the database.
*/ if (time_msec() >= file->next_compact && file->n_transactions >= 100 && ovsdb_log_get_offset(file->log) >= 10 * 1024 * 1024) { error = ovsdb_file_compact(file); if (error) { char *s = ovsdb_error_to_string(error); ovsdb_error_destroy(error); VLOG_WARN("%s: compacting database failed (%s), retrying in " "%d seconds", file->file_name, s, COMPACT_RETRY_MSEC / 1000); free(s); file->next_compact = time_msec() + COMPACT_RETRY_MSEC; } } return NULL; } struct ovsdb_error * ovsdb_file_compact(struct ovsdb_file *file) { struct ovsdb_log *new_log = NULL; struct lockfile *tmp_lock = NULL; struct ovsdb_error *error; char *tmp_name = NULL; char *comment = NULL; int retval; comment = xasprintf("compacting database online " "(%.3f seconds old, %u transactions, %llu bytes)", (time_wall_msec() - file->last_compact) / 1000.0, file->n_transactions, (unsigned long long) ovsdb_log_get_offset(file->log)); VLOG_INFO("%s: %s", file->file_name, comment); /* Commit the old version, so that we can be assured that we'll eventually * have either the old or the new version. */ error = ovsdb_log_commit(file->log); if (error) { goto exit; } /* Lock temporary file. */ tmp_name = xasprintf("%s.tmp", file->file_name); retval = lockfile_lock(tmp_name, &tmp_lock); if (retval) { error = ovsdb_io_error(retval, "could not get lock on %s", tmp_name); goto exit; } /* Remove temporary file. (It might not exist.) */ if (unlink(tmp_name) < 0 && errno != ENOENT) { error = ovsdb_io_error(errno, "failed to remove %s", tmp_name); goto exit; } /* Save a copy. */ error = ovsdb_file_save_copy__(tmp_name, false, comment, file->db, &new_log); if (error) { goto exit; } /* Replace original by temporary. 
*/ if (rename(tmp_name, file->file_name)) { error = ovsdb_io_error(errno, "failed to rename \"%s\" to \"%s\"", tmp_name, file->file_name); goto exit; } fsync_parent_dir(file->file_name); exit: if (!error) { ovsdb_log_close(file->log); file->log = new_log; file->last_compact = time_msec(); file->next_compact = file->last_compact + COMPACT_MIN_MSEC; file->n_transactions = 1; } else { ovsdb_log_close(new_log); if (tmp_lock) { unlink(tmp_name); } } lockfile_unlock(tmp_lock); free(tmp_name); free(comment); return error; } static void ovsdb_file_destroy(struct ovsdb_replica *replica) { struct ovsdb_file *file = ovsdb_file_cast(replica); ovsdb_log_close(file->log); free(file->file_name); free(file); } static const struct ovsdb_replica_class ovsdb_file_class = { ovsdb_file_commit, ovsdb_file_destroy }; static void ovsdb_file_txn_init(struct ovsdb_file_txn *ftxn) { ftxn->json = NULL; ftxn->table_json = NULL; ftxn->table = NULL; } static void ovsdb_file_txn_add_row(struct ovsdb_file_txn *ftxn, const struct ovsdb_row *old, const struct ovsdb_row *new, const unsigned long int *changed) { struct json *row; if (!new) { row = json_null_create(); } else { struct shash_node *node; row = old ? NULL : json_object_create(); SHASH_FOR_EACH (node, &new->table->schema->columns) { const struct ovsdb_column *column = node->data; const struct ovsdb_type *type = &column->type; unsigned int idx = column->index; if (idx != OVSDB_COL_UUID && column->persistent && (old ? bitmap_is_set(changed, idx) : !ovsdb_datum_is_default(&new->fields[idx], type))) { if (!row) { row = json_object_create(); } json_object_put(row, column->name, ovsdb_datum_to_json(&new->fields[idx], type)); } } } if (row) { struct ovsdb_table *table = new ? new->table : old->table; char uuid[UUID_LEN + 1]; if (table != ftxn->table) { /* Create JSON object for transaction overall. */ if (!ftxn->json) { ftxn->json = json_object_create(); } /* Create JSON object for transaction on this table. 
             */
            ftxn->table_json = json_object_create();
            ftxn->table = table;
            json_object_put(ftxn->json, table->schema->name,
                            ftxn->table_json);
        }

        /* Add row to transaction for this table. */
        snprintf(uuid, sizeof uuid,
                 UUID_FMT, UUID_ARGS(ovsdb_row_get_uuid(new ? new : old)));
        json_object_put(ftxn->table_json, uuid, row);
    }
}

/* Writes transaction 'json' to 'log' and destroys 'json'.
 *
 * A null 'json' is replaced by an empty object.  If 'comment' is nonnull it
 * is stored under "_comment"; the current time_wall_msec() value is always
 * stored under "_date".  If 'durable' is true the log is also committed
 * after the write.
 *
 * Returns NULL on success, otherwise the underlying error wrapped with a
 * message saying whether writing or committing failed. */
static struct ovsdb_error *
ovsdb_file_txn_commit(struct json *json, const char *comment,
                      bool durable, struct ovsdb_log *log)
{
    struct ovsdb_error *error;

    if (!json) {
        json = json_object_create();
    }
    if (comment) {
        json_object_put_string(json, "_comment", comment);
    }
    json_object_put(json, "_date", json_integer_create(time_wall_msec()));

    error = ovsdb_log_write(log, json);
    /* 'json' is destroyed here whether or not the write succeeded. */
    json_destroy(json);
    if (error) {
        return ovsdb_wrap_error(error, "writing transaction failed");
    }

    if (durable) {
        error = ovsdb_log_commit(log);
        if (error) {
            return ovsdb_wrap_error(error, "committing transaction failed");
        }
    }

    return NULL;
}
openvswitch-2.0.1+git20140120/ovsdb/file.h000066400000000000000000000031641226605124000176630ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef OVSDB_FILE_H #define OVSDB_FILE_H 1 #include #include "compiler.h" #include "log.h" struct ovsdb; struct ovsdb_file; struct ovsdb_schema; struct ovsdb_error *ovsdb_file_open(const char *file_name, bool read_only, struct ovsdb **, struct ovsdb_file **) WARN_UNUSED_RESULT; struct ovsdb_error *ovsdb_file_open_as_schema(const char *file_name, const struct ovsdb_schema *, struct ovsdb **) WARN_UNUSED_RESULT; struct ovsdb_error *ovsdb_file_save_copy(const char *file_name, int locking, const char *comment, const struct ovsdb *) WARN_UNUSED_RESULT; struct ovsdb_error *ovsdb_file_compact(struct ovsdb_file *); struct ovsdb_error *ovsdb_file_read_schema(const char *file_name, struct ovsdb_schema **) WARN_UNUSED_RESULT; #endif /* ovsdb/file.h */ openvswitch-2.0.1+git20140120/ovsdb/jsonrpc-server.c000066400000000000000000001524741226605124000217320ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "jsonrpc-server.h" #include #include "bitmap.h" #include "column.h" #include "dynamic-string.h" #include "json.h" #include "jsonrpc.h" #include "ovsdb-error.h" #include "ovsdb-parser.h" #include "ovsdb.h" #include "reconnect.h" #include "row.h" #include "server.h" #include "simap.h" #include "stream.h" #include "table.h" #include "timeval.h" #include "transaction.h" #include "trigger.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ovsdb_jsonrpc_server); struct ovsdb_jsonrpc_remote; struct ovsdb_jsonrpc_session; /* Message rate-limiting. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); /* Sessions. */ static struct ovsdb_jsonrpc_session *ovsdb_jsonrpc_session_create( struct ovsdb_jsonrpc_remote *, struct jsonrpc_session *); static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *); static void ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *); static void ovsdb_jsonrpc_session_get_memory_usage_all( const struct ovsdb_jsonrpc_remote *, struct simap *usage); static void ovsdb_jsonrpc_session_close_all(struct ovsdb_jsonrpc_remote *); static void ovsdb_jsonrpc_session_reconnect_all(struct ovsdb_jsonrpc_remote *); static void ovsdb_jsonrpc_session_set_all_options( struct ovsdb_jsonrpc_remote *, const struct ovsdb_jsonrpc_options *); static bool ovsdb_jsonrpc_session_get_status( const struct ovsdb_jsonrpc_remote *, struct ovsdb_jsonrpc_remote_status *); static void ovsdb_jsonrpc_session_unlock_all(struct ovsdb_jsonrpc_session *); static void ovsdb_jsonrpc_session_unlock__(struct ovsdb_lock_waiter *); /* Triggers. 
*/
static void ovsdb_jsonrpc_trigger_create(struct ovsdb_jsonrpc_session *,
                                         struct ovsdb *,
                                         struct json *id, struct json *params);
static struct ovsdb_jsonrpc_trigger *ovsdb_jsonrpc_trigger_find(
    struct ovsdb_jsonrpc_session *, const struct json *id, size_t hash);
static void ovsdb_jsonrpc_trigger_complete(struct ovsdb_jsonrpc_trigger *);
static void ovsdb_jsonrpc_trigger_complete_all(struct ovsdb_jsonrpc_session *);
static void ovsdb_jsonrpc_trigger_complete_done(
    struct ovsdb_jsonrpc_session *);

/* Monitors. */
static struct json *ovsdb_jsonrpc_monitor_create(
    struct ovsdb_jsonrpc_session *, struct ovsdb *, struct json *params);
static struct jsonrpc_msg *ovsdb_jsonrpc_monitor_cancel(
    struct ovsdb_jsonrpc_session *,
    struct json_array *params,
    const struct json *request_id);
static void ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *);
static size_t ovsdb_jsonrpc_monitor_json_length_all(
    struct ovsdb_jsonrpc_session *);

/* JSON-RPC database server. */

struct ovsdb_jsonrpc_server {
    struct ovsdb_server up;     /* Embedded generic server. */
    unsigned int n_sessions;    /* Number of sessions currently open on all
                                 * remotes. */
    unsigned int max_sessions;  /* Listeners are only polled for new
                                 * connections while 'n_sessions' is below
                                 * this limit. */
    struct shash remotes;      /* Contains "struct ovsdb_jsonrpc_remote *"s. */
};

/* A configured remote.  This is either a passive stream listener plus a list
 * of the currently connected sessions, or a list of exactly one active
 * session. */
struct ovsdb_jsonrpc_remote {
    struct ovsdb_jsonrpc_server *server;  /* Server that owns this remote. */
    struct pstream *listener;   /* Listener, if passive. */
    struct list sessions;       /* List of "struct ovsdb_jsonrpc_session"s. */
    uint8_t dscp;               /* DSCP value taken from the remote's
                                 * options. */
};

static struct ovsdb_jsonrpc_remote *ovsdb_jsonrpc_server_add_remote(
    struct ovsdb_jsonrpc_server *, const char *name,
    const struct ovsdb_jsonrpc_options *options
);
static void ovsdb_jsonrpc_server_del_remote(struct shash_node *);

/* Creates and returns a new server to provide JSON-RPC access to an OVSDB.
 *
 * The caller must call ovsdb_jsonrpc_server_add_db() for each database to
 * which 'server' should provide access.
 */
struct ovsdb_jsonrpc_server *
ovsdb_jsonrpc_server_create(void)
{
    struct ovsdb_jsonrpc_server *server = xzalloc(sizeof *server);
    ovsdb_server_init(&server->up);
    server->max_sessions = 64;
    shash_init(&server->remotes);
    return server;
}

/* Adds 'db' to the set of databases served out by 'svr'.  Returns true if
 * successful, false if 'db''s name is the same as some database already in
 * 'server'.
 *
 * All sessions are forced to reconnect before the add is attempted, so the
 * reconnect happens even when false is returned. */
bool
ovsdb_jsonrpc_server_add_db(struct ovsdb_jsonrpc_server *svr, struct ovsdb *db)
{
    /* The OVSDB protocol doesn't have a way to notify a client that a
     * database has been added.  If some client tried to use the database
     * that we're adding and failed, then forcing it to reconnect seems like
     * a reasonable way to make it try again.
     *
     * If this is too big of a hammer in practice, we could be more selective,
     * e.g. disconnect only connections that actually tried to use a database
     * with 'db''s name. */
    ovsdb_jsonrpc_server_reconnect(svr);

    return ovsdb_server_add_db(&svr->up, db);
}

/* Removes 'db' from the set of databases served out by 'svr'.  Returns
 * true if successful, false if there is no database associated with 'db'. */
bool
ovsdb_jsonrpc_server_remove_db(struct ovsdb_jsonrpc_server *svr,
                               struct ovsdb *db)
{
    /* There might be pointers to 'db' from 'svr', such as monitors or
     * outstanding transactions.  Disconnect all JSON-RPC connections to avoid
     * accesses to freed memory.
     *
     * If this is too big of a hammer in practice, we could be more selective,
     * e.g. disconnect only connections that actually reference 'db'.
     */
    ovsdb_jsonrpc_server_reconnect(svr);

    return ovsdb_server_remove_db(&svr->up, db);
}

/* Destroys 'svr': deletes every remote, then tears down the remotes table
 * and the embedded ovsdb_server, and frees 'svr' itself. */
void
ovsdb_jsonrpc_server_destroy(struct ovsdb_jsonrpc_server *svr)
{
    struct shash_node *node, *next;

    /* The _SAFE variant is used because deleting a remote removes its node
     * from 'svr->remotes'. */
    SHASH_FOR_EACH_SAFE (node, next, &svr->remotes) {
        ovsdb_jsonrpc_server_del_remote(node);
    }
    shash_destroy(&svr->remotes);
    ovsdb_server_destroy(&svr->up);
    free(svr);
}

/* Returns a newly allocated options structure for 'target' with the default
 * maximum backoff.  The probe interval is the default one if
 * stream_or_pstream_needs_probes() says 'target' needs probes, otherwise 0.
 * All other members are zero. */
struct ovsdb_jsonrpc_options *
ovsdb_jsonrpc_default_options(const char *target)
{
    struct ovsdb_jsonrpc_options *options = xzalloc(sizeof *options);
    options->max_backoff = RECONNECT_DEFAULT_MAX_BACKOFF;
    options->probe_interval = (stream_or_pstream_needs_probes(target)
                               ? RECONNECT_DEFAULT_PROBE_INTERVAL
                               : 0);
    return options;
}

/* Sets 'svr''s current set of remotes to the names in 'new_remotes', with
 * options in the struct ovsdb_jsonrpc_options supplied as the data values.
 *
 * A remote is an active or passive stream connection method, e.g. "pssl:" or
 * "tcp:1.2.3.4". */
void
ovsdb_jsonrpc_server_set_remotes(struct ovsdb_jsonrpc_server *svr,
                                 const struct shash *new_remotes)
{
    struct shash_node *node, *next;

    /* First drop remotes that are no longer configured... */
    SHASH_FOR_EACH_SAFE (node, next, &svr->remotes) {
        if (!shash_find(new_remotes, node->name)) {
            VLOG_INFO("%s: remote deconfigured", node->name);
            ovsdb_jsonrpc_server_del_remote(node);
        }
    }
    /* ...then add the new ones and (re)apply options to all of them. */
    SHASH_FOR_EACH (node, new_remotes) {
        const struct ovsdb_jsonrpc_options *options = node->data;
        struct ovsdb_jsonrpc_remote *remote;

        remote = shash_find_data(&svr->remotes, node->name);
        if (!remote) {
            remote = ovsdb_jsonrpc_server_add_remote(svr, node->name, options);
            if (!remote) {
                /* Could not be added; skip it. */
                continue;
            }
        }

        ovsdb_jsonrpc_session_set_all_options(remote, options);
    }
}

/* Adds a remote called 'name' to 'svr' using 'options'.
 *
 * 'name' is first tried as a passive listener.  Any error other than
 * EAFNOSUPPORT is logged and makes the function return NULL.  If no listener
 * results, a single outgoing session to 'name' is opened instead.
 *
 * Returns the new remote, or NULL on failure. */
static struct ovsdb_jsonrpc_remote *
ovsdb_jsonrpc_server_add_remote(struct ovsdb_jsonrpc_server *svr,
                                const char *name,
                                const struct ovsdb_jsonrpc_options *options)
{
    struct ovsdb_jsonrpc_remote *remote;
    struct pstream *listener;
    int error;

    error = jsonrpc_pstream_open(name, &listener, options->dscp);
    if (error && error != EAFNOSUPPORT) {
        VLOG_ERR_RL(&rl, "%s: listen failed: %s",
                    name, ovs_strerror(error));
        return NULL;
    }

    remote = xmalloc(sizeof *remote);
    remote->server = svr;
    remote->listener = listener;
    list_init(&remote->sessions);
    remote->dscp = options->dscp;
    shash_add(&svr->remotes, name, remote);

    if (!listener) {
        /* Not a passive remote: open one active session to 'name'. */
        ovsdb_jsonrpc_session_create(remote, jsonrpc_session_open(name, true));
    }
    return remote;
}

/* Closes all sessions of the remote stored in 'node', closes its listener,
 * removes 'node' from the owning server's remotes table and frees the
 * remote. */
static void
ovsdb_jsonrpc_server_del_remote(struct shash_node *node)
{
    struct ovsdb_jsonrpc_remote *remote = node->data;

    ovsdb_jsonrpc_session_close_all(remote);
    pstream_close(remote->listener);
    shash_delete(&remote->server->remotes, node);
    free(remote);
}

/* Stores status information for the remote named 'target', which should have
 * been configured on 'svr' with a call to ovsdb_jsonrpc_server_set_remotes(),
 * into '*status'.  On success returns true, on failure (if 'svr' doesn't have
 * a remote named 'target' or if that remote is an inbound remote that has no
 * active connections) returns false.  On failure, 'status' will be zeroed.
 *
 * NOTE(review): "zeroed" is not exact for a listening remote without
 * sessions: ovsdb_jsonrpc_session_get_status() stores 'bound_port' before it
 * checks whether the session list is empty. */
bool
ovsdb_jsonrpc_server_get_remote_status(
    const struct ovsdb_jsonrpc_server *svr, const char *target,
    struct ovsdb_jsonrpc_remote_status *status)
{
    const struct ovsdb_jsonrpc_remote *remote;

    memset(status, 0, sizeof *status);

    remote = shash_find_data(&svr->remotes, target);
    return remote && ovsdb_jsonrpc_session_get_status(remote, status);
}

/* Frees the three lock-name strings held by 'status'.  'status' itself is
 * not freed. */
void
ovsdb_jsonrpc_server_free_remote_status(
    struct ovsdb_jsonrpc_remote_status *status)
{
    free(status->locks_held);
    free(status->locks_waiting);
    free(status->locks_lost);
}

/* Forces all of the JSON-RPC sessions managed by 'svr' to disconnect and
 * reconnect.
 */
void
ovsdb_jsonrpc_server_reconnect(struct ovsdb_jsonrpc_server *svr)
{
    struct shash_node *node;

    SHASH_FOR_EACH (node, &svr->remotes) {
        struct ovsdb_jsonrpc_remote *remote = node->data;

        ovsdb_jsonrpc_session_reconnect_all(remote);
    }
}

/* Performs one round of periodic work for 'svr': on every remote, accepts at
 * most one new connection (only while the session limit has not been
 * reached) and then runs all of the remote's sessions. */
void
ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
{
    struct shash_node *node;

    SHASH_FOR_EACH (node, &svr->remotes) {
        struct ovsdb_jsonrpc_remote *remote = node->data;

        if (remote->listener && svr->n_sessions < svr->max_sessions) {
            struct stream *stream;
            int error;

            error = pstream_accept(remote->listener, &stream);
            if (!error) {
                struct jsonrpc_session *js;
                js = jsonrpc_session_open_unreliably(jsonrpc_open(stream),
                                                     remote->dscp);
                ovsdb_jsonrpc_session_create(remote, js);
            } else if (error != EAGAIN) {
                /* EAGAIN is not logged; any other error is. */
                VLOG_WARN_RL(&rl, "%s: accept failed: %s",
                             pstream_get_name(remote->listener),
                             ovs_strerror(error));
            }
        }

        ovsdb_jsonrpc_session_run_all(remote);
    }
}

/* Registers the events that ovsdb_jsonrpc_server_run() is waiting for:
 * each listener (under the same session-limit condition as in run) and every
 * session. */
void
ovsdb_jsonrpc_server_wait(struct ovsdb_jsonrpc_server *svr)
{
    struct shash_node *node;

    SHASH_FOR_EACH (node, &svr->remotes) {
        struct ovsdb_jsonrpc_remote *remote = node->data;

        if (remote->listener && svr->n_sessions < svr->max_sessions) {
            pstream_wait(remote->listener);
        }

        ovsdb_jsonrpc_session_wait_all(remote);
    }
}

/* Adds some memory usage statistics for 'svr' into 'usage', for use with
 * memory_report(). */
void
ovsdb_jsonrpc_server_get_memory_usage(const struct ovsdb_jsonrpc_server *svr,
                                      struct simap *usage)
{
    struct shash_node *node;

    simap_increase(usage, "sessions", svr->n_sessions);
    SHASH_FOR_EACH (node, &svr->remotes) {
        struct ovsdb_jsonrpc_remote *remote = node->data;

        ovsdb_jsonrpc_session_get_memory_usage_all(remote, usage);
    }
}

/* JSON-RPC database server session. */

struct ovsdb_jsonrpc_session {
    struct list node;           /* Element in remote's sessions list. */
    struct ovsdb_session up;
    struct ovsdb_jsonrpc_remote *remote;
    size_t backlog_threshold;   /* See ovsdb_jsonrpc_session_run(). */
    size_t reply_backlog;       /* Backlog measured right after the last
                                 * received message was processed. */

    /* Triggers.
     */
    struct hmap triggers;       /* Hmap of "struct ovsdb_jsonrpc_trigger"s. */

    /* Monitors. */
    struct hmap monitors;       /* Hmap of "struct ovsdb_jsonrpc_monitor"s. */

    /* Network connectivity. */
    struct jsonrpc_session *js;  /* JSON-RPC session. */
    unsigned int js_seqno;       /* Last jsonrpc_session_get_seqno() value. */
};

static void ovsdb_jsonrpc_session_close(struct ovsdb_jsonrpc_session *);
static int ovsdb_jsonrpc_session_run(struct ovsdb_jsonrpc_session *);
static void ovsdb_jsonrpc_session_wait(struct ovsdb_jsonrpc_session *);
static void ovsdb_jsonrpc_session_get_memory_usage(
    const struct ovsdb_jsonrpc_session *, struct simap *usage);
static void ovsdb_jsonrpc_session_set_options(
    struct ovsdb_jsonrpc_session *, const struct ovsdb_jsonrpc_options *);
static void ovsdb_jsonrpc_session_got_request(struct ovsdb_jsonrpc_session *,
                                             struct jsonrpc_msg *);
static void ovsdb_jsonrpc_session_got_notify(struct ovsdb_jsonrpc_session *,
                                             struct jsonrpc_msg *);

/* Creates a session on 'remote' that wraps 'js', appends it to the remote's
 * session list and increments the server's session count.  The backlog
 * threshold starts at 1 MB. */
static struct ovsdb_jsonrpc_session *
ovsdb_jsonrpc_session_create(struct ovsdb_jsonrpc_remote *remote,
                             struct jsonrpc_session *js)
{
    struct ovsdb_jsonrpc_session *s;

    s = xzalloc(sizeof *s);
    ovsdb_session_init(&s->up, &remote->server->up);
    s->remote = remote;
    list_push_back(&remote->sessions, &s->node);
    hmap_init(&s->triggers);
    hmap_init(&s->monitors);
    s->reply_backlog = 0;
    s->backlog_threshold = 1024 * 1024;
    s->js = js;
    s->js_seqno = jsonrpc_session_get_seqno(js);

    remote->server->n_sessions++;

    return s;
}

/* Closes and frees 's': removes its monitors, releases its locks, completes
 * its triggers, closes the JSON-RPC session, unlinks it from the remote and
 * decrements the server's session count. */
static void
ovsdb_jsonrpc_session_close(struct ovsdb_jsonrpc_session *s)
{
    ovsdb_jsonrpc_monitor_remove_all(s);
    ovsdb_jsonrpc_session_unlock_all(s);
    ovsdb_jsonrpc_trigger_complete_all(s);

    hmap_destroy(&s->monitors);
    hmap_destroy(&s->triggers);

    jsonrpc_session_close(s->js);
    list_remove(&s->node);
    s->remote->server->n_sessions--;
    ovsdb_session_destroy(&s->up);
    free(s);
}

/* Runs session 's' once.  Returns 0 if the session is still alive afterward,
 * otherwise ETIMEDOUT (the caller then closes the session). */
static int
ovsdb_jsonrpc_session_run(struct ovsdb_jsonrpc_session *s)
{
    size_t backlog;

    jsonrpc_session_run(s->js);
    /* A changed sequence number means per-connection state is stale: drop
     * triggers, monitors and locks. */
    if (s->js_seqno != jsonrpc_session_get_seqno(s->js)) {
        s->js_seqno = jsonrpc_session_get_seqno(s->js);
        ovsdb_jsonrpc_trigger_complete_all(s);
        ovsdb_jsonrpc_monitor_remove_all(s);
        ovsdb_jsonrpc_session_unlock_all(s);
    }

    ovsdb_jsonrpc_trigger_complete_done(s);

    backlog = jsonrpc_session_get_backlog(s->js);
    if (!backlog) {
        /* Only read a new message once everything queued has been sent. */
        struct jsonrpc_msg *msg = jsonrpc_session_recv(s->js);
        if (msg) {
            if (msg->type == JSONRPC_REQUEST) {
                ovsdb_jsonrpc_session_got_request(s, msg);
            } else if (msg->type == JSONRPC_NOTIFY) {
                ovsdb_jsonrpc_session_got_notify(s, msg);
            } else {
                VLOG_WARN("%s: received unexpected %s message",
                          jsonrpc_session_get_name(s->js),
                          jsonrpc_msg_type_to_string(msg->type));
                jsonrpc_session_force_reconnect(s->js);
                jsonrpc_msg_destroy(msg);
            }
        }
        s->reply_backlog = jsonrpc_session_get_backlog(s->js);
    } else if (backlog > s->reply_backlog + s->backlog_threshold) {
        /* We have a lot of data queued to send to the client.  The data is
         * likely to be mostly monitor updates.  It is unlikely that the
         * monitor updates are due to transactions by 's', because we will not
         * let 's' make any more transactions until it drains its backlog to 0
         * (see previous 'if' case).  So the monitor updates are probably due
         * to transactions made by database clients other than 's'.  We can't
         * fix that by preventing 's' from executing more transactions.  We
         * could fix it by preventing every client from executing transactions,
         * but then one slow or hung client could prevent other clients from
         * doing useful work.
         *
         * Our solution is to cap the maximum backlog to O(1) in the amount of
         * data in the database.  If the backlog exceeds that amount, then we
         * disconnect the client.  When it reconnects, it can fetch the entire
         * contents of the database using less data than was previously
         * backlogged.
         */
        size_t monitor_length;

        monitor_length = ovsdb_jsonrpc_monitor_json_length_all(s);
        if (backlog > s->reply_backlog + monitor_length * 2) {
            VLOG_INFO("%s: %zu bytes backlogged but a complete replica "
                      "would only take %zu bytes, disconnecting",
                      jsonrpc_session_get_name(s->js),
                      backlog - s->reply_backlog, monitor_length);
            jsonrpc_session_force_reconnect(s->js);
        } else {
            /* The backlog is not unreasonably big.  Only check again after it
             * becomes much bigger. */
            s->backlog_threshold = 2 * MAX(s->backlog_threshold * 2,
                                           monitor_length);
        }
    }

    return jsonrpc_session_is_alive(s->js) ? 0 : ETIMEDOUT;
}

/* Applies the max backoff, probe interval and DSCP from 'options' to
 * 'session''s JSON-RPC session. */
static void
ovsdb_jsonrpc_session_set_options(struct ovsdb_jsonrpc_session *session,
                                  const struct ovsdb_jsonrpc_options *options)
{
    jsonrpc_session_set_max_backoff(session->js, options->max_backoff);
    jsonrpc_session_set_probe_interval(session->js, options->probe_interval);
    jsonrpc_session_set_dscp(session->js, options->dscp);
}

/* Runs every session of 'remote' and closes those whose run reported an
 * error. */
static void
ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote)
{
    struct ovsdb_jsonrpc_session *s, *next;

    /* _SAFE iteration: closing a session unlinks it from the list. */
    LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
        int error = ovsdb_jsonrpc_session_run(s);
        if (error) {
            ovsdb_jsonrpc_session_close(s);
        }
    }
}

/* Registers wake-up conditions for 's'.  Waiting for incoming data is only
 * requested while the send backlog is empty, mirroring
 * ovsdb_jsonrpc_session_run(). */
static void
ovsdb_jsonrpc_session_wait(struct ovsdb_jsonrpc_session *s)
{
    jsonrpc_session_wait(s->js);
    if (!jsonrpc_session_get_backlog(s->js)) {
        jsonrpc_session_recv_wait(s->js);
    }
}

/* Calls ovsdb_jsonrpc_session_wait() on every session of 'remote'. */
static void
ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *remote)
{
    struct ovsdb_jsonrpc_session *s;

    LIST_FOR_EACH (s, node, &remote->sessions) {
        ovsdb_jsonrpc_session_wait(s);
    }
}

/* Adds 's''s trigger count, monitor count and send backlog to the
 * "triggers", "monitors" and "backlog" entries of 'usage'. */
static void
ovsdb_jsonrpc_session_get_memory_usage(const struct ovsdb_jsonrpc_session *s,
                                       struct simap *usage)
{
    simap_increase(usage, "triggers", hmap_count(&s->triggers));
    simap_increase(usage, "monitors", hmap_count(&s->monitors));
    simap_increase(usage, "backlog", jsonrpc_session_get_backlog(s->js));
}

/* Accumulates memory usage of every session of 'remote' into 'usage'. */
static void
ovsdb_jsonrpc_session_get_memory_usage_all(
    const struct ovsdb_jsonrpc_remote *remote,
    struct simap *usage)
{
    struct ovsdb_jsonrpc_session *s;

    LIST_FOR_EACH (s, node, &remote->sessions) {
        ovsdb_jsonrpc_session_get_memory_usage(s, usage);
    }
}

/* Closes every session of 'remote'. */
static void
ovsdb_jsonrpc_session_close_all(struct ovsdb_jsonrpc_remote *remote)
{
    struct ovsdb_jsonrpc_session *s, *next;

    LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
        ovsdb_jsonrpc_session_close(s);
    }
}

/* Forces all of the JSON-RPC sessions managed by 'remote' to disconnect and
 * reconnect. */
static void
ovsdb_jsonrpc_session_reconnect_all(struct ovsdb_jsonrpc_remote *remote)
{
    struct ovsdb_jsonrpc_session *s, *next;

    LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) {
        jsonrpc_session_force_reconnect(s->js);
        /* A session that is no longer alive after the forced reconnect is
         * closed outright. */
        if (!jsonrpc_session_is_alive(s->js)) {
            ovsdb_jsonrpc_session_close(s);
        }
    }
}

/* Sets the options for all of the JSON-RPC sessions managed by 'remote' to
 * 'options'. */
static void
ovsdb_jsonrpc_session_set_all_options(
    struct ovsdb_jsonrpc_remote *remote,
    const struct ovsdb_jsonrpc_options *options)
{
    struct ovsdb_jsonrpc_session *s;

    if (remote->listener) {
        int error;

        error = pstream_set_dscp(remote->listener, options->dscp);
        if (error) {
            VLOG_ERR("%s: set_dscp failed %s",
                     pstream_get_name(remote->listener), ovs_strerror(error));
        } else {
            /* Only remember the new value once the listener accepted it. */
            remote->dscp = options->dscp;
        }
        /*
         * XXX race window between setting dscp to listening socket
         * and accepting socket. Accepted socket may have old dscp value.
         * Ignore this race window for now.
         */
    }
    LIST_FOR_EACH (s, node, &remote->sessions) {
        ovsdb_jsonrpc_session_set_options(s, options);
    }
}

/* Fills '*status' from the first session in 'remote''s session list plus the
 * listener's bound port (0 when there is no listener).
 *
 * Returns false, with only 'bound_port' stored, if 'remote' has no sessions.
 * Otherwise returns true; the three 'locks_*' strings are then newly
 * allocated space-separated lists of lock names. */
static bool
ovsdb_jsonrpc_session_get_status(const struct ovsdb_jsonrpc_remote *remote,
                                 struct ovsdb_jsonrpc_remote_status *status)
{
    const struct ovsdb_jsonrpc_session *s;
    const struct jsonrpc_session *js;
    struct ovsdb_lock_waiter *waiter;
    struct reconnect_stats rstats;
    struct ds locks_held, locks_waiting, locks_lost;

    status->bound_port = (remote->listener
                          ? pstream_get_bound_port(remote->listener)
                          : htons(0));

    if (list_is_empty(&remote->sessions)) {
        return false;
    }
    /* Only the first session in the list is reported on. */
    s = CONTAINER_OF(remote->sessions.next, struct ovsdb_jsonrpc_session, node);
    js = s->js;

    status->is_connected = jsonrpc_session_is_connected(js);
    status->last_error = jsonrpc_session_get_status(js);

    jsonrpc_session_get_reconnect_stats(js, &rstats);
    status->state = rstats.state;
    /* UINT_MAX is passed through unchanged instead of being divided. */
    status->sec_since_connect = rstats.msec_since_connect == UINT_MAX
        ? UINT_MAX : rstats.msec_since_connect / 1000;
    status->sec_since_disconnect = rstats.msec_since_disconnect == UINT_MAX
        ? UINT_MAX : rstats.msec_since_disconnect / 1000;

    ds_init(&locks_held);
    ds_init(&locks_waiting);
    ds_init(&locks_lost);
    HMAP_FOR_EACH (waiter, session_node, &s->up.waiters) {
        struct ds *string;

        /* Owner => "held"; not owner but in wait mode => "waiting";
         * anything else => "lost". */
        string = (ovsdb_lock_waiter_is_owner(waiter) ? &locks_held
                  : waiter->mode == OVSDB_LOCK_WAIT ? &locks_waiting
                  : &locks_lost);
        if (string->length) {
            ds_put_char(string, ' ');
        }
        ds_put_cstr(string, waiter->lock_name);
    }
    status->locks_held = ds_steal_cstr(&locks_held);
    status->locks_waiting = ds_steal_cstr(&locks_waiting);
    status->locks_lost = ds_steal_cstr(&locks_lost);

    status->n_connections = list_size(&remote->sessions);

    return true;
}

/* Examines 'request' to determine the database to which it relates, and then
 * searches 's' to find that database:
 *
 *    - If successful, returns the database and sets '*replyp' to NULL.
 *
 *    - If no such database exists, returns NULL and sets '*replyp' to an
 *      appropriate JSON-RPC error reply, owned by the caller.
 */
static struct ovsdb *
ovsdb_jsonrpc_lookup_db(const struct ovsdb_jsonrpc_session *s,
                        const struct jsonrpc_msg *request,
                        struct jsonrpc_msg **replyp)
{
    struct json_array *params;
    struct ovsdb_error *error;
    const char *db_name;
    struct ovsdb *db;

    params = json_array(request->params);
    if (!params->n || params->elems[0]->type != JSON_STRING) {
        /* NOTE(review): the message below ends right after "begin with";
         * a placeholder naming the expected first element looks to be
         * missing -- confirm against the upstream source. */
        error = ovsdb_syntax_error(
            request->params, NULL,
            "%s request params must begin with ", request->method);
        goto error;
    }

    db_name = params->elems[0]->u.string;
    db = shash_find_data(&s->up.server->dbs, db_name);
    if (!db) {
        error = ovsdb_syntax_error(
            request->params, "unknown database",
            "%s request specifies unknown database %s",
            request->method, db_name);
        goto error;
    }

    *replyp = NULL;
    return db;

error:
    *replyp = jsonrpc_create_reply(ovsdb_error_to_json(error), request->id);
    ovsdb_error_destroy(error);
    return NULL;
}

/* Extracts the lock name from 'request', whose params must be an array with
 * exactly one string element that passes ovsdb_parser_is_id().
 *
 * On success stores the name (pointing into 'request') in '*lock_namep' and
 * returns NULL.  Otherwise sets '*lock_namep' to NULL and returns a syntax
 * error. */
static struct ovsdb_error *
ovsdb_jsonrpc_session_parse_lock_name(const struct jsonrpc_msg *request,
                                      const char **lock_namep)
{
    const struct json_array *params;

    params = json_array(request->params);
    if (params->n != 1 || params->elems[0]->type != JSON_STRING ||
        !ovsdb_parser_is_id(json_string(params->elems[0]))) {
        *lock_namep = NULL;
        /* NOTE(review): the message below ends right after "must be"; a
         * placeholder looks to be missing -- confirm against upstream. */
        return ovsdb_syntax_error(request->params, NULL,
                                  "%s request params must be ",
                                  request->method);
    }

    *lock_namep = json_string(params->elems[0]);
    return NULL;
}

/* Sends a notification named 'method' with 'lock_name' as its single
 * parameter to the JSON-RPC session that embeds 'session'. */
static void
ovsdb_jsonrpc_session_notify(struct ovsdb_session *session,
                             const char *lock_name,
                             const char *method)
{
    struct ovsdb_jsonrpc_session *s;
    struct json *params;

    s = CONTAINER_OF(session, struct ovsdb_jsonrpc_session, up);
    params = json_array_create_1(json_string_create(lock_name));
    jsonrpc_session_send(s->js, jsonrpc_create_notify(method, params));
}

/* Handles a "lock" or "steal" request (selected by 'mode') from 's'.
 * Returns the reply to send: an object whose "locked" member tells whether
 * 's' now owns the lock, or an error reply. */
static struct jsonrpc_msg *
ovsdb_jsonrpc_session_lock(struct ovsdb_jsonrpc_session *s,
                           struct jsonrpc_msg *request,
                           enum ovsdb_lock_mode mode)
{
    struct ovsdb_lock_waiter *waiter;
    struct jsonrpc_msg *reply;
    struct ovsdb_error *error;
    struct ovsdb_session *victim;
    const char *lock_name;
    struct json *result;

    error = ovsdb_jsonrpc_session_parse_lock_name(request, &lock_name);
    if (error) {
        goto error;
    }

    /* Report error if this session has issued a "lock" or "steal" without a
     * matching "unlock" for this lock. */
    waiter = ovsdb_session_get_lock_waiter(&s->up, lock_name);
    if (waiter) {
        error = ovsdb_syntax_error(
            request->params, NULL,
            "must issue \"unlock\" before new \"%s\"", request->method);
        goto error;
    }

    /* Get the lock, add us as a waiter. */
    waiter = ovsdb_server_lock(&s->remote->server->up, &s->up, lock_name, mode,
                               &victim);
    if (victim) {
        /* Tell the session reported as 'victim' that its lock was stolen. */
        ovsdb_jsonrpc_session_notify(victim, lock_name, "stolen");
    }

    result = json_object_create();
    json_object_put(result, "locked",
                    json_boolean_create(ovsdb_lock_waiter_is_owner(waiter)));

    return jsonrpc_create_reply(result, request->id);

error:
    reply = jsonrpc_create_reply(ovsdb_error_to_json(error), request->id);
    ovsdb_error_destroy(error);
    return reply;
}

/* Releases every lock that 's' holds or is waiting for. */
static void
ovsdb_jsonrpc_session_unlock_all(struct ovsdb_jsonrpc_session *s)
{
    struct ovsdb_lock_waiter *waiter, *next;

    HMAP_FOR_EACH_SAFE (waiter, next, session_node, &s->up.waiters) {
        ovsdb_jsonrpc_session_unlock__(waiter);
    }
}

/* Removes 'waiter' from its lock (if it still has one), sends a "locked"
 * notification to the session that becomes the new owner, if any, and
 * destroys 'waiter'. */
static void
ovsdb_jsonrpc_session_unlock__(struct ovsdb_lock_waiter *waiter)
{
    struct ovsdb_lock *lock = waiter->lock;

    if (lock) {
        struct ovsdb_session *new_owner = ovsdb_lock_waiter_remove(waiter);
        if (new_owner) {
            ovsdb_jsonrpc_session_notify(new_owner, lock->name, "locked");
        } else {
            /* ovsdb_server_lock() might have freed 'lock'. */
        }
    }

    ovsdb_lock_waiter_destroy(waiter);
}

/* Handles an "unlock" request from 's'.  Returns the reply to send: an empty
 * object on success, or an error reply if the params are malformed or 's'
 * has no waiter for the named lock. */
static struct jsonrpc_msg *
ovsdb_jsonrpc_session_unlock(struct ovsdb_jsonrpc_session *s,
                             struct jsonrpc_msg *request)
{
    struct ovsdb_lock_waiter *waiter;
    struct jsonrpc_msg *reply;
    struct ovsdb_error *error;
    const char *lock_name;

    error = ovsdb_jsonrpc_session_parse_lock_name(request, &lock_name);
    if (error) {
        goto error;
    }

    /* Report error if this session has not issued a "lock" or "steal" for this
     * lock.
     */
    waiter = ovsdb_session_get_lock_waiter(&s->up, lock_name);
    if (!waiter) {
        error = ovsdb_syntax_error(
            request->params, NULL, "\"unlock\" without \"lock\" or \"steal\"");
        goto error;
    }

    ovsdb_jsonrpc_session_unlock__(waiter);

    return jsonrpc_create_reply(json_object_create(), request->id);

error:
    reply = jsonrpc_create_reply(ovsdb_error_to_json(error), request->id);
    ovsdb_error_destroy(error);
    return reply;
}

/* Starts a "transact" request on 'db' by creating a trigger for it.
 * Ownership of 'request''s id and params passes to the trigger, after which
 * 'request' is destroyed.  Always returns NULL: no reply is produced
 * here. */
static struct jsonrpc_msg *
execute_transaction(struct ovsdb_jsonrpc_session *s, struct ovsdb *db,
                    struct jsonrpc_msg *request)
{
    ovsdb_jsonrpc_trigger_create(s, db, request->id, request->params);
    /* Clear the members handed to the trigger so that destroying 'request'
     * does not free them. */
    request->id = NULL;
    request->params = NULL;
    jsonrpc_msg_destroy(request);
    return NULL;
}

/* Dispatches 'request' by method name.  Whenever a reply is produced, it is
 * sent and 'request' is destroyed here; for "transact" without a lookup
 * error, 'request' has already been consumed by execute_transaction(). */
static void
ovsdb_jsonrpc_session_got_request(struct ovsdb_jsonrpc_session *s,
                                  struct jsonrpc_msg *request)
{
    struct jsonrpc_msg *reply;

    if (!strcmp(request->method, "transact")) {
        struct ovsdb *db = ovsdb_jsonrpc_lookup_db(s, request, &reply);
        if (!reply) {
            reply = execute_transaction(s, db, request);
        }
    } else if (!strcmp(request->method, "monitor")) {
        struct ovsdb *db = ovsdb_jsonrpc_lookup_db(s, request, &reply);
        if (!reply) {
            reply = jsonrpc_create_reply(
                ovsdb_jsonrpc_monitor_create(s, db, request->params),
                request->id);
        }
    } else if (!strcmp(request->method, "monitor_cancel")) {
        reply = ovsdb_jsonrpc_monitor_cancel(s, json_array(request->params),
                                             request->id);
    } else if (!strcmp(request->method, "get_schema")) {
        struct ovsdb *db = ovsdb_jsonrpc_lookup_db(s, request, &reply);
        if (!reply) {
            reply = jsonrpc_create_reply(ovsdb_schema_to_json(db->schema),
                                         request->id);
        }
    } else if (!strcmp(request->method, "list_dbs")) {
        size_t n_dbs = shash_count(&s->up.server->dbs);
        struct shash_node *node;
        struct json **dbs;
        size_t i;

        /* Build a JSON array of the names of all served databases. */
        dbs = xmalloc(n_dbs * sizeof *dbs);
        i = 0;
        SHASH_FOR_EACH (node, &s->up.server->dbs) {
            dbs[i++] = json_string_create(node->name);
        }
        reply = jsonrpc_create_reply(json_array_create(dbs, n_dbs),
                                     request->id);
    } else if (!strcmp(request->method, "lock")) {
        reply = ovsdb_jsonrpc_session_lock(s, request, OVSDB_LOCK_WAIT);
    } else if (!strcmp(request->method, "steal")) {
        reply = ovsdb_jsonrpc_session_lock(s, request, OVSDB_LOCK_STEAL);
    } else if (!strcmp(request->method, "unlock")) {
        reply = ovsdb_jsonrpc_session_unlock(s, request);
    } else if (!strcmp(request->method, "echo")) {
        reply = jsonrpc_create_reply(json_clone(request->params), request->id);
    } else {
        reply = jsonrpc_create_error(json_string_create("unknown method"),
                                     request->id);
    }

    if (reply) {
        jsonrpc_msg_destroy(request);
        jsonrpc_session_send(s->js, reply);
    }
}

/* Handles a "cancel" notification: if its params hold exactly one element
 * and a trigger with that ID exists on 's', completes that trigger.
 * Otherwise does nothing. */
static void
execute_cancel(struct ovsdb_jsonrpc_session *s, struct jsonrpc_msg *request)
{
    if (json_array(request->params)->n == 1) {
        struct ovsdb_jsonrpc_trigger *t;
        struct json *id;

        id = request->params->u.array.elems[0];
        t = ovsdb_jsonrpc_trigger_find(s, id, json_hash(id, 0));
        if (t) {
            ovsdb_jsonrpc_trigger_complete(t);
        }
    }
}

/* Handles notification 'request' and destroys it.  Only "cancel" is acted
 * upon; other methods are ignored. */
static void
ovsdb_jsonrpc_session_got_notify(struct ovsdb_jsonrpc_session *s,
                                 struct jsonrpc_msg *request)
{
    if (!strcmp(request->method, "cancel")) {
        execute_cancel(s, request);
    }
    jsonrpc_msg_destroy(request);
}

/* JSON-RPC database server triggers.
 *
 * (Every transaction is treated as a trigger even if it doesn't actually have
 * any "wait" operations.) */

struct ovsdb_jsonrpc_trigger {
    struct ovsdb_trigger trigger;
    struct hmap_node hmap_node; /* In session's "triggers" hmap. */
    struct json *id;
};

static void
ovsdb_jsonrpc_trigger_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db,
                             struct json *id, struct json *params)
{
    struct ovsdb_jsonrpc_trigger *t;
    size_t hash;

    /* Check for duplicate ID. */
    hash = json_hash(id, 0);
    t = ovsdb_jsonrpc_trigger_find(s, id, hash);
    if (t) {
        struct jsonrpc_msg *msg;

        msg = jsonrpc_create_error(json_string_create("duplicate request ID"),
                                   id);
        jsonrpc_session_send(s->js, msg);
        json_destroy(id);
        json_destroy(params);
        return;
    }

    /* Insert into trigger table.
*/ t = xmalloc(sizeof *t); ovsdb_trigger_init(&s->up, db, &t->trigger, params, time_msec()); t->id = id; hmap_insert(&s->triggers, &t->hmap_node, hash); /* Complete early if possible. */ if (ovsdb_trigger_is_complete(&t->trigger)) { ovsdb_jsonrpc_trigger_complete(t); } } static struct ovsdb_jsonrpc_trigger * ovsdb_jsonrpc_trigger_find(struct ovsdb_jsonrpc_session *s, const struct json *id, size_t hash) { struct ovsdb_jsonrpc_trigger *t; HMAP_FOR_EACH_WITH_HASH (t, hmap_node, hash, &s->triggers) { if (json_equal(t->id, id)) { return t; } } return NULL; } static void ovsdb_jsonrpc_trigger_complete(struct ovsdb_jsonrpc_trigger *t) { struct ovsdb_jsonrpc_session *s; s = CONTAINER_OF(t->trigger.session, struct ovsdb_jsonrpc_session, up); if (jsonrpc_session_is_connected(s->js)) { struct jsonrpc_msg *reply; struct json *result; result = ovsdb_trigger_steal_result(&t->trigger); if (result) { reply = jsonrpc_create_reply(result, t->id); } else { reply = jsonrpc_create_error(json_string_create("canceled"), t->id); } jsonrpc_session_send(s->js, reply); } json_destroy(t->id); ovsdb_trigger_destroy(&t->trigger); hmap_remove(&s->triggers, &t->hmap_node); free(t); } static void ovsdb_jsonrpc_trigger_complete_all(struct ovsdb_jsonrpc_session *s) { struct ovsdb_jsonrpc_trigger *t, *next; HMAP_FOR_EACH_SAFE (t, next, hmap_node, &s->triggers) { ovsdb_jsonrpc_trigger_complete(t); } } static void ovsdb_jsonrpc_trigger_complete_done(struct ovsdb_jsonrpc_session *s) { while (!list_is_empty(&s->up.completions)) { struct ovsdb_jsonrpc_trigger *t = CONTAINER_OF(s->up.completions.next, struct ovsdb_jsonrpc_trigger, trigger.node); ovsdb_jsonrpc_trigger_complete(t); } } /* JSON-RPC database table monitors. */ enum ovsdb_jsonrpc_monitor_selection { OJMS_INITIAL = 1 << 0, /* All rows when monitor is created. */ OJMS_INSERT = 1 << 1, /* New rows. */ OJMS_DELETE = 1 << 2, /* Deleted rows. */ OJMS_MODIFY = 1 << 3 /* Modified rows. */ }; /* A particular column being monitored. 
*/ struct ovsdb_jsonrpc_monitor_column { const struct ovsdb_column *column; enum ovsdb_jsonrpc_monitor_selection select; }; /* A particular table being monitored. */ struct ovsdb_jsonrpc_monitor_table { const struct ovsdb_table *table; /* This is the union (bitwise-OR) of the 'select' values in all of the * members of 'columns' below. */ enum ovsdb_jsonrpc_monitor_selection select; /* Columns being monitored. */ struct ovsdb_jsonrpc_monitor_column *columns; size_t n_columns; }; /* A collection of tables being monitored. */ struct ovsdb_jsonrpc_monitor { struct ovsdb_replica replica; struct ovsdb_jsonrpc_session *session; struct ovsdb *db; struct hmap_node node; /* In ovsdb_jsonrpc_session's "monitors". */ struct json *monitor_id; struct shash tables; /* Holds "struct ovsdb_jsonrpc_monitor_table"s. */ }; static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class; struct ovsdb_jsonrpc_monitor *ovsdb_jsonrpc_monitor_find( struct ovsdb_jsonrpc_session *, const struct json *monitor_id); static void ovsdb_jsonrpc_monitor_destroy(struct ovsdb_replica *); static struct json *ovsdb_jsonrpc_monitor_get_initial( const struct ovsdb_jsonrpc_monitor *); static size_t ovsdb_jsonrpc_monitor_json_length( const struct ovsdb_jsonrpc_monitor *); static bool parse_bool(struct ovsdb_parser *parser, const char *name, bool default_value) { const struct json *json; json = ovsdb_parser_member(parser, name, OP_BOOLEAN | OP_OPTIONAL); return json ? 
json_boolean(json) : default_value; } struct ovsdb_jsonrpc_monitor * ovsdb_jsonrpc_monitor_find(struct ovsdb_jsonrpc_session *s, const struct json *monitor_id) { struct ovsdb_jsonrpc_monitor *m; HMAP_FOR_EACH_WITH_HASH (m, node, json_hash(monitor_id, 0), &s->monitors) { if (json_equal(m->monitor_id, monitor_id)) { return m; } } return NULL; } static void ovsdb_jsonrpc_add_monitor_column(struct ovsdb_jsonrpc_monitor_table *mt, const struct ovsdb_column *column, enum ovsdb_jsonrpc_monitor_selection select, size_t *allocated_columns) { struct ovsdb_jsonrpc_monitor_column *c; if (mt->n_columns >= *allocated_columns) { mt->columns = x2nrealloc(mt->columns, allocated_columns, sizeof *mt->columns); } c = &mt->columns[mt->n_columns++]; c->column = column; c->select = select; } static int compare_ovsdb_jsonrpc_monitor_column(const void *a_, const void *b_) { const struct ovsdb_jsonrpc_monitor_column *a = a_; const struct ovsdb_jsonrpc_monitor_column *b = b_; return a->column < b->column ? -1 : a->column > b->column; } static struct ovsdb_error * WARN_UNUSED_RESULT ovsdb_jsonrpc_parse_monitor_request(struct ovsdb_jsonrpc_monitor_table *mt, const struct json *monitor_request, size_t *allocated_columns) { const struct ovsdb_table_schema *ts = mt->table->schema; enum ovsdb_jsonrpc_monitor_selection select; const struct json *columns, *select_json; struct ovsdb_parser parser; struct ovsdb_error *error; ovsdb_parser_init(&parser, monitor_request, "table %s", ts->name); columns = ovsdb_parser_member(&parser, "columns", OP_ARRAY | OP_OPTIONAL); select_json = ovsdb_parser_member(&parser, "select", OP_OBJECT | OP_OPTIONAL); error = ovsdb_parser_finish(&parser); if (error) { return error; } if (select_json) { select = 0; ovsdb_parser_init(&parser, select_json, "table %s select", ts->name); if (parse_bool(&parser, "initial", true)) { select |= OJMS_INITIAL; } if (parse_bool(&parser, "insert", true)) { select |= OJMS_INSERT; } if (parse_bool(&parser, "delete", true)) { select |= 
OJMS_DELETE; } if (parse_bool(&parser, "modify", true)) { select |= OJMS_MODIFY; } error = ovsdb_parser_finish(&parser); if (error) { return error; } } else { select = OJMS_INITIAL | OJMS_INSERT | OJMS_DELETE | OJMS_MODIFY; } mt->select |= select; if (columns) { size_t i; if (columns->type != JSON_ARRAY) { return ovsdb_syntax_error(columns, NULL, "array of column names expected"); } for (i = 0; i < columns->u.array.n; i++) { const struct ovsdb_column *column; const char *s; if (columns->u.array.elems[i]->type != JSON_STRING) { return ovsdb_syntax_error(columns, NULL, "array of column names expected"); } s = columns->u.array.elems[i]->u.string; column = shash_find_data(&mt->table->schema->columns, s); if (!column) { return ovsdb_syntax_error(columns, NULL, "%s is not a valid " "column name", s); } ovsdb_jsonrpc_add_monitor_column(mt, column, select, allocated_columns); } } else { struct shash_node *node; SHASH_FOR_EACH (node, &ts->columns) { const struct ovsdb_column *column = node->data; if (column->index != OVSDB_COL_UUID) { ovsdb_jsonrpc_add_monitor_column(mt, column, select, allocated_columns); } } } return NULL; } static struct json * ovsdb_jsonrpc_monitor_create(struct ovsdb_jsonrpc_session *s, struct ovsdb *db, struct json *params) { struct ovsdb_jsonrpc_monitor *m = NULL; struct json *monitor_id, *monitor_requests; struct ovsdb_error *error = NULL; struct shash_node *node; struct json *json; if (json_array(params)->n != 3) { error = ovsdb_syntax_error(params, NULL, "invalid parameters"); goto error; } monitor_id = params->u.array.elems[1]; monitor_requests = params->u.array.elems[2]; if (monitor_requests->type != JSON_OBJECT) { error = ovsdb_syntax_error(monitor_requests, NULL, "monitor-requests must be object"); goto error; } if (ovsdb_jsonrpc_monitor_find(s, monitor_id)) { error = ovsdb_syntax_error(monitor_id, NULL, "duplicate monitor ID"); goto error; } m = xzalloc(sizeof *m); ovsdb_replica_init(&m->replica, &ovsdb_jsonrpc_replica_class); 
ovsdb_add_replica(db, &m->replica); m->session = s; m->db = db; hmap_insert(&s->monitors, &m->node, json_hash(monitor_id, 0)); m->monitor_id = json_clone(monitor_id); shash_init(&m->tables); SHASH_FOR_EACH (node, json_object(monitor_requests)) { const struct ovsdb_table *table; struct ovsdb_jsonrpc_monitor_table *mt; size_t allocated_columns; const struct json *mr_value; size_t i; table = ovsdb_get_table(m->db, node->name); if (!table) { error = ovsdb_syntax_error(NULL, NULL, "no table named %s", node->name); goto error; } mt = xzalloc(sizeof *mt); mt->table = table; shash_add(&m->tables, table->schema->name, mt); /* Parse columns. */ mr_value = node->data; allocated_columns = 0; if (mr_value->type == JSON_ARRAY) { const struct json_array *array = &mr_value->u.array; for (i = 0; i < array->n; i++) { error = ovsdb_jsonrpc_parse_monitor_request( mt, array->elems[i], &allocated_columns); if (error) { goto error; } } } else { error = ovsdb_jsonrpc_parse_monitor_request( mt, mr_value, &allocated_columns); if (error) { goto error; } } /* Check for duplicate columns. 
*/ qsort(mt->columns, mt->n_columns, sizeof *mt->columns, compare_ovsdb_jsonrpc_monitor_column); for (i = 1; i < mt->n_columns; i++) { if (mt->columns[i].column == mt->columns[i - 1].column) { error = ovsdb_syntax_error(mr_value, NULL, "column %s " "mentioned more than once", mt->columns[i].column->name); goto error; } } } return ovsdb_jsonrpc_monitor_get_initial(m); error: if (m) { ovsdb_remove_replica(m->db, &m->replica); } json = ovsdb_error_to_json(error); ovsdb_error_destroy(error); return json; } static struct jsonrpc_msg * ovsdb_jsonrpc_monitor_cancel(struct ovsdb_jsonrpc_session *s, struct json_array *params, const struct json *request_id) { if (params->n != 1) { return jsonrpc_create_error(json_string_create("invalid parameters"), request_id); } else { struct ovsdb_jsonrpc_monitor *m; m = ovsdb_jsonrpc_monitor_find(s, params->elems[0]); if (!m) { return jsonrpc_create_error(json_string_create("unknown monitor"), request_id); } else { ovsdb_remove_replica(m->db, &m->replica); return jsonrpc_create_reply(json_object_create(), request_id); } } } static void ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) { struct ovsdb_jsonrpc_monitor *m, *next; HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) { ovsdb_remove_replica(m->db, &m->replica); } } /* Returns an overestimate of the number of bytes of JSON data required to * report the current contents of the database over all the monitors currently * configured in 's'. 
*/ static size_t ovsdb_jsonrpc_monitor_json_length_all(struct ovsdb_jsonrpc_session *s) { struct ovsdb_jsonrpc_monitor *m; size_t length; length = 0; HMAP_FOR_EACH (m, node, &s->monitors) { length += ovsdb_jsonrpc_monitor_json_length(m); } return length; } static struct ovsdb_jsonrpc_monitor * ovsdb_jsonrpc_monitor_cast(struct ovsdb_replica *replica) { ovs_assert(replica->class == &ovsdb_jsonrpc_replica_class); return CONTAINER_OF(replica, struct ovsdb_jsonrpc_monitor, replica); } struct ovsdb_jsonrpc_monitor_aux { bool initial; /* Sending initial contents of table? */ const struct ovsdb_jsonrpc_monitor *monitor; struct json *json; /* JSON for the whole transaction. */ /* Current table. */ struct ovsdb_jsonrpc_monitor_table *mt; struct json *table_json; /* JSON for table's transaction. */ }; static bool any_reportable_change(const struct ovsdb_jsonrpc_monitor_table *mt, const unsigned long int *changed) { size_t i; for (i = 0; i < mt->n_columns; i++) { const struct ovsdb_jsonrpc_monitor_column *c = &mt->columns[i]; unsigned int idx = c->column->index; if (c->select & OJMS_MODIFY && bitmap_is_set(changed, idx)) { return true; } } return false; } static bool ovsdb_jsonrpc_monitor_change_cb(const struct ovsdb_row *old, const struct ovsdb_row *new, const unsigned long int *changed, void *aux_) { struct ovsdb_jsonrpc_monitor_aux *aux = aux_; const struct ovsdb_jsonrpc_monitor *m = aux->monitor; struct ovsdb_table *table = new ? new->table : old->table; enum ovsdb_jsonrpc_monitor_selection type; struct json *old_json, *new_json; struct json *row_json; char uuid[UUID_LEN + 1]; size_t i; if (!aux->mt || table != aux->mt->table) { aux->mt = shash_find_data(&m->tables, table->schema->name); aux->table_json = NULL; if (!aux->mt) { /* We don't care about rows in this table at all. Tell the caller * to skip it. */ return false; } } type = (aux->initial ? OJMS_INITIAL : !old ? OJMS_INSERT : !new ? 
OJMS_DELETE : OJMS_MODIFY); if (!(aux->mt->select & type)) { /* We don't care about this type of change (but do want to be called * back for changes to other rows in the same table). */ return true; } if (type == OJMS_MODIFY && !any_reportable_change(aux->mt, changed)) { /* Nothing of interest changed. */ return true; } old_json = new_json = NULL; if (type & (OJMS_DELETE | OJMS_MODIFY)) { old_json = json_object_create(); } if (type & (OJMS_INITIAL | OJMS_INSERT | OJMS_MODIFY)) { new_json = json_object_create(); } for (i = 0; i < aux->mt->n_columns; i++) { const struct ovsdb_jsonrpc_monitor_column *c = &aux->mt->columns[i]; const struct ovsdb_column *column = c->column; unsigned int idx = c->column->index; if (!(type & c->select)) { /* We don't care about this type of change for this particular * column (but we will care about it for some other column). */ continue; } if ((type == OJMS_MODIFY && bitmap_is_set(changed, idx)) || type == OJMS_DELETE) { json_object_put(old_json, column->name, ovsdb_datum_to_json(&old->fields[idx], &column->type)); } if (type & (OJMS_INITIAL | OJMS_INSERT | OJMS_MODIFY)) { json_object_put(new_json, column->name, ovsdb_datum_to_json(&new->fields[idx], &column->type)); } } /* Create JSON object for transaction overall. */ if (!aux->json) { aux->json = json_object_create(); } /* Create JSON object for transaction on this table. */ if (!aux->table_json) { aux->table_json = json_object_create(); json_object_put(aux->json, aux->mt->table->schema->name, aux->table_json); } /* Create JSON object for transaction on this row. */ row_json = json_object_create(); if (old_json) { json_object_put(row_json, "old", old_json); } if (new_json) { json_object_put(row_json, "new", new_json); } /* Add JSON row to JSON table. */ snprintf(uuid, sizeof uuid, UUID_FMT, UUID_ARGS(ovsdb_row_get_uuid(new ? 
new : old))); json_object_put(aux->table_json, uuid, row_json); return true; } /* Returns an overestimate of the number of bytes of JSON data required to * report the current contents of the database over monitor 'm'. */ static size_t ovsdb_jsonrpc_monitor_json_length(const struct ovsdb_jsonrpc_monitor *m) { const struct shash_node *node; size_t length; /* Top-level overhead of monitor JSON. */ length = 256; SHASH_FOR_EACH (node, &m->tables) { const struct ovsdb_jsonrpc_monitor_table *mt = node->data; const struct ovsdb_table *table = mt->table; const struct ovsdb_row *row; size_t i; /* Per-table JSON overhead: "
":{...}. */ length += strlen(table->schema->name) + 32; /* Per-row JSON overhead: ,"":{"old":{...},"new":{...}} */ length += hmap_count(&table->rows) * (UUID_LEN + 32); /* Per-row, per-column JSON overhead: ,"": */ for (i = 0; i < mt->n_columns; i++) { const struct ovsdb_jsonrpc_monitor_column *c = &mt->columns[i]; const struct ovsdb_column *column = c->column; length += hmap_count(&table->rows) * (8 + strlen(column->name)); } /* Data. */ HMAP_FOR_EACH (row, hmap_node, &table->rows) { for (i = 0; i < mt->n_columns; i++) { const struct ovsdb_jsonrpc_monitor_column *c = &mt->columns[i]; const struct ovsdb_column *column = c->column; length += ovsdb_datum_json_length(&row->fields[column->index], &column->type); } } } return length; } static void ovsdb_jsonrpc_monitor_init_aux(struct ovsdb_jsonrpc_monitor_aux *aux, const struct ovsdb_jsonrpc_monitor *m, bool initial) { aux->initial = initial; aux->monitor = m; aux->json = NULL; aux->mt = NULL; aux->table_json = NULL; } static struct ovsdb_error * ovsdb_jsonrpc_monitor_commit(struct ovsdb_replica *replica, const struct ovsdb_txn *txn, bool durable OVS_UNUSED) { struct ovsdb_jsonrpc_monitor *m = ovsdb_jsonrpc_monitor_cast(replica); struct ovsdb_jsonrpc_monitor_aux aux; ovsdb_jsonrpc_monitor_init_aux(&aux, m, false); ovsdb_txn_for_each_change(txn, ovsdb_jsonrpc_monitor_change_cb, &aux); if (aux.json) { struct jsonrpc_msg *msg; struct json *params; params = json_array_create_2(json_clone(aux.monitor->monitor_id), aux.json); msg = jsonrpc_create_notify("update", params); jsonrpc_session_send(aux.monitor->session->js, msg); } return NULL; } static struct json * ovsdb_jsonrpc_monitor_get_initial(const struct ovsdb_jsonrpc_monitor *m) { struct ovsdb_jsonrpc_monitor_aux aux; struct shash_node *node; ovsdb_jsonrpc_monitor_init_aux(&aux, m, true); SHASH_FOR_EACH (node, &m->tables) { struct ovsdb_jsonrpc_monitor_table *mt = node->data; if (mt->select & OJMS_INITIAL) { struct ovsdb_row *row; HMAP_FOR_EACH (row, hmap_node, 
&mt->table->rows) { ovsdb_jsonrpc_monitor_change_cb(NULL, row, NULL, &aux); } } } return aux.json ? aux.json : json_object_create(); } static void ovsdb_jsonrpc_monitor_destroy(struct ovsdb_replica *replica) { struct ovsdb_jsonrpc_monitor *m = ovsdb_jsonrpc_monitor_cast(replica); struct shash_node *node; json_destroy(m->monitor_id); SHASH_FOR_EACH (node, &m->tables) { struct ovsdb_jsonrpc_monitor_table *mt = node->data; free(mt->columns); free(mt); } shash_destroy(&m->tables); hmap_remove(&m->session->monitors, &m->node); free(m); } static const struct ovsdb_replica_class ovsdb_jsonrpc_replica_class = { ovsdb_jsonrpc_monitor_commit, ovsdb_jsonrpc_monitor_destroy }; openvswitch-2.0.1+git20140120/ovsdb/jsonrpc-server.h000066400000000000000000000050631226605124000217260ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVSDB_JSONRPC_SERVER_H #define OVSDB_JSONRPC_SERVER_H 1 #include #include "openvswitch/types.h" struct ovsdb; struct shash; struct simap; struct ovsdb_jsonrpc_server *ovsdb_jsonrpc_server_create(void); bool ovsdb_jsonrpc_server_add_db(struct ovsdb_jsonrpc_server *, struct ovsdb *); bool ovsdb_jsonrpc_server_remove_db(struct ovsdb_jsonrpc_server *, struct ovsdb *); void ovsdb_jsonrpc_server_destroy(struct ovsdb_jsonrpc_server *); /* Options for a remote. */ struct ovsdb_jsonrpc_options { int max_backoff; /* Maximum reconnection backoff, in msec. 
*/ int probe_interval; /* Max idle time before probing, in msec. */ int dscp; /* Dscp value for manager connections */ }; struct ovsdb_jsonrpc_options * ovsdb_jsonrpc_default_options(const char *target); void ovsdb_jsonrpc_server_set_remotes(struct ovsdb_jsonrpc_server *, const struct shash *); /* Status of a single remote connection. */ struct ovsdb_jsonrpc_remote_status { const char *state; int last_error; unsigned int sec_since_connect; unsigned int sec_since_disconnect; bool is_connected; char *locks_held; char *locks_waiting; char *locks_lost; int n_connections; ovs_be16 bound_port; }; bool ovsdb_jsonrpc_server_get_remote_status( const struct ovsdb_jsonrpc_server *, const char *target, struct ovsdb_jsonrpc_remote_status *); void ovsdb_jsonrpc_server_free_remote_status( struct ovsdb_jsonrpc_remote_status *); void ovsdb_jsonrpc_server_reconnect(struct ovsdb_jsonrpc_server *); void ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *); void ovsdb_jsonrpc_server_wait(struct ovsdb_jsonrpc_server *); void ovsdb_jsonrpc_server_get_memory_usage(const struct ovsdb_jsonrpc_server *, struct simap *usage); #endif /* ovsdb/jsonrpc-server.h */ openvswitch-2.0.1+git20140120/ovsdb/log.c000066400000000000000000000273411226605124000175230ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "log.h" #include #include #include #include #include #include #include "json.h" #include "lockfile.h" #include "ovsdb.h" #include "ovsdb-error.h" #include "sha1.h" #include "socket-util.h" #include "transaction.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ovsdb_log); enum ovsdb_log_mode { OVSDB_LOG_READ, OVSDB_LOG_WRITE }; struct ovsdb_log { off_t prev_offset; off_t offset; char *name; struct lockfile *lockfile; FILE *stream; struct ovsdb_error *read_error; bool write_error; enum ovsdb_log_mode mode; }; /* Attempts to open 'name' with the specified 'open_mode'. On success, stores * the new log into '*filep' and returns NULL; otherwise returns NULL and * stores NULL into '*filep'. * * Whether the file will be locked using lockfile_lock() depends on 'locking': * use true to lock it, false not to lock it, or -1 to lock it only if * 'open_mode' is a mode that allows writing. */ struct ovsdb_error * ovsdb_log_open(const char *name, enum ovsdb_log_open_mode open_mode, int locking, struct ovsdb_log **filep) { struct lockfile *lockfile; struct ovsdb_error *error; struct ovsdb_log *file; struct stat s; FILE *stream; int flags; int fd; *filep = NULL; ovs_assert(locking == -1 || locking == false || locking == true); if (locking < 0) { locking = open_mode != OVSDB_LOG_READ_ONLY; } if (locking) { int retval = lockfile_lock(name, &lockfile); if (retval) { error = ovsdb_io_error(retval, "%s: failed to lock lockfile", name); goto error; } } else { lockfile = NULL; } if (open_mode == OVSDB_LOG_READ_ONLY) { flags = O_RDONLY; } else if (open_mode == OVSDB_LOG_READ_WRITE) { flags = O_RDWR; } else if (open_mode == OVSDB_LOG_CREATE) { if (stat(name, &s) == -1 && errno == ENOENT && lstat(name, &s) == 0 && S_ISLNK(s.st_mode)) { /* 'name' is a dangling symlink. We want to create the file that * the symlink points to, but POSIX says that open() with O_EXCL * must fail with EEXIST if the named file is a symlink. 
So, we * have to leave off O_EXCL and accept the race. */ flags = O_RDWR | O_CREAT; } else { flags = O_RDWR | O_CREAT | O_EXCL; } } else { NOT_REACHED(); } fd = open(name, flags, 0666); if (fd < 0) { const char *op = open_mode == OVSDB_LOG_CREATE ? "create" : "open"; error = ovsdb_io_error(errno, "%s: %s failed", op, name); goto error_unlock; } if (!fstat(fd, &s) && s.st_size == 0) { /* It's (probably) a new file so fsync() its parent directory to ensure * that its directory entry is committed to disk. */ fsync_parent_dir(name); } stream = fdopen(fd, open_mode == OVSDB_LOG_READ_ONLY ? "rb" : "w+b"); if (!stream) { error = ovsdb_io_error(errno, "%s: fdopen failed", name); goto error_close; } file = xmalloc(sizeof *file); file->name = xstrdup(name); file->lockfile = lockfile; file->stream = stream; file->prev_offset = 0; file->offset = 0; file->read_error = NULL; file->write_error = false; file->mode = OVSDB_LOG_READ; *filep = file; return NULL; error_close: close(fd); error_unlock: lockfile_unlock(lockfile); error: return error; } void ovsdb_log_close(struct ovsdb_log *file) { if (file) { free(file->name); fclose(file->stream); lockfile_unlock(file->lockfile); ovsdb_error_destroy(file->read_error); free(file); } } static const char magic[] = "OVSDB JSON "; static bool parse_header(char *header, unsigned long int *length, uint8_t sha1[SHA1_DIGEST_SIZE]) { char *p; /* 'header' must consist of a magic string... */ if (strncmp(header, magic, strlen(magic))) { return false; } /* ...followed by a length in bytes... */ *length = strtoul(header + strlen(magic), &p, 10); if (!*length || *length == ULONG_MAX || *p != ' ') { return false; } p++; /* ...followed by a SHA-1 hash... */ if (!sha1_from_hex(sha1, p)) { return false; } p += SHA1_HEX_DIGEST_LEN; /* ...and ended by a new-line. 
*/ if (*p != '\n') { return false; } return true; } struct ovsdb_log_read_cbdata { char input[4096]; struct ovsdb_log *file; int error; unsigned long length; }; static struct ovsdb_error * parse_body(struct ovsdb_log *file, off_t offset, unsigned long int length, uint8_t sha1[SHA1_DIGEST_SIZE], struct json **jsonp) { struct json_parser *parser; struct sha1_ctx ctx; sha1_init(&ctx); parser = json_parser_create(JSPF_TRAILER); while (length > 0) { char input[BUFSIZ]; int chunk; chunk = MIN(length, sizeof input); if (fread(input, 1, chunk, file->stream) != chunk) { json_parser_abort(parser); return ovsdb_io_error(ferror(file->stream) ? errno : EOF, "%s: error reading %lu bytes " "starting at offset %lld", file->name, length, (long long int) offset); } sha1_update(&ctx, input, chunk); json_parser_feed(parser, input, chunk); length -= chunk; } sha1_final(&ctx, sha1); *jsonp = json_parser_finish(parser); return NULL; } struct ovsdb_error * ovsdb_log_read(struct ovsdb_log *file, struct json **jsonp) { uint8_t expected_sha1[SHA1_DIGEST_SIZE]; uint8_t actual_sha1[SHA1_DIGEST_SIZE]; struct ovsdb_error *error; off_t data_offset; unsigned long data_length; struct json *json; char header[128]; *jsonp = json = NULL; if (file->read_error) { return ovsdb_error_clone(file->read_error); } else if (file->mode == OVSDB_LOG_WRITE) { return OVSDB_BUG("reading file in write mode"); } if (!fgets(header, sizeof header, file->stream)) { if (feof(file->stream)) { error = NULL; } else { error = ovsdb_io_error(errno, "%s: read failed", file->name); } goto error; } if (!parse_header(header, &data_length, expected_sha1)) { error = ovsdb_syntax_error(NULL, NULL, "%s: parse error at offset " "%lld in header line \"%.*s\"", file->name, (long long int) file->offset, (int) strcspn(header, "\n"), header); goto error; } data_offset = file->offset + strlen(header); error = parse_body(file, data_offset, data_length, actual_sha1, &json); if (error) { goto error; } if (memcmp(expected_sha1, actual_sha1, 
SHA1_DIGEST_SIZE)) { error = ovsdb_syntax_error(NULL, NULL, "%s: %lu bytes starting at " "offset %lld have SHA-1 hash "SHA1_FMT" " "but should have hash "SHA1_FMT, file->name, data_length, (long long int) data_offset, SHA1_ARGS(actual_sha1), SHA1_ARGS(expected_sha1)); goto error; } if (json->type == JSON_STRING) { error = ovsdb_syntax_error(NULL, NULL, "%s: %lu bytes starting at " "offset %lld are not valid JSON (%s)", file->name, data_length, (long long int) data_offset, json->u.string); goto error; } file->prev_offset = file->offset; file->offset = data_offset + data_length; *jsonp = json; return NULL; error: file->read_error = ovsdb_error_clone(error); json_destroy(json); return error; } /* Causes the log record read by the previous call to ovsdb_log_read() to be * effectively discarded. The next call to ovsdb_log_write() will overwrite * that previously read record. * * Calling this function more than once has no additional effect. * * This function is useful when ovsdb_log_read() successfully reads a record * but that record does not make sense at a higher level (e.g. it specifies an * invalid transaction). 
*/
void
ovsdb_log_unread(struct ovsdb_log *file)
{
    /* Un-reading is only permitted while the log is still in read mode. */
    ovs_assert(file->mode == OVSDB_LOG_READ);
    /* Step back to the offset that was saved before the last record was
     * read, so that the next write starts there. */
    file->offset = file->prev_offset;
}

/* Writes 'json' to 'file' as one log record beginning at 'file->offset'.
 *
 * If 'file' was in read mode, or the previous write failed, the log first
 * switches to write mode, seeks to 'file->offset' and truncates the file at
 * that offset.
 *
 * 'json' must be a JSON object or array; any other type yields an OVSDB_BUG
 * error.  A record is a header line (magic string, content length and SHA-1
 * of the content) followed by the serialized JSON terminated by a new-line.
 *
 * Returns NULL on success, after advancing 'file->offset' past the record.
 * On failure returns an error and sets 'file->write_error', which makes the
 * next call seek and truncate again before writing. */
struct ovsdb_error *
ovsdb_log_write(struct ovsdb_log *file, struct json *json)
{
    uint8_t sha1[SHA1_DIGEST_SIZE];
    struct ovsdb_error *error;
    char *json_string;
    char header[128];
    size_t length;

    /* Initialized early so that the 'error' path may always free() it. */
    json_string = NULL;

    if (file->mode == OVSDB_LOG_READ || file->write_error) {
        file->mode = OVSDB_LOG_WRITE;
        file->write_error = false;
        if (fseeko(file->stream, file->offset, SEEK_SET)) {
            error = ovsdb_io_error(errno, "%s: cannot seek to offset %lld",
                                   file->name, (long long int) file->offset);
            goto error;
        }
        if (ftruncate(fileno(file->stream), file->offset)) {
            error = ovsdb_io_error(errno, "%s: cannot truncate to length %lld",
                                   file->name, (long long int) file->offset);
            goto error;
        }
    }

    if (json->type != JSON_OBJECT && json->type != JSON_ARRAY) {
        error = OVSDB_BUG("bad JSON type");
        goto error;
    }

    /* Compose content. Add a new-line (replacing the null terminator) to make
     * the file easier to read, even though it has no semantic value. */
    json_string = json_to_string(json, 0);
    length = strlen(json_string) + 1;
    json_string[length - 1] = '\n';

    /* Compose header. */
    sha1_bytes(json_string, length, sha1);
    snprintf(header, sizeof header, "%s%zu "SHA1_FMT"\n",
             magic, length, SHA1_ARGS(sha1));

    /* Write. */
    if (fwrite(header, strlen(header), 1, file->stream) != 1
        || fwrite(json_string, length, 1, file->stream) != 1
        || fflush(file->stream))
    {
        error = ovsdb_io_error(errno, "%s: write failed", file->name);

        /* Remove any partially written data, ignoring errors since there is
         * nothing further we can do. */
        ignore(ftruncate(fileno(file->stream), file->offset));

        goto error;
    }

    file->offset += strlen(header) + length;
    free(json_string);
    return NULL;

error:
    file->write_error = true;
    free(json_string);
    return error;
}

/* Calls fsync() on the file descriptor underlying 'file''s stream.  Returns
 * NULL on success, otherwise an I/O error built from errno. */
struct ovsdb_error *
ovsdb_log_commit(struct ovsdb_log *file)
{
    if (fsync(fileno(file->stream))) {
        return ovsdb_io_error(errno, "%s: fsync failed", file->name);
    }
    return NULL;
}

/* Returns the current offset into the file backing 'log', in bytes. This
 * reflects the number of bytes that have been read or written in the file. If
 * the whole file has been read, this is the file size. */
off_t
ovsdb_log_get_offset(const struct ovsdb_log *log)
{
    return log->offset;
}
openvswitch-2.0.1+git20140120/ovsdb/log.h000066400000000000000000000031301226605124000175160ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVSDB_LOG_H #define OVSDB_LOG_H 1 #include #include "compiler.h" struct json; struct ovsdb_log; /* Access mode for opening an OVSDB log. */ enum ovsdb_log_open_mode { OVSDB_LOG_READ_ONLY, /* Open existing file, read-only. */ OVSDB_LOG_READ_WRITE, /* Open existing file, read/write. */ OVSDB_LOG_CREATE /* Create new file, read/write.
*/ }; struct ovsdb_error *ovsdb_log_open(const char *name, enum ovsdb_log_open_mode, int locking, struct ovsdb_log **) WARN_UNUSED_RESULT; void ovsdb_log_close(struct ovsdb_log *); struct ovsdb_error *ovsdb_log_read(struct ovsdb_log *, struct json **) WARN_UNUSED_RESULT; void ovsdb_log_unread(struct ovsdb_log *); struct ovsdb_error *ovsdb_log_write(struct ovsdb_log *, struct json *) WARN_UNUSED_RESULT; struct ovsdb_error *ovsdb_log_commit(struct ovsdb_log *) WARN_UNUSED_RESULT; off_t ovsdb_log_get_offset(const struct ovsdb_log *); #endif /* ovsdb/log.h */ openvswitch-2.0.1+git20140120/ovsdb/mutation.c000066400000000000000000000337761226605124000206130ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "mutation.h" #include #include #include "column.h" #include "ovsdb-error.h" #include "json.h" #include "row.h" #include "table.h" struct ovsdb_error * ovsdb_mutator_from_string(const char *name, enum ovsdb_mutator *mutator) { #define OVSDB_MUTATOR(ENUM, NAME) \ if (!strcmp(name, NAME)) { \ *mutator = ENUM; \ return NULL; \ } OVSDB_MUTATORS; #undef OVSDB_MUTATOR return ovsdb_syntax_error(NULL, "unknown mutator", "No mutator named %s.", name); } const char * ovsdb_mutator_to_string(enum ovsdb_mutator mutator) { switch (mutator) { #define OVSDB_MUTATOR(ENUM, NAME) case ENUM: return NAME; OVSDB_MUTATORS; #undef OVSDB_MUTATOR } return NULL; } static WARN_UNUSED_RESULT struct ovsdb_error * type_mismatch(const struct ovsdb_mutation *m, const struct json *json) { struct ovsdb_error *error; char *s; s = ovsdb_type_to_english(&m->column->type); error = ovsdb_syntax_error( json, NULL, "Type mismatch: \"%s\" operator may not be " "applied to column %s of type %s.", ovsdb_mutator_to_string(m->mutator), m->column->name, s); free(s); return error; } static WARN_UNUSED_RESULT struct ovsdb_error * ovsdb_mutation_from_json(const struct ovsdb_table_schema *ts, const struct json *json, struct ovsdb_symbol_table *symtab, struct ovsdb_mutation *m) { const struct json_array *array; struct ovsdb_error *error; const char *mutator_name; const char *column_name; if (json->type != JSON_ARRAY || json->u.array.n != 3 || json->u.array.elems[0]->type != JSON_STRING || json->u.array.elems[1]->type != JSON_STRING) { return ovsdb_syntax_error(json, NULL, "Parse error in mutation."); } array = json_array(json); column_name = json_string(array->elems[0]); m->column = ovsdb_table_schema_get_column(ts, column_name); if (!m->column) { return ovsdb_syntax_error(json, "unknown column", "No column %s in table %s.", column_name, ts->name); } if (!m->column->mutable) { return ovsdb_syntax_error(json, "constraint violation", "Cannot mutate immutable column %s in " "table %s.", column_name, 
ts->name); } ovsdb_type_clone(&m->type, &m->column->type); mutator_name = json_string(array->elems[1]); error = ovsdb_mutator_from_string(mutator_name, &m->mutator); if (error) { goto exit; } /* Type-check and relax restrictions on 'type' if appropriate. */ switch (m->mutator) { case OVSDB_M_ADD: case OVSDB_M_SUB: case OVSDB_M_MUL: case OVSDB_M_DIV: case OVSDB_M_MOD: if ((!ovsdb_type_is_scalar(&m->type) && !ovsdb_type_is_set(&m->type)) || (m->type.key.type != OVSDB_TYPE_INTEGER && m->type.key.type != OVSDB_TYPE_REAL) || (m->mutator == OVSDB_M_MOD && m->type.key.type == OVSDB_TYPE_REAL)) { return type_mismatch(m, json); } ovsdb_base_type_clear_constraints(&m->type.key); m->type.n_min = m->type.n_max = 1; error = ovsdb_datum_from_json(&m->arg, &m->type, array->elems[2], symtab); break; case OVSDB_M_INSERT: case OVSDB_M_DELETE: if (!ovsdb_type_is_set(&m->type) && !ovsdb_type_is_map(&m->type)) { return type_mismatch(m, json); } m->type.n_min = 0; if (m->mutator == OVSDB_M_DELETE) { m->type.n_max = UINT_MAX; } error = ovsdb_datum_from_json(&m->arg, &m->type, array->elems[2], symtab); if (error && ovsdb_type_is_map(&m->type) && m->mutator == OVSDB_M_DELETE) { ovsdb_error_destroy(error); m->type.value.type = OVSDB_TYPE_VOID; error = ovsdb_datum_from_json(&m->arg, &m->type, array->elems[2], symtab); } break; default: NOT_REACHED(); } exit: if (error) { ovsdb_type_destroy(&m->type); } return error; } static void ovsdb_mutation_free(struct ovsdb_mutation *m) { ovsdb_datum_destroy(&m->arg, &m->type); ovsdb_type_destroy(&m->type); } struct ovsdb_error * ovsdb_mutation_set_from_json(const struct ovsdb_table_schema *ts, const struct json *json, struct ovsdb_symbol_table *symtab, struct ovsdb_mutation_set *set) { const struct json_array *array = json_array(json); size_t i; set->mutations = xmalloc(array->n * sizeof *set->mutations); set->n_mutations = 0; for (i = 0; i < array->n; i++) { struct ovsdb_error *error; error = ovsdb_mutation_from_json(ts, array->elems[i], symtab, 
&set->mutations[i]); if (error) { ovsdb_mutation_set_destroy(set); set->mutations = NULL; set->n_mutations = 0; return error; } set->n_mutations++; } return NULL; } static struct json * ovsdb_mutation_to_json(const struct ovsdb_mutation *m) { return json_array_create_3( json_string_create(m->column->name), json_string_create(ovsdb_mutator_to_string(m->mutator)), ovsdb_datum_to_json(&m->arg, &m->type)); } struct json * ovsdb_mutation_set_to_json(const struct ovsdb_mutation_set *set) { struct json **mutations; size_t i; mutations = xmalloc(set->n_mutations * sizeof *mutations); for (i = 0; i < set->n_mutations; i++) { mutations[i] = ovsdb_mutation_to_json(&set->mutations[i]); } return json_array_create(mutations, set->n_mutations); } void ovsdb_mutation_set_destroy(struct ovsdb_mutation_set *set) { size_t i; for (i = 0; i < set->n_mutations; i++) { ovsdb_mutation_free(&set->mutations[i]); } free(set->mutations); } enum ovsdb_mutation_scalar_error { ME_OK, ME_DOM, ME_RANGE }; struct ovsdb_scalar_mutation { int (*mutate_integer)(int64_t *x, int64_t y); int (*mutate_real)(double *x, double y); enum ovsdb_mutator mutator; }; static const struct ovsdb_scalar_mutation add_mutation; static const struct ovsdb_scalar_mutation sub_mutation; static const struct ovsdb_scalar_mutation mul_mutation; static const struct ovsdb_scalar_mutation div_mutation; static const struct ovsdb_scalar_mutation mod_mutation; static struct ovsdb_error * ovsdb_mutation_scalar_error(enum ovsdb_mutation_scalar_error error, enum ovsdb_mutator mutator) { switch (error) { case ME_OK: return OVSDB_BUG("unexpected success"); case ME_DOM: return ovsdb_error("domain error", "Division by zero."); case ME_RANGE: return ovsdb_error("range error", "Result of \"%s\" operation is out of range.", ovsdb_mutator_to_string(mutator)); default: return OVSDB_BUG("unexpected error"); } } static int check_real_range(double x) { return x >= -DBL_MAX && x <= DBL_MAX ? 
0 : ME_RANGE; } static struct ovsdb_error * mutate_scalar(const struct ovsdb_type *dst_type, struct ovsdb_datum *dst, const union ovsdb_atom *arg, const struct ovsdb_scalar_mutation *mutation) { const struct ovsdb_base_type *base = &dst_type->key; struct ovsdb_error *error; unsigned int i; if (base->type == OVSDB_TYPE_INTEGER) { int64_t y = arg->integer; for (i = 0; i < dst->n; i++) { enum ovsdb_mutation_scalar_error me; me = (mutation->mutate_integer)(&dst->keys[i].integer, y); if (me != ME_OK) { return ovsdb_mutation_scalar_error(me, mutation->mutator); } } } else if (base->type == OVSDB_TYPE_REAL) { double y = arg->real; for (i = 0; i < dst->n; i++) { double *x = &dst->keys[i].real; enum ovsdb_mutation_scalar_error me; me = (mutation->mutate_real)(x, y); if (me == ME_OK) { me = check_real_range(*x); } if (me != ME_OK) { return ovsdb_mutation_scalar_error(me, mutation->mutator); } } } else { NOT_REACHED(); } for (i = 0; i < dst->n; i++) { error = ovsdb_atom_check_constraints(&dst->keys[i], base); if (error) { return error; } } error = ovsdb_datum_sort(dst, dst_type->key.type); if (error) { ovsdb_error_destroy(error); return ovsdb_error("constraint violation", "Result of \"%s\" operation contains duplicates.", ovsdb_mutator_to_string(mutation->mutator)); } return NULL; } static struct ovsdb_error * ovsdb_mutation_check_count(struct ovsdb_datum *dst, const struct ovsdb_type *dst_type) { if (!ovsdb_datum_conforms_to_type(dst, dst_type)) { char *s = ovsdb_type_to_english(dst_type); struct ovsdb_error *e = ovsdb_error( "constraint violation", "Attempted to store %u elements in %s.", dst->n, s); free(s); return e; } return NULL; } struct ovsdb_error * ovsdb_mutation_set_execute(struct ovsdb_row *row, const struct ovsdb_mutation_set *set) { size_t i; for (i = 0; i < set->n_mutations; i++) { const struct ovsdb_mutation *m = &set->mutations[i]; struct ovsdb_datum *dst = &row->fields[m->column->index]; const struct ovsdb_type *dst_type = &m->column->type; const struct 
ovsdb_datum *arg = &set->mutations[i].arg; const struct ovsdb_type *arg_type = &m->type; struct ovsdb_error *error; switch (m->mutator) { case OVSDB_M_ADD: error = mutate_scalar(dst_type, dst, &arg->keys[0], &add_mutation); break; case OVSDB_M_SUB: error = mutate_scalar(dst_type, dst, &arg->keys[0], &sub_mutation); break; case OVSDB_M_MUL: error = mutate_scalar(dst_type, dst, &arg->keys[0], &mul_mutation); break; case OVSDB_M_DIV: error = mutate_scalar(dst_type, dst, &arg->keys[0], &div_mutation); break; case OVSDB_M_MOD: error = mutate_scalar(dst_type, dst, &arg->keys[0], &mod_mutation); break; case OVSDB_M_INSERT: ovsdb_datum_union(dst, arg, dst_type, false); error = ovsdb_mutation_check_count(dst, dst_type); break; case OVSDB_M_DELETE: ovsdb_datum_subtract(dst, dst_type, arg, arg_type); error = ovsdb_mutation_check_count(dst, dst_type); break; default: NOT_REACHED(); } if (error) { return error; } } return NULL; } static int add_int(int64_t *x, int64_t y) { /* Check for overflow. See _Hacker's Delight_ pp. 27. */ int64_t z = ~(*x ^ y) & INT64_MIN; if ((~(*x ^ y) & ~(((*x ^ z) + y) ^ y)) >> 63) { return ME_RANGE; } else { *x += y; return 0; } } static int sub_int(int64_t *x, int64_t y) { /* Check for overflow. See _Hacker's Delight_ pp. 27. */ int64_t z = (*x ^ y) & INT64_MIN; if (((*x ^ y) & (((*x ^ z) - y) ^ y)) >> 63) { return ME_RANGE; } else { *x -= y; return 0; } } static int mul_int(int64_t *x, int64_t y) { /* Check for overflow. See _Hacker's Delight_ pp. 30. */ if (*x > 0 ? (y > 0 ? *x >= INT64_MAX / y : y < INT64_MIN / *x) : (y > 0 ? *x < INT64_MIN / y : *x != 0 && y < INT64_MAX / y)) { return ME_RANGE; } else { *x *= y; return 0; } } static int check_int_div(int64_t x, int64_t y) { /* Check for overflow. See _Hacker's Delight_ pp. 32. 
*/ if (!y) { return ME_DOM; } else if (x == INT64_MIN && y == -1) { return ME_RANGE; } else { return 0; } } static int div_int(int64_t *x, int64_t y) { int error = check_int_div(*x, y); if (!error) { *x /= y; } return error; } static int mod_int(int64_t *x, int64_t y) { int error = check_int_div(*x, y); if (!error) { *x %= y; } return error; } static int add_double(double *x, double y) { *x += y; return 0; } static int sub_double(double *x, double y) { *x -= y; return 0; } static int mul_double(double *x, double y) { *x *= y; return 0; } static int div_double(double *x, double y) { if (y == 0) { return ME_DOM; } else { *x /= y; return 0; } } static const struct ovsdb_scalar_mutation add_mutation = { add_int, add_double, OVSDB_M_ADD }; static const struct ovsdb_scalar_mutation sub_mutation = { sub_int, sub_double, OVSDB_M_SUB }; static const struct ovsdb_scalar_mutation mul_mutation = { mul_int, mul_double, OVSDB_M_MUL }; static const struct ovsdb_scalar_mutation div_mutation = { div_int, div_double, OVSDB_M_DIV }; static const struct ovsdb_scalar_mutation mod_mutation = { mod_int, NULL, OVSDB_M_MOD }; openvswitch-2.0.1+git20140120/ovsdb/mutation.h000066400000000000000000000046431226605124000206070ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef OVSDB_MUTATION_H #define OVSDB_MUTATION_H 1 #include #include "compiler.h" #include "ovsdb-data.h" struct json; struct ovsdb_table_schema; struct ovsdb_row; /* Each OVSDB_MUTATOR entry in this list pairs a mutator's enum constant with * the string name used for that mutator. */ #define OVSDB_MUTATORS \ OVSDB_MUTATOR(OVSDB_M_ADD, "+=") \ OVSDB_MUTATOR(OVSDB_M_SUB, "-=") \ OVSDB_MUTATOR(OVSDB_M_MUL, "*=") \ OVSDB_MUTATOR(OVSDB_M_DIV, "/=") \ OVSDB_MUTATOR(OVSDB_M_MOD, "%=") \ OVSDB_MUTATOR(OVSDB_M_INSERT, "insert") \ OVSDB_MUTATOR(OVSDB_M_DELETE, "delete") enum ovsdb_mutator { #define OVSDB_MUTATOR(ENUM, NAME) ENUM, OVSDB_MUTATORS #undef OVSDB_MUTATOR }; struct ovsdb_error *ovsdb_mutator_from_string(const char *, enum ovsdb_mutator *) WARN_UNUSED_RESULT; const char *ovsdb_mutator_to_string(enum ovsdb_mutator); struct ovsdb_mutation { enum ovsdb_mutator mutator; const struct ovsdb_column *column; struct ovsdb_datum arg; struct ovsdb_type type; }; struct ovsdb_mutation_set { struct ovsdb_mutation *mutations; size_t n_mutations; }; #define OVSDB_MUTATION_SET_INITIALIZER { NULL, 0 } struct ovsdb_error *ovsdb_mutation_set_from_json( const struct ovsdb_table_schema *, const struct json *, struct ovsdb_symbol_table *, struct ovsdb_mutation_set *) WARN_UNUSED_RESULT; struct json *ovsdb_mutation_set_to_json(const struct ovsdb_mutation_set *); void ovsdb_mutation_set_destroy(struct ovsdb_mutation_set *); struct ovsdb_error *ovsdb_mutation_set_execute( struct ovsdb_row *, const struct ovsdb_mutation_set *) WARN_UNUSED_RESULT; #endif /* ovsdb/mutation.h */ openvswitch-2.0.1+git20140120/ovsdb/ovsdb-client.1.in000066400000000000000000000133131226605124000216500ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .\" -*- nroff -*- .TH ovsdb\-client 1 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .\" This program's name: .ds PN ovsdb\-client .\" SSL peer program's name: .ds SN ovsdb\-server .
.SH NAME ovsdb\-client \- command-line interface to \fBovsdb\-server\fR(1) . .SH SYNOPSIS \fBovsdb\-client \fR[\fIoptions\fR] \fBlist\-dbs \fR[\fIserver\fR] .br \fBovsdb\-client \fR[\fIoptions\fR] \fBget\-schema \fR[\fIserver\fR] \fR[\fIdatabase\fR] .br \fBovsdb\-client \fR[\fIoptions\fR] \fBget\-schema\-version\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR] .br \fBovsdb\-client \fR[\fIoptions\fR] \fBlist\-tables\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR] .br \fBovsdb\-client \fR[\fIoptions\fR] \fBlist\-columns\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR] [\fItable\fR] .br \fBovsdb\-client \fR[\fIoptions\fR] \fBtransact\fI \fR[\fIserver\fR] \fItransaction\fR .br \fBovsdb\-client \fR[\fIoptions\fR] \fBdump\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR]\fR .br \fBovsdb\-client \fR[\fIoptions\fR] \fBmonitor\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR] \fItable\fR [\fIcolumn\fR[\fB,\fIcolumn\fR]...]... .br \fBovsdb\-client help\fR .IP "Output formatting options:" [\fB\-\-format=\fIformat\fR] [\fB\-\-data=\fIformat\fR] [\fB\-\-no\-headings\fR] [\fB\-\-pretty\fR] [\fB\-\-bare\fR] [\fB\-\-timestamp\fR] .so lib/daemon-syn.man .so lib/vlog-syn.man .so lib/ssl-syn.man .so lib/ssl-bootstrap-syn.man .so lib/common-syn.man . .SH DESCRIPTION The \fBovsdb\-client\fR program is a command-line client for interacting with a running \fBovsdb\-server\fR process. Each command connects to an OVSDB server, which is \fBunix:@RUNDIR@/db.sock\fR by default, or may be specified as \fIserver\fR in one of the following forms: .RS .so ovsdb/remote-active.man .so ovsdb/remote-passive.man .RE .PP The default \fIdatabase\fR is \fBOpen_vSwitch\fR. . .SS "Commands" The following commands are implemented: .IP "\fBlist\-dbs \fR[\fIserver\fR]" Connects to \fIserver\fR, retrieves the list of known databases, and prints them one per line. These database names are the ones that may be used for \fIdatabase\fR in the following commands. .
.IP "\fBget\-schema \fR[\fIserver\fR] \fR[\fIdatabase\fR]" Connects to \fIserver\fR, retrieves the schema for \fIdatabase\fR, and prints it in JSON format. . .IP "\fBget\-schema\-version\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR]" Connects to \fIserver\fR, retrieves the schema for \fIdatabase\fR, and prints its version number on stdout. A schema version number has the form \fIx\fB.\fIy\fB.\fIz\fR. See \fBovs\-vswitchd.conf.db\fR(5) for details. .IP Schema version numbers and Open vSwitch version numbers are independent. .IP If \fIdatabase\fR was created before schema versioning was introduced, then it will not have a version number and this command will print a blank line. . .IP "\fBlist\-tables\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR]" Connects to \fIserver\fR, retrieves the schema for \fIdatabase\fR, and prints a table listing the name of each table within the database. . .IP "\fBlist\-columns\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR] [\fItable\fR]" Connects to \fIserver\fR, retrieves the schema for \fIdatabase\fR, and prints a table listing the name and type of each column. If \fItable\fR is specified, only columns in that table are listed; otherwise, the columns of every table in the database are listed. . .IP "\fBtransact\fI \fR[\fIserver\fR] \fItransaction\fR" Connects to \fIserver\fR, sends it the specified \fItransaction\fR, which must be a JSON array containing one or more valid OVSDB operations, and prints the received reply on stdout. . .IP "\fBdump\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR]\fR" Connects to \fIserver\fR, retrieves all of the data in \fIdatabase\fR, and prints it on stdout as a series of tables. . .IP "\fBmonitor\fI \fR[\fIserver\fR] \fR[\fIdatabase\fR] \fItable\fR [\fIcolumn\fR[\fB,\fIcolumn\fR]...]..." Connects to \fIserver\fR and monitors the contents of \fItable\fR in \fIdatabase\fR. By default, the initial contents of \fItable\fR are printed, followed by each change as it occurs.
If at least one \fIcolumn\fR is specified, only those columns are monitored. The following \fIcolumn\fR names have special meanings: .RS .IP "\fB!initial\fR" Do not print the initial contents of the specified columns. .IP "\fB!insert\fR" Do not print newly inserted rows. .IP "\fB!delete\fR" Do not print deleted rows. .IP "\fB!modify\fR" Do not print modifications to existing rows. .RE .IP Multiple [\fIcolumn\fR[\fB,\fIcolumn\fR]...] groups may be specified as separate arguments, e.g. to apply different reporting parameters to each group. Whether multiple groups or only a single group is specified, any given column may only be mentioned once on the command line. .IP If \fB\-\-detach\fR is used with \fBmonitor\fR, then \fBovsdb\-client\fR detaches after it has successfully received and printed the initial contents of \fItable\fR. .SH OPTIONS .SS "Output Formatting Options" Much of the output from \fBovsdb\-client\fR is in the form of tables. The following options control output formatting: . .ds TD (default) .so lib/table.man . .IP "\fB\-\-timestamp\fR" For the \fBmonitor\fR command, adds a timestamp to each table update. Most output formats add the timestamp on a line of its own just above the table. The JSON output format puts the timestamp in a member of the top-level JSON object named \fBtime\fR. . .SS "Daemon Options" The daemon options apply only to the \fBmonitor\fR command. With any other command, they have no effect. .ds DD .so lib/daemon.man .SS "Logging Options" .so lib/vlog.man .SS "Public Key Infrastructure Options" .so lib/ssl.man .so lib/ssl-bootstrap.man .SS "Other Options" .so lib/common.man .SH "SEE ALSO" . \fBovsdb\-server\fR(1), \fBovsdb\-client\fR(1), and the OVSDB specification. openvswitch-2.0.1+git20140120/ovsdb/ovsdb-client.c000066400000000000000000000753421226605124000213370ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include "command-line.h" #include "column.h" #include "compiler.h" #include "daemon.h" #include "dirs.h" #include "dynamic-string.h" #include "json.h" #include "jsonrpc.h" #include "lib/table.h" #include "ovsdb.h" #include "ovsdb-data.h" #include "ovsdb-error.h" #include "sort.h" #include "svec.h" #include "stream.h" #include "stream-ssl.h" #include "table.h" #include "timeval.h" #include "util.h" #include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ovsdb_client);

/* What main() must set up before it invokes a command's handler. */
enum args_needed {
    NEED_NONE,            /* No JSON-RPC connection or database name needed. */
    NEED_RPC,             /* JSON-RPC connection needed. */
    NEED_DATABASE         /* JSON-RPC connection and database name needed. */
};

/* One entry of the command table returned by get_all_commands().  The table
 * is terminated by an entry whose 'name' is NULL. */
struct ovsdb_client_command {
    const char *name;           /* Command name as typed by the user. */
    enum args_needed need;      /* What main() prepares for 'handler'. */
    int min_args;               /* Fewest arguments left for 'handler'. */
    int max_args;               /* Most arguments left for 'handler'. */
    void (*handler)(struct jsonrpc *rpc, const char *database,
                    int argc, char *argv[]);
};

/* --timestamp: Print a timestamp before each update on "monitor" command? */
static bool timestamp;

/* Format for table output. */
static struct table_style table_style = TABLE_STYLE_DEFAULT;

static const struct ovsdb_client_command *get_all_commands(void);

static void usage(void) NO_RETURN;
static void parse_options(int argc, char *argv[]);
static struct jsonrpc *open_jsonrpc(const char *server);
static void fetch_dbs(struct jsonrpc *, struct svec *dbs);

/* Program entry point: parses options, looks up the command named on the
 * command line, resolves the optional SERVER and DATABASE arguments as the
 * command requires, checks the remaining argument count and runs the
 * command's handler. */
int
main(int argc, char *argv[])
{
    const struct ovsdb_client_command *command;
    const char *database;
    struct jsonrpc *rpc;

    proctitle_init(argc, argv);
    set_program_name(argv[0]);
    parse_options(argc, argv);
    signal(SIGPIPE, SIG_IGN);

    if (optind >= argc) {
        ovs_fatal(0, "missing command name; use --help for help");
    }

    /* Linear search of the command table; a NULL name marks its end. */
    for (command = get_all_commands(); ; command++) {
        if (!command->name) {
            VLOG_FATAL("unknown command '%s'; use --help for help",
                       argv[optind]);
        } else if (!strcmp(command->name, argv[optind])) {
            break;
        }
    }
    optind++;

    if (command->need != NEED_NONE) {
        /* The next argument is taken as SERVER only if more arguments remain
         * than the command strictly requires and the argument starts with a
         * letter and contains a colon. */
        if (argc - optind > command->min_args
            && (isalpha((unsigned char) argv[optind][0])
                && strchr(argv[optind], ':'))) {
            rpc = open_jsonrpc(argv[optind++]);
        } else {
            char *sock = xasprintf("unix:%s/db.sock", ovs_rundir());
            rpc = open_jsonrpc(sock);
            free(sock);
        }
    } else {
        rpc = NULL;
    }

    if (command->need == NEED_DATABASE) {
        struct svec dbs;

        svec_init(&dbs);
        fetch_dbs(rpc, &dbs);
        /* DATABASE resolution order: an explicit argument naming a database
         * the server reported, else the server's only database, else
         * "Open_vSwitch" if the server has it. */
        if (argc - optind > command->min_args
            && svec_contains(&dbs, argv[optind])) {
            database = argv[optind++];
        } else if (dbs.n == 1) {
            /* Copied because 'dbs' is destroyed just below. */
            database = xstrdup(dbs.names[0]);
        } else if (svec_contains(&dbs, "Open_vSwitch")) {
            database = "Open_vSwitch";
        } else {
            ovs_fatal(0, "no default database for `%s' command, please "
                      "specify a database name", command->name);
        }
        svec_destroy(&dbs);
    } else {
        database = NULL;
    }

    if (argc - optind < command->min_args ||
        argc - optind > command->max_args) {
        VLOG_FATAL("invalid syntax for '%s' (use --help for help)",
                    command->name);
    }

    command->handler(rpc, database, argc - optind, argv + optind);

    jsonrpc_close(rpc);

    if (ferror(stdout)) {
        VLOG_FATAL("write to stdout failed");
    }
    if (ferror(stderr)) {
        VLOG_FATAL("write to stderr failed");
    }

    return 0;
}

/* Parses the command-line options in 'argv' with getopt_long(), recording
 * their effects in file-level state such as 'timestamp' and 'table_style'. */
static void
parse_options(int argc, char *argv[])
{
    enum {
        OPT_BOOTSTRAP_CA_CERT = UCHAR_MAX + 1,
        OPT_TIMESTAMP,
        DAEMON_OPTION_ENUMS,
        TABLE_OPTION_ENUMS
    };
    static const struct option long_options[] = {
        {"verbose", optional_argument, NULL, 'v'},
        {"help", no_argument, NULL, 'h'},
        {"version", no_argument, NULL, 'V'},
        {"timestamp", no_argument, NULL, OPT_TIMESTAMP},
        DAEMON_LONG_OPTIONS,
#ifdef HAVE_OPENSSL
        {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT},
        STREAM_SSL_LONG_OPTIONS,
#endif
        TABLE_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    char *short_options = long_options_to_short_options(long_options);

    for (;;) {
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        case 'h':
            /* usage() is declared NO_RETURN, so this does not fall through. */
            usage();

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        case 'v':
            vlog_set_verbosity(optarg);
            break;

        DAEMON_OPTION_HANDLERS
        TABLE_OPTION_HANDLERS(&table_style)
        STREAM_SSL_OPTION_HANDLERS

        case OPT_BOOTSTRAP_CA_CERT:
            stream_ssl_set_ca_cert_file(optarg, true);
            break;

        case OPT_TIMESTAMP:
            timestamp = true;
            break;

        case '?':
            exit(EXIT_FAILURE);

        case 0:
            /* getopt_long() already set the value for us.
*/ break; default: abort(); } } free(short_options); } static void usage(void) { printf("%s: Open vSwitch database JSON-RPC client\n" "usage: %s [OPTIONS] COMMAND [ARG...]\n" "\nValid commands are:\n" "\n list-dbs [SERVER]\n" " list databases available on SERVER\n" "\n get-schema [SERVER] [DATABASE]\n" " retrieve schema for DATABASE from SERVER\n" "\n get-schema-version [SERVER] [DATABASE]\n" " retrieve schema for DATABASE from SERVER and report only its\n" " version number on stdout\n" "\n list-tables [SERVER] [DATABASE]\n" " list tables for DATABASE on SERVER\n" "\n list-columns [SERVER] [DATABASE] [TABLE]\n" " list columns in TABLE (or all tables) in DATABASE on SERVER\n" "\n transact [SERVER] TRANSACTION\n" " run TRANSACTION (a JSON array of operations) on SERVER\n" " and print the results as JSON on stdout\n" "\n monitor [SERVER] [DATABASE] TABLE [COLUMN,...]...\n" " monitor contents of COLUMNs in TABLE in DATABASE on SERVER.\n" " COLUMNs may include !initial, !insert, !delete, !modify\n" " to avoid seeing the specified kinds of changes.\n" "\n dump [SERVER] [DATABASE]\n" " dump contents of DATABASE on SERVER to stdout\n" "\nThe default SERVER is unix:%s/db.sock.\n" "The default DATABASE is Open_vSwitch.\n", program_name, program_name, ovs_rundir()); stream_usage("SERVER", true, true, true); printf("\nOutput formatting options:\n" " -f, --format=FORMAT set output formatting to FORMAT\n" " (\"table\", \"html\", \"csv\", " "or \"json\")\n" " --no-headings omit table heading row\n" " --pretty pretty-print JSON in output\n" " --timestamp timestamp \"monitor\" output"); daemon_usage(); vlog_usage(); printf("\nOther options:\n" " -h, --help display this help message\n" " -V, --version display version information\n"); exit(EXIT_SUCCESS); } static void check_txn(int error, struct jsonrpc_msg **reply_) { struct jsonrpc_msg *reply = *reply_; if (error) { ovs_fatal(error, "transaction failed"); } if (reply->error) { ovs_fatal(error, "transaction returned error: %s", 
json_to_string(reply->error, table_style.json_flags)); } } static struct json * parse_json(const char *s) { struct json *json = json_from_string(s); if (json->type == JSON_STRING) { ovs_fatal(0, "\"%s\": %s", s, json->u.string); } return json; } static struct jsonrpc * open_jsonrpc(const char *server) { struct stream *stream; int error; error = stream_open_block(jsonrpc_stream_open(server, &stream, DSCP_DEFAULT), &stream); if (error == EAFNOSUPPORT) { struct pstream *pstream; error = jsonrpc_pstream_open(server, &pstream, DSCP_DEFAULT); if (error) { ovs_fatal(error, "failed to connect or listen to \"%s\"", server); } VLOG_INFO("%s: waiting for connection...", server); error = pstream_accept_block(pstream, &stream); if (error) { ovs_fatal(error, "failed to accept connection on \"%s\"", server); } pstream_close(pstream); } else if (error) { ovs_fatal(error, "failed to connect to \"%s\"", server); } return jsonrpc_open(stream); } static void print_json(struct json *json) { char *string = json_to_string(json, table_style.json_flags); fputs(string, stdout); free(string); } static void print_and_free_json(struct json *json) { print_json(json); json_destroy(json); } static void check_ovsdb_error(struct ovsdb_error *error) { if (error) { ovs_fatal(0, "%s", ovsdb_error_to_string(error)); } } static struct ovsdb_schema * fetch_schema(struct jsonrpc *rpc, const char *database) { struct jsonrpc_msg *request, *reply; struct ovsdb_schema *schema; request = jsonrpc_create_request("get_schema", json_array_create_1( json_string_create(database)), NULL); check_txn(jsonrpc_transact_block(rpc, request, &reply), &reply); check_ovsdb_error(ovsdb_schema_from_json(reply->result, &schema)); jsonrpc_msg_destroy(reply); return schema; } static void fetch_dbs(struct jsonrpc *rpc, struct svec *dbs) { struct jsonrpc_msg *request, *reply; size_t i; request = jsonrpc_create_request("list_dbs", json_array_create_empty(), NULL); check_txn(jsonrpc_transact_block(rpc, request, &reply), &reply); if 
(reply->result->type != JSON_ARRAY) { ovs_fatal(0, "list_dbs response is not array"); } for (i = 0; i < reply->result->u.array.n; i++) { const struct json *name = reply->result->u.array.elems[i]; if (name->type != JSON_STRING) { ovs_fatal(0, "list_dbs response %zu is not string", i); } svec_add(dbs, name->u.string); } jsonrpc_msg_destroy(reply); svec_sort(dbs); } static void do_list_dbs(struct jsonrpc *rpc, const char *database OVS_UNUSED, int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { const char *db_name; struct svec dbs; size_t i; svec_init(&dbs); fetch_dbs(rpc, &dbs); SVEC_FOR_EACH (i, db_name, &dbs) { puts(db_name); } svec_destroy(&dbs); } static void do_get_schema(struct jsonrpc *rpc, const char *database, int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ovsdb_schema *schema = fetch_schema(rpc, database); print_and_free_json(ovsdb_schema_to_json(schema)); ovsdb_schema_destroy(schema); } static void do_get_schema_version(struct jsonrpc *rpc, const char *database, int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ovsdb_schema *schema = fetch_schema(rpc, database); puts(schema->version); ovsdb_schema_destroy(schema); } static void do_list_tables(struct jsonrpc *rpc, const char *database, int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ovsdb_schema *schema; struct shash_node *node; struct table t; schema = fetch_schema(rpc, database); table_init(&t); table_add_column(&t, "Table"); SHASH_FOR_EACH (node, &schema->tables) { struct ovsdb_table_schema *ts = node->data; table_add_row(&t); table_add_cell(&t)->text = xstrdup(ts->name); } ovsdb_schema_destroy(schema); table_print(&t, &table_style); } static void do_list_columns(struct jsonrpc *rpc, const char *database, int argc OVS_UNUSED, char *argv[]) { const char *table_name = argv[0]; struct ovsdb_schema *schema; struct shash_node *table_node; struct table t; schema = fetch_schema(rpc, database); table_init(&t); if (!table_name) { table_add_column(&t, "Table"); } table_add_column(&t, "Column"); 
table_add_column(&t, "Type"); SHASH_FOR_EACH (table_node, &schema->tables) { struct ovsdb_table_schema *ts = table_node->data; if (!table_name || !strcmp(table_name, ts->name)) { struct shash_node *column_node; SHASH_FOR_EACH (column_node, &ts->columns) { const struct ovsdb_column *column = column_node->data; table_add_row(&t); if (!table_name) { table_add_cell(&t)->text = xstrdup(ts->name); } table_add_cell(&t)->text = xstrdup(column->name); table_add_cell(&t)->json = ovsdb_type_to_json(&column->type); } } } ovsdb_schema_destroy(schema); table_print(&t, &table_style); } static void do_transact(struct jsonrpc *rpc, const char *database OVS_UNUSED, int argc OVS_UNUSED, char *argv[]) { struct jsonrpc_msg *request, *reply; struct json *transaction; transaction = parse_json(argv[0]); request = jsonrpc_create_request("transact", transaction, NULL); check_txn(jsonrpc_transact_block(rpc, request, &reply), &reply); print_json(reply->result); putchar('\n'); jsonrpc_msg_destroy(reply); } static void monitor_print_row(struct json *row, const char *type, const char *uuid, const struct ovsdb_column_set *columns, struct table *t) { size_t i; if (!row) { ovs_error(0, "missing %s row", type); return; } else if (row->type != JSON_OBJECT) { ovs_error(0, " is not object"); return; } table_add_row(t); table_add_cell(t)->text = xstrdup(uuid); table_add_cell(t)->text = xstrdup(type); for (i = 0; i < columns->n_columns; i++) { const struct ovsdb_column *column = columns->columns[i]; struct json *value = shash_find_data(json_object(row), column->name); struct cell *cell = table_add_cell(t); if (value) { cell->json = json_clone(value); cell->type = &column->type; } } } static void monitor_print(struct json *table_updates, const struct ovsdb_table_schema *table, const struct ovsdb_column_set *columns, bool initial) { struct json *table_update; struct shash_node *node; struct table t; size_t i; table_init(&t); table_set_timestamp(&t, timestamp); if (table_updates->type != JSON_OBJECT) { 
ovs_error(0, " is not object"); return; } table_update = shash_find_data(json_object(table_updates), table->name); if (!table_update) { return; } if (table_update->type != JSON_OBJECT) { ovs_error(0, " is not object"); return; } table_add_column(&t, "row"); table_add_column(&t, "action"); for (i = 0; i < columns->n_columns; i++) { table_add_column(&t, "%s", columns->columns[i]->name); } SHASH_FOR_EACH (node, json_object(table_update)) { struct json *row_update = node->data; struct json *old, *new; if (row_update->type != JSON_OBJECT) { ovs_error(0, " is not object"); continue; } old = shash_find_data(json_object(row_update), "old"); new = shash_find_data(json_object(row_update), "new"); if (initial) { monitor_print_row(new, "initial", node->name, columns, &t); } else if (!old) { monitor_print_row(new, "insert", node->name, columns, &t); } else if (!new) { monitor_print_row(old, "delete", node->name, columns, &t); } else { monitor_print_row(old, "old", node->name, columns, &t); monitor_print_row(new, "new", "", columns, &t); } } table_print(&t, &table_style); table_destroy(&t); } static void add_column(const char *server, const struct ovsdb_column *column, struct ovsdb_column_set *columns, struct json *columns_json) { if (ovsdb_column_set_contains(columns, column->index)) { ovs_fatal(0, "%s: column \"%s\" mentioned multiple times", server, column->name); } ovsdb_column_set_add(columns, column); json_array_add(columns_json, json_string_create(column->name)); } static struct json * parse_monitor_columns(char *arg, const char *server, const char *database, const struct ovsdb_table_schema *table, struct ovsdb_column_set *columns) { bool initial, insert, delete, modify; struct json *mr, *columns_json; char *save_ptr = NULL; char *token; mr = json_object_create(); columns_json = json_array_create_empty(); json_object_put(mr, "columns", columns_json); initial = insert = delete = modify = true; for (token = strtok_r(arg, ",", &save_ptr); token != NULL; token = 
strtok_r(NULL, ",", &save_ptr)) { if (!strcmp(token, "!initial")) { initial = false; } else if (!strcmp(token, "!insert")) { insert = false; } else if (!strcmp(token, "!delete")) { delete = false; } else if (!strcmp(token, "!modify")) { modify = false; } else { const struct ovsdb_column *column; column = ovsdb_table_schema_get_column(table, token); if (!column) { ovs_fatal(0, "%s: table \"%s\" in %s does not have a " "column named \"%s\"", server, table->name, database, token); } add_column(server, column, columns, columns_json); } } if (columns_json->u.array.n == 0) { const struct shash_node **nodes; size_t i, n; n = shash_count(&table->columns); nodes = shash_sort(&table->columns); for (i = 0; i < n; i++) { const struct ovsdb_column *column = nodes[i]->data; if (column->index != OVSDB_COL_UUID && column->index != OVSDB_COL_VERSION) { add_column(server, column, columns, columns_json); } } free(nodes); add_column(server, ovsdb_table_schema_get_column(table,"_version"), columns, columns_json); } if (!initial || !insert || !delete || !modify) { struct json *select = json_object_create(); json_object_put(select, "initial", json_boolean_create(initial)); json_object_put(select, "insert", json_boolean_create(insert)); json_object_put(select, "delete", json_boolean_create(delete)); json_object_put(select, "modify", json_boolean_create(modify)); json_object_put(mr, "select", select); } return mr; } static void do_monitor(struct jsonrpc *rpc, const char *database, int argc, char *argv[]) { const char *server = jsonrpc_get_name(rpc); const char *table_name = argv[0]; struct ovsdb_column_set columns = OVSDB_COLUMN_SET_INITIALIZER; struct ovsdb_table_schema *table; struct ovsdb_schema *schema; struct jsonrpc_msg *request; struct json *monitor, *monitor_request_array, *monitor_requests, *request_id; schema = fetch_schema(rpc, database); table = shash_find_data(&schema->tables, table_name); if (!table) { ovs_fatal(0, "%s: %s does not have a table named \"%s\"", server, 
database, table_name); } monitor_request_array = json_array_create_empty(); if (argc > 1) { int i; for (i = 1; i < argc; i++) { json_array_add( monitor_request_array, parse_monitor_columns(argv[i], server, database, table, &columns)); } } else { /* Allocate a writable empty string since parse_monitor_columns() is * going to strtok() it and that's risky with literal "". */ char empty[] = ""; json_array_add( monitor_request_array, parse_monitor_columns(empty, server, database, table, &columns)); } monitor_requests = json_object_create(); json_object_put(monitor_requests, table_name, monitor_request_array); monitor = json_array_create_3(json_string_create(database), json_null_create(), monitor_requests); request = jsonrpc_create_request("monitor", monitor, NULL); request_id = json_clone(request->id); jsonrpc_send(rpc, request); for (;;) { struct jsonrpc_msg *msg; int error; error = jsonrpc_recv_block(rpc, &msg); if (error) { ovsdb_schema_destroy(schema); ovs_fatal(error, "%s: receive failed", server); } if (msg->type == JSONRPC_REQUEST && !strcmp(msg->method, "echo")) { jsonrpc_send(rpc, jsonrpc_create_reply(json_clone(msg->params), msg->id)); } else if (msg->type == JSONRPC_REPLY && json_equal(msg->id, request_id)) { monitor_print(msg->result, table, &columns, true); fflush(stdout); if (get_detach()) { daemon_save_fd(STDOUT_FILENO); daemonize(); } } else if (msg->type == JSONRPC_NOTIFY && !strcmp(msg->method, "update")) { struct json *params = msg->params; if (params->type == JSON_ARRAY && params->u.array.n == 2 && params->u.array.elems[0]->type == JSON_NULL) { monitor_print(params->u.array.elems[1], table, &columns, false); fflush(stdout); } } jsonrpc_msg_destroy(msg); } } struct dump_table_aux { struct ovsdb_datum **data; const struct ovsdb_column **columns; size_t n_columns; }; static int compare_data(size_t a_y, size_t b_y, size_t x, const struct dump_table_aux *aux) { return ovsdb_datum_compare_3way(&aux->data[a_y][x], &aux->data[b_y][x], 
&aux->columns[x]->type); } static int compare_rows(size_t a_y, size_t b_y, void *aux_) { struct dump_table_aux *aux = aux_; size_t x; /* Skip UUID columns on the first pass, since their values tend to be * random and make our results less reproducible. */ for (x = 0; x < aux->n_columns; x++) { if (aux->columns[x]->type.key.type != OVSDB_TYPE_UUID) { int cmp = compare_data(a_y, b_y, x, aux); if (cmp) { return cmp; } } } /* Use UUID columns as tie-breakers. */ for (x = 0; x < aux->n_columns; x++) { if (aux->columns[x]->type.key.type == OVSDB_TYPE_UUID) { int cmp = compare_data(a_y, b_y, x, aux); if (cmp) { return cmp; } } } return 0; } static void swap_rows(size_t a_y, size_t b_y, void *aux_) { struct dump_table_aux *aux = aux_; struct ovsdb_datum *tmp = aux->data[a_y]; aux->data[a_y] = aux->data[b_y]; aux->data[b_y] = tmp; } static int compare_columns(const void *a_, const void *b_) { const struct ovsdb_column *const *ap = a_; const struct ovsdb_column *const *bp = b_; const struct ovsdb_column *a = *ap; const struct ovsdb_column *b = *bp; return strcmp(a->name, b->name); } static void dump_table(const struct ovsdb_table_schema *ts, struct json_array *rows) { const struct ovsdb_column **columns; size_t n_columns; struct ovsdb_datum **data; struct dump_table_aux aux; struct shash_node *node; struct table t; size_t x, y; /* Sort columns by name, for reproducibility. */ columns = xmalloc(shash_count(&ts->columns) * sizeof *columns); n_columns = 0; SHASH_FOR_EACH (node, &ts->columns) { struct ovsdb_column *column = node->data; if (strcmp(column->name, "_version")) { columns[n_columns++] = column; } } qsort(columns, n_columns, sizeof *columns, compare_columns); /* Extract data from table. 
*/ data = xmalloc(rows->n * sizeof *data); for (y = 0; y < rows->n; y++) { struct shash *row; if (rows->elems[y]->type != JSON_OBJECT) { ovs_fatal(0, "row %zu in table %s response is not a JSON object: " "%s", y, ts->name, json_to_string(rows->elems[y], 0)); } row = json_object(rows->elems[y]); data[y] = xmalloc(n_columns * sizeof **data); for (x = 0; x < n_columns; x++) { const struct json *json = shash_find_data(row, columns[x]->name); if (!json) { ovs_fatal(0, "row %zu in table %s response lacks %s column", y, ts->name, columns[x]->name); } check_ovsdb_error(ovsdb_datum_from_json(&data[y][x], &columns[x]->type, json, NULL)); } } /* Sort rows by column values, for reproducibility. */ aux.data = data; aux.columns = columns; aux.n_columns = n_columns; sort(rows->n, compare_rows, swap_rows, &aux); /* Add column headings. */ table_init(&t); table_set_caption(&t, xasprintf("%s table", ts->name)); for (x = 0; x < n_columns; x++) { table_add_column(&t, "%s", columns[x]->name); } /* Print rows. */ for (y = 0; y < rows->n; y++) { table_add_row(&t); for (x = 0; x < n_columns; x++) { struct cell *cell = table_add_cell(&t); cell->json = ovsdb_datum_to_json(&data[y][x], &columns[x]->type); cell->type = &columns[x]->type; ovsdb_datum_destroy(&data[y][x], &columns[x]->type); } free(data[y]); } table_print(&t, &table_style); table_destroy(&t); free(data); free(columns); } static void do_dump(struct jsonrpc *rpc, const char *database, int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct jsonrpc_msg *request, *reply; struct ovsdb_schema *schema; struct json *transaction; const struct shash_node **tables; size_t n_tables; size_t i; schema = fetch_schema(rpc, database); tables = shash_sort(&schema->tables); n_tables = shash_count(&schema->tables); /* Construct transaction to retrieve entire database. 
*/ transaction = json_array_create_1(json_string_create(database)); for (i = 0; i < n_tables; i++) { const struct ovsdb_table_schema *ts = tables[i]->data; struct json *op, *columns; struct shash_node *node; columns = json_array_create_empty(); SHASH_FOR_EACH (node, &ts->columns) { const struct ovsdb_column *column = node->data; if (strcmp(column->name, "_version")) { json_array_add(columns, json_string_create(column->name)); } } op = json_object_create(); json_object_put_string(op, "op", "select"); json_object_put_string(op, "table", tables[i]->name); json_object_put(op, "where", json_array_create_empty()); json_object_put(op, "columns", columns); json_array_add(transaction, op); } /* Send request, get reply. */ request = jsonrpc_create_request("transact", transaction, NULL); check_txn(jsonrpc_transact_block(rpc, request, &reply), &reply); /* Print database contents. */ if (reply->result->type != JSON_ARRAY || reply->result->u.array.n != n_tables) { ovs_fatal(0, "reply is not array of %zu elements: %s", n_tables, json_to_string(reply->result, 0)); } for (i = 0; i < n_tables; i++) { const struct ovsdb_table_schema *ts = tables[i]->data; const struct json *op_result = reply->result->u.array.elems[i]; struct json *rows; if (op_result->type != JSON_OBJECT || !(rows = shash_find_data(json_object(op_result), "rows")) || rows->type != JSON_ARRAY) { ovs_fatal(0, "%s table reply is not an object with a \"rows\" " "member array: %s", ts->name, json_to_string(op_result, 0)); } dump_table(ts, &rows->u.array); } jsonrpc_msg_destroy(reply); free(tables); ovsdb_schema_destroy(schema); } static void do_help(struct jsonrpc *rpc OVS_UNUSED, const char *database OVS_UNUSED, int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { usage(); } /* All command handlers (except for "help") are expected to take an optional * server socket name (e.g. "unix:...") as their first argument. The socket * name argument must be included in max_args (but left out of min_args). 
The * command name and socket name are not included in the arguments passed to the * handler: the argv[0] passed to the handler is the first argument after the * optional server socket name. The connection to the server is available as * global variable 'rpc'. */ static const struct ovsdb_client_command all_commands[] = { { "list-dbs", NEED_RPC, 0, 0, do_list_dbs }, { "get-schema", NEED_DATABASE, 0, 0, do_get_schema }, { "get-schema-version", NEED_DATABASE, 0, 0, do_get_schema_version }, { "list-tables", NEED_DATABASE, 0, 0, do_list_tables }, { "list-columns", NEED_DATABASE, 0, 1, do_list_columns }, { "transact", NEED_RPC, 1, 1, do_transact }, { "monitor", NEED_DATABASE, 1, INT_MAX, do_monitor }, { "dump", NEED_DATABASE, 0, 0, do_dump }, { "help", NEED_NONE, 0, INT_MAX, do_help }, { NULL, 0, 0, 0, NULL }, }; static const struct ovsdb_client_command *get_all_commands(void) { return all_commands; } openvswitch-2.0.1+git20140120/ovsdb/ovsdb-doc000077500000000000000000000342061226605124000204020ustar00rootroot00000000000000#! /usr/bin/python from datetime import date import getopt import os import re import sys import xml.dom.minidom import ovs.json from ovs.db import error import ovs.db.schema argv0 = sys.argv[0] def textToNroff(s, font=r'\fR'): def escape(match): c = match.group(0) if c.startswith('-'): if c != '-' or font == r'\fB': return '\\' + c else: return '-' if c == '\\': return r'\e' elif c == '"': return r'\(dq' elif c == "'": return r'\(cq' else: raise error.Error("bad escape") # Escape - \ " ' as needed by nroff. 
s = re.sub('(-[0-9]|[-"\'\\\\])', escape, s) if s.startswith('.'): s = '\\' + s return s def escapeNroffLiteral(s): return r'\fB%s\fR' % textToNroff(s, r'\fB') def inlineXmlToNroff(node, font): if node.nodeType == node.TEXT_NODE: return textToNroff(node.data, font) elif node.nodeType == node.ELEMENT_NODE: if node.tagName in ['code', 'em', 'option']: s = r'\fB' for child in node.childNodes: s += inlineXmlToNroff(child, r'\fB') return s + font elif node.tagName == 'ref': s = r'\fB' if node.hasAttribute('column'): s += node.attributes['column'].nodeValue if node.hasAttribute('key'): s += ':' + node.attributes['key'].nodeValue elif node.hasAttribute('table'): s += node.attributes['table'].nodeValue elif node.hasAttribute('group'): s += node.attributes['group'].nodeValue else: raise error.Error("'ref' lacks required attributes: %s" % node.attributes.keys()) return s + font elif node.tagName == 'var': s = r'\fI' for child in node.childNodes: s += inlineXmlToNroff(child, r'\fI') return s + font else: raise error.Error("element <%s> unknown or invalid here" % node.tagName) else: raise error.Error("unknown node %s in inline xml" % node) def blockXmlToNroff(nodes, para='.PP'): s = '' for node in nodes: if node.nodeType == node.TEXT_NODE: s += textToNroff(node.data) s = s.lstrip() elif node.nodeType == node.ELEMENT_NODE: if node.tagName in ['ul', 'ol']: if s != "": s += "\n" s += ".RS\n" i = 0 for liNode in node.childNodes: if (liNode.nodeType == node.ELEMENT_NODE and liNode.tagName == 'li'): i += 1 if node.tagName == 'ul': s += ".IP \\(bu\n" else: s += ".IP %d. .25in\n" % i s += blockXmlToNroff(liNode.childNodes, ".IP") elif (liNode.nodeType != node.TEXT_NODE or not liNode.data.isspace()): raise error.Error("<%s> element may only have
  • children" % node.tagName) s += ".RE\n" elif node.tagName == 'dl': if s != "": s += "\n" s += ".RS\n" prev = "dd" for liNode in node.childNodes: if (liNode.nodeType == node.ELEMENT_NODE and liNode.tagName == 'dt'): if prev == 'dd': s += '.TP\n' else: s += '.TQ\n' prev = 'dt' elif (liNode.nodeType == node.ELEMENT_NODE and liNode.tagName == 'dd'): if prev == 'dd': s += '.IP\n' prev = 'dd' elif (liNode.nodeType != node.TEXT_NODE or not liNode.data.isspace()): raise error.Error("
    element may only have
    and
    children") s += blockXmlToNroff(liNode.childNodes, ".IP") s += ".RE\n" elif node.tagName == 'p': if s != "": if not s.endswith("\n"): s += "\n" s += para + "\n" s += blockXmlToNroff(node.childNodes, para) elif node.tagName in ('h1', 'h2', 'h3'): if s != "": if not s.endswith("\n"): s += "\n" nroffTag = {'h1': 'SH', 'h2': 'SS', 'h3': 'ST'}[node.tagName] s += ".%s " % nroffTag for child_node in node.childNodes: s += inlineXmlToNroff(child_node, r'\fR') s += "\n" else: s += inlineXmlToNroff(node, r'\fR') else: raise error.Error("unknown node %s in block xml" % node) if s != "" and not s.endswith('\n'): s += '\n' return s def typeAndConstraintsToNroff(column): type = column.type.toEnglish(escapeNroffLiteral) constraints = column.type.constraintsToEnglish(escapeNroffLiteral, textToNroff) if constraints: type += ", " + constraints if column.unique: type += " (must be unique within table)" return type def columnGroupToNroff(table, groupXml): introNodes = [] columnNodes = [] for node in groupXml.childNodes: if (node.nodeType == node.ELEMENT_NODE and node.tagName in ('column', 'group')): columnNodes += [node] else: if (columnNodes and not (node.nodeType == node.TEXT_NODE and node.data.isspace())): raise error.Error("text follows or inside : %s" % node) introNodes += [node] summary = [] intro = blockXmlToNroff(introNodes) body = '' for node in columnNodes: if node.tagName == 'column': name = node.attributes['name'].nodeValue column = table.columns[name] if node.hasAttribute('key'): key = node.attributes['key'].nodeValue if node.hasAttribute('type'): type_string = node.attributes['type'].nodeValue type_json = ovs.json.from_string(str(type_string)) if type(type_json) in (str, unicode): raise error.Error("%s %s:%s has invalid 'type': %s" % (table.name, name, key, type_json)) type_ = ovs.db.types.BaseType.from_json(type_json) else: type_ = column.type.value nameNroff = "%s : %s" % (name, key) if column.type.value: typeNroff = "optional %s" % column.type.value.toEnglish( 
escapeNroffLiteral) if (column.type.value.type == ovs.db.types.StringType and type_.type == ovs.db.types.BooleanType): # This is a little more explicit and helpful than # "containing a boolean" typeNroff += r", either \fBtrue\fR or \fBfalse\fR" else: if type_.type != column.type.value.type: type_english = type_.toEnglish() if type_english[0] in 'aeiou': typeNroff += ", containing an %s" % type_english else: typeNroff += ", containing a %s" % type_english constraints = ( type_.constraintsToEnglish(escapeNroffLiteral, textToNroff)) if constraints: typeNroff += ", %s" % constraints else: typeNroff = "none" else: nameNroff = name typeNroff = typeAndConstraintsToNroff(column) body += '.IP "\\fB%s\\fR: %s"\n' % (nameNroff, typeNroff) body += blockXmlToNroff(node.childNodes, '.IP') + "\n" summary += [('column', nameNroff, typeNroff)] elif node.tagName == 'group': title = node.attributes["title"].nodeValue subSummary, subIntro, subBody = columnGroupToNroff(table, node) summary += [('group', title, subSummary)] body += '.ST "%s:"\n' % textToNroff(title) body += subIntro + subBody else: raise error.Error("unknown element %s in
  • " % node.tagName) return summary, intro, body def tableSummaryToNroff(summary, level=0): s = "" for type, name, arg in summary: if type == 'column': s += ".TQ %.2fin\n\\fB%s\\fR\n%s\n" % (3 - level * .25, name, arg) else: s += ".TQ .25in\n\\fI%s:\\fR\n.RS .25in\n" % name s += tableSummaryToNroff(arg, level + 1) s += ".RE\n" return s def tableToNroff(schema, tableXml): tableName = tableXml.attributes['name'].nodeValue table = schema.tables[tableName] s = """.bp .SH "%s TABLE" """ % tableName summary, intro, body = columnGroupToNroff(table, tableXml) s += intro s += '.SS "Summary:\n' s += tableSummaryToNroff(summary) s += '.SS "Details:\n' s += body return s def docsToNroff(schemaFile, xmlFile, erFile, title=None, version=None): schema = ovs.db.schema.DbSchema.from_json(ovs.json.from_file(schemaFile)) doc = xml.dom.minidom.parse(xmlFile).documentElement schemaDate = os.stat(schemaFile).st_mtime xmlDate = os.stat(xmlFile).st_mtime d = date.fromtimestamp(max(schemaDate, xmlDate)) if title == None: title = schema.name if version == None: version = "UNKNOWN" # Putting '\" p as the first line tells "man" that the manpage # needs to be preprocessed by "pic". s = r''''\" p .TH "%s" 5 "%s" "Open vSwitch" "Open vSwitch Manual" .\" -*- nroff -*- .de TQ . br . ns . TP "\\$1" .. .de ST . PP . RS -0.15in . I "\\$1" . RE .. .SH NAME %s \- %s database schema .PP ''' % (title, version, textToNroff(schema.name), schema.name) tables = "" introNodes = [] tableNodes = [] summary = [] for dbNode in doc.childNodes: if (dbNode.nodeType == dbNode.ELEMENT_NODE and dbNode.tagName == "table"): tableNodes += [dbNode] name = dbNode.attributes['name'].nodeValue if dbNode.hasAttribute("title"): title = dbNode.attributes['title'].nodeValue else: title = name + " configuration." summary += [(name, title)] else: introNodes += [dbNode] s += blockXmlToNroff(introNodes) + "\n" s += r""" .SH "TABLE SUMMARY" .PP The following list summarizes the purpose of each of the tables in the \fB%s\fR database. 
Each table is described in more detail on a later page. .IP "Table" 1in Purpose """ % schema.name for name, title in summary: s += r""" .TQ 1in \fB%s\fR %s """ % (name, textToNroff(title)) if erFile: s += """ .\\" check if in troff mode (TTY) .if t \{ .bp .SH "TABLE RELATIONSHIPS" .PP The following diagram shows the relationship among tables in the database. Each node represents a table. Tables that are part of the ``root set'' are shown with double borders. Each edge leads from the table that contains it and points to the table that its value represents. Edges are labeled with their column names, followed by a constraint on the number of allowed values: \\fB?\\fR for zero or one, \\fB*\\fR for zero or more, \\fB+\\fR for one or more. Thick lines represent strong references; thin lines represent weak references. .RS -1in """ erStream = open(erFile, "r") for line in erStream: s += line + '\n' erStream.close() s += ".RE\\}\n" for node in tableNodes: s += tableToNroff(schema, node) + "\n" return s def usage(): print """\ %(argv0)s: ovsdb schema documentation generator Prints documentation for an OVSDB schema as an nroff-formatted manpage. usage: %(argv0)s [OPTIONS] SCHEMA XML where SCHEMA is an OVSDB schema in JSON format and XML is OVSDB documentation in XML format. 
The following options are also available: --er-diagram=DIAGRAM.PIC include E-R diagram from DIAGRAM.PIC --title=TITLE use TITLE as title instead of schema name --version=VERSION use VERSION to display on document footer -h, --help display this help message\ """ % {'argv0': argv0} sys.exit(0) if __name__ == "__main__": try: try: options, args = getopt.gnu_getopt(sys.argv[1:], 'hV', ['er-diagram=', 'title=', 'version=', 'help']) except getopt.GetoptError, geo: sys.stderr.write("%s: %s\n" % (argv0, geo.msg)) sys.exit(1) er_diagram = None title = None version = None for key, value in options: if key == '--er-diagram': er_diagram = value elif key == '--title': title = value elif key == '--version': version = value elif key in ['-h', '--help']: usage() else: sys.exit(0) if len(args) != 2: sys.stderr.write("%s: exactly 2 non-option arguments required " "(use --help for help)\n" % argv0) sys.exit(1) # XXX we should warn about undocumented tables or columns s = docsToNroff(args[0], args[1], er_diagram, title, version) for line in s.split("\n"): line = line.strip() if len(line): print line except error.Error, e: sys.stderr.write("%s: %s\n" % (argv0, e.msg)) sys.exit(1) # Local variables: # mode: python # End: openvswitch-2.0.1+git20140120/ovsdb/ovsdb-dot.in000077500000000000000000000064321226605124000210300ustar00rootroot00000000000000#! @PYTHON@ from datetime import date import ovs.db.error import ovs.db.schema import getopt import os import re import sys argv0 = sys.argv[0] def printEdge(tableName, type, baseType, label): if baseType.ref_table_name: if type.n_min == 0: if type.n_max == 1: arity = "?" 
elif type.n_max == sys.maxint: arity = "*" else: arity = "{,%d}" % type.n_max elif type.n_min == 1: if type.n_max == 1: arity = "" elif type.n_max == sys.maxint: arity = "+" else: arity = "{1,%d}" % type.n_max options = {} options['label'] = '"%s%s"' % (label, arity) if baseType.ref_type == 'weak': options['constraint'] = 'false' options['style'] = 'dotted' print "\t%s -> %s [%s];" % ( tableName, baseType.ref_table_name, ', '.join(['%s=%s' % (k,v) for k,v in options.items()])) def schemaToDot(schemaFile): schema = ovs.db.schema.DbSchema.from_json(ovs.json.from_file(schemaFile)) print "digraph %s {" % schema.name print '\tsize="6.5,4";' print '\tmargin="0";' print "\tnode [shape=box];" print "\tedge [dir=none, arrowhead=none, arrowtail=none];" for tableName, table in schema.tables.iteritems(): options = {} if table.is_root: options['style'] = 'bold' print "\t%s [%s];" % ( tableName, ', '.join(['%s=%s' % (k,v) for k,v in options.items()])) for columnName, column in table.columns.iteritems(): if column.type.value: printEdge(tableName, column.type, column.type.key, "%s key" % columnName) printEdge(tableName, column.type, column.type.value, "%s value" % columnName) else: printEdge(tableName, column.type, column.type.key, columnName) print "}"; def usage(): print """\ %(argv0)s: compiles ovsdb schemas to graphviz format Prints a .dot file that "dot" can render to an entity-relationship diagram usage: %(argv0)s [OPTIONS] SCHEMA where SCHEMA is an OVSDB schema in JSON format The following options are also available: -h, --help display this help message -V, --version display version information\ """ % {'argv0': argv0} sys.exit(0) if __name__ == "__main__": try: try: options, args = getopt.gnu_getopt(sys.argv[1:], 'hV', ['help', 'version']) except getopt.GetoptError, geo: sys.stderr.write("%s: %s\n" % (argv0, geo.msg)) sys.exit(1) for key, value in options: if key in ['-h', '--help']: usage() elif key in ['-V', '--version']: print "ovsdb-dot (Open vSwitch) @VERSION@" else: 
sys.exit(0) if len(args) != 1: sys.stderr.write("%s: exactly 1 non-option argument required " "(use --help for help)\n" % argv0) sys.exit(1) schemaToDot(args[0]) except ovs.db.error.Error, e: sys.stderr.write("%s: %s\n" % (argv0, e.msg)) sys.exit(1) # Local variables: # mode: python # End: openvswitch-2.0.1+git20140120/ovsdb/ovsdb-idlc.1000066400000000000000000000051131226605124000206770ustar00rootroot00000000000000.\" -*- nroff -*- .TH ovsdb\-idlc 1 "November 2009" "Open vSwitch" "Open vSwitch Manual" .ds PN ovsdb\-idlc . .SH NAME ovsdb\-idlc \- Open vSwitch IDL (Interface Definition Language) compiler . .SH SYNOPSIS \fBovsdb\-idlc \fBannotate\fI schema annotations\fR .br \fBovsdb\-idlc \fBc\-idl\-header\fI idl\fR .br \fBovsdb\-idlc \fBc\-idl\-source\fI idl\fR .br \fBovsdb\-idlc \-\-help\fR .br \fBovsdb\-idlc \-\-version\fR . .SH DESCRIPTION The \fBovsdb\-idlc\fR program is a command-line tool for translating Open vSwitch database interface definition language (IDL) schemas into other formats. It is used while building Open vSwitch, not at installation or configuration time. Thus, it is not normally installed as part of Open vSwitch. . .PP The \fIidl\fR files used as input for most \fBovsdb\-idlc\fR commands have the same format as the OVSDB schemas, specified in the OVSDB specification, with a few additions: . .IP "\fB""\fBidlPrefix\fR"" member of " This member, which is required, specifies a string that is prefixed to top-level names in C bindings. It should probably end in an underscore. . .IP "\fB""\fBidlHeader\fR"" member of " This member, which is required, specifies the name of the IDL header. It will be output on an \fB#include\fR line in the source file generated by the C bindings. It should include the bracketing \fB""\fR or \fB<>\fR. . 
.SS "Commands" .IP "\fBannotate\fI schema annotations\fR" Reads \fIschema\fR, which should be a file in JSON format (ordinarily an OVSDB schema file), then reads and executes the Python syntax fragment in \fIannotations\fR. The Python syntax fragment is passed the JSON object as a local variable named \fBs\fR. It may modify this data in any way. After the Python code returns, the object as modified is re-serialized as JSON on standard output. . .IP "\fBc\-idl\-header\fI idl\fR" Reads \fIidl\fR and prints on standard output a C header file that defines a structure for each table defined by the schema. . .IP "\fBc\-idl\-source\fI idl\fR" Reads \fIidl\fR and prints on standard output a C source file that implements C bindings for the database defined by the schema. . .IP "\fBdoc\fI idl\fR" Reads \fIidl\fR and prints on standard output a text file that documents the schema. The output may have very long lines, so it makes sense to pipe it through, e.g. \fBfmt \-s\fR. . .SS "Options" .so lib/common.man . .SH "BUGS" \fBovsdb\-idlc\fR is more lenient about the format of OVSDB schemas than other OVSDB tools. It may successfully parse schemas that, e.g., \fBovsdb\-tool\fR rejects. . .SH "SEE ALSO" The OVSDB specification. openvswitch-2.0.1+git20140120/ovsdb/ovsdb-idlc.in000077500000000000000000000672261226605124000211650ustar00rootroot00000000000000#! 
@PYTHON@

import getopt
import os
import re
import sys

import ovs.json
import ovs.db.error
import ovs.db.schema

# Program name as invoked; used as the prefix of every diagnostic.
argv0 = sys.argv[0]


def parseSchema(filename):
    """Read the JSON in 'filename' and return it parsed as an IdlSchema."""
    return ovs.db.schema.IdlSchema.from_json(ovs.json.from_file(filename))


def annotateSchema(schemaFile, annotationFile):
    """Apply the Python fragment in 'annotationFile' to a JSON schema.

    The JSON read from 'schemaFile' is exposed to the fragment as the local
    variable 's'.  After the fragment has run, the (possibly modified)
    object is written to stdout as JSON, followed by a newline.

    NOTE(review): 'annotationFile' is executed as Python code via
    execfile(), so it must come from a trusted source.
    """
    schemaJson = ovs.json.from_file(schemaFile)
    execfile(annotationFile, globals(), {"s": schemaJson})
    ovs.json.to_stream(schemaJson, sys.stdout)
    sys.stdout.write('\n')


def constify(cType, const):
    """Return 'cType' prefixed with "const " when that is requested.

    The prefix is added only if 'const' is true and 'cType' ends in exactly
    one '*'; types ending in '**' and non-pointer types are returned
    unchanged.
    """
    if (const
        and cType.endswith('*')
        and not cType.endswith('**')):
        return 'const %s' % cType
    else:
        return cType


def cMembers(prefix, columnName, column, const):
    """Return the C members that represent 'column', as a list of dicts.

    Each dict has the keys 'name', 'type' and 'comment'.  'prefix' is passed
    to toCType() when building type names, and 'const' is passed through to
    constify() for each member type.
    """
    # NOTE: 'type' shadows the builtin of the same name inside this function.
    type = column.type

    # An smap column is a single 'struct smap' member.
    if type.is_smap():
        return [{'name': columnName,
                 'type': 'struct smap ',
                 'comment': ''}]

    if type.n_min == 1 and type.n_max == 1:
        singleton = True
        pointer = ''
    else:
        singleton = False
        if type.is_optional_pointer():
            pointer = ''
        else:
            pointer = '*'

    if type.value:
        # Map column: separate key_<name> and value_<name> members.
        key = {'name': "key_%s" % columnName,
               'type': constify(type.key.toCType(prefix) + pointer, const),
               'comment': ''}
        value = {'name': "value_%s" % columnName,
                 'type': constify(type.value.toCType(prefix) + pointer, const),
                 'comment': ''}
        members = [key, value]
    else:
        m = {'name': columnName,
             'type': constify(type.key.toCType(prefix) + pointer, const),
             'comment': type.cDeclComment()}
        members = [m]

    # Columns that are neither singletons nor optional pointers also get an
    # element count, n_<name>.
    if not singleton and not type.is_optional_pointer():
        members.append({'name': 'n_%s' % columnName,
                        'type': 'size_t ',
                        'comment': ''})
    return members

def printCIDLHeader(schemaFile): schema = parseSchema(schemaFile) prefix = schema.idlPrefix print '''\ /* Generated automatically -- do not modify! -*- buffer-read-only: t -*- */ #ifndef %(prefix)sIDL_HEADER #define %(prefix)sIDL_HEADER 1 #include #include #include #include "ovsdb-data.h" #include "ovsdb-idl-provider.h" #include "smap.h" #include "uuid.h"''' % {'prefix': prefix.upper()} for tableName, table in sorted(schema.tables.iteritems()): structName = "%s%s" % (prefix, tableName.lower()) print " " print "/* %s table. 
*/" % tableName print "struct %s {" % structName print "\tstruct ovsdb_idl_row header_;" for columnName, column in sorted(table.columns.iteritems()): print "\n\t/* %s column. */" % columnName for member in cMembers(prefix, columnName, column, False): print "\t%(type)s%(name)s;%(comment)s" % member print "};" # Column indexes. printEnum(["%s_COL_%s" % (structName.upper(), columnName.upper()) for columnName in sorted(table.columns)] + ["%s_N_COLUMNS" % structName.upper()]) print for columnName in table.columns: print "#define %(s)s_col_%(c)s (%(s)s_columns[%(S)s_COL_%(C)s])" % { 's': structName, 'S': structName.upper(), 'c': columnName, 'C': columnName.upper()} print "\nextern struct ovsdb_idl_column %s_columns[%s_N_COLUMNS];" % (structName, structName.upper()) print ''' const struct %(s)s *%(s)s_first(const struct ovsdb_idl *); const struct %(s)s *%(s)s_next(const struct %(s)s *); #define %(S)s_FOR_EACH(ROW, IDL) \\ for ((ROW) = %(s)s_first(IDL); \\ (ROW); \\ (ROW) = %(s)s_next(ROW)) #define %(S)s_FOR_EACH_SAFE(ROW, NEXT, IDL) \\ for ((ROW) = %(s)s_first(IDL); \\ (ROW) ? ((NEXT) = %(s)s_next(ROW), 1) : 0; \\ (ROW) = (NEXT)) void %(s)s_init(struct %(s)s *); void %(s)s_delete(const struct %(s)s *); struct %(s)s *%(s)s_insert(struct ovsdb_idl_txn *); ''' % {'s': structName, 'S': structName.upper()} for columnName, column in sorted(table.columns.iteritems()): print 'void %(s)s_verify_%(c)s(const struct %(s)s *);' % {'s': structName, 'c': columnName} print """ /* Functions for fetching columns as \"struct ovsdb_datum\"s. (This is rarely useful. More often, it is easier to access columns by using the members of %(s)s directly.) 
*/""" % {'s': structName} for columnName, column in sorted(table.columns.iteritems()): if column.type.value: valueParam = ', enum ovsdb_atomic_type value_type' else: valueParam = '' print 'const struct ovsdb_datum *%(s)s_get_%(c)s(const struct %(s)s *, enum ovsdb_atomic_type key_type%(v)s);' % { 's': structName, 'c': columnName, 'v': valueParam} print for columnName, column in sorted(table.columns.iteritems()): print 'void %(s)s_set_%(c)s(const struct %(s)s *,' % {'s': structName, 'c': columnName}, if column.type.is_smap(): args = ['const struct smap *'] else: args = ['%(type)s%(name)s' % member for member in cMembers(prefix, columnName, column, True)] print '%s);' % ', '.join(args) print # Table indexes. printEnum(["%sTABLE_%s" % (prefix.upper(), tableName.upper()) for tableName in sorted(schema.tables)] + ["%sN_TABLES" % prefix.upper()]) print for tableName in schema.tables: print "#define %(p)stable_%(t)s (%(p)stable_classes[%(P)sTABLE_%(T)s])" % { 'p': prefix, 'P': prefix.upper(), 't': tableName.lower(), 'T': tableName.upper()} print "\nextern struct ovsdb_idl_table_class %stable_classes[%sN_TABLES];" % (prefix, prefix.upper()) print "\nextern struct ovsdb_idl_class %sidl_class;" % prefix print "\nvoid %sinit(void);" % prefix print "\n#endif /* %(prefix)sIDL_HEADER */" % {'prefix': prefix.upper()} def printEnum(members): if len(members) == 0: return print "\nenum {"; for member in members[:-1]: print " %s," % member print " %s" % members[-1] print "};" def printCIDLSource(schemaFile): schema = parseSchema(schemaFile) prefix = schema.idlPrefix print '''\ /* Generated automatically -- do not modify! -*- buffer-read-only: t -*- */ #include #include %s #include #include "ovs-thread.h" #include "ovsdb-data.h" #include "ovsdb-error.h" #include "util.h" #ifdef __CHECKER__ /* Sparse dislikes sizeof(bool) ("warning: expression using sizeof bool"). 
*/ enum { sizeof_bool = 1 }; #else enum { sizeof_bool = sizeof(bool) }; #endif static bool inited; ''' % schema.idlHeader # Cast functions. for tableName, table in sorted(schema.tables.iteritems()): structName = "%s%s" % (prefix, tableName.lower()) print ''' static struct %(s)s * %(s)s_cast(const struct ovsdb_idl_row *row) { return row ? CONTAINER_OF(row, struct %(s)s, header_) : NULL; }\ ''' % {'s': structName} for tableName, table in sorted(schema.tables.iteritems()): structName = "%s%s" % (prefix, tableName.lower()) print " " print "/* %s table. */" % (tableName) # Parse functions. for columnName, column in sorted(table.columns.iteritems()): print ''' static void %(s)s_parse_%(c)s(struct ovsdb_idl_row *row_, const struct ovsdb_datum *datum) { struct %(s)s *row = %(s)s_cast(row_);''' % {'s': structName, 'c': columnName} type = column.type if type.value: keyVar = "row->key_%s" % columnName valueVar = "row->value_%s" % columnName else: keyVar = "row->%s" % columnName valueVar = None if type.is_smap(): print " size_t i;" print print " ovs_assert(inited);" print " smap_init(&row->%s);" % columnName print " for (i = 0; i < datum->n; i++) {" print " smap_add(&row->%s," % columnName print " datum->keys[i].string," print " datum->values[i].string);" print " }" elif (type.n_min == 1 and type.n_max == 1) or type.is_optional_pointer(): print print " ovs_assert(inited);" print " if (datum->n >= 1) {" if not type.key.ref_table: print " %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_string()) else: print " %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_classes[%sTABLE_%s], &datum->keys[0].uuid));" % (keyVar, prefix, type.key.ref_table.name.lower(), prefix, prefix.upper(), type.key.ref_table.name.upper()) if valueVar: if type.value.ref_table: print " %s = datum->values[0].%s;" % (valueVar, type.value.type.to_string()) else: print " %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_classes[%sTABLE_%s], &datum->values[0].uuid));" % (valueVar, prefix, 
type.value.ref_table.name.lower(), prefix, prefix.upper(), type.value.ref_table.name.upper()) print " } else {" print " %s" % type.key.initCDefault(keyVar, type.n_min == 0) if valueVar: print " %s" % type.value.initCDefault(valueVar, type.n_min == 0) print " }" else: if type.n_max != sys.maxint: print " size_t n = MIN(%d, datum->n);" % type.n_max nMax = "n" else: nMax = "datum->n" print " size_t i;" print print " ovs_assert(inited);" print " %s = NULL;" % keyVar if valueVar: print " %s = NULL;" % valueVar print " row->n_%s = 0;" % columnName print " for (i = 0; i < %s; i++) {" % nMax refs = [] if type.key.ref_table: print " struct %s%s *keyRow = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_classes[%sTABLE_%s], &datum->keys[i].uuid));" % (prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower(), prefix, prefix.upper(), type.key.ref_table.name.upper()) keySrc = "keyRow" refs.append('keyRow') else: keySrc = "datum->keys[i].%s" % type.key.type.to_string() if type.value and type.value.ref_table: print " struct %s%s *valueRow = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_classes[%sTABLE_%s], &datum->values[i].uuid));" % (prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower(), prefix, prefix.upper(), type.value.ref_table.name.upper()) valueSrc = "valueRow" refs.append('valueRow') elif valueVar: valueSrc = "datum->values[i].%s" % type.value.type.to_string() if refs: print " if (%s) {" % ' && '.join(refs) indent = " " else: indent = " " print "%sif (!row->n_%s) {" % (indent, columnName) # Special case for boolean types. This is only here because # sparse does not like the "normal" case ("warning: expression # using sizeof bool"). if type.key.type == ovs.db.types.BooleanType: sizeof = "sizeof_bool" else: sizeof = "sizeof *%s" % keyVar print "%s %s = xmalloc(%s * %s);" % (indent, keyVar, nMax, sizeof) if valueVar: # Special case for boolean types (see above). 
if type.value.type == ovs.db.types.BooleanType: sizeof = " * sizeof_bool" else: sizeof = "sizeof *%s" % valueVar print "%s %s = xmalloc(%s * %s);" % (indent, valueVar, nMax, sizeof) print "%s}" % indent print "%s%s[row->n_%s] = %s;" % (indent, keyVar, columnName, keySrc) if valueVar: print "%s%s[row->n_%s] = %s;" % (indent, valueVar, columnName, valueSrc) print "%srow->n_%s++;" % (indent, columnName) if refs: print " }" print " }" print "}" # Unparse functions. for columnName, column in sorted(table.columns.iteritems()): type = column.type if type.is_smap() or (type.n_min != 1 or type.n_max != 1) and not type.is_optional_pointer(): print ''' static void %(s)s_unparse_%(c)s(struct ovsdb_idl_row *row_) { struct %(s)s *row = %(s)s_cast(row_); ovs_assert(inited);''' % {'s': structName, 'c': columnName} if type.is_smap(): print " smap_destroy(&row->%s);" % columnName else: if type.value: keyVar = "row->key_%s" % columnName valueVar = "row->value_%s" % columnName else: keyVar = "row->%s" % columnName valueVar = None print " free(%s);" % keyVar if valueVar: print " free(%s);" % valueVar print '}' else: print ''' static void %(s)s_unparse_%(c)s(struct ovsdb_idl_row *row OVS_UNUSED) { /* Nothing to do. */ }''' % {'s': structName, 'c': columnName} # Generic Row Initialization function. print """ static void %(s)s_init__(struct ovsdb_idl_row *row) { %(s)s_init(%(s)s_cast(row)); }""" % {'s': structName} # Row Initialization function. print """ void %(s)s_init(struct %(s)s *row) { memset(row, 0, sizeof *row); """ % {'s': structName} for columnName, column in sorted(table.columns.iteritems()): if column.type.is_smap(): print " smap_init(&row->%s);" % columnName print "}" # First, next functions. 
print ''' const struct %(s)s * %(s)s_first(const struct ovsdb_idl *idl) { return %(s)s_cast(ovsdb_idl_first_row(idl, &%(p)stable_classes[%(P)sTABLE_%(T)s])); } const struct %(s)s * %(s)s_next(const struct %(s)s *row) { return %(s)s_cast(ovsdb_idl_next_row(&row->header_)); }''' % {'s': structName, 'p': prefix, 'P': prefix.upper(), 'T': tableName.upper()} print ''' void %(s)s_delete(const struct %(s)s *row) { ovsdb_idl_txn_delete(&row->header_); } struct %(s)s * %(s)s_insert(struct ovsdb_idl_txn *txn) { return %(s)s_cast(ovsdb_idl_txn_insert(txn, &%(p)stable_classes[%(P)sTABLE_%(T)s], NULL)); } ''' % {'s': structName, 'p': prefix, 'P': prefix.upper(), 'T': tableName.upper()} # Verify functions. for columnName, column in sorted(table.columns.iteritems()): print ''' void %(s)s_verify_%(c)s(const struct %(s)s *row) { ovs_assert(inited); ovsdb_idl_txn_verify(&row->header_, &%(s)s_columns[%(S)s_COL_%(C)s]); }''' % {'s': structName, 'S': structName.upper(), 'c': columnName, 'C': columnName.upper()} # Get functions. for columnName, column in sorted(table.columns.iteritems()): if column.type.value: valueParam = ',\n\tenum ovsdb_atomic_type value_type OVS_UNUSED' valueType = '\n ovs_assert(value_type == %s);' % column.type.value.toAtomicType() valueComment = "\n * 'value_type' must be %s." % column.type.value.toAtomicType() else: valueParam = '' valueType = '' valueComment = '' print """ /* Returns the %(c)s column's value in 'row' as a struct ovsdb_datum. * This is useful occasionally: for example, ovsdb_datum_find_key() is an * easier and more efficient way to search for a given key than implementing * the same operation on the "cooked" form in 'row'. * * 'key_type' must be %(kt)s.%(vc)s * (This helps to avoid silent bugs if someone changes %(c)s's * type without updating the caller.) * * The caller must not modify or free the returned value. 
* * Various kinds of changes can invalidate the returned value: modifying * 'column' within 'row', deleting 'row', or completing an ongoing transaction. * If the returned value is needed for a long time, it is best to make a copy * of it with ovsdb_datum_clone(). */ const struct ovsdb_datum * %(s)s_get_%(c)s(const struct %(s)s *row, \tenum ovsdb_atomic_type key_type OVS_UNUSED%(v)s) { ovs_assert(key_type == %(kt)s);%(vt)s return ovsdb_idl_read(&row->header_, &%(s)s_col_%(c)s); }""" % {'s': structName, 'c': columnName, 'kt': column.type.key.toAtomicType(), 'v': valueParam, 'vt': valueType, 'vc': valueComment} # Set functions. for columnName, column in sorted(table.columns.iteritems()): type = column.type if type.is_smap(): print """ void %(s)s_set_%(c)s(const struct %(s)s *row, const struct smap *smap) { struct ovsdb_datum datum; ovs_assert(inited); if (smap) { struct smap_node *node; size_t i; datum.n = smap_count(smap); datum.keys = xmalloc(datum.n * sizeof *datum.keys); datum.values = xmalloc(datum.n * sizeof *datum.values); i = 0; SMAP_FOR_EACH (node, smap) { datum.keys[i].string = xstrdup(node->key); datum.values[i].string = xstrdup(node->value); i++; } ovsdb_datum_sort_unique(&datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING); } else { ovsdb_datum_init_empty(&datum); } ovsdb_idl_txn_write(&row->header_, &%(s)s_columns[%(S)s_COL_%(C)s], &datum); } """ % {'s': structName, 'S': structName.upper(), 'c': columnName, 'C': columnName.upper()} continue print '\nvoid' members = cMembers(prefix, columnName, column, True) keyVar = members[0]['name'] nVar = None valueVar = None if type.value: valueVar = members[1]['name'] if len(members) > 2: nVar = members[2]['name'] else: if len(members) > 1: nVar = members[1]['name'] print '%(s)s_set_%(c)s(const struct %(s)s *row, %(args)s)' % \ {'s': structName, 'c': columnName, 'args': ', '.join(['%(type)s%(name)s' % m for m in members])} print "{" print " struct ovsdb_datum datum;" if type.n_min == 1 and type.n_max == 1: print " union 
ovsdb_atom key;" if type.value: print " union ovsdb_atom value;" print print " ovs_assert(inited);" print " datum.n = 1;" print " datum.keys = &key;" print " " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar) if type.value: print " datum.values = &value;" print " "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar) else: print " datum.values = NULL;" txn_write_func = "ovsdb_idl_txn_write_clone" elif type.is_optional_pointer(): print " union ovsdb_atom key;" print print " ovs_assert(inited);" print " if (%s) {" % keyVar print " datum.n = 1;" print " datum.keys = &key;" print " " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar) print " } else {" print " datum.n = 0;" print " datum.keys = NULL;" print " }" print " datum.values = NULL;" txn_write_func = "ovsdb_idl_txn_write_clone" elif type.n_max == 1: print " union ovsdb_atom key;" print print " ovs_assert(inited);" print " if (%s) {" % nVar print " datum.n = 1;" print " datum.keys = &key;" print " " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar) print " } else {" print " datum.n = 0;" print " datum.keys = NULL;" print " }" print " datum.values = NULL;" txn_write_func = "ovsdb_idl_txn_write_clone" else: print " size_t i;" print print " ovs_assert(inited);" print " datum.n = %s;" % nVar print " datum.keys = %s ? 
xmalloc(%s * sizeof *datum.keys) : NULL;" % (nVar, nVar) if type.value: print " datum.values = xmalloc(%s * sizeof *datum.values);" % nVar else: print " datum.values = NULL;" print " for (i = 0; i < %s; i++) {" % nVar print " " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar) if type.value: print " " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar) print " }" if type.value: valueType = type.value.toAtomicType() else: valueType = "OVSDB_TYPE_VOID" print " ovsdb_datum_sort_unique(&datum, %s, %s);" % ( type.key.toAtomicType(), valueType) txn_write_func = "ovsdb_idl_txn_write" print " %(f)s(&row->header_, &%(s)s_columns[%(S)s_COL_%(C)s], &datum);" \ % {'f': txn_write_func, 's': structName, 'S': structName.upper(), 'C': columnName.upper()} print "}" # Table columns. print "\nstruct ovsdb_idl_column %s_columns[%s_N_COLUMNS];" % ( structName, structName.upper()) print """ static void\n%s_columns_init(void) { struct ovsdb_idl_column *c;\ """ % structName for columnName, column in sorted(table.columns.iteritems()): cs = "%s_col_%s" % (structName, columnName) d = {'cs': cs, 'c': columnName, 's': structName} if column.mutable: mutable = "true" else: mutable = "false" print print " /* Initialize %(cs)s. */" % d print " c = &%(cs)s;" % d print " c->name = \"%(c)s\";" % d print column.type.cInitType(" ", "c->type") print " c->mutable = %s;" % mutable print " c->parse = %(s)s_parse_%(c)s;" % d print " c->unparse = %(s)s_unparse_%(c)s;" % d print "}" # Table classes. 
print " " print "struct ovsdb_idl_table_class %stable_classes[%sN_TABLES] = {" % (prefix, prefix.upper()) for tableName, table in sorted(schema.tables.iteritems()): structName = "%s%s" % (prefix, tableName.lower()) if table.is_root: is_root = "true" else: is_root = "false" print " {\"%s\", %s," % (tableName, is_root) print " %s_columns, ARRAY_SIZE(%s_columns)," % ( structName, structName) print " sizeof(struct %s), %s_init__}," % (structName, structName) print "};" # IDL class. print "\nstruct ovsdb_idl_class %sidl_class = {" % prefix print " \"%s\", %stable_classes, ARRAY_SIZE(%stable_classes)" % ( schema.name, prefix, prefix) print "};" # global init function print """ void %sinit(void) { if (inited) { return; } assert_single_threaded(); inited = true; """ % prefix for tableName, table in sorted(schema.tables.iteritems()): structName = "%s%s" % (prefix, tableName.lower()) print " %s_columns_init();" % structName print "}" def ovsdb_escape(string): def escape(match): c = match.group(0) if c == '\0': raise ovs.db.error.Error("strings may not contain null bytes") elif c == '\\': return '\\\\' elif c == '\n': return '\\n' elif c == '\r': return '\\r' elif c == '\t': return '\\t' elif c == '\b': return '\\b' elif c == '\a': return '\\a' else: return '\\x%02x' % ord(c) return re.sub(r'["\\\000-\037]', escape, string) def usage(): print """\ %(argv0)s: ovsdb schema compiler usage: %(argv0)s [OPTIONS] COMMAND ARG... 
The following commands are supported: annotate SCHEMA ANNOTATIONS print SCHEMA combined with ANNOTATIONS c-idl-header IDL print C header file for IDL c-idl-source IDL print C source file for IDL implementation nroff IDL print schema documentation in nroff format The following options are also available: -h, --help display this help message -V, --version display version information\ """ % {'argv0': argv0} sys.exit(0) if __name__ == "__main__": try: try: options, args = getopt.gnu_getopt(sys.argv[1:], 'C:hV', ['directory', 'help', 'version']) except getopt.GetoptError, geo: sys.stderr.write("%s: %s\n" % (argv0, geo.msg)) sys.exit(1) for key, value in options: if key in ['-h', '--help']: usage() elif key in ['-V', '--version']: print "ovsdb-idlc (Open vSwitch) @VERSION@" elif key in ['-C', '--directory']: os.chdir(value) else: sys.exit(0) optKeys = [key for key, value in options] if not args: sys.stderr.write("%s: missing command argument " "(use --help for help)\n" % argv0) sys.exit(1) commands = {"annotate": (annotateSchema, 2), "c-idl-header": (printCIDLHeader, 1), "c-idl-source": (printCIDLSource, 1)} if not args[0] in commands: sys.stderr.write("%s: unknown command \"%s\" " "(use --help for help)\n" % (argv0, args[0])) sys.exit(1) func, n_args = commands[args[0]] if len(args) - 1 != n_args: sys.stderr.write("%s: \"%s\" requires %d arguments but %d " "provided\n" % (argv0, args[0], n_args, len(args) - 1)) sys.exit(1) func(*args[1:]) except ovs.db.error.Error, e: sys.stderr.write("%s: %s\n" % (argv0, e)) sys.exit(1) # Local variables: # mode: python # End: openvswitch-2.0.1+git20140120/ovsdb/ovsdb-server.1.in000066400000000000000000000160511226605124000217020ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .TH ovsdb\-server 1 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .\" This program's name: .ds PN ovsdb\-server .\" SSL peer program's name: .ds SN ovsdb\-client . .SH NAME ovsdb\-server \- Open vSwitch database server . 
.SH SYNOPSIS \fBovsdb\-server\fR [\fIdatabase\fR]\&... [\fB\-\-remote=\fIremote\fR]\&... [\fB\-\-run=\fIcommand\fR] .so lib/daemon-syn.man .so lib/vlog-syn.man .so lib/ssl-syn.man .so lib/ssl-bootstrap-syn.man .so lib/unixctl-syn.man .so lib/common-syn.man . .SH DESCRIPTION The \fBovsdb\-server\fR program provides RPC interfaces to one or more Open vSwitch databases (OVSDBs). It supports JSON-RPC client connections over active or passive TCP/IP or Unix domain sockets. .PP Each OVSDB file may be specified on the command line as \fIdatabase\fR. If none is specified, the default is \fB@DBDIR@/conf.db\fR. The database files must already have been created and initialized using, for example, \fBovsdb\-tool create\fR. . .SH OPTIONS . .IP "\fB\-\-remote=\fIremote\fR" Adds \fIremote\fR as a connection method used by \fBovsdb\-server\fR. \fIremote\fR must take one of the following forms: . .RS .so ovsdb/remote-passive.man .so ovsdb/remote-active.man . .IP "\fBdb:\fIdb\fB,\fItable\fB,\fIcolumn\fR" Reads additional connection methods from \fIcolumn\fR in all of the rows in \fItable\fR within \fIdb\fR. As the contents of \fIcolumn\fR change, \fBovsdb\-server\fR also adds and drops connection methods accordingly. .IP If \fIcolumn\fR's type is string or set of strings, then the connection methods are taken directly from the column. The connection methods in the column must have one of the forms described above. .IP If \fIcolumn\fR's type is UUID or set of UUIDs and references a table, then each UUID is looked up in the referenced table to obtain a row. The following columns in the row, if present and of the correct type, configure a connection method. Any additional columns are ignored. .RS .IP "\fBtarget\fR (string)" Connection method, in one of the forms described above. This column is mandatory: if it is missing or empty then no connection method can be configured. .IP "\fBmax_backoff\fR (integer)" Maximum number of milliseconds to wait between connection attempts. 
.IP "\fBinactivity_probe\fR (integer)" Maximum number of milliseconds of idle time on connection to client before sending an inactivity probe message. .RE .IP It is an error for \fIcolumn\fR to have another type. .RE . .IP "\fB\-\-run=\fIcommand\fR" Ordinarily \fBovsdb\-server\fR runs forever, or until it is told to exit (see \fBRUNTIME MANAGEMENT COMMANDS\fR below). With this option, \fBovsdb\-server\fR instead starts a shell subprocess running \fIcommand\fR. When the subprocess terminates, \fBovsdb\-server\fR also exits gracefully. If the subprocess exits normally with exit code 0, then \fBovsdb\-server\fR exits with exit code 0 also; otherwise, it exits with exit code 1. .IP This option can be useful where a database server is needed only to run a single command, e.g.: .B "ovsdb\-server \-\-remote=punix:socket \-\-run='ovsdb\-client dump unix:socket Open_vSwitch'" .SS "Daemon Options" .ds DD \ \fBovsdb\-server\fR detaches only after it starts listening on all \ configured remotes. .so lib/daemon.man .SS "Logging Options" .so lib/vlog.man .SS "Public Key Infrastructure Options" The options described below for configuring the SSL public key infrastructure accept a special syntax for obtaining their configuration from the database. If any of these options is given \fBdb:\fIdb\fB,\fItable\fB,\fIcolumn\fR as its argument, then the actual file name is read from the specified \fIcolumn\fR in \fItable\fR within the \fIdb\fR database. The \fIcolumn\fR must have type string or set of strings. The first nonempty string in the table is taken as the file name. (This means that ordinarily there should be at most one row in \fItable\fR.) .so lib/ssl.man .so lib/ssl-bootstrap.man .SS "Other Options" .so lib/unixctl.man .so lib/common.man .SH "RUNTIME MANAGEMENT COMMANDS" \fBovs\-appctl\fR(8) can send commands to a running \fBovsdb\-server\fR process. The currently supported commands are described below. 
.SS "OVSDB\-SERVER COMMANDS" These commands are specific to \fBovsdb\-server\fR. .IP "\fBexit\fR" Causes \fBovsdb\-server\fR to gracefully terminate. .IP "\fBovsdb\-server/compact\fR [\fIdb\fR]\&..." Compacts each database \fIdb\fR in-place. If no \fIdb\fR is specified, compacts every database in-place. Databases are also automatically compacted occasionally. . .IP "\fBovsdb\-server/reconnect\fR" Makes \fBovsdb\-server\fR drop all of the JSON\-RPC connections to database clients and reconnect. .IP This command might be useful for debugging issues with database clients. . .IP "\fBovsdb\-server/add\-remote \fIremote\fR" Adds a remote, as if \fB\-\-remote=\fIremote\fR had been specified on the \fBovsdb\-server\fR command line. (If \fIremote\fR is already a remote, this command succeeds without changing the configuration.) . .IP "\fBovsdb\-server/remove\-remote \fIremote\fR" Removes the specified \fIremote\fR from the configuration, failing with an error if \fIremote\fR is not configured as a remote. This command only works with remotes that were named on \fB\-\-remote\fR or \fBovsdb\-server/add\-remote\fR, that is, it will not remove remotes added indirectly because they were read from the database by configuring a \fBdb:\fIdb\fB,\fItable\fB,\fIcolumn\fR remote. (You can remove a database source with \fBovsdb\-server/remove\-remote \fBdb:\fIdb\fB,\fItable\fB,\fIcolumn\fR, but not individual remotes found indirectly through the database.) . .IP "\fBovsdb\-server/list\-remotes" Outputs a list of the currently configured remotes named on \fB\-\-remote\fR or \fBovsdb\-server/add\-remote\fR, that is, it does not list remotes added indirectly because they were read from the database by configuring a \fBdb:\fIdb\fB,\fItable\fB,\fIcolumn\fR remote. . .IP "\fBovsdb\-server/add\-db \fIdatabase\fR" Adds the \fIdatabase\fR to the running \fBovsdb\-server\fR. The database file must already have been created and initialized using, for example, \fBovsdb\-tool create\fR. . 
.IP "\fBovsdb\-server/remove\-db \fIdatabase\fR" Removes \fIdatabase\fR from the running \fBovsdb\-server\fR. \fIdatabase\fR must be a database name as listed by \fBovsdb\-server/list\-dbs\fR. .IP If a remote has been configured that points to the specified \fIdatabase\fR (e.g. \fB\-\-remote=db:\fIdatabase\fB,\fR... on the command line), then it will be disabled until another database with the same name is added again (with \fBovsdb\-server/add\-db\fR). .IP Any public key infrastructure options specified through this database (e.g. \fB\-\-private\-key=db:\fIdatabase,\fR... on the command line) will be disabled until another database with the same name is added again (with \fBovsdb\-server/add\-db\fR). . .IP "\fBovsdb\-server/list\-dbs" Outputs a list of the currently configured databases added either through the command line or through the \fBovsdb\-server/add\-db\fR command. . .so lib/vlog-unixctl.man .so lib/memory-unixctl.man .so lib/coverage-unixctl.man .SH "SEE ALSO" . .BR ovsdb\-tool (1). openvswitch-2.0.1+git20140120/ovsdb/ovsdb-server.c000066400000000000000000001176551226605124000213730ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include #include #include #include #include #include "column.h" #include "command-line.h" #include "daemon.h" #include "dirs.h" #include "dummy.h" #include "dynamic-string.h" #include "file.h" #include "hash.h" #include "json.h" #include "jsonrpc.h" #include "jsonrpc-server.h" #include "list.h" #include "memory.h" #include "ovsdb.h" #include "ovsdb-data.h" #include "ovsdb-types.h" #include "ovsdb-error.h" #include "poll-loop.h" #include "process.h" #include "row.h" #include "simap.h" #include "shash.h" #include "stream-ssl.h" #include "stream.h" #include "sset.h" #include "table.h" #include "timeval.h" #include "transaction.h" #include "trigger.h" #include "util.h" #include "unixctl.h" #include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ovsdb_server);

/* Bookkeeping for one database file: its file name, the open file and the
 * database object, plus a transaction slot.
 * NOTE(review): presumably these are the values stored in 'all_dbs' --
 * confirm against open_db(). */
struct db {
    /* Initialized in main(). */
    char *filename;
    struct ovsdb_file *file;
    struct ovsdb *db;

    /* Only used by update_remote_status(). */
    struct ovsdb_txn *txn;
};

/* SSL configuration. */
static char *private_key_file;
static char *certificate_file;
static char *ca_cert_file;
static bool bootstrap_ca_cert;

/* Callbacks of type unixctl_cb_func, defined later in this file. */
static unixctl_cb_func ovsdb_server_exit;
static unixctl_cb_func ovsdb_server_compact;
static unixctl_cb_func ovsdb_server_reconnect;

/* Pointers to the server state that main() sets up, bundled so that it can
 * be handed to helpers such as open_db() and save_config() as a single
 * argument.  main() fills each member with the address (or value) of its own
 * local variable of the same name. */
struct server_config {
    struct sset *remotes;       /* main()'s set of remotes. */
    struct shash *all_dbs;      /* main()'s 'all_dbs' table. */
    FILE *config_tmpfile;       /* Temporary file that main() creates to
                                 * hold the saved configuration. */
    struct ovsdb_jsonrpc_server *jsonrpc;   /* Server made by main(). */
};

/* More unixctl_cb_func callbacks, for the remote and database commands. */
static unixctl_cb_func ovsdb_server_add_remote;
static unixctl_cb_func ovsdb_server_remove_remote;
static unixctl_cb_func ovsdb_server_list_remotes;

static unixctl_cb_func ovsdb_server_add_database;
static unixctl_cb_func ovsdb_server_remove_database;
static unixctl_cb_func ovsdb_server_list_databases;

static char *open_db(struct server_config *config, const char *filename);

static void parse_options(int *argc, char **argvp[],
                          struct sset *remotes, char **unixctl_pathp,
                          char **run_command);
static void usage(void) NO_RETURN;

static char *reconfigure_remotes(struct ovsdb_jsonrpc_server *,
                                 const struct shash *all_dbs,
                                 struct sset *remotes);
static
char *reconfigure_ssl(const struct shash *all_dbs); static void report_error_if_changed(char *error, char **last_errorp); static void update_remote_status(const struct ovsdb_jsonrpc_server *jsonrpc, const struct sset *remotes, struct shash *all_dbs); static void save_config__(FILE *config_file, const struct sset *remotes, const struct sset *db_filenames); static void save_config(struct server_config *); static void load_config(FILE *config_file, struct sset *remotes, struct sset *db_filenames); int main(int argc, char *argv[]) { char *unixctl_path = NULL; char *run_command = NULL; struct unixctl_server *unixctl; struct ovsdb_jsonrpc_server *jsonrpc; struct sset remotes, db_filenames; const char *db_filename; struct process *run_process; bool exiting; int retval; long long int status_timer = LLONG_MIN; FILE *config_tmpfile; struct server_config server_config; struct shash all_dbs; struct shash_node *node; char *remotes_error, *ssl_error; char *error; int i; proctitle_init(argc, argv); set_program_name(argv[0]); signal(SIGPIPE, SIG_IGN); process_init(); parse_options(&argc, &argv, &remotes, &unixctl_path, &run_command); /* Create and initialize 'config_tmpfile' as a temporary file to hold * ovsdb-server's most basic configuration, and then save our initial * configuration to it. When --monitor is used, this preserves the effects * of ovs-appctl commands such as ovsdb-server/add-remote (which saves the * new configuration) across crashes. */ config_tmpfile = tmpfile(); if (!config_tmpfile) { ovs_fatal(errno, "failed to create temporary file"); } sset_init(&db_filenames); if (argc > 0) { for (i = 0; i < argc; i++) { sset_add(&db_filenames, argv[i]); } } else { char *default_db = xasprintf("%s/conf.db", ovs_dbdir()); sset_add(&db_filenames, default_db); free(default_db); } server_config.remotes = &remotes; server_config.config_tmpfile = config_tmpfile; save_config__(config_tmpfile, &remotes, &db_filenames); daemonize_start(); /* Load the saved config. 
*/ load_config(config_tmpfile, &remotes, &db_filenames); jsonrpc = ovsdb_jsonrpc_server_create(); shash_init(&all_dbs); server_config.all_dbs = &all_dbs; server_config.jsonrpc = jsonrpc; SSET_FOR_EACH (db_filename, &db_filenames) { error = open_db(&server_config, db_filename); if (error) { ovs_fatal(0, "%s", error); } } error = reconfigure_remotes(jsonrpc, &all_dbs, &remotes); if (!error) { error = reconfigure_ssl(&all_dbs); } if (error) { ovs_fatal(0, "%s", error); } retval = unixctl_server_create(unixctl_path, &unixctl); if (retval) { exit(EXIT_FAILURE); } if (run_command) { char *run_argv[4]; run_argv[0] = "/bin/sh"; run_argv[1] = "-c"; run_argv[2] = run_command; run_argv[3] = NULL; retval = process_start(run_argv, &run_process); if (retval) { ovs_fatal(retval, "%s: process failed to start", run_command); } } else { run_process = NULL; } daemonize_complete(); if (!run_command) { /* ovsdb-server is usually a long-running process, in which case it * makes plenty of sense to log the version, but --run makes * ovsdb-server more like a command-line tool, so skip it. 
*/ VLOG_INFO("%s (Open vSwitch) %s", program_name, VERSION); } unixctl_command_register("exit", "", 0, 0, ovsdb_server_exit, &exiting); unixctl_command_register("ovsdb-server/compact", "", 0, 1, ovsdb_server_compact, &all_dbs); unixctl_command_register("ovsdb-server/reconnect", "", 0, 0, ovsdb_server_reconnect, jsonrpc); unixctl_command_register("ovsdb-server/add-remote", "REMOTE", 1, 1, ovsdb_server_add_remote, &server_config); unixctl_command_register("ovsdb-server/remove-remote", "REMOTE", 1, 1, ovsdb_server_remove_remote, &server_config); unixctl_command_register("ovsdb-server/list-remotes", "", 0, 0, ovsdb_server_list_remotes, &remotes); unixctl_command_register("ovsdb-server/add-db", "DB", 1, 1, ovsdb_server_add_database, &server_config); unixctl_command_register("ovsdb-server/remove-db", "DB", 1, 1, ovsdb_server_remove_database, &server_config); unixctl_command_register("ovsdb-server/list-dbs", "", 0, 0, ovsdb_server_list_databases, &all_dbs); exiting = false; ssl_error = NULL; remotes_error = NULL; while (!exiting) { memory_run(); if (memory_should_report()) { struct simap usage; simap_init(&usage); ovsdb_jsonrpc_server_get_memory_usage(jsonrpc, &usage); SHASH_FOR_EACH(node, &all_dbs) { struct db *db = node->data; ovsdb_get_memory_usage(db->db, &usage); } memory_report(&usage); simap_destroy(&usage); } /* Run unixctl_server_run() before reconfigure_remotes() because * ovsdb-server/add-remote and ovsdb-server/remove-remote can change * the set of remotes that reconfigure_remotes() uses. 
*/ unixctl_server_run(unixctl); report_error_if_changed( reconfigure_remotes(jsonrpc, &all_dbs, &remotes), &remotes_error); report_error_if_changed(reconfigure_ssl(&all_dbs), &ssl_error); ovsdb_jsonrpc_server_run(jsonrpc); SHASH_FOR_EACH(node, &all_dbs) { struct db *db = node->data; ovsdb_trigger_run(db->db, time_msec()); } if (run_process) { process_run(); if (process_exited(run_process)) { exiting = true; } } /* update Manager status(es) every 5 seconds */ if (time_msec() >= status_timer) { status_timer = time_msec() + 5000; update_remote_status(jsonrpc, &remotes, &all_dbs); } memory_wait(); ovsdb_jsonrpc_server_wait(jsonrpc); unixctl_server_wait(unixctl); SHASH_FOR_EACH(node, &all_dbs) { struct db *db = node->data; ovsdb_trigger_wait(db->db, time_msec()); } if (run_process) { process_wait(run_process); } if (exiting) { poll_immediate_wake(); } poll_timer_wait_until(status_timer); poll_block(); } ovsdb_jsonrpc_server_destroy(jsonrpc); SHASH_FOR_EACH(node, &all_dbs) { struct db *db = node->data; ovsdb_destroy(db->db); } sset_destroy(&remotes); unixctl_server_destroy(unixctl); if (run_process && process_exited(run_process)) { int status = process_status(run_process); if (status) { ovs_fatal(0, "%s: child exited, %s", run_command, process_status_msg(status)); } } return 0; } static char * open_db(struct server_config *config, const char *filename) { struct ovsdb_error *db_error; struct db *db; char *error; db = xzalloc(sizeof *db); db->filename = xstrdup(filename); db_error = ovsdb_file_open(db->filename, false, &db->db, &db->file); if (db_error) { error = ovsdb_error_to_string(db_error); } else if (!ovsdb_jsonrpc_server_add_db(config->jsonrpc, db->db)) { error = xasprintf("%s: duplicate database name", db->db->schema->name); } else { shash_add_assert(config->all_dbs, db->db->schema->name, db); return NULL; } ovsdb_error_destroy(db_error); ovsdb_destroy(db->db); free(db->filename); free(db); return error; } static const struct db * find_db(const struct shash 
*all_dbs, const char *db_name) { struct shash_node *node; SHASH_FOR_EACH(node, all_dbs) { struct db *db = node->data; if (!strcmp(db->db->schema->name, db_name)) { return db; } } return NULL; } static char * WARN_UNUSED_RESULT parse_db_column__(const struct shash *all_dbs, const char *name_, char *name, const struct db **dbp, const struct ovsdb_table **tablep, const struct ovsdb_column **columnp) { const char *db_name, *table_name, *column_name; const struct ovsdb_column *column; const struct ovsdb_table *table; const char *tokens[3]; char *save_ptr = NULL; const struct db *db; *dbp = NULL; *tablep = NULL; *columnp = NULL; strtok_r(name, ":", &save_ptr); /* "db:" */ tokens[0] = strtok_r(NULL, ",", &save_ptr); tokens[1] = strtok_r(NULL, ",", &save_ptr); tokens[2] = strtok_r(NULL, ",", &save_ptr); if (!tokens[0] || !tokens[1] || !tokens[2]) { return xasprintf("\"%s\": invalid syntax", name_); } db_name = tokens[0]; table_name = tokens[1]; column_name = tokens[2]; db = find_db(all_dbs, tokens[0]); if (!db) { return xasprintf("\"%s\": no database named %s", name_, db_name); } table = ovsdb_get_table(db->db, table_name); if (!table) { return xasprintf("\"%s\": no table named %s", name_, table_name); } column = ovsdb_table_schema_get_column(table->schema, column_name); if (!column) { return xasprintf("\"%s\": table \"%s\" has no column \"%s\"", name_, table_name, column_name); } *dbp = db; *columnp = column; *tablep = table; return NULL; } /* Returns NULL if successful, otherwise a malloc()'d string describing the * error. */ static char * WARN_UNUSED_RESULT parse_db_column(const struct shash *all_dbs, const char *name_, const struct db **dbp, const struct ovsdb_table **tablep, const struct ovsdb_column **columnp) { char *name = xstrdup(name_); char *retval = parse_db_column__(all_dbs, name_, name, dbp, tablep, columnp); free(name); return retval; } /* Returns NULL if successful, otherwise a malloc()'d string describing the * error. 
*/ static char * WARN_UNUSED_RESULT parse_db_string_column(const struct shash *all_dbs, const char *name, const struct db **dbp, const struct ovsdb_table **tablep, const struct ovsdb_column **columnp) { char *retval; retval = parse_db_column(all_dbs, name, dbp, tablep, columnp); if (retval) { return retval; } if ((*columnp)->type.key.type != OVSDB_TYPE_STRING || (*columnp)->type.value.type != OVSDB_TYPE_VOID) { return xasprintf("\"%s\": table \"%s\" column \"%s\" is " "not string or set of strings", name, (*tablep)->schema->name, (*columnp)->name); } return NULL; } static const char * query_db_string(const struct shash *all_dbs, const char *name, struct ds *errors) { if (!name || strncmp(name, "db:", 3)) { return name; } else { const struct ovsdb_column *column; const struct ovsdb_table *table; const struct ovsdb_row *row; const struct db *db; char *retval; retval = parse_db_string_column(all_dbs, name, &db, &table, &column); if (retval) { ds_put_format(errors, "%s\n", retval); return NULL; } HMAP_FOR_EACH (row, hmap_node, &table->rows) { const struct ovsdb_datum *datum; size_t i; datum = &row->fields[column->index]; for (i = 0; i < datum->n; i++) { if (datum->keys[i].string[0]) { return datum->keys[i].string; } } } return NULL; } } static struct ovsdb_jsonrpc_options * add_remote(struct shash *remotes, const char *target) { struct ovsdb_jsonrpc_options *options; options = shash_find_data(remotes, target); if (!options) { options = ovsdb_jsonrpc_default_options(target); shash_add(remotes, target, options); } return options; } static struct ovsdb_datum * get_datum(struct ovsdb_row *row, const char *column_name, const enum ovsdb_atomic_type key_type, const enum ovsdb_atomic_type value_type, const size_t n_max) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); const struct ovsdb_table_schema *schema = row->table->schema; const struct ovsdb_column *column; column = ovsdb_table_schema_get_column(schema, column_name); if (!column) { VLOG_DBG_RL(&rl, "Table 
`%s' has no `%s' column", schema->name, column_name); return NULL; } if (column->type.key.type != key_type || column->type.value.type != value_type || column->type.n_max != n_max) { if (!VLOG_DROP_DBG(&rl)) { char *type_name = ovsdb_type_to_english(&column->type); VLOG_DBG("Table `%s' column `%s' has type %s, not expected " "key type %s, value type %s, max elements %zd.", schema->name, column_name, type_name, ovsdb_atomic_type_to_string(key_type), ovsdb_atomic_type_to_string(value_type), n_max); free(type_name); } return NULL; } return &row->fields[column->index]; } /* Read string-string key-values from a map. Returns the value associated with * 'key', if found, or NULL */ static const char * read_map_string_column(const struct ovsdb_row *row, const char *column_name, const char *key) { const struct ovsdb_datum *datum; union ovsdb_atom *atom_key = NULL, *atom_value = NULL; size_t i; datum = get_datum(CONST_CAST(struct ovsdb_row *, row), column_name, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING, UINT_MAX); if (!datum) { return NULL; } for (i = 0; i < datum->n; i++) { atom_key = &datum->keys[i]; if (!strcmp(atom_key->string, key)){ atom_value = &datum->values[i]; break; } } return atom_value ? atom_value->string : NULL; } static const union ovsdb_atom * read_column(const struct ovsdb_row *row, const char *column_name, enum ovsdb_atomic_type type) { const struct ovsdb_datum *datum; datum = get_datum(CONST_CAST(struct ovsdb_row *, row), column_name, type, OVSDB_TYPE_VOID, 1); return datum && datum->n ? datum->keys : NULL; } static bool read_integer_column(const struct ovsdb_row *row, const char *column_name, long long int *integerp) { const union ovsdb_atom *atom; atom = read_column(row, column_name, OVSDB_TYPE_INTEGER); *integerp = atom ? 
atom->integer : 0; return atom != NULL; } static bool read_string_column(const struct ovsdb_row *row, const char *column_name, const char **stringp) { const union ovsdb_atom *atom; atom = read_column(row, column_name, OVSDB_TYPE_STRING); *stringp = atom ? atom->string : NULL; return atom != NULL; } static void write_bool_column(struct ovsdb_row *row, const char *column_name, bool value) { const struct ovsdb_column *column; struct ovsdb_datum *datum; column = ovsdb_table_schema_get_column(row->table->schema, column_name); datum = get_datum(row, column_name, OVSDB_TYPE_BOOLEAN, OVSDB_TYPE_VOID, 1); if (!datum) { return; } if (datum->n != 1) { ovsdb_datum_destroy(datum, &column->type); datum->n = 1; datum->keys = xmalloc(sizeof *datum->keys); datum->values = NULL; } datum->keys[0].boolean = value; } static void write_string_string_column(struct ovsdb_row *row, const char *column_name, char **keys, char **values, size_t n) { const struct ovsdb_column *column; struct ovsdb_datum *datum; size_t i; column = ovsdb_table_schema_get_column(row->table->schema, column_name); datum = get_datum(row, column_name, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING, UINT_MAX); if (!datum) { for (i = 0; i < n; i++) { free(keys[i]); free(values[i]); } return; } /* Free existing data. */ ovsdb_datum_destroy(datum, &column->type); /* Allocate space for new values. */ datum->n = n; datum->keys = xmalloc(n * sizeof *datum->keys); datum->values = xmalloc(n * sizeof *datum->values); for (i = 0; i < n; ++i) { datum->keys[i].string = keys[i]; datum->values[i].string = values[i]; } /* Sort and check constraints. */ ovsdb_datum_sort_assert(datum, column->type.key.type); } /* Adds a remote and options to 'remotes', based on the Manager table row in * 'row'. 
*/ static void add_manager_options(struct shash *remotes, const struct ovsdb_row *row) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); struct ovsdb_jsonrpc_options *options; long long int max_backoff, probe_interval; const char *target, *dscp_string; if (!read_string_column(row, "target", &target) || !target) { VLOG_INFO_RL(&rl, "Table `%s' has missing or invalid `target' column", row->table->schema->name); return; } options = add_remote(remotes, target); if (read_integer_column(row, "max_backoff", &max_backoff)) { options->max_backoff = max_backoff; } if (read_integer_column(row, "inactivity_probe", &probe_interval)) { options->probe_interval = probe_interval; } options->dscp = DSCP_DEFAULT; dscp_string = read_map_string_column(row, "other_config", "dscp"); if (dscp_string) { int dscp = atoi(dscp_string); if (dscp >= 0 && dscp <= 63) { options->dscp = dscp; } } } static void query_db_remotes(const char *name, const struct shash *all_dbs, struct shash *remotes, struct ds *errors) { const struct ovsdb_column *column; const struct ovsdb_table *table; const struct ovsdb_row *row; const struct db *db; char *retval; retval = parse_db_column(all_dbs, name, &db, &table, &column); if (retval) { ds_put_format(errors, "%s\n", retval); free(retval); return; } if (column->type.key.type == OVSDB_TYPE_STRING && column->type.value.type == OVSDB_TYPE_VOID) { HMAP_FOR_EACH (row, hmap_node, &table->rows) { const struct ovsdb_datum *datum; size_t i; datum = &row->fields[column->index]; for (i = 0; i < datum->n; i++) { add_remote(remotes, datum->keys[i].string); } } } else if (column->type.key.type == OVSDB_TYPE_UUID && column->type.key.u.uuid.refTable && column->type.value.type == OVSDB_TYPE_VOID) { const struct ovsdb_table *ref_table = column->type.key.u.uuid.refTable; HMAP_FOR_EACH (row, hmap_node, &table->rows) { const struct ovsdb_datum *datum; size_t i; datum = &row->fields[column->index]; for (i = 0; i < datum->n; i++) { const struct ovsdb_row *ref_row; ref_row 
= ovsdb_table_get_row(ref_table, &datum->keys[i].uuid); if (ref_row) { add_manager_options(remotes, ref_row); } } } } } static void update_remote_row(const struct ovsdb_row *row, struct ovsdb_txn *txn, const struct ovsdb_jsonrpc_server *jsonrpc) { struct ovsdb_jsonrpc_remote_status status; struct ovsdb_row *rw_row; const char *target; char *keys[9], *values[9]; size_t n = 0; /* Get the "target" (protocol/host/port) spec. */ if (!read_string_column(row, "target", &target)) { /* Bad remote spec or incorrect schema. */ return; } rw_row = ovsdb_txn_row_modify(txn, row); ovsdb_jsonrpc_server_get_remote_status(jsonrpc, target, &status); /* Update status information columns. */ write_bool_column(rw_row, "is_connected", status.is_connected); if (status.state) { keys[n] = xstrdup("state"); values[n++] = xstrdup(status.state); } if (status.sec_since_connect != UINT_MAX) { keys[n] = xstrdup("sec_since_connect"); values[n++] = xasprintf("%u", status.sec_since_connect); } if (status.sec_since_disconnect != UINT_MAX) { keys[n] = xstrdup("sec_since_disconnect"); values[n++] = xasprintf("%u", status.sec_since_disconnect); } if (status.last_error) { keys[n] = xstrdup("last_error"); values[n++] = xstrdup(ovs_retval_to_string(status.last_error)); } if (status.locks_held && status.locks_held[0]) { keys[n] = xstrdup("locks_held"); values[n++] = xstrdup(status.locks_held); } if (status.locks_waiting && status.locks_waiting[0]) { keys[n] = xstrdup("locks_waiting"); values[n++] = xstrdup(status.locks_waiting); } if (status.locks_lost && status.locks_lost[0]) { keys[n] = xstrdup("locks_lost"); values[n++] = xstrdup(status.locks_lost); } if (status.n_connections > 1) { keys[n] = xstrdup("n_connections"); values[n++] = xasprintf("%d", status.n_connections); } if (status.bound_port != htons(0)) { keys[n] = xstrdup("bound_port"); values[n++] = xasprintf("%"PRIu16, ntohs(status.bound_port)); } write_string_string_column(rw_row, "status", keys, values, n); 
ovsdb_jsonrpc_server_free_remote_status(&status); } static void update_remote_rows(const struct shash *all_dbs, const char *remote_name, const struct ovsdb_jsonrpc_server *jsonrpc) { const struct ovsdb_table *table, *ref_table; const struct ovsdb_column *column; const struct ovsdb_row *row; const struct db *db; char *retval; if (strncmp("db:", remote_name, 3)) { return; } retval = parse_db_column(all_dbs, remote_name, &db, &table, &column); if (retval) { free(retval); return; } if (column->type.key.type != OVSDB_TYPE_UUID || !column->type.key.u.uuid.refTable || column->type.value.type != OVSDB_TYPE_VOID) { return; } ref_table = column->type.key.u.uuid.refTable; HMAP_FOR_EACH (row, hmap_node, &table->rows) { const struct ovsdb_datum *datum; size_t i; datum = &row->fields[column->index]; for (i = 0; i < datum->n; i++) { const struct ovsdb_row *ref_row; ref_row = ovsdb_table_get_row(ref_table, &datum->keys[i].uuid); if (ref_row) { update_remote_row(ref_row, db->txn, jsonrpc); } } } } static void update_remote_status(const struct ovsdb_jsonrpc_server *jsonrpc, const struct sset *remotes, struct shash *all_dbs) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); const char *remote; struct db *db; struct shash_node *node; SHASH_FOR_EACH(node, all_dbs) { db = node->data; db->txn = ovsdb_txn_create(db->db); } /* Iterate over --remote arguments given on command line. */ SSET_FOR_EACH (remote, remotes) { update_remote_rows(all_dbs, remote, jsonrpc); } SHASH_FOR_EACH(node, all_dbs) { struct ovsdb_error *error; db = node->data; error = ovsdb_txn_commit(db->txn, false); if (error) { VLOG_ERR_RL(&rl, "Failed to update remote status: %s", ovsdb_error_to_string(error)); ovsdb_error_destroy(error); } } } /* Reconfigures ovsdb-server's remotes based on information in the database. 
*/ static char * reconfigure_remotes(struct ovsdb_jsonrpc_server *jsonrpc, const struct shash *all_dbs, struct sset *remotes) { struct ds errors = DS_EMPTY_INITIALIZER; struct shash resolved_remotes; const char *name; /* Configure remotes: "db:" names are expanded through the database, any other name is used as the remote itself. */ shash_init(&resolved_remotes); SSET_FOR_EACH (name, remotes) { if (!strncmp(name, "db:", 3)) { query_db_remotes(name, all_dbs, &resolved_remotes, &errors); } else { add_remote(&resolved_remotes, name); } } ovsdb_jsonrpc_server_set_remotes(jsonrpc, &resolved_remotes); shash_destroy_free_data(&resolved_remotes); return errors.string; } /* Resolves the private key, certificate and CA certificate settings with query_db_string() and passes the results to the stream SSL layer. Returns 'errors.string', which holds whatever error text query_db_string() appended. */ static char * reconfigure_ssl(const struct shash *all_dbs) { struct ds errors = DS_EMPTY_INITIALIZER; const char *resolved_private_key; const char *resolved_certificate; const char *resolved_ca_cert; resolved_private_key = query_db_string(all_dbs, private_key_file, &errors); resolved_certificate = query_db_string(all_dbs, certificate_file, &errors); resolved_ca_cert = query_db_string(all_dbs, ca_cert_file, &errors); stream_ssl_set_key_and_cert(resolved_private_key, resolved_certificate); stream_ssl_set_ca_cert_file(resolved_ca_cert, bootstrap_ca_cert); return errors.string; } /* Logs 'error' as a warning only if it differs from '*last_errorp', then remembers it there. Takes ownership of 'error': it is either stored in '*last_errorp' or freed. A null 'error' clears '*last_errorp'. */ static void report_error_if_changed(char *error, char **last_errorp) { if (error) { if (!*last_errorp || strcmp(error, *last_errorp)) { VLOG_WARN("%s", error); free(*last_errorp); *last_errorp = error; return; } free(error); } else { free(*last_errorp); *last_errorp = NULL; } } /* "exit": sets the bool that 'exiting_' points to and replies to 'conn'. */ static void ovsdb_server_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *exiting_) { bool *exiting = exiting_; *exiting = true; unixctl_command_reply(conn, NULL); } /* "ovsdb-server/compact [DB]": compacts the database named by argv[1], or every database in 'dbs_' when no name is given. Replies with an error if no database matched or if any compaction failed. */ static void ovsdb_server_compact(struct unixctl_conn *conn, int argc, const char *argv[], void *dbs_) { struct shash *all_dbs = dbs_; struct ds reply; struct db *db; struct shash_node *node; int n = 0; ds_init(&reply); SHASH_FOR_EACH(node, all_dbs) { const char *name; db = node->data; name = db->db->schema->name; if (argc < 2 || !strcmp(argv[1],
name)) { struct ovsdb_error *error; VLOG_INFO("compacting %s database by user request", name); error = ovsdb_file_compact(db->file); if (error) { char *s = ovsdb_error_to_string(error); ds_put_format(&reply, "%s\n", s); free(s); ovsdb_error_destroy(error); } n++; } } if (!n) { unixctl_command_reply_error(conn, "no database by that name"); } else if (reply.length) { unixctl_command_reply_error(conn, ds_cstr(&reply)); } else { unixctl_command_reply(conn, NULL); } ds_destroy(&reply); } /* "ovsdb-server/reconnect": makes ovsdb-server drop all of its JSON-RPC * connections and reconnect. */ static void ovsdb_server_reconnect(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *jsonrpc_) { struct ovsdb_jsonrpc_server *jsonrpc = jsonrpc_; ovsdb_jsonrpc_server_reconnect(jsonrpc); unixctl_command_reply(conn, NULL); } /* "ovsdb-server/add-remote REMOTE": adds REMOTE to the set of remotes that * ovsdb-server services. A REMOTE that starts with "db:" is accepted only if parse_db_column() can resolve it; the configuration is saved only when REMOTE was not already in the set. */ static void ovsdb_server_add_remote(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *config_) { struct server_config *config = config_; const char *remote = argv[1]; const struct ovsdb_column *column; const struct ovsdb_table *table; const struct db *db; char *retval; retval = (strncmp("db:", remote, 3) ? NULL : parse_db_column(config->all_dbs, remote, &db, &table, &column)); if (!retval) { if (sset_add(config->remotes, remote)) { save_config(config); } unixctl_command_reply(conn, NULL); } else { unixctl_command_reply_error(conn, retval); free(retval); } } /* "ovsdb-server/remove-remote REMOTE": removes REMOTE from the set of remotes * that ovsdb-server services.
*/ static void ovsdb_server_remove_remote(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *config_) { struct server_config *config = config_; struct sset_node *node; node = sset_find(config->remotes, argv[1]); if (node) { sset_delete(config->remotes, node); save_config(config); unixctl_command_reply(conn, NULL); } else { unixctl_command_reply_error(conn, "no such remote"); } } /* "ovsdb-server/list-remotes": outputs a list of configured remotes. */ static void ovsdb_server_list_remotes(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *remotes_) { struct sset *remotes = remotes_; const char **list, **p; struct ds s; ds_init(&s); list = sset_sort(remotes); for (p = list; *p; p++) { ds_put_format(&s, "%s\n", *p); } free(list); unixctl_command_reply(conn, ds_cstr(&s)); ds_destroy(&s); } /* "ovsdb-server/add-db DB": adds the DB to ovsdb-server. DB is passed to open_db() as a database file name; the configuration is saved only if the database opened successfully. */ static void ovsdb_server_add_database(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *config_) { struct server_config *config = config_; const char *filename = argv[1]; char *error; error = open_db(config, filename); if (!error) { save_config(config); unixctl_command_reply(conn, NULL); } else { unixctl_command_reply_error(conn, error); free(error); } } /* "ovsdb-server/remove-db DB": removes the database registered under the name DB from ovsdb-server, destroys it, and saves the new configuration. */ static void ovsdb_server_remove_database(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *config_) { struct server_config *config = config_; struct shash_node *node; struct db *db; bool ok; node = shash_find(config->all_dbs, argv[1]); if (!node) { unixctl_command_reply_error(conn, "Failed to find the database."); return; } db = node->data; ok = ovsdb_jsonrpc_server_remove_db(config->jsonrpc, db->db); ovs_assert(ok); ovsdb_destroy(db->db); shash_delete(config->all_dbs, node); free(db->filename); free(db); save_config(config); unixctl_command_reply(conn, NULL); } /* "ovsdb-server/list-dbs": outputs the schema name of each configured database, one per line, in shash_sort() order. */ static void ovsdb_server_list_databases(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[]
OVS_UNUSED, void *all_dbs_) { struct shash *all_dbs = all_dbs_; const struct shash_node **nodes; struct ds s; size_t i; ds_init(&s); nodes = shash_sort(all_dbs); for (i = 0; i < shash_count(all_dbs); i++) { struct db *db = nodes[i]->data; ds_put_format(&s, "%s\n", db->db->schema->name); } free(nodes); unixctl_command_reply(conn, ds_cstr(&s)); ds_destroy(&s); } /* Parses the command line in '*argcp' and '*argvp'. Initializes 'remotes' and adds each --remote argument to it, stores the --unixctl and --run arguments in '*unixctl_pathp' and '*run_command', and records the SSL options in the file-level variables. On return '*argcp' and '*argvp' have been advanced by 'optind', past the parsed options. */ static void parse_options(int *argcp, char **argvp[], struct sset *remotes, char **unixctl_pathp, char **run_command) { enum { OPT_REMOTE = UCHAR_MAX + 1, OPT_UNIXCTL, OPT_RUN, OPT_BOOTSTRAP_CA_CERT, OPT_ENABLE_DUMMY, VLOG_OPTION_ENUMS, DAEMON_OPTION_ENUMS }; static const struct option long_options[] = { {"remote", required_argument, NULL, OPT_REMOTE}, {"unixctl", required_argument, NULL, OPT_UNIXCTL}, {"run", required_argument, NULL, OPT_RUN}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT}, {"private-key", required_argument, NULL, 'p'}, {"certificate", required_argument, NULL, 'c'}, {"ca-cert", required_argument, NULL, 'C'}, {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); int argc = *argcp; char **argv = *argvp; sset_init(remotes); for (;;) { int c; c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { case OPT_REMOTE: sset_add(remotes, optarg); break; case OPT_UNIXCTL: *unixctl_pathp = optarg; break; case OPT_RUN: *run_command = optarg; break; case 'h': usage(); /* usage() is declared NO_RETURN, so there is no fall-through here. */ case 'V': ovs_print_version(0, 0); exit(EXIT_SUCCESS); VLOG_OPTION_HANDLERS DAEMON_OPTION_HANDLERS case 'p': private_key_file = optarg; break; case 'c': certificate_file = optarg; break; case 'C': ca_cert_file = optarg; bootstrap_ca_cert = false; break; case OPT_BOOTSTRAP_CA_CERT: ca_cert_file = optarg; bootstrap_ca_cert = true; break; case OPT_ENABLE_DUMMY:
dummy_enable(optarg && !strcmp(optarg, "override")); break; case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); *argcp -= optind; *argvp += optind; } /* Prints a usage message on stdout and exits with EXIT_SUCCESS. */ static void usage(void) { printf("%s: Open vSwitch database server\n" "usage: %s [OPTIONS] [DATABASE...]\n" "where each DATABASE is a database file in ovsdb format.\n" "The default DATABASE, if none is given, is\n%s/conf.db.\n", program_name, program_name, ovs_dbdir()); printf("\nJSON-RPC options (may be specified any number of times):\n" " --remote=REMOTE connect or listen to REMOTE\n"); stream_usage("JSON-RPC", true, true, true); daemon_usage(); vlog_usage(); printf("\nOther options:\n" " --run COMMAND run COMMAND as subprocess then exit\n" " --unixctl=SOCKET override default control socket name\n" " -h, --help display this help message\n" " -V, --version display version information\n"); exit(EXIT_SUCCESS); } /* Returns a new JSON array that holds one JSON string for each element of 'sset'. */ static struct json * sset_to_json(const struct sset *sset) { struct json *array; const char *s; array = json_array_create_empty(); SSET_FOR_EACH (s, sset) { json_array_add(array, json_string_create(s)); } return array; } /* Truncates and replaces the contents of 'config_file' by a representation of * 'remotes' and 'db_filenames'. Any failure to truncate or write the file is fatal. */ static void save_config__(FILE *config_file, const struct sset *remotes, const struct sset *db_filenames) { struct json *obj; char *s; if (ftruncate(fileno(config_file), 0) == -1) { VLOG_FATAL("failed to truncate temporary file (%s)", ovs_strerror(errno)); } obj = json_object_create(); json_object_put(obj, "remotes", sset_to_json(remotes)); json_object_put(obj, "db_filenames", sset_to_json(db_filenames)); s = json_to_string(obj, 0); json_destroy(obj); if (fseek(config_file, 0, SEEK_SET) != 0 || fputs(s, config_file) == EOF || fflush(config_file) == EOF) { VLOG_FATAL("failed to write temporary file (%s)", ovs_strerror(errno)); } free(s); } /* Truncates and replaces the contents of 'config_file' by a representation of * 'config'.
*/ static void save_config(struct server_config *config) { struct sset db_filenames; struct shash_node *node; sset_init(&db_filenames); SHASH_FOR_EACH (node, config->all_dbs) { struct db *db = node->data; sset_add(&db_filenames, db->filename); } save_config__(config->config_tmpfile, config->remotes, &db_filenames); sset_destroy(&db_filenames); } static void sset_from_json(struct sset *sset, const struct json *array) { size_t i; sset_clear(sset); ovs_assert(array->type == JSON_ARRAY); for (i = 0; i < array->u.array.n; i++) { const struct json *elem = array->u.array.elems[i]; sset_add(sset, json_string(elem)); } } /* Clears and replaces 'remotes' and 'dbnames' by a configuration read from * 'config_file', which must have been previously written by save_config(). */ static void load_config(FILE *config_file, struct sset *remotes, struct sset *db_filenames) { struct json *json; if (fseek(config_file, 0, SEEK_SET) != 0) { VLOG_FATAL("seek failed in temporary file (%s)", ovs_strerror(errno)); } json = json_from_stream(config_file); if (json->type == JSON_STRING) { VLOG_FATAL("reading json failed (%s)", json_string(json)); } ovs_assert(json->type == JSON_OBJECT); sset_from_json(remotes, shash_find_data(json_object(json), "remotes")); sset_from_json(db_filenames, shash_find_data(json_object(json), "db_filenames")); json_destroy(json); } openvswitch-2.0.1+git20140120/ovsdb/ovsdb-tool.1.in000066400000000000000000000137721226605124000213600ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .\" -*- nroff -*- .TH ovsdb\-tool 1 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .ds PN ovsdb\-tool . .SH NAME ovsdb\-tool \- Open vSwitch database management utility . 
.SH SYNOPSIS \fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate \fR[\fIdb\fR [\fIschema\fR]] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBcompact \fR[\fIdb\fR [\fItarget\fR]] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBconvert \fR[\fIdb\fR [\fIschema \fR[\fItarget\fR]]] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBneeds\-conversion \fR[\fIdb\fR [\fIschema\fR]] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBdb\-version \fR[\fIdb\fR] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBschema\-version \fR[\fIschema\fR] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBdb\-cksum \fR[\fIdb\fR] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBschema\-cksum \fR[\fIschema\fR] .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBquery \fR[\fIdb\fR] \fItransaction\fR .br \fBovsdb\-tool \fR[\fIoptions\fR] \fBtransact \fR[\fIdb\fR] \fItransaction\fR .br \fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-m\fR | \fB\-\-more\fR]... \fBshow\-log \fR[\fIdb\fR] .br \fBovsdb\-tool help\fR .so lib/vlog-syn.man .so lib/common-syn.man . .SH DESCRIPTION The \fBovsdb\-tool\fR program is a command-line tool for managing Open vSwitch database (OVSDB) files. It does not interact directly with running Open vSwitch database servers (instead, use \fBovsdb\-client\fR). . .SS "Basic Commands" .IP "\fBcreate\fI db schema\fR" Reads an OVSDB schema from the file named \fIschema\fR and creates a new OVSDB database file named \fIdb\fR using that schema. The new database is initially empty. This command will not overwrite an existing \fIdb\fR. .IP \fIschema\fR must contain an OVSDB schema in JSON format. Refer to the OVSDB specification for details. . .IP "\fBcompact\fI db \fR[\fItarget\fR]" Reads \fIdb\fR and writes a compacted version. If \fItarget\fR is specified, the compacted version is written as a new file named \fItarget\fR, which must not already exist. If \fItarget\fR is omitted, then the compacted version of the database replaces \fIdb\fR in-place. . 
.IP "\fBconvert\fI db schema \fR[\fItarget\fR]" Reads \fIdb\fR, translating it into the schema specified in \fIschema\fR, and writes out the new interpretation. If \fItarget\fR is specified, the translated version is written as a new file named \fItarget\fR, which must not already exist. If \fItarget\fR is omitted, then the translated version of the database replaces \fIdb\fR in-place. .IP This command can do simple ``upgrades'' and ``downgrades'' on a database's schema. The data in \fIdb\fR must be valid when interpreted under \fIschema\fR, with only one exception: data in \fIdb\fR for tables and columns that do not exist in \fIschema\fR are ignored. Columns that exist in \fIschema\fR but not in \fIdb\fR are set to their default values. All of \fIschema\fR's constraints apply in full. . .IP "\fBneeds\-conversion\fI db schema\fR" Reads the schema embedded in \fIdb\fR and the standalone schema in \fIschema\fR and compares them. If the schemas are the same, prints \fBno\fR on stdout; if they differ, prints \fByes\fR. . .IP "\fBdb\-version\fI db\fR" .IQ "\fBschema\-version\fI schema\fR" Prints the version number in the schema embedded within the database \fIdb\fR or in the standalone schema \fIschema\fR on stdout. A schema version number has the form \fIx\fB.\fIy\fB.\fIz\fR. See \fBovs\-vswitchd.conf.db\fR(5) for details. .IP Schema version numbers and Open vSwitch version numbers are independent. .IP If \fIschema\fR or \fIdb\fR was created before schema versioning was introduced, then it will not have a version number and this command will print a blank line. . .IP "\fBdb\-cksum\fI db\fR" .IQ "\fBschema\-cksum\fI schema\fR" Prints the checksum in the schema embedded within the database \fIdb\fR or of the standalone schema \fIschema\fR on stdout. .IP If \fIschema\fR or \fIdb\fR was created before schema checksums were introduced, then it will not have a checksum and this command will print a blank line. . 
.IP "\fBquery\fI db transaction\fR" Opens \fIdb\fR, executes \fItransaction\fR on it, and prints the results. The \fItransaction\fR must be a JSON array in the format of the \fBparams\fR array for the JSON-RPC \fBtransact\fR method, as described in the OVSDB specification. .IP The \fIdb\fR is opened for read-only access, so this command may safely run concurrently with other database activity, including \fBovsdb\-server\fR and other database writers. The \fItransaction\fR may specify database modifications, but these will have no effect on \fIdb\fR. . .IP "\fBtransact\fI db transaction\fR" Opens \fIdb\fR, executes \fItransaction\fR on it, prints the results, and commits any changes to \fIdb\fR. The \fItransaction\fR must be a JSON array in the format of the \fBparams\fR array for the JSON-RPC \fBtransact\fR method, as described in the OVSDB specification. .IP The \fIdb\fR is opened and locked for read/write access, so this command will fail if the database is opened for writing by any other process, including \fBovsdb\-server\fR(1). Use \fBovsdb\-client\fR(1), instead, to write to a database that is served by \fBovsdb\-server\fR(1). . .IP "\fBshow\-log\fI db\fR" Prints a summary of the records in \fIdb\fR's log, including the time and date at which each database change occurred and any associated comment. This may be useful for debugging. .IP To increase the verbosity of output, add \fB\-m\fR (or \fB\-\-more\fR) one or more times to the command line. With one \fB\-m\fR, \fBshow\-log\fR prints a summary of the records added, deleted, or modified by each transaction. With two \fB\-m\fRs, \fBshow\-log\fR also prints the values of the columns modified by each change to a record. . .SH OPTIONS .SS "Logging Options" .so lib/vlog.man .SS "Other Options" .so lib/common.man .SH "FILES" The default \fIdb\fR is \fB@DBDIR@/conf.db\fR. The default \fIschema\fR is \fB@pkgdatadir@/vswitch.ovsschema\fR. The \fBhelp\fR command also displays these defaults. .SH "SEE ALSO" . 
\fBovsdb\-server\fR(1), \fBovsdb\-client\fR(1), and the OVSDB specification. openvswitch-2.0.1+git20140120/ovsdb/ovsdb-tool.c000066400000000000000000000421311226605124000210240ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include "column.h" #include "command-line.h" #include "compiler.h" #include "dirs.h" #include "dynamic-string.h" #include "file.h" #include "lockfile.h" #include "log.h" #include "json.h" #include "ovsdb.h" #include "ovsdb-data.h" #include "ovsdb-error.h" #include "socket-util.h" #include "table.h" #include "timeval.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(ovsdb_tool); /* -m, --more: Verbosity level for "show-log" command output. 
*/ static int show_log_verbosity; static const struct command *get_all_commands(void); static void usage(void) NO_RETURN; static void parse_options(int argc, char *argv[]); static const char *default_db(void); static const char *default_schema(void); int main(int argc, char *argv[]) { set_program_name(argv[0]); parse_options(argc, argv); signal(SIGPIPE, SIG_IGN); run_command(argc - optind, argv + optind, get_all_commands()); return 0; } static void parse_options(int argc, char *argv[]) { static const struct option long_options[] = { {"more", no_argument, NULL, 'm'}, {"verbose", optional_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { int c; c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { case 'm': show_log_verbosity++; break; case 'h': usage(); case 'V': ovs_print_version(0, 0); exit(EXIT_SUCCESS); case 'v': vlog_set_verbosity(optarg); break; case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); } static void usage(void) { printf("%s: Open vSwitch database management utility\n" "usage: %s [OPTIONS] COMMAND [ARG...]\n" " create [DB [SCHEMA]] create DB with the given SCHEMA\n" " compact [DB [DST]] compact DB in-place (or to DST)\n" " convert [DB [SCHEMA [DST]]] convert DB to SCHEMA (to DST)\n" " db-version [DB] report version of schema used by DB\n" " db-cksum [DB] report checksum of schema used by DB\n" " schema-version [SCHEMA] report SCHEMA's schema version\n" " schema-cksum [SCHEMA] report SCHEMA's checksum\n" " query [DB] TRNS execute read-only transaction on DB\n" " transact [DB] TRNS execute read/write transaction on DB\n" " [-m]... 
show-log [DB] print DB's log entries\n" "The default DB is %s.\n" "The default SCHEMA is %s.\n", program_name, program_name, default_db(), default_schema()); vlog_usage(); printf("\nOther options:\n" " -m, --more increase show-log verbosity\n" " -h, --help display this help message\n" " -V, --version display version information\n"); exit(EXIT_SUCCESS); } static const char * default_db(void) { static char *db; if (!db) { db = xasprintf("%s/conf.db", ovs_dbdir()); } return db; } static const char * default_schema(void) { static char *schema; if (!schema) { schema = xasprintf("%s/vswitch.ovsschema", ovs_pkgdatadir()); } return schema; } static struct json * parse_json(const char *s) { struct json *json = json_from_string(s); if (json->type == JSON_STRING) { ovs_fatal(0, "\"%s\": %s", s, json->u.string); } return json; } static void print_and_free_json(struct json *json) { char *string = json_to_string(json, JSSF_SORT); json_destroy(json); puts(string); free(string); } static void check_ovsdb_error(struct ovsdb_error *error) { if (error) { ovs_fatal(0, "%s", ovsdb_error_to_string(error)); } } static void do_create(int argc, char *argv[]) { const char *db_file_name = argc >= 2 ? argv[1] : default_db(); const char *schema_file_name = argc >= 3 ? argv[2] : default_schema(); struct ovsdb_schema *schema; struct ovsdb_log *log; struct json *json; /* Read schema from file and convert to JSON. */ check_ovsdb_error(ovsdb_schema_from_file(schema_file_name, &schema)); json = ovsdb_schema_to_json(schema); ovsdb_schema_destroy(schema); /* Create database file. 
*/ check_ovsdb_error(ovsdb_log_open(db_file_name, OVSDB_LOG_CREATE, -1, &log)); check_ovsdb_error(ovsdb_log_write(log, json)); check_ovsdb_error(ovsdb_log_commit(log)); ovsdb_log_close(log); json_destroy(json); } static void compact_or_convert(const char *src_name_, const char *dst_name_, const struct ovsdb_schema *new_schema, const char *comment) { char *src_name, *dst_name; struct lockfile *src_lock; struct lockfile *dst_lock; bool in_place = dst_name_ == NULL; struct ovsdb *db; int retval; /* Dereference symlinks for source and destination names. In the in-place * case this ensures that, if the source name is a symlink, we replace its * target instead of replacing the symlink by a regular file. In the * non-in-place, this has the same effect for the destination name. */ src_name = follow_symlinks(src_name_); dst_name = (in_place ? xasprintf("%s.tmp", src_name) : follow_symlinks(dst_name_)); /* Lock the source, if we will be replacing it. */ if (in_place) { retval = lockfile_lock(src_name, &src_lock); if (retval) { ovs_fatal(retval, "%s: failed to lock lockfile", src_name); } } /* Get (temporary) destination and lock it. */ retval = lockfile_lock(dst_name, &dst_lock); if (retval) { ovs_fatal(retval, "%s: failed to lock lockfile", dst_name); } /* Save a copy. */ check_ovsdb_error(new_schema ? ovsdb_file_open_as_schema(src_name, new_schema, &db) : ovsdb_file_open(src_name, true, &db, NULL)); check_ovsdb_error(ovsdb_file_save_copy(dst_name, false, comment, db)); ovsdb_destroy(db); /* Replace source. */ if (in_place) { if (rename(dst_name, src_name)) { ovs_fatal(errno, "failed to rename \"%s\" to \"%s\"", dst_name, src_name); } fsync_parent_dir(dst_name); lockfile_unlock(src_lock); } lockfile_unlock(dst_lock); free(src_name); free(dst_name); } static void do_compact(int argc, char *argv[]) { const char *db = argc >= 2 ? argv[1] : default_db(); const char *target = argc >= 3 ? 
argv[2] : NULL; compact_or_convert(db, target, NULL, "compacted by ovsdb-tool "VERSION); } static void do_convert(int argc, char *argv[]) { const char *db = argc >= 2 ? argv[1] : default_db(); const char *schema = argc >= 3 ? argv[2] : default_schema(); const char *target = argc >= 4 ? argv[3] : NULL; struct ovsdb_schema *new_schema; check_ovsdb_error(ovsdb_schema_from_file(schema, &new_schema)); compact_or_convert(db, target, new_schema, "converted by ovsdb-tool "VERSION); ovsdb_schema_destroy(new_schema); } static void do_needs_conversion(int argc, char *argv[]) { const char *db_file_name = argc >= 2 ? argv[1] : default_db(); const char *schema_file_name = argc >= 3 ? argv[2] : default_schema(); struct ovsdb_schema *schema1, *schema2; check_ovsdb_error(ovsdb_file_read_schema(db_file_name, &schema1)); check_ovsdb_error(ovsdb_schema_from_file(schema_file_name, &schema2)); puts(ovsdb_schema_equal(schema1, schema2) ? "no" : "yes"); ovsdb_schema_destroy(schema1); ovsdb_schema_destroy(schema2); } static void do_db_version(int argc, char *argv[]) { const char *db_file_name = argc >= 2 ? argv[1] : default_db(); struct ovsdb_schema *schema; check_ovsdb_error(ovsdb_file_read_schema(db_file_name, &schema)); puts(schema->version); ovsdb_schema_destroy(schema); } static void do_db_cksum(int argc OVS_UNUSED, char *argv[]) { const char *db_file_name = argc >= 2 ? argv[1] : default_db(); struct ovsdb_schema *schema; check_ovsdb_error(ovsdb_file_read_schema(db_file_name, &schema)); puts(schema->cksum); ovsdb_schema_destroy(schema); } static void do_schema_version(int argc, char *argv[]) { const char *schema_file_name = argc >= 2 ? argv[1] : default_schema(); struct ovsdb_schema *schema; check_ovsdb_error(ovsdb_schema_from_file(schema_file_name, &schema)); puts(schema->version); ovsdb_schema_destroy(schema); } static void do_schema_cksum(int argc, char *argv[]) { const char *schema_file_name = argc >= 2 ? 
argv[1] : default_schema(); struct ovsdb_schema *schema; check_ovsdb_error(ovsdb_schema_from_file(schema_file_name, &schema)); puts(schema->cksum); ovsdb_schema_destroy(schema); } static void transact(bool read_only, int argc, char *argv[]) { const char *db_file_name = argc >= 3 ? argv[1] : default_db(); const char *transaction = argv[argc - 1]; struct json *request, *result; struct ovsdb *db; check_ovsdb_error(ovsdb_file_open(db_file_name, read_only, &db, NULL)); request = parse_json(transaction); result = ovsdb_execute(db, NULL, request, 0, NULL); json_destroy(request); print_and_free_json(result); ovsdb_destroy(db); } static void do_query(int argc, char *argv[]) { transact(true, argc, argv); } static void do_transact(int argc, char *argv[]) { transact(false, argc, argv); } static void print_db_changes(struct shash *tables, struct shash *names, const struct ovsdb_schema *schema) { struct shash_node *n1; SHASH_FOR_EACH (n1, tables) { const char *table = n1->name; struct ovsdb_table_schema *table_schema; struct json *rows = n1->data; struct shash_node *n2; if (n1->name[0] == '_' || rows->type != JSON_OBJECT) { continue; } table_schema = shash_find_data(&schema->tables, table); SHASH_FOR_EACH (n2, json_object(rows)) { const char *row_uuid = n2->name; struct json *columns = n2->data; struct shash_node *n3; char *old_name, *new_name; bool free_new_name = false; old_name = new_name = shash_find_data(names, row_uuid); if (columns->type == JSON_OBJECT) { struct json *new_name_json; new_name_json = shash_find_data(json_object(columns), "name"); if (new_name_json) { new_name = json_to_string(new_name_json, JSSF_SORT); free_new_name = true; } } printf("\ttable %s", table); if (!old_name) { if (new_name) { printf(" insert row %s (%.8s):\n", new_name, row_uuid); } else { printf(" insert row %.8s:\n", row_uuid); } } else { printf(" row %s (%.8s):\n", old_name, row_uuid); } if (columns->type == JSON_OBJECT) { if (show_log_verbosity > 1) { SHASH_FOR_EACH (n3, 
json_object(columns)) { const char *column = n3->name; const struct ovsdb_column *column_schema; struct json *value = n3->data; char *value_string = NULL; column_schema = (table_schema ? shash_find_data(&table_schema->columns, column) : NULL); if (column_schema) { const struct ovsdb_type *type; struct ovsdb_error *error; struct ovsdb_datum datum; type = &column_schema->type; error = ovsdb_datum_from_json(&datum, type, value, NULL); if (!error) { struct ds s; ds_init(&s); ovsdb_datum_to_string(&datum, type, &s); value_string = ds_steal_cstr(&s); } else { ovsdb_error_destroy(error); } } if (!value_string) { value_string = json_to_string(value, JSSF_SORT); } printf("\t\t%s=%s\n", column, value_string); free(value_string); } } if (!old_name || (new_name != old_name && strcmp(old_name, new_name))) { if (old_name) { shash_delete(names, shash_find(names, row_uuid)); free(old_name); } shash_add(names, row_uuid, (new_name ? xstrdup(new_name) : xmemdup0(row_uuid, 8))); } } else if (columns->type == JSON_NULL) { struct shash_node *node; printf("\t\tdelete row\n"); node = shash_find(names, row_uuid); if (node) { shash_delete(names, node); } free(old_name); } if (free_new_name) { free(new_name); } } } } static void do_show_log(int argc, char *argv[]) { const char *db_file_name = argc >= 2 ? 
argv[1] : default_db(); struct shash names; struct ovsdb_log *log; struct ovsdb_schema *schema; unsigned int i; check_ovsdb_error(ovsdb_log_open(db_file_name, OVSDB_LOG_READ_ONLY, -1, &log)); shash_init(&names); schema = NULL; for (i = 0; ; i++) { struct json *json; check_ovsdb_error(ovsdb_log_read(log, &json)); if (!json) { break; } printf("record %u:", i); if (i == 0) { check_ovsdb_error(ovsdb_schema_from_json(json, &schema)); printf(" \"%s\" schema, version=\"%s\", cksum=\"%s\"\n", schema->name, schema->version, schema->cksum); } else if (json->type == JSON_OBJECT) { struct json *date, *comment; date = shash_find_data(json_object(json), "_date"); if (date && date->type == JSON_INTEGER) { long long int t = json_integer(date); char *s; if (t < INT32_MAX) { /* Older versions of ovsdb wrote timestamps in seconds. */ t *= 1000; } s = xastrftime_msec(" %Y-%m-%d %H:%M:%S.###", t, true); fputs(s, stdout); free(s); } comment = shash_find_data(json_object(json), "_comment"); if (comment && comment->type == JSON_STRING) { printf(" \"%s\"", json_string(comment)); } if (i > 0 && show_log_verbosity > 0) { putchar('\n'); print_db_changes(json_object(json), &names, schema); } } json_destroy(json); putchar('\n'); } ovsdb_log_close(log); ovsdb_schema_destroy(schema); /* XXX free 'names'. 
*/ } static void do_help(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { usage(); } static const struct command all_commands[] = { { "create", 0, 2, do_create }, { "compact", 0, 2, do_compact }, { "convert", 0, 3, do_convert }, { "needs-conversion", 0, 2, do_needs_conversion }, { "db-version", 0, 1, do_db_version }, { "db-cksum", 0, 1, do_db_cksum }, { "schema-version", 0, 1, do_schema_version }, { "schema-cksum", 0, 1, do_schema_cksum }, { "query", 1, 2, do_query }, { "transact", 1, 2, do_transact }, { "show-log", 0, 1, do_show_log }, { "help", 0, INT_MAX, do_help }, { NULL, 0, 0, NULL }, }; static const struct command *get_all_commands(void) { return all_commands; } openvswitch-2.0.1+git20140120/ovsdb/ovsdb.c000066400000000000000000000314661226605124000200620ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ovsdb.h" #include "column.h" #include "json.h" #include "ovsdb-error.h" #include "ovsdb-parser.h" #include "ovsdb-types.h" #include "simap.h" #include "table.h" #include "transaction.h" /* Allocates a zeroed schema that holds private copies of 'name', 'version' and 'cksum' and an initialized, empty table map. */ struct ovsdb_schema * ovsdb_schema_create(const char *name, const char *version, const char *cksum) { struct ovsdb_schema *schema; schema = xzalloc(sizeof *schema); schema->name = xstrdup(name); schema->version = xstrdup(version); schema->cksum = xstrdup(cksum); shash_init(&schema->tables); return schema; } /* Returns a new schema with 'old''s name, version and cksum and a clone of each of its table schemas. */ struct ovsdb_schema * ovsdb_schema_clone(const struct ovsdb_schema *old) { struct ovsdb_schema *new; struct shash_node *node; new = ovsdb_schema_create(old->name, old->version, old->cksum); SHASH_FOR_EACH (node, &old->tables) { const struct ovsdb_table_schema *ts = node->data; shash_add(&new->tables, node->name, ovsdb_table_schema_clone(ts)); } return new; } /* Destroys 'schema' along with every table schema it contains. A null 'schema' is a no-op. */ void ovsdb_schema_destroy(struct ovsdb_schema *schema) { struct shash_node *node; if (!schema) { return; } SHASH_FOR_EACH (node, &schema->tables) { ovsdb_table_schema_destroy(node->data); } shash_destroy(&schema->tables); free(schema->name); free(schema->version); free(schema->cksum); free(schema); } /* Reads 'file_name' as JSON and parses it as a schema. On success stores the schema in '*schemap' and returns NULL; on failure stores NULL in '*schemap' and returns an error. */ struct ovsdb_error * ovsdb_schema_from_file(const char *file_name, struct ovsdb_schema **schemap) { struct ovsdb_schema *schema; struct ovsdb_error *error; struct json *json; *schemap = NULL; json = json_from_file(file_name); if (json->type == JSON_STRING) { error = ovsdb_error("failed to read schema", "\"%s\" could not be read as JSON (%s)", file_name, json_string(json)); json_destroy(json); return error; } error = ovsdb_schema_from_json(json, &schema); json_destroy(json); if (error) { return ovsdb_wrap_error(error, "failed to parse \"%s\" as ovsdb schema", file_name); } *schemap = schema; return NULL; } /* Checks the table reference, if any, in 'base', which is the key or value type of 'column' ('base_name' says which, for the error message). Returns a syntax error if the referenced table is not in 'tables'. As a side effect, may force 'column' to be persistent (see the comment in the body). */ static struct ovsdb_error * WARN_UNUSED_RESULT ovsdb_schema_check_ref_table(struct ovsdb_column *column, const struct shash *tables, const struct ovsdb_base_type *base, const char *base_name) { struct 
ovsdb_table_schema *refTable; if (base->type != OVSDB_TYPE_UUID || !base->u.uuid.refTableName) { return NULL; } refTable = shash_find_data(tables, base->u.uuid.refTableName); if (!refTable) { return ovsdb_syntax_error(NULL, NULL, "column %s %s refers to undefined table %s", column->name, base_name, base->u.uuid.refTableName); } if (ovsdb_base_type_is_strong_ref(base) && !refTable->is_root) { /* We cannot allow a strong reference to a non-root table to be * ephemeral: if it is the only reference to a row, then replaying the * database log from disk will cause the referenced row to be deleted, * even though it did exist in memory. If there are references to that * row later in the log (to modify it, to delete it, or just to point * to it), then this will yield a transaction error. */ column->persistent = true; } return NULL; } /* Returns true only if the whole of 's' has the form <digits>.<digits>.<digits>. 'n' stays -1 unless sscanf() matches everything up to the trailing %n. */ static bool is_valid_version(const char *s) { int n = -1; ignore(sscanf(s, "%*[0-9].%*[0-9].%*[0-9]%n", &n)); return n != -1 && s[n] == '\0'; } /* Returns the number of tables in 'schema''s root set. 
*/ static size_t root_set_size(const struct ovsdb_schema *schema) { struct shash_node *node; size_t n_root = 0; SHASH_FOR_EACH (node, &schema->tables) { struct ovsdb_table_schema *table = node->data; n_root += table->is_root; } return n_root; } /* Parses 'json' as a database schema. On success stores the new schema in '*schemap' and returns NULL; on failure stores NULL in '*schemap' and returns an error. "version" and "cksum" are optional members and default to empty strings. */ struct ovsdb_error * ovsdb_schema_from_json(struct json *json, struct ovsdb_schema **schemap) { struct ovsdb_schema *schema; const struct json *name, *tables, *version_json, *cksum; struct ovsdb_error *error; struct shash_node *node; struct ovsdb_parser parser; const char *version; *schemap = NULL; ovsdb_parser_init(&parser, json, "database schema"); name = ovsdb_parser_member(&parser, "name", OP_ID); version_json = ovsdb_parser_member(&parser, "version", OP_STRING | OP_OPTIONAL); cksum = ovsdb_parser_member(&parser, "cksum", OP_STRING | OP_OPTIONAL); tables = ovsdb_parser_member(&parser, "tables", OP_OBJECT); error = ovsdb_parser_finish(&parser); if (error) { return error; } if (version_json) { version = json_string(version_json); if (!is_valid_version(version)) { return ovsdb_syntax_error(json, NULL, "schema version \"%s\" not " "in format x.y.z", version); } } else { /* Backward compatibility with old databases. */ version = ""; } schema = ovsdb_schema_create(json_string(name), version, cksum ? json_string(cksum) : ""); SHASH_FOR_EACH (node, json_object(tables)) { struct ovsdb_table_schema *table; if (node->name[0] == '_') { error = ovsdb_syntax_error(json, NULL, "names beginning with " "\"_\" are reserved"); } else if (!ovsdb_parser_is_id(node->name)) { error = ovsdb_syntax_error(json, NULL, "name must be a valid id"); } else { error = ovsdb_table_schema_from_json(node->data, node->name, &table); } if (error) { ovsdb_schema_destroy(schema); return error; } shash_add(&schema->tables, table->name, table); } /* "isRoot" was not part of the original schema definition. Before it was * added, there was no support for garbage collection. 
So, for backward * compatibility, if the root set is empty then assume that every table is * in the root set. */ if (root_set_size(schema) == 0) { SHASH_FOR_EACH (node, &schema->tables) { struct ovsdb_table_schema *table = node->data; table->is_root = true; } } /* Validate that all refTables refer to the names of tables that exist. * * Also force certain columns to be persistent, as explained in * ovsdb_schema_check_ref_table(). This requires 'is_root' to be known, so * this must follow the loop updating 'is_root' above. */ SHASH_FOR_EACH (node, &schema->tables) { struct ovsdb_table_schema *table = node->data; struct shash_node *node2; SHASH_FOR_EACH (node2, &table->columns) { struct ovsdb_column *column = node2->data; error = ovsdb_schema_check_ref_table(column, &schema->tables, &column->type.key, "key"); if (!error) { error = ovsdb_schema_check_ref_table(column, &schema->tables, &column->type.value, "value"); } if (error) { ovsdb_schema_destroy(schema); return error; } } } *schemap = schema; return NULL; } /* Returns a new JSON object describing 'schema': "name", then "version" and "cksum" when they are nonempty, then "tables". */ struct json * ovsdb_schema_to_json(const struct ovsdb_schema *schema) { struct json *json, *tables; struct shash_node *node; bool default_is_root; json = json_object_create(); json_object_put_string(json, "name", schema->name); if (schema->version[0]) { json_object_put_string(json, "version", schema->version); } if (schema->cksum[0]) { json_object_put_string(json, "cksum", schema->cksum); } /* "isRoot" was not part of the original schema definition. Before it was * added, there was no support for garbage collection. So, for backward * compatibility, if every table is in the root set then do not output * "isRoot" in table schemas. 
*/ default_is_root = root_set_size(schema) == shash_count(&schema->tables); tables = json_object_create(); SHASH_FOR_EACH (node, &schema->tables) { struct ovsdb_table_schema *table = node->data; json_object_put(tables, table->name, ovsdb_table_schema_to_json(table, default_is_root)); } json_object_put(json, "tables", tables); return json; } /* Returns true if 'a' and 'b' specify equivalent schemas, false if they * differ. */ bool ovsdb_schema_equal(const struct ovsdb_schema *a, const struct ovsdb_schema *b) { /* This implementation is simple, stupid, and slow, but I doubt that it * will ever require much maintenance. */ struct json *ja = ovsdb_schema_to_json(a); struct json *jb = ovsdb_schema_to_json(b); bool equals = json_equal(ja, jb); json_destroy(ja); json_destroy(jb); return equals; } /* If 'base' is a UUID type that names a reference table, points its 'refTable' at the entry of that name in 'tables' (NULL if there is none). */ static void ovsdb_set_ref_table(const struct shash *tables, struct ovsdb_base_type *base) { if (base->type == OVSDB_TYPE_UUID && base->u.uuid.refTableName) { struct ovsdb_table *table; table = shash_find_data(tables, base->u.uuid.refTableName); base->u.uuid.refTable = table; } } /* Creates and returns a database for 'schema' with one table per table schema, no replicas and no triggers. The database stores the 'schema' pointer itself, and ovsdb_destroy() later destroys it. */ struct ovsdb * ovsdb_create(struct ovsdb_schema *schema) { struct shash_node *node; struct ovsdb *db; db = xmalloc(sizeof *db); db->schema = schema; list_init(&db->replicas); list_init(&db->triggers); db->run_triggers = false; shash_init(&db->tables); SHASH_FOR_EACH (node, &schema->tables) { struct ovsdb_table_schema *ts = node->data; shash_add(&db->tables, node->name, ovsdb_table_create(ts)); } /* Set all the refTables. */ SHASH_FOR_EACH (node, &schema->tables) { struct ovsdb_table_schema *table = node->data; struct shash_node *node2; SHASH_FOR_EACH (node2, &table->columns) { struct ovsdb_column *column = node2->data; ovsdb_set_ref_table(&db->tables, &column->type.key); ovsdb_set_ref_table(&db->tables, &column->type.value); } } return db; } /* Destroys 'db': removes its replicas, then destroys its tables and its schema. A null 'db' is a no-op. */ void ovsdb_destroy(struct ovsdb *db) { if (db) { struct shash_node *node; /* Remove all the replicas. 
*/ while (!list_is_empty(&db->replicas)) { struct ovsdb_replica *r = CONTAINER_OF(list_pop_back(&db->replicas), struct ovsdb_replica, node); ovsdb_remove_replica(db, r); } /* Delete all the tables. This also deletes their schemas. */ SHASH_FOR_EACH (node, &db->tables) { struct ovsdb_table *table = node->data; ovsdb_table_destroy(table); } shash_destroy(&db->tables); /* The schemas, but not the table that points to them, were deleted in * the previous step, so we need to clear out the table. We can't * destroy the table, because ovsdb_schema_destroy() will do that. */ shash_clear(&db->schema->tables); ovsdb_schema_destroy(db->schema); free(db); } } /* Adds some memory usage statistics for 'db' into 'usage', for use with * memory_report(). */ void ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage) { const struct shash_node *node; unsigned int cells = 0; SHASH_FOR_EACH (node, &db->tables) { const struct ovsdb_table *table = node->data; unsigned int n_columns = shash_count(&table->schema->columns); unsigned int n_rows = hmap_count(&table->rows); cells += n_rows * n_columns; } simap_increase(usage, "cells", cells); } /* Returns the table named 'name' in 'db', or NULL if there is none. */ struct ovsdb_table * ovsdb_get_table(const struct ovsdb *db, const char *name) { return shash_find_data(&db->tables, name); } /* Initializes 'r' as a replica of the given 'class'. */ void ovsdb_replica_init(struct ovsdb_replica *r, const struct ovsdb_replica_class *class) { r->class = class; } /* Appends 'r' to 'db''s list of replicas. */ void ovsdb_add_replica(struct ovsdb *db, struct ovsdb_replica *r) { list_push_back(&db->replicas, &r->node); } /* Unlinks 'r' from the replica list it is on and destroys it through its class's 'destroy' callback. */ void ovsdb_remove_replica(struct ovsdb *db OVS_UNUSED, struct ovsdb_replica *r) { list_remove(&r->node); (r->class->destroy)(r); } openvswitch-2.0.1+git20140120/ovsdb/ovsdb.h000066400000000000000000000063121226605124000200570ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVSDB_OVSDB_H #define OVSDB_OVSDB_H 1 #include "compiler.h" #include "hmap.h" #include "list.h" #include "shash.h" struct json; struct ovsdb_log; struct ovsdb_session; struct ovsdb_txn; struct simap; struct uuid; /* Database schema. */ struct ovsdb_schema { char *name; /* Schema name; owned by the schema. */ char *version; /* "x.y.z", or "" if the schema has none. */ char *cksum; /* Checksum string, or "" if the schema has none. */ struct shash tables; /* Contains "struct ovsdb_table_schema *"s. */ }; /* Schema construction, destruction, parsing and serialization. The *_from_* functions return NULL on success and an error otherwise. */ struct ovsdb_schema *ovsdb_schema_create(const char *name, const char *version, const char *cksum); struct ovsdb_schema *ovsdb_schema_clone(const struct ovsdb_schema *); void ovsdb_schema_destroy(struct ovsdb_schema *); struct ovsdb_error *ovsdb_schema_from_file(const char *file_name, struct ovsdb_schema **) WARN_UNUSED_RESULT; struct ovsdb_error *ovsdb_schema_from_json(struct json *, struct ovsdb_schema **) WARN_UNUSED_RESULT; struct json *ovsdb_schema_to_json(const struct ovsdb_schema *); bool ovsdb_schema_equal(const struct ovsdb_schema *, const struct ovsdb_schema *); /* Database. */ struct ovsdb { struct ovsdb_schema *schema; struct list replicas; /* Contains "struct ovsdb_replica"s. */ struct shash tables; /* Contains "struct ovsdb_table *"s. */ /* Triggers. */ struct list triggers; /* Contains "struct ovsdb_trigger"s. 
*/ bool run_triggers; }; /* Database lifetime and lookup. */ struct ovsdb *ovsdb_create(struct ovsdb_schema *); void ovsdb_destroy(struct ovsdb *); void ovsdb_get_memory_usage(const struct ovsdb *, struct simap *usage); struct ovsdb_table *ovsdb_get_table(const struct ovsdb *, const char *); /* Transaction execution. NOTE(review): the implementation is not in this file; presumably it runs the JSON-RPC "transact" params against the database and returns the JSON reply -- confirm against the definition. */ struct json *ovsdb_execute(struct ovsdb *, const struct ovsdb_session *, const struct json *params, long long int elapsed_msec, long long int *timeout_msec); /* Database replication. */ struct ovsdb_replica { struct list node; /* Element in "struct ovsdb" replicas list. */ const struct ovsdb_replica_class *class; }; /* Callbacks that a replica implementation supplies. */ struct ovsdb_replica_class { struct ovsdb_error *(*commit)(struct ovsdb_replica *, const struct ovsdb_txn *, bool durable); void (*destroy)(struct ovsdb_replica *); }; void ovsdb_replica_init(struct ovsdb_replica *, const struct ovsdb_replica_class *); void ovsdb_add_replica(struct ovsdb *, struct ovsdb_replica *); void ovsdb_remove_replica(struct ovsdb *, struct ovsdb_replica *); #endif /* ovsdb/ovsdb.h */ openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/000077500000000000000000000000001226605124000213145ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/.gitignore000066400000000000000000000000161226605124000233010ustar00rootroot00000000000000/ovsdbmonitor openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/COPYING000066400000000000000000000010661226605124000223520ustar00rootroot00000000000000Copyright (c) 2010 Citrix Systems, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/ConfigWindow.ui000066400000000000000000000127131226605124000242540ustar00rootroot00000000000000 ConfigWindow 0 0 386 303 Qt::TabFocus OVSDB Monitor Configuration 0 Hosts 10 10 341 194 Add Edit Delete Qt::Vertical 20 40 Logging Whether to log traffic exchanges in the log window Log traffic Qt::Vertical 20 164 View Replaces UUIDs with a shorter string of the first few characters. The tooltip still contains the full value Truncate UUIDs Qt::Vertical 20 164 Qt::Horizontal 40 20 QDialogButtonBox::Apply|QDialogButtonBox::Cancel|QDialogButtonBox::Ok hostList hostAddButton hostEditButton hostDeleteButton buttonBox tabWidget openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/FlowWindow.ui000066400000000000000000000134561226605124000237630ustar00rootroot00000000000000 FlowWindow 0 0 800 600 OVSDB Monitor 0 Awaiting update... Server-side grep true 20 QComboBox::NoInsert 32 Save Delete Qt::Horizontal 40 20 Host hostComboBox QComboBox::AdjustToContents Auto-refetch every s 1 1000000 Qt::Horizontal 40 20 Refetches the datapath names and rebuilds the window tabs to reflect them. Use when the network has been reconfigured, e.g. 
a bond has been created Refetch Datapath List Refetch Qt::Horizontal 0 0 800 28 File Show Log New DB Window Preferences Quit New Flow Window openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/HostWindow.ui000066400000000000000000000071341226605124000237650ustar00rootroot00000000000000 HostWindow Qt::WindowModal 0 0 400 300 0 0 Host Properties Host name or IP hostAddressEdit 256 0 SSH Password hostPasswordEdit 256 0 QLineEdit::Password Connect target hostConnectTarget 256 0 Qt::Horizontal QDialogButtonBox::Cancel|QDialogButtonBox::Ok hostAddressEdit hostPasswordEdit buttonBox buttonBox accepted() HostWindow accept() 248 254 157 274 buttonBox rejected() HostWindow reject() 316 260 286 274 openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/LogWindow.ui000066400000000000000000000031411226605124000235630ustar00rootroot00000000000000 LogWindow 0 0 735 558 OVSDB Monitor Log Qt::Horizontal QDialogButtonBox::Close|QDialogButtonBox::Reset buttonBox accepted() LogWindow accept() 248 254 157 274 buttonBox rejected() LogWindow reject() 316 260 286 274 openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/MainWindow.ui000066400000000000000000000167561226605124000237460ustar00rootroot00000000000000 MainWindow 0 0 800 600 OVSDB Monitor 0 Bridge Controller Interface Mirror NetFlow Open_vSwitch Port QoS Queue sFlow SSL Host hostComboBox QComboBox::AdjustToContents Auto-refetch every s 1 1000000 Qt::Horizontal 40 20 Refetch 0 0 800 28 File Show Log New DB Window Preferences Quit New Flow Window openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEApp.py000066400000000000000000000070401226605124000227610ustar00rootroot00000000000000# Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # from OVEStandard import * from OVEConfig import * from OVEFetch import * from OVEConfigWindow import * from OVEFlowWindow import * from OVELogWindow import * from OVEMainWindow import * class OVEApp: def __init__(self): self.app = globalApp self.app.setOrganizationName("Citrix_Systems_Inc") self.app.setOrganizationDomain("citrix.com") self.app.setApplicationName("ovsdbmonitor") self.mainWindows = [] self.flowWindows = [] self.configWindow = None def enter(self): if len(OVEConfig.Inst().hosts) < 1: self.showConfig(True) QtGui.QMessageBox.information( None, "OVSDB Monitor", "This application browses openvswitch databases on remote hosts. 
Please add one or more openvswitch hosts to continue") self.loadMainWindows() self.loadFlowWindows() if len(self.mainWindows) == 0 and len(self.flowWindows) == 0: self.newMainWindow() self.newLogWindow() # Reactor must be started after the event loop is running, so use a zero timeout QtCore.QTimer.singleShot(0, OVEFetch.startReactor) OVELog("Application started") retCode = self.app.exec_() index = 0 for mainWindow in self.mainWindows: if mainWindow.isVisible(): mainWindow.saveSettings(index) index += 1 # Indent intentional OVEMainWindow.terminateSettings(index) index = 0 for flowWindow in self.flowWindows: if flowWindow.isVisible(): flowWindow.saveSettings(index) index += 1 # Indent intentional OVEFlowWindow.terminateSettings(index) self.logWindow.saveSettings() def quit(self): self.app.quit() def showLog(self, value): if value: self.logWindow.hide() self.logWindow.show() else: self.logWindow.hide() def showConfig(self, value): if value: del self.configWindow self.configWindow = OVEConfigWindow(self) self.configWindow.show() else: self.configWindow.hide() def newMainWindow(self, loadIndex = None): self.mainWindows.append(OVEMainWindow(self, loadIndex)) self.mainWindows[-1].show() def newFlowWindow(self, loadIndex = None): self.flowWindows.append(OVEFlowWindow(self, loadIndex)) self.flowWindows[-1].show() def newLogWindow(self): self.logWindow = OVELogWindow(self) def loadMainWindows(self): for loadIndex in range(0, 100): if OVEMainWindow.isLoadable(loadIndex): self.newMainWindow(loadIndex) else: break def loadFlowWindows(self): for loadIndex in range(0, 100): if OVEFlowWindow.isLoadable(loadIndex): self.newFlowWindow(loadIndex) else: break openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVECommonWindow.py000066400000000000000000000222451226605124000246650ustar00rootroot00000000000000# Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from OVEStandard import * from OVEConfig import * from OVEFetch import * from OVELogger import * from OVEUtil import * from Ui_MainWindow import * class OVECommonWindow: def __init__(self, app, loadIndex = None): self.app = app self.intervalTimerId = None self.hostUuid = '' self.intervalChecked = True self.intervalSeconds = 5 self.fetchSkip = 0 self.currentRef = self.BASE_REF self.ui.setupUi(self) if loadIndex is not None: self.loadSettings(loadIndex) self.connect(self.ui.actionNew_DB_Window, QtCore.SIGNAL("triggered()"), self.xon_actionNew_DB_Window_triggered) self.connect(self.ui.actionNew_Flow_Window, QtCore.SIGNAL("triggered()"), self.xon_actionNew_Flow_Window_triggered) self.connect(self.ui.actionShow_Log, QtCore.SIGNAL("triggered()"), self.xon_actionShow_Log_triggered) self.connect(self.ui.actionPreferences, QtCore.SIGNAL("triggered()"), self.xon_actionPreferences_triggered) self.connect(self.ui.actionQuit, QtCore.SIGNAL("triggered()"), self.xon_actionQuit_triggered) self.connect(self.ui.fetchButton, QtCore.SIGNAL("clicked()"), self.xon_fetchButton_clicked) self.connect(self.ui.tabWidget, QtCore.SIGNAL("currentChanged(int)"), self.xon_tabWidget_currentChanged) self.connect(self.ui.hostComboBox, QtCore.SIGNAL("currentIndexChanged(int)"), self.xon_hostComboBox_currentIndexChanged) self.connect(self.ui.intervalCheckBox, QtCore.SIGNAL("stateChanged(int)"), self.xon_intervalCheckBox_stateChanged) self.connect(self.ui.intervalSpinBox, QtCore.SIGNAL("valueChanged(int)"), self.xon_intervalSpinBox_valueChanged) self.connect(OVEConfig.Inst(), 
QtCore.SIGNAL("configUpdated()"), self.xon_configUpdated) self.updateHosts() self.updateInterval() self.updateIntervalState() self.updateTable() def xon_actionNew_DB_Window_triggered(self): self.app.newMainWindow() def xon_actionNew_Flow_Window_triggered(self): self.app.newFlowWindow() def xon_actionShow_Log_triggered(self): self.app.showLog(True) def xon_actionPreferences_triggered(self): self.app.showConfig(True) def xon_actionQuit_triggered(self): self.app.quit() def xon_tabWidget_currentChanged(self, value): self.updateTable() def xon_fetchButton_clicked(self): self.updateTable() def xon_configUpdated(self): self.updateHosts() def xon_hostComboBox_currentIndexChanged(self, index): if (index >= 0): itemData = self.ui.hostComboBox.itemData(index) self.hostUuid = str(itemData.toString()) self.deleteCurrentTable() self.updateTable() def xon_intervalCheckBox_stateChanged(self, state): self.intervalChecked = (state == Qt.Checked) self.updateIntervalState() def xon_intervalSpinBox_valueChanged(self, value): self.intervalSeconds = value self.updateIntervalState() def updateIntervalState(self): if self.intervalTimerId is not None: self.killTimer(self.intervalTimerId) if self.intervalChecked: self.intervalTimerId = self.startTimer(1000*self.intervalSeconds) def updateHosts(self): currentHostUuid = self.hostUuid # self.hostUuid will change due to currentIndexChanged events as we rebuild the combo box self.hostUuid = '' self.ui.hostComboBox.clear() for i, host in enumerate(OVEConfig.Inst().hosts): self.ui.hostComboBox.addItem(host['address'], QVariant(host['uuid'])) if host['uuid'] == currentHostUuid: # This is the currently selected host self.ui.hostComboBox.setCurrentIndex(i) if len(OVEConfig.Inst().hosts) == 0: self.ui.hostComboBox.addItem('(No hosts configured)', QVariant('')) def updateInterval(self): self.ui.intervalCheckBox.setChecked(self.intervalChecked) self.ui.intervalSpinBox.setValue(self.intervalSeconds) def handleFetchEvent(self, ref, values): 
OVELog('Unhandled FetchEvent') def handleFetchFailEvent(self, ref, message): OVELog('Unhandled FetchFailEvent') def setFetchSkip(self): # Call before sending a request via OVEFetch self.fetchSkip = 6 def timerEvent(self, event): if event.timerId() == self.intervalTimerId: if self.fetchSkip > 0: self.statusBar().showMessage('Fetch stalled... resend in '+str(self.fetchSkip*self.intervalSeconds)+'s') self.fetchSkip -= 1 if self.fetchSkip == 0: # Stall has timed out. The connection might have hung so reset. Seems to happen with PySide only OVEFetch.Inst(self.hostUuid).resetTransport() else: self.updateTable() else: QtGui.QMainWindow.timerEvent(self, event) def customEvent(self, event): if event.type() == OVEFetchEvent.TYPE: if isinstance(event, OVEFetchEvent): # The right way to get data ref = event.ref values = event.data else: # Workaround for PySide issue ref = OVEFetch.Inst(self.hostUuid).snoopRef(self) values = OVEFetch.Inst(self.hostUuid).snoopValues(self) try: if ref == self.currentRef: self.fetchSkip = 0 self.currentRef += 1 # PySide workaround self.handleFetchEvent(ref, values) else: # If refs don't match this event relates to a request before the current one. We've moved # on since then, e.g. 
changed the table we've viewing, so ignore it if OVEConfig.Inst().logTraffic: OVELog('FetchEvent ref mismatch '+str(ref)+' != '+str(self.currentRef)) except Exception, e: OVELog("Error during data handling: "+str(e)) elif event.type() == OVEFetchFailEvent.TYPE: if isinstance(event, OVEFetchFailEvent): # The right way to get data ref = event.ref message = event.message else: # Workaround for PySide issue ref = OVEFetch.Inst(self.hostUuid).snoopRef(self) message = OVEFetch.Inst(self.hostUuid).snoopMessage(self) if message is not None: OVELog(message) if ref == self.currentRef: self.fetchSkip = 0 self.currentRef += 1 # PySide workaround self.handleFetchFailEvent(ref, message) else: if OVEConfig.Inst().logTraffic: OVELog('FetchFailEvent ref mismatch '+str(ref)+' != '+str(self.currentRef)) def deleteCurrentTable(self): pass def saveSettings(self, index): key = self.LOAD_KEY+str(index) settings = QtCore.QSettings() settings.setValue(key+"/loadable", QVariant(True)) settings.setValue(key+"/pos", QVariant(self.pos())) settings.setValue(key+"/size", QVariant(self.size())) settings.setValue(key+"/hostUuid", QVariant(self.hostUuid)) settings.setValue(key+"/intervalChecked", QVariant(self.intervalChecked)) settings.setValue(key+"/intervalSeconds", QVariant(self.intervalSeconds)) return settings, key def loadSettings(self, index): key = self.LOAD_KEY+str(index) settings = QtCore.QSettings() pos = settings.value(key+"/pos", QVariant(QtCore.QPoint(200, 200))).toPoint() size = settings.value(key+"/size", QVariant(QtCore.QSize(400, 400))).toSize(); self.hostUuid = str(settings.value(key+"/hostUuid", QVariant('Unloaded')).toString()) self.intervalChecked = settings.value(key+"/intervalChecked", QVariant(True)).toBool() self.intervalSeconds = settings.value(key+"/intervalSeconds", QVariant(5)).toInt()[0] self.resize(size) self.move(pos) return settings, key @classmethod def terminateSettings(self, index): key = self.LOAD_KEY+str(index) settings = QtCore.QSettings() 
settings.setValue(key+"/loadable", QVariant(False)) settings.sync() @classmethod def isLoadable(cls, index): key = cls.LOAD_KEY+str(index) settings = QtCore.QSettings() return settings.value(key+"/loadable", QVariant(False)).toBool() openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEConfig.py000066400000000000000000000067001226605124000234500ustar00rootroot00000000000000# Copyright (c) 2011 Nicira, Inc. # Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from OVEStandard import * from OVELogger import * import ovs.json def str_recursive(x): t = type(x) if t == unicode: return str(x) elif t == list: return [str_recursive(_) for _ in x] elif t == dict: out = {} for k,v in x.iteritems(): out[str_recursive(k)] = str_recursive(v) return out else: return x class OVEConfig(QtCore.QObject): instance = None def __init__(self): QtCore.QObject.__init__(self) self.hosts = [] self.logTraffic = True self.truncateUuids = True self.ssgList = [] @classmethod def Inst(cls): if cls.instance is None: cls.instance = OVEConfig() cls.instance.loadConfig() return cls.instance def hostFromUuid(self, uuid): for host in self.hosts: if host['uuid'] == uuid: return host OVELog("+++ Couldn't find host '"+str(uuid)+"' in "+str([x['uuid'] for x in self.hosts])) return None def saveConfig(self): settings = QtCore.QSettings() settings.setValue('config/hosts', QVariant(ovs.json.to_string((self.hosts)))) settings.setValue('config/logTraffic', QVariant(self.logTraffic)) 
settings.setValue('config/truncateUuids', QVariant(self.truncateUuids)) settings.setValue('config/ssgList', QVariant(ovs.json.to_string(self.ssgList))) settings.sync() self.emitUpdated() def loadConfig(self): settings = QtCore.QSettings() jsonText = unicode(settings.value('config/hosts', QVariant('[]')).toString()) self.hosts = str_recursive(ovs.json.from_string(str(jsonText))) self.logTraffic = settings.value('config/logTraffic', QVariant(False)).toBool() self.truncateUuids = settings.value('config/truncateUuids', QVariant(False)).toBool() jsonText = unicode(settings.value('config/ssgList', QVariant('[]')).toString()) self.ssgList = ovs.json.from_string(str(jsonText)) if len(self.ssgList) == 0: self.ssgList = [ r'in_port0000', r'in_port0001', r'in_port0002', r'in_port0003', r'vlan65535', r'type0800', r'type0806', r'proto0', r'proto6', r'proto17', r'ff:ff:ff:ff:ff:ff', r'!ff:ff:ff:ff:ff:ff', r'0\.0\.0\.0', r'!0\.0\.0\.0', r'255\.255\.255\.255', r'!255\.255\.255\.255', r'never', r'drop', r'!never', r'!drop', r'(never|drop)', r'!(never|drop)' ] def emitUpdated(self): self.emit(QtCore.SIGNAL("configUpdated()")) openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEConfigWindow.py000066400000000000000000000124261226605124000246420ustar00rootroot00000000000000# Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from OVEStandard import * from OVEConfig import * from OVELogger import * from Ui_ConfigWindow import * from OVEHostWindow import * class OVEConfigWindow(QtGui.QDialog): def __init__(self, app): QtGui.QDialog.__init__(self) self.app = app self.ui = Ui_ConfigWindow() self.ui.setupUi(self) self.connect(self.ui.hostAddButton, QtCore.SIGNAL("clicked()"), self.xon_hostAddButton_clicked) self.connect(self.ui.hostEditButton, QtCore.SIGNAL("clicked()"), self.xon_hostEditButton_clicked) self.connect(self.ui.hostDeleteButton, QtCore.SIGNAL("clicked()"), self.xon_hostDeleteButton_clicked) self.connect(self.ui.buttonBox, QtCore.SIGNAL("clicked(QAbstractButton *)"), self.xon_actionButton_Box_clicked) self.connect(self.ui.hostList, QtCore.SIGNAL("currentItemChanged(QListWidgetItem *, QListWidgetItem *)"), self.xon_hostList_currentItemChanged) self.connect(self.ui.logTrafficCheckBox, QtCore.SIGNAL("stateChanged(int)"), self.xon_logTrafficCheckBox_stateChanged) self.connect(self.ui.truncateUuidsCheckBox, QtCore.SIGNAL("stateChanged(int)"), self.xon_truncateUuidsCheckBox_stateChanged) self.readConfig() self.updateWidgets() def handleHostWindowRecord(self, record, isEdit): if record['accepted'] and record['address'].strip() != '': currentRow = self.ui.hostList.currentRow() if isEdit: self.configHosts[currentRow] = record else: self.configHosts.append(record) self.updateWidgets() def xon_hostAddButton_clicked(self): hostWindow = OVEHostWindow(self) hostWindow.exec_() self.handleHostWindowRecord(hostWindow.record(), False) def xon_hostEditButton_clicked(self): if self.ui.hostList.currentItem() is None: pass # OVELog('No item to edit') else: currentRow = self.ui.hostList.currentRow() hostWindow = OVEHostWindow(self, self.configHosts[currentRow]) hostWindow.exec_() self.handleHostWindowRecord(hostWindow.record(), True) def xon_hostDeleteButton_clicked(self): if self.ui.hostList.currentItem() is not None: currentRow = self.ui.hostList.currentRow() del self.configHosts[currentRow] 
self.updateWidgets() def xon_actionButton_Box_clicked(self, button): role = self.ui.buttonBox.buttonRole(button) if role == QtGui.QDialogButtonBox.AcceptRole: self.writeConfig() self.close() elif role == QtGui.QDialogButtonBox.ApplyRole: self.writeConfig() elif role == QtGui.QDialogButtonBox.RejectRole: if self.configChanged(): self.close() else: ret = QtGui.QMessageBox.warning( self, "OVSDB Monitor", "Changes not applied. Discard?", QtGui.QMessageBox.Discard | QtGui.QMessageBox.Cancel | QtGui.QMessageBox.Apply, QtGui.QMessageBox.Discard) if ret == QtGui.QMessageBox.Apply: self.writeConfig() if ret != QtGui.QMessageBox.Cancel: self.close() def xon_hostList_currentItemChanged(self, current, previous): editable = (current is not None) self.ui.hostEditButton.setEnabled(editable) self.ui.hostDeleteButton.setEnabled(editable) def xon_logTrafficCheckBox_stateChanged(self, value): self.configLogTraffic = (value == Qt.Checked) def xon_truncateUuidsCheckBox_stateChanged(self, value): self.configTruncateUuids = (value == Qt.Checked) def updateWidgets(self): self.ui.hostList.clear() for host in self.configHosts: self.ui.hostList.addItem(host['address']) self.ui.logTrafficCheckBox.setChecked(self.configLogTraffic) self.ui.truncateUuidsCheckBox.setChecked(self.configTruncateUuids) def configChanged(self): return ( (self.configHosts == OVEConfig.Inst().hosts) and (self.configLogTraffic == (OVEConfig.Inst().logTraffic))and (self.configTruncateUuids == (OVEConfig.Inst().truncateUuids)) ) def readConfig(self): self.configHosts = deepcopy(OVEConfig.Inst().hosts) self.configLogTraffic = OVEConfig.Inst().logTraffic self.configTruncateUuids = OVEConfig.Inst().truncateUuids def writeConfig(self): OVEConfig.Inst().hosts = deepcopy(self.configHosts) OVEConfig.Inst().logTraffic = self.configLogTraffic OVEConfig.Inst().truncateUuids = self.configTruncateUuids OVEConfig.Inst().saveConfig() 
openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEFetch.py000066400000000000000000000412631226605124000232770ustar00rootroot00000000000000# Copyright (c) 2011 Nicira, Inc. # Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from OVEStandard import * from OVEConfig import * from OVELogger import * import ovs.json # This sequence installs the qt4reactor before twisted gets a chance to install its reactor import qt4reactor globalApp = QtGui.QApplication([]) qt4reactor.install() try: from twisted.conch.ssh import transport, userauth, connection, common, keys, channel from twisted.internet import defer, protocol, reactor from twisted.application import reactors except Exception, e: print('+++ Python Twisted Conch module is required\n') raise class OVEFetchUserAuth(userauth.SSHUserAuthClient): def __init__(self, fetch, *params): userauth.SSHUserAuthClient.__init__(self, *params) self.fetch = fetch self.authFails = 0 def getPassword(self): return defer.succeed(self.fetch.config()['password']) def ssh_USERAUTH_FAILURE(self, packet): if self.authFails > 0: # We normally get one so ignore. 
Real failures send these repeatedly OVELog('Authentication failure for '+self.fetch.config()['address']) self.authFails += 1 userauth.SSHUserAuthClient.ssh_USERAUTH_FAILURE(self, packet) class OVEFetchConnection(connection.SSHConnection, QtCore.QObject): def __init__(self, fetch, *params): connection.SSHConnection.__init__(self, *params) QtCore.QObject.__init__(self) self.fetch = fetch self._channel = None self._oldChannels = [] def serviceStarted(self): self.emit(QtCore.SIGNAL('connectionService(QObject)'), self) def serviceStopped(self): self.emit(QtCore.SIGNAL('connectionService(QObject)'), None) def execCommand(self, requester, ref, command, commandType): if self._channel is not None: # Don't delete old channels immediately in case they're e.g. going to time out with a failure self._oldChannels.append(self._channel) if len(self._oldChannels) > 90: # For 30 second timeouts at 1 second refresh interval and three windows open on a single host, need 90 channels del self._oldChannels[1] self._channel = OVECommandChannel(self.fetch, requester, ref, command, commandType, 2**16, 2**15, self) self.openChannel(self._channel) def connectionLost(self, reason): if self._channel is not None: self._channel.connectionLost(reason) class OVEFetchTransport(transport.SSHClientTransport, QtCore.QObject): def __init__(self, fetch, *params): # There is no __init__ method for this class # transport.SSHClientTransport.__init__(self, *params) QtCore.QObject.__init__(self) self.fetch = fetch self._connection = None self.connect(self, QtCore.SIGNAL('channelFailure(QObject, int, QString, QString, QString)'), self.fetch.xon_channelFailure) def verifyHostKey(self, hostKey, fingerprint): return defer.succeed(1) def connectionSecure(self): self._connection = OVEFetchConnection(self.fetch) QtCore.QObject.connect(self._connection, QtCore.SIGNAL('connectionService(QObject)'), self.fetch.xon_connectionService) self.requestService( OVEFetchUserAuth(self.fetch, self.fetch.config().get('username', 
'root'), self._connection)) def connectionLost(self, reason): if self._connection is not None: self._connection.connectionLost(reason) class OVEFetchWrapper: def __init__(self, contents): self.contents = contents class OVECommandChannel(channel.SSHChannel, QtCore.QObject): name = 'session' MSEC_TIMEOUT=10000 STATUS_CONNECTION_LOST = 100001 STATUS_TIMEOUT = 100002 END_MARKER='END-MARKER' END_MARKER_RE=re.compile(r'^END-MARKER$', re.MULTILINE) def __init__(self, fetch, requester, ref, command, commandType, *params): channel.SSHChannel.__init__(self, *params) QtCore.QObject.__init__(self) self.fetch = fetch self.requester = requester self.ref = ref self.command = command self.commandType= commandType self._data = '' self._extData = '' self._jsonValues = None self._timerId = None self._status = None self.connect(self, QtCore.SIGNAL('channelData(QObject, int, QString)'), self.fetch.xon_channelData) self.connect(self, QtCore.SIGNAL('channelExtData(QObject, int, QString)'), self.fetch.xon_channelExtData) self.connect(self, QtCore.SIGNAL('channelSuccess(QObject, int, QString, QString, QVariant)'), self.fetch.xon_channelSuccess) self.connect(self, QtCore.SIGNAL('channelFailure(QObject, int, QString, QString, QString)'), self.fetch.xon_channelFailure) def openFailed(self, reason): if self._timerId is not None: self.killTimer(self._timerId) self.emit(QtCore.SIGNAL('channelFailure(QObject, int, QString, QString, QString)'), self.requester, self.ref, 'Open failed:'+str(reason), '', '') def channelOpen(self, ignoredData): try: nsCommand = common.NS(str(self.command)) self._timerId = self.startTimer(self.MSEC_TIMEOUT) self.conn.sendRequest(self, 'exec', nsCommand, wantReply=1) except Exception, e: self.emit(QtCore.SIGNAL('channelFailure(QObject, int, QString, QString, QString)'), self.requester, self.ref, 'Open failed:'+str(e), self._data, self._extData) def dataReceived(self, data): self._data += data if OVEConfig.Inst().logTraffic: self.emit(QtCore.SIGNAL('channelData(QObject, 
int, QString)'), self.requester, self.ref, data) self.testIfDone() def extDataReceived(self, extData): self._extData += extData if OVEConfig.Inst().logTraffic: self.emit(QtCore.SIGNAL('channelExtData(QObject, int, QString)'), self.requester, self.ref, extData) def request_exit_status(self, data): # We can get the exit status before the data, so delay calling sendResult until we get both self._status = struct.unpack('>L', data)[0] self.testIfDone() def testIfDone(self): if self._status is not None: if self._status != 0: self.sendResult() # Failed, so send what we have elif len(self._data) > 0: # Status == success and we have some data if self.commandType == 'JSON': try: # Decode the JSON data, to confirm that we have all of the data self._jsonValues = ovs.json.from_string(str(self._data)) # FIXME: Should handle unicode self.sendResult() except: pass # Wait for more data elif self.commandType == 'framed': match = self.END_MARKER_RE.search(self._data) if match: self._data = self._data[:match.start()] # Remove end marker self.sendResult() else: OVELog('Bad command type') def sendResult(self): if self._timerId is not None: self.killTimer(self._timerId) if self.commandType == 'JSON' and self._status == 0 and self._jsonValues is not None: self.emit(QtCore.SIGNAL('channelSuccess(QObject, int, QString, QString, QVariant)'), self.requester, self.ref, self._data, self._extData, QVariant(OVEFetchWrapper(self._jsonValues))) elif self.commandType != 'JSON' and self._status == 0: self.emit(QtCore.SIGNAL('channelSuccess(QObject, int, QString, QString, QVariant)'), self.requester, self.ref, self._data, self._extData, QVariant(None)) else: self.emit(QtCore.SIGNAL('channelFailure(QObject, int, QString, QString, QString)'), self.requester, self.ref, 'Remote command failed (rc='+str(self._status)+')', self._data, self._extData) if self._status != self.STATUS_CONNECTION_LOST: try: self.loseConnection() except Exception, e: OVELog('OVECommandChannel.sendResult loseConnection error: 
'+str(e))

    def connectionLost(self, reason):
        # Appends a marker to the stderr buffer, records the connection-lost
        # status and then calls sendResult()
        self._extData += '+++ Connection lost'
        self._status = self.STATUS_CONNECTION_LOST
        self.sendResult()

    def timerEvent(self, event):
        # Only the timer whose id matches self._timerId is treated as a timeout;
        # every other timer event goes to the QObject base implementation
        if event.timerId() == self._timerId:
            self._extData += '+++ Timeout'
            self._status = self.STATUS_TIMEOUT
            self.sendResult()
        else:
            QtCore.QObject.timerEvent(self, event)

class OVEFetchEvent(QtCore.QEvent):
    """Custom Qt event carrying the result of a successful fetch (ref and data)."""
    # Event type id obtained from QEvent.registerEventType() when the class is created
    TYPE = QtCore.QEvent.Type(QtCore.QEvent.registerEventType())
    def __init__(self, ref, data):
        QtCore.QEvent.__init__(self, self.TYPE)
        self.ref = ref
        self.data = data

class OVEFetchFailEvent(QtCore.QEvent):
    """Custom Qt event carrying the error message of a failed fetch (ref and message)."""
    TYPE = QtCore.QEvent.Type(QtCore.QEvent.registerEventType())
    def __init__(self, ref, message):
        QtCore.QEvent.__init__(self, self.TYPE)
        self.ref = ref
        # The message is always stored as a str
        self.message = str(message)

class OVEFetch(QtCore.QObject):
    """Per-host command fetcher, one instance per host uuid (see Inst()).

    Commands are queued until a connection object is delivered through
    xon_connectionService() and are then run through that connection.
    Results are posted back to the requester as OVEFetchEvent or
    OVEFetchFailEvent via QCoreApplication.postEvent().
    """
    # Cache of OVEFetch objects keyed by host uuid, filled by Inst()
    instances = {}
    # Passed as the connectTCP timeout; the Qt fallback timer uses this value plus one second
    SEC_TIMEOUT = 10.0

    def __init__(self, uuid):
        QtCore.QObject.__init__(self)
        self._hostUuid = uuid
        self._config = None         # Host record, looked up lazily by config()
        self._transport = None
        self._connection = None
        self._commandQueue = []     # (requester, ref, command, commandType) tuples awaiting a connection
        self._timerRef = 0          # Incremented to invalidate outstanding connection timeouts
        # The three dicts below are keyed by requester and read back through the snoop* methods
        self.refs = {}
        self.messages = {}
        self.values = {}
        self.connect(OVEConfig.Inst(), QtCore.SIGNAL("configUpdated()"), self.xon_configUpdated)

    @classmethod
    def Inst(cls, uuid):
        """Return the OVEFetch for uuid, creating and caching it on first use."""
        if uuid not in cls.instances:
            cls.instances[uuid] = OVEFetch(uuid)
        return cls.instances[uuid]

    @classmethod
    def startReactor(cls):
        reactor.runReturn()

    def xon_configUpdated(self):
        # Forget the cached host record and drop the transport
        self._config = None
        self.resetTransport()

    def xon_connectionService(self, connection):
        """Store the connection and, if it is not None, run every queued command."""
        # NOTE(review): nesting below is reconstructed from a whitespace-collapsed copy - confirm against upstream
        self._connection = connection
        if self._connection is not None:
            OVELog('SSH connection to '+self.config()['address'] +' established')
            for command in self._commandQueue:
                # OVELog('Unqueueing '+str(command))
                self.execCommand2(*command)
            self._commandQueue = []

    def xon_channelData(self, requester, ref, data):
        # Logging only, and only when traffic logging is enabled
        if OVEConfig.Inst().logTraffic:
            OVELog('Channel data received: '+str(data))

    def xon_channelExtData(self, requester, ref, data):
        # Logging only, and only when traffic logging is enabled
        if OVEConfig.Inst().logTraffic:
            OVELog('+++ Channel extData (stderr) received: '+str(data))

    def xon_channelFailure(self, requester, ref, message, data, extData):
        """Post an OVEFetchFailEvent describing the failure to requester, then reset the transport."""
        if OVEConfig.Inst().logTraffic:
            OVELog('+++ Channel failure: '+str(message))
        # NOTE(review): assumed to be logged unconditionally; the original nesting is not recoverable from this copy
        OVELog("Closing SSH session due to failure")
        # Append whatever output the failed command produced to the error text
        errMessage = message
        if len(data) > 0:
            errMessage += '\n+++ Failed command output: '+data
        if len(extData) > 0:
            errMessage += '\n+++ Failed command output (stderr): '+extData
        self.refs[requester] = ref # For PySide workaround
        self.messages[requester] = errMessage # For PySide workaround
        event = OVEFetchFailEvent(ref, errMessage)
        QtCore.QCoreApplication.postEvent(requester, event)
        self.resetTransport()

    def xon_channelSuccess(self, requester, ref, data, extData, jsonValueVariant):
        """Post the command result to requester.

        The payload is jsonValues.contents when the variant holds an object,
        otherwise str(data). Any exception raised while building or posting
        the event is reported as an OVEFetchFailEvent instead.
        """
        jsonValues = jsonValueVariant.toPyObject()
        if OVEConfig.Inst().logTraffic:
            OVELog('--- Channel success')
        try:
            if jsonValues is not None:
                values = jsonValues.contents
            else:
                values = str(data)
            self.refs[requester] = ref # For PySide workaround
            self.values[requester] = values # For PySide workaround
            event = OVEFetchEvent(ref, values)
            QtCore.QCoreApplication.postEvent(requester, event)
        except Exception, e:
            message = ('+++ Failed to decode JSON reply: '+str(e))
            if len(data) > 0:
                message += "\n++++++ Data (stdout): "+str(data)
            if len(extData) > 0:
                message += '\n++++++ Error (stderr): '+str(extData)
            self.refs[requester] = ref # For PySide workaround
            self.messages[requester] = message # For PySide workaround
            event = OVEFetchFailEvent(ref, message)
            QtCore.QCoreApplication.postEvent(requester, event)

    # Use for workaround only
    def snoopRef(self, requester):
        return self.refs.get(requester, None)

    # Use for workaround only
    def snoopValues(self, requester):
        return self.values.get(requester, None)

    # Use for workaround only
    def snoopMessage(self, requester):
        return self.messages.get(requester, None)

    def config(self):
        """Return the host record for this fetcher's uuid, looking it up on first use."""
        if self._config is None:
            self._config = OVEConfig.Inst().hostFromUuid(self._hostUuid)
        return self._config

    def resetTransport(self):
        """Drop the current connection and transport so that the next execCommand() reconnects."""
        if OVEConfig.Inst().logTraffic:
            OVELog('Transport reset for '+self.config()['address'])
        # The attributes are deleted and then immediately recreated as None
        del self._connection
        del self._transport
        self._connection = None
        self._transport = None

    def transportErrback(self, failure, requester, ref, address):
        """Errback attached in execCommand(): report the connection failure to requester."""
        self._timerRef += 1 # Prevent timeout handling
        self.resetTransport()
        message = 'Failure connecting to '+address+': '+failure.getErrorMessage()
        self.refs[requester] = ref # For PySide workaround
        self.messages[requester] = message # For PySide workaround
        event = OVEFetchFailEvent(ref, message)
        QtCore.QCoreApplication.postEvent(requester, event)

    def transportTimeout(self, timerRef, requester, ref, address):
        """Timer callback armed in execCommand().

        Acts only if timerRef is still current and a transport exists without
        a connection; in that case a fail event is posted and the transport reset.
        """
        if self._timerRef == timerRef and self._transport is not None and self._connection is None:
            message = 'Connection attempt to ' +address+' timed out'
            self.refs[requester] = ref # For PySide workaround
            self.messages[requester] = message # For PySide workaround
            event = OVEFetchFailEvent(ref, message)
            QtCore.QCoreApplication.postEvent(requester, event)
            self.resetTransport()

    def execCommand(self, requester, ref, command, commandType):
        """Run command for requester, opening the transport first if there is none.

        With no transport the command is queued, a TCP connection is started
        (port from the host record, default 22) and a single-shot timeout is
        armed. Otherwise the command is handed to execCommand2().
        """
        if OVEConfig.Inst().logTraffic:
            # NOTE(review): the default string literal below is corrupted in this copy (it spans a
            # raw line break, which is not valid Python) - restore the original text from upstream
            hostName = (self.config() or {}).get('address', '
    ')
            OVELog(str(QtCore.QTime.currentTime().toString())+' '+hostName+': Executing '+command)
        if self._transport is None:
            self._connection = None
            self._commandQueue.append((requester, ref, command, commandType))
            config = self.config()
            creator = protocol.ClientCreator(reactor, OVEFetchTransport, self)
            self._transport = creator.connectTCP(config['address'], config.get('port', 22), timeout = self.SEC_TIMEOUT)
            self._transport.addErrback(self.transportErrback, requester, ref, config['address'])
            self._timerRef += 1
            # Set this timer slightly longer than the twisted.conch timeout, as transportErrback can cancel
            # the timeout and prevent double handling
            # lambda timerRef = self._timerRef: takes a copy of self._timerRef
            QtCore.QTimer.singleShot(int((1+self.SEC_TIMEOUT) * 1000), lambda timerRef = self._timerRef: self.transportTimeout(timerRef, requester, ref, config['address']))
        else:
            self.execCommand2(requester, ref, command, commandType)

    def execCommand2(self, requester, ref, command, commandType):
        # Queue the command while there is no connection; otherwise pass it straight through
        if self._connection is None:
            self._commandQueue.append((requester, ref, command, commandType))
        else:
            self._connection.execCommand(requester, ref, command, commandType)

    def getTable(self, requester, tableName, ref = QtCore.QObject()):
        """Run an ovsdb-client 'select' with an empty where-clause on tableName; the reply type is 'JSON'."""
        # NOTE(review): the default for ref is a single QObject created when this method is defined,
        # and tableName is concatenated into the command string without escaping
        command = '/usr/bin/ovsdb-client transact '+self.config()['connectTarget']+' \'["Open_vSwitch", {"op":"select","table":"'+tableName+'", "where":[]}]\''
        self.execCommand(requester, ref, command, 'JSON')

    def execCommandFramed(self, requester, ref, command):
        # Appends an echo of OVECommandChannel.END_MARKER that only runs if command succeeds ('&&')
        self.execCommand(requester, ref, command + ' && echo ' + OVECommandChannel.END_MARKER, 'framed')
openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEFlowWindow.py000066400000000000000000000326701226605124000243470ustar00rootroot00000000000000
# Copyright (c) 2011 Nicira, Inc.
# Copyright (c) 2010 Citrix Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from OVEStandard import * from OVEConfig import * from OVEFetch import * from OVELogger import * from OVEUtil import * from OVECommonWindow import * from Ui_FlowWindow import * import re class OVEFlowWindow(QtGui.QMainWindow, OVECommonWindow): LOAD_KEY = 'FlowWindow/window' COMMAND_OVS_DPCTL='/usr/bin/ovs-dpctl' BASE_REF=200000 def __init__(self, app, loadIndex = None): QtGui.QMainWindow.__init__(self) self.ui = Ui_FlowWindow() self.dpNames = [] self.dpTables = [] self.currentOpIndex = None self.resizeCount = [] self.ssgChecked = False self.ssgText = '' self.lastTime = None self.lastByteCount = 0 OVECommonWindow.__init__(self, app, loadIndex) self.updateSsgList() self.updateDatapaths() self.updateSsgState() self.connect(self.ui.fetchPathsButton, QtCore.SIGNAL("clicked()"), self.xon_fetchPathsButton_clicked) self.connect(self.ui.ssgSaveButton, QtCore.SIGNAL("clicked()"), self.xon_ssgSaveButton_clicked) self.connect(self.ui.ssgDeleteButton, QtCore.SIGNAL("clicked()"), self.xon_ssgDeleteButton_clicked) self.connect(self.ui.ssgComboBox, QtCore.SIGNAL("activated(int)"), self.xon_ssgComboBox_activated) self.connect(self.ui.ssgComboBox, QtCore.SIGNAL("editTextChanged(QString)"), self.xon_ssgComboBox_editTextChanged) self.connect(self.ui.ssgCheckBox, QtCore.SIGNAL("stateChanged(int)"), self.xon_ssgCheckBox_stateChanged) def xon_fetchPathsButton_clicked(self): self.updateDatapaths() def xon_hostComboBox_currentIndexChanged(self, index): OVECommonWindow.xon_hostComboBox_currentIndexChanged(self, index) if (index >= 0): self.updateDatapaths() def 
xon_ssgSaveButton_clicked(self): if self.ssgText not in OVEConfig.Inst().ssgList: OVEConfig.Inst().ssgList.append(self.ssgText) OVEConfig.Inst().saveConfig() self.updateSsgList() def updateSsgList(self): currentSsgText = self.ssgText self.ui.ssgComboBox.clear() isFound = False for i, ssgText in enumerate(OVEConfig.Inst().ssgList): self.ui.ssgComboBox.addItem(ssgText) if ssgText == currentSsgText: # This is the currently selected item self.ui.ssgComboBox.setCurrentIndex(i) isFound = True if not isFound: self.ui.ssgComboBox.setCurrentIndex(-1) self.ui.ssgComboBox.lineEdit().setText(currentSsgText) def xon_ssgDeleteButton_clicked(self): if self.ssgText in OVEConfig.Inst().ssgList: OVEConfig.Inst().ssgList.remove(self.ssgText) self.ssgText = '' OVEConfig.Inst().saveConfig() self.updateSsgList() def xon_ssgComboBox_activated(self, index): if (index >= 0): itemData = self.ui.ssgComboBox.itemText(index) self.ssgText = str(itemData) self.updateTable() def xon_ssgComboBox_editTextChanged(self, text): self.ssgText = str(text) self.statusBar().showMessage('Remote command is: '+self.updateCommand()) present = (self.ssgText in OVEConfig.Inst().ssgList) self.ui.ssgDeleteButton.setEnabled(present) self.ui.ssgSaveButton.setEnabled(not present) def xon_ssgCheckBox_stateChanged(self, state): self.ssgChecked = (state == Qt.Checked) self.updateTable() def xon_configUpdated(self): OVECommonWindow.xon_configUpdated(self) self.updateSsgList() self.updateDatapaths() def timerEvent(self, event): OVECommonWindow.timerEvent(self, event) def customEvent(self, event): OVECommonWindow.customEvent(self, event) def updateDatapaths(self): if self.hostUuid == '': self.statusBar().showMessage('No host selected') else: self.currentRef += 1 self.currentOp = 'dump-dps' command = self.COMMAND_OVS_DPCTL+' dump-dps' OVEFetch.Inst(self.hostUuid).execCommandFramed(self, self.currentRef, command) def rebuildTables(self): self.ui.tabWidget.clear() # Let the garbage collector delete the pages self.dpTables = 
[] self.dpFlows = [] self.resizeCount = [] headings = OVEUtil.flowDecodeHeadings() for dpName in self.dpNames: pageWidget = QtGui.QWidget() pageWidget.setObjectName(dpName+'_page') gridLayout = QtGui.QGridLayout(pageWidget) gridLayout.setObjectName(dpName+"_gridLayout") table = QtGui.QTableWidget(pageWidget) table.setObjectName(dpName+"_table") table.setColumnCount(len(headings)) table.setRowCount(0) gridLayout.addWidget(table, 0, 0, 1, 1) self.dpTables.append(table) self.ui.tabWidget.addTab(pageWidget, dpName) self.dpFlows.append([]) self.resizeCount.append(0) for i, heading in enumerate(headings): table.setHorizontalHeaderItem(i, QtGui.QTableWidgetItem(heading)) table.setSortingEnabled(True) table.sortItems(OVEUtil.getFlowColumn('source mac')) table.setSelectionMode(QtGui.QAbstractItemView.NoSelection) def updateSsgState(self): self.ui.ssgCheckBox.setChecked(self.ssgChecked) def updateCommand(self, overrideText = None): command = self.COMMAND_OVS_DPCTL+' dump-flows ' if self.currentOpIndex is not None: command += self.dpNames[self.currentOpIndex] exp = None if overrideText is not None: exp = overrideText elif self.ssgChecked: exp = self.ssgText if exp is not None: opts='-E ' if exp.startswith('!'): exp =exp[1:] opts += '-v ' command += " | grep "+opts+"'"+exp+"' ; test ${PIPESTATUS[0]} -eq 0 " return command def updateTable(self): if self.hostUuid == '': self.statusBar().showMessage('No host selected') self.setWindowTitle('OVS Flows') elif len(self.dpNames) > 0: config = OVEConfig.Inst().hostFromUuid(self.hostUuid) self.setWindowTitle('OVS Flows - '+config.get('address', '')) try: self.setFetchSkip() self.statusBar().showMessage('Fetching data...') self.currentRef += 1 self.currentOp = 'dump-flows' self.currentOpIndex = self.ui.tabWidget.currentIndex() OVEFetch.Inst(self.hostUuid).execCommandFramed(self, self.currentRef, self.updateCommand()) except Exception, e: message = 'Update failed: '+str(e) OVELog(message) self.statusBar().showMessage(message) def 
writeCurrentTable(self): index = self.ui.tabWidget.currentIndex() actionsColumn = OVEUtil.getFlowColumn('actions') usedColumn = OVEUtil.getFlowColumn('used') srcMacColumn = OVEUtil.getFlowColumn('source mac') destMacColumn = OVEUtil.getFlowColumn('destination mac') srcIPColumn = OVEUtil.getFlowColumn('source ip') destIPColumn = OVEUtil.getFlowColumn('destination ip') inportColumn = OVEUtil.getFlowColumn('inport') vlanColumn = OVEUtil.getFlowColumn('vlan') bytesColumn = OVEUtil.getFlowColumn('bytes') byteCount = 0 try: table = self.dpTables[index] table.setUpdatesEnabled(False) table.setSortingEnabled(False) try: flows = self.dpFlows[index] table.setRowCount(len(flows)) if len(flows) > 0: table.setColumnCount(len(flows[0])) for rowNum, flow in enumerate(flows): inport = flow[inportColumn] if flow[actionsColumn] == 'drop': baseLum=172 else: baseLum=239 background = QtGui.QColor(baseLum+16*(inport % 2), baseLum+8*(inport % 3), baseLum+4*(inport % 5)) if flow[usedColumn] == 'never': colour = QtGui.QColor(112,112,112) else: colour = Qt.black for colNum, data in enumerate(flow): item = None try: item = table.takeItem(rowNum, colNum) except: pass if item is None: item = QtGui.QTableWidgetItem('') if colNum == vlanColumn: item.setBackground(QtGui.QColor(255-(10*data % 192), 255-((17*data) % 192), 255-((37*data) % 192))) elif colNum == srcMacColumn or colNum == destMacColumn: cols = [int(x, 16) for x in data.split(':')] item.setBackground(QtGui.QColor(255-cols[2]*cols[3] % 192, 255-cols[3]*cols[4] % 192, 255-cols[4]*cols[5] % 192)) elif re.match(r'[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+', str(data)): cols = [int(x) for x in data.split('.')] item.setBackground(QtGui.QColor(255-cols[1]*cols[2] % 192, 255-cols[2]*cols[3] % 192, 255-cols[3]*cols[0] % 192)) else: item.setBackground(background) item.setForeground(colour) if colNum == bytesColumn: byteCount += int(data) # PySide 0.2.3 fails to convert long ints to QVariants and logs 'long int too large to convert to int' errors try: 
item.setData(Qt.DisplayRole, QVariant(data)) item.setToolTip(str(data)) except Exception, e: item.setText('Error: See tooltip') item.setToolTip(str(e)) table.setItem(rowNum, colNum, item) if self.resizeCount[index] < 2: self.resizeCount[index] += 1 for i in range(0, table.columnCount()): table.resizeColumnToContents(i) finally: table.setUpdatesEnabled(True) table.setSortingEnabled(True) message = 'Updated at '+str(QtCore.QTime.currentTime().toString()) if self.lastTime is not None: timeDiff = time.time() - self.lastTime byteDiff = byteCount - self.lastByteCount bitRate = long(8 * byteDiff / timeDiff) if abs(bitRate) < 10*2**20: message += ' ('+str(bitRate/2**10)+' kbit/s)' elif abs(bitRate) < 10*2**30: message += ' ('+str(bitRate/2**20)+' Mbit/s)' else: message += ' ('+str(bitRate/2**30)+' Gbit/s)' self.lastByteCount = byteCount self.lastTime = time.time() if table.rowCount() == 0: message += ' - Table is empty' self.statusBar().showMessage(message) except Exception, e: message = 'Table update failed: '+str(e) OVELog(message) self.statusBar().showMessage(message) def handleFetchEvent(self, ref, values): if self.currentOp == 'dump-dps': self.dpNames =values.strip().split('\n') self.rebuildTables() self.updateTable() elif self.currentOp == 'dump-flows': self.dpFlows[self.currentOpIndex] = OVEUtil.decodeFlows(values) self.writeCurrentTable() def handleFetchFailEvent(self, ref, message): self.statusBar().showMessage(message) OVELog('Fetch ('+self.currentOp+') failed') def customEvent(self, event): OVECommonWindow.customEvent(self, event) def saveSettings(self, index): settings, key = OVECommonWindow.saveSettings(self, index) settings.setValue(key+"/ssgText", QVariant(self.ssgText)) settings.setValue(key+"/ssgChecked", QVariant(self.ssgChecked)) def loadSettings(self, index): settings, key = OVECommonWindow.loadSettings(self, index) self.ssgText = str(settings.value(key+"/ssgText", QVariant('10\.80\.226\..*')).toString()) self.ssgChecked = 
settings.value(key+"/ssgChecked", QVariant(False)).toBool() self.ssgRe = re.compile(self.ssgText) openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEHostWindow.py000066400000000000000000000043241226605124000243500ustar00rootroot00000000000000# Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from OVEStandard import * from OVELogger import * from Ui_HostWindow import * class OVEHostWindow(QtGui.QDialog): DEFAULT_CONNECT_TARGET = 'unix:/var/run/openvswitch/db.sock' def __init__(self, parent, currentValues = None): QtGui.QDialog.__init__(self, parent) self.ui = Ui_HostWindow() self.ui.setupUi(self) self.resize(-1, -1) self.connect(self.ui.buttonBox, QtCore.SIGNAL("clicked(QAbstractButton *)"), self.xon_actionButton_Box_clicked) if currentValues is not None: self.ui.hostAddressEdit.setText(currentValues['address']) self.ui.hostPasswordEdit.setText(currentValues['password']) self.ui.hostConnectTarget.setText(currentValues.get('connectTarget', self.DEFAULT_CONNECT_TARGET)) self.uuid = currentValues.get('uuid', str(uuid.uuid4())) else: self.ui.hostConnectTarget.setText(self.DEFAULT_CONNECT_TARGET) self.uuid = str(uuid.uuid4()) self.accepted = None def xon_actionButton_Box_clicked(self, button): role = self.ui.buttonBox.buttonRole(button) if role == QtGui.QDialogButtonBox.AcceptRole: self.accepted = True self.close() elif role == QtGui.QDialogButtonBox.RejectRole: self.accepted = False self.close() def record(self): return { 'accepted' : 
self.accepted, 'uuid' : self.uuid, 'address' : str(self.ui.hostAddressEdit.text()), 'password' : str(self.ui.hostPasswordEdit.text()), 'connectTarget' : str(self.ui.hostConnectTarget.text()) } openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVELogWindow.py000066400000000000000000000050101226605124000241450ustar00rootroot00000000000000# Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from OVEStandard import * from OVELogger import * from Ui_LogWindow import * class OVELogWindow(QtGui.QDialog): LOAD_KEY = 'LogWindow/window' def __init__(self, app): QtGui.QDialog.__init__(self) self.app = app self.ui = Ui_LogWindow() self.ui.setupUi(self) if self.isLoadable(): self.loadSettings() self.connect(OVELogger.Inst(), QtCore.SIGNAL("logUpdated()"), self.logUpdated) self.connect(self.ui.buttonBox, QtCore.SIGNAL("clicked(QAbstractButton *)"), self.xon_actionButton_Box_clicked) def xon_actionButton_Box_clicked(self, button): role = self.ui.buttonBox.buttonRole(button) if role == QtGui.QDialogButtonBox.ResetRole: OVELogger.Inst().reset() OVELog("Log reset") def logUpdated(self): self.ui.textBrowser.setText("\n".join(OVELogger.Inst().contents)) self.ui.textBrowser.moveCursor(QtGui.QTextCursor.End) self.ui.textBrowser.ensureCursorVisible() def saveSettings(self): key = self.LOAD_KEY settings = QtCore.QSettings() settings.setValue(key+"/loadable", QVariant(True)) settings.setValue(key+"/pos", QVariant(self.pos())) settings.setValue(key+"/size", 
QVariant(self.size())) settings.setValue(key+"/visible", QVariant(self.isVisible())) def loadSettings(self): key = self.LOAD_KEY settings = QtCore.QSettings() pos = settings.value(key+"/pos", QVariant(QtCore.QPoint(200, 200))).toPoint() size = settings.value(key+"/size", QVariant(QtCore.QSize(400, 400))).toSize() visible = settings.value(key+"/visible", QVariant(True)).toBool() self.resize(size) self.move(pos) self.setVisible(visible) @classmethod def isLoadable(cls): key = cls.LOAD_KEY settings = QtCore.QSettings() return settings.value(key+"/loadable", QVariant(False)).toBool() openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVELogger.py000066400000000000000000000025251226605124000234630ustar00rootroot00000000000000# Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from OVEStandard import * class OVELogger(QtCore.QObject): instance = None def __init__(self): QtCore.QObject.__init__(self) self.contents = [] self.loggers = [] @classmethod def Inst(cls): if cls.instance is None: cls.instance = OVELogger() return cls.instance def reset(self): self.contents = [] self.update() def logString(self, message): self.contents += [str(message)] if len(self.contents) > 500: self.contents = ['+++ Log truncated', ''] + self.contents[50:] self.update() def update(self): self.emit(QtCore.SIGNAL("logUpdated()")) def OVELog(message): OVELogger.Inst().logString(message) openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEMainWindow.py000066400000000000000000000120331226605124000243130ustar00rootroot00000000000000# Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from OVEStandard import * from OVEConfig import * from OVEFetch import * from OVELogger import * from OVEUtil import * from OVECommonWindow import * from Ui_MainWindow import * class OVEMainWindow(QtGui.QMainWindow, OVECommonWindow): LOAD_KEY = 'MainWindow/window' BASE_REF=100000 def __init__(self, app, loadIndex = None): QtGui.QMainWindow.__init__(self) self.ui = Ui_MainWindow() OVECommonWindow.__init__(self, app, loadIndex) def xon_tabWidget_currentChanged(self, value): self.deleteCurrentTable() OVECommonWindow.xon_tabWidget_currentChanged(self, value) def updateTable(self): if self.hostUuid == '': self.setWindowTitle('OVS Database') self.deleteCurrentTable() self.statusBar().showMessage('No host selected. Choose File->Preferences to add a host') else: config = OVEConfig.Inst().hostFromUuid(self.hostUuid) self.setWindowTitle('OVS Database - '+config.get('address', '')) self.invalidateCurrentTable('Fetching data...') tabName = self.ui.tabWidget.currentWidget().objectName() try: self.setFetchSkip() self.currentRef += 1 OVEFetch.Inst(self.hostUuid).getTable(self, tabName, self.currentRef) except Exception, e: OVELog("Error fetching data: "+str(e)) self.invalidateCurrentTable(str(e)) def timerEvent(self, event): OVECommonWindow.timerEvent(self, event) def customEvent(self, event): OVECommonWindow.customEvent(self, event) def handleFetchEvent(self, ref, values): tabName = self.ui.tabWidget.currentWidget().objectName() self.structToTable(getattr(self.ui, str(tabName)+'Table'), values) def handleFetchFailEvent(self, ref, message): self.invalidateCurrentTable(str(message)) def structToTable(self, table, values): table.setUpdatesEnabled(False) table.setSortingEnabled(False) for result in values: rowNum = 0 table.setRowCount(len(result['rows'])) for row in result['rows']: table.setColumnCount(len(row)) colNum=0 for k in sorted(row.keys()): v = row[k] headerItem = QtGui.QTableWidgetItem(k) table.setHorizontalHeaderItem(colNum, headerItem) text = OVEUtil.paramToString(v) 
item = QtGui.QTableWidgetItem(text) longText = OVEUtil.paramToLongString(v) item.setToolTip(longText) table.setItem(rowNum, colNum, item) colNum+=1 rowNum+=1 for i in range(0, table.columnCount()): table.resizeColumnToContents(i) for i in range(0, table.rowCount()): table.resizeRowToContents(i) # table.setSortingEnabled(True) table.setUpdatesEnabled(True) message = 'Updated at '+str(QtCore.QTime.currentTime().toString()) if table.rowCount() == 0: message += ' - Table is empty' self.statusBar().showMessage(message) def invalidateCurrentTable(self, message): tabName = self.ui.tabWidget.currentWidget().objectName() self.invalidateTable(getattr(self.ui, str(tabName)+'Table'), message) def invalidateTable(self, table, message): table.setUpdatesEnabled(False) table.setSortingEnabled(False) for rowNum in range(0, table.rowCount()): for colNum in range(0, table.columnCount()): item = table.takeItem(rowNum, colNum) if item is not None: item.setForeground(Qt.darkGray) table.setItem(rowNum, colNum, item) self.statusBar().showMessage(message) # table.setSortingEnabled(True) table.setUpdatesEnabled(True) def deleteCurrentTable(self): tabName = self.ui.tabWidget.currentWidget().objectName() self.deleteTable(getattr(self.ui, str(tabName)+'Table')) def deleteTable(self, table): table.clear() table.setRowCount(0) table.setColumnCount(0) def saveSettings(self, index): settings = OVECommonWindow.saveSettings(self, index) def loadSettings(self, index): settings = OVECommonWindow.loadSettings(self, index) openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEStandard.py000066400000000000000000000024611226605124000240030ustar00rootroot00000000000000# Copyright (c) 2011 Nicira, Inc. # Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os, re, struct, sys, time, types, uuid from copy import deepcopy from pprint import pprint # Set globalForcePySide to True to use PySide instead of PyQt if both are installed globalForcePySide = False try: import ovs.json except Exception, e: print('+++ OVS JSON module is required\n') raise try: if globalForcePySide: print('Forcing use of PySide') raise Exception() from PyQt4.QtCore import Qt, QVariant from PyQt4 import QtCore, QtGui except: try: from PySide.QtCore import Qt, QVariant from PySide import QtCore, QtGui except Exception, e: print('+++ This application requires either PyQt4 or PySide\n') raise openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/OVEUtil.py000066400000000000000000000117131226605124000231600ustar00rootroot00000000000000# Copyright (c) 2011 Nicira, Inc. # Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
from OVEStandard import *
from OVEConfig import *
import re

class OVEUtil:
    """Formatting and flow-parsing helpers for the OVSDB monitor.

    Every helper is a classmethod, so callers use OVEUtil.<name>(...) directly.
    """

    # Group 1 keeps the first 8 hex digits, the dash and the next 2 hex digits
    # of a UUID; paramToString replaces the rest of the UUID with '...'.
    UUID_RE = re.compile(r'([a-f0-9]{8}-[a-f0-9]{2})[a-f0-9]{2}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}')

    @classmethod
    def _paramText(cls, param):
        """Return str() of param, or of param[1] when param is a list/tuple longer than 1."""
        if isinstance(param, (list, tuple)) and len(param) > 1:
            return str(param[1])
        return str(param)

    @classmethod
    def paramToLongString(cls, param):
        """Return the full text of param with a line break after each ', '."""
        return cls._paramText(param).replace(', ', ',\n')

    @classmethod
    def paramToString(cls, param):
        """Return the text of param with a line break after each ', '.

        When OVEConfig.Inst().truncateUuids is set, every UUID in the text is
        shortened to its first 11 characters followed by '...'.
        """
        text = cls._paramText(param)
        if OVEConfig.Inst().truncateUuids:
            text = cls.UUID_RE.sub('\\1...', text)
        return text.replace(', ', ',\n')

    @classmethod
    def flowDecodeHeadings(cls):
        """Return the column headings, in the order used by decodeFlows tuples."""
        return [
            'Type',
            'Proto',
            'Inport',
            'VLAN',
            'Source MAC',
            'Destination MAC',
            'Source IP',
            'Destination IP',
            'Src port',
            'Dest port',
            'Packet count',
            'Bytes',
            'Used',
            'Tos',
            'PCP',
            'Tunnel',
            'Actions',
            ]

    @classmethod
    def getFlowColumn(cls, name):
        """Return the index of the heading matching name (case-insensitive), or None."""
        lowerName = name.lower()
        for i, columnName in enumerate(cls.flowDecodeHeadings()):
            if lowerName == columnName.lower():
                return i
        return None

    ETHERTYPE_TRANS = {
        '05ff':'ESX probe',
        '0800':'IP',
        '0806':'ARP',
        '86dd':'IPv6',
        '88cc':'LLDP'
    }

    ETHERPROTO_TRANS = {
        '1':'ICMP',
        '6':'TCP',
        '17':'UDP'
    }

    # Parsing of ovs-dpctl dump-flows output should be localised in this method and flowDecodeHeadings
    @classmethod
    def decodeFlows(cls, srcLines):
        """Parse flow-dump text into a list of row tuples.

        srcLines is one string; it is split on newlines and empty lines are
        skipped.  Each remaining line yields one tuple whose elements follow
        the order of flowDecodeHeadings().

        Raises ValueError if in_port, vlan vid or vlan pcp is present but is
        not an integer literal.
        """
        retVal = []
        for line in srcLines.split('\n'):
            if line == '':
                continue

            fields = {}
            # name(value) and name(k1=v1,k2=v2) groups of the flow key
            for name, val in re.findall(r'([a-zA-Z0-9_+]+)\(([^)]+)\)', line):
                if '=' in val:
                    for setting in val.split(','):
                        # Split on the first '=' only, and skip pieces without
                        # one (e.g. a nested 'flags(key'), instead of failing
                        # to unpack.
                        k, sep, v = setting.partition('=')
                        if sep:
                            fields['%s.%s' % (name, k)] = v
                else:
                    fields[name] = val

            # Trailing 'key:value' items separated by ', '
            for setting in line.split(', ')[1:]:
                # Split on the first ':' only: values such as an actions list
                # carrying a MAC address contain further colons.
                k, sep, v = setting.partition(':')
                if sep:
                    fields[k] = v

            # NOTE(review): presumably newer dumps nest the id as
            # tunnel(tun_id=...); the fallback is inert when that key is absent.
            tun_id = fields.get('tun_id', fields.get('tunnel.tun_id', ''))
            in_port = int(fields.get('in_port', 0))
            eth_src = fields.get('eth.src', '')
            eth_dst = fields.get('eth.dst', '')
            vlan_vid = int(fields.get('vlan.vid', 0))
            vlan_pcp = int(fields.get('vlan.pcp', 0))
            eth_type = fields.get('eth_type', '')
            ip_src = fields.get('ipv4.src', fields.get('ipv6.src', ''))
            ip_dst = fields.get('ipv4.dst', fields.get('ipv6.dst', ''))
            ip_proto = fields.get('ipv4.proto', fields.get('ipv6.proto', ''))
            ip_tos = fields.get('ipv4.tos', fields.get('ipv6.tos', ''))
            tp_src = fields.get('tcp.src', fields.get('udp.src', fields.get('arp.sip', fields.get('icmp.type', fields.get('icmpv6.type', '')))))
            tp_dst = fields.get('tcp.dst', fields.get('udp.dst', fields.get('arp.tip', fields.get('icmp.code', fields.get('icmpv6.code', '')))))
            packets = fields.get('packets', '')
            byteCount = fields.get('bytes', '')
            actions = fields.get('actions', '')
            used = fields.get('used', '')

            # Order below needs to match that in flowDecodeHeadings
            retVal.append((eth_type, ip_proto, in_port, vlan_vid, eth_src, eth_dst, ip_src, ip_dst, tp_src, tp_dst, packets, byteCount, used, ip_tos, vlan_pcp, tun_id, actions))

        return retVal

    COLOURS = [Qt.black, Qt.darkBlue, Qt.darkRed, Qt.darkGreen, Qt.darkMagenta, Qt.darkCyan, Qt.darkGray, Qt.darkYellow, Qt.blue, Qt.gray, Qt.magenta, Qt.red]

    @classmethod
    def intToColour(cls, value):
        """Map an integer onto one of COLOURS, wrapping around with modulo."""
        return cls.COLOURS[value % len(cls.COLOURS)]
try:
    from OVEStandard import globalForcePySide
    if globalForcePySide: raise Exception()
    from PyQt4 import QtCore, QtGui
except:
    from PySide import QtCore, QtGui

class Ui_ConfigWindow(object):
    """Widget layout for the configuration dialog (generated from 'ConfigWindow.ui')."""

    def setupUi(self, ConfigWindow):
        """Create the child widgets and layouts of ConfigWindow and store them on self."""
        def named(obj, name):
            # Give a freshly created Qt object its object name and hand it back
            obj.setObjectName(name)
            return obj

        policy = QtGui.QSizePolicy

        ConfigWindow.setObjectName("ConfigWindow")
        ConfigWindow.resize(386,303)
        ConfigWindow.setFocusPolicy(QtCore.Qt.TabFocus)
        self.gridLayout = named(QtGui.QGridLayout(ConfigWindow), "gridLayout")
        self.verticalLayout = named(QtGui.QVBoxLayout(), "verticalLayout")
        self.tabWidget = named(QtGui.QTabWidget(ConfigWindow), "tabWidget")

        # First tab: host list with a column of Add/Edit/Delete buttons
        self.hosts = named(QtGui.QWidget(), "hosts")
        self.layoutWidget = QtGui.QWidget(self.hosts)
        self.layoutWidget.setGeometry(QtCore.QRect(10,10,341,194))
        self.layoutWidget.setObjectName("layoutWidget")
        self.horizontalLayout_2 = named(QtGui.QHBoxLayout(self.layoutWidget), "horizontalLayout_2")
        self.hostList = named(QtGui.QListWidget(self.layoutWidget), "hostList")
        self.horizontalLayout_2.addWidget(self.hostList)
        self.verticalLayout_2 = named(QtGui.QVBoxLayout(), "verticalLayout_2")
        for buttonName in ("hostAddButton", "hostEditButton", "hostDeleteButton"):
            button = named(QtGui.QPushButton(self.layoutWidget), buttonName)
            setattr(self, buttonName, button)
            self.verticalLayout_2.addWidget(button)
        self.verticalLayout_2.addItem(QtGui.QSpacerItem(20,40,policy.Minimum,policy.Expanding))
        self.horizontalLayout_2.addLayout(self.verticalLayout_2)
        self.tabWidget.addTab(self.hosts,"")

        # Second tab: logging options
        self.logging = named(QtGui.QWidget(), "logging")
        self.gridLayout_2 = named(QtGui.QGridLayout(self.logging), "gridLayout_2")
        self.logTrafficCheckBox = named(QtGui.QCheckBox(self.logging), "logTrafficCheckBox")
        self.gridLayout_2.addWidget(self.logTrafficCheckBox,0,0,1,1)
        self.gridLayout_2.addItem(QtGui.QSpacerItem(20,164,policy.Minimum,policy.Expanding),1,0,1,1)
        self.tabWidget.addTab(self.logging,"")

        # Third tab: view options
        self.view = named(QtGui.QWidget(), "view")
        self.verticalLayout_3 = named(QtGui.QVBoxLayout(self.view), "verticalLayout_3")
        self.truncateUuidsCheckBox = named(QtGui.QCheckBox(self.view), "truncateUuidsCheckBox")
        self.verticalLayout_3.addWidget(self.truncateUuidsCheckBox)
        self.verticalLayout_3.addItem(QtGui.QSpacerItem(20,164,policy.Minimum,policy.Expanding))
        self.tabWidget.addTab(self.view,"")
        self.verticalLayout.addWidget(self.tabWidget)

        # Bottom row: spacer pushing the Apply/Cancel/Ok box to the right
        self.horizontalLayout = named(QtGui.QHBoxLayout(), "horizontalLayout")
        self.horizontalLayout.addItem(QtGui.QSpacerItem(40,20,policy.Expanding,policy.Minimum))
        self.buttonBox = QtGui.QDialogButtonBox(ConfigWindow)
        self.buttonBox.setStandardButtons(QtGui.QDialogButtonBox.Apply|QtGui.QDialogButtonBox.Cancel|QtGui.QDialogButtonBox.Ok)
        self.buttonBox.setObjectName("buttonBox")
        self.horizontalLayout.addWidget(self.buttonBox)
        self.verticalLayout.addLayout(self.horizontalLayout)
        self.gridLayout.addLayout(self.verticalLayout,0,0,1,1)

        self.retranslateUi(ConfigWindow)
        self.tabWidget.setCurrentIndex(0)
        QtCore.QMetaObject.connectSlotsByName(ConfigWindow)

        # Keyboard tab order: each widget hands focus to the next one in the chain
        focusChain = (self.hostList, self.hostAddButton, self.hostEditButton,
                      self.hostDeleteButton, self.buttonBox, self.tabWidget)
        for position in range(len(focusChain) - 1):
            ConfigWindow.setTabOrder(focusChain[position], focusChain[position + 1])

    def retranslateUi(self, ConfigWindow):
        """Set every user-visible string of the dialog through the Qt translator."""
        def tr(text):
            return QtGui.QApplication.translate("ConfigWindow", text, None, QtGui.QApplication.UnicodeUTF8)

        tabs = self.tabWidget
        ConfigWindow.setWindowTitle(tr("OVSDB Monitor Configuration"))
        self.hostAddButton.setText(tr("Add"))
        self.hostEditButton.setText(tr("Edit"))
        self.hostDeleteButton.setText(tr("Delete"))
        tabs.setTabText(tabs.indexOf(self.hosts), tr("Hosts"))
        self.logTrafficCheckBox.setToolTip(tr("Whether to log traffic exchanges in the log window"))
        self.logTrafficCheckBox.setText(tr("Log traffic"))
        tabs.setTabText(tabs.indexOf(self.logging), tr("Logging"))
        self.truncateUuidsCheckBox.setToolTip(tr("Replaces UUIDs with a shorter string of the first few characters. The tooltip still contains the full value"))
        self.truncateUuidsCheckBox.setText(tr("Truncate UUIDs"))
        tabs.setTabText(tabs.indexOf(self.view), tr("View"))


# Form implementation generated from reading ui file 'FlowWindow.ui'
try:
    from OVEStandard import globalForcePySide
    if globalForcePySide: raise Exception()
    from PyQt4 import QtCore, QtGui
except:
    from PySide import QtCore, QtGui

class Ui_FlowWindow(object):
    """Widget layout for the flow window (generated from 'FlowWindow.ui')."""

    def setupUi(self, FlowWindow):
        """Create the child widgets, layouts, menu and actions of FlowWindow and store them on self."""
        def named(obj, name):
            # Give a freshly created Qt object its object name and hand it back
            obj.setObjectName(name)
            return obj

        policy = QtGui.QSizePolicy

        FlowWindow.setObjectName("FlowWindow")
        FlowWindow.resize(800,600)
        self.centralwidget = named(QtGui.QWidget(FlowWindow), "centralwidget")
        central = self.centralwidget
        self.gridLayout = named(QtGui.QGridLayout(central), "gridLayout")

        # Tab widget with a single placeholder tab
        self.tabWidget = named(QtGui.QTabWidget(central), "tabWidget")
        self.unset = named(QtGui.QWidget(), "unset")
        self.gridLayout_10 = named(QtGui.QGridLayout(self.unset), "gridLayout_10")
        self.tabWidget.addTab(self.unset,"")
        self.gridLayout.addWidget(self.tabWidget,0,0,1,1)

        # Row of "ssg" controls: check box, editable combo box, Save and Delete buttons
        self.horizontalLayout_2 = named(QtGui.QHBoxLayout(), "horizontalLayout_2")
        self.ssgCheckBox = named(QtGui.QCheckBox(central), "ssgCheckBox")
        self.horizontalLayout_2.addWidget(self.ssgCheckBox)
        self.ssgComboBox = QtGui.QComboBox(central)
        self.ssgComboBox.setEditable(True)
        self.ssgComboBox.setMaxVisibleItems(20)
        self.ssgComboBox.setInsertPolicy(QtGui.QComboBox.NoInsert)
        self.ssgComboBox.setMinimumContentsLength(32)
        self.ssgComboBox.setObjectName("ssgComboBox")
        self.horizontalLayout_2.addWidget(self.ssgComboBox)
        for buttonName in ("ssgSaveButton", "ssgDeleteButton"):
            button = named(QtGui.QPushButton(central), buttonName)
            setattr(self, buttonName, button)
            self.horizontalLayout_2.addWidget(button)
        self.horizontalLayout_2.addItem(QtGui.QSpacerItem(40,20,policy.Expanding,policy.Minimum))
        self.gridLayout.addLayout(self.horizontalLayout_2,1,0,1,1)

        # Row of host selection, refetch interval and fetch buttons
        self.horizontalLayout = named(QtGui.QHBoxLayout(), "horizontalLayout")
        self.hostLabel = named(QtGui.QLabel(central), "hostLabel")
        self.horizontalLayout.addWidget(self.hostLabel)
        self.hostComboBox = QtGui.QComboBox(central)
        self.hostComboBox.setSizeAdjustPolicy(QtGui.QComboBox.AdjustToContents)
        self.hostComboBox.setObjectName("hostComboBox")
        self.horizontalLayout.addWidget(self.hostComboBox)
        self.intervalCheckBox = named(QtGui.QCheckBox(central), "intervalCheckBox")
        self.horizontalLayout.addWidget(self.intervalCheckBox)
        self.intervalSpinBox = QtGui.QSpinBox(central)
        self.intervalSpinBox.setMinimum(1)
        self.intervalSpinBox.setMaximum(1000000)
        self.intervalSpinBox.setObjectName("intervalSpinBox")
        self.horizontalLayout.addWidget(self.intervalSpinBox)
        self.horizontalLayout.addItem(QtGui.QSpacerItem(40,20,policy.Expanding,policy.Minimum))
        for buttonName in ("fetchPathsButton", "fetchButton"):
            button = named(QtGui.QPushButton(central), buttonName)
            setattr(self, buttonName, button)
            self.horizontalLayout.addWidget(button)
        self.gridLayout.addLayout(self.horizontalLayout,3,0,1,1)

        # Horizontal separator placed in grid row 2, between the two control rows
        self.line = QtGui.QFrame(central)
        self.line.setFrameShape(QtGui.QFrame.HLine)
        self.line.setFrameShadow(QtGui.QFrame.Sunken)
        self.line.setObjectName("line")
        self.gridLayout.addWidget(self.line,2,0,1,1)
        FlowWindow.setCentralWidget(central)

        # Menu bar, status bar and actions
        self.menubar = QtGui.QMenuBar(FlowWindow)
        self.menubar.setGeometry(QtCore.QRect(0,0,800,28))
        self.menubar.setObjectName("menubar")
        self.menuFile = named(QtGui.QMenu(self.menubar), "menuFile")
        FlowWindow.setMenuBar(self.menubar)
        self.statusbar = named(QtGui.QStatusBar(FlowWindow), "statusbar")
        FlowWindow.setStatusBar(self.statusbar)
        for actionName in ("actionShow_Log", "actionNew_DB_Window", "actionPreferences",
                           "actionQuit", "actionNew_Flow_Window"):
            setattr(self, actionName, named(QtGui.QAction(FlowWindow), actionName))
        for action in (self.actionNew_DB_Window, self.actionNew_Flow_Window,
                       self.actionShow_Log, self.actionPreferences):
            self.menuFile.addAction(action)
        self.menuFile.addSeparator()
        self.menuFile.addAction(self.actionQuit)
        self.menubar.addAction(self.menuFile.menuAction())
        self.hostLabel.setBuddy(self.hostComboBox)

        self.retranslateUi(FlowWindow)
        self.tabWidget.setCurrentIndex(0)
        QtCore.QMetaObject.connectSlotsByName(FlowWindow)

    def retranslateUi(self, FlowWindow):
        """Set every user-visible string of the window through the Qt translator."""
        def tr(text):
            return QtGui.QApplication.translate("FlowWindow", text, None, QtGui.QApplication.UnicodeUTF8)

        FlowWindow.setWindowTitle(tr("OVSDB Monitor"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.unset), tr("Awaiting update..."))
        self.ssgCheckBox.setText(tr("Server-side grep"))
        self.ssgSaveButton.setText(tr("Save"))
        self.ssgDeleteButton.setText(tr("Delete"))
        self.hostLabel.setText(tr("Host"))
        self.intervalCheckBox.setText(tr("Auto-refetch every"))
        self.intervalSpinBox.setSuffix(tr("s"))
        self.fetchPathsButton.setToolTip(tr("Refetches the datapath names and rebuilds the window tabs to reflect them. Use when the network has been reconfigured, e.g. a bond has been created"))
        self.fetchPathsButton.setText(tr("Refetch Datapath List"))
        self.fetchButton.setText(tr("Refetch"))
        self.menuFile.setTitle(tr("File"))
        self.actionShow_Log.setText(tr("Show Log"))
        self.actionNew_DB_Window.setText(tr("New DB Window"))
        self.actionPreferences.setText(tr("Preferences"))
        self.actionQuit.setText(tr("Quit"))
        self.actionNew_Flow_Window.setText(tr("New Flow Window"))
try:
    from OVEStandard import globalForcePySide
    if globalForcePySide: raise Exception()
    from PyQt4 import QtCore, QtGui
except:
    from PySide import QtCore, QtGui

class Ui_HostWindow(object):
    """Widget layout for the host properties dialog (generated from 'HostWindow.ui')."""

    def setupUi(self, HostWindow):
        """Create the three label/line-edit rows and the Cancel/Ok box of HostWindow."""
        HostWindow.setObjectName("HostWindow")
        HostWindow.setWindowModality(QtCore.Qt.WindowModal)
        HostWindow.resize(400,300)
        policy = QtGui.QSizePolicy(QtGui.QSizePolicy.Minimum,QtGui.QSizePolicy.Minimum)
        policy.setHorizontalStretch(0)
        policy.setVerticalStretch(0)
        policy.setHeightForWidth(HostWindow.sizePolicy().hasHeightForWidth())
        HostWindow.setSizePolicy(policy)

        self.gridLayout_2 = QtGui.QGridLayout(HostWindow)
        self.gridLayout_2.setObjectName("gridLayout_2")
        self.gridLayout = QtGui.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")

        # One grid row per entry: (label attribute, line-edit attribute, echo mode or None)
        formRows = (
            ("label", "hostAddressEdit", None),
            ("label_2", "hostPasswordEdit", QtGui.QLineEdit.Password),
            ("label_3", "hostConnectTarget", None),
        )
        for row, (labelName, editName, echoMode) in enumerate(formRows):
            caption = QtGui.QLabel(HostWindow)
            caption.setObjectName(labelName)
            setattr(self, labelName, caption)
            self.gridLayout.addWidget(caption,row,0,1,1)

            entry = QtGui.QLineEdit(HostWindow)
            entry.setMinimumSize(QtCore.QSize(256,0))
            if echoMode is not None:
                entry.setEchoMode(echoMode)
            entry.setObjectName(editName)
            setattr(self, editName, entry)
            self.gridLayout.addWidget(entry,row,1,1,1)
        self.gridLayout_2.addLayout(self.gridLayout,0,0,1,1)

        self.buttonBox = QtGui.QDialogButtonBox(HostWindow)
        self.buttonBox.setOrientation(QtCore.Qt.Horizontal)
        self.buttonBox.setStandardButtons(QtGui.QDialogButtonBox.Cancel|QtGui.QDialogButtonBox.Ok)
        self.buttonBox.setObjectName("buttonBox")
        self.gridLayout_2.addWidget(self.buttonBox,1,0,1,1)

        # Each label is the buddy of the line edit on its own row
        for labelName, editName, _ in formRows:
            getattr(self, labelName).setBuddy(getattr(self, editName))

        self.retranslateUi(HostWindow)
        for signature, slot in (("accepted()", HostWindow.accept), ("rejected()", HostWindow.reject)):
            QtCore.QObject.connect(self.buttonBox,QtCore.SIGNAL(signature),slot)
        QtCore.QMetaObject.connectSlotsByName(HostWindow)
        HostWindow.setTabOrder(self.hostAddressEdit,self.hostPasswordEdit)
        HostWindow.setTabOrder(self.hostPasswordEdit,self.buttonBox)

    def retranslateUi(self, HostWindow):
        """Set the window title and the three label captions through the Qt translator."""
        def tr(text):
            return QtGui.QApplication.translate("HostWindow", text, None, QtGui.QApplication.UnicodeUTF8)

        HostWindow.setWindowTitle(tr("Host Properties"))
        self.label.setText(tr("Host name or IP"))
        self.label_2.setText(tr("SSH Password"))
        self.label_3.setText(tr("Connect target"))
try:
    from OVEStandard import globalForcePySide
    if globalForcePySide: raise Exception()
    from PyQt4 import QtCore, QtGui
except:
    from PySide import QtCore, QtGui

class Ui_LogWindow(object):
    """Widget layout for the log dialog (generated from 'LogWindow.ui')."""

    def setupUi(self, LogWindow):
        """Create a text browser above a Close/Reset button box inside LogWindow."""
        def named(obj, name):
            # Give a freshly created Qt object its object name and hand it back
            obj.setObjectName(name)
            return obj

        LogWindow.setObjectName("LogWindow")
        LogWindow.resize(735,558)
        self.gridLayout = named(QtGui.QGridLayout(LogWindow), "gridLayout")
        self.verticalLayout = named(QtGui.QVBoxLayout(), "verticalLayout")

        self.textBrowser = named(QtGui.QTextBrowser(LogWindow), "textBrowser")
        self.verticalLayout.addWidget(self.textBrowser)

        box = QtGui.QDialogButtonBox(LogWindow)
        self.buttonBox = box
        box.setOrientation(QtCore.Qt.Horizontal)
        box.setStandardButtons(QtGui.QDialogButtonBox.Close|QtGui.QDialogButtonBox.Reset)
        box.setObjectName("buttonBox")
        self.verticalLayout.addWidget(box)
        self.gridLayout.addLayout(self.verticalLayout,0,0,1,1)

        self.retranslateUi(LogWindow)
        for signature, slot in (("accepted()", LogWindow.accept), ("rejected()", LogWindow.reject)):
            QtCore.QObject.connect(box,QtCore.SIGNAL(signature),slot)
        QtCore.QMetaObject.connectSlotsByName(LogWindow)

    def retranslateUi(self, LogWindow):
        """Set the window title through the Qt translator."""
        title = QtGui.QApplication.translate("LogWindow", "OVSDB Monitor Log", None, QtGui.QApplication.UnicodeUTF8)
        LogWindow.setWindowTitle(title)
try: from OVEStandard import globalForcePySide if globalForcePySide: raise Exception() from PyQt4 import QtCore, QtGui except: from PySide import QtCore, QtGui class Ui_MainWindow(object): def setupUi(self, MainWindow): MainWindow.setObjectName("MainWindow") MainWindow.resize(800, 600) self.centralwidget = QtGui.QWidget(MainWindow) self.centralwidget.setObjectName("centralwidget") self.gridLayout = QtGui.QGridLayout(self.centralwidget) self.gridLayout.setObjectName("gridLayout") self.verticalLayout = QtGui.QVBoxLayout() self.verticalLayout.setObjectName("verticalLayout") self.tabWidget = QtGui.QTabWidget(self.centralwidget) self.tabWidget.setObjectName("tabWidget") self.Bridge = QtGui.QWidget() self.Bridge.setObjectName("Bridge") self.gridLayout_2 = QtGui.QGridLayout(self.Bridge) self.gridLayout_2.setObjectName("gridLayout_2") self.BridgeTable = QtGui.QTableWidget(self.Bridge) self.BridgeTable.setObjectName("BridgeTable") self.BridgeTable.setColumnCount(0) self.BridgeTable.setRowCount(0) self.gridLayout_2.addWidget(self.BridgeTable, 0, 0, 1, 1) self.tabWidget.addTab(self.Bridge, "") self.Controller = QtGui.QWidget() self.Controller.setObjectName("Controller") self.gridLayout_3 = QtGui.QGridLayout(self.Controller) self.gridLayout_3.setObjectName("gridLayout_3") self.ControllerTable = QtGui.QTableWidget(self.Controller) self.ControllerTable.setObjectName("ControllerTable") self.ControllerTable.setColumnCount(0) self.ControllerTable.setRowCount(0) self.gridLayout_3.addWidget(self.ControllerTable, 0, 0, 1, 1) self.tabWidget.addTab(self.Controller, "") self.Interface = QtGui.QWidget() self.Interface.setObjectName("Interface") self.gridLayout_4 = QtGui.QGridLayout(self.Interface) self.gridLayout_4.setObjectName("gridLayout_4") self.InterfaceTable = QtGui.QTableWidget(self.Interface) self.InterfaceTable.setObjectName("InterfaceTable") self.InterfaceTable.setColumnCount(0) self.InterfaceTable.setRowCount(0) self.gridLayout_4.addWidget(self.InterfaceTable, 0, 0, 1, 1) 
self.tabWidget.addTab(self.Interface, "") self.Mirror = QtGui.QWidget() self.Mirror.setObjectName("Mirror") self.gridLayout_5 = QtGui.QGridLayout(self.Mirror) self.gridLayout_5.setObjectName("gridLayout_5") self.MirrorTable = QtGui.QTableWidget(self.Mirror) self.MirrorTable.setObjectName("MirrorTable") self.MirrorTable.setColumnCount(0) self.MirrorTable.setRowCount(0) self.gridLayout_5.addWidget(self.MirrorTable, 0, 0, 1, 1) self.tabWidget.addTab(self.Mirror, "") self.NetFlow = QtGui.QWidget() self.NetFlow.setObjectName("NetFlow") self.gridLayout_6 = QtGui.QGridLayout(self.NetFlow) self.gridLayout_6.setObjectName("gridLayout_6") self.NetFlowTable = QtGui.QTableWidget(self.NetFlow) self.NetFlowTable.setObjectName("NetFlowTable") self.NetFlowTable.setColumnCount(0) self.NetFlowTable.setRowCount(0) self.gridLayout_6.addWidget(self.NetFlowTable, 0, 0, 1, 1) self.tabWidget.addTab(self.NetFlow, "") self.Open_vSwitch = QtGui.QWidget() self.Open_vSwitch.setObjectName("Open_vSwitch") self.gridLayout_7 = QtGui.QGridLayout(self.Open_vSwitch) self.gridLayout_7.setObjectName("gridLayout_7") self.Open_vSwitchTable = QtGui.QTableWidget(self.Open_vSwitch) self.Open_vSwitchTable.setObjectName("Open_vSwitchTable") self.Open_vSwitchTable.setColumnCount(0) self.Open_vSwitchTable.setRowCount(0) self.gridLayout_7.addWidget(self.Open_vSwitchTable, 0, 0, 1, 1) self.tabWidget.addTab(self.Open_vSwitch, "") self.Port = QtGui.QWidget() self.Port.setObjectName("Port") self.gridLayout_8 = QtGui.QGridLayout(self.Port) self.gridLayout_8.setObjectName("gridLayout_8") self.PortTable = QtGui.QTableWidget(self.Port) self.PortTable.setObjectName("PortTable") self.PortTable.setColumnCount(0) self.PortTable.setRowCount(0) self.gridLayout_8.addWidget(self.PortTable, 0, 0, 1, 1) self.tabWidget.addTab(self.Port, "") self.QoS = QtGui.QWidget() self.QoS.setObjectName("QoS") self.gridLayout_10 = QtGui.QGridLayout(self.QoS) self.gridLayout_10.setObjectName("gridLayout_10") self.QoSTable = 
QtGui.QTableWidget(self.QoS) self.QoSTable.setObjectName("QoSTable") self.QoSTable.setColumnCount(0) self.QoSTable.setRowCount(0) self.gridLayout_10.addWidget(self.QoSTable, 0, 0, 1, 1) self.tabWidget.addTab(self.QoS, "") self.Queue = QtGui.QWidget() self.Queue.setObjectName("Queue") self.gridLayout_11 = QtGui.QGridLayout(self.Queue) self.gridLayout_11.setObjectName("gridLayout_11") self.QueueTable = QtGui.QTableWidget(self.Queue) self.QueueTable.setObjectName("QueueTable") self.QueueTable.setColumnCount(0) self.QueueTable.setRowCount(0) self.gridLayout_11.addWidget(self.QueueTable, 0, 0, 1, 1) self.tabWidget.addTab(self.Queue, "") self.sFlow = QtGui.QWidget() self.sFlow.setObjectName("sFlow") self.gridLayout_9 = QtGui.QGridLayout(self.sFlow) self.gridLayout_9.setObjectName("gridLayout_9") self.sFlowTable = QtGui.QTableWidget(self.sFlow) self.sFlowTable.setObjectName("sFlowTable") self.sFlowTable.setColumnCount(0) self.sFlowTable.setRowCount(0) self.gridLayout_9.addWidget(self.sFlowTable, 0, 0, 1, 1) self.tabWidget.addTab(self.sFlow, "") self.SSL = QtGui.QWidget() self.SSL.setObjectName("SSL") self.gridLayout_101 = QtGui.QGridLayout(self.SSL) self.gridLayout_101.setObjectName("gridLayout_101") self.SSLTable = QtGui.QTableWidget(self.SSL) self.SSLTable.setObjectName("SSLTable") self.SSLTable.setColumnCount(0) self.SSLTable.setRowCount(0) self.gridLayout_101.addWidget(self.SSLTable, 0, 0, 1, 1) self.tabWidget.addTab(self.SSL, "") self.verticalLayout.addWidget(self.tabWidget) self.horizontalLayout = QtGui.QHBoxLayout() self.horizontalLayout.setObjectName("horizontalLayout") self.hostLabel = QtGui.QLabel(self.centralwidget) self.hostLabel.setObjectName("hostLabel") self.horizontalLayout.addWidget(self.hostLabel) self.hostComboBox = QtGui.QComboBox(self.centralwidget) self.hostComboBox.setSizeAdjustPolicy(QtGui.QComboBox.AdjustToContents) self.hostComboBox.setObjectName("hostComboBox") self.horizontalLayout.addWidget(self.hostComboBox) self.intervalCheckBox = 
QtGui.QCheckBox(self.centralwidget) self.intervalCheckBox.setObjectName("intervalCheckBox") self.horizontalLayout.addWidget(self.intervalCheckBox) self.intervalSpinBox = QtGui.QSpinBox(self.centralwidget) self.intervalSpinBox.setMinimum(1) self.intervalSpinBox.setMaximum(1000000) self.intervalSpinBox.setObjectName("intervalSpinBox") self.horizontalLayout.addWidget(self.intervalSpinBox) spacerItem = QtGui.QSpacerItem(40, 20, QtGui.QSizePolicy.Expanding, QtGui.QSizePolicy.Minimum) self.horizontalLayout.addItem(spacerItem) self.fetchButton = QtGui.QPushButton(self.centralwidget) self.fetchButton.setObjectName("fetchButton") self.horizontalLayout.addWidget(self.fetchButton) self.verticalLayout.addLayout(self.horizontalLayout) self.gridLayout.addLayout(self.verticalLayout, 0, 0, 1, 1) MainWindow.setCentralWidget(self.centralwidget) self.menubar = QtGui.QMenuBar(MainWindow) self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 28)) self.menubar.setObjectName("menubar") self.menuFile = QtGui.QMenu(self.menubar) self.menuFile.setObjectName("menuFile") MainWindow.setMenuBar(self.menubar) self.statusbar = QtGui.QStatusBar(MainWindow) self.statusbar.setObjectName("statusbar") MainWindow.setStatusBar(self.statusbar) self.actionShow_Log = QtGui.QAction(MainWindow) self.actionShow_Log.setObjectName("actionShow_Log") self.actionNew_DB_Window = QtGui.QAction(MainWindow) self.actionNew_DB_Window.setObjectName("actionNew_DB_Window") self.actionPreferences = QtGui.QAction(MainWindow) self.actionPreferences.setObjectName("actionPreferences") self.actionQuit = QtGui.QAction(MainWindow) self.actionQuit.setObjectName("actionQuit") self.actionNew_Flow_Window = QtGui.QAction(MainWindow) self.actionNew_Flow_Window.setObjectName("actionNew_Flow_Window") self.menuFile.addAction(self.actionNew_DB_Window) self.menuFile.addAction(self.actionNew_Flow_Window) self.menuFile.addAction(self.actionShow_Log) self.menuFile.addAction(self.actionPreferences) self.menuFile.addSeparator() 
self.menuFile.addAction(self.actionQuit) self.menubar.addAction(self.menuFile.menuAction()) self.hostLabel.setBuddy(self.hostComboBox) self.retranslateUi(MainWindow) self.tabWidget.setCurrentIndex(0) QtCore.QMetaObject.connectSlotsByName(MainWindow) def retranslateUi(self, MainWindow): MainWindow.setWindowTitle(QtGui.QApplication.translate("MainWindow", "OVSDB Monitor", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.Bridge), QtGui.QApplication.translate("MainWindow", "Bridge", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.Controller), QtGui.QApplication.translate("MainWindow", "Controller", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.Interface), QtGui.QApplication.translate("MainWindow", "Interface", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.Mirror), QtGui.QApplication.translate("MainWindow", "Mirror", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.NetFlow), QtGui.QApplication.translate("MainWindow", "NetFlow", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.Open_vSwitch), QtGui.QApplication.translate("MainWindow", "Open_vSwitch", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.Port), QtGui.QApplication.translate("MainWindow", "Port", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.QoS), QtGui.QApplication.translate("MainWindow", "QoS", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.Queue), QtGui.QApplication.translate("MainWindow", "Queue", None, QtGui.QApplication.UnicodeUTF8)) self.tabWidget.setTabText(self.tabWidget.indexOf(self.sFlow), QtGui.QApplication.translate("MainWindow", "sFlow", None, QtGui.QApplication.UnicodeUTF8)) 
self.tabWidget.setTabText(self.tabWidget.indexOf(self.SSL), QtGui.QApplication.translate("MainWindow", "SSL", None, QtGui.QApplication.UnicodeUTF8)) self.hostLabel.setText(QtGui.QApplication.translate("MainWindow", "Host", None, QtGui.QApplication.UnicodeUTF8)) self.intervalCheckBox.setText(QtGui.QApplication.translate("MainWindow", "Auto-refetch every", None, QtGui.QApplication.UnicodeUTF8)) self.intervalSpinBox.setSuffix(QtGui.QApplication.translate("MainWindow", "s", None, QtGui.QApplication.UnicodeUTF8)) self.fetchButton.setText(QtGui.QApplication.translate("MainWindow", "Refetch", None, QtGui.QApplication.UnicodeUTF8)) self.menuFile.setTitle(QtGui.QApplication.translate("MainWindow", "File", None, QtGui.QApplication.UnicodeUTF8)) self.actionShow_Log.setText(QtGui.QApplication.translate("MainWindow", "Show Log", None, QtGui.QApplication.UnicodeUTF8)) self.actionNew_DB_Window.setText(QtGui.QApplication.translate("MainWindow", "New DB Window", None, QtGui.QApplication.UnicodeUTF8)) self.actionPreferences.setText(QtGui.QApplication.translate("MainWindow", "Preferences", None, QtGui.QApplication.UnicodeUTF8)) self.actionQuit.setText(QtGui.QApplication.translate("MainWindow", "Quit", None, QtGui.QApplication.UnicodeUTF8)) self.actionNew_Flow_Window.setText(QtGui.QApplication.translate("MainWindow", "New Flow Window", None, QtGui.QApplication.UnicodeUTF8)) openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/automake.mk000066400000000000000000000041641226605124000234600ustar00rootroot00000000000000ovsdbmonitor_pyfiles = \ ovsdb/ovsdbmonitor/OVEApp.py \ ovsdb/ovsdbmonitor/OVECommonWindow.py \ ovsdb/ovsdbmonitor/OVEConfig.py \ ovsdb/ovsdbmonitor/OVEConfigWindow.py \ ovsdb/ovsdbmonitor/OVEFetch.py \ ovsdb/ovsdbmonitor/OVEFlowWindow.py \ ovsdb/ovsdbmonitor/OVEHostWindow.py \ ovsdb/ovsdbmonitor/OVELogWindow.py \ ovsdb/ovsdbmonitor/OVELogger.py \ ovsdb/ovsdbmonitor/OVEMainWindow.py \ ovsdb/ovsdbmonitor/OVEStandard.py \ ovsdb/ovsdbmonitor/OVEUtil.py \ 
ovsdb/ovsdbmonitor/Ui_ConfigWindow.py \ ovsdb/ovsdbmonitor/Ui_FlowWindow.py \ ovsdb/ovsdbmonitor/Ui_HostWindow.py \ ovsdb/ovsdbmonitor/Ui_LogWindow.py \ ovsdb/ovsdbmonitor/Ui_MainWindow.py \ ovsdb/ovsdbmonitor/qt4reactor.py EXTRA_DIST += \ $(ovsdbmonitor_pyfiles) \ ovsdb/ovsdbmonitor/COPYING \ ovsdb/ovsdbmonitor/ConfigWindow.ui \ ovsdb/ovsdbmonitor/FlowWindow.ui \ ovsdb/ovsdbmonitor/HostWindow.ui \ ovsdb/ovsdbmonitor/LogWindow.ui \ ovsdb/ovsdbmonitor/MainWindow.ui \ ovsdb/ovsdbmonitor/ovsdbmonitor.in \ ovsdb/ovsdbmonitor/ovsdbmonitor.desktop MAN_ROOTS += ovsdb/ovsdbmonitor/ovsdbmonitor.1 ovsdbmonitordir = ${datadir}/ovsdbmonitor desktopdir = ${datadir}/applications if BUILD_OVSDBMONITOR noinst_SCRIPTS += ovsdb/ovsdbmonitor/ovsdbmonitor ovsdbmonitor_DATA = $(ovsdbmonitor_pyfiles) desktop_DATA = ovsdb/ovsdbmonitor/ovsdbmonitor.desktop install-exec-hook: sed -e '/NOINSTALL/d' < ovsdb/ovsdbmonitor/ovsdbmonitor > ovsdb/ovsdbmonitor/ovsdbmonitor.tmp chmod +x ovsdb/ovsdbmonitor/ovsdbmonitor.tmp $(INSTALL_PROGRAM) ovsdb/ovsdbmonitor/ovsdbmonitor.tmp $(DESTDIR)$(bindir)/ovsdbmonitor rm ovsdb/ovsdbmonitor/ovsdbmonitor.tmp DISTCLEANFILES += \ ovsdb/ovsdbmonitor/ovsdbmonitor \ ovsdb/ovsdbmonitor/ovsdbmonitor.tmp man_MANS += ovsdb/ovsdbmonitor/ovsdbmonitor.1 endif UNINSTALL_LOCAL += ovsdbmonitor-uninstall-local ovsdbmonitor-uninstall-local: rm -f $(DESTDIR)$(bindir)/ovsdbmonitor SUFFIXES += .ui .py .ui.py: $(PYUIC4) $< | sed 's/from PyQt4 import QtCore, QtGui/\ try:\ from OVEStandard import globalForcePySide\ if globalForcePySide:\ raise Exception()\ from PyQt4 import QtCore, QtGui\ except:\ from PySide import QtCore, QtGui/' > $@ openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/ovsdbmonitor.1000066400000000000000000000011531226605124000241230ustar00rootroot00000000000000.\" -*- nroff -*- .TH ovsdbmonitor 1 "May 2011" "Open vSwitch" "Open vSwitch Manual" . .SH NAME ovsdbmonitor \- GUI tool for monitoring Open vSwitch installations . .SH SYNOPSIS \fBovsdbmonitor\fR . 
.SH DESCRIPTION The \fBovsdbmonitor\fR program is a Qt-based GUI tool for monitoring and troubleshooting Open vSwitch. It presents GUI tables that graphically represent an Open vSwitch kernel flow table (similar to \fBovs\-dpctl dump\-flows\fR) and Open vSwitch database contents (similar to \fBovs\-vsctl list \fItable\fR). .SH "SEE ALSO" . \fBovsdb\-server\fR(1), \fBovsdb\-client\fR(1), and the OVSDB specification. openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/ovsdbmonitor.desktop000066400000000000000000000003021226605124000254270ustar00rootroot00000000000000[Desktop Entry] Name=Open vSwitch DB Monitor Comment=Monitor and troubleshoot local or remote Open vSwitch instances Exec=ovsdbmonitor Terminal=false Type=Application Categories=System;Monitor; openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/ovsdbmonitor.in000077500000000000000000000020761226605124000244010ustar00rootroot00000000000000#! @PYTHON@ # Copyright (c) 2010 Citrix Systems, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# # Version 1.51 # 2010-05-07 import sys sys.path.insert(0, "@ovsdbmonitordir@") sys.path.insert(0, "@abs_top_srcdir@/ovsdb/ovsdbmonitor") # NOINSTALL import sys, traceback from pprint import pprint from OVEApp import * app = OVEApp() try: retVal = app.enter() except Exception, e: print str(e) try: trace = traceback.format_tb(sys.exc_info()[2]) except: trace = ['Traceback not available'] print("".join(trace)) retVal = 1 sys.exit(retVal) openvswitch-2.0.1+git20140120/ovsdb/ovsdbmonitor/qt4reactor.py000066400000000000000000000234301226605124000237600ustar00rootroot00000000000000# Copyright (c) 2001-2008 Twisted Matrix Laboratories. # See LICENSE for details. # The referred licence file contains: # #Copyright (c) 2001-2010 #Allen Short #Andy Gayton #Andrew Bennetts #Antoine Pitrou #Apple Computer, Inc. #Benjamin Bruheim #Bob Ippolito #Canonical Limited #Christopher Armstrong #David Reid #Donovan Preston #Eric Mangold #Eyal Lotem #Itamar Shtull-Trauring #James Knight #Jason A. Mobarak #Jean-Paul Calderone #Jessica McKellar #Jonathan Jacobs #Jonathan Lange #Jonathan D. Simms #Jurgen Hermann #Kevin Horn #Kevin Turner #Mary Gardiner #Matthew Lefkowitz #Massachusetts Institute of Technology #Moshe Zadka #Paul Swartz #Pavel Pergamenshchik #Ralph Meijer #Sean Riley #Software Freedom Conservancy #Travis B. Hartwell #Thijs Triemstra #Thomas Herve #Timothy Allen # #Permission is hereby granted, free of charge, to any person obtaining #a copy of this software and associated documentation files (the #"Software"), to deal in the Software without restriction, including #without limitation the rights to use, copy, modify, merge, publish, #distribute, sublicense, and/or sell copies of the Software, and to #permit persons to whom the Software is furnished to do so, subject to #the following conditions: # #The above copyright notice and this permission notice shall be #included in all copies or substantial portions of the Software. 
# #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, #EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF #MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND #NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE #LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION #WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ This module provides support for Twisted to be driven by the Qt mainloop. In order to use this support, simply do the following:: | app = QApplication(sys.argv) # your code to init Qt | import qt4reactor | qt4reactor.install() alternatively: | from twisted.application import reactors | reactors.installReactor('qt4') Then use twisted.internet APIs as usual. The other methods here are not intended to be called directly. If you don't instantiate a QApplication or QCoreApplication prior to installing the reactor, a QCoreApplication will be constructed by the reactor. QCoreApplication does not require a GUI so trial testing can occur normally. Twisted can be initialized after QApplication.exec_() with a call to reactor.runReturn(). 
calling reactor.stop() will unhook twisted but leave your Qt application running API Stability: stable Maintainer: U{Glenn H Tarbox, PhD} Previous maintainer: U{Itamar Shtull-Trauring} Original port to QT4: U{Gabe Rudy} Subsequent port by therve """ __all__ = ['install'] import sys, time try: from zope.interface import implements except: print('+++ Python Zope interface module is required\n') raise try: from OVEStandard import globalForcePySide if globalForcePySide: raise Exception() from PyQt4.QtCore import QSocketNotifier, QObject, SIGNAL, QTimer, QCoreApplication from PyQt4.QtCore import QEventLoop except: from PySide.QtCore import QSocketNotifier, QObject, SIGNAL, QTimer, QCoreApplication from PySide.QtCore import QEventLoop try: from twisted.internet.interfaces import IReactorFDSet from twisted.python import log from twisted.internet.posixbase import PosixReactorBase except: print('+++ Python Twisted Conch module is required\n') raise class TwistedSocketNotifier(QSocketNotifier): """ Connection between an fd event and reader/writer callbacks. """ def __init__(self, reactor, watcher, type): QSocketNotifier.__init__(self, watcher.fileno(), type) self.reactor = reactor self.watcher = watcher self.fn = None if type == QSocketNotifier.Read: self.fn = self.read elif type == QSocketNotifier.Write: self.fn = self.write QObject.connect(self, SIGNAL("activated(int)"), self.fn) def shutdown(self): QObject.disconnect(self, SIGNAL("activated(int)"), self.fn) self.setEnabled(False) self.fn = self.watcher = None self.deleteLater() def read(self, sock): w = self.watcher #self.setEnabled(False) # ??? do I need this? 
def _read(): why = None try: why = w.doRead() except: log.err() why = sys.exc_info()[1] if why: self.reactor._disconnectSelectable(w, why, True) elif self.watcher: pass #self.setEnabled(True) log.callWithLogger(w, _read) self.reactor.reactorInvocation() def write(self, sock): w = self.watcher self.setEnabled(False) def _write(): why = None try: why = w.doWrite() except: log.err() why = sys.exc_info()[1] if why: self.reactor._disconnectSelectable(w, why, False) elif self.watcher: self.setEnabled(True) log.callWithLogger(w, _write) self.reactor.reactorInvocation() class fakeApplication(QEventLoop): def __init__(self): QEventLoop.__init__(self) def exec_(self): QEventLoop.exec_(self) class QTReactor(PosixReactorBase): """ Qt based reactor. """ implements(IReactorFDSet) _timer = None def __init__(self): self._reads = {} self._writes = {} self._timer=QTimer() self._timer.setSingleShot(True) if QCoreApplication.startingUp(): self.qApp=QCoreApplication([]) self._ownApp=True else: self.qApp = QCoreApplication.instance() self._ownApp=False self._blockApp = None self._readWriteQ=[] """ some debugging instrumentation """ self._doSomethingCount=0 PosixReactorBase.__init__(self) def addReader(self, reader): if not reader in self._reads: self._reads[reader] = TwistedSocketNotifier(self, reader, QSocketNotifier.Read) def addWriter(self, writer): if not writer in self._writes: self._writes[writer] = TwistedSocketNotifier(self, writer, QSocketNotifier.Write) def removeReader(self, reader): if reader in self._reads: #self._reads[reader].shutdown() #del self._reads[reader] self._reads.pop(reader).shutdown() def removeWriter(self, writer): if writer in self._writes: self._writes[writer].shutdown() #del self._writes[writer] self._writes.pop(writer) def removeAll(self): return self._removeAll(self._reads, self._writes) def getReaders(self): return self._reads.keys() def getWriters(self): return self._writes.keys() def callLater(self,howlong, *args, **kargs): rval = 
super(QTReactor,self).callLater(howlong, *args, **kargs) self.reactorInvocation() return rval def crash(self): super(QTReactor,self).crash() def iterate(self,delay=0.0): t=self.running # not sure I entirely get the state of running self.running=True self._timer.stop() # in case its not (rare?) try: if delay == 0.0: self.reactorInvokePrivate() self._timer.stop() # supports multiple invocations else: endTime = delay + time.time() self.reactorInvokePrivate() while True: t = endTime - time.time() if t <= 0.0: return self.qApp.processEvents(QEventLoop.AllEvents | QEventLoop.WaitForMoreEvents,t*1010) finally: self.running=t def addReadWrite(self,t): self._readWriteQ.append(t) def runReturn(self, installSignalHandlers=True): QObject.connect(self._timer, SIGNAL("timeout()"), self.reactorInvokePrivate) self.startRunning(installSignalHandlers=installSignalHandlers) self._timer.start(0) def run(self, installSignalHandlers=True): try: if self._ownApp: self._blockApp=self.qApp else: self._blockApp = fakeApplication() self.runReturn(installSignalHandlers) self._blockApp.exec_() finally: self._timer.stop() # should already be stopped def reactorInvocation(self): self._timer.setInterval(0) def reactorInvokePrivate(self): if not self.running: if self._blockApp is None: # Andy's fix for Ctrl-C quit self.qApp.quit() else: self._blockApp.quit() self._doSomethingCount += 1 self.runUntilCurrent() t = self.timeout() if t is None: t=0.1 else: t = min(t,0.1) self._timer.setInterval(int(t*1010)) self.qApp.processEvents() # could change interval self._timer.start() def doIteration(self): assert False, "doiteration is invalid call" def install(): """ Configure the twisted mainloop to be run inside the qt mainloop. """ from twisted.internet import main reactor = QTReactor() main.installReactor(reactor) openvswitch-2.0.1+git20140120/ovsdb/query.c000066400000000000000000000061041226605124000201010ustar00rootroot00000000000000/* Copyright (c) 2009, 2010 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "query.h" #include "column.h" #include "condition.h" #include "row.h" #include "table.h" void ovsdb_query(struct ovsdb_table *table, const struct ovsdb_condition *cnd, bool (*output_row)(const struct ovsdb_row *, void *aux), void *aux) { if (cnd->n_clauses > 0 && cnd->clauses[0].column->index == OVSDB_COL_UUID && cnd->clauses[0].function == OVSDB_F_EQ) { /* Optimize the case where the query has a clause of the form "uuid == * ", since we have an index on UUID. */ const struct ovsdb_row *row; row = ovsdb_table_get_row(table, &cnd->clauses[0].arg.keys[0].uuid); if (row && row->table == table && ovsdb_condition_evaluate(row, cnd)) { output_row(row, aux); } } else { /* Linear scan. 
*/ const struct ovsdb_row *row, *next; HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { if (ovsdb_condition_evaluate(row, cnd) && !output_row(row, aux)) { break; } } } } static bool query_row_set_cb(const struct ovsdb_row *row, void *results_) { struct ovsdb_row_set *results = results_; ovsdb_row_set_add_row(results, row); return true; } void ovsdb_query_row_set(struct ovsdb_table *table, const struct ovsdb_condition *condition, struct ovsdb_row_set *results) { ovsdb_query(table, condition, query_row_set_cb, results); } static bool query_distinct_cb(const struct ovsdb_row *row, void *hash_) { struct ovsdb_row_hash *hash = hash_; ovsdb_row_hash_insert(hash, row); return true; } void ovsdb_query_distinct(struct ovsdb_table *table, const struct ovsdb_condition *condition, const struct ovsdb_column_set *columns, struct ovsdb_row_set *results) { if (!columns || ovsdb_column_set_contains(columns, OVSDB_COL_UUID)) { /* All the result rows are guaranteed to be distinct anyway. */ return ovsdb_query_row_set(table, condition, results); } else { /* Use hash table to drop duplicates. */ struct ovsdb_row_hash_node *node; struct ovsdb_row_hash hash; ovsdb_row_hash_init(&hash, columns); ovsdb_query(table, condition, query_distinct_cb, &hash); HMAP_FOR_EACH (node, hmap_node, &hash.rows) { ovsdb_row_set_add_row(results, node->row); } ovsdb_row_hash_destroy(&hash, false); } } openvswitch-2.0.1+git20140120/ovsdb/query.h000066400000000000000000000024451226605124000201120ustar00rootroot00000000000000/* Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVSDB_QUERY_H #define OVSDB_QUERY_H 1 #include struct ovsdb_column_set; struct ovsdb_condition; struct ovsdb_row; struct ovsdb_row_set; struct ovsdb_table; struct ovsdb_txn; void ovsdb_query(struct ovsdb_table *, const struct ovsdb_condition *, bool (*output_row)(const struct ovsdb_row *, void *aux), void *aux); void ovsdb_query_row_set(struct ovsdb_table *, const struct ovsdb_condition *, struct ovsdb_row_set *); void ovsdb_query_distinct(struct ovsdb_table *, const struct ovsdb_condition *, const struct ovsdb_column_set *, struct ovsdb_row_set *); #endif /* ovsdb/query.h */ openvswitch-2.0.1+git20140120/ovsdb/remote-active.man000066400000000000000000000006771226605124000220420ustar00rootroot00000000000000.IP "\fBssl:\fIip\fB:\fIport\fR" The specified SSL \fIport\fR on the host at the given \fIip\fR, which must be expressed as an IP address (not a DNS name). The \fB\-\-private\-key\fR, \fB\-\-certificate\fR, and \fB\-\-ca\-cert\fR options are mandatory when this form is used. . .IP "\fBtcp:\fIip\fB:\fIport\fR" Connect to the given TCP \fIport\fR on \fIip\fR. . .IP "\fBunix:\fIfile\fR" Connect to the Unix domain server socket named \fIfile\fR. openvswitch-2.0.1+git20140120/ovsdb/remote-passive.man000066400000000000000000000013151226605124000222270ustar00rootroot00000000000000.IP "\fBpssl:\fIport\fR[\fB:\fIip\fR]" Listen on the given SSL \fIport\fR for a connection. By default, connections are not bound to a particular local IP address, but specifying \fIip\fR limits connections to those from the given \fIip\fR. The \fB\-\-private\-key\fR, \fB\-\-certificate\fR, and \fB\-\-ca\-cert\fR options are mandatory when this form is used. . .IP "\fBptcp:\fIport\fR[\fB:\fIip\fR]" Listen on the given TCP \fIport\fR for a connection. 
By default, connections are not bound to a particular local IP address, but \fIip\fR may be specified to listen only for connections to the given \fIip\fR. . .IP "\fBpunix:\fIfile\fR" Listen on the Unix domain server socket named \fIfile\fR for a connection. openvswitch-2.0.1+git20140120/ovsdb/row.c000066400000000000000000000300361226605124000175440ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "row.h" #include #include "dynamic-string.h" #include "json.h" #include "ovsdb-error.h" #include "shash.h" #include "sort.h" #include "table.h" static struct ovsdb_row * allocate_row(const struct ovsdb_table *table) { size_t n_fields = shash_count(&table->schema->columns); size_t n_indexes = table->schema->n_indexes; size_t row_size = (offsetof(struct ovsdb_row, fields) + sizeof(struct ovsdb_datum) * n_fields + sizeof(struct hmap_node) * n_indexes); struct ovsdb_row *row = xmalloc(row_size); row->table = CONST_CAST(struct ovsdb_table *, table); row->txn_row = NULL; list_init(&row->src_refs); list_init(&row->dst_refs); row->n_refs = 0; return row; } struct ovsdb_row * ovsdb_row_create(const struct ovsdb_table *table) { struct shash_node *node; struct ovsdb_row *row; row = allocate_row(table); SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; ovsdb_datum_init_default(&row->fields[column->index], &column->type); } return row; } struct ovsdb_row 
* ovsdb_row_clone(const struct ovsdb_row *old) { const struct ovsdb_table *table = old->table; const struct shash_node *node; struct ovsdb_row *new; new = allocate_row(table); SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; ovsdb_datum_clone(&new->fields[column->index], &old->fields[column->index], &column->type); } return new; } /* The caller is responsible for ensuring that 'row' has been removed from its * table and that it is not participating in a transaction. */ void ovsdb_row_destroy(struct ovsdb_row *row) { if (row) { const struct ovsdb_table *table = row->table; struct ovsdb_weak_ref *weak, *next; const struct shash_node *node; LIST_FOR_EACH_SAFE (weak, next, dst_node, &row->dst_refs) { list_remove(&weak->src_node); list_remove(&weak->dst_node); free(weak); } LIST_FOR_EACH_SAFE (weak, next, src_node, &row->src_refs) { list_remove(&weak->src_node); list_remove(&weak->dst_node); free(weak); } SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; ovsdb_datum_destroy(&row->fields[column->index], &column->type); } free(row); } } uint32_t ovsdb_row_hash_columns(const struct ovsdb_row *row, const struct ovsdb_column_set *columns, uint32_t basis) { size_t i; for (i = 0; i < columns->n_columns; i++) { const struct ovsdb_column *column = columns->columns[i]; basis = ovsdb_datum_hash(&row->fields[column->index], &column->type, basis); } return basis; } int ovsdb_row_compare_columns_3way(const struct ovsdb_row *a, const struct ovsdb_row *b, const struct ovsdb_column_set *columns) { size_t i; for (i = 0; i < columns->n_columns; i++) { const struct ovsdb_column *column = columns->columns[i]; int cmp = ovsdb_datum_compare_3way(&a->fields[column->index], &b->fields[column->index], &column->type); if (cmp) { return cmp; } } return 0; } bool ovsdb_row_equal_columns(const struct ovsdb_row *a, const struct ovsdb_row *b, const struct ovsdb_column_set *columns) { size_t i; for (i = 0; i 
< columns->n_columns; i++) { const struct ovsdb_column *column = columns->columns[i]; if (!ovsdb_datum_equals(&a->fields[column->index], &b->fields[column->index], &column->type)) { return false; } } return true; } void ovsdb_row_update_columns(struct ovsdb_row *dst, const struct ovsdb_row *src, const struct ovsdb_column_set *columns) { size_t i; for (i = 0; i < columns->n_columns; i++) { const struct ovsdb_column *column = columns->columns[i]; ovsdb_datum_destroy(&dst->fields[column->index], &column->type); ovsdb_datum_clone(&dst->fields[column->index], &src->fields[column->index], &column->type); } } /* Appends the string form of the value in 'row' of each of the columns in * 'columns' to 'out', e.g. "1, \"xyz\", and [1, 2, 3]". */ void ovsdb_row_columns_to_string(const struct ovsdb_row *row, const struct ovsdb_column_set *columns, struct ds *out) { size_t i; for (i = 0; i < columns->n_columns; i++) { const struct ovsdb_column *column = columns->columns[i]; ds_put_cstr(out, english_list_delimiter(i, columns->n_columns)); ovsdb_datum_to_string(&row->fields[column->index], &column->type, out); } } struct ovsdb_error * ovsdb_row_from_json(struct ovsdb_row *row, const struct json *json, struct ovsdb_symbol_table *symtab, struct ovsdb_column_set *included) { struct ovsdb_table_schema *schema = row->table->schema; struct ovsdb_error *error; struct shash_node *node; if (json->type != JSON_OBJECT) { return ovsdb_syntax_error(json, NULL, "row must be JSON object"); } SHASH_FOR_EACH (node, json_object(json)) { const char *column_name = node->name; const struct ovsdb_column *column; struct ovsdb_datum datum; column = ovsdb_table_schema_get_column(schema, column_name); if (!column) { return ovsdb_syntax_error(json, "unknown column", "No column %s in table %s.", column_name, schema->name); } error = ovsdb_datum_from_json(&datum, &column->type, node->data, symtab); if (error) { return error; } ovsdb_datum_swap(&row->fields[column->index], &datum); ovsdb_datum_destroy(&datum, 
&column->type); if (included) { ovsdb_column_set_add(included, column); } } return NULL; } static void put_json_column(struct json *object, const struct ovsdb_row *row, const struct ovsdb_column *column) { json_object_put(object, column->name, ovsdb_datum_to_json(&row->fields[column->index], &column->type)); } struct json * ovsdb_row_to_json(const struct ovsdb_row *row, const struct ovsdb_column_set *columns) { struct json *json; size_t i; json = json_object_create(); for (i = 0; i < columns->n_columns; i++) { put_json_column(json, row, columns->columns[i]); } return json; } void ovsdb_row_set_init(struct ovsdb_row_set *set) { set->rows = NULL; set->n_rows = set->allocated_rows = 0; } void ovsdb_row_set_destroy(struct ovsdb_row_set *set) { free(set->rows); } void ovsdb_row_set_add_row(struct ovsdb_row_set *set, const struct ovsdb_row *row) { if (set->n_rows >= set->allocated_rows) { set->rows = x2nrealloc(set->rows, &set->allocated_rows, sizeof *set->rows); } set->rows[set->n_rows++] = row; } struct json * ovsdb_row_set_to_json(const struct ovsdb_row_set *rows, const struct ovsdb_column_set *columns) { struct json **json_rows; size_t i; json_rows = xmalloc(rows->n_rows * sizeof *json_rows); for (i = 0; i < rows->n_rows; i++) { json_rows[i] = ovsdb_row_to_json(rows->rows[i], columns); } return json_array_create(json_rows, rows->n_rows); } struct ovsdb_row_set_sort_cbdata { struct ovsdb_row_set *set; const struct ovsdb_column_set *columns; }; static int ovsdb_row_set_sort_compare_cb(size_t a, size_t b, void *cbdata_) { struct ovsdb_row_set_sort_cbdata *cbdata = cbdata_; return ovsdb_row_compare_columns_3way(cbdata->set->rows[a], cbdata->set->rows[b], cbdata->columns); } static void ovsdb_row_set_sort_swap_cb(size_t a, size_t b, void *cbdata_) { struct ovsdb_row_set_sort_cbdata *cbdata = cbdata_; const struct ovsdb_row *tmp = cbdata->set->rows[a]; cbdata->set->rows[a] = cbdata->set->rows[b]; cbdata->set->rows[b] = tmp; } void ovsdb_row_set_sort(struct ovsdb_row_set 
*set, const struct ovsdb_column_set *columns) { if (columns && columns->n_columns && set->n_rows > 1) { struct ovsdb_row_set_sort_cbdata cbdata; cbdata.set = set; cbdata.columns = columns; sort(set->n_rows, ovsdb_row_set_sort_compare_cb, ovsdb_row_set_sort_swap_cb, &cbdata); } } void ovsdb_row_hash_init(struct ovsdb_row_hash *rh, const struct ovsdb_column_set *columns) { hmap_init(&rh->rows); ovsdb_column_set_clone(&rh->columns, columns); } void ovsdb_row_hash_destroy(struct ovsdb_row_hash *rh, bool destroy_rows) { struct ovsdb_row_hash_node *node, *next; HMAP_FOR_EACH_SAFE (node, next, hmap_node, &rh->rows) { hmap_remove(&rh->rows, &node->hmap_node); if (destroy_rows) { ovsdb_row_destroy(CONST_CAST(struct ovsdb_row *, node->row)); } free(node); } hmap_destroy(&rh->rows); ovsdb_column_set_destroy(&rh->columns); } size_t ovsdb_row_hash_count(const struct ovsdb_row_hash *rh) { return hmap_count(&rh->rows); } bool ovsdb_row_hash_contains(const struct ovsdb_row_hash *rh, const struct ovsdb_row *row) { size_t hash = ovsdb_row_hash_columns(row, &rh->columns, 0); return ovsdb_row_hash_contains__(rh, row, hash); } /* Returns true if every row in 'b' has an equal row in 'a'. 
*/ bool ovsdb_row_hash_contains_all(const struct ovsdb_row_hash *a, const struct ovsdb_row_hash *b) { struct ovsdb_row_hash_node *node; ovs_assert(ovsdb_column_set_equals(&a->columns, &b->columns)); HMAP_FOR_EACH (node, hmap_node, &b->rows) { if (!ovsdb_row_hash_contains__(a, node->row, node->hmap_node.hash)) { return false; } } return true; } bool ovsdb_row_hash_insert(struct ovsdb_row_hash *rh, const struct ovsdb_row *row) { size_t hash = ovsdb_row_hash_columns(row, &rh->columns, 0); return ovsdb_row_hash_insert__(rh, row, hash); } bool ovsdb_row_hash_contains__(const struct ovsdb_row_hash *rh, const struct ovsdb_row *row, size_t hash) { struct ovsdb_row_hash_node *node; HMAP_FOR_EACH_WITH_HASH (node, hmap_node, hash, &rh->rows) { if (ovsdb_row_equal_columns(row, node->row, &rh->columns)) { return true; } } return false; } bool ovsdb_row_hash_insert__(struct ovsdb_row_hash *rh, const struct ovsdb_row *row, size_t hash) { if (!ovsdb_row_hash_contains__(rh, row, hash)) { struct ovsdb_row_hash_node *node = xmalloc(sizeof *node); node->row = row; hmap_insert(&rh->rows, &node->hmap_node, hash); return true; } else { return false; } } openvswitch-2.0.1+git20140120/ovsdb/row.h000066400000000000000000000146431226605124000175570ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef OVSDB_ROW_H #define OVSDB_ROW_H 1 #include #include #include "column.h" #include "hmap.h" #include "list.h" #include "ovsdb-data.h" struct ovsdb_column_set; /* A weak reference. * * When a column in row A contains a weak reference to the UUID of a row B this * constitutes a weak reference from A (the source) to B (the destination). * * Rows A and B may be in the same table or different tables. * * Weak references from a row to itself are allowed, but no "struct * ovsdb_weak_ref" structures are created for them. */ struct ovsdb_weak_ref { struct list src_node; /* In src->src_refs list. */ struct list dst_node; /* In destination row's dst_refs list. */ struct ovsdb_row *src; /* Source row. */ }; /* A row in a database table. */ struct ovsdb_row { struct hmap_node hmap_node; /* Element in ovsdb_table's 'rows' hmap. */ struct ovsdb_table *table; /* Table to which this belongs. */ struct ovsdb_txn_row *txn_row; /* Transaction that row is in, if any. */ /* Weak references. */ struct list src_refs; /* Weak references from this row. */ struct list dst_refs; /* Weak references to this row. */ /* Number of strong refs to this row from other rows, in this table or * other tables, through 'uuid' columns that have a 'refTable' constraint * pointing to this table and a 'refType' of "strong". A row with nonzero * 'n_refs' cannot be deleted. Updated and checked only at transaction * commit. */ size_t n_refs; /* One datum for each column (shash_count(&table->schema->columns) * elements). */ struct ovsdb_datum fields[]; /* Followed by table->schema->n_indexes "struct hmap_node"s. In rows that * have been committed as part of the database, the hmap_node with * index 'i' is contained in hmap table->indexes[i].
*/ }; /* Row lifecycle, hashing, comparison and JSON conversion. */ struct ovsdb_row *ovsdb_row_create(const struct ovsdb_table *); struct ovsdb_row *ovsdb_row_clone(const struct ovsdb_row *); void ovsdb_row_destroy(struct ovsdb_row *); uint32_t ovsdb_row_hash_columns(const struct ovsdb_row *, const struct ovsdb_column_set *, uint32_t basis); bool ovsdb_row_equal_columns(const struct ovsdb_row *, const struct ovsdb_row *, const struct ovsdb_column_set *); int ovsdb_row_compare_columns_3way(const struct ovsdb_row *, const struct ovsdb_row *, const struct ovsdb_column_set *); void ovsdb_row_update_columns(struct ovsdb_row *, const struct ovsdb_row *, const struct ovsdb_column_set *); void ovsdb_row_columns_to_string(const struct ovsdb_row *, const struct ovsdb_column_set *, struct ds *); struct ovsdb_error *ovsdb_row_from_json(struct ovsdb_row *, const struct json *, struct ovsdb_symbol_table *, struct ovsdb_column_set *included) WARN_UNUSED_RESULT; struct json *ovsdb_row_to_json(const struct ovsdb_row *, const struct ovsdb_column_set *include); /* Returns a read-only pointer to the UUID stored as the first key of 'row''s * OVSDB_COL_UUID field. */ static inline const struct uuid * ovsdb_row_get_uuid(const struct ovsdb_row *row) { return &row->fields[OVSDB_COL_UUID].keys[0].uuid; } /* Same as ovsdb_row_get_uuid(), but the returned UUID may be modified. */ static inline struct uuid * ovsdb_row_get_uuid_rw(struct ovsdb_row *row) { return &row->fields[OVSDB_COL_UUID].keys[0].uuid; } /* Returns a read-only pointer to the UUID stored as the first key of 'row''s * OVSDB_COL_VERSION field. */ static inline const struct uuid * ovsdb_row_get_version(const struct ovsdb_row *row) { return &row->fields[OVSDB_COL_VERSION].keys[0].uuid; } /* Same as ovsdb_row_get_version(), but the returned UUID may be modified. */ static inline struct uuid * ovsdb_row_get_version_rw(struct ovsdb_row *row) { return &row->fields[OVSDB_COL_VERSION].keys[0].uuid; } /* Returns the uuid_hash() of 'row''s UUID. */ static inline uint32_t ovsdb_row_hash(const struct ovsdb_row *row) { return uuid_hash(ovsdb_row_get_uuid(row)); } /* An unordered collection of rows.
*/ struct ovsdb_row_set { const struct ovsdb_row **rows; size_t n_rows, allocated_rows; }; #define OVSDB_ROW_SET_INITIALIZER { NULL, 0, 0 } void ovsdb_row_set_init(struct ovsdb_row_set *); void ovsdb_row_set_destroy(struct ovsdb_row_set *); void ovsdb_row_set_add_row(struct ovsdb_row_set *, const struct ovsdb_row *); struct json *ovsdb_row_set_to_json(const struct ovsdb_row_set *, const struct ovsdb_column_set *); void ovsdb_row_set_sort(struct ovsdb_row_set *, const struct ovsdb_column_set *); /* A hash table of rows. A specified set of columns is used for hashing and * comparing rows. * * The row hash doesn't necessarily own its rows. They may be owned by, for * example, an ovsdb_table. */ struct ovsdb_row_hash { struct hmap rows; struct ovsdb_column_set columns; }; #define OVSDB_ROW_HASH_INITIALIZER(RH) \ { HMAP_INITIALIZER(&(RH).rows), OVSDB_COLUMN_SET_INITIALIZER } struct ovsdb_row_hash_node { struct hmap_node hmap_node; const struct ovsdb_row *row; }; void ovsdb_row_hash_init(struct ovsdb_row_hash *, const struct ovsdb_column_set *); void ovsdb_row_hash_destroy(struct ovsdb_row_hash *, bool destroy_rows); size_t ovsdb_row_hash_count(const struct ovsdb_row_hash *); bool ovsdb_row_hash_contains(const struct ovsdb_row_hash *, const struct ovsdb_row *); bool ovsdb_row_hash_contains_all(const struct ovsdb_row_hash *, const struct ovsdb_row_hash *); bool ovsdb_row_hash_insert(struct ovsdb_row_hash *, const struct ovsdb_row *); bool ovsdb_row_hash_contains__(const struct ovsdb_row_hash *, const struct ovsdb_row *, size_t hash); bool ovsdb_row_hash_insert__(struct ovsdb_row_hash *, const struct ovsdb_row *, size_t hash); #endif /* ovsdb/row.h */ openvswitch-2.0.1+git20140120/ovsdb/server.c000066400000000000000000000157471226605124000202570ustar00rootroot00000000000000/* Copyright (c) 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "server.h" #include "hash.h" #include "ovsdb.h" /* Initializes 'session' as a session within 'server'. */ void ovsdb_session_init(struct ovsdb_session *session, struct ovsdb_server *server) { session->server = server; list_init(&session->completions); hmap_init(&session->waiters); } /* Destroys 'session'. */ void ovsdb_session_destroy(struct ovsdb_session *session) { ovs_assert(hmap_is_empty(&session->waiters)); hmap_destroy(&session->waiters); } /* Searches 'session' for an ovsdb_lock_waiter named 'lock_name' and returns * it if it finds one, otherwise NULL. */ struct ovsdb_lock_waiter * ovsdb_session_get_lock_waiter(const struct ovsdb_session *session, const char *lock_name) { struct ovsdb_lock_waiter *waiter; HMAP_FOR_EACH_WITH_HASH (waiter, session_node, hash_string(lock_name, 0), &session->waiters) { if (!strcmp(lock_name, waiter->lock_name)) { return waiter; } } return NULL; } /* Returns the waiter that owns 'lock'. * * A lock always has an owner, so this function will never return NULL. */ struct ovsdb_lock_waiter * ovsdb_lock_get_owner(const struct ovsdb_lock *lock) { return CONTAINER_OF(list_front(&lock->waiters), struct ovsdb_lock_waiter, lock_node); } /* Removes 'waiter' from its lock's list. This means that, if 'waiter' was * formerly the owner of its lock, then it no longer owns it. * * Returns the session that now owns 'waiter'. This is NULL if 'waiter' was * the lock's owner and no other sessions were waiting for the lock. In this * case, the lock has been destroyed, so the caller must be sure not to refer * to it again. 
A nonnull return value reflects a change in the lock's * ownership if and only if 'waiter' formerly owned the lock. */ struct ovsdb_session * ovsdb_lock_waiter_remove(struct ovsdb_lock_waiter *waiter) { struct ovsdb_lock *lock = waiter->lock; list_remove(&waiter->lock_node); waiter->lock = NULL; if (list_is_empty(&lock->waiters)) { hmap_remove(&lock->server->locks, &lock->hmap_node); free(lock->name); free(lock); return NULL; } return ovsdb_lock_get_owner(lock)->session; } /* Destroys 'waiter', which must have already been removed from its lock's * waiting list with ovsdb_lock_waiter_remove(). * * Removing and destroying locks are decoupled because a lock initially created * by the "steal" request, that is later stolen by another client, remains in * the database session until the database client sends an "unlock" request. */ void ovsdb_lock_waiter_destroy(struct ovsdb_lock_waiter *waiter) { ovs_assert(!waiter->lock); hmap_remove(&waiter->session->waiters, &waiter->session_node); free(waiter->lock_name); free(waiter); } /* Returns true if 'waiter' owns its associated lock. */ bool ovsdb_lock_waiter_is_owner(const struct ovsdb_lock_waiter *waiter) { return waiter->lock && waiter == ovsdb_lock_get_owner(waiter->lock); } /* Initializes 'server'. * * The caller must call ovsdb_server_add_db() for each database to which * 'server' should provide access. */ void ovsdb_server_init(struct ovsdb_server *server) { shash_init(&server->dbs); hmap_init(&server->locks); } /* Adds 'db' to the set of databases served out by 'server'. Returns true if * successful, false if 'db''s name is the same as some database already in * 'server'. */ bool ovsdb_server_add_db(struct ovsdb_server *server, struct ovsdb *db) { return shash_add_once(&server->dbs, db->schema->name, db); } /* Removes 'db' from the set of databases served out by 'server'. Returns * true if successful, false if there is no db associated with * db->schema->name. 
*/ bool ovsdb_server_remove_db(struct ovsdb_server *server, struct ovsdb *db) { void *data = shash_find_and_delete(&server->dbs, db->schema->name); if (data) { return true; } return false; } /* Destroys 'server'. */ void ovsdb_server_destroy(struct ovsdb_server *server) { shash_destroy(&server->dbs); hmap_destroy(&server->locks); } static struct ovsdb_lock * ovsdb_server_create_lock__(struct ovsdb_server *server, const char *lock_name, uint32_t hash) { struct ovsdb_lock *lock; HMAP_FOR_EACH_WITH_HASH (lock, hmap_node, hash, &server->locks) { if (!strcmp(lock->name, lock_name)) { return lock; } } lock = xzalloc(sizeof *lock); lock->server = server; lock->name = xstrdup(lock_name); hmap_insert(&server->locks, &lock->hmap_node, hash); list_init(&lock->waiters); return lock; } /* Attempts to acquire the lock named 'lock_name' for 'session' within * 'server'. Returns the new lock waiter. * * If 'mode' is OVSDB_LOCK_STEAL, then the new lock waiter is always the owner * of the lock. '*victimp' receives the session of the previous owner or NULL * if the lock was previously unowned. (If the victim itself originally * obtained the lock through a "steal" operation, then this function also * removes the victim from the lock's waiting list.) * * If 'mode' is OVSDB_LOCK_WAIT, then the new lock waiter is the owner of the * lock only if this lock had no existing owner. '*victimp' is set to NULL. */ struct ovsdb_lock_waiter * ovsdb_server_lock(struct ovsdb_server *server, struct ovsdb_session *session, const char *lock_name, enum ovsdb_lock_mode mode, struct ovsdb_session **victimp) { uint32_t hash = hash_string(lock_name, 0); struct ovsdb_lock_waiter *waiter, *victim; struct ovsdb_lock *lock; lock = ovsdb_server_create_lock__(server, lock_name, hash); victim = (mode == OVSDB_LOCK_STEAL && !list_is_empty(&lock->waiters) ? 
ovsdb_lock_get_owner(lock) : NULL); waiter = xmalloc(sizeof *waiter); waiter->mode = mode; waiter->lock_name = xstrdup(lock_name); waiter->lock = lock; if (mode == OVSDB_LOCK_STEAL) { list_push_front(&lock->waiters, &waiter->lock_node); } else { list_push_back(&lock->waiters, &waiter->lock_node); } waiter->session = session; hmap_insert(&waiter->session->waiters, &waiter->session_node, hash); if (victim && victim->mode == OVSDB_LOCK_STEAL) { ovsdb_lock_waiter_remove(victim); } *victimp = victim ? victim->session : NULL; return waiter; } openvswitch-2.0.1+git20140120/ovsdb/server.h000066400000000000000000000070501226605124000202500ustar00rootroot00000000000000/* Copyright (c) 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef SERVER_H #define SERVER_H 1 #include "hmap.h" #include "list.h" #include "shash.h" struct ovsdb; struct ovsdb_server; /* Abstract representation of an OVSDB client connection, not tied to any * particular network protocol. Protocol implementations * (e.g. jsonrpc-server.c) embed this in a larger data structure. */ struct ovsdb_session { struct ovsdb_server *server; struct list completions; /* Completed triggers. */ struct hmap waiters; /* "ovsdb_lock_waiter *"s by lock name. 
*/ }; void ovsdb_session_init(struct ovsdb_session *, struct ovsdb_server *); void ovsdb_session_destroy(struct ovsdb_session *); struct ovsdb_lock_waiter *ovsdb_session_get_lock_waiter( const struct ovsdb_session *, const char *lock_name); /* A database lock. * * A lock always has one or more "lock waiters" kept on a list. The waiter at * the head of the list owns the lock. */ struct ovsdb_lock { struct hmap_node hmap_node; /* In ovsdb_server's "locks" hmap. */ struct ovsdb_server *server; /* The containing server. */ char *name; /* Unique name. */ struct list waiters; /* Contains "struct ovsdb_lock_waiter"s. */ }; struct ovsdb_lock_waiter *ovsdb_lock_get_owner(const struct ovsdb_lock *); /* How to obtain a lock. */ enum ovsdb_lock_mode { OVSDB_LOCK_WAIT, /* By waiting for it to become available. */ OVSDB_LOCK_STEAL /* By stealing it from the owner. */ }; /* A session's request for a database lock. */ struct ovsdb_lock_waiter { struct hmap_node session_node; /* In ->session->waiters's hmap. */ struct ovsdb_lock *lock; /* The lock being waited for; set to NULL by ovsdb_lock_waiter_remove(). */ enum ovsdb_lock_mode mode; /* How the lock was requested. */ char *lock_name; /* Name of the lock; freed by ovsdb_lock_waiter_destroy(). */ struct ovsdb_session *session; /* Session that made the request. */ struct list lock_node; /* In ->lock->waiters's list. */ }; struct ovsdb_session *ovsdb_lock_waiter_remove(struct ovsdb_lock_waiter *); void ovsdb_lock_waiter_destroy(struct ovsdb_lock_waiter *); bool ovsdb_lock_waiter_is_owner(const struct ovsdb_lock_waiter *); /* Abstract representation of an OVSDB server not tied to any particular * network protocol. Protocol implementations (e.g. jsonrpc-server.c) embed * this in a larger data structure. */ struct ovsdb_server { struct shash dbs; /* Maps from a db name to a "struct ovsdb *". */ struct hmap locks; /* Contains "struct ovsdb_lock"s indexed by name.
*/ }; void ovsdb_server_init(struct ovsdb_server *); bool ovsdb_server_add_db(struct ovsdb_server *, struct ovsdb *); bool ovsdb_server_remove_db(struct ovsdb_server *, struct ovsdb *); void ovsdb_server_destroy(struct ovsdb_server *); struct ovsdb_lock_waiter *ovsdb_server_lock(struct ovsdb_server *, struct ovsdb_session *, const char *lock_name, enum ovsdb_lock_mode, struct ovsdb_session **victimp); #endif /* ovsdb/server.h */ openvswitch-2.0.1+git20140120/ovsdb/table.c000066400000000000000000000243771226605124000200370ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "table.h" #include #include "json.h" #include "column.h" #include "ovsdb-error.h" #include "ovsdb-parser.h" #include "ovsdb-types.h" #include "row.h" static void add_column(struct ovsdb_table_schema *ts, struct ovsdb_column *column) { ovs_assert(!shash_find(&ts->columns, column->name)); column->index = shash_count(&ts->columns); shash_add(&ts->columns, column->name, column); } struct ovsdb_table_schema * ovsdb_table_schema_create(const char *name, bool mutable, unsigned int max_rows, bool is_root) { struct ovsdb_column *uuid, *version; struct ovsdb_table_schema *ts; ts = xzalloc(sizeof *ts); ts->name = xstrdup(name); ts->mutable = mutable; shash_init(&ts->columns); ts->max_rows = max_rows; ts->is_root = is_root; uuid = ovsdb_column_create("_uuid", false, true, &ovsdb_type_uuid); add_column(ts, uuid); ovs_assert(uuid->index == OVSDB_COL_UUID); version = ovsdb_column_create("_version", false, false, &ovsdb_type_uuid); add_column(ts, version); ovs_assert(version->index == OVSDB_COL_VERSION); ts->n_indexes = 0; ts->indexes = NULL; return ts; } struct ovsdb_table_schema * ovsdb_table_schema_clone(const struct ovsdb_table_schema *old) { struct ovsdb_table_schema *new; struct shash_node *node; size_t i; new = ovsdb_table_schema_create(old->name, old->mutable, old->max_rows, old->is_root); SHASH_FOR_EACH (node, &old->columns) { const struct ovsdb_column *column = node->data; if (column->name[0] == '_') { /* Added automatically by ovsdb_table_schema_create(). 
*/ continue; } add_column(new, ovsdb_column_clone(column)); } new->n_indexes = old->n_indexes; new->indexes = xmalloc(new->n_indexes * sizeof *new->indexes); for (i = 0; i < new->n_indexes; i++) { const struct ovsdb_column_set *old_index = &old->indexes[i]; struct ovsdb_column_set *new_index = &new->indexes[i]; size_t j; ovsdb_column_set_init(new_index); for (j = 0; j < old_index->n_columns; j++) { const struct ovsdb_column *old_column = old_index->columns[j]; const struct ovsdb_column *new_column; new_column = ovsdb_table_schema_get_column(new, old_column->name); ovsdb_column_set_add(new_index, new_column); } } return new; } void ovsdb_table_schema_destroy(struct ovsdb_table_schema *ts) { struct shash_node *node; size_t i; for (i = 0; i < ts->n_indexes; i++) { ovsdb_column_set_destroy(&ts->indexes[i]); } free(ts->indexes); SHASH_FOR_EACH (node, &ts->columns) { ovsdb_column_destroy(node->data); } shash_destroy(&ts->columns); free(ts->name); free(ts); } struct ovsdb_error * ovsdb_table_schema_from_json(const struct json *json, const char *name, struct ovsdb_table_schema **tsp) { struct ovsdb_table_schema *ts; const struct json *columns, *mutable, *max_rows, *is_root, *indexes; struct shash_node *node; struct ovsdb_parser parser; struct ovsdb_error *error; long long int n_max_rows; *tsp = NULL; ovsdb_parser_init(&parser, json, "table schema for table %s", name); columns = ovsdb_parser_member(&parser, "columns", OP_OBJECT); mutable = ovsdb_parser_member(&parser, "mutable", OP_TRUE | OP_FALSE | OP_OPTIONAL); max_rows = ovsdb_parser_member(&parser, "maxRows", OP_INTEGER | OP_OPTIONAL); is_root = ovsdb_parser_member(&parser, "isRoot", OP_BOOLEAN | OP_OPTIONAL); indexes = ovsdb_parser_member(&parser, "indexes", OP_ARRAY | OP_OPTIONAL); error = ovsdb_parser_finish(&parser); if (error) { return error; } if (max_rows) { if (json_integer(max_rows) <= 0) { return ovsdb_syntax_error(json, NULL, "maxRows must be at least 1"); } n_max_rows = max_rows->u.integer; } else { 
n_max_rows = UINT_MAX; } if (shash_is_empty(json_object(columns))) { return ovsdb_syntax_error(json, NULL, "table must have at least one column"); } /* "mutable" defaults to true and "isRoot" to false when omitted; the row limit is clamped to UINT_MAX. */ ts = ovsdb_table_schema_create(name, mutable ? json_boolean(mutable) : true, MIN(n_max_rows, UINT_MAX), is_root ? json_boolean(is_root) : false); SHASH_FOR_EACH (node, json_object(columns)) { struct ovsdb_column *column; if (node->name[0] == '_') { error = ovsdb_syntax_error(json, NULL, "names beginning with " "\"_\" are reserved"); } else if (!ovsdb_parser_is_id(node->name)) { error = ovsdb_syntax_error(json, NULL, "name must be a valid id"); } else { error = ovsdb_column_from_json(node->data, node->name, &column); } if (error) { goto error; } add_column(ts, column); } if (indexes) { size_t i; ts->indexes = xmalloc(indexes->u.array.n * sizeof *ts->indexes); for (i = 0; i < indexes->u.array.n; i++) { struct ovsdb_column_set *index = &ts->indexes[i]; size_t j; error = ovsdb_column_set_from_json(indexes->u.array.elems[i], ts, index); if (error) { goto error; } if (index->n_columns == 0) { error = ovsdb_syntax_error(json, NULL, "index must have " "at least one column"); goto error; } /* Count the index only now that it has been parsed: the error path calls ovsdb_table_schema_destroy(), which destroys the first 'n_indexes' column sets. */ ts->n_indexes++; for (j = 0; j < index->n_columns; j++) { const struct ovsdb_column *column = index->columns[j]; if (!column->persistent) { error = ovsdb_syntax_error(json, NULL, "ephemeral columns " "(such as %s) may not be " "indexed", column->name); goto error; } } } } *tsp = ts; return NULL; error: /* '*tsp' was set to NULL on entry, so the caller never sees the partially built schema. */ ovsdb_table_schema_destroy(ts); return error; } /* Returns table schema 'ts' serialized into JSON. * * The "isRoot" member is included in the JSON only if its value would differ * from 'default_is_root'. Ordinarily 'default_is_root' should be false, * because ordinarily a table would not be part of the root set if its * "isRoot" member is omitted. However, garbage collection was not originally * included in OVSDB, so in older schemas that do not include any "isRoot" * members, every table is implicitly part of the root set.
To serialize such * a schema in a way that can be read by older OVSDB tools, specify * 'default_is_root' as true. */ struct json * ovsdb_table_schema_to_json(const struct ovsdb_table_schema *ts, bool default_is_root) { struct json *json, *columns; struct shash_node *node; json = json_object_create(); if (!ts->mutable) { json_object_put(json, "mutable", json_boolean_create(false)); } if (default_is_root != ts->is_root) { json_object_put(json, "isRoot", json_boolean_create(ts->is_root)); } columns = json_object_create(); SHASH_FOR_EACH (node, &ts->columns) { const struct ovsdb_column *column = node->data; if (node->name[0] != '_') { json_object_put(columns, column->name, ovsdb_column_to_json(column)); } } json_object_put(json, "columns", columns); if (ts->max_rows != UINT_MAX) { json_object_put(json, "maxRows", json_integer_create(ts->max_rows)); } if (ts->n_indexes) { struct json **indexes; size_t i; indexes = xmalloc(ts->n_indexes * sizeof *indexes); for (i = 0; i < ts->n_indexes; i++) { indexes[i] = ovsdb_column_set_to_json(&ts->indexes[i]); } json_object_put(json, "indexes", json_array_create(indexes, ts->n_indexes)); } return json; } const struct ovsdb_column * ovsdb_table_schema_get_column(const struct ovsdb_table_schema *ts, const char *name) { return shash_find_data(&ts->columns, name); } struct ovsdb_table * ovsdb_table_create(struct ovsdb_table_schema *ts) { struct ovsdb_table *table; size_t i; table = xmalloc(sizeof *table); table->schema = ts; table->txn_table = NULL; table->indexes = xmalloc(ts->n_indexes * sizeof *table->indexes); for (i = 0; i < ts->n_indexes; i++) { hmap_init(&table->indexes[i]); } hmap_init(&table->rows); return table; } void ovsdb_table_destroy(struct ovsdb_table *table) { if (table) { struct ovsdb_row *row, *next; size_t i; HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { ovsdb_row_destroy(row); } hmap_destroy(&table->rows); for (i = 0; i < table->schema->n_indexes; i++) { hmap_destroy(&table->indexes[i]); } 
free(table->indexes); ovsdb_table_schema_destroy(table->schema); free(table); } } const struct ovsdb_row * ovsdb_table_get_row(const struct ovsdb_table *table, const struct uuid *uuid) { struct ovsdb_row *row; HMAP_FOR_EACH_WITH_HASH (row, hmap_node, uuid_hash(uuid), &table->rows) { if (uuid_equals(ovsdb_row_get_uuid(row), uuid)) { return row; } } return NULL; } openvswitch-2.0.1+git20140120/ovsdb/table.h000066400000000000000000000051771226605124000200410ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVSDB_TABLE_H #define OVSDB_TABLE_H 1 #include #include "compiler.h" #include "hmap.h" #include "shash.h" struct json; struct uuid; /* Schema for a database table. */ struct ovsdb_table_schema { char *name; bool mutable; struct shash columns; /* Contains "struct ovsdb_column *"s. */ unsigned int max_rows; /* Maximum number of rows. */ bool is_root; /* Part of garbage collection root set? 
*/ struct ovsdb_column_set *indexes; size_t n_indexes; }; struct ovsdb_table_schema *ovsdb_table_schema_create( const char *name, bool mutable, unsigned int max_rows, bool is_root); struct ovsdb_table_schema *ovsdb_table_schema_clone( const struct ovsdb_table_schema *); void ovsdb_table_schema_destroy(struct ovsdb_table_schema *); struct ovsdb_error *ovsdb_table_schema_from_json(const struct json *, const char *name, struct ovsdb_table_schema **) WARN_UNUSED_RESULT; struct json *ovsdb_table_schema_to_json(const struct ovsdb_table_schema *, bool default_is_root); const struct ovsdb_column *ovsdb_table_schema_get_column( const struct ovsdb_table_schema *, const char *name); /* Database table. */ struct ovsdb_table { struct ovsdb_table_schema *schema; struct ovsdb_txn_table *txn_table; /* Only if table is in a transaction. */ struct hmap rows; /* Contains "struct ovsdb_row"s. */ /* An array of schema->n_indexes hmaps, each of which contains "struct * ovsdb_row"s. Each of the hmap_nodes in indexes[i] are at index 'i' at * the end of struct ovsdb_row, following the 'fields' member. */ struct hmap *indexes; }; struct ovsdb_table *ovsdb_table_create(struct ovsdb_table_schema *); void ovsdb_table_destroy(struct ovsdb_table *); const struct ovsdb_row *ovsdb_table_get_row(const struct ovsdb_table *, const struct uuid *); #endif /* ovsdb/table.h */ openvswitch-2.0.1+git20140120/ovsdb/transaction.c000066400000000000000000001020131226605124000212550ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include "transaction.h" #include "bitmap.h" #include "dynamic-string.h" #include "hash.h" #include "hmap.h" #include "json.h" #include "list.h" #include "ovsdb-error.h" #include "ovsdb.h" #include "row.h" #include "table.h" #include "uuid.h" struct ovsdb_txn { struct ovsdb *db; struct list txn_tables; /* Contains "struct ovsdb_txn_table"s. */ struct ds comment; }; /* A table modified by a transaction. */ struct ovsdb_txn_table { struct list node; /* Element in ovsdb_txn's txn_tables list. */ struct ovsdb_table *table; struct hmap txn_rows; /* Contains "struct ovsdb_txn_row"s. */ /* This has the same form as the 'indexes' member of struct ovsdb_table, * but it is only used or updated at transaction commit time, from * check_index_uniqueness(). */ struct hmap *txn_indexes; /* Used by for_each_txn_row(). */ unsigned int serial; /* Serial number of in-progress iteration. */ unsigned int n_processed; /* Number of rows processed. */ }; /* A row modified by the transaction: * * - A row added by a transaction will have null 'old' and non-null 'new'. * * - A row deleted by a transaction will have non-null 'old' and null * 'new'. * * - A row modified by a transaction will have non-null 'old' and 'new'. * * - 'old' and 'new' both null indicates that a row was added then deleted * within a single transaction. Most of the time we instead delete the * ovsdb_txn_row entirely, but inside a for_each_txn_row() callback * there are restrictions that sometimes mean we have to leave the * ovsdb_txn_row in place. */ struct ovsdb_txn_row { struct hmap_node hmap_node; /* In ovsdb_txn_table's txn_rows hmap. */ struct ovsdb_row *old; /* The old row. */ struct ovsdb_row *new; /* The new row. */ size_t n_refs; /* Number of remaining references. */ /* These members are the same as the corresponding members of 'old' or * 'new'. 
They are present here for convenience and because occasionally * there can be an ovsdb_txn_row where both 'old' and 'new' are NULL. */ struct uuid uuid; struct ovsdb_table *table; /* Used by for_each_txn_row(). */ unsigned int serial; /* Serial number of in-progress commit. */ unsigned long changed[]; /* Bits set to 1 for columns that changed. */ }; static struct ovsdb_error * WARN_UNUSED_RESULT delete_garbage_row(struct ovsdb_txn *txn, struct ovsdb_txn_row *r); static void ovsdb_txn_row_prefree(struct ovsdb_txn_row *); static struct ovsdb_error * WARN_UNUSED_RESULT for_each_txn_row(struct ovsdb_txn *txn, struct ovsdb_error *(*)(struct ovsdb_txn *, struct ovsdb_txn_row *)); /* Used by for_each_txn_row() to track tables and rows that have been * processed. */ static unsigned int serial; struct ovsdb_txn * ovsdb_txn_create(struct ovsdb *db) { struct ovsdb_txn *txn = xmalloc(sizeof *txn); txn->db = db; list_init(&txn->txn_tables); ds_init(&txn->comment); return txn; } static void ovsdb_txn_free(struct ovsdb_txn *txn) { ovs_assert(list_is_empty(&txn->txn_tables)); ds_destroy(&txn->comment); free(txn); } static struct ovsdb_error * ovsdb_txn_row_abort(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row) { struct ovsdb_row *old = txn_row->old; struct ovsdb_row *new = txn_row->new; ovsdb_txn_row_prefree(txn_row); if (!old) { if (new) { hmap_remove(&new->table->rows, &new->hmap_node); } } else if (!new) { hmap_insert(&old->table->rows, &old->hmap_node, ovsdb_row_hash(old)); } else { hmap_replace(&new->table->rows, &new->hmap_node, &old->hmap_node); } ovsdb_row_destroy(new); free(txn_row); return NULL; } /* Returns the offset in bytes from the start of an ovsdb_row for 'table' to * the hmap_node for the index numbered 'i'. 
*/ static size_t ovsdb_row_index_offset__(const struct ovsdb_table *table, size_t i) { size_t n_fields = shash_count(&table->schema->columns); return (offsetof(struct ovsdb_row, fields) + n_fields * sizeof(struct ovsdb_datum) + i * sizeof(struct hmap_node)); } /* Returns the hmap_node in 'row' for the index numbered 'i'. */ static struct hmap_node * ovsdb_row_get_index_node(struct ovsdb_row *row, size_t i) { return (void *) ((char *) row + ovsdb_row_index_offset__(row->table, i)); } /* Returns the ovsdb_row given 'index_node', which is a pointer to that row's * hmap_node for the index numbered 'i' within 'table'. */ static struct ovsdb_row * ovsdb_row_from_index_node(struct hmap_node *index_node, const struct ovsdb_table *table, size_t i) { return (void *) ((char *) index_node - ovsdb_row_index_offset__(table, i)); } void ovsdb_txn_abort(struct ovsdb_txn *txn) { ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_abort)); ovsdb_txn_free(txn); } static struct ovsdb_txn_row * find_txn_row(const struct ovsdb_table *table, const struct uuid *uuid) { struct ovsdb_txn_row *txn_row; if (!table->txn_table) { return NULL; } HMAP_FOR_EACH_WITH_HASH (txn_row, hmap_node, uuid_hash(uuid), &table->txn_table->txn_rows) { if (uuid_equals(uuid, &txn_row->uuid)) { return txn_row; } } return NULL; } static struct ovsdb_txn_row * find_or_make_txn_row(struct ovsdb_txn *txn, const struct ovsdb_table *table, const struct uuid *uuid) { struct ovsdb_txn_row *txn_row = find_txn_row(table, uuid); if (!txn_row) { const struct ovsdb_row *row = ovsdb_table_get_row(table, uuid); if (row) { txn_row = ovsdb_txn_row_modify(txn, row)->txn_row; } } return txn_row; } static struct ovsdb_error * WARN_UNUSED_RESULT ovsdb_txn_adjust_atom_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, const struct ovsdb_column *c, const struct ovsdb_base_type *base, const union ovsdb_atom *atoms, unsigned int n, int delta) { const struct ovsdb_table *table; unsigned int i; if 
(!ovsdb_base_type_is_strong_ref(base)) { return NULL; } table = base->u.uuid.refTable; for (i = 0; i < n; i++) { const struct uuid *uuid = &atoms[i].uuid; struct ovsdb_txn_row *txn_row; if (uuid_equals(uuid, ovsdb_row_get_uuid(r))) { /* Self-references don't count. */ continue; } txn_row = find_or_make_txn_row(txn, table, uuid); if (!txn_row) { return ovsdb_error("referential integrity violation", "Table %s column %s row "UUID_FMT" " "references nonexistent row "UUID_FMT" in " "table %s.", r->table->schema->name, c->name, UUID_ARGS(ovsdb_row_get_uuid(r)), UUID_ARGS(uuid), table->schema->name); } txn_row->n_refs += delta; } return NULL; } static struct ovsdb_error * WARN_UNUSED_RESULT ovsdb_txn_adjust_row_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, const struct ovsdb_column *column, int delta) { const struct ovsdb_datum *field = &r->fields[column->index]; struct ovsdb_error *error; error = ovsdb_txn_adjust_atom_refs(txn, r, column, &column->type.key, field->keys, field->n, delta); if (!error) { error = ovsdb_txn_adjust_atom_refs(txn, r, column, &column->type.value, field->values, field->n, delta); } return error; } static struct ovsdb_error * WARN_UNUSED_RESULT update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r) { struct ovsdb_table *table = r->table; struct shash_node *node; SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; struct ovsdb_error *error; if (r->old) { error = ovsdb_txn_adjust_row_refs(txn, r->old, column, -1); if (error) { return OVSDB_WRAP_BUG("error decreasing refcount", error); } } if (r->new) { error = ovsdb_txn_adjust_row_refs(txn, r->new, column, 1); if (error) { return error; } } } return NULL; } static struct ovsdb_error * WARN_UNUSED_RESULT check_ref_count(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *r) { if (r->new || !r->n_refs) { return NULL; } else { return ovsdb_error("referential integrity violation", "cannot delete %s row "UUID_FMT" because " "of %zu 
remaining reference(s)", r->table->schema->name, UUID_ARGS(&r->uuid), r->n_refs); } } static struct ovsdb_error * WARN_UNUSED_RESULT delete_row_refs(struct ovsdb_txn *txn, const struct ovsdb_row *row, const struct ovsdb_base_type *base, const union ovsdb_atom *atoms, unsigned int n) { const struct ovsdb_table *table; unsigned int i; if (!ovsdb_base_type_is_strong_ref(base)) { return NULL; } table = base->u.uuid.refTable; for (i = 0; i < n; i++) { const struct uuid *uuid = &atoms[i].uuid; struct ovsdb_txn_row *txn_row; if (uuid_equals(uuid, ovsdb_row_get_uuid(row))) { /* Self-references don't count. */ continue; } txn_row = find_or_make_txn_row(txn, table, uuid); if (!txn_row) { return OVSDB_BUG("strong ref target missing"); } else if (!txn_row->n_refs) { return OVSDB_BUG("strong ref target has zero n_refs"); } else if (!txn_row->new) { return OVSDB_BUG("deleted strong ref target"); } if (--txn_row->n_refs == 0) { struct ovsdb_error *error = delete_garbage_row(txn, txn_row); if (error) { return error; } } } return NULL; } static struct ovsdb_error * WARN_UNUSED_RESULT delete_garbage_row(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) { struct shash_node *node; struct ovsdb_row *row; if (txn_row->table->schema->is_root) { return NULL; } row = txn_row->new; txn_row->new = NULL; hmap_remove(&txn_row->table->rows, &row->hmap_node); SHASH_FOR_EACH (node, &txn_row->table->schema->columns) { const struct ovsdb_column *column = node->data; const struct ovsdb_datum *field = &row->fields[column->index]; struct ovsdb_error *error; error = delete_row_refs(txn, row, &column->type.key, field->keys, field->n); if (error) { return error; } error = delete_row_refs(txn, row, &column->type.value, field->values, field->n); if (error) { return error; } } ovsdb_row_destroy(row); return NULL; } static struct ovsdb_error * WARN_UNUSED_RESULT collect_garbage(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) { if (txn_row->new && !txn_row->n_refs) { return delete_garbage_row(txn, 
txn_row);
    }
    return NULL;
}

/* Runs the two reference-count passes over 'txn': first
 * update_row_ref_count() on every txn_row, then check_ref_count() on every
 * txn_row.  Returns the first error produced by either pass, otherwise a null
 * pointer. */
static struct ovsdb_error * WARN_UNUSED_RESULT
update_ref_counts(struct ovsdb_txn *txn)
{
    struct ovsdb_error *error;

    error = for_each_txn_row(txn, update_row_ref_count);
    if (error) {
        return error;
    }

    return for_each_txn_row(txn, check_ref_count);
}

/* for_each_txn_row() callback that makes 'txn_row' permanent: swaps the old
 * row's index nodes for the new row's in the table's indexes, detaches both
 * rows from 'txn_row', copies the final reference count into the new row, and
 * destroys the old row and 'txn_row' itself.  Always returns a null
 * pointer. */
static struct ovsdb_error *
ovsdb_txn_row_commit(struct ovsdb_txn *txn OVS_UNUSED,
                     struct ovsdb_txn_row *txn_row)
{
    size_t n_indexes = txn_row->table->schema->n_indexes;

    if (txn_row->old) {
        size_t i;

        for (i = 0; i < n_indexes; i++) {
            struct hmap_node *node = ovsdb_row_get_index_node(txn_row->old, i);
            hmap_remove(&txn_row->table->indexes[i], node);
        }
    }
    if (txn_row->new) {
        size_t i;

        for (i = 0; i < n_indexes; i++) {
            struct hmap_node *node = ovsdb_row_get_index_node(txn_row->new, i);
            /* Reuses the hash already stored in the node -- presumably left
             * there when check_index_uniqueness() put the node into the
             * per-transaction index; confirm against hmap_insert(). */
            hmap_insert(&txn_row->table->indexes[i], node, node->hash);
        }
    }

    /* Must precede the frees below: it unlinks 'txn_row' from its txn_table
     * and clears the rows' back-pointers to it. */
    ovsdb_txn_row_prefree(txn_row);
    if (txn_row->new) {
        txn_row->new->n_refs = txn_row->n_refs;
    }
    ovsdb_row_destroy(txn_row->old);
    free(txn_row);

    return NULL;
}

/* Records a weak reference from 'src_' to 'dst_' by linking a new
 * ovsdb_weak_ref into the destination's 'dst_refs' list and the source's
 * 'src_refs' list.  Self-references are ignored.  The destination is first
 * brought into 'txn' with ovsdb_txn_row_modify(), and the list that is
 * updated belongs to the row that call returns. */
static void
add_weak_ref(struct ovsdb_txn *txn,
             const struct ovsdb_row *src_, const struct ovsdb_row *dst_)
{
    struct ovsdb_row *src = CONST_CAST(struct ovsdb_row *, src_);
    struct ovsdb_row *dst = CONST_CAST(struct ovsdb_row *, dst_);
    struct ovsdb_weak_ref *weak;

    if (src == dst) {
        return;
    }

    dst = ovsdb_txn_row_modify(txn, dst);

    if (!list_is_empty(&dst->dst_refs)) {
        /* Omit duplicates. */
        /* Only the most recently appended reference is compared, so this
         * catches a repeat of the same source added back to back. */
        weak = CONTAINER_OF(list_back(&dst->dst_refs),
                            struct ovsdb_weak_ref, dst_node);
        if (weak->src == src) {
            return;
        }
    }

    weak = xmalloc(sizeof *weak);
    weak->src = src;
    list_push_back(&dst->dst_refs, &weak->dst_node);
    list_push_back(&src->src_refs, &weak->src_node);
}

static struct ovsdb_error * WARN_UNUSED_RESULT
assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
{
    struct ovsdb_table *table;
    struct shash_node *node;

    if (txn_row->old) {
        /* Mark rows that have weak references to 'txn_row' as modified, so
         * that their weak references will get reassessed.
*/ struct ovsdb_weak_ref *weak, *next; LIST_FOR_EACH_SAFE (weak, next, dst_node, &txn_row->old->dst_refs) { if (!weak->src->txn_row) { ovsdb_txn_row_modify(txn, weak->src); } } } if (!txn_row->new) { /* We don't have to do anything about references that originate at * 'txn_row', because ovsdb_row_destroy() will remove those weak * references. */ return NULL; } table = txn_row->table; SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; struct ovsdb_datum *datum = &txn_row->new->fields[column->index]; unsigned int orig_n, i; bool zero = false; orig_n = datum->n; if (ovsdb_base_type_is_weak_ref(&column->type.key)) { for (i = 0; i < datum->n; ) { const struct ovsdb_row *row; row = ovsdb_table_get_row(column->type.key.u.uuid.refTable, &datum->keys[i].uuid); if (row) { add_weak_ref(txn, txn_row->new, row); i++; } else { if (uuid_is_zero(&datum->keys[i].uuid)) { zero = true; } ovsdb_datum_remove_unsafe(datum, i, &column->type); } } } if (ovsdb_base_type_is_weak_ref(&column->type.value)) { for (i = 0; i < datum->n; ) { const struct ovsdb_row *row; row = ovsdb_table_get_row(column->type.value.u.uuid.refTable, &datum->values[i].uuid); if (row) { add_weak_ref(txn, txn_row->new, row); i++; } else { if (uuid_is_zero(&datum->values[i].uuid)) { zero = true; } ovsdb_datum_remove_unsafe(datum, i, &column->type); } } } if (datum->n != orig_n) { bitmap_set1(txn_row->changed, column->index); ovsdb_datum_sort_assert(datum, column->type.key.type); if (datum->n < column->type.n_min) { const struct uuid *row_uuid = ovsdb_row_get_uuid(txn_row->new); if (zero && !txn_row->old) { return ovsdb_error( "constraint violation", "Weak reference column \"%s\" in \"%s\" row "UUID_FMT " (inserted within this transaction) contained " "all-zeros UUID (probably as the default value for " "this column) but deleting this value caused a " "constraint volation because this column is not " "allowed to be empty.", column->name, table->schema->name, 
UUID_ARGS(row_uuid)); } else { return ovsdb_error( "constraint violation", "Deletion of %u weak reference(s) to deleted (or " "never-existing) rows from column \"%s\" in \"%s\" " "row "UUID_FMT" %scaused this column to become empty, " "but constraints on this column disallow an " "empty column.", orig_n - datum->n, column->name, table->schema->name, UUID_ARGS(row_uuid), (txn_row->old ? "" : "(inserted within this transaction) ")); } } } } return NULL; } static struct ovsdb_error * WARN_UNUSED_RESULT determine_changes(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) { struct ovsdb_table *table = txn_row->table; if (txn_row->old && txn_row->new) { struct shash_node *node; bool changed = false; SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; const struct ovsdb_type *type = &column->type; unsigned int idx = column->index; if (!ovsdb_datum_equals(&txn_row->old->fields[idx], &txn_row->new->fields[idx], type)) { bitmap_set1(txn_row->changed, idx); changed = true; } } if (!changed) { /* Nothing actually changed in this row, so drop it. 
*/ ovsdb_txn_row_abort(txn, txn_row); } } else { bitmap_set_multiple(txn_row->changed, 0, shash_count(&table->schema->columns), 1); } return NULL; } static struct ovsdb_error * WARN_UNUSED_RESULT check_max_rows(struct ovsdb_txn *txn) { struct ovsdb_txn_table *t; LIST_FOR_EACH (t, node, &txn->txn_tables) { size_t n_rows = hmap_count(&t->table->rows); unsigned int max_rows = t->table->schema->max_rows; if (n_rows > max_rows) { return ovsdb_error("constraint violation", "transaction causes \"%s\" table to contain " "%zu rows, greater than the schema-defined " "limit of %u row(s)", t->table->schema->name, n_rows, max_rows); } } return NULL; } static struct ovsdb_row * ovsdb_index_search(struct hmap *index, struct ovsdb_row *row, size_t i, uint32_t hash) { const struct ovsdb_table *table = row->table; const struct ovsdb_column_set *columns = &table->schema->indexes[i]; struct hmap_node *node; for (node = hmap_first_with_hash(index, hash); node; node = hmap_next_with_hash(node)) { struct ovsdb_row *irow = ovsdb_row_from_index_node(node, table, i); if (ovsdb_row_equal_columns(row, irow, columns)) { return irow; } } return NULL; } static void duplicate_index_row__(const struct ovsdb_column_set *index, const struct ovsdb_row *row, const char *title, struct ds *out) { size_t n_columns = shash_count(&row->table->schema->columns); ds_put_format(out, "%s row, with UUID "UUID_FMT", ", title, UUID_ARGS(ovsdb_row_get_uuid(row))); if (!row->txn_row || bitmap_scan(row->txn_row->changed, 0, n_columns) == n_columns) { ds_put_cstr(out, "existed in the database before this " "transaction and was not modified by the transaction."); } else if (!row->txn_row->old) { ds_put_cstr(out, "was inserted by this transaction."); } else if (ovsdb_row_equal_columns(row->txn_row->old, row->txn_row->new, index)) { ds_put_cstr(out, "existed in the database before this " "transaction, which modified some of the row's columns " "but not any columns in this index."); } else { ds_put_cstr(out, "had the 
following index values before the " "transaction: "); ovsdb_row_columns_to_string(row->txn_row->old, index, out); ds_put_char(out, '.'); } } static struct ovsdb_error * WARN_UNUSED_RESULT duplicate_index_row(const struct ovsdb_column_set *index, const struct ovsdb_row *a, const struct ovsdb_row *b) { struct ovsdb_column_set all_columns; struct ovsdb_error *error; char *index_s; struct ds s; /* Put 'a' and 'b' in a predictable order to make error messages * reproducible for testing. */ ovsdb_column_set_init(&all_columns); ovsdb_column_set_add_all(&all_columns, a->table); if (ovsdb_row_compare_columns_3way(a, b, &all_columns) < 0) { const struct ovsdb_row *tmp = a; a = b; b = tmp; } ovsdb_column_set_destroy(&all_columns); index_s = ovsdb_column_set_to_string(index); ds_init(&s); ds_put_format(&s, "Transaction causes multiple rows in \"%s\" table to " "have identical values (", a->table->schema->name); ovsdb_row_columns_to_string(a, index, &s); ds_put_format(&s, ") for index on %s. ", index_s); duplicate_index_row__(index, a, "First", &s); ds_put_cstr(&s, " "); duplicate_index_row__(index, b, "Second", &s); free(index_s); error = ovsdb_error("constraint violation", "%s", ds_cstr(&s)); ds_destroy(&s); return error; } static struct ovsdb_error * WARN_UNUSED_RESULT check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row) { struct ovsdb_txn_table *txn_table = txn_row->table->txn_table; struct ovsdb_table *table = txn_row->table; struct ovsdb_row *row = txn_row->new; size_t i; if (!row) { return NULL; } for (i = 0; i < table->schema->n_indexes; i++) { const struct ovsdb_column_set *index = &table->schema->indexes[i]; struct ovsdb_row *irow; uint32_t hash; hash = ovsdb_row_hash_columns(row, index, 0); irow = ovsdb_index_search(&txn_table->txn_indexes[i], row, i, hash); if (irow) { return duplicate_index_row(index, irow, row); } irow = ovsdb_index_search(&table->indexes[i], row, i, hash); if (irow && !irow->txn_row) { return 
duplicate_index_row(index, irow, row); } hmap_insert(&txn_table->txn_indexes[i], ovsdb_row_get_index_node(row, i), hash); } return NULL; } struct ovsdb_error * ovsdb_txn_commit(struct ovsdb_txn *txn, bool durable) { struct ovsdb_replica *replica; struct ovsdb_error *error; /* Figure out what actually changed, and abort early if the transaction * was really a no-op. */ error = for_each_txn_row(txn, determine_changes); if (error) { return OVSDB_WRAP_BUG("can't happen", error); } if (list_is_empty(&txn->txn_tables)) { ovsdb_txn_abort(txn); return NULL; } /* Update reference counts and check referential integrity. */ error = update_ref_counts(txn); if (error) { ovsdb_txn_abort(txn); return error; } /* Delete unreferenced, non-root rows. */ error = for_each_txn_row(txn, collect_garbage); if (error) { ovsdb_txn_abort(txn); return OVSDB_WRAP_BUG("can't happen", error); } /* Check maximum rows table constraints. */ error = check_max_rows(txn); if (error) { ovsdb_txn_abort(txn); return error; } /* Check reference counts and remove bad references for "weak" referential * integrity. */ error = for_each_txn_row(txn, assess_weak_refs); if (error) { ovsdb_txn_abort(txn); return error; } /* Verify that the indexes will still be unique post-transaction. */ error = for_each_txn_row(txn, check_index_uniqueness); if (error) { ovsdb_txn_abort(txn); return error; } /* Send the commit to each replica. */ LIST_FOR_EACH (replica, node, &txn->db->replicas) { error = (replica->class->commit)(replica, txn, durable); if (error) { /* We don't support two-phase commit so only the first replica is * allowed to report an error. */ ovs_assert(&replica->node == txn->db->replicas.next); ovsdb_txn_abort(txn); return error; } } /* Finalize commit. 
*/
    txn->db->run_triggers = true;
    ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
    ovsdb_txn_free(txn);

    return NULL;
}

/* Calls 'cb' for the txn_rows recorded in 'txn', one txn_table at a time,
 * passing each txn_row's 'old' row, 'new' row and 'changed' bitmap along with
 * 'aux'.  Txn_rows whose 'old' and 'new' are both null are skipped.
 *
 * NOTE(review): when 'cb' returns false the 'break' below leaves only the
 * per-table loop, so iteration appears to resume with the next txn_table
 * rather than stopping altogether -- confirm that this is the intended
 * meaning of a false return. */
void
ovsdb_txn_for_each_change(const struct ovsdb_txn *txn,
                          ovsdb_txn_row_cb_func *cb, void *aux)
{
    struct ovsdb_txn_table *t;
    struct ovsdb_txn_row *r;

    LIST_FOR_EACH (t, node, &txn->txn_tables) {
        HMAP_FOR_EACH (r, hmap_node, &t->txn_rows) {
            if ((r->old || r->new) && !cb(r->old, r->new, r->changed, aux)) {
                break;
            }
        }
    }
}

/* Returns the txn_table for 'table', creating it and appending it to
 * 'txn->txn_tables' if 'table' does not have one yet.  A new txn_table gets
 * an empty 'txn_rows' map and one empty hmap per schema index in
 * 'txn_indexes'. */
static struct ovsdb_txn_table *
ovsdb_txn_create_txn_table(struct ovsdb_txn *txn, struct ovsdb_table *table)
{
    if (!table->txn_table) {
        struct ovsdb_txn_table *txn_table;
        size_t i;

        table->txn_table = txn_table = xmalloc(sizeof *table->txn_table);
        txn_table->table = table;
        hmap_init(&txn_table->txn_rows);
        /* Deliberately different from the current 'serial', so that
         * for_each_txn_row() treats the table as not yet visited and resets
         * its 'n_processed' counter, which is left unset here. */
        txn_table->serial = serial - 1;
        txn_table->txn_indexes = xmalloc(table->schema->n_indexes
                                         * sizeof *txn_table->txn_indexes);
        for (i = 0; i < table->schema->n_indexes; i++) {
            hmap_init(&txn_table->txn_indexes[i]);
        }
        list_push_back(&txn->txn_tables, &txn_table->node);
    }
    return table->txn_table;
}

/* Allocates a txn_row that tracks the change from 'old_' to 'new' (either may
 * be null, but the code dereferences whichever is nonnull, so not both),
 * points both rows back at it, and files it under the row's UUID hash in the
 * txn_table for 'table', creating that txn_table if necessary. */
static struct ovsdb_txn_row *
ovsdb_txn_row_create(struct ovsdb_txn *txn, struct ovsdb_table *table,
                     const struct ovsdb_row *old_, struct ovsdb_row *new)
{
    const struct ovsdb_row *row = old_ ? old_ : new;
    struct ovsdb_row *old = CONST_CAST(struct ovsdb_row *, old_);
    size_t n_columns = shash_count(&table->schema->columns);
    struct ovsdb_txn_table *txn_table;
    struct ovsdb_txn_row *txn_row;

    /* The 'changed' bitmap is the trailing member; size the allocation for
     * one bit per column.  xzalloc() leaves the bitmap all-zeros. */
    txn_row = xzalloc(offsetof(struct ovsdb_txn_row, changed)
                      + bitmap_n_bytes(n_columns));
    txn_row->uuid = *ovsdb_row_get_uuid(row);
    txn_row->table = row->table;
    txn_row->old = old;
    txn_row->new = new;
    txn_row->n_refs = old ? old->n_refs : 0;
    /* Differs from the current 'serial', so a for_each_txn_row() pass that
     * is in progress will still visit this row. */
    txn_row->serial = serial - 1;

    if (old) {
        old->txn_row = txn_row;
    }
    if (new) {
        new->txn_row = txn_row;
    }

    txn_table = ovsdb_txn_create_txn_table(txn, table);
    hmap_insert(&txn_table->txn_rows, &txn_row->hmap_node,
                ovsdb_row_hash(old ?
old : new)); return txn_row; } struct ovsdb_row * ovsdb_txn_row_modify(struct ovsdb_txn *txn, const struct ovsdb_row *ro_row_) { struct ovsdb_row *ro_row = CONST_CAST(struct ovsdb_row *, ro_row_); if (ro_row->txn_row) { ovs_assert(ro_row == ro_row->txn_row->new); return ro_row; } else { struct ovsdb_table *table = ro_row->table; struct ovsdb_row *rw_row; rw_row = ovsdb_row_clone(ro_row); rw_row->n_refs = ro_row->n_refs; uuid_generate(ovsdb_row_get_version_rw(rw_row)); ovsdb_txn_row_create(txn, table, ro_row, rw_row); hmap_replace(&table->rows, &ro_row->hmap_node, &rw_row->hmap_node); return rw_row; } } void ovsdb_txn_row_insert(struct ovsdb_txn *txn, struct ovsdb_row *row) { uint32_t hash = ovsdb_row_hash(row); struct ovsdb_table *table = row->table; uuid_generate(ovsdb_row_get_version_rw(row)); ovsdb_txn_row_create(txn, table, NULL, row); hmap_insert(&table->rows, &row->hmap_node, hash); } /* 'row' must be assumed destroyed upon return; the caller must not reference * it again. */ void ovsdb_txn_row_delete(struct ovsdb_txn *txn, const struct ovsdb_row *row_) { struct ovsdb_row *row = CONST_CAST(struct ovsdb_row *, row_); struct ovsdb_table *table = row->table; struct ovsdb_txn_row *txn_row = row->txn_row; hmap_remove(&table->rows, &row->hmap_node); if (!txn_row) { ovsdb_txn_row_create(txn, table, row, NULL); } else { ovs_assert(txn_row->new == row); if (txn_row->old) { txn_row->new = NULL; } else { hmap_remove(&table->txn_table->txn_rows, &txn_row->hmap_node); free(txn_row); } ovsdb_row_destroy(row); } } void ovsdb_txn_add_comment(struct ovsdb_txn *txn, const char *s) { if (txn->comment.length) { ds_put_char(&txn->comment, '\n'); } ds_put_cstr(&txn->comment, s); } const char * ovsdb_txn_get_comment(const struct ovsdb_txn *txn) { return txn->comment.length ? 
ds_cstr_ro(&txn->comment) : NULL;
}

/* Detaches 'txn_row' from its txn_table ahead of being freed: removes it from
 * the table's 'txn_rows' map, decrements the table's 'n_processed' counter
 * (the counter that for_each_txn_row() compares against the size of that
 * map), and clears the 'txn_row' back-pointers of its old and new rows.  Does
 * not free anything. */
static void
ovsdb_txn_row_prefree(struct ovsdb_txn_row *txn_row)
{
    struct ovsdb_txn_table *txn_table = txn_row->table->txn_table;

    txn_table->n_processed--;
    hmap_remove(&txn_table->txn_rows, &txn_row->hmap_node);

    if (txn_row->old) {
        txn_row->old->txn_row = NULL;
    }
    if (txn_row->new) {
        txn_row->new->txn_row = NULL;
    }
}

/* Destroys 'txn_table', which must not contain any txn_rows: frees its
 * per-index hmaps, clears the owning table's 'txn_table' pointer, unlinks it
 * from the transaction's list of txn_tables, and frees it. */
static void
ovsdb_txn_table_destroy(struct ovsdb_txn_table *txn_table)
{
    size_t i;

    ovs_assert(hmap_is_empty(&txn_table->txn_rows));

    for (i = 0; i < txn_table->table->schema->n_indexes; i++) {
        hmap_destroy(&txn_table->txn_indexes[i]);
    }
    free(txn_table->txn_indexes);

    txn_table->table->txn_table = NULL;
    hmap_destroy(&txn_table->txn_rows);
    list_remove(&txn_table->node);
    free(txn_table);
}

/* Calls 'cb' for every txn_row within 'txn'.  If 'cb' returns nonnull, this
 * aborts the iteration and for_each_txn_row() passes the error up.  Otherwise,
 * returns a null pointer after iteration is complete.
 *
 * 'cb' may insert new txn_rows and new txn_tables into 'txn'.  It may delete
 * the txn_row that it is passed in, or txn_rows in txn_tables other than the
 * one passed to 'cb'.  It may *not* delete txn_rows other than the one passed
 * in within the same txn_table.  It may *not* delete any txn_tables.  As long
 * as these rules are followed, 'cb' will be called exactly once for each
 * txn_row in 'txn', even those added by 'cb'.
 *
 * (Even though 'cb' is not allowed to delete some txn_rows, it can still
 * delete any actual row by clearing a txn_row's 'new' member.)
*/
static struct ovsdb_error * WARN_UNUSED_RESULT
for_each_txn_row(struct ovsdb_txn *txn,
                 struct ovsdb_error *(*cb)(struct ovsdb_txn *,
                                           struct ovsdb_txn_row *))
{
    bool any_work;

    /* A new pass: every txn_table and txn_row whose 'serial' differs from
     * this value has not been visited yet in this pass. */
    serial++;

    do {
        struct ovsdb_txn_table *t, *next_txn_table;

        any_work = false;
        LIST_FOR_EACH_SAFE (t, next_txn_table, node, &txn->txn_tables) {
            if (t->serial != serial) {
                /* First visit to this table in this pass. */
                t->serial = serial;
                t->n_processed = 0;
            }

            /* Rescan the table until every row it currently holds has been
             * handed to 'cb'; rows that 'cb' adds during a scan carry a
             * stale 'serial' and are picked up here. */
            while (t->n_processed < hmap_count(&t->txn_rows)) {
                struct ovsdb_txn_row *r, *next_txn_row;

                HMAP_FOR_EACH_SAFE (r, next_txn_row,
                                    hmap_node, &t->txn_rows) {
                    if (r->serial != serial) {
                        struct ovsdb_error *error;

                        /* Mark the row as visited before calling 'cb',
                         * which may free it. */
                        r->serial = serial;
                        t->n_processed++;
                        any_work = true;

                        error = cb(txn, r);
                        if (error) {
                            return error;
                        }
                    }
                }
            }
            if (hmap_is_empty(&t->txn_rows)) {
                /* Table is empty.  Drop it. */
                ovsdb_txn_table_destroy(t);
            }
        }
    /* Go around again whenever 'cb' ran at all, since it may have added rows
     * to tables that this sweep had already passed. */
    } while (any_work);

    return NULL;
}
openvswitch-2.0.1+git20140120/ovsdb/transaction.h000066400000000000000000000033301226605124000212640ustar00rootroot00000000000000/* Copyright (c) 2009, 2010 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ #ifndef OVSDB_TRANSACTION_H #define OVSDB_TRANSACTION_H 1 #include #include "compiler.h" struct json; struct ovsdb; struct ovsdb_table; struct uuid; struct ovsdb_txn *ovsdb_txn_create(struct ovsdb *); void ovsdb_txn_abort(struct ovsdb_txn *); struct ovsdb_error *ovsdb_txn_commit(struct ovsdb_txn *, bool durable) WARN_UNUSED_RESULT; struct ovsdb_row *ovsdb_txn_row_modify(struct ovsdb_txn *, const struct ovsdb_row *); void ovsdb_txn_row_insert(struct ovsdb_txn *, struct ovsdb_row *); void ovsdb_txn_row_delete(struct ovsdb_txn *, const struct ovsdb_row *); typedef bool ovsdb_txn_row_cb_func(const struct ovsdb_row *old, const struct ovsdb_row *new, const unsigned long int *changed, void *aux); void ovsdb_txn_for_each_change(const struct ovsdb_txn *, ovsdb_txn_row_cb_func *, void *aux); void ovsdb_txn_add_comment(struct ovsdb_txn *, const char *); const char *ovsdb_txn_get_comment(const struct ovsdb_txn *); #endif /* ovsdb/transaction.h */ openvswitch-2.0.1+git20140120/ovsdb/trigger.c000066400000000000000000000065421226605124000204050ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "trigger.h" #include #include "json.h" #include "jsonrpc.h" #include "ovsdb.h" #include "poll-loop.h" #include "server.h" static bool ovsdb_trigger_try(struct ovsdb_trigger *, long long int now); static void ovsdb_trigger_complete(struct ovsdb_trigger *); void ovsdb_trigger_init(struct ovsdb_session *session, struct ovsdb *db, struct ovsdb_trigger *trigger, struct json *request, long long int now) { trigger->session = session; trigger->db = db; list_push_back(&trigger->db->triggers, &trigger->node); trigger->request = request; trigger->result = NULL; trigger->created = now; trigger->timeout_msec = LLONG_MAX; ovsdb_trigger_try(trigger, now); } void ovsdb_trigger_destroy(struct ovsdb_trigger *trigger) { list_remove(&trigger->node); json_destroy(trigger->request); json_destroy(trigger->result); } bool ovsdb_trigger_is_complete(const struct ovsdb_trigger *trigger) { return trigger->result != NULL; } struct json * ovsdb_trigger_steal_result(struct ovsdb_trigger *trigger) { struct json *result = trigger->result; trigger->result = NULL; return result; } void ovsdb_trigger_run(struct ovsdb *db, long long int now) { struct ovsdb_trigger *t, *next; bool run_triggers; run_triggers = db->run_triggers; db->run_triggers = false; LIST_FOR_EACH_SAFE (t, next, node, &db->triggers) { if (run_triggers || now - t->created >= t->timeout_msec) { ovsdb_trigger_try(t, now); } } } void ovsdb_trigger_wait(struct ovsdb *db, long long int now) { if (db->run_triggers) { poll_immediate_wake(); } else { long long int deadline = LLONG_MAX; struct ovsdb_trigger *t; LIST_FOR_EACH (t, node, &db->triggers) { if (t->created < LLONG_MAX - t->timeout_msec) { long long int t_deadline = t->created + t->timeout_msec; if (deadline > t_deadline) { deadline = t_deadline; if (now >= deadline) { break; } } } } if (deadline < LLONG_MAX) { poll_timer_wait_until(deadline); } } } static bool ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now) { t->result = ovsdb_execute(t->db, 
t->session, t->request, now - t->created, &t->timeout_msec); if (t->result) { ovsdb_trigger_complete(t); return true; } else { return false; } } static void ovsdb_trigger_complete(struct ovsdb_trigger *t) { ovs_assert(t->result != NULL); list_remove(&t->node); list_push_back(&t->session->completions, &t->node); } openvswitch-2.0.1+git20140120/ovsdb/trigger.h000066400000000000000000000033101226605124000204000ustar00rootroot00000000000000/* Copyright (c) 2009, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef OVSDB_TRIGGER_H #define OVSDB_TRIGGER_H 1 #include "list.h" struct ovsdb; struct ovsdb_trigger { struct ovsdb_session *session; /* Session that owns this trigger. */ struct ovsdb *db; /* Database on which trigger acts. */ struct list node; /* !result: in db->triggers; * result: in session->completions. */ struct json *request; /* Database request. */ struct json *result; /* Result (null if none yet). */ long long int created; /* Time created. */ long long int timeout_msec; /* Max wait duration. 
*/ }; void ovsdb_trigger_init(struct ovsdb_session *, struct ovsdb *, struct ovsdb_trigger *, struct json *request, long long int now); void ovsdb_trigger_destroy(struct ovsdb_trigger *); bool ovsdb_trigger_is_complete(const struct ovsdb_trigger *); struct json *ovsdb_trigger_steal_result(struct ovsdb_trigger *); void ovsdb_trigger_run(struct ovsdb *, long long int now); void ovsdb_trigger_wait(struct ovsdb *, long long int now); #endif /* ovsdb/trigger.h */ openvswitch-2.0.1+git20140120/python/000077500000000000000000000000001226605124000167735ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/python/automake.mk000066400000000000000000000052621226605124000211370ustar00rootroot00000000000000ovstest_pyfiles = \ python/ovstest/__init__.py \ python/ovstest/args.py \ python/ovstest/rpcserver.py \ python/ovstest/tcp.py \ python/ovstest/tests.py \ python/ovstest/udp.py \ python/ovstest/util.py \ python/ovstest/vswitch.py ovs_pyfiles = \ python/ovs/__init__.py \ python/ovs/daemon.py \ python/ovs/db/__init__.py \ python/ovs/db/data.py \ python/ovs/db/error.py \ python/ovs/db/idl.py \ python/ovs/db/parser.py \ python/ovs/db/schema.py \ python/ovs/db/types.py \ python/ovs/fatal_signal.py \ python/ovs/json.py \ python/ovs/jsonrpc.py \ python/ovs/ovsuuid.py \ python/ovs/poller.py \ python/ovs/process.py \ python/ovs/reconnect.py \ python/ovs/socket_util.py \ python/ovs/stream.py \ python/ovs/timeval.py \ python/ovs/unixctl/__init__.py \ python/ovs/unixctl/client.py \ python/ovs/unixctl/server.py \ python/ovs/util.py \ python/ovs/version.py \ python/ovs/vlog.py PYFILES = $(ovs_pyfiles) python/ovs/dirs.py $(ovstest_pyfiles) EXTRA_DIST += $(PYFILES) PYCOV_CLEAN_FILES += $(PYFILES:.py=.py,cover) if HAVE_PYTHON nobase_pkgdata_DATA = $(ovs_pyfiles) $(ovstest_pyfiles) ovs-install-data-local: $(MKDIR_P) python/ovs sed \ -e '/^##/d' \ -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \ -e 's,[@]RUNDIR[@],$(RUNDIR),g' \ -e 's,[@]LOGDIR[@],$(LOGDIR),g' \ -e 's,[@]bindir[@],$(bindir),g' \ 
-e 's,[@]sysconfdir[@],$(sysconfdir),g' \ -e 's,[@]DBDIR[@],$(DBDIR),g' \ < $(srcdir)/python/ovs/dirs.py.template \ > python/ovs/dirs.py.tmp $(MKDIR_P) $(DESTDIR)$(pkgdatadir)/python/ovs $(INSTALL_DATA) python/ovs/dirs.py.tmp $(DESTDIR)$(pkgdatadir)/python/ovs/dirs.py rm python/ovs/dirs.py.tmp else ovs-install-data-local: @: endif install-data-local: ovs-install-data-local UNINSTALL_LOCAL += ovs-uninstall-local ovs-uninstall-local: rm -f $(DESTDIR)$(pkgdatadir)/python/ovs/dirs.py ALL_LOCAL += $(srcdir)/python/ovs/version.py $(srcdir)/python/ovs/version.py: config.status $(ro_shell) > $(@F).tmp echo 'VERSION = "$(VERSION)"' >> $(@F).tmp if cmp -s $(@F).tmp $@; then touch $@; rm $(@F).tmp; else mv $(@F).tmp $@; fi ALL_LOCAL += $(srcdir)/python/ovs/dirs.py $(srcdir)/python/ovs/dirs.py: python/ovs/dirs.py.template sed \ -e '/^##/d' \ -e 's,[@]pkgdatadir[@],/usr/local/share/openvswitch,g' \ -e 's,[@]RUNDIR[@],/var/run,g' \ -e 's,[@]LOGDIR[@],/usr/local/var/log,g' \ -e 's,[@]bindir[@],/usr/local/bin,g' \ -e 's,[@]sysconfdir[@],/usr/local/etc,g' \ -e 's,[@]DBDIR[@],/usr/local/etc/openvswitch,g' \ < $? > $@.tmp mv $@.tmp $@ EXTRA_DIST += python/ovs/dirs.py python/ovs/dirs.py.template openvswitch-2.0.1+git20140120/python/compat/000077500000000000000000000000001226605124000202565ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/python/compat/argparse.py000066400000000000000000002534531226605124000224500ustar00rootroot00000000000000# Author: Steven J. Bethard . 
"""Command-line parsing library This module is an optparse-inspired command-line parsing library that: - handles both optional and positional arguments - produces highly informative usage messages - supports parsers that dispatch to sub-parsers The following is a simple usage example that sums integers from the command-line and writes the result to a file:: parser = argparse.ArgumentParser( description='sum the integers at the command line') parser.add_argument( 'integers', metavar='int', nargs='+', type=int, help='an integer to be summed') parser.add_argument( '--log', default=sys.stdout, type=argparse.FileType('w'), help='the file where the sum should be written') args = parser.parse_args() args.log.write('%s' % sum(args.integers)) args.log.close() The module contains the following public classes: - ArgumentParser -- The main entry point for command-line parsing. As the example above shows, the add_argument() method is used to populate the parser with actions for optional and positional arguments. Then the parse_args() method is invoked to convert the args at the command-line into an object with attributes. - ArgumentError -- The exception raised by ArgumentParser objects when there are errors with the parser's actions. Errors raised while parsing the command-line are caught by ArgumentParser and emitted as command-line messages. - FileType -- A factory for defining types of files to be created. As the example above shows, instances of FileType are typically passed as the type= argument of add_argument() calls. - Action -- The base class for parser actions. Typically actions are selected by passing strings like 'store_true' or 'append_const' to the action= argument of add_argument(). However, for greater customization of ArgumentParser actions, subclasses of Action may be defined and passed as the action= argument. 
- HelpFormatter, RawDescriptionHelpFormatter, RawTextHelpFormatter,
        ArgumentDefaultsHelpFormatter -- Formatter classes which
        may be passed as the formatter_class= argument to the
        ArgumentParser constructor. HelpFormatter is the default,
        RawDescriptionHelpFormatter and RawTextHelpFormatter tell the parser
        not to change the formatting for help text, and
        ArgumentDefaultsHelpFormatter adds information about argument defaults
        to the help.

All other classes in this module are considered implementation details.
(Also note that HelpFormatter and RawDescriptionHelpFormatter are only
considered public as object names -- the API of the formatter objects is
still considered an implementation detail.)
"""

__version__ = '1.1'
__all__ = [
    'ArgumentParser',
    'ArgumentError',
    'ArgumentTypeError',
    'FileType',
    'HelpFormatter',
    'ArgumentDefaultsHelpFormatter',
    'RawDescriptionHelpFormatter',
    'RawTextHelpFormatter',
    'Namespace',
    'Action',
    'ONE_OR_MORE',
    'OPTIONAL',
    'PARSER',
    'REMAINDER',
    'SUPPRESS',
    'ZERO_OR_MORE',
]


import collections as _collections
import copy as _copy
import os as _os
import re as _re
import sys as _sys
import textwrap as _textwrap

from gettext import gettext as _


def _callable(obj):
    """Return True if 'obj' has a '__call__' or a '__bases__' attribute."""
    return hasattr(obj, '__call__') or hasattr(obj, '__bases__')


SUPPRESS = '==SUPPRESS=='

OPTIONAL = '?'
ZERO_OR_MORE = '*'
ONE_OR_MORE = '+'
PARSER = 'A...'
REMAINDER = '...'
_UNRECOGNIZED_ARGS_ATTR = '_unrecognized_args'

# =============================
# Utility functions and classes
# =============================

class _AttributeHolder(object):
    """Abstract base class that provides __repr__.

    The __repr__ method returns a string in the format::
        ClassName(attr=name, attr=name, ...)
    The attributes are determined either by a class-level attribute,
    '_kwarg_names', or by inspecting the instance __dict__.
    """

    def __repr__(self):
        type_name = type(self).__name__
        arg_strings = []
        # Positional-style values come first, then name=value pairs.
        for arg in self._get_args():
            arg_strings.append(repr(arg))
        for name, value in self._get_kwargs():
            arg_strings.append('%s=%r' % (name, value))
        return '%s(%s)' % (type_name, ', '.join(arg_strings))

    def _get_kwargs(self):
        """Return the instance attributes as (name, value) pairs, sorted."""
        return sorted(self.__dict__.items())

    def _get_args(self):
        """Return the values to show positionally in the repr (none here)."""
        return []


def _ensure_value(namespace, name, value):
    """Return namespace.<name>, first setting it to 'value' if it is missing
    or None."""
    if getattr(namespace, name, None) is None:
        setattr(namespace, name, value)
    return getattr(namespace, name)


# ===============
# Formatting Help
# ===============

class HelpFormatter(object):
    """Formatter for generating usage messages and argument help strings.

    Only the name of this class is considered a public API. All the methods
    provided by the class are considered an implementation detail.
    """

    def __init__(self,
                 prog,
                 indent_increment=2,
                 max_help_position=24,
                 width=None):

        # default setting for width
        if width is None:
            try:
                width = int(_os.environ['COLUMNS'])
            except (KeyError, ValueError):
                # COLUMNS is unset or is not an integer
                width = 80
            # NOTE(review): nesting of this line is reconstructed; it is
            # taken to apply only when the width was not given explicitly.
            width -= 2

        self._prog = prog
        self._indent_increment = indent_increment
        self._max_help_position = max_help_position
        self._width = width

        self._current_indent = 0
        self._level = 0
        self._action_max_length = 0

        self._root_section = self._Section(self, None)
        self._current_section = self._root_section

        self._whitespace_matcher = _re.compile(r'\s+')
        self._long_break_matcher = _re.compile(r'\n\n\n+')

    # ===============================
    # Section and indentation methods
    # ===============================
    def _indent(self):
        self._current_indent += self._indent_increment
        self._level += 1

    def _dedent(self):
        self._current_indent -= self._indent_increment
        assert self._current_indent >= 0, 'Indent decreased below 0.'
class _Section(object):
    """One node in the tree of help-message pieces kept by a formatter.

    ``items`` holds ``(func, args)`` pairs; each pair is rendered by calling
    ``func(*args)`` when the section is formatted.  ``parent`` is the
    enclosing section (None for the root) and ``heading`` is the optional
    title printed above the section's content.
    """

    def __init__(self, formatter, parent, heading=None):
        self.formatter = formatter
        self.parent = parent
        self.heading = heading
        self.items = []

    def format_help(self):
        """Return the rendered text of this section, or '' if it is empty.

        Each item is rendered exactly once.  (An earlier version rendered
        every item in a throw-away loop before rendering it again for the
        result, which doubled the work at every level of section nesting.)
        """
        # format the indented section; the root section is not indented
        if self.parent is not None:
            self.formatter._indent()
        join = self.formatter._join_parts
        item_help = join([func(*args) for func, args in self.items])
        if self.parent is not None:
            self.formatter._dedent()

        # return nothing if the section was empty
        if not item_help:
            return ''

        # add the heading if the section was non-empty
        if self.heading is not SUPPRESS and self.heading is not None:
            current_indent = self.formatter._current_indent
            heading = '%*s%s:\n' % (current_indent, '', self.heading)
        else:
            heading = ''

        # join the section-initial newline, the heading and the help
        return join(['\n', heading, item_help, '\n'])
def _format_usage(self, usage, actions, groups, prefix):
    """Build the usage message, wrapping it when it exceeds the text width.

    usage   -- explicit usage string (may contain ``%(prog)s``) or None to
               derive the usage from *actions*.
    actions -- actions to describe; split here into optionals (those with
               option strings) and positionals.
    groups  -- passed through to ``self._format_actions_usage``.
    prefix  -- text placed before the usage; defaults to the translated
               ``'usage: '`` when None.

    Returns ``prefix + usage`` followed by a blank line.
    """
    if prefix is None:
        prefix = _('usage: ')

    # if usage is specified, use that
    if usage is not None:
        usage = usage % dict(prog=self._prog)

    # if no optionals or positionals are available, usage is just prog
    elif usage is None and not actions:
        usage = '%(prog)s' % dict(prog=self._prog)

    # if optionals and positionals are available, calculate usage
    elif usage is None:
        prog = '%(prog)s' % dict(prog=self._prog)

        # split optionals from positionals
        optionals = []
        positionals = []
        for action in actions:
            if action.option_strings:
                optionals.append(action)
            else:
                positionals.append(action)

        # build full usage string
        format = self._format_actions_usage
        action_usage = format(optionals + positionals, groups)
        usage = ' '.join([s for s in [prog, action_usage] if s])

        # wrap the usage parts if it's too long
        text_width = self._width - self._current_indent
        if len(prefix) + len(usage) > text_width:

            # break usage into wrappable parts
            part_regexp = r'\(.*?\)+|\[.*?\]+|\S+'
            opt_usage = format(optionals, groups)
            pos_usage = format(positionals, groups)
            opt_parts = _re.findall(part_regexp, opt_usage)
            pos_parts = _re.findall(part_regexp, pos_usage)
            assert ' '.join(opt_parts) == opt_usage
            assert ' '.join(pos_parts) == pos_usage

            # helper for wrapping lines
            def get_lines(parts, indent, prefix=None):
                lines = []
                line = []
                if prefix is not None:
                    line_len = len(prefix) - 1
                else:
                    line_len = len(indent) - 1
                for part in parts:
                    # only flush when something has been collected;
                    # otherwise an over-long first part would emit a
                    # line consisting of nothing but the indent
                    if line_len + 1 + len(part) > text_width and line:
                        lines.append(indent + ' '.join(line))
                        line = []
                        line_len = len(indent) - 1
                    line.append(part)
                    line_len += len(part) + 1
                if line:
                    lines.append(indent + ' '.join(line))
                if prefix is not None:
                    # the first line follows the prefix, so drop its indent
                    lines[0] = lines[0][len(indent):]
                return lines

            # if prog is short, follow it with optionals or positionals
            if len(prefix) + len(prog) <= 0.75 * text_width:
                indent = ' ' * (len(prefix) + len(prog) + 1)
                if opt_parts:
                    lines = get_lines([prog] + opt_parts, indent, prefix)
                    lines.extend(get_lines(pos_parts, indent))
                elif pos_parts:
                    lines = get_lines([prog] + pos_parts, indent, prefix)
                else:
                    lines = [prog]

            # if prog is long, put it on its own line
            else:
                indent = ' ' * len(prefix)
                parts = opt_parts + pos_parts
                lines = get_lines(parts, indent)
                if len(lines) > 1:
                    lines = []
                    lines.extend(get_lines(opt_parts, indent))
                    lines.extend(get_lines(pos_parts, indent))
                lines = [prog] + lines

            # join lines into usage
            usage = '\n'.join(lines)

    # prefix with 'usage:'
    return '%s%s\n\n' % (prefix, usage)
def _format_action(self, action):
    """Return the help entry for *action* and any of its sub-actions.

    The entry is the action's invocation string followed by its wrapped
    help text.  Help text starts at ``help_position``; a short invocation
    shares its line with the first help line, a long one gets a line of
    its own.
    """
    # determine the required width and the entry label
    help_position = min(self._action_max_length + 2,
                        self._max_help_position)
    help_width = self._width - help_position
    action_width = help_position - self._current_indent - 2
    action_header = self._format_action_invocation(action)

    # no help; start on same line and add a final newline
    if not action.help:
        tup = self._current_indent, '', action_header
        action_header = '%*s%s\n' % tup

    # short action name; start on the same line and pad two spaces
    # (action_width reserves exactly these two columns, so the first
    # help line lands on help_position like the continuation lines)
    elif len(action_header) <= action_width:
        tup = self._current_indent, '', action_width, action_header
        action_header = '%*s%-*s  ' % tup
        indent_first = 0

    # long action name; start on the next line
    else:
        tup = self._current_indent, '', action_header
        action_header = '%*s%s\n' % tup
        indent_first = help_position

    # collect the pieces of the action help
    parts = [action_header]

    # if there was help for the action, add lines of help text
    if action.help:
        help_text = self._expand_help(action)
        help_lines = self._split_lines(help_text, help_width)
        parts.append('%*s%s\n' % (indent_first, '', help_lines[0]))
        for line in help_lines[1:]:
            parts.append('%*s%s\n' % (help_position, '', line))

    # or add a newline if the description doesn't end with one
    elif not action_header.endswith('\n'):
        parts.append('\n')

    # if there are any sub-actions, add their help as well
    for subaction in self._iter_indented_subactions(action):
        parts.append(self._format_action(subaction))

    # return a single string
    return self._join_parts(parts)
def _iter_indented_subactions(self, action):
    """Yield the sub-actions of *action* with the indent level raised.

    Actions without a ``_get_subactions`` attribute yield nothing and leave
    the indentation untouched.  Otherwise the formatter is indented before
    the first sub-action is produced and dedented once all of them have
    been consumed.
    """
    try:
        fetch_children = action._get_subactions
    except AttributeError:
        return
    self._indent()
    for child in fetch_children():
        yield child
    self._dedent()
def _get_action_name(argument):
    """Return a display name for *argument*, or None if it has none.

    Preference order: the option strings joined with '/', then the metavar,
    then the dest.  A metavar or dest equal to None or SUPPRESS is skipped.
    """
    if argument is None:
        return None
    if argument.option_strings:
        return '/'.join(argument.option_strings)
    unusable = (None, SUPPRESS)
    for attr in ('metavar', 'dest'):
        candidate = getattr(argument, attr)
        if candidate not in unusable:
            return candidate
    return None
class Action(_AttributeHolder):
    """Description of how command-line strings become a Python object.

    An ArgumentParser uses Action objects to hold everything it needs to
    parse one argument from one or more command-line strings.  Every
    constructor keyword is stored as an attribute of the same name.

    Keyword Arguments:

        - option_strings -- List of command-line option strings associated
            with this action.

        - dest -- Name of the attribute that will hold the created
            object(s).

        - nargs -- How many command-line arguments to consume.  By default
            one argument is consumed and a single value is produced.
            Other values include:
                - N (an integer) consumes N arguments (and produces a list)
                - '?' consumes zero or one arguments
                - '*' consumes zero or more arguments (and produces a list)
                - '+' consumes one or more arguments (and produces a list)
            The default differs from nargs=1: the default produces a single
            value, whereas nargs=1 produces a one-element list.

        - const -- Value produced when the option is given and its action
            takes no values.

        - default -- Value produced when the option is not given.

        - type -- Type the command-line arguments are converted to; one of
            'string', 'int', 'float', 'complex' or a callable accepting a
            single string argument.  None means 'string'.

        - choices -- Container of permitted values.  When not None, an
            exception is raised if a converted argument is not a member.

        - required -- True if the action must always be given on the
            command line.  Only meaningful for optional arguments.

        - help -- Help string describing the argument.

        - metavar -- Name used for the option's argument in the help
            string.  None means the 'dest' value is used.
    """

    def __init__(self,
                 option_strings,
                 dest,
                 nargs=None,
                 const=None,
                 default=None,
                 type=None,
                 choices=None,
                 required=False,
                 help=None,
                 metavar=None):
        # how the argument is spelled and where its value is stored
        self.option_strings = option_strings
        self.dest = dest
        # how values are consumed, converted and validated
        self.nargs = nargs
        self.const = const
        self.default = default
        self.type = type
        self.choices = choices
        self.required = required
        # presentation in help output
        self.help = help
        self.metavar = metavar

    def _get_kwargs(self):
        # attributes listed here, in this order, appear in the repr
        shown = ('option_strings', 'dest', 'nargs', 'const', 'default',
                 'type', 'choices', 'help', 'metavar')
        pairs = []
        for attr in shown:
            pairs.append((attr, getattr(self, attr)))
        return pairs

    def __call__(self, parser, namespace, values, option_string=None):
        # subclasses must supply the actual behaviour
        message = _('.__call__() not defined')
        raise NotImplementedError(message)
class _AppendAction(Action):
    """Action that appends each occurrence's values to a list at ``dest``."""

    def __init__(self,
                 option_strings,
                 dest,
                 nargs=None,
                 const=None,
                 default=None,
                 type=None,
                 choices=None,
                 required=False,
                 help=None,
                 metavar=None):
        # an append action must consume at least one argument string
        if nargs == 0:
            problem = ('nargs for append actions must be > 0; if arg '
                       'strings are not supplying the value to append, '
                       'the append const action may be more appropriate')
            raise ValueError(problem)
        # a const is only accepted together with nargs == OPTIONAL
        const_given = const is not None
        if const_given and nargs != OPTIONAL:
            raise ValueError('nargs must be %r to supply const' % OPTIONAL)
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            nargs=nargs,
            const=const,
            default=default,
            type=type,
            choices=choices,
            required=required,
            help=help,
            metavar=metavar)
        super(_AppendAction, self).__init__(**settings)

    def __call__(self, parser, namespace, values, option_string=None):
        # work on a copy so the list previously stored is not mutated
        existing = _ensure_value(namespace, self.dest, [])
        updated = _copy.copy(existing)
        updated.append(values)
        setattr(namespace, self.dest, updated)
class _CountAction(Action):
    """Action that adds one to the value at ``dest`` each time it fires."""

    def __init__(self,
                 option_strings,
                 dest,
                 default=None,
                 required=False,
                 help=None):
        # a counter consumes no argument strings, hence nargs=0
        settings = dict(
            option_strings=option_strings,
            dest=dest,
            nargs=0,
            default=default,
            required=required,
            help=help)
        super(_CountAction, self).__init__(**settings)

    def __call__(self, parser, namespace, values, option_string=None):
        # an unset (missing or None) counter starts from 0
        previous = _ensure_value(namespace, self.dest, 0)
        setattr(namespace, self.dest, previous + 1)
class FileType(object):
    """Factory for creating file object types

    Instances of FileType are typically passed as type= arguments to the
    ArgumentParser add_argument() method.

    Keyword Arguments:
        - mode -- A string indicating how the file is to be opened. Accepts the
            same values as the builtin open() function.
        - bufsize -- The file's desired buffer size. Accepts the same values as
            the builtin open() function.
    """

    def __init__(self, mode='r', bufsize=-1):
        self._mode = mode
        self._bufsize = bufsize

    def __call__(self, string):
        """Open *string* with the configured mode and buffer size.

        The argument "-" returns ``sys.stdin`` when the mode contains 'r'
        and ``sys.stdout`` when it contains 'w'; any other mode raises
        ValueError.  An IOError from open() is re-raised as
        ArgumentTypeError.
        """
        # the special argument "-" means sys.std{in,out}
        if string == '-':
            if 'r' in self._mode:
                return _sys.stdin
            elif 'w' in self._mode:
                return _sys.stdout
            else:
                msg = _('argument "-" with mode %r') % self._mode
                raise ValueError(msg)

        # all other arguments are used as file names
        try:
            return open(string, self._mode, self._bufsize)
        # "except X as e" is accepted by Python 2.6+ and Python 3, whereas
        # the older "except X, e" spelling is a syntax error on Python 3
        except IOError as e:
            message = _("can't open '%s': %s")
            raise ArgumentTypeError(message % (string, e))

    def __repr__(self):
        # a bufsize of -1 (the default) is left out of the repr
        args = self._mode, self._bufsize
        args_str = ', '.join(repr(arg) for arg in args if arg != -1)
        return '%s(%s)' % (type(self).__name__, args_str)
def register(self, registry_name, value, object):
    """Store *object* under key *value* in the registry *registry_name*.

    The named registry is created as an empty dict on first use.
    """
    registries = self._registries
    if registry_name not in registries:
        registries[registry_name] = {}
    registries[registry_name][value] = object
def get_default(self, dest):
    """Return the default registered for *dest*, or None if there is none.

    The first action whose ``dest`` matches and whose ``default`` is not
    None wins; failing that, the container-level defaults are consulted.
    """
    for candidate in self._actions:
        if candidate.dest == dest:
            if candidate.default is not None:
                return candidate.default
    return self._defaults.get(dest, None)
def _add_action(self, action):
    """Register *action* with this container and return it.

    Conflicts are checked first; the action is then appended to the action
    list, tagged with this container, and indexed under each of its option
    strings.  If any option string matches the negative-number pattern, the
    shared ``_has_negative_number_optionals`` list is flagged (once).
    """
    # resolve any conflicts before touching container state
    self._check_conflict(action)

    # record the action and remember which container holds it
    action.container = self
    self._actions.append(action)

    index = self._option_string_actions
    looks_negative = self._negative_number_matcher.match
    for flag in action.option_strings:
        # index the action by this option string
        index[flag] = action
        # note option strings that look like negative numbers
        if looks_negative(flag) and not self._has_negative_number_optionals:
            self._has_negative_number_optionals.append(True)

    return action
def _get_positional_kwargs(self, dest, **kwargs):
    """Return the Action keyword arguments for a positional named *dest*.

    'required' may not be passed explicitly (TypeError).  It is set to
    True unless nargs is OPTIONAL, or nargs is ZERO_OR_MORE and a
    'default' was supplied.  The result carries *dest* and an empty
    ``option_strings`` list.
    """
    # positionals decide 'required' themselves
    if 'required' in kwargs:
        raise TypeError(_("'required' is an invalid argument for positionals"))

    nargs = kwargs.get('nargs')
    if nargs not in [OPTIONAL, ZERO_OR_MORE]:
        # at least one value is always needed
        must_supply = True
    else:
        # '*' without a default is still treated as required
        must_supply = nargs == ZERO_OR_MORE and 'default' not in kwargs
    if must_supply:
        kwargs['required'] = True

    # positionals have no option strings
    result = dict(kwargs)
    result.update(dest=dest, option_strings=[])
    return result
dest_option_string = option_strings[0] dest = dest_option_string.lstrip(self.prefix_chars) if not dest: msg = _('dest= is required for options like %r') raise ValueError(msg % option_string) dest = dest.replace('-', '_') # return the updated keyword arguments return dict(kwargs, dest=dest, option_strings=option_strings) def _pop_action_class(self, kwargs, default=None): action = kwargs.pop('action', default) return self._registry_get('action', action, action) def _get_handler(self): # determine function from conflict handler string handler_func_name = '_handle_conflict_%s' % self.conflict_handler try: return getattr(self, handler_func_name) except AttributeError: msg = _('invalid conflict_resolution value: %r') raise ValueError(msg % self.conflict_handler) def _check_conflict(self, action): # find all options that conflict with this option confl_optionals = [] for option_string in action.option_strings: if option_string in self._option_string_actions: confl_optional = self._option_string_actions[option_string] confl_optionals.append((option_string, confl_optional)) # resolve any conflicts if confl_optionals: conflict_handler = self._get_handler() conflict_handler(action, confl_optionals) def _handle_conflict_error(self, action, conflicting_actions): message = _('conflicting option string(s): %s') conflict_string = ', '.join([option_string for option_string, action in conflicting_actions]) raise ArgumentError(action, message % conflict_string) def _handle_conflict_resolve(self, action, conflicting_actions): # remove all conflicting options for option_string, action in conflicting_actions: # remove the conflicting option action.option_strings.remove(option_string) self._option_string_actions.pop(option_string, None) # if the option now has no option string, remove it from the # container holding it if not action.option_strings: action.container._remove_action(action) class _ArgumentGroup(_ActionsContainer): def __init__(self, container, title=None, description=None, 
**kwargs): # add any missing keyword arguments by checking the container update = kwargs.setdefault update('conflict_handler', container.conflict_handler) update('prefix_chars', container.prefix_chars) update('argument_default', container.argument_default) super_init = super(_ArgumentGroup, self).__init__ super_init(description=description, **kwargs) # group attributes self.title = title self._group_actions = [] # share most attributes with the container self._registries = container._registries self._actions = container._actions self._option_string_actions = container._option_string_actions self._defaults = container._defaults self._has_negative_number_optionals = \ container._has_negative_number_optionals self._mutually_exclusive_groups = container._mutually_exclusive_groups def _add_action(self, action): action = super(_ArgumentGroup, self)._add_action(action) self._group_actions.append(action) return action def _remove_action(self, action): super(_ArgumentGroup, self)._remove_action(action) self._group_actions.remove(action) class _MutuallyExclusiveGroup(_ArgumentGroup): def __init__(self, container, required=False): super(_MutuallyExclusiveGroup, self).__init__(container) self.required = required self._container = container def _add_action(self, action): if action.required: msg = _('mutually exclusive arguments must be optional') raise ValueError(msg) action = self._container._add_action(action) self._group_actions.append(action) return action def _remove_action(self, action): self._container._remove_action(action) self._group_actions.remove(action) class ArgumentParser(_AttributeHolder, _ActionsContainer): """Object for parsing command line strings into Python objects. 
Keyword Arguments: - prog -- The name of the program (default: sys.argv[0]) - usage -- A usage message (default: auto-generated from arguments) - description -- A description of what the program does - epilog -- Text following the argument descriptions - parents -- Parsers whose arguments should be copied into this one - formatter_class -- HelpFormatter class for printing help messages - prefix_chars -- Characters that prefix optional arguments - fromfile_prefix_chars -- Characters that prefix files containing additional arguments - argument_default -- The default value for all arguments - conflict_handler -- String indicating how to handle conflicts - add_help -- Add a -h/-help option """ def __init__(self, prog=None, usage=None, description=None, epilog=None, version=None, parents=[], formatter_class=HelpFormatter, prefix_chars='-', fromfile_prefix_chars=None, argument_default=None, conflict_handler='error', add_help=True): if version is not None: import warnings warnings.warn( """The "version" argument to ArgumentParser is deprecated. 
""" """Please use """ """"add_argument(..., action='version', version="N", ...)" """ """instead""", DeprecationWarning) superinit = super(ArgumentParser, self).__init__ superinit(description=description, prefix_chars=prefix_chars, argument_default=argument_default, conflict_handler=conflict_handler) # default setting for prog if prog is None: prog = _os.path.basename(_sys.argv[0]) self.prog = prog self.usage = usage self.epilog = epilog self.version = version self.formatter_class = formatter_class self.fromfile_prefix_chars = fromfile_prefix_chars self.add_help = add_help add_group = self.add_argument_group self._positionals = add_group(_('positional arguments')) self._optionals = add_group(_('optional arguments')) self._subparsers = None # register types def identity(string): return string self.register('type', None, identity) # add help and version arguments if necessary # (using explicit default to override global argument_default) if '-' in prefix_chars: default_prefix = '-' else: default_prefix = prefix_chars[0] if self.add_help: self.add_argument( default_prefix+'h', default_prefix*2+'help', action='help', default=SUPPRESS, help=_('show this help message and exit')) if self.version: self.add_argument( default_prefix+'v', default_prefix*2+'version', action='version', default=SUPPRESS, version=self.version, help=_("show program's version number and exit")) # add parent arguments and defaults for parent in parents: self._add_container_actions(parent) try: defaults = parent._defaults except AttributeError: pass else: self._defaults.update(defaults) # ======================= # Pretty __repr__ methods # ======================= def _get_kwargs(self): names = [ 'prog', 'usage', 'description', 'version', 'formatter_class', 'conflict_handler', 'add_help', ] return [(name, getattr(self, name)) for name in names] # ================================== # Optional/Positional adding methods # ================================== def add_subparsers(self, **kwargs): if 
self._subparsers is not None: self.error(_('cannot have multiple subparser arguments')) # add the parser class to the arguments if it's not present kwargs.setdefault('parser_class', type(self)) if 'title' in kwargs or 'description' in kwargs: title = _(kwargs.pop('title', 'subcommands')) description = _(kwargs.pop('description', None)) self._subparsers = self.add_argument_group(title, description) else: self._subparsers = self._positionals # prog defaults to the usage message of this parser, skipping # optional arguments and with no "usage:" prefix if kwargs.get('prog') is None: formatter = self._get_formatter() positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups formatter.add_usage(self.usage, positionals, groups, '') kwargs['prog'] = formatter.format_help().strip() # create the parsers action and add it to the positionals list parsers_class = self._pop_action_class(kwargs, 'parsers') action = parsers_class(option_strings=[], **kwargs) self._subparsers._add_action(action) # return the created parsers action return action def _add_action(self, action): if action.option_strings: self._optionals._add_action(action) else: self._positionals._add_action(action) return action def _get_optional_actions(self): return [action for action in self._actions if action.option_strings] def _get_positional_actions(self): return [action for action in self._actions if not action.option_strings] # ===================================== # Command line argument parsing methods # ===================================== def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: msg = _('unrecognized arguments: %s') self.error(msg % ' '.join(argv)) return args def parse_known_args(self, args=None, namespace=None): # args default to the system args if args is None: args = _sys.argv[1:] # default Namespace built from parser defaults if namespace is None: namespace = Namespace() # add any action defaults that 
aren't present for action in self._actions: if action.dest is not SUPPRESS: if not hasattr(namespace, action.dest): if action.default is not SUPPRESS: default = action.default if isinstance(action.default, basestring): default = self._get_value(action, default) setattr(namespace, action.dest, default) # add any parser defaults that aren't present for dest in self._defaults: if not hasattr(namespace, dest): setattr(namespace, dest, self._defaults[dest]) # parse the arguments and exit if there are any errors try: namespace, args = self._parse_known_args(args, namespace) if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) return namespace, args except ArgumentError: err = _sys.exc_info()[1] self.error(str(err)) def _parse_known_args(self, arg_strings, namespace): # replace arg strings that are file references if self.fromfile_prefix_chars is not None: arg_strings = self._read_args_from_files(arg_strings) # map all mutually exclusive arguments to the other arguments # they can't occur with action_conflicts = {} for mutex_group in self._mutually_exclusive_groups: group_actions = mutex_group._group_actions for i, mutex_action in enumerate(mutex_group._group_actions): conflicts = action_conflicts.setdefault(mutex_action, []) conflicts.extend(group_actions[:i]) conflicts.extend(group_actions[i + 1:]) # find all option indices, and determine the arg_string_pattern # which has an 'O' if there is an option at an index, # an 'A' if there is an argument, or a '-' if there is a '--' option_string_indices = {} arg_string_pattern_parts = [] arg_strings_iter = iter(arg_strings) for i, arg_string in enumerate(arg_strings_iter): # all args after -- are non-options if arg_string == '--': arg_string_pattern_parts.append('-') for arg_string in arg_strings_iter: arg_string_pattern_parts.append('A') # otherwise, add the arg to the arg strings # and note the index if it was an option else: 
option_tuple = self._parse_optional(arg_string) if option_tuple is None: pattern = 'A' else: option_string_indices[i] = option_tuple pattern = 'O' arg_string_pattern_parts.append(pattern) # join the pieces together to form the pattern arg_strings_pattern = ''.join(arg_string_pattern_parts) # converts arg strings to the appropriate and then takes the action seen_actions = set() seen_non_default_actions = set() def take_action(action, argument_strings, option_string=None): seen_actions.add(action) argument_values = self._get_values(action, argument_strings) # error if this argument is not allowed with other previously # seen arguments, assuming that actions that use the default # value don't really count as "present" if argument_values is not action.default: seen_non_default_actions.add(action) for conflict_action in action_conflicts.get(action, []): if conflict_action in seen_non_default_actions: msg = _('not allowed with argument %s') action_name = _get_action_name(conflict_action) raise ArgumentError(action, msg % action_name) # take the action if we didn't receive a SUPPRESS value # (e.g. from a default) if argument_values is not SUPPRESS: action(self, namespace, argument_values, option_string) # function to convert arg_strings into an optional action def consume_optional(start_index): # get the optional identified at this index option_tuple = option_string_indices[start_index] action, option_string, explicit_arg = option_tuple # identify additional optionals in the same arg string # (e.g. 
-xyz is the same as -x -y -z if no args are required) match_argument = self._match_argument action_tuples = [] while True: # if we found no optional action, skip it if action is None: extras.append(arg_strings[start_index]) return start_index + 1 # if there is an explicit argument, try to match the # optional's string arguments to only this if explicit_arg is not None: arg_count = match_argument(action, 'A') # if the action is a single-dash option and takes no # arguments, try to parse more single-dash options out # of the tail of the option string chars = self.prefix_chars if arg_count == 0 and option_string[1] not in chars: action_tuples.append((action, [], option_string)) char = option_string[0] option_string = char + explicit_arg[0] new_explicit_arg = explicit_arg[1:] or None optionals_map = self._option_string_actions if option_string in optionals_map: action = optionals_map[option_string] explicit_arg = new_explicit_arg else: msg = _('ignored explicit argument %r') raise ArgumentError(action, msg % explicit_arg) # if the action expect exactly one argument, we've # successfully matched the option; exit the loop elif arg_count == 1: stop = start_index + 1 args = [explicit_arg] action_tuples.append((action, args, option_string)) break # error if a double-dash option did not use the # explicit argument else: msg = _('ignored explicit argument %r') raise ArgumentError(action, msg % explicit_arg) # if there is no explicit argument, try to match the # optional's string arguments with the following strings # if successful, exit the loop else: start = start_index + 1 selected_patterns = arg_strings_pattern[start:] arg_count = match_argument(action, selected_patterns) stop = start + arg_count args = arg_strings[start:stop] action_tuples.append((action, args, option_string)) break # add the Optional to the list and return the index at which # the Optional's string args stopped assert action_tuples for action, args, option_string in action_tuples: take_action(action, 
args, option_string) return stop # the list of Positionals left to be parsed; this is modified # by consume_positionals() positionals = self._get_positional_actions() # function to convert arg_strings into positional actions def consume_positionals(start_index): # match as many Positionals as possible match_partial = self._match_arguments_partial selected_pattern = arg_strings_pattern[start_index:] arg_counts = match_partial(positionals, selected_pattern) # slice off the appropriate arg strings for each Positional # and add the Positional and its args to the list for action, arg_count in zip(positionals, arg_counts): args = arg_strings[start_index: start_index + arg_count] start_index += arg_count take_action(action, args) # slice off the Positionals that we just parsed and return the # index at which the Positionals' string args stopped positionals[:] = positionals[len(arg_counts):] return start_index # consume Positionals and Optionals alternately, until we have # passed the last option string extras = [] start_index = 0 if option_string_indices: max_option_string_index = max(option_string_indices) else: max_option_string_index = -1 while start_index <= max_option_string_index: # consume any Positionals preceding the next option next_option_string_index = min([ index for index in option_string_indices if index >= start_index]) if start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) # only try to parse the next optional if we didn't consume # the option string during the positionals parsing if positionals_end_index > start_index: start_index = positionals_end_index continue else: start_index = positionals_end_index # if we consumed all the positionals we could and we're not # at the index of an option string, there were extra arguments if start_index not in option_string_indices: strings = arg_strings[start_index:next_option_string_index] extras.extend(strings) start_index = next_option_string_index # consume the next 
optional and any arguments for it start_index = consume_optional(start_index) # consume any positionals following the last Optional stop_index = consume_positionals(start_index) # if we didn't consume all the argument strings, there were extras extras.extend(arg_strings[stop_index:]) # if we didn't use all the Positional objects, there were too few # arg strings supplied. if positionals: self.error(_('too few arguments')) # make sure all required actions were present for action in self._actions: if action.required: if action not in seen_actions: name = _get_action_name(action) self.error(_('argument %s is required') % name) # make sure all required groups had one option present for group in self._mutually_exclusive_groups: if group.required: for action in group._group_actions: if action in seen_non_default_actions: break # if no actions were used, report the error else: names = [_get_action_name(action) for action in group._group_actions if action.help is not SUPPRESS] msg = _('one of the arguments %s is required') self.error(msg % ' '.join(names)) # return the updated namespace and the extra arguments return namespace, extras def _read_args_from_files(self, arg_strings): # expand arguments referencing files new_arg_strings = [] for arg_string in arg_strings: # for regular arguments, just add them back into the list if arg_string[0] not in self.fromfile_prefix_chars: new_arg_strings.append(arg_string) # replace arguments referencing files with the file content else: try: args_file = open(arg_string[1:]) try: arg_strings = [] for arg_line in args_file.read().splitlines(): for arg in self.convert_arg_line_to_args(arg_line): arg_strings.append(arg) arg_strings = self._read_args_from_files(arg_strings) new_arg_strings.extend(arg_strings) finally: args_file.close() except IOError: err = _sys.exc_info()[1] self.error(str(err)) # return the modified argument list return new_arg_strings def convert_arg_line_to_args(self, arg_line): return [arg_line] def 
_match_argument(self, action, arg_strings_pattern): # match the pattern for this action to the arg strings nargs_pattern = self._get_nargs_pattern(action) match = _re.match(nargs_pattern, arg_strings_pattern) # raise an exception if we weren't able to find a match if match is None: nargs_errors = { None: _('expected one argument'), OPTIONAL: _('expected at most one argument'), ONE_OR_MORE: _('expected at least one argument'), } default = _('expected %s argument(s)') % action.nargs msg = nargs_errors.get(action.nargs, default) raise ArgumentError(action, msg) # return the number of arguments matched return len(match.group(1)) def _match_arguments_partial(self, actions, arg_strings_pattern): # progressively shorten the actions list by slicing off the # final actions until we find a match result = [] for i in range(len(actions), 0, -1): actions_slice = actions[:i] pattern = ''.join([self._get_nargs_pattern(action) for action in actions_slice]) match = _re.match(pattern, arg_strings_pattern) if match is not None: result.extend([len(string) for string in match.groups()]) break # return the list of arg string counts return result def _parse_optional(self, arg_string): # if it's an empty string, it was meant to be a positional if not arg_string: return None # if it doesn't start with a prefix, it was meant to be positional if not arg_string[0] in self.prefix_chars: return None # if the option string is present in the parser, return the action if arg_string in self._option_string_actions: action = self._option_string_actions[arg_string] return action, arg_string, None # if it's just a single character, it was meant to be positional if len(arg_string) == 1: return None # if the option string before the "=" is present, return the action if '=' in arg_string: option_string, explicit_arg = arg_string.split('=', 1) if option_string in self._option_string_actions: action = self._option_string_actions[option_string] return action, option_string, explicit_arg # search through all 
possible prefixes of the option string # and all actions in the parser for possible interpretations option_tuples = self._get_option_tuples(arg_string) # if multiple actions match, the option string was ambiguous if len(option_tuples) > 1: options = ', '.join([option_string for action, option_string, explicit_arg in option_tuples]) tup = arg_string, options self.error(_('ambiguous option: %s could match %s') % tup) # if exactly one action matched, this segmentation is good, # so return the parsed action elif len(option_tuples) == 1: option_tuple, = option_tuples return option_tuple # if it was not found as an option, but it looks like a negative # number, it was meant to be positional # unless there are negative-number-like options if self._negative_number_matcher.match(arg_string): if not self._has_negative_number_optionals: return None # if it contains a space, it was meant to be a positional if ' ' in arg_string: return None # it was meant to be an optional but there is no such option # in this parser (though it might be a valid option in a subparser) return None, arg_string, None def _get_option_tuples(self, option_string): result = [] # option strings starting with two prefix characters are only # split at the '=' chars = self.prefix_chars if option_string[0] in chars and option_string[1] in chars: if '=' in option_string: option_prefix, explicit_arg = option_string.split('=', 1) else: option_prefix = option_string explicit_arg = None for option_string in self._option_string_actions: if option_string.startswith(option_prefix): action = self._option_string_actions[option_string] tup = action, option_string, explicit_arg result.append(tup) # single character options can be concatenated with their arguments # but multiple character options always have to have their argument # separate elif option_string[0] in chars and option_string[1] not in chars: option_prefix = option_string explicit_arg = None short_option_prefix = option_string[:2] short_explicit_arg = 
option_string[2:] for option_string in self._option_string_actions: if option_string == short_option_prefix: action = self._option_string_actions[option_string] tup = action, option_string, short_explicit_arg result.append(tup) elif option_string.startswith(option_prefix): action = self._option_string_actions[option_string] tup = action, option_string, explicit_arg result.append(tup) # shouldn't ever get here else: self.error(_('unexpected option string: %s') % option_string) # return the collected option tuples return result def _get_nargs_pattern(self, action): # in all examples below, we have to allow for '--' args # which are represented as '-' in the pattern nargs = action.nargs # the default (None) is assumed to be a single argument if nargs is None: nargs_pattern = '(-*A-*)' # allow zero or one arguments elif nargs == OPTIONAL: nargs_pattern = '(-*A?-*)' # allow zero or more arguments elif nargs == ZERO_OR_MORE: nargs_pattern = '(-*[A-]*)' # allow one or more arguments elif nargs == ONE_OR_MORE: nargs_pattern = '(-*A[A-]*)' # allow any number of options or arguments elif nargs == REMAINDER: nargs_pattern = '([-AO]*)' # allow one argument followed by any number of options or arguments elif nargs == PARSER: nargs_pattern = '(-*A[-AO]*)' # all others should be integers else: nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) # if this is an optional action, -- is not allowed if action.option_strings: nargs_pattern = nargs_pattern.replace('-*', '') nargs_pattern = nargs_pattern.replace('-', '') # return the pattern return nargs_pattern # ======================== # Value conversion methods # ======================== def _get_values(self, action, arg_strings): # for everything but PARSER args, strip out '--' if action.nargs not in [PARSER, REMAINDER]: arg_strings = [s for s in arg_strings if s != '--'] # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: if action.option_strings: value = action.const else: value 
= action.default if isinstance(value, basestring): value = self._get_value(action, value) self._check_value(action, value) # when nargs='*' on a positional, if there were no command-line # args, use the default if it is anything other than None elif (not arg_strings and action.nargs == ZERO_OR_MORE and not action.option_strings): if action.default is not None: value = action.default else: value = arg_strings self._check_value(action, value) # single argument or optional argument produces a single value elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: arg_string, = arg_strings value = self._get_value(action, arg_string) self._check_value(action, value) # REMAINDER arguments convert all values, checking none elif action.nargs == REMAINDER: value = [self._get_value(action, v) for v in arg_strings] # PARSER arguments convert all values, but check only the first elif action.nargs == PARSER: value = [self._get_value(action, v) for v in arg_strings] self._check_value(action, value[0]) # all other types of nargs produce a list else: value = [self._get_value(action, v) for v in arg_strings] for v in value: self._check_value(action, v) # return the converted value return value def _get_value(self, action, arg_string): type_func = self._registry_get('type', action.type, action.type) if not _callable(type_func): msg = _('%r is not callable') raise ArgumentError(action, msg % type_func) # convert the value to the appropriate type try: result = type_func(arg_string) # ArgumentTypeErrors indicate errors except ArgumentTypeError: name = getattr(action.type, '__name__', repr(action.type)) msg = str(_sys.exc_info()[1]) raise ArgumentError(action, msg) # TypeErrors or ValueErrors also indicate errors except (TypeError, ValueError): name = getattr(action.type, '__name__', repr(action.type)) msg = _('invalid %s value: %r') raise ArgumentError(action, msg % (name, arg_string)) # return the converted value return result def _check_value(self, action, value): # converted 
value must be one of the choices (if specified) if action.choices is not None and value not in action.choices: tup = value, ', '.join(map(repr, action.choices)) msg = _('invalid choice: %r (choose from %s)') % tup raise ArgumentError(action, msg) # ======================= # Help-formatting methods # ======================= def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, self._mutually_exclusive_groups) return formatter.format_help() def format_help(self): formatter = self._get_formatter() # usage formatter.add_usage(self.usage, self._actions, self._mutually_exclusive_groups) # description formatter.add_text(self.description) # positionals, optionals and user-defined groups for action_group in self._action_groups: formatter.start_section(action_group.title) formatter.add_text(action_group.description) formatter.add_arguments(action_group._group_actions) formatter.end_section() # epilog formatter.add_text(self.epilog) # determine help from format above return formatter.format_help() def format_version(self): import warnings warnings.warn( 'The format_version method is deprecated -- the "version" ' 'argument to ArgumentParser is no longer supported.', DeprecationWarning) formatter = self._get_formatter() formatter.add_text(self.version) return formatter.format_help() def _get_formatter(self): return self.formatter_class(prog=self.prog) # ===================== # Help-printing methods # ===================== def print_usage(self, file=None): if file is None: file = _sys.stdout self._print_message(self.format_usage(), file) def print_help(self, file=None): if file is None: file = _sys.stdout self._print_message(self.format_help(), file) def print_version(self, file=None): import warnings warnings.warn( 'The print_version method is deprecated -- the "version" ' 'argument to ArgumentParser is no longer supported.', DeprecationWarning) self._print_message(self.format_version(), file) def _print_message(self, message, 
file=None): if message: if file is None: file = _sys.stderr file.write(message) # =============== # Exiting methods # =============== def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) _sys.exit(status) def error(self, message): """error(message: string) Prints a usage message incorporating the message to stderr and exits. If you override this in a subclass, it should not return -- it should either exit or raise an exception. """ self.print_usage(_sys.stderr) self.exit(2, _('%s: error: %s\n') % (self.prog, message)) openvswitch-2.0.1+git20140120/python/compat/automake.mk000066400000000000000000000001041226605124000224100ustar00rootroot00000000000000EXTRA_DIST += \ python/compat/uuid.py \ python/compat/argparse.py openvswitch-2.0.1+git20140120/python/compat/uuid.py000066400000000000000000000544301226605124000216040ustar00rootroot00000000000000# This file is from Python 2.5. It has been modified by adding this # license header, which is copied from the LICENSE file distributed # with Python. # # PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 # -------------------------------------------- # # 1. This LICENSE AGREEMENT is between the Python Software Foundation # ("PSF"), and the Individual or Organization ("Licensee") accessing and # otherwise using this software ("Python") in source or binary form and # its associated documentation. # # 2. Subject to the terms and conditions of this License Agreement, PSF # hereby grants Licensee a nonexclusive, royalty-free, world-wide # license to reproduce, analyze, test, perform and/or display publicly, # prepare derivative works, distribute, and otherwise use Python # alone or in any derivative version, provided, however, that PSF's # License Agreement and PSF's notice of copyright, i.e., "Copyright (c) # 2001, 2002, 2003, 2004, 2005, 2006, 2007 Python Software Foundation; # All Rights Reserved" are retained in Python alone or in any derivative # version prepared by Licensee. # # 3. 
# In the event Licensee prepares a derivative work that is based on
# or incorporates Python or any part thereof, and wants to make
# the derivative work available to others as provided herein, then
# Licensee hereby agrees to include in any such work a brief summary of
# the changes made to Python.
#
# 4. PSF is making Python available to Licensee on an "AS IS"
# basis.  PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
# IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
# DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
# FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
# INFRINGE ANY THIRD PARTY RIGHTS.
#
# 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
# FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
# A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
# OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
#
# 6. This License Agreement will automatically terminate upon a material
# breach of its terms and conditions.
#
# 7. Nothing in this License Agreement shall be deemed to create any
# relationship of agency, partnership, or joint venture between PSF and
# Licensee.  This License Agreement does not grant permission to use PSF
# trademarks or trade name in a trademark sense to endorse or promote
# products or services of Licensee, or any third party.
#
# 8. By copying, installing or otherwise using Python, Licensee
# agrees to be bound by the terms and conditions of this License
# Agreement.

r"""UUID objects (universally unique identifiers) according to RFC 4122.

This module provides immutable UUID objects (class UUID) and the functions
uuid1(), uuid3(), uuid4(), uuid5() for generating version 1, 3, 4, and 5
UUIDs as specified in RFC 4122.

If all you want is a unique ID, you should probably call uuid1() or uuid4().
Note that uuid1() may compromise privacy since it creates a UUID containing
the computer's network address.  uuid4() creates a random UUID.

Typical usage:

    >>> import uuid

    # make a UUID based on the host ID and current time
    >>> uuid.uuid1()
    UUID('a8098c1a-f86e-11da-bd1a-00112444be1e')

    # make a UUID using an MD5 hash of a namespace UUID and a name
    >>> uuid.uuid3(uuid.NAMESPACE_DNS, 'python.org')
    UUID('6fa459ea-ee8a-3ca4-894e-db77e160355e')

    # make a random UUID
    >>> uuid.uuid4()
    UUID('16fd2706-8baf-433b-82eb-8c7fada847da')

    # make a UUID using a SHA-1 hash of a namespace UUID and a name
    >>> uuid.uuid5(uuid.NAMESPACE_DNS, 'python.org')
    UUID('886313e1-3b8a-5372-9b90-0c9aee199e5d')

    # make a UUID from a string of hex digits (braces and hyphens ignored)
    >>> x = uuid.UUID('{00010203-0405-0607-0809-0a0b0c0d0e0f}')

    # convert a UUID to a string of hex digits in standard form
    >>> str(x)
    '00010203-0405-0607-0809-0a0b0c0d0e0f'

    # get the raw 16 bytes of the UUID
    >>> x.bytes
    '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f'

    # make a UUID from a 16-byte string
    >>> uuid.UUID(bytes=x.bytes)
    UUID('00010203-0405-0607-0809-0a0b0c0d0e0f')
"""

__author__ = 'Ka-Ping Yee '

# The four possible values of UUID.variant (see UUID.get_variant()).
RESERVED_NCS, RFC_4122, RESERVED_MICROSOFT, RESERVED_FUTURE = [
    'reserved for NCS compatibility', 'specified in RFC 4122',
    'reserved for Microsoft compatibility', 'reserved for future definition']

class UUID(object):
    """Instances of the UUID class represent UUIDs as specified in RFC 4122.
    UUID objects are immutable, hashable, and usable as dictionary keys.
    Converting a UUID to a string with str() yields something in the form
    '12345678-1234-1234-1234-123456789abc'.  The UUID constructor accepts
    five possible forms: a similar string of hexadecimal digits, or a tuple
    of six integer fields (with 32-bit, 16-bit, 16-bit, 8-bit, 8-bit, and
    48-bit values respectively) as an argument named 'fields', or a string
    of 16 bytes (with all the integer fields in big-endian order) as an
    argument named 'bytes', or a string of 16 bytes (with the first three
    fields in little-endian order) as an argument named 'bytes_le', or a
    single 128-bit integer as an argument named 'int'.

    UUIDs have these read-only attributes:

        bytes       the UUID as a 16-byte string (containing the six
                    integer fields in big-endian byte order)

        bytes_le    the UUID as a 16-byte string (with time_low, time_mid,
                    and time_hi_version in little-endian byte order)

        fields      a tuple of the six integer fields of the UUID,
                    which are also available as six individual attributes
                    and two derived attributes:

            time_low                the first 32 bits of the UUID
            time_mid                the next 16 bits of the UUID
            time_hi_version         the next 16 bits of the UUID
            clock_seq_hi_variant    the next 8 bits of the UUID
            clock_seq_low           the next 8 bits of the UUID
            node                    the last 48 bits of the UUID

            time                    the 60-bit timestamp
            clock_seq               the 14-bit sequence number

        hex         the UUID as a 32-character hexadecimal string

        int         the UUID as a 128-bit integer

        urn         the UUID as a URN as specified in RFC 4122

        variant     the UUID variant (one of the constants RESERVED_NCS,
                    RFC_4122, RESERVED_MICROSOFT, or RESERVED_FUTURE)

        version     the UUID version number (1 through 5, meaningful only
                    when the variant is RFC_4122)
    """

    def __init__(self, hex=None, bytes=None, bytes_le=None, fields=None,
                       int=None, version=None):
        r"""Create a UUID from either a string of 32 hexadecimal digits,
        a string of 16 bytes as the 'bytes' argument, a string of 16 bytes
        in little-endian order as the 'bytes_le' argument, a tuple of six
        integers (32-bit time_low, 16-bit time_mid, 16-bit time_hi_version,
        8-bit clock_seq_hi_variant, 8-bit clock_seq_low, 48-bit node) as
        the 'fields' argument, or a single 128-bit integer as the 'int'
        argument.  When a string of hex digits is given, curly braces,
        hyphens, and a URN prefix are all optional.  For example, these
        expressions all yield the same UUID:

        UUID('{12345678-1234-5678-1234-567812345678}')
        UUID('12345678123456781234567812345678')
        UUID('urn:uuid:12345678-1234-5678-1234-567812345678')
        UUID(bytes='\x12\x34\x56\x78'*4)
        UUID(bytes_le='\x78\x56\x34\x12\x34\x12\x78\x56' +
                      '\x12\x34\x56\x78\x12\x34\x56\x78')
        UUID(fields=(0x12345678, 0x1234, 0x5678, 0x12, 0x34, 0x567812345678))
        UUID(int=0x12345678123456781234567812345678)

        Exactly one of 'hex', 'bytes', 'bytes_le', 'fields', or 'int' must
        be given.  The 'version' argument is optional; if given, the resulting
        UUID will have its variant and version set according to RFC 4122,
        overriding the given 'hex', 'bytes', 'bytes_le', 'fields', or 'int'.
        """

        # Exactly one of the five source arguments may be non-None.
        if [hex, bytes, bytes_le, fields, int].count(None) != 4:
            raise TypeError('need one of hex, bytes, bytes_le, fields, or int')
        # Each accepted form below is normalized to the single integer 'int'.
        if hex is not None:
            hex = hex.replace('urn:', '').replace('uuid:', '')
            hex = hex.strip('{}').replace('-', '')
            if len(hex) != 32:
                raise ValueError('badly formed hexadecimal UUID string')
            int = long(hex, 16)
        if bytes_le is not None:
            if len(bytes_le) != 16:
                raise ValueError('bytes_le is not a 16-char string')
            # Byte-swap the first three fields, then fall through to the
            # 'bytes' handling below.
            bytes = (bytes_le[3] + bytes_le[2] + bytes_le[1] + bytes_le[0] +
                     bytes_le[5] + bytes_le[4] + bytes_le[7] + bytes_le[6] +
                     bytes_le[8:])
        if bytes is not None:
            if len(bytes) != 16:
                raise ValueError('bytes is not a 16-char string')
            int = long(('%02x'*16) % tuple(map(ord, bytes)), 16)
        if fields is not None:
            if len(fields) != 6:
                raise ValueError('fields is not a 6-tuple')
            (time_low, time_mid, time_hi_version,
             clock_seq_hi_variant, clock_seq_low, node) = fields
            if not 0 <= time_low < 1<<32L:
                raise ValueError('field 1 out of range (need a 32-bit value)')
            if not 0 <= time_mid < 1<<16L:
                raise ValueError('field 2 out of range (need a 16-bit value)')
            if not 0 <= time_hi_version < 1<<16L:
                raise ValueError('field 3 out of range (need a 16-bit value)')
            if not 0 <= clock_seq_hi_variant < 1<<8L:
                raise ValueError('field 4 out of range (need an 8-bit value)')
            if not 0 <= clock_seq_low < 1<<8L:
                raise ValueError('field 5 out of range (need an 8-bit value)')
            if not 0 <= node < 1<<48L:
                raise ValueError('field 6 out of range (need a 48-bit value)')
            clock_seq = (clock_seq_hi_variant << 8L) | clock_seq_low
            int = ((time_low << 96L) | (time_mid << 80L) |
                   (time_hi_version << 64L) | (clock_seq << 48L) | node)
        if int is not None:
            if not 0 <= int < 1<<128L:
                raise ValueError('int is out of range (need a 128-bit value)')
        if version is not None:
            if not 1 <= version <= 5:
                raise ValueError('illegal version number')
            # Set the variant to RFC 4122.
            int &= ~(0xc000 << 48L)
            int |= 0x8000 << 48L
            # Set the version number.
            int &= ~(0xf000 << 64L)
            int |= version << 76L
        # Written through __dict__ because __setattr__ always raises.
        self.__dict__['int'] = int

    def __cmp__(self, other):
        if isinstance(other, UUID):
            return cmp(self.int, other.int)
        return NotImplemented

    def __hash__(self):
        return hash(self.int)

    def __int__(self):
        return self.int

    def __repr__(self):
        return 'UUID(%r)' % str(self)

    def __setattr__(self, name, value):
        raise TypeError('UUID objects are immutable')

    def __str__(self):
        hex = '%032x' % self.int
        return '%s-%s-%s-%s-%s' % (
            hex[:8], hex[8:12], hex[12:16], hex[16:20], hex[20:])

    def get_bytes(self):
        # Prepend one byte per step, least significant byte first, so the
        # finished string is in big-endian order.
        bytes = ''
        for shift in range(0, 128, 8):
            bytes = chr((self.int >> shift) & 0xff) + bytes
        return bytes

    bytes = property(get_bytes)

    def get_bytes_le(self):
        bytes = self.bytes
        return (bytes[3] + bytes[2] + bytes[1] + bytes[0] +
                bytes[5] + bytes[4] + bytes[7] + bytes[6] + bytes[8:])

    bytes_le = property(get_bytes_le)

    def get_fields(self):
        return (self.time_low, self.time_mid, self.time_hi_version,
                self.clock_seq_hi_variant, self.clock_seq_low, self.node)

    fields = property(get_fields)

    def get_time_low(self):
        return self.int >> 96L

    time_low = property(get_time_low)

    def get_time_mid(self):
        return (self.int >> 80L) & 0xffff

    time_mid = property(get_time_mid)

    def get_time_hi_version(self):
        return (self.int >> 64L) & 0xffff

    time_hi_version = property(get_time_hi_version)

    def get_clock_seq_hi_variant(self):
        return (self.int >> 56L) & 0xff

    clock_seq_hi_variant = property(get_clock_seq_hi_variant)

    def get_clock_seq_low(self):
        return (self.int >> 48L) & 0xff

    clock_seq_low = property(get_clock_seq_low)

    def get_time(self):
        return (((self.time_hi_version & 0x0fffL) << 48L) |
                (self.time_mid << 32L) | self.time_low)

    time = property(get_time)

    def get_clock_seq(self):
        return (((self.clock_seq_hi_variant & 0x3fL) << 8L) |
                self.clock_seq_low)

    clock_seq = property(get_clock_seq)

    def get_node(self):
        return self.int & 0xffffffffffff

    node = property(get_node)

    def get_hex(self):
        return '%032x' % self.int

    hex = property(get_hex)

    def get_urn(self):
        return 'urn:uuid:' + str(self)

    urn = property(get_urn)

    def get_variant(self):
        # Test the top bits of the clock_seq_hi_variant byte, highest first.
        if not self.int & (0x8000 << 48L):
            return RESERVED_NCS
        elif not self.int & (0x4000 << 48L):
            return RFC_4122
        elif not self.int & (0x2000 << 48L):
            return RESERVED_MICROSOFT
        else:
            return RESERVED_FUTURE

    variant = property(get_variant)

    def get_version(self):
        # The version bits are only meaningful for RFC 4122 UUIDs.
        # (Implicitly returns None for any other variant.)
        if self.variant == RFC_4122:
            return int((self.int >> 76L) & 0xf)

    version = property(get_version)

def _find_mac(command, args, hw_identifiers, get_index):
    """Run 'command' with 'args' and pick a hardware address out of its output.

    'command' is looked up in '', '/sbin/' and '/usr/sbin' in turn.  Each
    output line is lowercased and split into words; at the first word found
    in 'hw_identifiers', the word at index get_index(i) is stripped of ':'
    and returned parsed as a hexadecimal integer.  Returns None if nothing
    matched."""
    import os
    for dir in ['', '/sbin/', '/usr/sbin']:
        executable = os.path.join(dir, command)
        if not os.path.exists(executable):
            continue

        try:
            # LC_ALL to get English output, 2>/dev/null to
            # prevent output on stderr
            cmd = 'LC_ALL=C %s %s 2>/dev/null' % (executable, args)
            pipe = os.popen(cmd)
        except IOError:
            continue

        # NOTE(review): 'pipe' is never closed explicitly here.
        for line in pipe:
            words = line.lower().split()
            for i in range(len(words)):
                if words[i] in hw_identifiers:
                    return int(words[get_index(i)].replace(':', ''), 16)
    return None

def _ifconfig_getnode():
    """Get the hardware address on Unix by running ifconfig."""

    # This works on Linux ('' or '-a'), Tru64 ('-av'), but not all Unixes.
    for args in ('', '-a', '-av'):
        mac = _find_mac('ifconfig', args, ['hwaddr', 'ether'], lambda i: i+1)
        if mac:
            return mac

    import socket
    ip_addr = socket.gethostbyname(socket.gethostname())

    # Try getting the MAC addr from arp based on our IP address (Solaris).
    mac = _find_mac('arp', '-an', [ip_addr], lambda i: -1)
    if mac:
        return mac

    # This might work on HP-UX.
    mac = _find_mac('lanscan', '-ai', ['lan0'], lambda i: 0)
    if mac:
        return mac

    return None

def _ipconfig_getnode():
    """Get the hardware address on Windows by running ipconfig.exe."""
    import os, re
    dirs = ['', r'c:\windows\system32', r'c:\winnt\system32']
    try:
        # Prefer the real system directory when ctypes can report it.
        import ctypes
        buffer = ctypes.create_string_buffer(300)
        ctypes.windll.kernel32.GetSystemDirectoryA(buffer, 300)
        dirs.insert(0, buffer.value.decode('mbcs'))
    except:
        pass
    for dir in dirs:
        try:
            pipe = os.popen(os.path.join(dir, 'ipconfig') + ' /all')
        except IOError:
            continue
        for line in pipe:
            # The candidate value is whatever follows the last ':' on the line.
            value = line.split(':')[-1].strip().lower()
            if re.match('([0-9a-f][0-9a-f]-){5}[0-9a-f][0-9a-f]', value):
                return int(value.replace('-', ''), 16)

def _netbios_getnode():
    """Get the hardware address on Windows using NetBIOS calls.
    See http://support.microsoft.com/kb/118623 for details."""
    import win32wnet, netbios
    ncb = netbios.NCB()
    ncb.Command = netbios.NCBENUM
    ncb.Buffer = adapters = netbios.LANA_ENUM()
    adapters._pack()
    if win32wnet.Netbios(ncb) != 0:
        return
    adapters._unpack()
    # Return the address of the first adapter whose reset and status
    # queries both succeed.
    for i in range(adapters.length):
        ncb.Reset()
        ncb.Command = netbios.NCBRESET
        ncb.Lana_num = ord(adapters.lana[i])
        if win32wnet.Netbios(ncb) != 0:
            continue
        ncb.Reset()
        ncb.Command = netbios.NCBASTAT
        ncb.Lana_num = ord(adapters.lana[i])
        ncb.Callname = '*'.ljust(16)
        ncb.Buffer = status = netbios.ADAPTER_STATUS()
        if win32wnet.Netbios(ncb) != 0:
            continue
        status._unpack()
        bytes = map(ord, status.adapter_address)
        return ((bytes[0]<<40L) + (bytes[1]<<32L) + (bytes[2]<<24L) +
                (bytes[3]<<16L) + (bytes[4]<<8L) + bytes[5])

# Thanks to Thomas Heller for ctypes and for his help with its use here.

# If ctypes is available, use it to find system routines for UUID generation.
_uuid_generate_random = _uuid_generate_time = _UuidCreate = None
try:
    import ctypes, ctypes.util
    _buffer = ctypes.create_string_buffer(16)

    # The uuid_generate_* routines are provided by libuuid on at least
    # Linux and FreeBSD, and provided by libc on Mac OS X.
    for libname in ['uuid', 'c']:
        try:
            lib = ctypes.CDLL(ctypes.util.find_library(libname))
        except:
            continue
        if hasattr(lib, 'uuid_generate_random'):
            _uuid_generate_random = lib.uuid_generate_random
        if hasattr(lib, 'uuid_generate_time'):
            _uuid_generate_time = lib.uuid_generate_time

    # On Windows prior to 2000, UuidCreate gives a UUID containing the
    # hardware address.  On Windows 2000 and later, UuidCreate makes a
    # random UUID and UuidCreateSequential gives a UUID containing the
    # hardware address.  These routines are provided by the RPC runtime.
    # NOTE:  at least on Tim's WinXP Pro SP2 desktop box, while the last
    # 6 bytes returned by UuidCreateSequential are fixed, they don't appear
    # to bear any relationship to the MAC address of any network device
    # on the box.
    try:
        lib = ctypes.windll.rpcrt4
    except:
        lib = None
    _UuidCreate = getattr(lib, 'UuidCreateSequential',
                          getattr(lib, 'UuidCreate', None))
except:
    # Any failure above (including a missing ctypes) leaves whichever of the
    # three routines were not found set to None.
    pass

def _unixdll_getnode():
    """Get the hardware address on Unix using ctypes."""
    _uuid_generate_time(_buffer)
    return UUID(bytes=_buffer.raw).node

def _windll_getnode():
    """Get the hardware address on Windows using ctypes."""
    # Implicitly returns None when _UuidCreate reports a nonzero status.
    if _UuidCreate(_buffer) == 0:
        return UUID(bytes=_buffer.raw).node

def _random_getnode():
    """Get a random node ID, with eighth bit set as suggested by RFC 4122."""
    import random
    return random.randrange(0, 1<<48L) | 0x010000000000L

# Cached result of getnode(); None until the first successful lookup.
_node = None

def getnode():
    """Get the hardware address as a 48-bit positive integer.

    The first time this runs, it may launch a separate program, which could
    be quite slow.  If all attempts to obtain the hardware address fail, we
    choose a random 48-bit number with its eighth bit set to 1 as recommended
    in RFC 4122.
    """

    global _node
    if _node is not None:
        return _node

    import sys
    if sys.platform == 'win32':
        getters = [_windll_getnode, _netbios_getnode, _ipconfig_getnode]
    else:
        getters = [_unixdll_getnode, _ifconfig_getnode]

    # Try each getter in order; a getter that raises or returns None is
    # skipped.  _random_getnode is the last resort.
    for getter in getters + [_random_getnode]:
        try:
            _node = getter()
        except:
            continue
        if _node is not None:
            return _node

# Timestamp used by the most recent pure-Python uuid1() call, so that
# successive calls never reuse or go below it.
_last_timestamp = None

def uuid1(node=None, clock_seq=None):
    """Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen."""

    # When the system provides a version-1 UUID generator, use it (but don't
    # use UuidCreate here because its UUIDs don't conform to RFC 4122).
    if _uuid_generate_time and node is clock_seq is None:
        _uuid_generate_time(_buffer)
        return UUID(bytes=_buffer.raw)

    global _last_timestamp
    import time
    nanoseconds = int(time.time() * 1e9)
    # 0x01b21dd213814000 is the number of 100-ns intervals between the
    # UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
    timestamp = int(nanoseconds/100) + 0x01b21dd213814000L
    # NOTE(review): on the first call _last_timestamp is None; this comparison
    # relies on Python 2 ordering an integer above None.
    if timestamp <= _last_timestamp:
        timestamp = _last_timestamp + 1
    _last_timestamp = timestamp
    if clock_seq is None:
        import random
        clock_seq = random.randrange(1<<14L) # instead of stable storage
    time_low = timestamp & 0xffffffffL
    time_mid = (timestamp >> 32L) & 0xffffL
    time_hi_version = (timestamp >> 48L) & 0x0fffL
    clock_seq_low = clock_seq & 0xffL
    clock_seq_hi_variant = (clock_seq >> 8L) & 0x3fL
    if node is None:
        node = getnode()
    return UUID(fields=(time_low, time_mid, time_hi_version,
                        clock_seq_hi_variant, clock_seq_low, node), version=1)

def uuid3(namespace, name):
    """Generate a UUID from the MD5 hash of a namespace UUID and a name."""
    import md5
    hash = md5.md5(namespace.bytes + name).digest()
    return UUID(bytes=hash[:16], version=3)

def uuid4():
    """Generate a random UUID."""

    # When the system provides a version-4 UUID generator, use it.
    if _uuid_generate_random:
        _uuid_generate_random(_buffer)
        return UUID(bytes=_buffer.raw)

    # Otherwise, get randomness from urandom or the 'random' module.
    try:
        import os
        return UUID(bytes=os.urandom(16), version=4)
    except:
        import random
        # NOTE(review): 'bytes' here is a list of 16 one-character strings,
        # not a string; the UUID constructor only takes len() and map(ord, ...)
        # of it.
        bytes = [chr(random.randrange(256)) for i in range(16)]
        return UUID(bytes=bytes, version=4)

def uuid5(namespace, name):
    """Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
    import sha
    hash = sha.sha(namespace.bytes + name).digest()
    # A SHA-1 digest is 20 bytes; only the first 16 are used.
    return UUID(bytes=hash[:16], version=5)

# The following standard UUIDs are for use with uuid3() or uuid5().

NAMESPACE_DNS = UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8') NAMESPACE_URL = UUID('6ba7b811-9dad-11d1-80b4-00c04fd430c8') NAMESPACE_OID = UUID('6ba7b812-9dad-11d1-80b4-00c04fd430c8') NAMESPACE_X500 = UUID('6ba7b814-9dad-11d1-80b4-00c04fd430c8') openvswitch-2.0.1+git20140120/python/ovs/000077500000000000000000000000001226605124000176025ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/python/ovs/.gitignore000066400000000000000000000000131226605124000215640ustar00rootroot00000000000000version.py openvswitch-2.0.1+git20140120/python/ovs/__init__.py000066400000000000000000000000461226605124000217130ustar00rootroot00000000000000# This file intentionally left blank. openvswitch-2.0.1+git20140120/python/ovs/daemon.py000066400000000000000000000411131226605124000214170ustar00rootroot00000000000000# Copyright (c) 2010, 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import errno import fcntl import os import resource import signal import sys import time import ovs.dirs import ovs.fatal_signal #import ovs.lockfile import ovs.process import ovs.socket_util import ovs.timeval import ovs.util import ovs.vlog vlog = ovs.vlog.Vlog("daemon") # --detach: Should we run in the background? _detach = False # --pidfile: Name of pidfile (null if none). _pidfile = None # Our pidfile's inode and device, if we have created one. _pidfile_dev = None _pidfile_ino = None # --overwrite-pidfile: Create pidfile even if one already exists and is locked? 
_overwrite_pidfile = False # --no-chdir: Should we chdir to "/"? _chdir = True # --monitor: Should a supervisory process monitor the daemon and restart it if # it dies due to an error signal? _monitor = False # File descriptor used by daemonize_start() and daemonize_complete(). _daemonize_fd = None RESTART_EXIT_CODE = 5 def make_pidfile_name(name): """Returns the file name that would be used for a pidfile if 'name' were provided to set_pidfile().""" if name is None or name == "": return "%s/%s.pid" % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME) else: return ovs.util.abs_file_name(ovs.dirs.RUNDIR, name) def set_pidfile(name): """Sets up a following call to daemonize() to create a pidfile named 'name'. If 'name' begins with '/', then it is treated as an absolute path. Otherwise, it is taken relative to ovs.util.RUNDIR, which is $(prefix)/var/run by default. If 'name' is null, then ovs.util.PROGRAM_NAME followed by ".pid" is used.""" global _pidfile _pidfile = make_pidfile_name(name) def get_pidfile(): """Returns an absolute path to the configured pidfile, or None if no pidfile is configured.""" return _pidfile def set_no_chdir(): """Sets that we do not chdir to "/".""" global _chdir _chdir = False def is_chdir_enabled(): """Will we chdir to "/" as part of daemonizing?""" return _chdir def ignore_existing_pidfile(): """Normally, daemonize() or daemonize_start() will terminate the program with a message if a locked pidfile already exists. 
If this function is called, an existing pidfile will be replaced, with a warning.""" global _overwrite_pidfile _overwrite_pidfile = True def set_detach(): """Sets up a following call to daemonize() to detach from the foreground session, running this process in the background.""" global _detach _detach = True def get_detach(): """Will daemonize() really detach?""" return _detach def set_monitor(): """Sets up a following call to daemonize() to fork a supervisory process to monitor the daemon and restart it if it dies due to an error signal.""" global _monitor _monitor = True def _fatal(msg): vlog.err(msg) sys.stderr.write("%s\n" % msg) sys.exit(1) def _make_pidfile(): """If a pidfile has been configured, creates it and stores the running process's pid in it. Ensures that the pidfile will be deleted when the process exits.""" pid = os.getpid() # Create a temporary pidfile. tmpfile = "%s.tmp%d" % (_pidfile, pid) ovs.fatal_signal.add_file_to_unlink(tmpfile) try: # This is global to keep Python from garbage-collecting and # therefore closing our file after this function exits. That would # unlock the lock for us, and we don't want that. global file_handle file_handle = open(tmpfile, "w") except IOError, e: _fatal("%s: create failed (%s)" % (tmpfile, e.strerror)) try: s = os.fstat(file_handle.fileno()) except IOError, e: _fatal("%s: fstat failed (%s)" % (tmpfile, e.strerror)) try: file_handle.write("%s\n" % pid) file_handle.flush() except OSError, e: _fatal("%s: write failed: %s" % (tmpfile, e.strerror)) try: fcntl.lockf(file_handle, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError, e: _fatal("%s: fcntl failed: %s" % (tmpfile, e.strerror)) # Rename or link it to the correct name. 
if _overwrite_pidfile: try: os.rename(tmpfile, _pidfile) except OSError, e: _fatal("failed to rename \"%s\" to \"%s\" (%s)" % (tmpfile, _pidfile, e.strerror)) else: while True: try: os.link(tmpfile, _pidfile) error = 0 except OSError, e: error = e.errno if error == errno.EEXIST: _check_already_running() elif error != errno.EINTR: break if error: _fatal("failed to link \"%s\" as \"%s\" (%s)" % (tmpfile, _pidfile, os.strerror(error))) # Ensure that the pidfile will get deleted on exit. ovs.fatal_signal.add_file_to_unlink(_pidfile) # Delete the temporary pidfile if it still exists. if not _overwrite_pidfile: error = ovs.fatal_signal.unlink_file_now(tmpfile) if error: _fatal("%s: unlink failed (%s)" % (tmpfile, os.strerror(error))) global _pidfile_dev global _pidfile_ino _pidfile_dev = s.st_dev _pidfile_ino = s.st_ino def daemonize(): """If configured with set_pidfile() or set_detach(), creates the pid file and detaches from the foreground session.""" daemonize_start() daemonize_complete() def _waitpid(pid, options): while True: try: return os.waitpid(pid, options) except OSError, e: if e.errno == errno.EINTR: pass return -e.errno, 0 def _fork_and_wait_for_startup(): try: rfd, wfd = os.pipe() except OSError, e: sys.stderr.write("pipe failed: %s\n" % os.strerror(e.errno)) sys.exit(1) try: pid = os.fork() except OSError, e: sys.stderr.write("could not fork: %s\n" % os.strerror(e.errno)) sys.exit(1) if pid > 0: # Running in parent process. os.close(wfd) ovs.fatal_signal.fork() while True: try: s = os.read(rfd, 1) error = 0 except OSError, e: s = "" error = e.errno if error != errno.EINTR: break if len(s) != 1: retval, status = _waitpid(pid, 0) if retval == pid: if os.WIFEXITED(status) and os.WEXITSTATUS(status): # Child exited with an error. Convey the same error to # our parent process as a courtesy. 
sys.exit(os.WEXITSTATUS(status)) else: sys.stderr.write("fork child failed to signal " "startup (%s)\n" % ovs.process.status_msg(status)) else: assert retval < 0 sys.stderr.write("waitpid failed (%s)\n" % os.strerror(-retval)) sys.exit(1) os.close(rfd) else: # Running in parent process. os.close(rfd) ovs.timeval.postfork() #ovs.lockfile.postfork() global _daemonize_fd _daemonize_fd = wfd return pid def _fork_notify_startup(fd): if fd is not None: error, bytes_written = ovs.socket_util.write_fully(fd, "0") if error: sys.stderr.write("could not write to pipe\n") sys.exit(1) os.close(fd) def _should_restart(status): global RESTART_EXIT_CODE if os.WIFEXITED(status) and os.WEXITSTATUS(status) == RESTART_EXIT_CODE: return True if os.WIFSIGNALED(status): for signame in ("SIGABRT", "SIGALRM", "SIGBUS", "SIGFPE", "SIGILL", "SIGPIPE", "SIGSEGV", "SIGXCPU", "SIGXFSZ"): if os.WTERMSIG(status) == getattr(signal, signame, None): return True return False def _monitor_daemon(daemon_pid): # XXX should log daemon's stderr output at startup time # XXX should use setproctitle module if available last_restart = None while True: retval, status = _waitpid(daemon_pid, 0) if retval < 0: sys.stderr.write("waitpid failed\n") sys.exit(1) elif retval == daemon_pid: status_msg = ("pid %d died, %s" % (daemon_pid, ovs.process.status_msg(status))) if _should_restart(status): if os.WCOREDUMP(status): # Disable further core dumps to save disk space. try: resource.setrlimit(resource.RLIMIT_CORE, (0, 0)) except resource.error: vlog.warn("failed to disable core dumps") # Throttle restarts to no more than once every 10 seconds. 
if (last_restart is not None and ovs.timeval.msec() < last_restart + 10000): vlog.warn("%s, waiting until 10 seconds since last " "restart" % status_msg) while True: now = ovs.timeval.msec() wakeup = last_restart + 10000 if now > wakeup: break print "sleep %f" % ((wakeup - now) / 1000.0) time.sleep((wakeup - now) / 1000.0) last_restart = ovs.timeval.msec() vlog.err("%s, restarting" % status_msg) daemon_pid = _fork_and_wait_for_startup() if not daemon_pid: break else: vlog.info("%s, exiting" % status_msg) sys.exit(0) # Running in new daemon process. def _close_standard_fds(): """Close stdin, stdout, stderr. If we're started from e.g. an SSH session, then this keeps us from holding that session open artificially.""" null_fd = ovs.socket_util.get_null_fd() if null_fd >= 0: os.dup2(null_fd, 0) os.dup2(null_fd, 1) os.dup2(null_fd, 2) def daemonize_start(): """If daemonization is configured, then starts daemonization, by forking and returning in the child process. The parent process hangs around until the child lets it know either that it completed startup successfully (by calling daemon_complete()) or that it failed to start up (by exiting with a nonzero exit code).""" if _detach: if _fork_and_wait_for_startup() > 0: # Running in parent process. sys.exit(0) # Running in daemon or monitor process. os.setsid() if _monitor: saved_daemonize_fd = _daemonize_fd daemon_pid = _fork_and_wait_for_startup() if daemon_pid > 0: # Running in monitor process. 
_fork_notify_startup(saved_daemonize_fd) _close_standard_fds() _monitor_daemon(daemon_pid) # Running in daemon process if _pidfile: _make_pidfile() def daemonize_complete(): """If daemonization is configured, then this function notifies the parent process that the child process has completed startup successfully.""" _fork_notify_startup(_daemonize_fd) if _detach: if _chdir: os.chdir("/") _close_standard_fds() def usage(): sys.stdout.write(""" Daemon options: --detach run in background as daemon --no-chdir do not chdir to '/' --pidfile[=FILE] create pidfile (default: %s/%s.pid) --overwrite-pidfile with --pidfile, start even if already running """ % (ovs.dirs.RUNDIR, ovs.util.PROGRAM_NAME)) def __read_pidfile(pidfile, delete_if_stale): if _pidfile_dev is not None: try: s = os.stat(pidfile) if s.st_ino == _pidfile_ino and s.st_dev == _pidfile_dev: # It's our own pidfile. We can't afford to open it, # because closing *any* fd for a file that a process # has locked also releases all the locks on that file. # # Fortunately, we know the associated pid anyhow. return os.getpid() except OSError: pass try: file_handle = open(pidfile, "r+") except IOError, e: if e.errno == errno.ENOENT and delete_if_stale: return 0 vlog.warn("%s: open: %s" % (pidfile, e.strerror)) return -e.errno # Python fcntl doesn't directly support F_GETLK so we have to just try # to lock it. try: fcntl.lockf(file_handle, fcntl.LOCK_EX | fcntl.LOCK_NB) # pidfile exists but wasn't locked by anyone. Now we have the lock. if not delete_if_stale: file_handle.close() vlog.warn("%s: pid file is stale" % pidfile) return -errno.ESRCH # Is the file we have locked still named 'pidfile'? try: raced = False s = os.stat(pidfile) s2 = os.fstat(file_handle.fileno()) if s.st_ino != s2.st_ino or s.st_dev != s2.st_dev: raced = True except IOError: raced = True if raced: vlog.warn("%s: lost race to delete pidfile" % pidfile) return -errno.EALREADY # We won the right to delete the stale pidfile. 
try: os.unlink(pidfile) except IOError, e: vlog.warn("%s: failed to delete stale pidfile (%s)" % (pidfile, e.strerror)) return -e.errno else: vlog.dbg("%s: deleted stale pidfile" % pidfile) file_handle.close() return 0 except IOError, e: if e.errno not in [errno.EACCES, errno.EAGAIN]: vlog.warn("%s: fcntl: %s" % (pidfile, e.strerror)) return -e.errno # Someone else has the pidfile locked. try: try: error = int(file_handle.readline()) except IOError, e: vlog.warn("%s: read: %s" % (pidfile, e.strerror)) error = -e.errno except ValueError: vlog.warn("%s does not contain a pid" % pidfile) error = -errno.EINVAL return error finally: try: file_handle.close() except IOError: pass def read_pidfile(pidfile): """Opens and reads a PID from 'pidfile'. Returns the positive PID if successful, otherwise a negative errno value.""" return __read_pidfile(pidfile, False) def _check_already_running(): pid = __read_pidfile(_pidfile, True) if pid > 0: _fatal("%s: already running as pid %d, aborting" % (_pidfile, pid)) elif pid < 0: _fatal("%s: pidfile check failed (%s), aborting" % (_pidfile, os.strerror(pid))) def add_args(parser): """Populates 'parser', an ArgumentParser allocated using the argparse module, with the command line arguments required by the daemon module.""" pidfile = make_pidfile_name(None) group = parser.add_argument_group(title="Daemon Options") group.add_argument("--detach", action="store_true", help="Run in background as a daemon.") group.add_argument("--no-chdir", action="store_true", help="Do not chdir to '/'.") group.add_argument("--monitor", action="store_true", help="Monitor %s process." % ovs.util.PROGRAM_NAME) group.add_argument("--pidfile", nargs="?", const=pidfile, help="Create pidfile (default %s)." % pidfile) group.add_argument("--overwrite-pidfile", action="store_true", help="With --pidfile, start even if already running.") def handle_args(args): """Handles daemon module settings in 'args'. 
'args' is an object containing values parsed by the parse_args() method of ArgumentParser. The parent ArgumentParser should have been prepared by add_args() before calling parse_args().""" if args.detach: set_detach() if args.no_chdir: set_no_chdir() if args.pidfile: set_pidfile(args.pidfile) if args.overwrite_pidfile: ignore_existing_pidfile() if args.monitor: set_monitor() openvswitch-2.0.1+git20140120/python/ovs/db/000077500000000000000000000000001226605124000201675ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/python/ovs/db/__init__.py000066400000000000000000000000461226605124000223000ustar00rootroot00000000000000# This file intentionally left blank. openvswitch-2.0.1+git20140120/python/ovs/db/data.py000066400000000000000000000454271226605124000214660ustar00rootroot00000000000000# Copyright (c) 2009, 2010, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import re import uuid import ovs.poller import ovs.socket_util import ovs.json import ovs.jsonrpc import ovs.ovsuuid import ovs.db.parser from ovs.db import error import ovs.db.types class ConstraintViolation(error.Error): def __init__(self, msg, json=None): error.Error.__init__(self, msg, json, tag="constraint violation") def escapeCString(src): dst = [] for c in src: if c in "\\\"": dst.append("\\" + c) elif ord(c) < 32: if c == '\n': dst.append('\\n') elif c == '\r': dst.append('\\r') elif c == '\a': dst.append('\\a') elif c == '\b': dst.append('\\b') elif c == '\f': dst.append('\\f') elif c == '\t': dst.append('\\t') elif c == '\v': dst.append('\\v') else: dst.append('\\%03o' % ord(c)) else: dst.append(c) return ''.join(dst) def returnUnchanged(x): return x class Atom(object): def __init__(self, type_, value=None): self.type = type_ if value is not None: self.value = value else: self.value = type_.default_atom() def __cmp__(self, other): if not isinstance(other, Atom) or self.type != other.type: return NotImplemented elif self.value < other.value: return -1 elif self.value > other.value: return 1 else: return 0 def __hash__(self): return hash(self.value) @staticmethod def default(type_): """Returns the default value for the given type_, which must be an instance of ovs.db.types.AtomicType. The default value for each atomic type is; - 0, for integer or real atoms. - False, for a boolean atom. - "", for a string atom. 
- The all-zeros UUID, for a UUID atom.""" return Atom(type_) def is_default(self): return self == self.default(self.type) @staticmethod def from_json(base, json, symtab=None): type_ = base.type json = ovs.db.parser.float_to_int(json) if ((type_ == ovs.db.types.IntegerType and type(json) in [int, long]) or (type_ == ovs.db.types.RealType and type(json) in [int, long, float]) or (type_ == ovs.db.types.BooleanType and type(json) == bool) or (type_ == ovs.db.types.StringType and type(json) in [str, unicode])): atom = Atom(type_, json) elif type_ == ovs.db.types.UuidType: atom = Atom(type_, ovs.ovsuuid.from_json(json, symtab)) else: raise error.Error("expected %s" % type_.to_string(), json) atom.check_constraints(base) return atom @staticmethod def from_python(base, value): value = ovs.db.parser.float_to_int(value) if type(value) in base.type.python_types: atom = Atom(base.type, value) else: raise error.Error("expected %s, got %s" % (base.type, type(value))) atom.check_constraints(base) return atom def check_constraints(self, base): """Checks whether 'atom' meets the constraints (if any) defined in 'base' and raises an ovs.db.error.Error if any constraint is violated. 'base' and 'atom' must have the same type. 
Checking UUID constraints is deferred to transaction commit time, so this function does nothing for UUID constraints.""" assert base.type == self.type if base.enum is not None and self not in base.enum: raise ConstraintViolation( "%s is not one of the allowed values (%s)" % (self.to_string(), base.enum.to_string())) elif base.type in [ovs.db.types.IntegerType, ovs.db.types.RealType]: if ((base.min is None or self.value >= base.min) and (base.max is None or self.value <= base.max)): pass elif base.min is not None and base.max is not None: raise ConstraintViolation( "%s is not in the valid range %.15g to %.15g (inclusive)" % (self.to_string(), base.min, base.max)) elif base.min is not None: raise ConstraintViolation( "%s is less than minimum allowed value %.15g" % (self.to_string(), base.min)) else: raise ConstraintViolation( "%s is greater than maximum allowed value %.15g" % (self.to_string(), base.max)) elif base.type == ovs.db.types.StringType: # XXX The C version validates that the string is valid UTF-8 here. # Do we need to do that in Python too? 
s = self.value length = len(s) if length < base.min_length: raise ConstraintViolation( '"%s" length %d is less than minimum allowed length %d' % (s, length, base.min_length)) elif length > base.max_length: raise ConstraintViolation( '"%s" length %d is greater than maximum allowed ' 'length %d' % (s, length, base.max_length)) def to_json(self): if self.type == ovs.db.types.UuidType: return ovs.ovsuuid.to_json(self.value) else: return self.value def cInitAtom(self, var): if self.type == ovs.db.types.IntegerType: return ['%s.integer = %d;' % (var, self.value)] elif self.type == ovs.db.types.RealType: return ['%s.real = %.15g;' % (var, self.value)] elif self.type == ovs.db.types.BooleanType: if self.value: return ['%s.boolean = true;'] else: return ['%s.boolean = false;'] elif self.type == ovs.db.types.StringType: return ['%s.string = xstrdup("%s");' % (var, escapeCString(self.value))] elif self.type == ovs.db.types.UuidType: return ovs.ovsuuid.to_c_assignment(self.value, var) def toEnglish(self, escapeLiteral=returnUnchanged): if self.type == ovs.db.types.IntegerType: return '%d' % self.value elif self.type == ovs.db.types.RealType: return '%.15g' % self.value elif self.type == ovs.db.types.BooleanType: if self.value: return 'true' else: return 'false' elif self.type == ovs.db.types.StringType: return escapeLiteral(self.value) elif self.type == ovs.db.types.UuidType: return self.value.value __need_quotes_re = re.compile("$|true|false|[^_a-zA-Z]|.*[^-._a-zA-Z]") @staticmethod def __string_needs_quotes(s): return Atom.__need_quotes_re.match(s) def to_string(self): if self.type == ovs.db.types.IntegerType: return '%d' % self.value elif self.type == ovs.db.types.RealType: return '%.15g' % self.value elif self.type == ovs.db.types.BooleanType: if self.value: return 'true' else: return 'false' elif self.type == ovs.db.types.StringType: if Atom.__string_needs_quotes(self.value): return ovs.json.to_string(self.value) else: return self.value elif self.type == 
ovs.db.types.UuidType: return str(self.value) @staticmethod def new(x): if type(x) in [int, long]: t = ovs.db.types.IntegerType elif type(x) == float: t = ovs.db.types.RealType elif x in [False, True]: t = ovs.db.types.BooleanType elif type(x) in [str, unicode]: t = ovs.db.types.StringType elif isinstance(x, uuid): t = ovs.db.types.UuidType else: raise TypeError return Atom(t, x) class Datum(object): def __init__(self, type_, values={}): self.type = type_ self.values = values def __cmp__(self, other): if not isinstance(other, Datum): return NotImplemented elif self.values < other.values: return -1 elif self.values > other.values: return 1 else: return 0 __hash__ = None def __contains__(self, item): return item in self.values def copy(self): return Datum(self.type, dict(self.values)) @staticmethod def default(type_): if type_.n_min == 0: values = {} elif type_.is_map(): values = {type_.key.default(): type_.value.default()} else: values = {type_.key.default(): None} return Datum(type_, values) def is_default(self): return self == Datum.default(self.type) def check_constraints(self): """Checks that each of the atoms in 'datum' conforms to the constraints specified by its 'type' and raises an ovs.db.error.Error. This function is not commonly useful because the most ordinary way to obtain a datum is ultimately via Datum.from_json() or Atom.from_json(), which check constraints themselves.""" for keyAtom, valueAtom in self.values.iteritems(): keyAtom.check_constraints(self.type.key) if valueAtom is not None: valueAtom.check_constraints(self.type.value) @staticmethod def from_json(type_, json, symtab=None): """Parses 'json' as a datum of the type described by 'type'. If successful, returns a new datum. On failure, raises an ovs.db.error.Error. Violations of constraints expressed by 'type' are treated as errors. If 'symtab' is nonnull, then named UUIDs in 'symtab' are accepted. 
Refer to ovsdb/SPECS for information about this, and for the syntax that this function accepts.""" is_map = type_.is_map() if (is_map or (type(json) == list and len(json) > 0 and json[0] == "set")): if is_map: class_ = "map" else: class_ = "set" inner = ovs.db.parser.unwrap_json(json, class_, [list, tuple], "array") n = len(inner) if n < type_.n_min or n > type_.n_max: raise error.Error("%s must have %d to %d members but %d are " "present" % (class_, type_.n_min, type_.n_max, n), json) values = {} for element in inner: if is_map: key, value = ovs.db.parser.parse_json_pair(element) keyAtom = Atom.from_json(type_.key, key, symtab) valueAtom = Atom.from_json(type_.value, value, symtab) else: keyAtom = Atom.from_json(type_.key, element, symtab) valueAtom = None if keyAtom in values: if is_map: raise error.Error("map contains duplicate key") else: raise error.Error("set contains duplicate") values[keyAtom] = valueAtom return Datum(type_, values) else: keyAtom = Atom.from_json(type_.key, json, symtab) return Datum(type_, {keyAtom: None}) def to_json(self): if self.type.is_map(): return ["map", [[k.to_json(), v.to_json()] for k, v in sorted(self.values.items())]] elif len(self.values) == 1: key = self.values.keys()[0] return key.to_json() else: return ["set", [k.to_json() for k in sorted(self.values.keys())]] def to_string(self): head = tail = None if self.type.n_max > 1 or len(self.values) == 0: if self.type.is_map(): head = "{" tail = "}" else: head = "[" tail = "]" s = [] if head: s.append(head) for i, key in enumerate(sorted(self.values)): if i: s.append(", ") s.append(key.to_string()) if self.type.is_map(): s.append("=") s.append(self.values[key].to_string()) if tail: s.append(tail) return ''.join(s) def as_list(self): if self.type.is_map(): return [[k.value, v.value] for k, v in self.values.iteritems()] else: return [k.value for k in self.values.iterkeys()] def as_dict(self): return dict(self.values) def as_scalar(self): if len(self.values) == 1: if 
self.type.is_map(): k, v = self.values.iteritems()[0] return [k.value, v.value] else: return self.values.keys()[0].value else: return None def to_python(self, uuid_to_row): """Returns this datum's value converted into a natural Python representation of this datum's type, according to the following rules: - If the type has exactly one value and it is not a map (that is, self.type.is_scalar() returns True), then the value is: * An int or long, for an integer column. * An int or long or float, for a real column. * A bool, for a boolean column. * A str or unicode object, for a string column. * A uuid.UUID object, for a UUID column without a ref_table. * An object represented the referenced row, for a UUID column with a ref_table. (For the Idl, this object will be an ovs.db.idl.Row object.) If some error occurs (e.g. the database server's idea of the column is different from the IDL's idea), then the default value for the scalar type is used (see Atom.default()). - Otherwise, if the type is not a map, then the value is a Python list whose elements have the types described above. - Otherwise, the type is a map, and the value is a Python dict that maps from key to value, with key and value types determined as described above. 'uuid_to_row' must be a function that takes a value and an ovs.db.types.BaseType and translates UUIDs into row objects.""" if self.type.is_scalar(): value = uuid_to_row(self.as_scalar(), self.type.key) if value is None: return self.type.key.default() else: return value elif self.type.is_map(): value = {} for k, v in self.values.iteritems(): dk = uuid_to_row(k.value, self.type.key) dv = uuid_to_row(v.value, self.type.value) if dk is not None and dv is not None: value[dk] = dv return value else: s = set() for k in self.values: dk = uuid_to_row(k.value, self.type.key) if dk is not None: s.add(dk) return sorted(s) @staticmethod def from_python(type_, value, row_to_uuid): """Returns a new Datum with the given ovs.db.types.Type 'type_'. 
The new datum's value is taken from 'value', which must take the form described as a valid return value from Datum.to_python() for 'type'. Each scalar value within 'value' is initially passed through 'row_to_uuid', which should convert objects that represent rows (if any) into uuid.UUID objects and return other data unchanged. Raises ovs.db.error.Error if 'value' is not in an appropriate form for 'type_'.""" d = {} if type(value) == dict: for k, v in value.iteritems(): ka = Atom.from_python(type_.key, row_to_uuid(k)) va = Atom.from_python(type_.value, row_to_uuid(v)) d[ka] = va elif type(value) in (list, tuple): for k in value: ka = Atom.from_python(type_.key, row_to_uuid(k)) d[ka] = None else: ka = Atom.from_python(type_.key, row_to_uuid(value)) d[ka] = None datum = Datum(type_, d) datum.check_constraints() if not datum.conforms_to_type(): raise error.Error("%d values when type requires between %d and %d" % (len(d), type_.n_min, type_.n_max)) return datum def __getitem__(self, key): if not isinstance(key, Atom): key = Atom.new(key) if not self.type.is_map(): raise IndexError elif key not in self.values: raise KeyError else: return self.values[key].value def get(self, key, default=None): if not isinstance(key, Atom): key = Atom.new(key) if key in self.values: return self.values[key].value else: return default def __str__(self): return self.to_string() def conforms_to_type(self): n = len(self.values) return self.type.n_min <= n <= self.type.n_max def cInitDatum(self, var): if len(self.values) == 0: return ["ovsdb_datum_init_empty(%s);" % var] s = ["%s->n = %d;" % (var, len(self.values))] s += ["%s->keys = xmalloc(%d * sizeof *%s->keys);" % (var, len(self.values), var)] for i, key in enumerate(sorted(self.values)): s += key.cInitAtom("%s->keys[%d]" % (var, i)) if self.type.value: s += ["%s->values = xmalloc(%d * sizeof *%s->values);" % (var, len(self.values), var)] for i, (key, value) in enumerate(sorted(self.values.items())): s += value.cInitAtom("%s->values[%d]" % 
(var, i)) else: s += ["%s->values = NULL;" % var] if len(self.values) > 1: s += ["ovsdb_datum_sort_assert(%s, OVSDB_TYPE_%s);" % (var, self.type.key.type.to_string().upper())] return s openvswitch-2.0.1+git20140120/python/ovs/db/error.py000066400000000000000000000022101226605124000216650ustar00rootroot00000000000000# Copyright (c) 2009, 2010, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import ovs.json class Error(Exception): def __init__(self, msg, json=None, tag=None): self.msg = msg self.json = json if tag is None: if json is None: self.tag = "ovsdb error" else: self.tag = "syntax error" else: self.tag = tag # Compose message. syntax = "" if self.json is not None: syntax = 'syntax "%s": ' % ovs.json.to_string(self.json) Exception.__init__(self, "%s%s: %s" % (syntax, self.tag, self.msg)) openvswitch-2.0.1+git20140120/python/ovs/db/idl.py000066400000000000000000001475121226605124000213230ustar00rootroot00000000000000# Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. import uuid import ovs.jsonrpc import ovs.db.parser import ovs.db.schema from ovs.db import error import ovs.ovsuuid import ovs.poller import ovs.vlog vlog = ovs.vlog.Vlog("idl") __pychecker__ = 'no-classattr no-objattrs' class Idl: """Open vSwitch Database Interface Definition Language (OVSDB IDL). The OVSDB IDL maintains an in-memory replica of a database. It issues RPC requests to an OVSDB database server and parses the responses, converting raw JSON into data structures that are easier for clients to digest. The IDL also assists with issuing database transactions. The client creates a transaction, manipulates the IDL data structures, and commits or aborts the transaction. The IDL then composes and issues the necessary JSON-RPC requests and reports to the client whether the transaction completed successfully. The client is allowed to access the following attributes directly, in a read-only fashion: - 'tables': This is the 'tables' map in the ovs.db.schema.DbSchema provided to the Idl constructor. Each ovs.db.schema.TableSchema in the map is annotated with a new attribute 'rows', which is a dict from a uuid.UUID to a Row object. The client may directly read and write the Row objects referenced by the 'rows' map values. Refer to Row for more details. - 'change_seqno': A number that represents the IDL's state. When the IDL is updated (by Idl.run()), its value changes. The sequence number can occasionally change even if the database does not. This happens if the connection to the database drops and reconnects, which causes the database contents to be reloaded even if they didn't change. (It could also happen if the database server sends out a "change" that reflects what the IDL already thought was in the database. The database server is not supposed to do that, but bugs could in theory cause it to do so.) 
- 'lock_name': The name of the lock configured with Idl.set_lock(), or None if no lock is configured. - 'has_lock': True, if the IDL is configured to obtain a lock and owns that lock, and False otherwise. Locking and unlocking happens asynchronously from the database client's point of view, so the information is only useful for optimization (e.g. if the client doesn't have the lock then there's no point in trying to write to the database). - 'is_lock_contended': True, if the IDL is configured to obtain a lock but the database server has indicated that some other client already owns the requested lock, and False otherwise. - 'txn': The ovs.db.idl.Transaction object for the database transaction currently being constructed, if there is one, or None otherwise. """ def __init__(self, remote, schema): """Creates and returns a connection to the database named 'db_name' on 'remote', which should be in a form acceptable to ovs.jsonrpc.session.open(). The connection will maintain an in-memory replica of the remote database. 'schema' should be the schema for the remote database. The caller may have cut it down by removing tables or columns that are not of interest. The IDL will only replicate the tables and columns that remain. The caller may also add a attribute named 'alert' to selected remaining columns, setting its value to False; if so, then changes to those columns will not be considered changes to the database for the purpose of the return value of Idl.run() and Idl.change_seqno. This is useful for columns that the IDL's client will write but not read. As a convenience to users, 'schema' may also be an instance of the SchemaHelper class. The IDL uses and modifies 'schema' directly.""" assert isinstance(schema, SchemaHelper) schema = schema.get_idl_schema() self.tables = schema.tables self._db = schema self._session = ovs.jsonrpc.Session.open(remote) self._monitor_request_id = None self._last_seqno = None self.change_seqno = 0 # Database locking. 
self.lock_name = None # Name of lock we need, None if none. self.has_lock = False # Has db server said we have the lock? self.is_lock_contended = False # Has db server said we can't get lock? self._lock_request_id = None # JSON-RPC ID of in-flight lock request. # Transaction support. self.txn = None self._outstanding_txns = {} for table in schema.tables.itervalues(): for column in table.columns.itervalues(): if not hasattr(column, 'alert'): column.alert = True table.need_table = False table.rows = {} table.idl = self def close(self): """Closes the connection to the database. The IDL will no longer update.""" self._session.close() def run(self): """Processes a batch of messages from the database server. Returns True if the database as seen through the IDL changed, False if it did not change. The initial fetch of the entire contents of the remote database is considered to be one kind of change. If the IDL has been configured to acquire a database lock (with Idl.set_lock()), then successfully acquiring the lock is also considered to be a change. This function can return occasional false positives, that is, report that the database changed even though it didn't. This happens if the connection to the database drops and reconnects, which causes the database contents to be reloaded even if they didn't change. (It could also happen if the database server sends out a "change" that reflects what we already thought was in the database, but the database server is not supposed to do that.) 
As an alternative to checking the return value, the client may check for changes in self.change_seqno.""" assert not self.txn initial_change_seqno = self.change_seqno self._session.run() i = 0 while i < 50: i += 1 if not self._session.is_connected(): break seqno = self._session.get_seqno() if seqno != self._last_seqno: self._last_seqno = seqno self.__txn_abort_all() self.__send_monitor_request() if self.lock_name: self.__send_lock_request() break msg = self._session.recv() if msg is None: break if (msg.type == ovs.jsonrpc.Message.T_NOTIFY and msg.method == "update" and len(msg.params) == 2 and msg.params[0] == None): # Database contents changed. self.__parse_update(msg.params[1]) elif (msg.type == ovs.jsonrpc.Message.T_REPLY and self._monitor_request_id is not None and self._monitor_request_id == msg.id): # Reply to our "monitor" request. try: self.change_seqno += 1 self._monitor_request_id = None self.__clear() self.__parse_update(msg.result) except error.Error, e: vlog.err("%s: parse error in received schema: %s" % (self._session.get_name(), e)) self.__error() elif (msg.type == ovs.jsonrpc.Message.T_REPLY and self._lock_request_id is not None and self._lock_request_id == msg.id): # Reply to our "lock" request. self.__parse_lock_reply(msg.result) elif (msg.type == ovs.jsonrpc.Message.T_NOTIFY and msg.method == "locked"): # We got our lock. self.__parse_lock_notify(msg.params, True) elif (msg.type == ovs.jsonrpc.Message.T_NOTIFY and msg.method == "stolen"): # Someone else stole our lock. self.__parse_lock_notify(msg.params, False) elif msg.type == ovs.jsonrpc.Message.T_NOTIFY and msg.id == "echo": # Reply to our echo request. Ignore it. pass elif (msg.type in (ovs.jsonrpc.Message.T_ERROR, ovs.jsonrpc.Message.T_REPLY) and self.__txn_process_reply(msg)): # __txn_process_reply() did everything needed. pass else: # This can happen if a transaction is destroyed before we # receive the reply, so keep the log level low. 
vlog.dbg("%s: received unexpected %s message" % (self._session.get_name(), ovs.jsonrpc.Message.type_to_string(msg.type))) return initial_change_seqno != self.change_seqno def wait(self, poller): """Arranges for poller.block() to wake up when self.run() has something to do or when activity occurs on a transaction on 'self'.""" self._session.wait(poller) self._session.recv_wait(poller) def has_ever_connected(self): """Returns True, if the IDL successfully connected to the remote database and retrieved its contents (even if the connection subsequently dropped and is in the process of reconnecting). If so, then the IDL contains an atomic snapshot of the database's contents (but it might be arbitrarily old if the connection dropped). Returns False if the IDL has never connected or retrieved the database's contents. If so, the IDL is empty.""" return self.change_seqno != 0 def force_reconnect(self): """Forces the IDL to drop its connection to the database and reconnect. In the meantime, the contents of the IDL will not change.""" self._session.force_reconnect() def set_lock(self, lock_name): """If 'lock_name' is not None, configures the IDL to obtain the named lock from the database server and to avoid modifying the database when the lock cannot be acquired (that is, when another client has the same lock). If 'lock_name' is None, drops the locking requirement and releases the lock.""" assert not self.txn assert not self._outstanding_txns if self.lock_name and (not lock_name or lock_name != self.lock_name): # Release previous lock. self.__send_unlock_request() self.lock_name = None self.is_lock_contended = False if lock_name and not self.lock_name: # Acquire new lock. 
self.lock_name = lock_name self.__send_lock_request() def __clear(self): changed = False for table in self.tables.itervalues(): if table.rows: changed = True table.rows = {} if changed: self.change_seqno += 1 def __update_has_lock(self, new_has_lock): if new_has_lock and not self.has_lock: if self._monitor_request_id is None: self.change_seqno += 1 else: # We're waiting for a monitor reply, so don't signal that the # database changed. The monitor reply will increment # change_seqno anyhow. pass self.is_lock_contended = False self.has_lock = new_has_lock def __do_send_lock_request(self, method): self.__update_has_lock(False) self._lock_request_id = None if self._session.is_connected(): msg = ovs.jsonrpc.Message.create_request(method, [self.lock_name]) msg_id = msg.id self._session.send(msg) else: msg_id = None return msg_id def __send_lock_request(self): self._lock_request_id = self.__do_send_lock_request("lock") def __send_unlock_request(self): self.__do_send_lock_request("unlock") def __parse_lock_reply(self, result): self._lock_request_id = None got_lock = type(result) == dict and result.get("locked") is True self.__update_has_lock(got_lock) if not got_lock: self.is_lock_contended = True def __parse_lock_notify(self, params, new_has_lock): if (self.lock_name is not None and type(params) in (list, tuple) and params and params[0] == self.lock_name): self.__update_has_lock(self, new_has_lock) if not new_has_lock: self.is_lock_contended = True def __send_monitor_request(self): monitor_requests = {} for table in self.tables.itervalues(): monitor_requests[table.name] = {"columns": table.columns.keys()} msg = ovs.jsonrpc.Message.create_request( "monitor", [self._db.name, None, monitor_requests]) self._monitor_request_id = msg.id self._session.send(msg) def __parse_update(self, update): try: self.__do_parse_update(update) except error.Error, e: vlog.err("%s: error parsing update: %s" % (self._session.get_name(), e)) def __do_parse_update(self, table_updates): if 
type(table_updates) != dict: raise error.Error(" is not an object", table_updates) for table_name, table_update in table_updates.iteritems(): table = self.tables.get(table_name) if not table: raise error.Error(' includes unknown ' 'table "%s"' % table_name) if type(table_update) != dict: raise error.Error(' for table "%s" is not ' 'an object' % table_name, table_update) for uuid_string, row_update in table_update.iteritems(): if not ovs.ovsuuid.is_valid_string(uuid_string): raise error.Error(' for table "%s" ' 'contains bad UUID "%s" as member ' 'name' % (table_name, uuid_string), table_update) uuid = ovs.ovsuuid.from_string(uuid_string) if type(row_update) != dict: raise error.Error(' for table "%s" ' 'contains for %s that ' 'is not an object' % (table_name, uuid_string)) parser = ovs.db.parser.Parser(row_update, "row-update") old = parser.get_optional("old", [dict]) new = parser.get_optional("new", [dict]) parser.finish() if not old and not new: raise error.Error(' missing "old" and ' '"new" members', row_update) if self.__process_update(table, uuid, old, new): self.change_seqno += 1 def __process_update(self, table, uuid, old, new): """Returns True if a column changed, False otherwise.""" row = table.rows.get(uuid) changed = False if not new: # Delete row. if row: del table.rows[uuid] changed = True else: # XXX rate-limit vlog.warn("cannot delete missing row %s from table %s" % (uuid, table.name)) elif not old: # Insert row. 
if not row: row = self.__create_row(table, uuid) changed = True else: # XXX rate-limit vlog.warn("cannot add existing row %s to table %s" % (uuid, table.name)) if self.__row_update(table, row, new): changed = True else: if not row: row = self.__create_row(table, uuid) changed = True # XXX rate-limit vlog.warn("cannot modify missing row %s in table %s" % (uuid, table.name)) if self.__row_update(table, row, new): changed = True return changed def __row_update(self, table, row, row_json): changed = False for column_name, datum_json in row_json.iteritems(): column = table.columns.get(column_name) if not column: # XXX rate-limit vlog.warn("unknown column %s updating table %s" % (column_name, table.name)) continue try: datum = ovs.db.data.Datum.from_json(column.type, datum_json) except error.Error, e: # XXX rate-limit vlog.warn("error parsing column %s in table %s: %s" % (column_name, table.name, e)) continue if datum != row._data[column_name]: row._data[column_name] = datum if column.alert: changed = True else: # Didn't really change but the OVSDB monitor protocol always # includes every value in a row. pass return changed def __create_row(self, table, uuid): data = {} for column in table.columns.itervalues(): data[column.name] = ovs.db.data.Datum.default(column.type) row = table.rows[uuid] = Row(self, table, uuid, data) return row def __error(self): self._session.force_reconnect() def __txn_abort_all(self): while self._outstanding_txns: txn = self._outstanding_txns.popitem()[1] txn._status = Transaction.TRY_AGAIN def __txn_process_reply(self, msg): txn = self._outstanding_txns.pop(msg.id, None) if txn: txn._process_reply(msg) def _uuid_to_row(atom, base): if base.ref_table: return base.ref_table.rows.get(atom) else: return atom def _row_to_uuid(value): if type(value) == Row: return value.uuid else: return value class Row(object): """A row within an IDL. 
The client may access the following attributes directly: - 'uuid': a uuid.UUID object whose value is the row's database UUID. - An attribute for each column in the Row's table, named for the column, whose values are as returned by Datum.to_python() for the column's type. If some error occurs (e.g. the database server's idea of the column is different from the IDL's idea), then the attribute values is the "default" value return by Datum.default() for the column's type. (It is important to know this because the default value may violate constraints for the column's type, e.g. the default integer value is 0 even if column contraints require the column's value to be positive.) When a transaction is active, column attributes may also be assigned new values. Committing the transaction will then cause the new value to be stored into the database. *NOTE*: In the current implementation, the value of a column is a *copy* of the value in the database. This means that modifying its value directly will have no useful effect. For example, the following: row.mycolumn["a"] = "b" # don't do this will not change anything in the database, even after commit. To modify the column, instead assign the modified column value back to the column: d = row.mycolumn d["a"] = "b" row.mycolumn = d """ def __init__(self, idl, table, uuid, data): # All of the explicit references to self.__dict__ below are required # to set real attributes with invoking self.__getattr__(). self.__dict__["uuid"] = uuid self.__dict__["_idl"] = idl self.__dict__["_table"] = table # _data is the committed data. It takes the following values: # # - A dictionary that maps every column name to a Datum, if the row # exists in the committed form of the database. # # - None, if this row is newly inserted within the active transaction # and thus has no committed form. self.__dict__["_data"] = data # _changes describes changes to this row within the active transaction. 
# It takes the following values: # # - {}, the empty dictionary, if no transaction is active or if the # row has yet not been changed within this transaction. # # - A dictionary that maps a column name to its new Datum, if an # active transaction changes those columns' values. # # - A dictionary that maps every column name to a Datum, if the row # is newly inserted within the active transaction. # # - None, if this transaction deletes this row. self.__dict__["_changes"] = {} # A dictionary whose keys are the names of columns that must be # verified as prerequisites when the transaction commits. The values # in the dictionary are all None. self.__dict__["_prereqs"] = {} def __getattr__(self, column_name): assert self._changes is not None datum = self._changes.get(column_name) if datum is None: if self._data is None: raise AttributeError("%s instance has no attribute '%s'" % (self.__class__.__name__, column_name)) datum = self._data[column_name] return datum.to_python(_uuid_to_row) def __setattr__(self, column_name, value): assert self._changes is not None assert self._idl.txn column = self._table.columns[column_name] try: datum = ovs.db.data.Datum.from_python(column.type, value, _row_to_uuid) except error.Error, e: # XXX rate-limit vlog.err("attempting to write bad value to column %s (%s)" % (column_name, e)) return self._idl.txn._write(self, column, datum) def verify(self, column_name): """Causes the original contents of column 'column_name' in this row to be verified as a prerequisite to completing the transaction. That is, if 'column_name' changed in this row (or if this row was deleted) between the time that the IDL originally read its contents and the time that the transaction commits, then the transaction aborts and Transaction.commit() returns Transaction.TRY_AGAIN. The intention is that, to ensure that no transaction commits based on dirty reads, an application should call Row.verify() on each data item read as part of a read-modify-write operation. 
def delete(self):
    """Removes this row from its table as part of the active transaction.

    A row that was inserted by the same transaction is simply dropped from
    the transaction's set of rows; any other row is recorded in that set
    so that Transaction.commit() sees it as deleted.

    A transaction must be in progress."""
    txn = self._idl.txn
    assert txn
    assert self._changes is not None

    row_uuid = self.uuid
    if self._data is not None:
        txn._txn_rows[row_uuid] = self
    else:
        del txn._txn_rows[row_uuid]

    # Go through __dict__ so that this is not treated as a column write by
    # __setattr__().  None marks the row as deleted by this transaction.
    self.__dict__["_changes"] = None
    del self._table.rows[row_uuid]
The intended use is for incrementing the "next_cfg" column in the Open_vSwitch table.""" self._idl.txn._increment(self, column_name) def _uuid_name_from_uuid(uuid): return "row%s" % str(uuid).replace("-", "_") def _where_uuid_equals(uuid): return [["_uuid", "==", ["uuid", str(uuid)]]] class _InsertedRow(object): def __init__(self, op_index): self.op_index = op_index self.real = None class Transaction(object): """A transaction may modify the contents of a database by modifying the values of columns, deleting rows, inserting rows, or adding checks that columns in the database have not changed ("verify" operations), through Row methods. Reading and writing columns and inserting and deleting rows are all straightforward. The reasons to verify columns are less obvious. Verification is the key to maintaining transactional integrity. Because OVSDB handles multiple clients, it can happen that between the time that OVSDB client A reads a column and writes a new value, OVSDB client B has written that column. Client A's write should not ordinarily overwrite client B's, especially if the column in question is a "map" column that contains several more or less independent data items. If client A adds a "verify" operation before it writes the column, then the transaction fails in case client B modifies it first. Client A will then see the new value of the column and compose a new transaction based on the new contents written by client B. When a transaction is complete, which must be before the next call to Idl.run(), call Transaction.commit() or Transaction.abort(). The life-cycle of a transaction looks like this: 1. Create the transaction and record the initial sequence number: seqno = idl.change_seqno(idl) txn = Transaction(idl) 2. Modify the database with Row and Transaction methods. 3. Commit the transaction by calling Transaction.commit(). The first call to this function probably returns Transaction.INCOMPLETE. 
@staticmethod
def status_to_string(status):
    """Converts one of the status values that Transaction.commit() can
    return into a human-readable string.

    'status' is one of the Transaction status constants (UNCOMMITTED,
    UNCHANGED, INCOMPLETE, ABORTED, SUCCESS, TRY_AGAIN, NOT_LOCKED, ERROR).

    (The status values are in fact such strings already, so there's nothing
    to do.)"""
    # The constants are defined as descriptive strings, so the conversion is
    # the identity function.
    return status
def commit(self):
    """Attempts to commit this transaction.  Returns the status of the
    commit operation, one of the following constants:

      Transaction.INCOMPLETE:

          The transaction is in progress, but not yet complete.  The caller
          should call again later, after calling Idl.run() to let the
          IDL do OVSDB protocol processing.

      Transaction.UNCHANGED:

          The transaction is complete.  (It didn't actually change the
          database, so the IDL didn't send any request to the database
          server.)

      Transaction.ABORTED:

          The caller previously called Transaction.abort().

      Transaction.SUCCESS:

          The transaction was successful.  The update made by the
          transaction (and possibly other changes made by other database
          clients) should already be visible in the IDL.

      Transaction.TRY_AGAIN:

          The transaction failed for some transient reason, e.g. because a
          "verify" operation reported an inconsistency or due to a network
          problem.  The caller should wait for a change to the database,
          then compose a new transaction, and commit the new transaction.

          Use Idl.change_seqno to wait for a change in the database.  It is
          important to use its value *before* the initial call to
          Transaction.commit() as the baseline for this purpose, because
          the change that one should wait for can happen after the initial
          call but before the call that returns Transaction.TRY_AGAIN, and
          using some other baseline value in that situation could cause an
          indefinite wait if the database rarely changes.

      Transaction.NOT_LOCKED:

          The transaction failed because the IDL has been configured to
          require a database lock (with Idl.set_lock()) but didn't
          get it yet or has already lost it.

    Committing a transaction rolls back all of the changes that it made to
    the IDL's copy of the database.  If the transaction commits
    successfully, then the database server will send an update and, thus,
    the IDL will be updated with the committed changes."""
    # The status can only change if we're the active transaction.
    # (Otherwise, our status will change only in Idl.run().)
    if self != self.idl.txn:
        return self._status

    # If we need a lock but don't have it, give up quickly.
    if self.idl.lock_name and not self.idl.has_lock():
        self._status = Transaction.NOT_LOCKED
        self.__disassemble()
        return self._status

    # The first element of the request parameters is the database name; the
    # operations proper follow it.
    operations = [self.idl._db.name]

    # Assert that we have the required lock (avoiding a race).
    if self.idl.lock_name:
        operations.append({"op": "assert",
                           "lock": self.idl.lock_name})

    # Add prerequisites and declarations of new rows.
    #
    # Each row with columns registered through Row.verify() gets a "wait"
    # operation with a zero timeout that compares those columns against the
    # values the IDL currently holds for them.
    for row in self._txn_rows.itervalues():
        if row._prereqs:
            rows = {}
            columns = []
            for column_name in row._prereqs:
                columns.append(column_name)
                rows[column_name] = row._data[column_name].to_json()
            operations.append({"op": "wait",
                               "table": row._table.name,
                               "timeout": 0,
                               "where": _where_uuid_equals(row.uuid),
                               "until": "==",
                               "columns": columns,
                               "rows": [rows]})

    # Add updates.
    #
    # 'any_updates' records whether the transaction contains a real change.
    # If it stays False, nothing is sent to the server at all.
    any_updates = False
    for row in self._txn_rows.itervalues():
        if row._changes is None:
            # The row was deleted within this transaction.
            if row._table.is_root:
                operations.append({"op": "delete",
                                   "table": row._table.name,
                                   "where": _where_uuid_equals(row.uuid)})
                any_updates = True
            else:
                # Let ovsdb-server decide whether to really delete it.
                pass
        elif row._changes:
            op = {"table": row._table.name}
            if row._data is None:
                # No committed data: the row was inserted by this
                # transaction.
                op["op"] = "insert"
                op["uuid-name"] = _uuid_name_from_uuid(row.uuid)
                any_updates = True

                # Remember where this operation will sit, not counting the
                # leading database name, so that __process_insert_reply()
                # can look up the matching reply.
                # NOTE(review): presumably the reply array has one entry
                # per operation and no entry for the database name --
                # confirm against the protocol specification.
                op_index = len(operations) - 1
                self._inserted_rows[row.uuid] = _InsertedRow(op_index)
            else:
                op["op"] = "update"
                op["where"] = _where_uuid_equals(row.uuid)

            row_json = {}
            op["row"] = row_json

            for column_name, datum in row._changes.iteritems():
                # For an inserted row, columns left at their default value
                # are omitted from the request.
                if row._data is not None or not datum.is_default():
                    # References to rows inserted by this transaction are
                    # rewritten as "named-uuid" references.
                    row_json[column_name] = (
                        self._substitute_uuids(datum.to_json()))

                    # If anything really changed, consider it an update.
                    # We can't suppress not-really-changed values earlier
                    # or transactions would become nonatomic (see the big
                    # comment inside Transaction._write()).
                    if (not any_updates and row._data is not None and
                            row._data[column_name] != datum):
                        any_updates = True

            # An insert is always sent; an update only if it has content.
            if row._data is None or row_json:
                operations.append(op)

    # Add increment.
    #
    # The increment is a "mutate" followed by a "select" that reads back
    # the new value; '_inc_index' is the position of the "mutate" (again
    # not counting the database name), for __process_inc_reply().
    if self._inc_row and any_updates:
        self._inc_index = len(operations) - 1

        operations.append({"op": "mutate",
                           "table": self._inc_row._table.name,
                           "where": self._substitute_uuids(
                               _where_uuid_equals(self._inc_row.uuid)),
                           "mutations": [[self._inc_column, "+=", 1]]})
        operations.append({"op": "select",
                           "table": self._inc_row._table.name,
                           "where": self._substitute_uuids(
                               _where_uuid_equals(self._inc_row.uuid)),
                           "columns": [self._inc_column]})

    # Add comment.
    if self._comments:
        operations.append({"op": "comment",
                           "comment": "\n".join(self._comments)})

    # Dry run?
    if self.dry_run:
        operations.append({"op": "abort"})

    if not any_updates:
        self._status = Transaction.UNCHANGED
    else:
        msg = ovs.jsonrpc.Message.create_request("transact", operations)
        self._request_id = msg.id
        # A false return value from send() is treated as success here: the
        # transaction is then registered so that the reply can be matched
        # to it by request id.
        if not self.idl._session.send(msg):
            self.idl._outstanding_txns[self._request_id] = self
            self._status = Transaction.INCOMPLETE
        else:
            self._status = Transaction.TRY_AGAIN

    # Whatever the outcome, undo this transaction's local modifications of
    # the IDL's rows and detach it from the IDL.
    self.__disassemble()
    return self._status
If Transaction.commit() has already been called then the transaction might get committed anyhow.""" self.__disassemble() if self._status in (Transaction.UNCOMMITTED, Transaction.INCOMPLETE): self._status = Transaction.ABORTED def get_error(self): """Returns a string representing this transaction's current status, suitable for use in log messages.""" if self._status != Transaction.ERROR: return Transaction.status_to_string(self._status) elif self._error: return self._error else: return "no error details available" def __set_error_json(self, json): if self._error is None: self._error = ovs.json.to_string(json) def get_insert_uuid(self, uuid): """Finds and returns the permanent UUID that the database assigned to a newly inserted row, given the UUID that Transaction.insert() assigned locally to that row. Returns None if 'uuid' is not a UUID assigned by Transaction.insert() or if it was assigned by that function and then deleted by Row.delete() within the same transaction. (Rows that are inserted and then deleted within a single transaction are never sent to the database server, so it never assigns them a permanent UUID.) This transaction must have completed successfully.""" assert self._status in (Transaction.SUCCESS, Transaction.UNCHANGED) inserted_row = self._inserted_rows.get(uuid) if inserted_row: return inserted_row.real return None def _increment(self, row, column): assert not self._inc_row self._inc_row = row self._inc_column = column def _write(self, row, column, datum): assert row._changes is not None txn = row._idl.txn # If this is a write-only column and the datum being written is the # same as the one already there, just skip the update entirely. This # is worth optimizing because we have a lot of columns that get # periodically refreshed into the database but don't actually change # that often. 
# # We don't do this for read/write columns because that would break # atomicity of transactions--some other client might have written a # different value in that column since we read it. (But if a whole # transaction only does writes of existing values, without making any # real changes, we will drop the whole transaction later in # ovsdb_idl_txn_commit().) if not column.alert and row._data.get(column.name) == datum: new_value = row._changes.get(column.name) if new_value is None or new_value == datum: return txn._txn_rows[row.uuid] = row row._changes[column.name] = datum.copy() def insert(self, table, new_uuid=None): """Inserts and returns a new row in 'table', which must be one of the ovs.db.schema.TableSchema objects in the Idl's 'tables' dict. The new row is assigned a provisional UUID. If 'uuid' is None then one is randomly generated; otherwise 'uuid' should specify a randomly generated uuid.UUID not otherwise in use. ovsdb-server will assign a different UUID when 'txn' is committed, but the IDL will replace any uses of the provisional UUID in the data to be to be committed by the UUID assigned by ovsdb-server.""" assert self._status == Transaction.UNCOMMITTED if new_uuid is None: new_uuid = uuid.uuid4() row = Row(self.idl, table, new_uuid, None) table.rows[row.uuid] = row self._txn_rows[row.uuid] = row return row def _process_reply(self, msg): if msg.type == ovs.jsonrpc.Message.T_ERROR: self._status = Transaction.ERROR elif type(msg.result) not in (list, tuple): # XXX rate-limit vlog.warn('reply to "transact" is not JSON array') else: hard_errors = False soft_errors = False lock_errors = False ops = msg.result for op in ops: if op is None: # This isn't an error in itself but indicates that some # prior operation failed, so make sure that we know about # it. 
def __process_inc_reply(self, ops):
    """Extracts the result of this transaction's increment from 'ops', the
    list of per-operation replies to the "transact" request.

    The "mutate" reply at index '_inc_index' must report that exactly one
    row was changed, and the "select" reply that follows it must contain
    exactly one row holding an integer in the incremented column.  On
    success, stores that integer in '_inc_new_value' and returns True.
    Otherwise logs a warning and returns False."""
    if self._inc_index + 2 > len(ops):
        # XXX rate-limit
        vlog.warn("reply does not contain enough operations for "
                  "increment (has %d, needs %d)" %
                  (len(ops), self._inc_index + 2))
        # Bail out, as __process_insert_reply() does in the same
        # situation; indexing 'ops' below would raise IndexError.
        return False

    # We know that this is a JSON object because the loop in
    # __process_reply() already checked.
    mutate = ops[self._inc_index]
    count = mutate.get("count")
    if not Transaction.__check_json_type(count, (int, long),
                                         '"mutate" reply "count"'):
        return False
    if count != 1:
        # XXX rate-limit
        vlog.warn('"mutate" reply "count" is %d instead of 1' % count)
        return False

    select = ops[self._inc_index + 1]
    rows = select.get("rows")
    if not Transaction.__check_json_type(rows, (list, tuple),
                                         '"select" reply "rows"'):
        return False
    if len(rows) != 1:
        # XXX rate-limit
        vlog.warn('"select" reply "rows" has %d elements '
                  'instead of 1' % len(rows))
        return False
    row = rows[0]
    if not Transaction.__check_json_type(row, (dict,),
                                         '"select" reply row'):
        return False
    # NOTE(review): __check_json_type() treats any false value as
    # "missing", so an incremented value of exactly 0 is reported as an
    # error here -- confirm whether that is intended.
    column = row.get(self._inc_column)
    if not Transaction.__check_json_type(column, (int, long),
                                         '"select" reply inc column'):
        return False
    self._inc_new_value = column
    return True
def register_columns(self, table, columns):
    """Registers interest in the given 'columns' of 'table'.  Future calls
    to get_idl_schema() will include 'table':column for each column in
    'columns'.  Columns registered earlier for the same table are kept, and
    duplicates are stored only once.

    'table' must be a string.
    'columns' must be a list of strings.
    """
    assert type(table) is str
    assert type(columns) is list

    # Merge with whatever was registered for this table before.
    already_registered = self._tables.get(table, set())
    self._tables[table] = already_registered.union(columns)
'table' must be a string """ assert type(table) is str self._tables[table] = set() # empty set means all columns in the table def register_all(self): """Registers interest in every column of every table.""" self._all = True def get_idl_schema(self): """Gets a schema appropriate for the creation of an 'ovs.db.id.IDL' object based on columns registered using the register_columns() function.""" schema = ovs.db.schema.DbSchema.from_json(self.schema_json) self.schema_json = None if not self._all: schema_tables = {} for table, columns in self._tables.iteritems(): schema_tables[table] = ( self._keep_table_columns(schema, table, columns)) schema.tables = schema_tables return schema def _keep_table_columns(self, schema, table_name, columns): assert table_name in schema.tables table = schema.tables[table_name] if not columns: # empty set means all columns in the table return table new_columns = {} for column_name in columns: assert type(column_name) is str assert column_name in table.columns new_columns[column_name] = table.columns[column_name] table.columns = new_columns return table openvswitch-2.0.1+git20140120/python/ovs/db/parser.py000066400000000000000000000064031226605124000220400ustar00rootroot00000000000000# Copyright (c) 2010, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
class Parser(object):
    """Checked access to the members of a JSON object.

    A Parser wraps a dict 'json' and a descriptive 'name' that is used in
    error messages.  Members are fetched with get() and get_optional(),
    which check their types; finish() then verifies that the object has no
    members that were never fetched.  All failures are reported by raising
    error.Error."""

    def __init__(self, json, name):
        self.name = name
        self.json = json
        if type(json) != dict:
            self.__raise_error("Object expected.")
        # Names of the members fetched so far.
        self.used = set()

    def __get(self, name, types, optional, default=None):
        """Common implementation of get() and get_optional()."""
        if name not in self.json:
            if not optional:
                self.__raise_error("Required '%s' member is missing." % name)
            return default

        self.used.add(name)
        member = float_to_int(self.json[name])

        # The pseudo-type "id" accepts any string that is an identifier.
        if is_identifier(member) and "id" in types:
            return member

        # An empty 'types' accepts a member of any type.
        if len(types) and type(member) not in types:
            self.__raise_error("Type mismatch for member '%s'." % name)
        return member

    def get(self, name, types):
        """Returns required member 'name', whose type must be in 'types'."""
        return self.__get(name, types, False)

    def get_optional(self, name, types, default=None):
        """Returns member 'name', whose type must be in 'types', or
        'default' if the object has no such member."""
        return self.__get(name, types, True, default)

    def __raise_error(self, message):
        raise error.Error("Parsing %s failed: %s" % (self.name, message),
                          self.json)

    def finish(self):
        """Raises error.Error if the object has any member that was never
        fetched."""
        unexpected = set(self.json).difference(self.used)
        if not unexpected:
            return

        name = unexpected.pop()
        n_others = len(unexpected)
        if n_others > 1:
            present = "and %d other members are" % n_others
        elif n_others:
            present = "and 1 other member are"
        else:
            present = "is"
        self.__raise_error("Member '%s' %s present but not allowed here" %
                           (name, present))
def json_type_to_string(type_):
    """Returns the name of the JSON type that corresponds to Python type
    'type_': "null" for None, "boolean" for bool, "object" for dict,
    "array" for list, "number" for int, long, or float, and "string" for
    str or unicode.  Returns "" for anything else."""
    # None is a singleton, so test for it by identity rather than with ==.
    if type_ is None:
        return "null"
    elif type_ == bool:
        return "boolean"
    elif type_ == dict:
        return "object"
    elif type_ == list:
        return "array"
    elif type_ in [int, long, float]:
        return "number"
    elif type_ in [str, unicode]:
        return "string"
    else:
        return ""
class DbSchema(object):
    """Schema for an OVSDB database."""

    def __init__(self, name, version, tables):
        """Creates a schema named 'name' with version string 'version' (or
        None) and 'tables', a dictionary that maps table names to
        TableSchema objects.

        May modify the tables and their columns: see the comments below."""
        self.name = name
        self.version = version
        self.tables = tables

        # "isRoot" was not part of the original schema definition.  Before it
        # was added, there was no support for garbage collection.  So, for
        # backward compatibility, if the root set is empty then assume that
        # every table is in the root set.
        if self.__root_set_size() == 0:
            for table in self.tables.itervalues():
                table.is_root = True

        # Find the "ref_table"s referenced by "ref_table_name"s.
        #
        # Also force certain columns to be persistent, as explained in
        # __follow_ref_table().  This requires 'is_root' to be known, so this
        # must follow the loop updating 'is_root' above.
        for table in self.tables.itervalues():
            for column in table.columns.itervalues():
                self.__follow_ref_table(column, column.type.key, "key")
                self.__follow_ref_table(column, column.type.value, "value")

    def __root_set_size(self):
        """Returns the number of tables in the schema's root set."""
        return sum(1 for table in self.tables.itervalues() if table.is_root)

    @staticmethod
    def from_json(json):
        """Parses 'json', a database schema in JSON form, and returns the
        corresponding DbSchema.  Raises error.Error if 'json' is not a valid
        schema."""
        parser = ovs.db.parser.Parser(json, "database schema")
        name = parser.get("name", ['id'])
        version = parser.get_optional("version", [str, unicode])
        # "cksum" is accepted but its value is not used.
        parser.get_optional("cksum", [str, unicode])
        tablesJson = parser.get("tables", [dict])
        parser.finish()

        # Raw string literal so that "\." reaches the regex engine as
        # written instead of relying on an unrecognized string escape.
        if (version is not None and
                not re.match(r'[0-9]+\.[0-9]+\.[0-9]+$', version)):
            raise error.Error('schema version "%s" not in format x.y.z'
                              % version)

        tables = {}
        for tableName, tableJson in tablesJson.iteritems():
            _check_id(tableName, json)
            tables[tableName] = TableSchema.from_json(tableJson, tableName)

        return DbSchema(name, version, tables)

    def to_json(self):
        """Returns this schema serialized into JSON."""
        # "isRoot" was not part of the original schema definition.  Before it
        # was added, there was no support for garbage collection.  So, for
        # backward compatibility, if every table is in the root set then do not
        # output "isRoot" in table schemas.
        default_is_root = self.__root_set_size() == len(self.tables)

        tables = {}
        for table in self.tables.itervalues():
            tables[table.name] = table.to_json(default_is_root)
        json = {"name": self.name, "tables": tables}
        if self.version:
            json["version"] = self.version
        return json

    def copy(self):
        """Returns an independent copy of this schema, produced by
        serializing it to JSON and parsing the result."""
        return DbSchema.from_json(self.to_json())

    def __follow_ref_table(self, column, base, base_name):
        """Resolves the table reference, if any, in 'base', which is the
        key or value type (as named by 'base_name') of 'column'.

        Does nothing unless 'base' is a UUID type that names a "ref_table".
        Raises error.Error if the named table does not exist."""
        if not base or base.type != types.UuidType or not base.ref_table_name:
            return

        base.ref_table = self.tables.get(base.ref_table_name)
        if not base.ref_table:
            raise error.Error("column %s %s refers to undefined table %s"
                              % (column.name, base_name, base.ref_table_name),
                              tag="syntax error")

        if base.is_strong_ref() and not base.ref_table.is_root:
            # We cannot allow a strong reference to a non-root table to be
            # ephemeral: if it is the only reference to a row, then replaying
            # the database log from disk will cause the referenced row to be
            # deleted, even though it did exist in memory.  If there are
            # references to that row later in the log (to modify it, to delete
            # it, or just to point to it), then this will yield a transaction
            # error.
            column.persistent = True
raise error.Error("array of distinct column names expected", json) return tuple([columns[column_name] for column_name in json]) class TableSchema(object): def __init__(self, name, columns, mutable=True, max_rows=sys.maxint, is_root=True, indexes=[]): self.name = name self.columns = columns self.mutable = mutable self.max_rows = max_rows self.is_root = is_root self.indexes = indexes @staticmethod def from_json(json, name): parser = ovs.db.parser.Parser(json, "table schema for table %s" % name) columns_json = parser.get("columns", [dict]) mutable = parser.get_optional("mutable", [bool], True) max_rows = parser.get_optional("maxRows", [int]) is_root = parser.get_optional("isRoot", [bool], False) indexes_json = parser.get_optional("indexes", [list], []) parser.finish() if max_rows == None: max_rows = sys.maxint elif max_rows <= 0: raise error.Error("maxRows must be at least 1", json) if not columns_json: raise error.Error("table must have at least one column", json) columns = {} for column_name, column_json in columns_json.iteritems(): _check_id(column_name, json) columns[column_name] = ColumnSchema.from_json(column_json, column_name) indexes = [] for index_json in indexes_json: index = column_set_from_json(index_json, columns) if not index: raise error.Error("index must have at least one column", json) elif len(index) == 1: index[0].unique = True for column in index: if not column.persistent: raise error.Error("ephemeral columns (such as %s) may " "not be indexed" % column.name, json) indexes.append(index) return TableSchema(name, columns, mutable, max_rows, is_root, indexes) def to_json(self, default_is_root=False): """Returns this table schema serialized into JSON. The "isRoot" member is included in the JSON only if its value would differ from 'default_is_root'. Ordinarily 'default_is_root' should be false, because ordinarily a table would be not be part of the root set if its "isRoot" member is omitted. 
However, garbage collection was not originally included in OVSDB, so in older schemas that do not include any "isRoot" members, every table is implicitly part of the root set. To serialize such a schema in a way that can be read by older OVSDB tools, specify 'default_is_root' as True. """ json = {} if not self.mutable: json["mutable"] = False if default_is_root != self.is_root: json["isRoot"] = self.is_root json["columns"] = columns = {} for column in self.columns.itervalues(): if not column.name.startswith("_"): columns[column.name] = column.to_json() if self.max_rows != sys.maxint: json["maxRows"] = self.max_rows if self.indexes: json["indexes"] = [] for index in self.indexes: json["indexes"].append([column.name for column in index]) return json class ColumnSchema(object): def __init__(self, name, mutable, persistent, type_): self.name = name self.mutable = mutable self.persistent = persistent self.type = type_ self.unique = False @staticmethod def from_json(json, name): parser = ovs.db.parser.Parser(json, "schema for column %s" % name) mutable = parser.get_optional("mutable", [bool], True) ephemeral = parser.get_optional("ephemeral", [bool], False) type_ = types.Type.from_json(parser.get("type", [dict, str, unicode])) parser.finish() return ColumnSchema(name, mutable, not ephemeral, type_) def to_json(self): json = {"type": self.type.to_json()} if not self.mutable: json["mutable"] = False if not self.persistent: json["ephemeral"] = True return json openvswitch-2.0.1+git20140120/python/ovs/db/types.py000066400000000000000000000523371226605124000217170ustar00rootroot00000000000000# Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
def escapeCString(src):
    """Returns 'src' escaped for use inside a C string literal.

    Backslashes and double quotes get a leading backslash.  Characters below
    32 use their named C escape when one is listed here, otherwise a
    three-digit octal escape.  All other characters are copied unchanged.
    """
    named_escapes = {'\n': '\\n', '\r': '\\r', '\a': '\\a', '\b': '\\b',
                     '\f': '\\f', '\t': '\\t', '\v': '\\v'}
    pieces = []
    for ch in src:
        if ch in "\\\"":
            pieces.append("\\" + ch)
        elif ord(ch) >= 32:
            pieces.append(ch)
        elif ch in named_escapes:
            pieces.append(named_escapes[ch])
        else:
            pieces.append('\\%03o' % ord(ch))
    return "".join(pieces)
def _commafy(s):
    """Returns string 's' with a comma inserted before every group of three
    trailing characters.  Leading '-' characters are kept in front and are
    not counted when grouping."""
    digits = s.lstrip('-')
    sign = s[:len(s) - len(digits)]

    # Peel groups of three off the right-hand end; what is left over (zero
    # to three characters) becomes the leading group.
    groups = []
    while len(digits) > 3:
        groups.append(digits[-3:])
        digits = digits[:-3]
    groups.append(digits)
    groups.reverse()

    return sign + ','.join(groups)
parser.get_optional("maxInteger", [int, long]) if (base.min is not None and base.max is not None and base.min > base.max): raise error.Error("minInteger exceeds maxInteger", json) elif base.type == RealType: base.min = parser.get_optional("minReal", [int, long, float]) base.max = parser.get_optional("maxReal", [int, long, float]) if (base.min is not None and base.max is not None and base.min > base.max): raise error.Error("minReal exceeds maxReal", json) elif base.type == StringType: base.min_length = BaseType.__parse_uint(parser, "minLength", 0) base.max_length = BaseType.__parse_uint(parser, "maxLength", sys.maxint) if base.min_length > base.max_length: raise error.Error("minLength exceeds maxLength", json) elif base.type == UuidType: base.ref_table_name = parser.get_optional("refTable", ['id']) if base.ref_table_name: base.ref_type = parser.get_optional("refType", [str, unicode], "strong") if base.ref_type not in ['strong', 'weak']: raise error.Error('refType must be "strong" or "weak" ' '(not "%s")' % base.ref_type) parser.finish() return base def to_json(self): if not self.has_constraints(): return self.type.to_json() json = {'type': self.type.to_json()} if self.enum: json['enum'] = self.enum.to_json() if self.type == IntegerType: if self.min is not None: json['minInteger'] = self.min if self.max is not None: json['maxInteger'] = self.max elif self.type == RealType: if self.min is not None: json['minReal'] = self.min if self.max is not None: json['maxReal'] = self.max elif self.type == StringType: if self.min_length != 0: json['minLength'] = self.min_length if self.max_length != sys.maxint: json['maxLength'] = self.max_length elif self.type == UuidType: if self.ref_table_name: json['refTable'] = self.ref_table_name if self.ref_type != 'strong': json['refType'] = self.ref_type return json def copy(self): base = BaseType(self.type, self.enum.copy(), self.min, self.max, self.min_length, self.max_length, self.ref_table_name) base.ref_table = self.ref_table return 
base def is_valid(self): if self.type in (VoidType, BooleanType, UuidType): return True elif self.type in (IntegerType, RealType): return self.min is None or self.max is None or self.min <= self.max elif self.type == StringType: return self.min_length <= self.max_length else: return False def has_constraints(self): return (self.enum is not None or self.min is not None or self.max is not None or self.min_length != 0 or self.max_length != sys.maxint or self.ref_table_name is not None) def without_constraints(self): return BaseType(self.type) @staticmethod def get_enum_type(atomic_type): """Returns the type of the 'enum' member for a BaseType whose 'type' is 'atomic_type'.""" return Type(BaseType(atomic_type), None, 1, sys.maxint) def is_ref(self): return self.type == UuidType and self.ref_table_name is not None def is_strong_ref(self): return self.is_ref() and self.ref_type == 'strong' def is_weak_ref(self): return self.is_ref() and self.ref_type == 'weak' def toEnglish(self, escapeLiteral=returnUnchanged): if self.type == UuidType and self.ref_table_name: s = escapeLiteral(self.ref_table_name) if self.ref_type == 'weak': s = "weak reference to " + s return s else: return self.type.to_string() def constraintsToEnglish(self, escapeLiteral=returnUnchanged, escapeNumber=returnUnchanged): if self.enum: literals = [value.toEnglish(escapeLiteral) for value in self.enum.values] if len(literals) == 1: english = 'must be %s' % (literals[0]) elif len(literals) == 2: english = 'either %s or %s' % (literals[0], literals[1]) else: english = 'one of %s, %s, or %s' % (literals[0], ', '.join(literals[1:-1]), literals[-1]) elif self.min is not None and self.max is not None: if self.type == IntegerType: english = 'in range %s to %s' % ( escapeNumber(commafy(self.min)), escapeNumber(commafy(self.max))) else: english = 'in range %s to %s' % ( escapeNumber("%g" % self.min), escapeNumber("%g" % self.max)) elif self.min is not None: if self.type == IntegerType: english = 'at least %s' % 
escapeNumber(commafy(self.min)) else: english = 'at least %s' % escapeNumber("%g" % self.min) elif self.max is not None: if self.type == IntegerType: english = 'at most %s' % escapeNumber(commafy(self.max)) else: english = 'at most %s' % escapeNumber("%g" % self.max) elif self.min_length != 0 and self.max_length != sys.maxint: if self.min_length == self.max_length: english = ('exactly %s characters long' % commafy(self.min_length)) else: english = ('between %s and %s characters long' % (commafy(self.min_length), commafy(self.max_length))) elif self.min_length != 0: return 'at least %s characters long' % commafy(self.min_length) elif self.max_length != sys.maxint: english = 'at most %s characters long' % commafy(self.max_length) else: english = '' return english def toCType(self, prefix): if self.ref_table_name: return "struct %s%s *" % (prefix, self.ref_table_name.lower()) else: return {IntegerType: 'int64_t ', RealType: 'double ', UuidType: 'struct uuid ', BooleanType: 'bool ', StringType: 'char *'}[self.type] def toAtomicType(self): return "OVSDB_TYPE_%s" % self.type.to_string().upper() def copyCValue(self, dst, src): args = {'dst': dst, 'src': src} if self.ref_table_name: return ("%(dst)s = %(src)s->header_.uuid;") % args elif self.type == StringType: return "%(dst)s = xstrdup(%(src)s);" % args else: return "%(dst)s = %(src)s;" % args def assign_c_value_casting_away_const(self, dst, src): args = {'dst': dst, 'src': src} if self.ref_table_name: return ("%(dst)s = %(src)s->header_.uuid;") % args elif self.type == StringType: return "%(dst)s = CONST_CAST(char *, %(src)s);" % args else: return "%(dst)s = %(src)s;" % args def initCDefault(self, var, is_optional): if self.ref_table_name: return "%s = NULL;" % var elif self.type == StringType and not is_optional: return '%s = "";' % var else: pattern = {IntegerType: '%s = 0;', RealType: '%s = 0.0;', UuidType: 'uuid_zero(&%s);', BooleanType: '%s = false;', StringType: '%s = NULL;'}[self.type] return pattern % var def 
cInitBaseType(self, indent, var): stmts = [] stmts.append('ovsdb_base_type_init(&%s, %s);' % ( var, self.toAtomicType())) if self.enum: stmts.append("%s.enum_ = xmalloc(sizeof *%s.enum_);" % (var, var)) stmts += self.enum.cInitDatum("%s.enum_" % var) if self.type == IntegerType: if self.min is not None: stmts.append('%s.u.integer.min = INT64_C(%d);' % (var, self.min)) if self.max is not None: stmts.append('%s.u.integer.max = INT64_C(%d);' % (var, self.max)) elif self.type == RealType: if self.min is not None: stmts.append('%s.u.real.min = %d;' % (var, self.min)) if self.max is not None: stmts.append('%s.u.real.max = %d;' % (var, self.max)) elif self.type == StringType: if self.min_length is not None: stmts.append('%s.u.string.minLen = %d;' % (var, self.min_length)) if self.max_length != sys.maxint: stmts.append('%s.u.string.maxLen = %d;' % (var, self.max_length)) elif self.type == UuidType: if self.ref_table_name is not None: stmts.append('%s.u.uuid.refTableName = "%s";' % (var, escapeCString(self.ref_table_name))) stmts.append('%s.u.uuid.refType = OVSDB_REF_%s;' % (var, self.ref_type.upper())) return '\n'.join([indent + stmt for stmt in stmts]) class Type(object): DEFAULT_MIN = 1 DEFAULT_MAX = 1 def __init__(self, key, value=None, n_min=DEFAULT_MIN, n_max=DEFAULT_MAX): self.key = key self.value = value self.n_min = n_min self.n_max = n_max def copy(self): if self.value is None: value = None else: value = self.value.copy() return Type(self.key.copy(), value, self.n_min, self.n_max) def __eq__(self, other): if not isinstance(other, Type): return NotImplemented return (self.key == other.key and self.value == other.value and self.n_min == other.n_min and self.n_max == other.n_max) def __ne__(self, other): if not isinstance(other, Type): return NotImplemented else: return not (self == other) def is_valid(self): return (self.key.type != VoidType and self.key.is_valid() and (self.value is None or (self.value.type != VoidType and self.value.is_valid())) and self.n_min <= 
1 <= self.n_max) def is_scalar(self): return self.n_min == 1 and self.n_max == 1 and not self.value def is_optional(self): return self.n_min == 0 and self.n_max == 1 def is_composite(self): return self.n_max > 1 def is_set(self): return self.value is None and (self.n_min != 1 or self.n_max != 1) def is_map(self): return self.value is not None def is_smap(self): return (self.is_map() and self.key.type == StringType and self.value.type == StringType) def is_optional_pointer(self): return (self.is_optional() and not self.value and (self.key.type == StringType or self.key.ref_table_name)) @staticmethod def __n_from_json(json, default): if json is None: return default elif type(json) == int and 0 <= json <= sys.maxint: return json else: raise error.Error("bad min or max value", json) @staticmethod def from_json(json): if type(json) in [str, unicode]: return Type(BaseType.from_json(json)) parser = ovs.db.parser.Parser(json, "ovsdb type") key_json = parser.get("key", [dict, str, unicode]) value_json = parser.get_optional("value", [dict, str, unicode]) min_json = parser.get_optional("min", [int]) max_json = parser.get_optional("max", [int, str, unicode]) parser.finish() key = BaseType.from_json(key_json) if value_json: value = BaseType.from_json(value_json) else: value = None n_min = Type.__n_from_json(min_json, Type.DEFAULT_MIN) if max_json == 'unlimited': n_max = sys.maxint else: n_max = Type.__n_from_json(max_json, Type.DEFAULT_MAX) type_ = Type(key, value, n_min, n_max) if not type_.is_valid(): raise error.Error("ovsdb type fails constraint checks", json) return type_ def to_json(self): if self.is_scalar() and not self.key.has_constraints(): return self.key.to_json() json = {"key": self.key.to_json()} if self.value is not None: json["value"] = self.value.to_json() if self.n_min != Type.DEFAULT_MIN: json["min"] = self.n_min if self.n_max == sys.maxint: json["max"] = "unlimited" elif self.n_max != Type.DEFAULT_MAX: json["max"] = self.n_max return json def toEnglish(self, 
escapeLiteral=returnUnchanged): keyName = self.key.toEnglish(escapeLiteral) if self.value: valueName = self.value.toEnglish(escapeLiteral) if self.is_scalar(): return keyName elif self.is_optional(): if self.value: return "optional %s-%s pair" % (keyName, valueName) else: return "optional %s" % keyName else: if self.n_max == sys.maxint: if self.n_min: quantity = "%s or more " % commafy(self.n_min) else: quantity = "" elif self.n_min: quantity = "%s to %s " % (commafy(self.n_min), commafy(self.n_max)) else: quantity = "up to %s " % commafy(self.n_max) if self.value: return "map of %s%s-%s pairs" % (quantity, keyName, valueName) else: if keyName.endswith('s'): plural = keyName + "es" else: plural = keyName + "s" return "set of %s%s" % (quantity, plural) def constraintsToEnglish(self, escapeLiteral=returnUnchanged, escapeNumber=returnUnchanged): constraints = [] keyConstraints = self.key.constraintsToEnglish(escapeLiteral, escapeNumber) if keyConstraints: if self.value: constraints.append('key %s' % keyConstraints) else: constraints.append(keyConstraints) if self.value: valueConstraints = self.value.constraintsToEnglish(escapeLiteral, escapeNumber) if valueConstraints: constraints.append('value %s' % valueConstraints) return ', '.join(constraints) def cDeclComment(self): if self.n_min == 1 and self.n_max == 1 and self.key.type == StringType: return "\t/* Always nonnull. 
*/" else: return "" def cInitType(self, indent, var): initKey = self.key.cInitBaseType(indent, "%s.key" % var) if self.value: initValue = self.value.cInitBaseType(indent, "%s.value" % var) else: initValue = ('%sovsdb_base_type_init(&%s.value, ' 'OVSDB_TYPE_VOID);' % (indent, var)) initMin = "%s%s.n_min = %s;" % (indent, var, self.n_min) if self.n_max == sys.maxint: n_max = "UINT_MAX" else: n_max = self.n_max initMax = "%s%s.n_max = %s;" % (indent, var, n_max) return "\n".join((initKey, initValue, initMin, initMax)) openvswitch-2.0.1+git20140120/python/ovs/dirs.py000066400000000000000000000007571226605124000211260ustar00rootroot00000000000000import os PKGDATADIR = os.environ.get("OVS_PKGDATADIR", """/usr/local/share/openvswitch""") RUNDIR = os.environ.get("OVS_RUNDIR", """/var/run""") LOGDIR = os.environ.get("OVS_LOGDIR", """/usr/local/var/log""") BINDIR = os.environ.get("OVS_BINDIR", """/usr/local/bin""") DBDIR = os.environ.get("OVS_DBDIR") if not DBDIR: sysconfdir = os.environ.get("OVS_SYSCONFDIR") if sysconfdir: DBDIR = "%s/openvswitch" % sysconfdir else: DBDIR = """/usr/local/etc/openvswitch""" openvswitch-2.0.1+git20140120/python/ovs/dirs.py.template000066400000000000000000000012271226605124000227310ustar00rootroot00000000000000## The @variables@ in this file are replaced by default directories for ## use in python/ovs/dirs.py in the source directory and replaced by the ## configured directories for use in the installed python/ovs/dirs.py. 
def fork():
    """Discards every registered fatal-signal hook without running it.

    Each hook that was registered with a 'cancel' function has that function
    called first, so that it can free resources, etc.

    One of the two processes that result from a fork can call this function
    so that it may terminate without running hooks registered before the
    call.  Hooks registered afterward take effect normally."""
    global _hooks
    for registration in _hooks:
        _, cancel_fn, _ = registration
        if not cancel_fn:
            continue
        cancel_fn()

    _hooks = []
def _call_hooks(signr):
    """Runs the registered hooks, at most once per process.

    'signr' is the signal number being handled, or 0 when called at normal
    exit.  With 'signr' 0 only hooks registered with a true 'run_at_exit'
    are run; for any other value every hook is run.  Calls made after the
    first one return without doing anything."""
    global recurse
    if not recurse:
        recurse = True

        from_signal = signr != 0
        for hook_fn, _cancel_fn, at_exit in _hooks:
            if from_signal or at_exit:
                hook_fn()
class _Serializer(object):
    """Writes Python values to 'stream' as JSON text.

    When 'pretty' is true, nested values are put on their own lines and
    indented SPACES_PER_LEVEL spaces per nesting level.  When 'sort_keys' is
    true, the members of each object are written in sorted order.
    """

    def __init__(self, stream, pretty, sort_keys):
        self.stream = stream
        self.pretty = pretty
        self.sort_keys = sort_keys
        self.depth = 0

    def __serialize_string(self, s):
        # Characters listed in 'escapes' are replaced; all others pass
        # through unchanged.
        escaped = ''.join(escapes.get(ord(c), c) for c in s)
        self.stream.write(u'"%s"' % escaped)

    def __indent_line(self):
        if not self.pretty:
            return
        self.stream.write('\n')
        self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))

    def __separate(self, position):
        # Every element but the first is preceded by a comma and, in pretty
        # mode, a fresh indented line.
        if position > 0:
            self.stream.write(u",")
            self.__indent_line()

    def __serialize_object(self, obj):
        self.stream.write(u"{")

        self.depth += 1
        self.__indent_line()

        if self.sort_keys:
            members = sorted(obj.items())
        else:
            members = obj.iteritems()
        for position, (name, member) in enumerate(members):
            self.__separate(position)
            self.__serialize_string(unicode(name))
            self.stream.write(u":")
            if self.pretty:
                self.stream.write(u' ')
            self.serialize(member)

        self.stream.write(u"}")
        self.depth -= 1

    def __serialize_array(self, obj):
        self.stream.write(u"[")
        self.depth += 1

        if obj:
            self.__indent_line()

            for position, element in enumerate(obj):
                self.__separate(position)
                self.serialize(element)

        self.depth -= 1
        self.stream.write(u"]")

    def serialize(self, obj):
        """Writes 'obj' to the stream as JSON.

        Accepts None, booleans, int, long, float, str, unicode, dict, list
        and tuple; raises Exception for any other type."""
        if obj is None:
            self.stream.write(u"null")
        elif obj is False:
            self.stream.write(u"false")
        elif obj is True:
            self.stream.write(u"true")
        else:
            kind = type(obj)
            if kind in (int, long):
                self.stream.write(u"%d" % obj)
            elif kind == float:
                self.stream.write("%.15g" % obj)
            elif kind == unicode:
                self.__serialize_string(obj)
            elif kind == str:
                self.__serialize_string(unicode(obj))
            elif kind == dict:
                self.__serialize_object(obj)
            elif kind in (list, tuple):
                self.__serialize_array(obj)
            else:
                raise Exception("can't serialize %s as JSON" % obj)
def from_stream(stream):
    """Parses JSON read from 'stream' and returns the parser's result.

    Input is read 4096 bytes at a time until read() returns "" or the parser
    stops short of consuming a whole chunk.  The return value is whatever
    Parser.finish() returns."""
    parser = Parser(check_trailer=True)
    chunk = stream.read(4096)
    while chunk != "":
        consumed = parser.feed(chunk)
        if consumed != len(chunk):
            # The parser finished or hit an error partway through.
            break
        chunk = stream.read(4096)
    return parser.finish()
self.done = False self.error = None def __lex_start_space(self, c): pass def __lex_start_alpha(self, c): self.buffer = c self.lex_state = Parser.__lex_keyword def __lex_start_token(self, c): self.__parser_input(c) def __lex_start_number(self, c): self.buffer = c self.lex_state = Parser.__lex_number def __lex_start_string(self, _): self.lex_state = Parser.__lex_string def __lex_start_error(self, c): if ord(c) >= 32 and ord(c) < 128: self.__error("invalid character '%s'" % c) else: self.__error("invalid character U+%04x" % ord(c)) __lex_start_actions = {} for c in " \t\n\r": __lex_start_actions[c] = __lex_start_space for c in "abcdefghijklmnopqrstuvwxyz": __lex_start_actions[c] = __lex_start_alpha for c in "[{]}:,": __lex_start_actions[c] = __lex_start_token for c in "-0123456789": __lex_start_actions[c] = __lex_start_number __lex_start_actions['"'] = __lex_start_string def __lex_start(self, c): Parser.__lex_start_actions.get( c, Parser.__lex_start_error)(self, c) return True __lex_alpha = {} for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": __lex_alpha[c] = True def __lex_finish_keyword(self): if self.buffer == "false": self.__parser_input(False) elif self.buffer == "true": self.__parser_input(True) elif self.buffer == "null": self.__parser_input(None) else: self.__error("invalid keyword '%s'" % self.buffer) def __lex_keyword(self, c): if c in Parser.__lex_alpha: self.buffer += c return True else: self.__lex_finish_keyword() return False __number_re = re.compile("(-)?(0|[1-9][0-9]*)" "(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$") def __lex_finish_number(self): s = self.buffer m = Parser.__number_re.match(s) if m: sign, integer, fraction, exp = m.groups() if (exp is not None and (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)): self.__error("exponent outside valid range") return if fraction is not None and len(fraction.lstrip('0')) == 0: fraction = None sig_string = integer if fraction is not None: sig_string += fraction significand = 
int(sig_string) pow10 = 0 if fraction is not None: pow10 -= len(fraction) if exp is not None: pow10 += long(exp) if significand == 0: self.__parser_input(0) return elif significand <= 2 ** 63: while pow10 > 0 and significand <= 2 ** 63: significand *= 10 pow10 -= 1 while pow10 < 0 and significand % 10 == 0: significand /= 10 pow10 += 1 if (pow10 == 0 and ((not sign and significand < 2 ** 63) or (sign and significand <= 2 ** 63))): if sign: self.__parser_input(-significand) else: self.__parser_input(significand) return value = float(s) if value == float("inf") or value == float("-inf"): self.__error("number outside valid range") return if value == 0: # Suppress negative zero. value = 0 self.__parser_input(value) elif re.match("-?0[0-9]", s): self.__error("leading zeros not allowed") elif re.match("-([^0-9]|$)", s): self.__error("'-' must be followed by digit") elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s): self.__error("decimal point must be followed by digit") elif re.search("e[-+]?([^0-9]|$)", s): self.__error("exponent must contain at least one digit") else: self.__error("syntax error in number") def __lex_number(self, c): if c in ".0123456789eE-+": self.buffer += c return True else: self.__lex_finish_number() return False __4hex_re = re.compile("[0-9a-fA-F]{4}") def __lex_4hex(self, s): if len(s) < 4: self.__error("quoted string ends within \\u escape") elif not Parser.__4hex_re.match(s): self.__error("malformed \\u escape") elif s == "0000": self.__error("null bytes not supported in quoted strings") else: return int(s, 16) @staticmethod def __is_leading_surrogate(c): """Returns true if 'c' is a Unicode code point for a leading surrogate.""" return c >= 0xd800 and c <= 0xdbff @staticmethod def __is_trailing_surrogate(c): """Returns true if 'c' is a Unicode code point for a trailing surrogate.""" return c >= 0xdc00 and c <= 0xdfff @staticmethod def __utf16_decode_surrogate_pair(leading, trailing): """Returns the unicode code point corresponding to leading 
surrogate 'leading' and trailing surrogate 'trailing'. The return value will not make any sense if 'leading' or 'trailing' are not in the correct ranges for leading or trailing surrogates.""" # Leading surrogate: 110110wwwwxxxxxx # Trailing surrogate: 110111xxxxxxxxxx # Code point: 000uuuuuxxxxxxxxxxxxxxxx w = (leading >> 6) & 0xf u = w + 1 x0 = leading & 0x3f x1 = trailing & 0x3ff return (u << 16) | (x0 << 10) | x1 __unescape = {'"': u'"', "\\": u"\\", "/": u"/", "b": u"\b", "f": u"\f", "n": u"\n", "r": u"\r", "t": u"\t"} def __lex_finish_string(self): inp = self.buffer out = u"" while len(inp): backslash = inp.find('\\') if backslash == -1: out += inp break out += inp[:backslash] inp = inp[backslash + 1:] if inp == "": self.__error("quoted string may not end with backslash") return replacement = Parser.__unescape.get(inp[0]) if replacement is not None: out += replacement inp = inp[1:] continue elif inp[0] != u'u': self.__error("bad escape \\%s" % inp[0]) return c0 = self.__lex_4hex(inp[1:5]) if c0 is None: return inp = inp[5:] if Parser.__is_leading_surrogate(c0): if inp[:2] != u'\\u': self.__error("malformed escaped surrogate pair") return c1 = self.__lex_4hex(inp[2:6]) if c1 is None: return if not Parser.__is_trailing_surrogate(c1): self.__error("second half of escaped surrogate pair is " "not trailing surrogate") return code_point = Parser.__utf16_decode_surrogate_pair(c0, c1) inp = inp[6:] else: code_point = c0 out += unichr(code_point) self.__parser_input('string', out) def __lex_string_escape(self, c): self.buffer += c self.lex_state = Parser.__lex_string return True def __lex_string(self, c): if c == '\\': self.buffer += c self.lex_state = Parser.__lex_string_escape elif c == '"': self.__lex_finish_string() elif ord(c) >= 0x20: self.buffer += c else: self.__error("U+%04X must be escaped in quoted string" % ord(c)) return True def __lex_input(self, c): eat = self.lex_state(self, c) assert eat is True or eat is False return eat def __parse_start(self, token, 
unused_string): if token == '{': self.__push_object() elif token == '[': self.__push_array() else: self.__error("syntax error at beginning of input") def __parse_end(self, unused_token, unused_string): self.__error("trailing garbage at end of input") def __parse_object_init(self, token, string): if token == '}': self.__parser_pop() else: self.__parse_object_name(token, string) def __parse_object_name(self, token, string): if token == 'string': self.member_name = string self.parse_state = Parser.__parse_object_colon else: self.__error("syntax error parsing object expecting string") def __parse_object_colon(self, token, unused_string): if token == ":": self.parse_state = Parser.__parse_object_value else: self.__error("syntax error parsing object expecting ':'") def __parse_object_value(self, token, string): self.__parse_value(token, string, Parser.__parse_object_next) def __parse_object_next(self, token, unused_string): if token == ",": self.parse_state = Parser.__parse_object_name elif token == "}": self.__parser_pop() else: self.__error("syntax error expecting '}' or ','") def __parse_array_init(self, token, string): if token == ']': self.__parser_pop() else: self.__parse_array_value(token, string) def __parse_array_value(self, token, string): self.__parse_value(token, string, Parser.__parse_array_next) def __parse_array_next(self, token, unused_string): if token == ",": self.parse_state = Parser.__parse_array_value elif token == "]": self.__parser_pop() else: self.__error("syntax error expecting ']' or ','") def __parser_input(self, token, string=None): self.lex_state = Parser.__lex_start self.buffer = "" self.parse_state(self, token, string) def __put_value(self, value): top = self.stack[-1] if type(top) == dict: top[self.member_name] = value else: top.append(value) def __parser_push(self, new_json, next_state): if len(self.stack) < Parser.MAX_HEIGHT: if len(self.stack) > 0: self.__put_value(new_json) self.stack.append(new_json) self.parse_state = next_state 
else: self.__error("input exceeds maximum nesting depth %d" % Parser.MAX_HEIGHT) def __push_object(self): self.__parser_push({}, Parser.__parse_object_init) def __push_array(self): self.__parser_push([], Parser.__parse_array_init) def __parser_pop(self): if len(self.stack) == 1: self.parse_state = Parser.__parse_end if not self.check_trailer: self.done = True else: self.stack.pop() top = self.stack[-1] if type(top) == list: self.parse_state = Parser.__parse_array_next else: self.parse_state = Parser.__parse_object_next def __parse_value(self, token, string, next_state): if token in [False, None, True] or type(token) in [int, long, float]: self.__put_value(token) elif token == 'string': self.__put_value(string) else: if token == '{': self.__push_object() elif token == '[': self.__push_array() else: self.__error("syntax error expecting value") return self.parse_state = next_state def __error(self, message): if self.error is None: self.error = ("line %d, column %d, byte %d: %s" % (self.line_number, self.column_number, self.byte_number, message)) self.done = True def feed(self, s): i = 0 while True: if self.done or i >= len(s): return i c = s[i] if self.__lex_input(c): self.byte_number += 1 if c == '\n': self.column_number = 0 self.line_number += 1 else: self.column_number += 1 i += 1 def is_done(self): return self.done def finish(self): if self.lex_state == Parser.__lex_start: pass elif self.lex_state in (Parser.__lex_string, Parser.__lex_string_escape): self.__error("unexpected end of input in quoted string") else: self.__lex_input(" ") if self.parse_state == Parser.__parse_start: self.__error("empty input stream") elif self.parse_state != Parser.__parse_end: self.__error("unexpected end of input") if self.error == None: assert len(self.stack) == 1 return self.stack.pop() else: return self.error openvswitch-2.0.1+git20140120/python/ovs/jsonrpc.py000066400000000000000000000435041226605124000216400ustar00rootroot00000000000000# Copyright (c) 2010, 2011, 2012 Nicira, 
Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import errno import os import ovs.json import ovs.poller import ovs.reconnect import ovs.stream import ovs.timeval import ovs.util import ovs.vlog EOF = ovs.util.EOF vlog = ovs.vlog.Vlog("jsonrpc") class Message(object): T_REQUEST = 0 # Request. T_NOTIFY = 1 # Notification. T_REPLY = 2 # Successful reply. T_ERROR = 3 # Error reply. __types = {T_REQUEST: "request", T_NOTIFY: "notification", T_REPLY: "reply", T_ERROR: "error"} def __init__(self, type_, method, params, result, error, id): self.type = type_ self.method = method self.params = params self.result = result self.error = error self.id = id _next_id = 0 @staticmethod def _create_id(): this_id = Message._next_id Message._next_id += 1 return this_id @staticmethod def create_request(method, params): return Message(Message.T_REQUEST, method, params, None, None, Message._create_id()) @staticmethod def create_notify(method, params): return Message(Message.T_NOTIFY, method, params, None, None, None) @staticmethod def create_reply(result, id): return Message(Message.T_REPLY, None, None, result, None, id) @staticmethod def create_error(error, id): return Message(Message.T_ERROR, None, None, None, error, id) @staticmethod def type_to_string(type_): return Message.__types[type_] def __validate_arg(self, value, name, must_have): if (value is not None) == (must_have != 0): return None else: type_name = Message.type_to_string(self.type) if must_have: verb = "must" else: verb = "must 
not" return "%s %s have \"%s\"" % (type_name, verb, name) def is_valid(self): if self.params is not None and type(self.params) != list: return "\"params\" must be JSON array" pattern = {Message.T_REQUEST: 0x11001, Message.T_NOTIFY: 0x11000, Message.T_REPLY: 0x00101, Message.T_ERROR: 0x00011}.get(self.type) if pattern is None: return "invalid JSON-RPC message type %s" % self.type return ( self.__validate_arg(self.method, "method", pattern & 0x10000) or self.__validate_arg(self.params, "params", pattern & 0x1000) or self.__validate_arg(self.result, "result", pattern & 0x100) or self.__validate_arg(self.error, "error", pattern & 0x10) or self.__validate_arg(self.id, "id", pattern & 0x1)) @staticmethod def from_json(json): if type(json) != dict: return "message is not a JSON object" # Make a copy to avoid modifying the caller's dict. json = dict(json) if "method" in json: method = json.pop("method") if type(method) not in [str, unicode]: return "method is not a JSON string" else: method = None params = json.pop("params", None) result = json.pop("result", None) error = json.pop("error", None) id_ = json.pop("id", None) if len(json): return "message has unexpected member \"%s\"" % json.popitem()[0] if result is not None: msg_type = Message.T_REPLY elif error is not None: msg_type = Message.T_ERROR elif id_ is not None: msg_type = Message.T_REQUEST else: msg_type = Message.T_NOTIFY msg = Message(msg_type, method, params, result, error, id_) validation_error = msg.is_valid() if validation_error is not None: return validation_error else: return msg def to_json(self): json = {} if self.method is not None: json["method"] = self.method if self.params is not None: json["params"] = self.params if self.result is not None or self.type == Message.T_ERROR: json["result"] = self.result if self.error is not None or self.type == Message.T_REPLY: json["error"] = self.error if self.id is not None or self.type == Message.T_NOTIFY: json["id"] = self.id return json def __str__(self): s = 
[Message.type_to_string(self.type)] if self.method is not None: s.append("method=\"%s\"" % self.method) if self.params is not None: s.append("params=" + ovs.json.to_string(self.params)) if self.result is not None: s.append("result=" + ovs.json.to_string(self.result)) if self.error is not None: s.append("error=" + ovs.json.to_string(self.error)) if self.id is not None: s.append("id=" + ovs.json.to_string(self.id)) return ", ".join(s) class Connection(object): def __init__(self, stream): self.name = stream.name self.stream = stream self.status = 0 self.input = "" self.output = "" self.parser = None self.received_bytes = 0 def close(self): self.stream.close() self.stream = None def run(self): if self.status: return while len(self.output): retval = self.stream.send(self.output) if retval >= 0: self.output = self.output[retval:] else: if retval != -errno.EAGAIN: vlog.warn("%s: send error: %s" % (self.name, os.strerror(-retval))) self.error(-retval) break def wait(self, poller): if not self.status: self.stream.run_wait(poller) if len(self.output): self.stream.send_wait(poller) def get_status(self): return self.status def get_backlog(self): if self.status != 0: return 0 else: return len(self.output) def get_received_bytes(self): return self.received_bytes def __log_msg(self, title, msg): vlog.dbg("%s: %s %s" % (self.name, title, msg)) def send(self, msg): if self.status: return self.status self.__log_msg("send", msg) was_empty = len(self.output) == 0 self.output += ovs.json.to_string(msg.to_json()) if was_empty: self.run() return self.status def send_block(self, msg): error = self.send(msg) if error: return error while True: self.run() if not self.get_backlog() or self.get_status(): return self.status poller = ovs.poller.Poller() self.wait(poller) poller.block() def recv(self): if self.status: return self.status, None while True: if not self.input: error, data = self.stream.recv(4096) if error: if error == errno.EAGAIN: return error, None else: # XXX rate-limit 
vlog.warn("%s: receive error: %s" % (self.name, os.strerror(error))) self.error(error) return self.status, None elif not data: self.error(EOF) return EOF, None else: self.input += data self.received_bytes += len(data) else: if self.parser is None: self.parser = ovs.json.Parser() self.input = self.input[self.parser.feed(self.input):] if self.parser.is_done(): msg = self.__process_msg() if msg: return 0, msg else: return self.status, None def recv_block(self): while True: error, msg = self.recv() if error != errno.EAGAIN: return error, msg self.run() poller = ovs.poller.Poller() self.wait(poller) self.recv_wait(poller) poller.block() def transact_block(self, request): id_ = request.id error = self.send(request) reply = None while not error: error, reply = self.recv_block() if (reply and (reply.type == Message.T_REPLY or reply.type == Message.T_ERROR) and reply.id == id_): break return error, reply def __process_msg(self): json = self.parser.finish() self.parser = None if type(json) in [str, unicode]: # XXX rate-limit vlog.warn("%s: error parsing stream: %s" % (self.name, json)) self.error(errno.EPROTO) return msg = Message.from_json(json) if not isinstance(msg, Message): # XXX rate-limit vlog.warn("%s: received bad JSON-RPC message: %s" % (self.name, msg)) self.error(errno.EPROTO) return self.__log_msg("received", msg) return msg def recv_wait(self, poller): if self.status or self.input: poller.immediate_wake() else: self.stream.recv_wait(poller) def error(self, error): if self.status == 0: self.status = error self.stream.close() self.output = "" class Session(object): """A JSON-RPC session with reconnection.""" def __init__(self, reconnect, rpc): self.reconnect = reconnect self.rpc = rpc self.stream = None self.pstream = None self.seqno = 0 @staticmethod def open(name): """Creates and returns a Session that maintains a JSON-RPC session to 'name', which should be a string acceptable to ovs.stream.Stream or ovs.stream.PassiveStream's initializer. 
If 'name' is an active connection method, e.g. "tcp:127.1.2.3", the new session connects and reconnects, with back-off, to 'name'. If 'name' is a passive connection method, e.g. "ptcp:", the new session listens for connections to 'name'. It maintains at most one connection at any given time. Any new connection causes the previous one (if any) to be dropped.""" reconnect = ovs.reconnect.Reconnect(ovs.timeval.msec()) reconnect.set_name(name) reconnect.enable(ovs.timeval.msec()) if ovs.stream.PassiveStream.is_valid_name(name): reconnect.set_passive(True, ovs.timeval.msec()) if ovs.stream.stream_or_pstream_needs_probes(name): reconnect.set_probe_interval(0) return Session(reconnect, None) @staticmethod def open_unreliably(jsonrpc): reconnect = ovs.reconnect.Reconnect(ovs.timeval.msec()) reconnect.set_quiet(True) reconnect.set_name(jsonrpc.name) reconnect.set_max_tries(0) reconnect.connected(ovs.timeval.msec()) return Session(reconnect, jsonrpc) def close(self): if self.rpc is not None: self.rpc.close() self.rpc = None if self.stream is not None: self.stream.close() self.stream = None if self.pstream is not None: self.pstream.close() self.pstream = None def __disconnect(self): if self.rpc is not None: self.rpc.error(EOF) self.rpc.close() self.rpc = None self.seqno += 1 elif self.stream is not None: self.stream.close() self.stream = None self.seqno += 1 def __connect(self): self.__disconnect() name = self.reconnect.get_name() if not self.reconnect.is_passive(): error, self.stream = ovs.stream.Stream.open(name) if not error: self.reconnect.connecting(ovs.timeval.msec()) else: self.reconnect.connect_failed(ovs.timeval.msec(), error) elif self.pstream is not None: error, self.pstream = ovs.stream.PassiveStream.open(name) if not error: self.reconnect.listening(ovs.timeval.msec()) else: self.reconnect.connect_failed(ovs.timeval.msec(), error) self.seqno += 1 def run(self): if self.pstream is not None: error, stream = self.pstream.accept() if error == 0: if self.rpc or 
self.stream: # XXX rate-limit vlog.info("%s: new connection replacing active " "connection" % self.reconnect.get_name()) self.__disconnect() self.reconnect.connected(ovs.timeval.msec()) self.rpc = Connection(stream) elif error != errno.EAGAIN: self.reconnect.listen_error(ovs.timeval.msec(), error) self.pstream.close() self.pstream = None if self.rpc: backlog = self.rpc.get_backlog() self.rpc.run() if self.rpc.get_backlog() < backlog: # Data previously caught in a queue was successfully sent (or # there's an error, which we'll catch below). # # We don't count data that is successfully sent immediately as # activity, because there's a lot of queuing downstream from # us, which means that we can push a lot of data into a # connection that has stalled and won't ever recover. self.reconnect.activity(ovs.timeval.msec()) error = self.rpc.get_status() if error != 0: self.reconnect.disconnected(ovs.timeval.msec(), error) self.__disconnect() elif self.stream is not None: self.stream.run() error = self.stream.connect() if error == 0: self.reconnect.connected(ovs.timeval.msec()) self.rpc = Connection(self.stream) self.stream = None elif error != errno.EAGAIN: self.reconnect.connect_failed(ovs.timeval.msec(), error) self.stream.close() self.stream = None action = self.reconnect.run(ovs.timeval.msec()) if action == ovs.reconnect.CONNECT: self.__connect() elif action == ovs.reconnect.DISCONNECT: self.reconnect.disconnected(ovs.timeval.msec(), 0) self.__disconnect() elif action == ovs.reconnect.PROBE: if self.rpc: request = Message.create_request("echo", []) request.id = "echo" self.rpc.send(request) else: assert action == None def wait(self, poller): if self.rpc is not None: self.rpc.wait(poller) elif self.stream is not None: self.stream.run_wait(poller) self.stream.connect_wait(poller) if self.pstream is not None: self.pstream.wait(poller) self.reconnect.wait(poller, ovs.timeval.msec()) def get_backlog(self): if self.rpc is not None: return self.rpc.get_backlog() else: return 0 
def get_name(self): return self.reconnect.get_name() def send(self, msg): if self.rpc is not None: return self.rpc.send(msg) else: return errno.ENOTCONN def recv(self): if self.rpc is not None: received_bytes = self.rpc.get_received_bytes() error, msg = self.rpc.recv() if received_bytes != self.rpc.get_received_bytes(): # Data was successfully received. # # Previously we only counted receiving a full message as # activity, but with large messages or a slow connection that # policy could time out the session mid-message. self.reconnect.activity(ovs.timeval.msec()) if not error: if msg.type == Message.T_REQUEST and msg.method == "echo": # Echo request. Send reply. self.send(Message.create_reply(msg.params, msg.id)) elif msg.type == Message.T_REPLY and msg.id == "echo": # It's a reply to our echo request. Suppress it. pass else: return msg return None def recv_wait(self, poller): if self.rpc is not None: self.rpc.recv_wait(poller) def is_alive(self): if self.rpc is not None or self.stream is not None: return True else: max_tries = self.reconnect.get_max_tries() return max_tries is None or max_tries > 0 def is_connected(self): return self.rpc is not None def get_seqno(self): return self.seqno def force_reconnect(self): self.reconnect.force_reconnect(ovs.timeval.msec()) openvswitch-2.0.1+git20140120/python/ovs/ovsuuid.py000066400000000000000000000037611226605124000216610ustar00rootroot00000000000000# Copyright (c) 2009, 2010, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. import re import uuid from ovs.db import error import ovs.db.parser uuidRE = re.compile("^xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx$" .replace('x', '[0-9a-fA-F]')) def zero(): return uuid.UUID(int=0) def is_valid_string(s): return uuidRE.match(s) is not None def from_string(s): if not is_valid_string(s): raise error.Error("%s is not a valid UUID" % s) return uuid.UUID(s) def from_json(json, symtab=None): try: s = ovs.db.parser.unwrap_json(json, "uuid", [str, unicode], "string") if not uuidRE.match(s): raise error.Error("\"%s\" is not a valid UUID" % s, json) return uuid.UUID(s) except error.Error, e: if not symtab: raise e try: name = ovs.db.parser.unwrap_json(json, "named-uuid", [str, unicode], "string") except error.Error: raise e if name not in symtab: symtab[name] = uuid.uuid4() return symtab[name] def to_json(uuid_): return ["uuid", str(uuid_)] def to_c_assignment(uuid_, var): """Returns an array of strings, each of which contain a C statement. The statements assign 'uuid_' to a "struct uuid" as defined in Open vSwitch lib/uuid.h.""" hex_string = uuid_.hex return ["%s.parts[%d] = 0x%s;" % (var, x, hex_string[x * 8:(x + 1) * 8]) for x in range(4)] openvswitch-2.0.1+git20140120/python/ovs/poller.py000066400000000000000000000165561226605124000214660ustar00rootroot00000000000000# Copyright (c) 2010 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import errno import ovs.timeval import ovs.vlog import select import socket try: import eventlet.patcher def _using_eventlet_green_select(): return eventlet.patcher.is_monkey_patched(select) except: def _using_eventlet_green_select(): return False vlog = ovs.vlog.Vlog("poller") POLLIN = 0x001 POLLOUT = 0x004 POLLERR = 0x008 POLLHUP = 0x010 POLLNVAL = 0x020 # eventlet/gevent doesn't support select.poll. If select.poll is used, # python interpreter is blocked as a whole instead of switching from the # current thread that is about to block to other runnable thread. # So emulate select.poll by select.select because using python means that # performance isn't so important. class _SelectSelect(object): """ select.poll emulation by using select.select. Only register and poll are needed at the moment. """ def __init__(self): self.rlist = [] self.wlist = [] self.xlist = [] def register(self, fd, events): if isinstance(fd, socket.socket): fd = fd.fileno() assert isinstance(fd, int) if events & POLLIN: self.rlist.append(fd) events &= ~POLLIN if events & POLLOUT: self.wlist.append(fd) events &= ~POLLOUT if events: self.xlist.append(fd) def poll(self, timeout): if timeout == -1: # epoll uses -1 for infinite timeout, select uses None. timeout = None else: timeout = float(timeout) / 1000 # XXX workaround a bug in eventlet # see https://github.com/eventlet/eventlet/pull/25 if timeout == 0 and _using_eventlet_green_select(): timeout = 0.1 rlist, wlist, xlist = select.select(self.rlist, self.wlist, self.xlist, timeout) # collections.defaultdict is introduced by python 2.5 and # XenServer uses python 2.4. We don't use it for XenServer. 
# events_dict = collections.defaultdict(int) # events_dict[fd] |= event events_dict = {} for fd in rlist: events_dict[fd] = events_dict.get(fd, 0) | POLLIN for fd in wlist: events_dict[fd] = events_dict.get(fd, 0) | POLLOUT for fd in xlist: events_dict[fd] = events_dict.get(fd, 0) | (POLLERR | POLLHUP | POLLNVAL) return events_dict.items() SelectPoll = _SelectSelect # If eventlet/gevent isn't used, we can use select.poll by replacing # _SelectPoll with select.poll class # _SelectPoll = select.poll class Poller(object): """High-level wrapper around the "poll" system call. Intended usage is for the program's main loop to go about its business servicing whatever events it needs to. Then, when it runs out of immediate tasks, it calls each subordinate module or object's "wait" function, which in turn calls one (or more) of the functions Poller.fd_wait(), Poller.immediate_wake(), and Poller.timer_wait() to register to be awakened when the appropriate event occurs. Then the main loop calls Poller.block(), which blocks until one of the registered events happens.""" def __init__(self): self.__reset() def fd_wait(self, fd, events): """Registers 'fd' as waiting for the specified 'events' (which should be select.POLLIN or select.POLLOUT or their bitwise-OR). The following call to self.block() will wake up when 'fd' becomes ready for one or more of the requested events. The event registration is one-shot: only the following call to self.block() is affected. The event will need to be re-registered after self.block() is called if it is to persist. 'fd' may be an integer file descriptor or an object with a fileno() method that returns an integer file descriptor.""" self.poll.register(fd, events) def __timer_wait(self, msec): if self.timeout < 0 or msec < self.timeout: self.timeout = msec def timer_wait(self, msec): """Causes the following call to self.block() to block for no more than 'msec' milliseconds. 
If 'msec' is nonpositive, the following call to self.block() will not block at all. The timer registration is one-shot: only the following call to self.block() is affected. The timer will need to be re-registered after self.block() is called if it is to persist.""" if msec <= 0: self.immediate_wake() else: self.__timer_wait(msec) def timer_wait_until(self, msec): """Causes the following call to self.block() to wake up when the current time, as returned by ovs.timeval.msec(), reaches 'msec' or later. If 'msec' is earlier than the current time, the following call to self.block() will not block at all. The timer registration is one-shot: only the following call to self.block() is affected. The timer will need to be re-registered after self.block() is called if it is to persist.""" now = ovs.timeval.msec() if msec <= now: self.immediate_wake() else: self.__timer_wait(msec - now) def immediate_wake(self): """Causes the following call to self.block() to wake up immediately, without blocking.""" self.timeout = 0 def block(self): """Blocks until one or more of the events registered with self.fd_wait() occurs, or until the minimum duration registered with self.timer_wait() elapses, or not at all if self.immediate_wake() has been called.""" try: try: events = self.poll.poll(self.timeout) self.__log_wakeup(events) except select.error, e: # XXX rate-limit error, msg = e if error != errno.EINTR: vlog.err("poll: %s" % e[1]) finally: self.__reset() def __log_wakeup(self, events): if not events: vlog.dbg("%d-ms timeout" % self.timeout) else: for fd, revents in events: if revents != 0: s = "" if revents & POLLIN: s += "[POLLIN]" if revents & POLLOUT: s += "[POLLOUT]" if revents & POLLERR: s += "[POLLERR]" if revents & POLLHUP: s += "[POLLHUP]" if revents & POLLNVAL: s += "[POLLNVAL]" vlog.dbg("%s on fd %d" % (s, fd)) def __reset(self): self.poll = SelectPoll() self.timeout = -1 
openvswitch-2.0.1+git20140120/python/ovs/process.py000066400000000000000000000026731226605124000216420ustar00rootroot00000000000000# Copyright (c) 2010, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import signal def _signal_status_msg(type_, signr): s = "%s by signal %d" % (type_, signr) for name in signal.__dict__: if name.startswith("SIG") and getattr(signal, name) == signr: return "%s (%s)" % (s, name) return s def status_msg(status): """Given 'status', which is a process status in the form reported by waitpid(2) and returned by process_status(), returns a string describing how the process terminated.""" if os.WIFEXITED(status): s = "exit status %d" % os.WEXITSTATUS(status) elif os.WIFSIGNALED(status): s = _signal_status_msg("killed", os.WTERMSIG(status)) elif os.WIFSTOPPED(status): s = _signal_status_msg("stopped", os.WSTOPSIG(status)) else: s = "terminated abnormally (%x)" % status if os.WCOREDUMP(status): s += ", core dumped" return s openvswitch-2.0.1+git20140120/python/ovs/reconnect.py000066400000000000000000000551211226605124000221400ustar00rootroot00000000000000# Copyright (c) 2010, 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import ovs.vlog import ovs.util # Values returned by Reconnect.run() CONNECT = 'connect' DISCONNECT = 'disconnect' PROBE = 'probe' EOF = ovs.util.EOF vlog = ovs.vlog.Vlog("reconnect") class Reconnect(object): """A finite-state machine for connecting and reconnecting to a network resource with exponential backoff. It also provides optional support for detecting a connection on which the peer is no longer responding. The library does not implement anything networking related, only an FSM for networking code to use. Many Reconnect methods take a "now" argument. This makes testing easier since there is no hidden state. When not testing, just pass the return value of ovs.time.msec(). 
(Perhaps this design should be revisited later.)""" class Void(object): name = "VOID" is_connected = False @staticmethod def deadline(fsm): return None @staticmethod def run(fsm, now): return None class Listening(object): name = "LISTENING" is_connected = False @staticmethod def deadline(fsm): return None @staticmethod def run(fsm, now): return None class Backoff(object): name = "BACKOFF" is_connected = False @staticmethod def deadline(fsm): return fsm.state_entered + fsm.backoff @staticmethod def run(fsm, now): return CONNECT class ConnectInProgress(object): name = "CONNECTING" is_connected = False @staticmethod def deadline(fsm): return fsm.state_entered + max(1000, fsm.backoff) @staticmethod def run(fsm, now): return DISCONNECT class Active(object): name = "ACTIVE" is_connected = True @staticmethod def deadline(fsm): if fsm.probe_interval: base = max(fsm.last_activity, fsm.state_entered) return base + fsm.probe_interval return None @staticmethod def run(fsm, now): vlog.dbg("%s: idle %d ms, sending inactivity probe" % (fsm.name, now - max(fsm.last_activity, fsm.state_entered))) fsm._transition(now, Reconnect.Idle) return PROBE class Idle(object): name = "IDLE" is_connected = True @staticmethod def deadline(fsm): if fsm.probe_interval: return fsm.state_entered + fsm.probe_interval return None @staticmethod def run(fsm, now): vlog.err("%s: no response to inactivity probe after %.3g " "seconds, disconnecting" % (fsm.name, (now - fsm.state_entered) / 1000.0)) return DISCONNECT class Reconnect(object): name = "RECONNECT" is_connected = False @staticmethod def deadline(fsm): return fsm.state_entered @staticmethod def run(fsm, now): return DISCONNECT def __init__(self, now): """Creates and returns a new reconnect FSM with default settings. The FSM is initially disabled. 
The caller will likely want to call self.enable() and self.set_name() on the returned object.""" self.name = "void" self.min_backoff = 1000 self.max_backoff = 8000 self.probe_interval = 5000 self.passive = False self.info_level = vlog.info self.state = Reconnect.Void self.state_entered = now self.backoff = 0 self.last_activity = now self.last_connected = None self.last_disconnected = None self.max_tries = None self.creation_time = now self.n_attempted_connections = 0 self.n_successful_connections = 0 self.total_connected_duration = 0 self.seqno = 0 def set_quiet(self, quiet): """If 'quiet' is true, this object will log informational messages at debug level, by default keeping them out of log files. This is appropriate if the connection is one that is expected to be short-lived, so that the log messages are merely distracting. If 'quiet' is false, this object logs informational messages at info level. This is the default. This setting has no effect on the log level of debugging, warning, or error messages.""" if quiet: self.info_level = vlog.dbg else: self.info_level = vlog.info def get_name(self): return self.name def set_name(self, name): """Sets this object's name to 'name'. If 'name' is None, then "void" is used instead. The name is used in log messages.""" if name is None: self.name = "void" else: self.name = name def get_min_backoff(self): """Return the minimum number of milliseconds to back off between consecutive connection attempts. The default is 1000 ms.""" return self.min_backoff def get_max_backoff(self): """Return the maximum number of milliseconds to back off between consecutive connection attempts. The default is 8000 ms.""" return self.max_backoff def get_probe_interval(self): """Returns the "probe interval" in milliseconds. If this is zero, it disables the connection keepalive feature. If it is nonzero, then if the interval passes while the FSM is connected and without self.activity() being called, self.run() returns ovs.reconnect.PROBE. 
If the interval passes again without self.activity() being called, self.run() returns ovs.reconnect.DISCONNECT.""" return self.probe_interval def set_max_tries(self, max_tries): """Limits the maximum number of times that this object will ask the client to try to reconnect to 'max_tries'. None (the default) means an unlimited number of tries. After the number of tries has expired, the FSM will disable itself instead of backing off and retrying.""" self.max_tries = max_tries def get_max_tries(self): """Returns the current remaining number of connection attempts, None if the number is unlimited.""" return self.max_tries def set_backoff(self, min_backoff, max_backoff): """Configures the backoff parameters for this FSM. 'min_backoff' is the minimum number of milliseconds, and 'max_backoff' is the maximum, between connection attempts. 'min_backoff' must be at least 1000, and 'max_backoff' must be greater than or equal to 'min_backoff'.""" self.min_backoff = max(min_backoff, 1000) if self.max_backoff: self.max_backoff = max(max_backoff, 1000) else: self.max_backoff = 8000 if self.min_backoff > self.max_backoff: self.max_backoff = self.min_backoff if (self.state == Reconnect.Backoff and self.backoff > self.max_backoff): self.backoff = self.max_backoff def set_probe_interval(self, probe_interval): """Sets the "probe interval" to 'probe_interval', in milliseconds. If this is zero, it disables the connection keepalive feature. If it is nonzero, then if the interval passes while this FSM is connected and without self.activity() being called, self.run() returns ovs.reconnect.PROBE. If the interval passes again without self.activity() being called, self.run() returns ovs.reconnect.DISCONNECT. 
If 'probe_interval' is nonzero, then it will be forced to a value of at least 1000 ms.""" if probe_interval: self.probe_interval = max(1000, probe_interval) else: self.probe_interval = 0 def is_passive(self): """Returns true if 'fsm' is in passive mode, false if 'fsm' is in active mode (the default).""" return self.passive def set_passive(self, passive, now): """Configures this FSM for active or passive mode. In active mode (the default), the FSM is attempting to connect to a remote host. In passive mode, the FSM is listening for connections from a remote host.""" if self.passive != passive: self.passive = passive if ((passive and self.state in (Reconnect.ConnectInProgress, Reconnect.Reconnect)) or (not passive and self.state == Reconnect.Listening and self.__may_retry())): self._transition(now, Reconnect.Backoff) self.backoff = 0 def is_enabled(self): """Returns true if this FSM has been enabled with self.enable(). Calling another function that indicates a change in connection state, such as self.disconnected() or self.force_reconnect(), will also enable a reconnect FSM.""" return self.state != Reconnect.Void def enable(self, now): """If this FSM is disabled (the default for newly created FSMs), enables it, so that the next call to reconnect_run() for 'fsm' will return ovs.reconnect.CONNECT. If this FSM is not disabled, this function has no effect.""" if self.state == Reconnect.Void and self.__may_retry(): self._transition(now, Reconnect.Backoff) self.backoff = 0 def disable(self, now): """Disables this FSM. 
def force_reconnect(self, now):
    """Requests that the current connection or connection attempt be
    dropped and retried.

    Only has an effect while the FSM is connecting, active, or idle: it
    then moves to the Reconnect state, so that the client is told to
    disconnect, back off, and reconnect.  In any other state this is a
    no-op."""
    droppable = (Reconnect.ConnectInProgress, Reconnect.Active,
                 Reconnect.Idle)
    if self.state not in droppable:
        return
    self._transition(now, Reconnect.Reconnect)
def connecting(self, now):
    """Tells this FSM that a connection or listening attempt has started.

    Unless the FSM is already in the ConnectInProgress state, this logs
    the attempt (worded according to active or passive mode) and enters
    ConnectInProgress, which starts the timer after which self.run() asks
    for the attempt to be aborted with ovs.reconnect.DISCONNECT."""
    if self.state == Reconnect.ConnectInProgress:
        return

    if self.passive:
        message = "%s: listening..." % self.name
    else:
        message = "%s: connecting..." % self.name
    self.info_level(message)
    self._transition(now, Reconnect.ConnectInProgress)
def _transition(self, now, state):
    """Moves this FSM into 'state' at time 'now', updating statistics.

    Leaving ConnectInProgress counts as one attempted connection, and as a
    successful one when the new state is Active.  Whenever the connected
    status changes, the sequence number is bumped, and if the FSM was
    connected the time since the last connection is added to the total
    connected duration."""
    previous = self.state

    if previous == Reconnect.ConnectInProgress:
        self.n_attempted_connections += 1
        if state == Reconnect.Active:
            self.n_successful_connections += 1

    was_connected = previous.is_connected
    if was_connected != state.is_connected:
        if was_connected:
            self.total_connected_duration += now - self.last_connected
        self.seqno += 1

    vlog.dbg("%s: entering %s" % (self.name, state.name))
    self.state_entered = now
    self.state = state
The return value is one of: - None: The client need not take any action. - Active client, ovs.reconnect.CONNECT: The client should start a connection attempt and indicate this by calling self.connecting(). If the connection attempt has definitely succeeded, it should call self.connected(). If the connection attempt has definitely failed, it should call self.connect_failed(). The FSM is smart enough to back off correctly after successful connections that quickly abort, so it is OK to call self.connected() after a low-level successful connection (e.g. connect()) even if the connection might soon abort due to a failure at a high-level (e.g. SSL negotiation failure). - Passive client, ovs.reconnect.CONNECT: The client should try to listen for a connection, if it is not already listening. It should call self.listening() if successful, otherwise self.connecting() or self.connect_failed() if the attempt is in progress or definitely failed, respectively. A listening passive client should constantly attempt to accept a new connection and report an accepted connection with self.connected(). - ovs.reconnect.DISCONNECT: The client should abort the current connection or connection attempt or listen attempt and call self.disconnected() or self.connect_failed() to indicate it. - ovs.reconnect.PROBE: The client should send some kind of request to the peer that will elicit a response, to ensure that the connection is indeed in working order.
def wait(self, poller, now):
    """Causes the next call to poller.block() to wake up when self.run()
    should be called.

    If self.timeout() returns None, meaning that the current state has no
    deadline, no timer is registered with 'poller'."""
    timeout = self.timeout(now)
    # timeout() legitimately returns None.  Check for it explicitly
    # instead of relying on how None happens to order against integers.
    if timeout is not None and timeout >= 0:
        poller.timer_wait(timeout)
def get_stats(self, now):
    """Returns a snapshot of this FSM's statistics as of time 'now'.

    The result is a plain object whose attributes mirror the FSM's
    counters and timestamps, plus values derived from 'now': the
    milliseconds since the last connect and disconnect, the time spent in
    the current state, and a total connected duration that includes the
    connection in progress, if any."""
    class Stats(object):
        pass

    snapshot = Stats()

    # Values that are copied through unchanged.
    for field in ("creation_time", "last_connected", "last_disconnected",
                  "last_activity", "backoff", "seqno"):
        setattr(snapshot, field, getattr(self, field))

    snapshot.is_connected = self.is_connected()
    snapshot.msec_since_connect = self.get_last_connect_elapsed(now)
    snapshot.msec_since_disconnect = self.get_last_disconnect_elapsed(now)

    # The stored total only covers finished connections, so add the
    # current one while it is still up.
    duration = self.total_connected_duration
    if self.is_connected():
        duration += self.get_last_connect_elapsed(now)
    snapshot.total_connected_duration = duration

    snapshot.n_attempted_connections = self.n_attempted_connections
    snapshot.n_successful_connections = self.n_successful_connections
    snapshot.state = self.state.name
    snapshot.state_elapsed = now - self.state_entered
    return snapshot
import errno import os import select import socket import sys import ovs.fatal_signal import ovs.poller import ovs.vlog vlog = ovs.vlog.Vlog("socket_util") def make_unix_socket(style, nonblock, bind_path, connect_path): """Creates a Unix domain socket in the given 'style' (either socket.SOCK_DGRAM or socket.SOCK_STREAM) that is bound to 'bind_path' (if 'bind_path' is not None) and connected to 'connect_path' (if 'connect_path' is not None). If 'nonblock' is true, the socket is made non-blocking. Returns (error, socket): on success 'error' is 0 and 'socket' is a new socket object, on failure 'error' is a positive errno value and 'socket' is None.""" try: sock = socket.socket(socket.AF_UNIX, style) except socket.error, e: return get_exception_errno(e), None try: if nonblock: set_nonblocking(sock) if bind_path is not None: # Delete bind_path but ignore ENOENT. try: os.unlink(bind_path) except OSError, e: if e.errno != errno.ENOENT: return e.errno, None ovs.fatal_signal.add_file_to_unlink(bind_path) sock.bind(bind_path) try: if sys.hexversion >= 0x02060000: os.fchmod(sock.fileno(), 0700) else: os.chmod("/dev/fd/%d" % sock.fileno(), 0700) except OSError, e: pass if connect_path is not None: try: sock.connect(connect_path) except socket.error, e: if get_exception_errno(e) != errno.EINPROGRESS: raise return 0, sock except socket.error, e: sock.close() if (bind_path is not None and os.path.exists(bind_path)): ovs.fatal_signal.unlink_file_now(bind_path) eno = ovs.socket_util.get_exception_errno(e) if (eno == "AF_UNIX path too long" and os.uname()[0] == "Linux"): short_connect_path = None short_bind_path = None connect_dirfd = None bind_dirfd = None # Try workaround using /proc/self/fd if connect_path is not None: dirname = os.path.dirname(connect_path) basename = os.path.basename(connect_path) try: connect_dirfd = os.open(dirname, os.O_DIRECTORY | os.O_RDONLY) except OSError, err: return get_exception_errno(err), None short_connect_path = "/proc/self/fd/%d/%s" % 
(connect_dirfd, basename) if bind_path is not None: dirname = os.path.dirname(bind_path) basename = os.path.basename(bind_path) try: bind_dirfd = os.open(dirname, os.O_DIRECTORY | os.O_RDONLY) except OSError, err: return get_exception_errno(err), None short_bind_path = "/proc/self/fd/%d/%s" % (bind_dirfd, basename) try: return make_unix_socket(style, nonblock, short_bind_path, short_connect_path) finally: if connect_dirfd is not None: os.close(connect_dirfd) if bind_dirfd is not None: os.close(bind_dirfd) else: return get_exception_errno(e), None def check_connection_completion(sock): p = ovs.poller.SelectPoll() p.register(sock, ovs.poller.POLLOUT) pfds = p.poll(0) if len(pfds) == 1: revents = pfds[0][1] if revents & ovs.poller.POLLERR: try: # The following should raise an exception. socket.send("\0", socket.MSG_DONTWAIT) # (Here's where we end up if it didn't.) # XXX rate-limit vlog.err("poll return POLLERR but send succeeded") return errno.EPROTO except socket.error, e: return get_exception_errno(e) else: return 0 else: return errno.EAGAIN def inet_parse_active(target, default_port): address = target.split(":") host_name = address[0] if not host_name: raise ValueError("%s: bad peer name format" % target) if len(address) >= 2: port = int(address[1]) elif default_port: port = default_port else: raise ValueError("%s: port number must be specified" % target) return (host_name, port) def inet_open_active(style, target, default_port, dscp): address = inet_parse_active(target, default_port) try: sock = socket.socket(socket.AF_INET, style, 0) except socket.error, e: return get_exception_errno(e), None try: set_nonblocking(sock) set_dscp(sock, dscp) try: sock.connect(address) except socket.error, e: if get_exception_errno(e) != errno.EINPROGRESS: raise return 0, sock except socket.error, e: sock.close() return get_exception_errno(e), None def get_exception_errno(e): """A lot of methods on Python socket objects raise socket.error, but that exception is documented as having 
def write_fully(fd, buf):
    """Writes all of 'buf' to file descriptor 'fd', retrying after partial
    writes.

    Returns an (error, bytes_written) tuple where 'error' is 0 on success,
    otherwise a positive errno value, and 'bytes_written' is the number of
    bytes that were written before the error occurred.  'error' is 0 if
    and only if 'bytes_written' is len(buf).  A write that returns 0 is
    reported as errno.EPROTO."""
    bytes_written = 0
    if len(buf) == 0:
        return 0, 0

    while True:
        try:
            retval = os.write(fd, buf)
        except OSError:
            # Fetch the exception through sys.exc_info() so that this
            # handler is valid syntax on every Python version the module
            # may be run under.
            return sys.exc_info()[1].errno, bytes_written

        assert retval >= 0
        if retval == len(buf):
            return 0, bytes_written + len(buf)
        elif retval == 0:
            vlog.warn("write returned 0")
            return errno.EPROTO, bytes_written
        else:
            bytes_written += retval
            # Drop the part that was written and keep the remainder.
            # Keeping buf[:retval] would send the same prefix again and
            # report success without ever writing the tail.
            buf = buf[retval:]
def stream_or_pstream_needs_probes(name):
    """Reports whether the stream or pstream specified by 'name' needs
    periodic probes to verify connectivity.  (For [p]streams which need
    probes, it can take a long time to notice the connection was dropped.)

    Returns 1 if probes are needed, 0 if they are not, and -1 if 'name' is
    invalid.  This implementation returns 0 for every name accepted by
    PassiveStream.is_valid_name() or Stream.is_valid_name(); it never
    returns 1."""
    recognized = (PassiveStream.is_valid_name(name)
                  or Stream.is_valid_name(name))
    if not recognized:
        return -1
    # Only unix and punix are supported currently.
    return 0
def __init__(self, socket, name, status):
    """Initializes a stream named 'name' that wraps 'socket'.

    'status' describes the connection attempt so far: 0 means the stream
    is connected, errno.EAGAIN means the connection is still in progress,
    and any other value is the errno with which the attempt failed."""
    self.socket = socket
    self.name = name
    self.error = 0
    if status == errno.EAGAIN:
        self.state = Stream.__S_CONNECTING
    elif status == 0:
        self.state = Stream.__S_CONNECTED
    else:
        self.state = Stream.__S_DISCONNECTED
        # connect() returns self.error for a disconnected stream.  Leaving
        # it at 0 would make a failed connection look like a success.
        self.error = status
def __scs_connecting(self):
    """Advances a stream that is still connecting.

    Asks ovs.socket_util.check_connection_completion() about the socket.
    errno.EAGAIN leaves the state unchanged, 0 marks the stream connected,
    and any other value marks it disconnected and is recorded as the
    stream's error."""
    status = ovs.socket_util.check_connection_completion(self.socket)
    assert status != errno.EINPROGRESS

    if status == errno.EAGAIN:
        # Still in progress; nothing to update yet.
        return

    if status == 0:
        self.state = Stream.__S_CONNECTED
    else:
        self.state = Stream.__S_DISCONNECTED
        self.error = status
If the connection is still in progress, returns errno.EAGAIN.""" if self.state == Stream.__S_CONNECTING: self.__scs_connecting() if self.state == Stream.__S_CONNECTING: return errno.EAGAIN elif self.state == Stream.__S_CONNECTED: return 0 else: assert self.state == Stream.__S_DISCONNECTED return self.error def recv(self, n): """Tries to receive up to 'n' bytes from this stream. Returns a (error, string) tuple: - If successful, 'error' is zero and 'string' contains between 1 and 'n' bytes of data. - On error, 'error' is a positive errno value. - If the connection has been closed in the normal fashion or if 'n' is 0, the tuple is (0, ""). The recv function will not block waiting for data to arrive. If no data have been received, it returns (errno.EAGAIN, "") immediately.""" retval = self.connect() if retval != 0: return (retval, "") elif n == 0: return (0, "") try: return (0, self.socket.recv(n)) except socket.error, e: return (ovs.socket_util.get_exception_errno(e), "") def send(self, buf): """Tries to send 'buf' on this stream. If successful, returns the number of bytes sent, between 1 and len(buf). 0 is only a valid return value if len(buf) is 0. On error, returns a negative errno value. Will not block. 
If no bytes can be immediately accepted for transmission, returns -errno.EAGAIN immediately.""" retval = self.connect() if retval != 0: return -retval elif len(buf) == 0: return 0 try: return self.socket.send(buf) except socket.error, e: return -ovs.socket_util.get_exception_errno(e) def run(self): pass def run_wait(self, poller): pass def wait(self, poller, wait): assert wait in (Stream.W_CONNECT, Stream.W_RECV, Stream.W_SEND) if self.state == Stream.__S_DISCONNECTED: poller.immediate_wake() return if self.state == Stream.__S_CONNECTING: wait = Stream.W_CONNECT if wait == Stream.W_RECV: poller.fd_wait(self.socket, ovs.poller.POLLIN) else: poller.fd_wait(self.socket, ovs.poller.POLLOUT) def connect_wait(self, poller): self.wait(poller, Stream.W_CONNECT) def recv_wait(self, poller): self.wait(poller, Stream.W_RECV) def send_wait(self, poller): self.wait(poller, Stream.W_SEND) def __del__(self): # Don't delete the file: we might have forked. self.socket.close() class PassiveStream(object): @staticmethod def is_valid_name(name): """Returns True if 'name' is a passive stream name in the form "TYPE:ARGS" and TYPE is a supported passive stream type (currently only "punix:"), otherwise False.""" return name.startswith("punix:") def __init__(self, sock, name, bind_path): self.name = name self.socket = sock self.bind_path = bind_path @staticmethod def open(name): """Attempts to start listening for remote stream connections. 'name' is a connection name in the form "TYPE:ARGS", where TYPE is an passive stream class's name and ARGS are stream class-specific. Currently the only supported TYPE is "punix". 
Returns (error, pstream): on success 'error' is 0 and 'pstream' is the new PassiveStream, on failure 'error' is a positive errno value and 'pstream' is None.""" if not PassiveStream.is_valid_name(name): return errno.EAFNOSUPPORT, None bind_path = name[6:] if name.startswith("punix:"): bind_path = ovs.util.abs_file_name(ovs.dirs.RUNDIR, bind_path) error, sock = ovs.socket_util.make_unix_socket(socket.SOCK_STREAM, True, bind_path, None) if error: return error, None try: sock.listen(10) except socket.error, e: vlog.err("%s: listen: %s" % (name, os.strerror(e.error))) sock.close() return e.error, None return 0, PassiveStream(sock, name, bind_path) def close(self): """Closes this PassiveStream.""" self.socket.close() if self.bind_path is not None: ovs.fatal_signal.unlink_file_now(self.bind_path) self.bind_path = None def accept(self): """Tries to accept a new connection on this passive stream. Returns (error, stream): if successful, 'error' is 0 and 'stream' is the new Stream object, and on failure 'error' is a positive errno value and 'stream' is None. Will not block waiting for a connection. If no connection is ready to be accepted, returns (errno.EAGAIN, None) immediately.""" while True: try: sock, addr = self.socket.accept() ovs.socket_util.set_nonblocking(sock) return 0, Stream(sock, "unix:%s" % addr, 0) except socket.error, e: error = ovs.socket_util.get_exception_errno(e) if error != errno.EAGAIN: # XXX rate-limit vlog.dbg("accept: %s" % os.strerror(error)) return error, None def wait(self, poller): poller.fd_wait(self.socket, ovs.poller.POLLIN) def __del__(self): # Don't delete the file: we might have forked. 
class TCPStream(Stream):
    """Active stream over a TCP socket."""

    @staticmethod
    def _open(suffix, dscp):
        """Starts a TCP connection to the target given by 'suffix', using
        'dscp' for the socket's DSCP setting.

        Returns the (error, sock) pair from
        ovs.socket_util.inet_open_active().  When 'error' is zero,
        TCP_NODELAY is enabled on 'sock' before it is returned."""
        result = ovs.socket_util.inet_open_active(socket.SOCK_STREAM,
                                                  suffix, 0, dscp)
        error, sock = result
        if error:
            return error, sock

        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        return error, sock
def command_register(name, usage, min_args, max_args, callback, aux):
    """Registers the command 'name' so that the UnixctlServer exposes it.

    'usage' describes the command's arguments and is used only in "help"
    output.  'min_args' and 'max_args' are stored with the command as the
    accepted range for the number of arguments.

    'callback' is invoked when the command is received, with a
    UnixctlConnection object, the list of arguments as unicode strings,
    and 'aux'.  It should normally answer through
    UnixctlConnection.reply() or UnixctlConnection.reply_error() before
    returning.  It may defer the reply, but a connection handles only one
    request at a time, so a reply must eventually be sent to unblock it.

    Registering a name that is already registered has no effect."""
    for text in (name, usage):
        assert isinstance(text, strtypes)
    for count in (min_args, max_args):
        assert isinstance(count, int)
    assert isinstance(callback, types.FunctionType)

    if name in commands:
        return
    commands[name] = _UnixctlCommand(usage, min_args, max_args, callback,
                                     aux)
class UnixctlClient(object):
    """Client for sending unixctl commands over a JSON-RPC connection."""

    def __init__(self, conn):
        """Wraps 'conn', which must be an ovs.jsonrpc.Connection."""
        assert isinstance(conn, ovs.jsonrpc.Connection)
        self._conn = conn

    def transact(self, command, argv):
        """Sends 'command' with the string arguments in the list 'argv' and
        blocks until the reply arrives.

        Returns an (error, stderr, stdout) tuple.  If communication
        failed, 'error' is a positive errno value and the other two are
        None.  Otherwise 'error' is 0 and exactly one of the others is not
        None: 'stderr' holds the reply's error as a string if the reply
        carried one, else 'stdout' holds its result as a string."""
        assert isinstance(command, strtypes)
        assert isinstance(argv, list)
        for arg in argv:
            assert isinstance(arg, strtypes)

        request = ovs.jsonrpc.Message.create_request(command, argv)
        error, reply = self._conn.transact_block(request)

        if error:
            vlog.warn("error communicating with %s: %s"
                      % (self._conn.name, os.strerror(error)))
            return error, None, None

        if reply.error is not None:
            return 0, str(reply.error), None
        else:
            assert reply.result is not None
            return 0, None, str(reply.result)

    def close(self):
        """Closes the underlying connection and drops the reference to it.

        Calling close() again afterwards is a no-op."""
        if self._conn is not None:
            self._conn.close()
            # Clear the attribute that actually holds the connection.
            # Assigning to 'conn' would only create an unrelated attribute
            # and keep the closed connection referenced.
            self._conn = None

    @staticmethod
    def create(path):
        """Connects to the Unix domain socket at 'path', resolved against
        ovs.dirs.RUNDIR by ovs.util.abs_file_name(), and blocks until the
        connection attempt finishes.

        Returns (error, client): on success 'error' is 0 and 'client' is a
        new UnixctlClient, on failure 'error' is the error from the stream
        layer and 'client' is None."""
        assert isinstance(path, str)

        unix = "unix:%s" % ovs.util.abs_file_name(ovs.dirs.RUNDIR, path)
        error, stream = ovs.stream.Stream.open_block(
            ovs.stream.Stream.open(unix))

        if error:
            vlog.warn("failed to connect to %s" % path)
            return error, None

        return 0, UnixctlClient(ovs.jsonrpc.Connection(stream))
class UnixctlConnection(object):
    """Server side of a single unixctl JSON-RPC connection.

    At most one request is outstanding at a time: its id is kept in
    '_request_id' until reply() or reply_error() is called."""

    def __init__(self, rpc):
        assert isinstance(rpc, ovs.jsonrpc.Connection)
        self._rpc = rpc
        # Id of the request awaiting a reply, or None when idle.
        self._request_id = None

    def run(self):
        """Services the connection: flushes output and, while no request is
        pending, receives and dispatches up to 10 messages.

        Returns 0 or an errno value (EAGAIN when there is nothing to read)."""
        self._rpc.run()
        error = self._rpc.get_status()
        if error or self._rpc.get_backlog():
            return error

        for _ in range(10):
            # Compare against None: a request id of 0 is falsy, so a plain
            # truth test would treat such a pending request as "idle" and
            # accept another request before the first one was answered.
            if error or self._request_id is not None:
                break

            error, msg = self._rpc.recv()
            if msg:
                if msg.type == Message.T_REQUEST:
                    self._process_command(msg)
                else:
                    # XXX: rate-limit
                    vlog.warn("%s: received unexpected %s message"
                              % (self._rpc.name,
                                 Message.type_to_string(msg.type)))
                    error = errno.EINVAL

        if not error:
            error = self._rpc.get_status()

        return error

    def reply(self, body):
        """Sends 'body' (a string or None) as the successful result of the
        pending request."""
        self._reply_impl(True, body)

    def reply_error(self, body):
        """Sends 'body' (a string or None) as the error result of the pending
        request."""
        self._reply_impl(False, body)

    # Called only by unixctl classes.
    def _close(self):
        self._rpc.close()
        self._request_id = None

    def _wait(self, poller):
        self._rpc.wait(poller)
        # Only wait for new input once queued output has been flushed.
        if not self._rpc.get_backlog():
            self._rpc.recv_wait(poller)

    def _reply_impl(self, success, body):
        assert isinstance(success, bool)
        assert body is None or isinstance(body, strtypes)

        assert self._request_id is not None

        if body is None:
            body = ""

        # Non-empty replies always end with a new-line.
        if body and not body.endswith("\n"):
            body += "\n"

        if success:
            reply = Message.create_reply(body, self._request_id)
        else:
            reply = Message.create_error(body, self._request_id)

        self._rpc.send(reply)
        self._request_id = None

    def _process_command(self, request):
        """Validates 'request' against the registered command table and
        either invokes the command callback or replies with an error."""
        assert isinstance(request, ovs.jsonrpc.Message)
        assert request.type == ovs.jsonrpc.Message.T_REQUEST

        self._request_id = request.id

        error = None
        params = request.params
        method = request.method
        command = ovs.unixctl.commands.get(method)
        if command is None:
            error = '"%s" is not a valid command' % method
        elif len(params) < command.min_args:
            error = '"%s" command requires at least %d arguments' \
                    % (method, command.min_args)
        elif len(params) > command.max_args:
            error = '"%s" command takes at most %d arguments' \
                    % (method, command.max_args)
        else:
            for param in params:
                if not isinstance(param, strtypes):
                    error = '"%s" command has non-string argument' % method
                    break

            if error is None:
                unicode_params = [unicode(p) for p in params]
                command.callback(self, unicode_params, command.aux)

        if error:
            self.reply_error(error)


def _unixctl_version(conn, unused_argv, version):
    """Handler for the "version" command; 'version' is the aux value that
    UnixctlServer.create() registered."""
    assert isinstance(conn, UnixctlConnection)
    version = "%s (Open vSwitch) %s" % (ovs.util.PROGRAM_NAME, version)
    conn.reply(version)


class UnixctlServer(object):
    """Listens on a unixctl socket and services the accepted connections."""

    def __init__(self, listener):
        assert isinstance(listener, ovs.stream.PassiveStream)
        self._listener = listener
        self._conns = []

    def run(self):
        """Accepts up to 10 new connections, then runs every connection and
        drops those that report an error other than EAGAIN."""
        for _ in range(10):
            error, stream = self._listener.accept()
            if not error:
                rpc = ovs.jsonrpc.Connection(stream)
                self._conns.append(UnixctlConnection(rpc))
            elif error == errno.EAGAIN:
                break
            else:
                # XXX: rate-limit
                vlog.warn("%s: accept failed: %s" % (self._listener.name,
                                                     os.strerror(error)))

        # Iterate over a copy because failed connections are removed.
        for conn in copy.copy(self._conns):
            error = conn.run()
            if error and error != errno.EAGAIN:
                conn._close()
                self._conns.remove(conn)

    def wait(self, poller):
        """Registers the listener and all connections with 'poller'."""
        self._listener.wait(poller)
        for conn in self._conns:
            conn._wait(poller)

    def close(self):
        """Closes every connection and the listening socket."""
        for conn in self._conns:
            conn._close()
        self._conns = None

        self._listener.close()
        self._listener = None

    @staticmethod
    def create(path, version=None):
        """Creates a new UnixctlServer which listens on a unixctl socket
        created at 'path'.  If 'path' is None, the default path is chosen.
        'version' contains the version of the server as reported by the
        unixctl version command.  If None, ovs.version.VERSION is used.

        Returns (0, UnixctlServer) on success or (errno, None) on failure."""

        assert path is None or isinstance(path, strtypes)

        if path is not None:
            path = "punix:%s" % ovs.util.abs_file_name(ovs.dirs.RUNDIR, path)
        else:
            path = "punix:%s/%s.%d.ctl" % (ovs.dirs.RUNDIR,
                                           ovs.util.PROGRAM_NAME, os.getpid())

        if version is None:
            version = ovs.version.VERSION

        error, listener = ovs.stream.PassiveStream.open(path)
        if error:
            ovs.util.ovs_error(error, "could not initialize control socket %s"
                               % path)
            return error, None

        ovs.unixctl.command_register("version", "", 0, 0, _unixctl_version,
                                     version)

        return 0, UnixctlServer(listener)


class UnixctlClient(object):
    """Blocking unixctl client built on top of an ovs.jsonrpc.Connection."""

    def __init__(self, conn):
        assert isinstance(conn, ovs.jsonrpc.Connection)
        self._conn = conn

    def transact(self, command, argv):
        """Sends 'command' with string arguments 'argv' and waits for the
        reply.

        Returns (errno, None, None) on transport failure, (0, text, None) for
        an error reply and (0, None, text) for a successful reply."""
        assert isinstance(command, strtypes)
        assert isinstance(argv, list)
        for arg in argv:
            assert isinstance(arg, strtypes)

        request = Message.create_request(command, argv)
        error, reply = self._conn.transact_block(request)

        if error:
            vlog.warn("error communicating with %s: %s"
                      % (self._conn.name, os.strerror(error)))
            return error, None, None

        if reply.error is not None:
            return 0, str(reply.error), None
        else:
            assert reply.result is not None
            return 0, None, str(reply.result)

    def close(self):
        """Closes the underlying connection.  Safe to call more than once."""
        if self._conn is not None:
            self._conn.close()
            # The original assigned to 'self.conn', leaving the closed
            # connection referenced by 'self._conn'.
            self._conn = None

    @staticmethod
    def create(path):
        """Connects to the unixctl socket at 'path' (relative paths are taken
        relative to ovs.dirs.RUNDIR).

        Returns (0, UnixctlClient) on success or (errno, None) on failure."""
        # Accept the same string types as transact() does.
        assert isinstance(path, strtypes)

        unix = "unix:%s" % ovs.util.abs_file_name(ovs.dirs.RUNDIR, path)
        error, stream = ovs.stream.Stream.open_block(
            ovs.stream.Stream.open(unix))

        if error:
            vlog.warn("failed to connect to %s" % path)
            return error, None

        return 0, UnixctlClient(ovs.jsonrpc.Connection(stream))
def ovs_retval_to_string(retval):
    """Converts an OVS-style integer return value into readable text.

    OVS functions commonly return one of:
    - 0: no error yet
    - >0: errno value
    - EOF: end of file (not necessarily an error; depends on the function
      called)

    Returns "" for 0 (or any false value), the strerror() text for a
    positive value, "End of file" for EOF, and a marker string for anything
    else."""
    if retval:
        if retval > 0:
            return os.strerror(retval)
        elif retval == EOF:
            return "End of file"
        else:
            return "***unknown return value: %s***" % retval
    return ""


def ovs_error(err_no, message, vlog=None):
    """Writes "PROGRAM_NAME: message" to stderr and, when 'vlog' is given,
    also logs it there at ERROR level.

    A nonzero 'err_no' is rendered with ovs_retval_to_string() and appended
    in parentheses.  'message' should not end with a new-line; one is added
    here."""
    pieces = ["%s: %s" % (PROGRAM_NAME, message)]
    if err_no:
        pieces.append(" (%s)" % ovs_retval_to_string(err_no))
    text = "".join(pieces)

    sys.stderr.write(text + "\n")
    if vlog:
        vlog.err(text)


def ovs_fatal(*args, **kwargs):
    """Reports an error exactly as ovs_error() does (same arguments), then
    terminates the process with exit code 1 (indicating a failure)."""
    ovs_error(*args, **kwargs)
    sys.exit(1)
# Default level for every facility of a newly created module.
FACILITIES = {"console": "info", "file": "info", "syslog": "info"}

# Maps vlog level names onto levels of the standard 'logging' module.
LEVELS = {
    "dbg": logging.DEBUG,
    "info": logging.INFO,
    "warn": logging.WARNING,
    "err": logging.ERROR,
    "emer": logging.CRITICAL,
    "off": logging.CRITICAL
}


def get_level(level_str):
    """Returns the 'logging' level for the vlog level name 'level_str'
    (case-insensitive), or None if the name is unknown."""
    return LEVELS.get(level_str.lower())


class Vlog:
    __inited = False
    __msg_num = 0
    __mfl = {}  # Module -> facility -> level
    __log_file = None
    __file_handler = None

    def __init__(self, name):
        """Creates a new Vlog object representing a module called 'name'.  The
        created Vlog object will do nothing until the Vlog.init() static method
        is called.  Once called, no more Vlog objects may be created."""

        assert not Vlog.__inited
        self.name = name.lower()
        # Keys are lower-cased, so the lookup must use the lower-cased name;
        # otherwise creating a second Vlog for a mixed-case module name would
        # reset levels that were already configured for it.
        if self.name not in Vlog.__mfl:
            Vlog.__mfl[self.name] = FACILITIES.copy()

    def __log(self, level, message, **kwargs):
        """Emits 'message' at the vlog level named 'level' to every facility
        whose configured threshold allows it.  Does nothing before init()."""
        if not Vlog.__inited:
            return

        dt = datetime.datetime.utcnow()
        # Milliseconds are formatted separately because strftime() has no
        # directive for them; '//' keeps the value an integer.
        now = dt.strftime("%Y-%m-%dT%H:%M:%S.%%03iZ") % (dt.microsecond // 1000)
        syslog_message = ("%s|%s|%s|%s" % (Vlog.__msg_num, self.name, level,
                                           message))

        level = LEVELS.get(level.lower(), logging.DEBUG)
        Vlog.__msg_num += 1

        for f, f_level in Vlog.__mfl[self.name].items():
            f_level = LEVELS.get(f_level, logging.CRITICAL)
            if level >= f_level:
                if f == "syslog":
                    message = "ovs|" + syslog_message
                else:
                    message = "%s|%s" % (now, syslog_message)
                logging.getLogger(f).log(level, message, **kwargs)

    def emer(self, message, **kwargs):
        """Logs 'message' at EMER level."""
        self.__log("EMER", message, **kwargs)

    def err(self, message, **kwargs):
        """Logs 'message' at ERR level."""
        self.__log("ERR", message, **kwargs)

    def warn(self, message, **kwargs):
        """Logs 'message' at WARN level."""
        self.__log("WARN", message, **kwargs)

    def info(self, message, **kwargs):
        """Logs 'message' at INFO level."""
        self.__log("INFO", message, **kwargs)

    def dbg(self, message, **kwargs):
        """Logs 'message' at DBG level."""
        self.__log("DBG", message, **kwargs)

    def exception(self, message):
        """Logs 'message' at ERR log level.  Includes a backtrace when in
        exception context."""
        self.err(message, exc_info=True)

    @staticmethod
    def init(log_file=None):
        """Intializes the Vlog module.  Causes Vlog to write to 'log_file' if
        not None.  Should be called after all Vlog objects have been created.
        No logging will occur until this function is called."""

        if Vlog.__inited:
            return

        Vlog.__inited = True
        logging.raiseExceptions = False
        Vlog.__log_file = log_file
        for f in FACILITIES:
            logger = logging.getLogger(f)
            logger.setLevel(logging.DEBUG)

            try:
                if f == "console":
                    logger.addHandler(logging.StreamHandler(sys.stderr))
                elif f == "syslog":
                    logger.addHandler(logging.handlers.SysLogHandler(
                        address="/dev/log",
                        facility=logging.handlers.SysLogHandler.LOG_DAEMON))
                elif f == "file" and Vlog.__log_file:
                    Vlog.__file_handler = logging.FileHandler(Vlog.__log_file)
                    logger.addHandler(Vlog.__file_handler)
            except (IOError, socket.error):
                # The facility is unusable; silence it instead of failing.
                logger.setLevel(logging.CRITICAL)

        ovs.unixctl.command_register("vlog/reopen", "", 0, 0,
                                     Vlog._unixctl_vlog_reopen, None)
        ovs.unixctl.command_register("vlog/set", "spec", 1, sys.maxint,
                                     Vlog._unixctl_vlog_set, None)
        ovs.unixctl.command_register("vlog/list", "", 0, 0,
                                     Vlog._unixctl_vlog_list, None)

    @staticmethod
    def set_level(module, facility, level):
        """ Sets the log level of the 'module'-'facility' tuple to 'level'.
        All three arguments are strings which are interpreted the same as
        arguments to the --verbose flag.  Should be called after all Vlog
        objects have already been created.

        Unknown modules, facilities or levels are silently ignored."""

        module = module.lower()
        facility = facility.lower()
        level = level.lower()

        if facility != "any" and facility not in FACILITIES:
            return

        if module != "any" and module not in Vlog.__mfl:
            return

        if level not in LEVELS:
            return

        if module == "any":
            modules = Vlog.__mfl.keys()
        else:
            modules = [module]

        if facility == "any":
            facilities = FACILITIES.keys()
        else:
            facilities = [facility]

        for m in modules:
            for f in facilities:
                Vlog.__mfl[m][f] = level

    @staticmethod
    def set_levels_from_string(s):
        """Parses 's', a list of words separated by spaces or colons, each a
        module, facility or level name (or "any"), and applies it with
        set_level().

        Returns None on success or an error message string."""
        module = None
        level = None
        facility = None

        for word in [w.lower() for w in re.split('[ :]', s)]:
            if word == "any":
                pass
            elif word in FACILITIES:
                if facility:
                    return "cannot specify multiple facilities"
                facility = word
            elif word in LEVELS:
                if level:
                    return "cannot specify multiple levels"
                level = word
            elif word in Vlog.__mfl:
                if module:
                    return "cannot specify multiple modules"
                module = word
            else:
                return "no facility, level, or module \"%s\"" % word

        # NOTE(review): when no level word is given, "any" is passed as the
        # level; set_level() does not recognize it and changes nothing.
        Vlog.set_level(module or "any", facility or "any", level or "any")

    @staticmethod
    def get_levels():
        """Returns a table (as one string) listing the console, syslog and
        file level of every module, sorted by module name."""
        lines = [" console syslog file\n",
                 " ------- ------ ------\n"]
        lines.extend(sorted(["%-16s %4s %4s %4s\n"
                             % (m,
                                Vlog.__mfl[m]["console"],
                                Vlog.__mfl[m]["syslog"],
                                Vlog.__mfl[m]["file"]) for m in Vlog.__mfl]))
        return ''.join(lines)

    @staticmethod
    def reopen_log_file():
        """Closes and then attempts to re-open the current log file.  (This is
        useful just after log rotation, to ensure that the new log file starts
        being used.)"""

        if Vlog.__log_file:
            logger = logging.getLogger("file")
            if Vlog.__file_handler:
                logger.removeHandler(Vlog.__file_handler)
                # removeHandler() does not close the stream; close it so the
                # rotated file is actually released.
                Vlog.__file_handler.close()
            Vlog.__file_handler = logging.FileHandler(Vlog.__log_file)
            logger.addHandler(Vlog.__file_handler)

    @staticmethod
    def _unixctl_vlog_reopen(conn, unused_argv, unused_aux):
        if Vlog.__log_file:
            Vlog.reopen_log_file()
            conn.reply(None)
        else:
            conn.reply("Logging to file not configured")

    @staticmethod
    def _unixctl_vlog_set(conn, argv, unused_aux):
        for arg in argv:
            msg = Vlog.set_levels_from_string(arg)
            if msg:
                conn.reply(msg)
                return
        conn.reply(None)

    @staticmethod
    def _unixctl_vlog_list(conn, unused_argv, unused_aux):
        conn.reply(Vlog.get_levels())


def add_args(parser):
    """Adds vlog related options to 'parser', an ArgumentParser object.  The
    resulting arguments parsed by 'parser' should be passed to handle_args."""

    group = parser.add_argument_group(title="Logging Options")
    group.add_argument("--log-file", nargs="?", const="default",
                       help="Enables logging to a file. Default log file"
                       " is used if LOG_FILE is omitted.")
    group.add_argument("-v", "--verbose", nargs="*",
                       help="Sets logging levels, see ovs-vswitchd(8)."
                       " Defaults to dbg.")
CONTROL_PORT = 15531
DATA_PORT = 15532


def ip_address(string):
    """Returns 'string' unchanged if socket.inet_aton() accepts it as an IPv4
    address; raises argparse.ArgumentTypeError otherwise."""
    try:
        socket.inet_aton(string)
    except socket.error:
        raise argparse.ArgumentTypeError("Not a valid IPv4 address")
    else:
        return string


def ip_optional_mask(string):
    """
    Checks that 'string' is an IP address optionally followed by "/MASK",
    where MASK is an integer in the range 0..31.  Returns 'string' unchanged
    or raises argparse.ArgumentTypeError.
    """
    parts = string.split("/")
    if len(parts) > 2:
        raise argparse.ArgumentTypeError(
            "IP address and netmask must be separated by a single slash")
    if len(parts) == 2:
        try:
            prefix = int(parts[1])
        except ValueError:
            raise argparse.ArgumentTypeError("Netmask is not a valid integer")
        if not (0 <= prefix <= 31):
            raise argparse.ArgumentTypeError("Netmask must be in range 0..31")
    ip_address(parts[0])
    return string


def port(string):
    """Parses 'string' as a TCP/UDP port number and returns it as an integer
    in the range 1..65535; raises argparse.ArgumentTypeError otherwise."""
    try:
        number = int(string)
    except ValueError:
        raise argparse.ArgumentTypeError("Port is not an integer")
    if not (1 <= number <= 65535):
        raise argparse.ArgumentTypeError("Port is out of range")
    return number


def ip_optional_port(string, default_port, ip_callback):
    """Splits "IP[:PORT]" into an (ip, port) pair.  'default_port' is used
    when the port is absent.  'ip_callback' validates (and returns) the IP
    part."""
    fields = string.split(':')
    if len(fields) > 2:
        raise argparse.ArgumentTypeError("IP address from the optional Port "
                                         "must be colon-separated")
    address = ip_callback(fields[0])
    if len(fields) == 2:
        return (address, port(fields[1]))
    return (address, default_port)


def ip_optional_port_port(string, default_port1, default_port2, ip_callback):
    """Splits "IP[:PORT1[:PORT2]]" into an (ip, port1, port2) tuple.  Missing
    ports are replaced by the corresponding default.  'ip_callback' validates
    (and returns) the IP part."""
    fields = string.split(':')
    count = len(fields)
    if count > 3:
        raise argparse.ArgumentTypeError("Expected IP address and at most "
                                         "two colon-separated ports")
    address = ip_callback(fields[0])
    first = port(fields[1]) if count >= 2 else default_port1
    second = port(fields[2]) if count == 3 else default_port2
    return (address, first, second)


def vlan_tag(string):
    """
    Checks that 'string' is an integer VLAN tag in the range 1..4094 and
    returns 'string' itself (not the integer).
    """
    try:
        tag = int(string)
    except ValueError:
        raise argparse.ArgumentTypeError("VLAN tag is not a valid integer")
    if not (1 <= tag <= 4094):
        raise argparse.ArgumentTypeError("Not a valid VLAN tag. "
                                         "VLAN tag should be in the "
                                         "range 1..4094.")
    return string


def server_endpoint(string):
    """Converts a string OuterIP[:OuterPort],InnerIP[/Mask][:InnerPort]
    into a 4-tuple, where:
    1. First element is OuterIP
    2. Second element is OuterPort (if omitted will use default value 15531)
    3  Third element is InnerIP with optional mask
    4. Fourth element is InnerPort (if omitted will use default value 15532)
    """
    halves = string.split(',')
    if len(halves) != 2:
        raise argparse.ArgumentTypeError("OuterIP:OuterPort and InnerIP/Mask:"
                                         "InnerPort must be comma separated")
    outer_ip, outer_port = ip_optional_port(halves[0], CONTROL_PORT,
                                            ip_address)
    inner_ip, inner_port = ip_optional_port(halves[1], DATA_PORT,
                                            ip_optional_mask)
    return (outer_ip, outer_port, inner_ip, inner_port)


class UniqueServerAction(argparse.Action):
    """
    Custom argparse action that rejects ovs-test servers sharing the same
    OuterIP.  A server whose OuterIP is 127.0.0.1 is placed at the front of
    the resulting list.
    """
    def __call__(self, parser, namespace, values, option_string=None):
        seen = set()
        ordered = []
        for text in values:
            try:
                endpoint = server_endpoint(text)
            except argparse.ArgumentTypeError as exc:
                raise argparse.ArgumentError(self, str(exc))
            outer_ip = endpoint[0]
            if outer_ip in seen:
                raise argparse.ArgumentError(self, "Duplicate OuterIPs found")
            seen.add(outer_ip)
            if outer_ip == "127.0.0.1":
                ordered.insert(0, endpoint)
            else:
                ordered.append(endpoint)
        setattr(namespace, self.dest, ordered)


def bandwidth(string):
    """Convert a string (given in bits/second with optional magnitude for
    units) into a long (bytes/second)"""
    if re.match("^[1-9][0-9]*[MK]?$", string) is None:
        raise argparse.ArgumentTypeError("Not a valid target bandwidth")
    digits = string.replace("M", "000000").replace("K", "000")
    return long(digits) / 8  # Convert from bits to bytes


def tunnel_types(string):
    """
    Splits the comma-separated 'string' into the list of tunnel types the
    user intends to test.
    """
    return string.split(',')


def l3_endpoint_client(string):
    """
    Parses a command line argument given in
    remoteIP,localInnerIP[/mask][:ControlPort[:TestPort]],remoteInnerIP[:
    ControlPort[:TestPort]] format and returns
    (remoteIP, (localInnerIP, port, port), (remoteInnerIP, port, port)).
    """
    try:
        remote_ip, local_spec, peer_spec = string.split(',')
    except ValueError:
        raise argparse.ArgumentTypeError("All 3 IP addresses must be comma "
                                         "separated.")
    remote = ip_address(remote_ip)
    local = ip_optional_port_port(local_spec, CONTROL_PORT, DATA_PORT,
                                  ip_optional_mask)
    peer = ip_optional_port_port(peer_spec, CONTROL_PORT, DATA_PORT,
                                 ip_address)
    return (remote, local, peer)


def l3_endpoint_server(string):
    """
    Parses a command line argument given in
    remoteIP,localInnerIP[/mask][:ControlPort] format and returns
    (remoteIP, (localInnerIP, port)).
    """
    try:
        remote_ip, local_spec = string.split(',')
    except ValueError:
        raise argparse.ArgumentTypeError("Both IP addresses must be comma "
                                         "separated.")
    remote = ip_address(remote_ip)
    local = ip_optional_port(local_spec, CONTROL_PORT, ip_optional_mask)
    return (remote, local)


def _add_common_args(arg_parser, version_text):
    """Adds the -v, -b and -i options shared by ovs-test and ovs-l3ping."""
    arg_parser.add_argument('-v', '--version', action='version',
                            version=version_text)
    arg_parser.add_argument(
        "-b", "--bandwidth", action='store', dest="targetBandwidth",
        default="1M", type=bandwidth,
        help='Target bandwidth for UDP tests in bits/second. Use '
        'postfix M or K to alter unit magnitude.')
    arg_parser.add_argument(
        "-i", "--interval", action='store', dest="testInterval",
        default=5, type=int,
        help='Interval for how long to run each test in seconds.')


def ovs_initialize_args():
    """
    Builds the ovs-test command line parser, parses the process arguments
    and returns the resulting namespace.
    """
    arg_parser = argparse.ArgumentParser(
        description='Test connectivity between two Open vSwitches.')
    _add_common_args(arg_parser, 'ovs-test (Open vSwitch) @VERSION@')

    arg_parser.add_argument(
        "-t", "--tunnel-modes", action='store', dest="tunnelModes",
        default=(), type=tunnel_types,
        help='Do L3 tests with the given tunnel modes.')
    arg_parser.add_argument(
        "-l", "--vlan-tag", action='store', dest="vlanTag",
        default=None, type=vlan_tag,
        help='Do VLAN tests and use the given VLAN tag.')
    arg_parser.add_argument(
        "-d", "--direct", action='store_true', dest="direct", default=None,
        help='Do direct tests between both ovs-test servers.')

    # Exactly one of server mode / client mode must be selected.
    mode = arg_parser.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "-s", "--server", action="store", dest="port", type=port,
        help='Run in server mode and wait for the client to '
        'connect to this port.')
    mode.add_argument(
        '-c', "--client", nargs=2, dest="servers",
        action=UniqueServerAction, metavar=("SERVER1", "SERVER2"),
        help='Run in client mode and do tests between these '
        'two ovs-test servers. Each server must be specified in '
        'following format - OuterIP:OuterPort,InnerIP[/mask] '
        ':InnerPort. It is possible to start local instance of '
        'ovs-test server in the client mode by using 127.0.0.1 as '
        'OuterIP.')
    return arg_parser.parse_args()


def l3_initialize_args():
    """
    Builds the ovs-l3ping command line parser, parses the process arguments
    and returns the resulting namespace.
    """
    arg_parser = argparse.ArgumentParser(
        description='Test L3 tunnel '
        'connectivity between two Open vSwitch instances.')
    _add_common_args(arg_parser, 'ovs-l3ping (Open vSwitch) @VERSION@')

    arg_parser.add_argument(
        "-t", "--tunnel-mode", action='store', dest="tunnelMode",
        required=True,
        help='Do L3 tests with this tunnel type.')

    # Exactly one of server mode / client mode must be selected.
    mode = arg_parser.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        "-s", "--server", action="store", dest="server",
        metavar="TUNNELIP,SERVER", type=l3_endpoint_server,
        help='Run in server mode and wait for the client to '
        'connect.')
    mode.add_argument(
        '-c', "--client", action="store", dest="client",
        metavar="TUNNELIP,CLIENT,SERVER", type=l3_endpoint_client,
        help='Run in client mode and connect to the server.')
    return arg_parser.parse_args()
class TestArena(xmlrpc.XMLRPC):
    """
    XML RPC resource exposing every function that the ovs-test client calls
    remotely.  The caller is responsible to use designated handleIds
    for designated methods (e.g. do not mix UDP and TCP handles).

    Bridges and ports created through this object are remembered so that
    cleanup() can remove them again.
    """

    def __init__(self):
        xmlrpc.XMLRPC.__init__(self, allowNone=True)
        # Next handle number to hand out, and the handle -> resource table.
        self.handle_id = 1
        self.handle_map = {}
        # Resources created on behalf of the client; see cleanup().
        self.bridges = set()
        self.pbridges = set()
        self.ports = set()
        # Request currently being rendered; set by render().
        self.request = None

    def __acquire_handle(self, value):
        """
        Allocates a new handle, associates 'value' with it and returns the
        handle.
        """
        handle = self.handle_id
        self.handle_map[handle] = value
        self.handle_id += 1
        return handle

    def __get_handle_resources(self, handle):
        """
        Returns the value that was associated with 'handle'.  Raises KeyError
        for an unknown handle.
        """
        return self.handle_map[handle]

    def __delete_handle(self, handle):
        """
        Removes 'handle' from handle_map.  Raises KeyError for an unknown
        handle.
        """
        del self.handle_map[handle]

    def cleanup(self):
        """
        Delete all remaining bridges and ports if ovs-test client did not had
        a chance to remove them. It is necessary to call this function if
        ovs-test server is abruptly terminated when doing the tests.
        """
        for port in self.ports:
            # Remove ports that were added to existing bridges
            vswitch.ovs_vsctl_del_port_from_bridge(port)

        for bridge in self.bridges:
            # Remove bridges that were added for L3 tests
            vswitch.ovs_vsctl_del_bridge(bridge)

        for pbridge in self.pbridges:
            # Remove bridges that were added for VLAN tests
            vswitch.ovs_vsctl_del_pbridge(pbridge[0], pbridge[1])

    def render(self, request):
        """
        This method overrides the original XMLRPC.render method so that it
        would be possible to get the XML RPC client IP address from the
        request object.
        """
        self.request = request
        return xmlrpc.XMLRPC.render(self, request)

    def xmlrpc_get_my_address(self):
        """
        Returns the RPC client's IP address, taken from the request saved by
        render().
        """
        return self.request.getClientIP()

    def xmlrpc_get_my_address_from(self, his_ip, his_port):
        """
        Returns the ovs-test server IP address that the other ovs-test server
        with the given ip will see.  This is done by calling get_my_address()
        on the XML RPC server at his_ip:his_port.
        """
        server1 = xmlrpclib.Server("http://%s:%u/" % (his_ip, his_port))
        return server1.get_my_address()

    def xmlrpc_create_udp_listener(self, port):
        """
        Creates a UDP listener on 'port' that will receive packets from a UDP
        sender.  Returns its handle, or -1 if the port cannot be listened on.
        """
        try:
            listener = udp.UdpListener()
            reactor.listenUDP(port, listener)
            handle_id = self.__acquire_handle(listener)
        except CannotListenError:
            return -1
        return handle_id

    def xmlrpc_create_udp_sender(self, host, count, size, duration):
        """
        Creates a UdpSender that sends UDP datagrams to the UDP listener at
        'host' and returns its handle.  'host' is converted to a tuple before
        use; 'count', 'size' and 'duration' are passed to udp.UdpSender
        unchanged.
        """
        sender = udp.UdpSender(tuple(host), count, size, duration)
        # Port 0 is passed so that no specific local port is requested.
        reactor.listenUDP(0, sender)
        handle_id = self.__acquire_handle(sender)
        return handle_id

    def xmlrpc_get_udp_listener_results(self, handle):
        """
        Returns getResults() of the UDP listener behind 'handle' (the number
        of datagrams that were received).
        """
        listener = self.__get_handle_resources(handle)
        return listener.getResults()

    def xmlrpc_get_udp_sender_results(self, handle):
        """
        Returns getResults() of the UDP sender behind 'handle' (the number of
        datagrams that were sent).
        """
        sender = self.__get_handle_resources(handle)
        return sender.getResults()

    def xmlrpc_close_udp_listener(self, handle):
        """
        Releases UdpListener and all its resources.  Always returns 0.
        """
        listener = self.__get_handle_resources(handle)
        listener.transport.stopListening()
        self.__delete_handle(handle)
        return 0

    def xmlrpc_close_udp_sender(self, handle):
        """
        Releases UdpSender and all its resources.  Always returns 0.
        """
        sender = self.__get_handle_resources(handle)
        sender.transport.stopListening()
        self.__delete_handle(handle)
        return 0

    def xmlrpc_create_tcp_listener(self, port):
        """
        Creates a TcpListener on 'port' that will accept connection from
        TcpSender.  Returns its handle, or -1 if the port cannot be listened
        on.
        """
        try:
            listener = tcp.TcpListenerFactory()
            # 'port' is rebound to the object returned by listenTCP(); it is
            # stored next to the factory so the listener can be closed later.
            port = reactor.listenTCP(port, listener)
            handle_id = self.__acquire_handle((listener, port))
            return handle_id
        except CannotListenError:
            return -1

    def xmlrpc_create_tcp_sender(self, his_ip, his_port, duration):
        """
        Creates a TcpSender that will connect to the TcpListener at
        his_ip:his_port.  Returns the handle of the (sender, connector) pair.
        """
        sender = tcp.TcpSenderFactory(duration)
        connector = reactor.connectTCP(his_ip, his_port, sender)
        handle_id = self.__acquire_handle((sender, connector))
        return handle_id

    def xmlrpc_get_tcp_listener_results(self, handle):
        """
        Returns getResults() of the TCP listener behind 'handle' (the number
        of bytes received).
        """
        (listener, _) = self.__get_handle_resources(handle)
        return listener.getResults()

    def xmlrpc_get_tcp_sender_results(self, handle):
        """
        Returns getResults() of the TCP sender behind 'handle' (the number of
        bytes sent).
        """
        (sender, _) = self.__get_handle_resources(handle)
        return sender.getResults()

    def xmlrpc_close_tcp_listener(self, handle):
        """
        Releases TcpListener and all its resources.  Returns 0 on success or
        -1 if 'handle' is unknown.
        """
        try:
            (_, port) = self.__get_handle_resources(handle)
            port.loseConnection()
            self.__delete_handle(handle)
        except exceptions.KeyError:
            return -1
        return 0

    def xmlrpc_close_tcp_sender(self, handle):
        """
        Releases TcpSender and all its resources.  Returns 0 on success or -1
        if 'handle' is unknown.
        """
        try:
            (_, connector) = self.__get_handle_resources(handle)
            connector.disconnect()
            self.__delete_handle(handle)
        except exceptions.KeyError:
            return -1
        return 0

    def xmlrpc_create_test_bridge(self, bridge, iface):
        """
        This function creates a physical bridge from iface. It moves the
        IP configuration from the physical interface to the bridge.

        Returns the result of the last ovs-vsctl helper that was called.
        """
        ret = vswitch.ovs_vsctl_add_bridge(bridge)
        if ret == 0:
            self.pbridges.add((bridge, iface))
            util.interface_up(bridge)
            (ip_addr, mask) = util.interface_get_ip(iface)
            util.interface_assign_ip(bridge, ip_addr, mask)
            util.move_routes(iface, bridge)
            util.interface_assign_ip(iface, "0.0.0.0", "255.255.255.255")
            ret = vswitch.ovs_vsctl_add_port_to_bridge(bridge, iface)
            if ret == 0:
                self.ports.add(iface)
            else:
                # Adding the port failed: give the address and routes back
                # to the interface and remove the bridge again.
                util.interface_assign_ip(iface, ip_addr, mask)
                util.move_routes(bridge, iface)
                vswitch.ovs_vsctl_del_bridge(bridge)

        return ret

    def xmlrpc_del_test_bridge(self, bridge, iface):
        """
        This function deletes the test bridge and moves its IP configuration
        back to the physical interface.  The (bridge, iface) pair is forgotten
        regardless of the result, which is returned.
        """
        ret = vswitch.ovs_vsctl_del_pbridge(bridge, iface)
        self.pbridges.discard((bridge, iface))
        return ret

    def xmlrpc_get_iface_from_bridge(self, brname):
        """
        Tries to figure out physical interface from bridge.
        """
        return vswitch.ovs_get_physical_interface(brname)

    def xmlrpc_create_bridge(self, brname):
        """
        Creates an OVS bridge and, if that returned 0, remembers it for
        cleanup().  Returns the result of the ovs-vsctl helper.
        """
        ret = vswitch.ovs_vsctl_add_bridge(brname)
        if ret == 0:
            self.bridges.add(brname)
        return ret

    def xmlrpc_del_bridge(self, brname):
        """
        Deletes an OVS bridge and, if that returned 0, forgets it.  Returns
        the result of the ovs-vsctl helper.
        """
        ret = vswitch.ovs_vsctl_del_bridge(brname)
        if ret == 0:
            self.bridges.discard(brname)
        return ret

    def xmlrpc_is_ovs_bridge(self, bridge):
        """
        This function verifies whether given interface is an ovs bridge.
        """
        return vswitch.ovs_vsctl_is_ovs_bridge(bridge)

    def xmlrpc_add_port_to_bridge(self, bridge, port):
        """
        Adds a port to the OVS bridge and, if that returned 0, remembers it
        for cleanup().  Returns the result of the ovs-vsctl helper.
        """
        ret = vswitch.ovs_vsctl_add_port_to_bridge(bridge, port)
        if ret == 0:
            self.ports.add(port)
        return ret

    def xmlrpc_del_port_from_bridge(self, port):
        """
        Removes a port from OVS bridge and, if that returned 0, forgets it.
        Returns the result of the ovs-vsctl helper.
        """
        ret = vswitch.ovs_vsctl_del_port_from_bridge(port)
        if ret == 0:
            self.ports.discard(port)
        return ret

    def xmlrpc_ovs_vsctl_set(self, table, record, column, key, value):
        """
        This function allows to alter OVS database.  All arguments are passed
        to vswitch.ovs_vsctl_set() unchanged.
        """
        return vswitch.ovs_vsctl_set(table, record, column, key, value)

    def xmlrpc_interface_up(self, iface):
        """
        This function brings up given interface.
        """
        return util.interface_up(iface)

    def xmlrpc_interface_assign_ip(self, iface, ip_address, mask):
        """
        This function allows to assign ip address to the given interface.
        """
        return util.interface_assign_ip(iface, ip_address, mask)

    def xmlrpc_get_interface(self, address):
        """
        Finds first interface that has given address
        """
        return util.get_interface(address)

    def xmlrpc_get_interface_mtu(self, iface):
        """
        Returns MTU of the given interface
        """
        return util.get_interface_mtu(iface)

    def xmlrpc_uname(self):
        """
        Return information about running kernel
        """
        return util.uname()

    def xmlrpc_get_driver(self, iface):
        """
        Returns driver version
        """
        return util.get_driver(iface)

    def xmlrpc_get_interface_from_routing_decision(self, ip):
        """
        Returns the result of util.get_interface_from_routing_decision(ip),
        presumably the interface that the routing table selects for 'ip'.
        """
        return util.get_interface_from_routing_decision(ip)


def start_rpc_server(port):
    """
    This function creates a RPC server and adds it to the Twisted Reactor.
    It then runs the reactor; when reactor.run() returns or raises, the
    TestArena is cleaned up.
    """
    rpc_server = TestArena()
    reactor.listenTCP(port, server.Site(rpc_server))
    try:
        print "Starting RPC server\n"
        sys.stdout.flush()
        # If this server was started from ovs-test client then we must flush
        # STDOUT so that client would know that server is ready to accept
        # XML RPC connections.
        reactor.run()
    finally:
        rpc_server.cleanup()
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ tcp module contains listener and sender classes for TCP protocol """ from twisted.internet.protocol import Factory, ClientFactory, Protocol from twisted.internet import interfaces from zope.interface import implements import time class TcpListenerConnection(Protocol): """ This per-connection class is instantiated each time sender connects """ def __init__(self): self.stats = 0 def dataReceived(self, data): self.stats += len(data) def connectionLost(self, reason): self.factory.stats += self.stats class TcpListenerFactory(Factory): """ This per-listening socket class is used to instantiate TcpListenerConnections """ protocol = TcpListenerConnection def __init__(self): self.stats = 0 def getResults(self): """ returns the number of bytes received as string""" # XML RPC does not support 64bit int (http://bugs.python.org/issue2985) # so we have to convert the amount of bytes into a string return str(self.stats) class Producer(object): implements(interfaces.IPushProducer) """ This producer class generates infinite byte stream for a specified time duration """ def __init__(self, proto, duration): self.proto = proto self.start = time.time() self.produced = 0 self.paused = False self.data = "X" * 65535 self.duration = duration def pauseProducing(self): """This function is called whenever write() to socket would block""" self.paused = True def resumeProducing(self): """This function is called whenever socket becomes writable""" self.paused = False current = time.time() while (not self.paused) and (current < self.start + self.duration): 
self.proto.transport.write(self.data) self.produced += len(self.data) current = time.time() if current >= self.start + self.duration: self.proto.factory.stats += self.produced self.proto.transport.unregisterProducer() self.proto.transport.loseConnection() def stopProducing(self): pass class TcpSenderConnection(Protocol): """ TCP connection instance class that sends all traffic at full speed. """ def connectionMade(self): producer = Producer(self, self.factory.duration) self.transport.registerProducer(producer, True) producer.resumeProducing() def dataReceived(self, data): self.transport.loseConnection() class TcpSenderFactory(ClientFactory): """ This factory is responsible to instantiate TcpSenderConnection classes each time sender initiates connection """ protocol = TcpSenderConnection def __init__(self, duration): self.duration = duration self.stats = 0 def getResults(self): """Returns amount of bytes sent to the Listener (as a string)""" return str(self.stats) openvswitch-2.0.1+git20140120/python/ovstest/tests.py000066400000000000000000000221571226605124000222250ustar00rootroot00000000000000import math import time import ovstest.util as util DEFAULT_TEST_BRIDGE = "ovstestbr0" DEFAULT_TEST_PORT = "ovstestport0" DEFAULT_TEST_TUN = "ovstestport1" NO_HANDLE = -1 def do_udp_tests(receiver, sender, tbwidth, duration, port_sizes): """Schedule UDP tests between receiver and sender""" server1 = util.rpc_client(receiver[0], receiver[1]) server2 = util.rpc_client(sender[0], sender[1]) udpformat = '{0:>15} {1:>15} {2:>15} {3:>15} {4:>15}' print ("UDP test from %s:%u to %s:%u with target bandwidth %s" % (sender[0], sender[1], receiver[0], receiver[1], util.bandwidth_to_string(tbwidth))) print udpformat.format("Datagram Size", "Snt Datagrams", "Rcv Datagrams", "Datagram Loss", "Bandwidth") for size in port_sizes: listen_handle = NO_HANDLE send_handle = NO_HANDLE try: packetcnt = (tbwidth * duration) / size listen_handle = server1.create_udp_listener(receiver[3]) if 
listen_handle == NO_HANDLE: print ("Server could not open UDP listening socket on port" " %u. Try to restart the server.\n" % receiver[3]) return send_handle = server2.create_udp_sender( (util.ip_from_cidr(receiver[2]), receiver[3]), packetcnt, size, duration) # Using sleep here because there is no other synchronization # source that would notify us when all sent packets were received time.sleep(duration + 1) rcv_packets = server1.get_udp_listener_results(listen_handle) snt_packets = server2.get_udp_sender_results(send_handle) loss = math.ceil(((snt_packets - rcv_packets) * 10000.0) / snt_packets) / 100 bwidth = (rcv_packets * size) / duration print udpformat.format(size, snt_packets, rcv_packets, '%.2f%%' % loss, util.bandwidth_to_string(bwidth)) finally: if listen_handle != NO_HANDLE: server1.close_udp_listener(listen_handle) if send_handle != NO_HANDLE: server2.close_udp_sender(send_handle) print "\n" def do_tcp_tests(receiver, sender, duration): """Schedule TCP tests between receiver and sender""" server1 = util.rpc_client(receiver[0], receiver[1]) server2 = util.rpc_client(sender[0], sender[1]) tcpformat = '{0:>15} {1:>15} {2:>15}' print "TCP test from %s:%u to %s:%u (full speed)" % (sender[0], sender[1], receiver[0], receiver[1]) print tcpformat.format("Snt Bytes", "Rcv Bytes", "Bandwidth") listen_handle = NO_HANDLE send_handle = NO_HANDLE try: listen_handle = server1.create_tcp_listener(receiver[3]) if listen_handle == NO_HANDLE: print ("Server was unable to open TCP listening socket on port" " %u. 
Try to restart the server.\n" % receiver[3]) return send_handle = server2.create_tcp_sender(util.ip_from_cidr(receiver[2]), receiver[3], duration) time.sleep(duration + 1) rcv_bytes = long(server1.get_tcp_listener_results(listen_handle)) snt_bytes = long(server2.get_tcp_sender_results(send_handle)) bwidth = rcv_bytes / duration print tcpformat.format(snt_bytes, rcv_bytes, util.bandwidth_to_string(bwidth)) finally: if listen_handle != NO_HANDLE: server1.close_tcp_listener(listen_handle) if send_handle != NO_HANDLE: server2.close_tcp_sender(send_handle) print "\n" def do_l3_tests(node1, node2, bandwidth, duration, ps, type): """ Do L3 tunneling tests. Each node is given as 4 tuple - physical interface IP, control port, test IP and test port. """ server1 = util.rpc_client(node1[0], node1[1]) server2 = util.rpc_client(node2[0], node2[1]) servers_with_bridges = [] try: server1.create_bridge(DEFAULT_TEST_BRIDGE) servers_with_bridges.append(server1) server2.create_bridge(DEFAULT_TEST_BRIDGE) servers_with_bridges.append(server2) server1.interface_up(DEFAULT_TEST_BRIDGE) server2.interface_up(DEFAULT_TEST_BRIDGE) server1.interface_assign_ip(DEFAULT_TEST_BRIDGE, node1[2], None) server2.interface_assign_ip(DEFAULT_TEST_BRIDGE, node2[2], None) server1.add_port_to_bridge(DEFAULT_TEST_BRIDGE, DEFAULT_TEST_TUN) server2.add_port_to_bridge(DEFAULT_TEST_BRIDGE, DEFAULT_TEST_TUN) server1.ovs_vsctl_set("Interface", DEFAULT_TEST_TUN, "type", None, type) server2.ovs_vsctl_set("Interface", DEFAULT_TEST_TUN, "type", None, type) server1.ovs_vsctl_set("Interface", DEFAULT_TEST_TUN, "options", "remote_ip", node2[0]) server2.ovs_vsctl_set("Interface", DEFAULT_TEST_TUN, "options", "remote_ip", node1[0]) do_udp_tests(node1, node2, bandwidth, duration, ps) do_udp_tests(node2, node1, bandwidth, duration, ps) do_tcp_tests(node1, node2, duration) do_tcp_tests(node2, node1, duration) finally: for server in servers_with_bridges: server.del_bridge(DEFAULT_TEST_BRIDGE) def do_vlan_tests(node1, node2, 
bandwidth, duration, ps, tag): """ Do VLAN tests between node1 and node2. Each node is given as 4 tuple - physical interface IP, control port, test IP and test port. """ server1 = util.rpc_client(node1[0], node1[1]) server2 = util.rpc_client(node2[0], node2[1]) br_name1 = None br_name2 = None servers_with_test_ports = [] try: interface_node1 = server1.get_interface(node1[0]) interface_node2 = server2.get_interface(node2[0]) if server1.is_ovs_bridge(interface_node1): br_name1 = interface_node1 else: br_name1 = DEFAULT_TEST_BRIDGE server1.create_test_bridge(br_name1, interface_node1) if server2.is_ovs_bridge(interface_node2): br_name2 = interface_node2 else: br_name2 = DEFAULT_TEST_BRIDGE server2.create_test_bridge(br_name2, interface_node2) server1.add_port_to_bridge(br_name1, DEFAULT_TEST_PORT) servers_with_test_ports.append(server1) server2.add_port_to_bridge(br_name2, DEFAULT_TEST_PORT) servers_with_test_ports.append(server2) server1.ovs_vsctl_set("Port", DEFAULT_TEST_PORT, "tag", None, tag) server2.ovs_vsctl_set("Port", DEFAULT_TEST_PORT, "tag", None, tag) server1.ovs_vsctl_set("Interface", DEFAULT_TEST_PORT, "type", None, "internal") server2.ovs_vsctl_set("Interface", DEFAULT_TEST_PORT, "type", None, "internal") server1.interface_assign_ip(DEFAULT_TEST_PORT, node1[2], None) server2.interface_assign_ip(DEFAULT_TEST_PORT, node2[2], None) server1.interface_up(DEFAULT_TEST_PORT) server2.interface_up(DEFAULT_TEST_PORT) do_udp_tests(node1, node2, bandwidth, duration, ps) do_udp_tests(node2, node1, bandwidth, duration, ps) do_tcp_tests(node1, node2, duration) do_tcp_tests(node2, node1, duration) finally: for server in servers_with_test_ports: server.del_port_from_bridge(DEFAULT_TEST_PORT) if br_name1 == DEFAULT_TEST_BRIDGE: server1.del_test_bridge(br_name1, interface_node1) if br_name2 == DEFAULT_TEST_BRIDGE: server2.del_test_bridge(br_name2, interface_node2) def do_direct_tests(node1, node2, bandwidth, duration, ps): """ Do tests between outer IPs without involving 
Open vSwitch. Each node is given as 4 tuple - physical interface IP, control port, test IP and test port. Direct tests will use physical interface IP as the test IP address. """ n1 = (node1[0], node1[1], node1[0], node1[3]) n2 = (node2[0], node2[1], node2[0], node2[3]) do_udp_tests(n1, n2, bandwidth, duration, ps) do_udp_tests(n2, n1, bandwidth, duration, ps) do_tcp_tests(n1, n2, duration) do_tcp_tests(n2, n1, duration) def configure_l3(conf, tunnel_mode): """ This function creates a temporary test bridge and adds an L3 tunnel. """ s = util.start_local_server(conf[1][1]) server = util.rpc_client("127.0.0.1", conf[1][1]) server.create_bridge(DEFAULT_TEST_BRIDGE) server.add_port_to_bridge(DEFAULT_TEST_BRIDGE, DEFAULT_TEST_PORT) server.interface_up(DEFAULT_TEST_BRIDGE) server.interface_assign_ip(DEFAULT_TEST_BRIDGE, conf[1][0], None) server.ovs_vsctl_set("Interface", DEFAULT_TEST_PORT, "type", None, tunnel_mode) server.ovs_vsctl_set("Interface", DEFAULT_TEST_PORT, "options", "remote_ip", conf[0]) return s openvswitch-2.0.1+git20140120/python/ovstest/udp.py000066400000000000000000000051061226605124000216460ustar00rootroot00000000000000# Copyright (c) 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
""" ovsudp contains listener and sender classes for UDP protocol """ import array import struct import time from twisted.internet.protocol import DatagramProtocol from twisted.internet.task import LoopingCall class UdpListener(DatagramProtocol): """ Class that will listen for incoming UDP packets """ def __init__(self): self.stats = [] def datagramReceived(self, data, (_1, _2)): """This function is called each time datagram is received""" try: self.stats.append(struct.unpack_from("Q", data, 0)) except struct.error: pass # ignore packets that are less than 8 bytes of size def getResults(self): """Returns number of packets that were actually received""" return len(self.stats) class UdpSender(DatagramProtocol): """ Class that will send UDP packets to UDP Listener """ def __init__(self, host, count, size, duration): # LoopingCall does not know whether UDP socket is actually writable self.looper = None self.host = host self.count = count self.duration = duration self.start = time.time() self.sent = 0 self.data = array.array('c', 'X' * size) def startProtocol(self): self.looper = LoopingCall(self.sendData) period = self.duration / float(self.count) self.looper.start(period , now = False) def stopProtocol(self): if (self.looper is not None): self.looper.stop() self.looper = None def datagramReceived(self, data, (host, port)): pass def sendData(self): """This function is called from LoopingCall""" if self.start + self.duration < time.time(): self.looper.stop() self.looper = None self.sent += 1 struct.pack_into('Q', self.data, 0, self.sent) self.transport.write(self.data, self.host) def getResults(self): """Returns number of packets that were sent""" return self.sent openvswitch-2.0.1+git20140120/python/ovstest/util.py000066400000000000000000000147051226605124000220400ustar00rootroot00000000000000# Copyright (c) 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ util module contains some helper function """ import array import exceptions import fcntl import os import select import socket import struct import signal import subprocess import re import xmlrpclib def str_ip(ip_address): """ Converts an IP address from binary format to a string. """ (x1, x2, x3, x4) = struct.unpack("BBBB", ip_address) return ("%u.%u.%u.%u") % (x1, x2, x3, x4) def get_interface_mtu(iface): """ Returns MTU of the given interface. """ s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) indata = iface + ('\0' * (32 - len(iface))) try: outdata = fcntl.ioctl(s.fileno(), 0x8921, indata) # socket.SIOCGIFMTU mtu = struct.unpack("16si12x", outdata)[1] except: return 0 return mtu def get_interface(address): """ Finds first interface that has given address """ bytes = 256 * 32 s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) names = array.array('B', '\0' * bytes) outbytes = struct.unpack('iL', fcntl.ioctl( s.fileno(), 0x8912, # SIOCGIFCONF struct.pack('iL', bytes, names.buffer_info()[0]) ))[0] namestr = names.tostring() for i in range(0, outbytes, 40): name = namestr[i:i + 16].split('\0', 1)[0] if address == str_ip(namestr[i + 20:i + 24]): return name return None # did not find interface we were looking for def uname(): os_info = os.uname() return os_info[2] # return only the kernel version number def start_process(args): try: p = subprocess.Popen(args, stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.PIPE) out, err = p.communicate() return (p.returncode, out, err) except exceptions.OSError: return (-1, None, None) def 
get_driver(iface): ret, out, _err = start_process(["ethtool", "-i", iface]) if ret == 0: lines = out.splitlines() driver = "%s(%s)" % (lines[0], lines[1]) # driver name + version else: driver = None return driver def interface_up(iface): """ This function brings given iface up. """ ret, _out, _err = start_process(["ifconfig", iface, "up"]) return ret def interface_assign_ip(iface, ip_addr, mask): """ This function allows to assign IP address to an interface. If mask is an empty string then ifconfig will decide what kind of mask to use. The caller can also specify the mask by using CIDR notation in ip argument by leaving the mask argument as an empty string. In case of success this function returns 0. """ args = ["ifconfig", iface, ip_addr] if mask is not None: args.append("netmask") args.append(mask) ret, _out, _err = start_process(args) return ret def interface_get_ip(iface): """ This function returns tuple - ip and mask that was assigned to the interface. """ args = ["ifconfig", iface] ret, out, _err = start_process(args) if ret == 0: ip = re.search(r'inet addr:(\S+)', out) mask = re.search(r'Mask:(\S+)', out) if ip is not None and mask is not None: return (ip.group(1), mask.group(1)) else: return ret def move_routes(iface1, iface2): """ This function moves routes from iface1 to iface2. """ args = ["ip", "route", "show", "dev", iface1] ret, out, _err = start_process(args) if ret == 0: for route in out.splitlines(): args = ["ip", "route", "replace", "dev", iface2] + route.split() start_process(args) def get_interface_from_routing_decision(ip): """ This function returns the interface through which the given ip address is reachable. 
""" args = ["ip", "route", "get", ip] ret, out, _err = start_process(args) if ret == 0: iface = re.search(r'dev (\S+)', out) if iface: return iface.group(1) return None def rpc_client(ip, port): return xmlrpclib.Server("http://%s:%u/" % (ip, port), allow_none=True) def sigint_intercept(): """ Intercept SIGINT from child (the local ovs-test server process). """ signal.signal(signal.SIGINT, signal.SIG_IGN) def start_local_server(port): """ This function spawns an ovs-test server that listens on specified port and blocks till the spawned ovs-test server is ready to accept XML RPC connections. """ p = subprocess.Popen(["ovs-test", "-s", str(port)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=sigint_intercept) fcntl.fcntl( p.stdout.fileno(),fcntl.F_SETFL, fcntl.fcntl(p.stdout.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK) while p.poll() is None: fd = select.select([p.stdout.fileno()], [], [])[0] if fd: out = p.stdout.readline() if out.startswith("Starting RPC server"): break if p.poll() is not None: raise RuntimeError("Couldn't start local instance of ovs-test server") return p def get_datagram_sizes(mtu1, mtu2): """ This function calculates all the "interesting" datagram sizes so that we test both - receive and send side with different packets sizes. """ s1 = set([8, mtu1 - 100, mtu1 - 28, mtu1]) s2 = set([8, mtu2 - 100, mtu2 - 28, mtu2]) return sorted(s1.union(s2)) def ip_from_cidr(string): """ This function removes the netmask (if present) from the given string and returns the IP address. 
""" token = string.split("/") return token[0] def bandwidth_to_string(bwidth): """Convert bandwidth from long to string and add units.""" bwidth = bwidth * 8 # Convert back to bits/second if bwidth >= 10000000: return str(int(bwidth / 1000000)) + "Mbps" elif bwidth > 10000: return str(int(bwidth / 1000)) + "Kbps" else: return str(int(bwidth)) + "bps" openvswitch-2.0.1+git20140120/python/ovstest/vswitch.py000066400000000000000000000065131226605124000225500ustar00rootroot00000000000000# Copyright (c) 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ vswitch module allows its callers to interact with OVS DB. """ import exceptions import subprocess import util def ovs_vsctl_add_bridge(bridge): """ This function creates an OVS bridge. """ ret, _out, _err = util.start_process(["ovs-vsctl", "add-br", bridge]) return ret def ovs_vsctl_del_bridge(bridge): """ This function deletes the OVS bridge. """ ret, _out, _err = util.start_process(["ovs-vsctl", "del-br", bridge]) return ret def ovs_vsctl_del_pbridge(bridge, iface): """ This function deletes the OVS bridge and assigns the bridge IP address back to the iface. """ (ip_addr, mask) = util.interface_get_ip(bridge) util.interface_assign_ip(iface, ip_addr, mask) util.move_routes(bridge, iface) return ovs_vsctl_del_bridge(bridge) def ovs_vsctl_is_ovs_bridge(bridge): """ This function verifies whether given port is an OVS bridge. If it is an OVS bridge then it will return True. 
""" ret, _out, _err = util.start_process(["ovs-vsctl", "br-exists", bridge]) return ret == 0 def ovs_vsctl_add_port_to_bridge(bridge, iface): """ This function adds given interface to the bridge. """ ret, _out, _err = util.start_process(["ovs-vsctl", "add-port", bridge, iface]) return ret def ovs_vsctl_del_port_from_bridge(port): """ This function removes given port from a OVS bridge. """ ret, _out, _err = util.start_process(["ovs-vsctl", "del-port", port]) return ret def ovs_vsctl_set(table, record, column, key, value): """ This function allows to alter the OVS database. If column is a map, then caller should also set the key, otherwise the key should be left as an empty string. """ if key is None: index = column else: index = "%s:%s" % (column, key) index_value = "%s=%s" % (index, value) ret, _out, _err = util.start_process(["ovs-vsctl", "set", table, record, index_value]) return ret def ovs_get_physical_interface(bridge): """ This function tries to figure out which is the physical interface that belongs to the bridge. If there are multiple physical interfaces assigned to this bridge then it will return the first match. 
""" ret, out, _err = util.start_process(["ovs-vsctl", "list-ifaces", bridge]) if ret == 0: ifaces = out.splitlines() for iface in ifaces: ret, out, _err = util.start_process(["ovs-vsctl", "get", "Interface", iface, "type"]) if ret == 0: if ('""' in out) or ('system' in out): return iface # this should be the physical interface return None openvswitch-2.0.1+git20140120/rhel/000077500000000000000000000000001226605124000164045ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/rhel/.gitignore000066400000000000000000000001761226605124000204000ustar00rootroot00000000000000openvswitch-kmod-rhel5.spec openvswitch-kmod-rhel6.spec openvswitch-kmod-fedora.spec openvswitch.spec openvswitch-fedora.spec openvswitch-2.0.1+git20140120/rhel/README.RHEL000066400000000000000000000065761226605124000200330ustar00rootroot00000000000000Red Hat network scripts integration ----------------------------------- The RPM packages for Open vSwitch provide some integration with Red Hat's network scripts. Using this integration is optional. To use the integration for a Open vSwitch bridge or interface named , create or edit /etc/sysconfig/network-scripts/ifcfg-. This is a shell script that consists of a series of VARIABLE=VALUE assignments. The following OVS-specific variable names are supported: - DEVICETYPE: Always set to "ovs". - TYPE: If this is "OVSBridge", then this file represents an OVS bridge named . Otherwise, it represents a port on an OVS bridge and TYPE must have one of the following values: * "OVSPort", if is a physical port (e.g. eth0) or virtual port (e.g. vif1.0). * "OVSIntPort", if is an internal port (e.g. a tagged VLAN). * "OVSBond", if is an OVS bond. - OVS_BRIDGE: If TYPE is anything other than "OVSBridge", set to the name of the OVS bridge to which the port should be attached. - OVS_OPTIONS: Optionally, extra options to set in the "Port" table when adding the port to the bridge, as a sequence of column[:key]=value options. 
For example, "tag=100" to make the port an access port for VLAN 100. See the documentation of "add-port" in ovs-vsctl(8) for syntax and the section on the Port table in ovs-vswitchd.conf.db(5) for available options. - OVS_EXTRA: Optionally, additional ovs-vsctl commands, separated by "--" (double dash). - BOND_IFACES: For "OVSBond" interfaces, a list of physical interfaces to bond together. Note ---- * "ifdown" on a bridge will not bring individual ports on the bridge down. "ifup" on a bridge will not add ports to the bridge. This behavior should be compatible with standard bridges (with TYPE=Bridge). * If 'ifup' on an interface is called multiple times, one can see "RTNETLINK answers: File exists" printed on the console. This comes from ifup-eth trying to add zeroconf route multiple times and is harmless. Examples -------- Standalone bridge: ==> ifcfg-ovsbridge0 <== DEVICE=ovsbridge0 ONBOOT=yes DEVICETYPE=ovs TYPE=OVSBridge BOOTPROTO=static IPADDR=A.B.C.D NETMASK=X.Y.Z.0 HOTPLUG=no Enable DHCP on the bridge: * Needs OVSBOOTPROTO instead of BOOTPROTO. * All the interfaces that can reach the DHCP server as a space separated list in OVSDHCPINTERFACES. 
DEVICE=ovsbridge0 ONBOOT=yes DEVICETYPE=ovs TYPE=OVSBridge OVSBOOTPROTO="dhcp" OVSDHCPINTERFACES="eth0" HOTPLUG=no Adding physical eth0 to ovsbridge0 described above: ==> ifcfg-eth0 <== DEVICE=eth0 ONBOOT=yes DEVICETYPE=ovs TYPE=OVSPort OVS_BRIDGE=ovsbridge0 BOOTPROTO=none HOTPLUG=no Tagged VLAN interface on top of ovsbridge0: ==> ifcfg-vlan100 <== DEVICE=vlan100 ONBOOT=yes DEVICETYPE=ovs TYPE=OVSIntPort BOOTPROTO=static IPADDR=A.B.C.D NETMASK=X.Y.Z.0 OVS_BRIDGE=ovsbridge0 OVS_OPTIONS="tag=100" OVS_EXTRA="set Interface $DEVICE external-ids:iface-id=$(hostname -s)-$DEVICE-vif" HOTPLUG=no Bonding: ==> ifcfg-bond0 <== DEVICE=bond0 ONBOOT=yes DEVICETYPE=ovs TYPE=OVSBond OVS_BRIDGE=ovsbridge0 BOOTPROTO=none BOND_IFACES="gige-1b-0 gige-1b-1 gige-21-0 gige-21-1" OVS_OPTIONS="bond_mode=balance-tcp lacp=active" HOTPLUG=no ==> ifcfg-gige-* <== DEVICE=gige-* ONBOOT=yes HOTPLUG=no Reporting Bugs -------------- Please report problems to bugs@openvswitch.org. openvswitch-2.0.1+git20140120/rhel/automake.mk000066400000000000000000000031501226605124000205420ustar00rootroot00000000000000# Copyright (C) 2009, 2010, 2011, 2012 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. 
EXTRA_DIST += \ rhel/README.RHEL \ rhel/automake.mk \ rhel/etc_init.d_openvswitch \ rhel/etc_logrotate.d_openvswitch \ rhel/etc_sysconfig_network-scripts_ifdown-ovs \ rhel/etc_sysconfig_network-scripts_ifup-ovs \ rhel/openvswitch-kmod-rhel6.spec \ rhel/openvswitch-kmod-rhel6.spec.in \ rhel/openvswitch-kmod.files \ rhel/openvswitch-kmod-fedora.spec \ rhel/openvswitch-kmod-fedora.spec.in \ rhel/openvswitch.spec \ rhel/openvswitch.spec.in \ rhel/openvswitch-fedora.spec \ rhel/openvswitch-fedora.spec.in \ rhel/usr_share_openvswitch_scripts_sysconfig.template \ rhel/usr_lib_systemd_system_openvswitch.service update_rhel_spec = \ ($(ro_shell) && sed -e 's,[@]VERSION[@],$(VERSION),g') \ < $(srcdir)/rhel/$(@F).in > $(@F).tmp || exit 1; \ if cmp -s $(@F).tmp $@; then touch $@; rm $(@F).tmp; else mv $(@F).tmp $@; fi $(srcdir)/rhel/openvswitch-kmod-rhel6.spec: rhel/openvswitch-kmod-rhel6.spec.in $(top_builddir)/config.status $(update_rhel_spec) $(srcdir)/rhel/openvswitch-kmod-fedora.spec: rhel/openvswitch-kmod-fedora.spec.in $(top_builddir)/config.status $(update_rhel_spec) $(srcdir)/rhel/openvswitch.spec: rhel/openvswitch.spec.in $(top_builddir)/config.status $(update_rhel_spec) $(srcdir)/rhel/openvswitch-fedora.spec: rhel/openvswitch-fedora.spec.in $(top_builddir)/config.status $(update_rhel_spec) openvswitch-2.0.1+git20140120/rhel/etc_init.d_openvswitch000077500000000000000000000045151226605124000230100ustar00rootroot00000000000000#!/bin/sh # # openvswitch # # chkconfig: 2345 09 91 # description: Manage Open vSwitch kernel modules and user-space daemons # Copyright (C) 2009, 2010, 2011, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ### BEGIN INIT INFO # Provides: openvswitch-switch # Required-Start: # Required-Stop: # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: Open vSwitch switch ### END INIT INFO . /usr/share/openvswitch/scripts/ovs-lib || exit 1 test -e /etc/sysconfig/openvswitch && . /etc/sysconfig/openvswitch start () { set ovs_ctl ${1-start} set "$@" --system-id=random if test X"$FORCE_COREFILES" != X; then set "$@" --force-corefiles="$FORCE_COREFILES" fi if test X"$OVSDB_SERVER_PRIORITY" != X; then set "$@" --ovsdb-server-priority="$OVSDB_SERVER_PRIORITY" fi if test X"$VSWITCHD_PRIORITY" != X; then set "$@" --ovs-vswitchd-priority="$VSWITCHD_PRIORITY" fi if test X"$VSWITCHD_MLOCKALL" != X; then set "$@" --mlockall="$VSWITCHD_MLOCKALL" fi set "$@" $OVS_CTL_OPTS "$@" touch /var/lock/subsys/openvswitch } stop () { ovs_ctl stop rm -f /var/lock/subsys/openvswitch } restart () { if [ "$1" = "--save-flows=yes" ]; then start restart else stop start fi } case $1 in start) start ;; stop) stop ;; restart) shift restart "$@" ;; reload|force-reload) # Nothing to do. ;; status) ovs_ctl status exit $? ;; version) ovs_ctl version ;; force-reload-kmod) start force-reload-kmod ;; help) printf "$0 [start|stop|restart|reload|force-reload|status|version|force-reload-kmod]\n" ;; *) printf "Unknown command: $1\n" exit 1 ;; esac openvswitch-2.0.1+git20140120/rhel/etc_logrotate.d_openvswitch000066400000000000000000000011111226605124000240270ustar00rootroot00000000000000# Copyright (C) 2009, 2010, 2011, 2012 Nicira, Inc. 
# # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. /var/log/openvswitch/*.log { daily compress sharedscripts missingok postrotate # Tell Open vSwitch daemons to reopen their log files for pidfile in `cd /var/run/openvswitch && echo *.pid`; do ovs-appctl -t "${pidfile%%.pid}" vlog/reopen done endscript } openvswitch-2.0.1+git20140120/rhel/etc_sysconfig_network-scripts_ifdown-ovs000077500000000000000000000025531226605124000266070ustar00rootroot00000000000000#!/bin/bash # Copyright (c) 2011 Alexey I. Froloff. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. . /etc/init.d/functions cd /etc/sysconfig/network-scripts . ./network-functions [ -f ../network ] && . ../network CONFIG=${1} TIMEOUT=10 source_config . /etc/sysconfig/network OTHERSCRIPT="/etc/sysconfig/network-scripts/ifdown-${REAL_DEVICETYPE}" if [ ! -x ${OTHERSCRIPT} ]; then OTHERSCRIPT="/etc/sysconfig/network-scripts/ifdown-eth" fi [ -f /var/lock/subsys/openvswitch ] || /sbin/service openvswitch start case "$TYPE" in OVSBridge) ${OTHERSCRIPT} ${CONFIG} $2 retval=$? ovs-vsctl -t ${TIMEOUT} -- --if-exists del-br "$DEVICE" ;; OVSPort|OVSIntPort|OVSBond) ${OTHERSCRIPT} ${CONFIG} $2 retval=$? 
ovs-vsctl -t ${TIMEOUT} -- --if-exists del-port "$OVS_BRIDGE" "$DEVICE" ;; *) echo $"Invalid OVS interface type $TYPE" exit 1 ;; esac exit $retval openvswitch-2.0.1+git20140120/rhel/etc_sysconfig_network-scripts_ifup-ovs000077500000000000000000000071071226605124000262640ustar00rootroot00000000000000#!/bin/bash # Copyright (c) 2011 Alexey I. Froloff. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. . /etc/init.d/functions cd /etc/sysconfig/network-scripts . ./network-functions [ -f ../network ] && . ../network CONFIG=${1} TIMEOUT=10 need_config ${CONFIG} source_config OTHERSCRIPT="/etc/sysconfig/network-scripts/ifup-${REAL_DEVICETYPE}" if [ ! -x ${OTHERSCRIPT} ]; then OTHERSCRIPT="/etc/sysconfig/network-scripts/ifup-eth" fi check_recursion () { [ -n "${UPPEDSTACK}" ] && for _r in ${UPPEDSTACK}; do [ "$_r" = "$1" ] && return 1 done return 0 } ifup_ovs_bridge () { if ovs-vsctl br-exists "${OVS_BRIDGE}"; then :; else /sbin/ifup "${OVS_BRIDGE}" fi } if [ -z "${UPPEDSTACK}" ]; then UPPEDSTACK="${DEVICE}" fi [ -n "${OVSREQUIRES}" ] && for _i in ${OVSREQUIRES}; do if ( check_recursion "$_i" ); then UPPEDSTACK="${UPPEDSTACK} $_i" /sbin/ifup "$_i" fi done [ -f /var/lock/subsys/openvswitch ] || /sbin/service openvswitch start case "$TYPE" in OVSBridge) # If bridge already exists and is up, it has been configured through # other cases like OVSPort, OVSIntPort and OVSBond. If it is down or # it does not exist, create it. 
It is possible for a bridge to exist # because it remained in the OVSDB for some reason, but it won't be up. if check_device_down "${DEVICE}"; then ovs-vsctl -t ${TIMEOUT} -- --may-exist add-br "$DEVICE" $OVS_OPTIONS \ ${OVS_EXTRA+-- $OVS_EXTRA} \ ${STP+-- set bridge "$DEVICE" stp_enable="${STP}"} else OVSBRIDGECONFIGURED="yes" fi # When dhcp is enabled, the assumption is that there will be a port to # attach (otherwise, we can't reach out for dhcp). So, we do not # configure the bridge through rhel's ifup infrastructure unless # it is being configured after the port has been configured. # The "OVSINTF" is set only after the port is configured. if [ "${OVSBOOTPROTO}" = "dhcp" ] && [ -n "${OVSINTF}" ]; then case " ${OVSDHCPINTERFACES} " in *" ${OVSINTF} "*) BOOTPROTO=dhcp ${OTHERSCRIPT} ${CONFIG} ;; esac fi # When dhcp is not enabled, it is possible that someone may want # a standalone bridge (i.e it may not have any ports). Configure it. if [ "${OVSBOOTPROTO}" != "dhcp" ] && [ -z "${OVSINTF}" ] && \ [ "${OVSBRIDGECONFIGURED}" != "yes" ]; then ${OTHERSCRIPT} ${CONFIG} fi exit 0 ;; OVSPort) ifup_ovs_bridge ${OTHERSCRIPT} ${CONFIG} ${2} ovs-vsctl -t ${TIMEOUT} -- --may-exist add-port "$OVS_BRIDGE" "$DEVICE" $OVS_OPTIONS ${OVS_EXTRA+-- $OVS_EXTRA} OVSINTF=${DEVICE} /sbin/ifup "$OVS_BRIDGE" ;; OVSIntPort) ifup_ovs_bridge ovs-vsctl -t ${TIMEOUT} -- --may-exist add-port "$OVS_BRIDGE" "$DEVICE" $OVS_OPTIONS -- set Interface "$DEVICE" type=internal ${OVS_EXTRA+-- $OVS_EXTRA} ${OTHERSCRIPT} ${CONFIG} ${2} ;; OVSBond) ifup_ovs_bridge for _iface in $BOND_IFACES; do /sbin/ifup ${_iface} done ovs-vsctl -t ${TIMEOUT} -- --fake-iface add-bond "$OVS_BRIDGE" "$DEVICE" ${BOND_IFACES} $OVS_OPTIONS ${OVS_EXTRA+-- $OVS_EXTRA} ${OTHERSCRIPT} ${CONFIG} ${2} OVSINTF=${DEVICE} /sbin/ifup "$OVS_BRIDGE" ;; *) echo $"Invalid OVS interface type $TYPE" exit 1 ;; esac 
openvswitch-2.0.1+git20140120/rhel/openvswitch-fedora.spec.in000066400000000000000000000161701226605124000235010ustar00rootroot00000000000000# Spec file for Open vSwitch. # Copyright (C) 2009, 2010, 2013 Nicira Networks, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. #%define kernel 2.6.40.4-5.fc15.x86_64 Name: openvswitch Summary: Open vSwitch Group: System Environment/Daemons URL: http://www.openvswitch.org/ Version: @VERSION@ # The entire source code is ASL 2.0 except datapath/ which is GPLv2 License: ASL 2.0 Release: 1%{?dist} Source: openvswitch-%{version}.tar.gz #Source1: openvswitch-init Buildroot: /tmp/openvswitch-fedora-rpm Requires(post): systemd-units Requires(preun): systemd-units Requires(postun): systemd-units %description Open vSwitch provides standard network bridging functions augmented with support for the OpenFlow protocol for remote per-flow control of traffic. 
%prep %setup -q -n openvswitch-%{version} %build ./configure --prefix=/usr --sysconfdir=/etc --localstatedir=%{_localstatedir} --enable-ssl %{build_number} make %{_smp_mflags} %install rm -rf $RPM_BUILD_ROOT make install DESTDIR=$RPM_BUILD_ROOT install -d -m 755 $RPM_BUILD_ROOT/etc install -d -m 755 $RPM_BUILD_ROOT/etc/openvswitch install -p -D -m 0644 rhel/usr_lib_systemd_system_openvswitch.service \ $RPM_BUILD_ROOT%{_unitdir}/openvswitch.service install -m 755 rhel/etc_init.d_openvswitch \ $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/openvswitch.init install -d -m 755 $RPM_BUILD_ROOT/etc/sysconfig install -d -m 755 $RPM_BUILD_ROOT/etc/logrotate.d install -m 755 rhel/etc_logrotate.d_openvswitch \ $RPM_BUILD_ROOT/etc/logrotate.d/openvswitch install -d -m 755 $RPM_BUILD_ROOT/etc/profile.d install -m 644 vswitchd/vswitch.ovsschema \ $RPM_BUILD_ROOT/usr/share/openvswitch/vswitch.ovsschema install -d -m 755 $RPM_BUILD_ROOT/usr/share/openvswitch/scripts install -d -m 0755 $RPM_BUILD_ROOT/etc/sysconfig/network-scripts/ install -p -m 0755 rhel/etc_sysconfig_network-scripts_ifdown-ovs \ $RPM_BUILD_ROOT/etc/sysconfig/network-scripts/ifdown-ovs install -p -m 0755 rhel/etc_sysconfig_network-scripts_ifup-ovs \ $RPM_BUILD_ROOT/etc/sysconfig/network-scripts/ifup-ovs install -p -D -m 0644 rhel/usr_share_openvswitch_scripts_sysconfig.template \ $RPM_BUILD_ROOT/etc/sysconfig/openvswitch install -d -m 755 $RPM_BUILD_ROOT/usr/share/openvswitch/scripts install python/compat/uuid.py $RPM_BUILD_ROOT/usr/share/openvswitch/python install python/compat/argparse.py $RPM_BUILD_ROOT/usr/share/openvswitch/python install -d -m 755 $RPM_BUILD_ROOT/var/lib/openvswitch %clean rm -rf $RPM_BUILD_ROOT %preun # Package removal, not upgrade systemctl stop openvswitch.service systemctl disable openvswitch.service %post if test ! 
-e /etc/openvswitch/conf.db; then install -d -m 755 -o root -g root /etc/openvswitch # Create ovs-vswitchd config database ovsdb-tool -vANY:console:emer create /etc/openvswitch/conf.db \ /usr/share/openvswitch/vswitch.ovsschema # Create initial table in config database ovsdb-tool -vANY:console:emer transact /etc/openvswitch/conf.db \ '[{"op": "insert", "table": "Open_vSwitch", "row": {}}]' \ > /dev/null fi # Initial installation systemctl enable openvswitch.service systemctl start openvswitch.service %postun %files %defattr(-,root,root) %config /etc/sysconfig/openvswitch %config /etc/logrotate.d/openvswitch %{_unitdir}/openvswitch.service %{_datadir}/openvswitch/scripts/openvswitch.init %{_sysconfdir}/sysconfig/network-scripts/ifup-ovs %{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs /usr/share/openvswitch/bugtool-plugins/ /usr/share/openvswitch/python/ovs/__init__.py /usr/share/openvswitch/python/ovs/daemon.py /usr/share/openvswitch/python/ovs/db/__init__.py /usr/share/openvswitch/python/ovs/db/data.py /usr/share/openvswitch/python/ovs/db/error.py /usr/share/openvswitch/python/ovs/db/idl.py /usr/share/openvswitch/python/ovs/db/parser.py /usr/share/openvswitch/python/ovs/db/schema.py /usr/share/openvswitch/python/ovs/db/types.py /usr/share/openvswitch/python/ovs/dirs.py /usr/share/openvswitch/python/ovs/fatal_signal.py /usr/share/openvswitch/python/ovs/json.py /usr/share/openvswitch/python/ovs/jsonrpc.py /usr/share/openvswitch/python/ovs/ovsuuid.py /usr/share/openvswitch/python/ovs/poller.py /usr/share/openvswitch/python/ovs/process.py /usr/share/openvswitch/python/ovs/reconnect.py /usr/share/openvswitch/python/ovs/socket_util.py /usr/share/openvswitch/python/ovs/stream.py /usr/share/openvswitch/python/ovs/timeval.py /usr/share/openvswitch/python/ovs/util.py /usr/share/openvswitch/python/ovs/version.py /usr/share/openvswitch/python/ovs/unixctl/__init__.py /usr/share/openvswitch/python/ovs/unixctl/client.py /usr/share/openvswitch/python/ovs/unixctl/server.py 
/usr/share/openvswitch/python/uuid.py /usr/share/openvswitch/python/argparse.py /usr/share/openvswitch/python/ovs/vlog.py /usr/share/openvswitch/python/ovstest/__init__.py /usr/share/openvswitch/python/ovstest/args.py /usr/share/openvswitch/python/ovstest/rpcserver.py /usr/share/openvswitch/python/ovstest/tcp.py /usr/share/openvswitch/python/ovstest/udp.py /usr/share/openvswitch/python/ovstest/util.py /usr/share/openvswitch/python/ovstest/vswitch.py /usr/share/openvswitch/python/ovstest/tests.py /usr/share/openvswitch/scripts/ovs-bugtool-* /usr/share/openvswitch/scripts/ovs-check-dead-ifs /usr/share/openvswitch/scripts/ovs-lib %config /usr/share/openvswitch/vswitch.ovsschema /usr/sbin/ovs-bugtool /usr/sbin/ovs-vswitchd /usr/sbin/ovsdb-server /usr/bin/ovs-appctl /usr/bin/ovs-dpctl /usr/bin/ovs-dpctl-top /usr/bin/ovs-ofctl /usr/bin/ovs-vsctl /usr/bin/ovsdb-client /usr/bin/ovsdb-tool /usr/bin/ovs-controller /usr/bin/ovs-pki /usr/bin/ovs-test /usr/bin/ovs-l3ping %doc /usr/share/man/man8/ovs-controller.8.gz %doc /usr/share/man/man8/ovs-pki.8.gz %doc /usr/share/man/man1/ovsdb-client.1.gz %doc /usr/share/man/man1/ovsdb-server.1.gz %doc /usr/share/man/man1/ovsdb-tool.1.gz %doc /usr/share/man/man5/ovs-vswitchd.conf.db.5.gz %doc /usr/share/man/man8/ovs-appctl.8.gz %doc /usr/share/man/man8/ovs-bugtool.8.gz %doc /usr/share/man/man8/ovs-dpctl.8.gz %doc /usr/share/man/man8/ovs-dpctl-top.8.gz %doc /usr/share/man/man8/ovs-ofctl.8.gz %doc /usr/share/man/man8/ovs-parse-backtrace.8.gz %doc /usr/share/man/man8/ovs-vsctl.8.gz %doc /usr/share/man/man8/ovs-vswitchd.8.gz %doc /usr/share/man/man8/ovs-test.8.gz %doc /usr/share/man/man8/ovs-l3ping.8.gz /var/lib/openvswitch /var/log/openvswitch /usr/share/openvswitch/scripts/ovs-ctl %exclude /etc/openvswitch %exclude /usr/bin/ovs-benchmark %exclude /usr/bin/ovs-parse-backtrace %exclude /usr/bin/ovs-pcap %exclude /usr/bin/ovs-tcpundump %exclude /usr/bin/ovs-vlan-test %exclude /usr/sbin/ovs-vlan-bug-workaround %exclude 
/usr/share/man/man1/ovs-benchmark.1.gz %exclude /usr/share/man/man1/ovs-pcap.1.gz %exclude /usr/share/man/man1/ovs-tcpundump.1.gz %exclude /usr/share/man/man8/ovs-ctl.8.gz %exclude /usr/share/man/man8/ovs-vlan-bug-workaround.8.gz %exclude /usr/share/man/man8/ovs-vlan-test.8.gz %exclude /usr/share/openvswitch/scripts/ovs-save %changelog * Wed Jan 12 2011 Ralf Spenneberg - First build on F14 openvswitch-2.0.1+git20140120/rhel/openvswitch-kmod-fedora.spec.in000066400000000000000000000035731226605124000244340ustar00rootroot00000000000000# Spec file for Open vSwitch. # Copyright (C) 2009, 2010 Nicira Networks, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. #%define kernel 3.1.5-1.fc16.x86_64 #define kernel %{kernel_source} %{?kversion:%define kernel %kversion} Name: openvswitch-kmod Summary: Open vSwitch Kernel Modules Group: System Environment/Daemons URL: http://www.openvswitch.org/ Vendor: OpenSource Security Ralf Spenneberg Version: @VERSION@ # The entire source code is ASL 2.0 except datapath/ which is GPLv2 License: GPLv2 Release: 1%{?dist} Source: openvswitch-%{version}.tar.gz #Source1: openvswitch-init Buildroot: /tmp/openvswitch-xen-rpm %description Open vSwitch provides standard network bridging functions augmented with support for the OpenFlow protocol for remote per-flow control of traffic. This package contains the kernel modules. 
%prep %setup -q -n openvswitch-%{version} %build ./configure --prefix=/usr --sysconfdir=/etc --localstatedir=%{_localstatedir} --with-linux=/lib/modules/%{kernel}/build --enable-ssl %{build_number} make %{_smp_mflags} -C datapath/linux %install rm -rf $RPM_BUILD_ROOT make -C datapath/linux modules_install install -d -m 755 $RPM_BUILD_ROOT/lib/modules/%{kernel}/kernel/extra/openvswitch find datapath/linux -name *.ko -exec install -m 755 \{\} $RPM_BUILD_ROOT/lib/modules/%{kernel}/kernel/extra/openvswitch \; %clean rm -rf $RPM_BUILD_ROOT %preun %post # Ensure that modprobe will find our modules. depmod %{kernel} %files %defattr(-,root,root) /lib/modules/%{kernel}/kernel/extra/openvswitch/openvswitch.ko %changelog * Wed Sep 21 2011 Kyle Mestery - Updated for F15 * Wed Jan 12 2011 Ralf Spenneberg - First build on F14 openvswitch-2.0.1+git20140120/rhel/openvswitch-kmod-rhel6.spec.in000066400000000000000000000040571226605124000242120ustar00rootroot00000000000000# Spec file for Open vSwitch kernel modules on Red Hat Enterprise # Linux 6. # Copyright (C) 2011, 2012 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. %define oname openvswitch Name: %{oname}-kmod Version: @VERSION@ Release: 1%{?dist} Summary: Open vSwitch kernel module Group: System/Kernel License: GPLv2 URL: http://openvswitch.org/ Source0: %{oname}-%{version}.tar.gz Source1: %{oname}-kmod.files BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) BuildRequires: %kernel_module_package_buildreqs # Without this we get an empty openvswitch-debuginfo package (whose name # conflicts with the openvswitch-debuginfo package for OVS userspace). %undefine _enable_debug_packages # Use -D 'kversion 2.6.32-131.6.1.el6.x86_64' to build package # for specified kernel version. 
%{?kversion:%define kernel_version %kversion} # Use -D 'kflavors default debug kdump' to build packages for # specified kernel variants. %{!?kflavors:%define kflavors default} %kernel_module_package -n %{oname} -f %{SOURCE1} %kflavors %description Open vSwitch Linux kernel module. %prep %setup -n %{oname}-%{version} cat > %{oname}.conf << EOF override %{oname} * extra/%{oname} override %{oname} * weak-updates/%{oname} EOF %build for flavor in %flavors_to_build; do mkdir _$flavor (cd _$flavor && ../configure --with-linux="%{kernel_source $flavor}") %{__make} -C _$flavor/datapath/linux %{?_smp_mflags} done %install export INSTALL_MOD_PATH=$RPM_BUILD_ROOT export INSTALL_MOD_DIR=extra/%{oname} for flavor in %flavors_to_build ; do make -C %{kernel_source $flavor} modules_install \ M="`pwd`"/_$flavor/datapath/linux done install -d %{buildroot}%{_sysconfdir}/depmod.d/ install -m 644 %{oname}.conf %{buildroot}%{_sysconfdir}/depmod.d/ %clean rm -rf $RPM_BUILD_ROOT openvswitch-2.0.1+git20140120/rhel/openvswitch-kmod.files000066400000000000000000000001161226605124000227270ustar00rootroot00000000000000%defattr(644,root,root,755) /lib/modules/%2-%1 /etc/depmod.d/openvswitch.conf openvswitch-2.0.1+git20140120/rhel/openvswitch.spec.in000066400000000000000000000114011226605124000222330ustar00rootroot00000000000000# Spec file for Open vSwitch on Red Hat Enterprise Linux. # Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. Name: openvswitch Summary: Open vSwitch daemon/database/utilities Group: System Environment/Daemons URL: http://www.openvswitch.org/ Vendor: Nicira, Inc. 
Version: @VERSION@ License: ASL 2.0 Release: 1 Source: openvswitch-%{version}.tar.gz Buildroot: /tmp/openvswitch-rpm Requires: openvswitch-kmod, logrotate, python %description Open vSwitch provides standard network bridging functions and support for the OpenFlow protocol for remote per-flow control of traffic. %prep %setup -q %build ./configure --prefix=/usr --sysconfdir=/etc --localstatedir=%{_localstatedir} --enable-ssl make %{_smp_mflags} %install rm -rf $RPM_BUILD_ROOT make install DESTDIR=$RPM_BUILD_ROOT rhel_cp() { base=$1 mode=$2 dst=$RPM_BUILD_ROOT/$(echo $base | sed 's,_,/,g') install -D -m $mode rhel/$base $dst } rhel_cp etc_init.d_openvswitch 0755 rhel_cp etc_logrotate.d_openvswitch 0644 rhel_cp etc_sysconfig_network-scripts_ifup-ovs 0755 rhel_cp etc_sysconfig_network-scripts_ifdown-ovs 0755 rhel_cp usr_share_openvswitch_scripts_sysconfig.template 0644 docdir=$RPM_BUILD_ROOT/usr/share/doc/openvswitch-%{version} install -d -m755 "$docdir" install -m 0644 FAQ rhel/README.RHEL "$docdir" install python/compat/uuid.py $RPM_BUILD_ROOT/usr/share/openvswitch/python install python/compat/argparse.py $RPM_BUILD_ROOT/usr/share/openvswitch/python # Get rid of stuff we don't want to make RPM happy. rm \ $RPM_BUILD_ROOT/usr/bin/ovs-controller \ $RPM_BUILD_ROOT/usr/share/man/man8/ovs-controller.8 \ $RPM_BUILD_ROOT/usr/bin/ovs-test \ $RPM_BUILD_ROOT/usr/bin/ovs-l3ping \ $RPM_BUILD_ROOT/usr/share/man/man8/ovs-test.8 \ $RPM_BUILD_ROOT/usr/share/man/man8/ovs-l3ping.8 \ $RPM_BUILD_ROOT/usr/sbin/ovs-vlan-bug-workaround \ $RPM_BUILD_ROOT/usr/share/man/man8/ovs-vlan-bug-workaround.8 install -d -m 755 $RPM_BUILD_ROOT/var/lib/openvswitch %clean rm -rf $RPM_BUILD_ROOT %post # Create default or update existing /etc/sysconfig/openvswitch. SYSCONFIG=/etc/sysconfig/openvswitch TEMPLATE=/usr/share/openvswitch/scripts/sysconfig.template if [ ! -e $SYSCONFIG ]; then cp $TEMPLATE $SYSCONFIG else for var in $(awk -F'[ :]' '/^# [_A-Z0-9]+:/{print $2}' $TEMPLATE) do if ! 
grep $var $SYSCONFIG >/dev/null 2>&1; then echo >> $SYSCONFIG sed -n "/$var:/,/$var=/p" $TEMPLATE >> $SYSCONFIG fi done fi # Ensure all required services are set to run /sbin/chkconfig --add openvswitch /sbin/chkconfig openvswitch on %preun if [ "$1" = "0" ]; then # $1 = 0 for uninstall /sbin/service openvswitch stop /sbin/chkconfig --del openvswitch fi %postun if [ "$1" = "0" ]; then # $1 = 0 for uninstall rm -f /etc/openvswitch/conf.db rm -f /etc/sysconfig/openvswitch rm -f /etc/openvswitch/vswitchd.cacert fi exit 0 %files %defattr(-,root,root) /etc/init.d/openvswitch %config(noreplace) /etc/logrotate.d/openvswitch /etc/sysconfig/network-scripts/ifup-ovs /etc/sysconfig/network-scripts/ifdown-ovs /usr/bin/ovs-appctl /usr/bin/ovs-benchmark /usr/bin/ovs-dpctl /usr/bin/ovs-dpctl-top /usr/bin/ovs-ofctl /usr/bin/ovs-parse-backtrace /usr/bin/ovs-pcap /usr/bin/ovs-pki /usr/bin/ovs-tcpundump /usr/bin/ovs-vlan-test /usr/bin/ovs-vsctl /usr/bin/ovsdb-client /usr/bin/ovsdb-tool /usr/sbin/ovs-bugtool /usr/sbin/ovs-vswitchd /usr/sbin/ovsdb-server /usr/share/man/man1/ovs-benchmark.1.gz /usr/share/man/man1/ovs-pcap.1.gz /usr/share/man/man1/ovs-tcpundump.1.gz /usr/share/man/man1/ovsdb-client.1.gz /usr/share/man/man1/ovsdb-server.1.gz /usr/share/man/man1/ovsdb-tool.1.gz /usr/share/man/man5/ovs-vswitchd.conf.db.5.gz /usr/share/man/man8/ovs-appctl.8.gz /usr/share/man/man8/ovs-bugtool.8.gz /usr/share/man/man8/ovs-ctl.8.gz /usr/share/man/man8/ovs-dpctl.8.gz /usr/share/man/man8/ovs-dpctl-top.8.gz /usr/share/man/man8/ovs-ofctl.8.gz /usr/share/man/man8/ovs-parse-backtrace.8.gz /usr/share/man/man8/ovs-pki.8.gz /usr/share/man/man8/ovs-vlan-test.8.gz /usr/share/man/man8/ovs-vsctl.8.gz /usr/share/man/man8/ovs-vswitchd.8.gz /usr/share/openvswitch/bugtool-plugins/ /usr/share/openvswitch/python/ /usr/share/openvswitch/scripts/ovs-bugtool-* /usr/share/openvswitch/scripts/ovs-check-dead-ifs /usr/share/openvswitch/scripts/ovs-ctl /usr/share/openvswitch/scripts/ovs-lib 
/usr/share/openvswitch/scripts/ovs-save /usr/share/openvswitch/scripts/sysconfig.template /usr/share/openvswitch/vswitch.ovsschema /usr/share/doc/openvswitch-%{version}/FAQ /usr/share/doc/openvswitch-%{version}/README.RHEL /var/lib/openvswitch /var/log/openvswitch openvswitch-2.0.1+git20140120/rhel/usr_lib_systemd_system_openvswitch.service000066400000000000000000000004231226605124000272310ustar00rootroot00000000000000[Unit] Description=Open vSwitch After=syslog.target network.target [Service] Type=oneshot ExecStart=/usr/share/openvswitch/scripts/openvswitch.init start ExecStop=/usr/share/openvswitch/scripts/openvswitch.init stop RemainAfterExit=yes [Install] WantedBy=multi-user.target openvswitch-2.0.1+git20140120/rhel/usr_share_openvswitch_scripts_sysconfig.template000066400000000000000000000015521226605124000304230ustar00rootroot00000000000000### Configuration options for openvswitch # Copyright (C) 2009, 2010, 2011 Nicira, Inc. # FORCE_COREFILES: If 'yes' then core files will be enabled. # FORCE_COREFILES=yes # OVSDB_SERVER_PRIORITY: "nice" priority at which to run ovsdb-server. # # OVSDB_SERVER_PRIORITY=-10 # VSWITCHD_PRIORITY: "nice" priority at which to run ovs-vswitchd. # VSWITCHD_PRIORITY=-10 # VSWITCHD_MLOCKALL: Whether to pass ovs-vswitchd the --mlockall option. # This option should be set to "yes" or "no". The default is "yes". # Enabling this option can avoid networking interruptions due to # system memory pressure in extraordinary situations, such as multiple # concurrent VM import operations. # VSWITCHD_MLOCKALL=yes # OVS_CTL_OPTS: Extra options to pass to ovs-ctl. This is, for example, # a suitable place to specify --ovs-vswitchd-wrapper=valgrind. 
# OVS_CTL_OPTS= openvswitch-2.0.1+git20140120/tests/000077500000000000000000000000001226605124000166145ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/tests/.gitignore000066400000000000000000000011061226605124000206020ustar00rootroot00000000000000*.pem /Makefile /Makefile.in /atconfig /atlocal /idltest.c /idltest.h /idltest.ovsidl /ovs-pki.log /pki/ /test-aes128 /test-atomic /test-bundle /test-byte-order /test-classifier /test-csum /test-file_name /test-flows /test-hash /test-heap /test-hindex /test-hmap /test-json /test-jsonrpc /test-list /test-lockfile /test-multipath /test-netflow /test-odp /test-ovsdb /test-packets /test-random /test-reconnect /test-sflow /test-sha1 /test-stp /test-strtok_r /test-timeval /test-type-props /test-unix-socket /test-util /test-uuid /test-vconn /testsuite /testsuite.dir/ /testsuite.log openvswitch-2.0.1+git20140120/tests/MockXenAPI.py000066400000000000000000000064051226605124000210710ustar00rootroot00000000000000# Copyright (c) 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import re def xapi_local(): return Session() class Session(object): def __init__(self): self.xenapi = XenAPI() class Failure(Exception): pass class XenAPI(object): def __init__(self): self.network = Network() self.pool = Pool() self.VIF = VIF() self.VM = VM() def login_with_password(self, unused_username, unused_password): pass class RecordRef(object): def __init__(self, attrs): self.attrs = attrs class Table(object): def __init__(self, records): self.records = records def get_all(self): return [RecordRef(rec) for rec in self.records] def get_all_records_where(self, condition): k, v = re.match(r'field "([^"]*)"="([^"]*)"$', condition).groups() d = {} # I'm sure that the keys used in the dictionary below are wrong # but I can't find any documentation on get_all_records_where # and this satisfies the current test case. i = 0 for rec in self.records: if rec[k] == v: d[i] = rec i += 1 return d def get_by_uuid(self, uuid): recs = [rec for rec in self.records if rec["uuid"] == uuid] if len(recs) != 1: raise Failure("No record with UUID %s" % uuid) return RecordRef(recs[0]) def get_record(self, record_ref): return record_ref.attrs class Network(Table): __records = ({"uuid": "9b66c68b-a74e-4d34-89a5-20a8ab352d1e", "bridge": "xenbr0", "other_config": {"vswitch-controller-fail-mode": "secure", "nicira-bridge-id": "custom bridge ID"}}, {"uuid": "e1c9019d-375b-45ac-a441-0255dd2247de", "bridge": "xenbr1", "other_config": {"vswitch-disable-in-band": "true"}}) def __init__(self): Table.__init__(self, Network.__records) class Pool(Table): __records = ({"uuid": "7a793edf-e5f4-4994-a0f9-cee784c0cda3", "other_config": {"vswitch-controller-fail-mode": "secure"}},) def __init__(self): Table.__init__(self, Pool.__records) class VIF(Table): __records = ({"uuid": "6ab1b260-398e-49ba-827b-c7696108964c", "other_config": {"nicira-iface-id": "custom iface ID"}},) def __init__(self): Table.__init__(self, VIF.__records) class VM(Table): __records = ({"uuid": 
"fcb8a3f6-dc04-41d2-8b8a-55afd2b755b8", "other_config": {"nicira-vm-id": "custom vm ID"}},) def __init__(self): Table.__init__(self, VM.__records) openvswitch-2.0.1+git20140120/tests/aes128.at000066400000000000000000000076421226605124000201560ustar00rootroot00000000000000AT_BANNER([AES-128 unit tests]) m4_define([AES128_CHECK], [AT_SETUP([$1]) AT_KEYWORDS([aes128]) AT_CHECK([test-aes128 $2 $3], [0], [$4 ], []) AT_CLEANUP]) AES128_CHECK( [wikipedia test vector 1], [00010203050607080a0b0c0d0f101112], [506812a45f08c889b97f5980038b8359], [d8f532538289ef7d06b506a4fd5be9c9]) AES128_CHECK( [wikipedia test vector 2], [95A8EE8E89979B9EFDCBC6EB9797528D], [4ec137a426dabf8aa0beb8bc0c2b89d6], [d9b65d1232ba0199cdbd487b2a1fd646]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 0], [10a58869d74be5a374cf867cfb473859], [00000000000000000000000000000000], [6d251e6944b051e04eaa6fb4dbf78465]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 1], [caea65cdbb75e9169ecd22ebe6e54675], [00000000000000000000000000000000], [6e29201190152df4ee058139def610bb]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 2], [a2e2fa9baf7d20822ca9f0542f764a41], [00000000000000000000000000000000], [c3b44b95d9d2f25670eee9a0de099fa3]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 3], [b6364ac4e1de1e285eaf144a2415f7a0], [00000000000000000000000000000000], [5d9b05578fc944b3cf1ccf0e746cd581]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 4], [64cf9c7abc50b888af65f49d521944b2], [00000000000000000000000000000000], [f7efc89d5dba578104016ce5ad659c05]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 5], [47d6742eefcc0465dc96355e851b64d9], [00000000000000000000000000000000], [0306194f666d183624aa230a8b264ae7]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 6], [3eb39790678c56bee34bbcdeccf6cdb5], [00000000000000000000000000000000], [858075d536d79ccee571f7d7204b1f67]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 7], [64110a924f0743d500ccadae72c13427], [00000000000000000000000000000000], [35870c6a57e9e92314bcb8087cde72ce]) 
AES128_CHECK( [NIST KAT ECBKeySbox128e vector 8], [18d8126516f8a12ab1a36d9f04d68e51], [00000000000000000000000000000000], [6c68e9be5ec41e22c825b7c7affb4363]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 9], [f530357968578480b398a3c251cd1093], [00000000000000000000000000000000], [f5df39990fc688f1b07224cc03e86cea]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 10], [da84367f325d42d601b4326964802e8e], [00000000000000000000000000000000], [bba071bcb470f8f6586e5d3add18bc66]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 11], [e37b1c6aa2846f6fdb413f238b089f23], [00000000000000000000000000000000], [43c9f7e62f5d288bb27aa40ef8fe1ea8]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 12], [6c002b682483e0cabcc731c253be5674], [00000000000000000000000000000000], [3580d19cff44f1014a7c966a69059de5]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 13], [143ae8ed6555aba96110ab58893a8ae1], [00000000000000000000000000000000], [806da864dd29d48deafbe764f8202aef]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 14], [b69418a85332240dc82492353956ae0c], [00000000000000000000000000000000], [a303d940ded8f0baff6f75414cac5243]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 15], [71b5c08a1993e1362e4d0ce9b22b78d5], [00000000000000000000000000000000], [c2dabd117f8a3ecabfbb11d12194d9d0]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 16], [e234cdca2606b81f29408d5f6da21206], [00000000000000000000000000000000], [fff60a4740086b3b9c56195b98d91a7b]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 17], [13237c49074a3da078dc1d828bb78c6f], [00000000000000000000000000000000], [8146a08e2357f0caa30ca8c94d1a0544]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 18], [3071a2a48fe6cbd04f1a129098e308f8], [00000000000000000000000000000000], [4b98e06d356deb07ebb824e5713f7be3]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 19], [90f42ec0f68385f2ffc5dfc03a654dce], [00000000000000000000000000000000], [7a20a53d460fc9ce0423a7a0764c6cf2]) AES128_CHECK( [NIST KAT ECBKeySbox128e vector 20], [febd9a24d8b65c1c787d50a4ed3619a9], 
[00000000000000000000000000000000], [f4a70d8af877f9b02b4c40df57d45b17]) openvswitch-2.0.1+git20140120/tests/appctl.py000066400000000000000000000044571226605124000204630ustar00rootroot00000000000000# Copyright (c) 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import argparse import signal import sys import ovs.daemon import ovs.unixctl import ovs.unixctl.client import ovs.util import ovs.vlog def connect_to_target(target): error, str_result = ovs.unixctl.socket_name_from_target(target) if error: ovs.util.ovs_fatal(error, str_result) else: socket_name = str_result error, client = ovs.unixctl.client.UnixctlClient.create(socket_name) if error: ovs.util.ovs_fatal(error, "cannot connect to \"%s\"" % socket_name) return client def main(): parser = argparse.ArgumentParser(description="Python Implementation of" " ovs-appctl.") parser.add_argument("-t", "--target", default="ovs-vswitchd", help="pidfile or socket to contact") parser.add_argument("command", metavar="COMMAND", help="Command to run.") parser.add_argument("argv", metavar="ARG", nargs="*", help="Arguments to the command.") parser.add_argument("-T", "--timeout", metavar="SECS", help="wait at most SECS seconds for a response") args = parser.parse_args() if args.timeout: signal.alarm(int(args.timeout)) ovs.vlog.Vlog.init() target = args.target client = connect_to_target(target) err_no, error, result = client.transact(args.command, args.argv) client.close() if err_no: ovs.util.ovs_fatal(err_no, "%s: transaction 
error" % target) elif error is not None: sys.stderr.write(error) ovs.util.ovs_error(0, "%s: server returned an error" % target) sys.exit(2) else: assert result is not None sys.stdout.write(result) if __name__ == '__main__': main() openvswitch-2.0.1+git20140120/tests/atlocal.in000066400000000000000000000025301226605124000205630ustar00rootroot00000000000000# -*- shell-script -*- HAVE_OPENSSL='@HAVE_OPENSSL@' HAVE_PYTHON='@HAVE_PYTHON@' EGREP='@EGREP@' PERL='@PERL@' if test x"$PYTHON" = x; then PYTHON='@PYTHON@' fi PYTHONPATH=$abs_top_srcdir/python:$abs_top_builddir/tests:$PYTHONPATH export PYTHONPATH PYTHONIOENCODING=utf_8 export PYTHONIOENCODING # PYTHONDONTWRITEBYTECODE=yes keeps Python 2.6+ from creating .pyc and .pyo # files. Creating .py[co] works OK for any given version of Open # vSwitch, but it causes trouble if you switch from a version with # foo/__init__.py into an (older) version with plain foo.py, since # foo/__init__.pyc will cause Python to ignore foo.py. # # Python before version 2.6 always creates .pyc files, so if you develop # with such an older version then you're out of luck. PYTHONDONTWRITEBYTECODE=yes export PYTHONDONTWRITEBYTECODE if test $HAVE_PYTHON = yes; then if python -m argparse 2>/dev/null; then : else PYTHONPATH=$PYTHONPATH:$abs_top_srcdir/python/compat export PYTHONPATH fi fi # Enable malloc debugging features. 
case `uname` in Linux) MALLOC_CHECK_=2 MALLOC_PERTURB_=165 export MALLOC_CHECK_ export MALLOC_PERTURB_ ;; FreeBSD) case `uname -r` in [789].*) MALLOC_CONF=AJ ;; *) MALLOC_CONF=abort:true,junk:true,redzone:true ;; esac export MALLOC_CONF esac openvswitch-2.0.1+git20140120/tests/automake.mk000066400000000000000000000300531226605124000207540ustar00rootroot00000000000000EXTRA_DIST += \ $(TESTSUITE_AT) \ $(TESTSUITE) \ tests/atlocal.in \ $(srcdir)/package.m4 \ $(srcdir)/tests/testsuite TESTSUITE_AT = \ tests/testsuite.at \ tests/ovsdb-macros.at \ tests/library.at \ tests/heap.at \ tests/bundle.at \ tests/classifier.at \ tests/check-structs.at \ tests/daemon.at \ tests/daemon-py.at \ tests/ofp-actions.at \ tests/ofp-print.at \ tests/ofp-util.at \ tests/ofp-errors.at \ tests/ovs-ofctl.at \ tests/odp.at \ tests/multipath.at \ tests/bfd.at \ tests/cfm.at \ tests/lacp.at \ tests/learn.at \ tests/vconn.at \ tests/file_name.at \ tests/aes128.at \ tests/unixctl-py.at \ tests/uuid.at \ tests/json.at \ tests/jsonrpc.at \ tests/jsonrpc-py.at \ tests/timeval.at \ tests/tunnel.at \ tests/lockfile.at \ tests/reconnect.at \ tests/ovs-vswitchd.at \ tests/ofproto-dpif.at \ tests/vlan-splinters.at \ tests/ofproto-macros.at \ tests/ofproto.at \ tests/ovsdb.at \ tests/ovsdb-log.at \ tests/ovsdb-types.at \ tests/ovsdb-data.at \ tests/ovsdb-column.at \ tests/ovsdb-table.at \ tests/ovsdb-row.at \ tests/ovsdb-schema.at \ tests/ovsdb-condition.at \ tests/ovsdb-mutation.at \ tests/ovsdb-query.at \ tests/ovsdb-transaction.at \ tests/ovsdb-execution.at \ tests/ovsdb-trigger.at \ tests/ovsdb-tool.at \ tests/ovsdb-server.at \ tests/ovsdb-monitor.at \ tests/ovsdb-idl.at \ tests/ovs-vsctl.at \ tests/ovs-monitor-ipsec.at \ tests/ovs-xapi-sync.at \ tests/stp.at \ tests/interface-reconfigure.at \ tests/vlog.at TESTSUITE = $(srcdir)/tests/testsuite DISTCLEANFILES += tests/atconfig tests/atlocal AUTOTEST_PATH = utilities:vswitchd:ovsdb:tests check-local: tests/atconfig tests/atlocal $(TESTSUITE) $(SHELL) 
'$(TESTSUITE)' -C tests AUTOTEST_PATH=$(AUTOTEST_PATH) $(TESTSUITEFLAGS) # Python Coverage support. # Requires coverage.py http://nedbatchelder.com/code/coverage/. COVERAGE = coverage COVERAGE_FILE='$(abs_srcdir)/.coverage' check-pycov: all tests/atconfig tests/atlocal $(TESTSUITE) clean-pycov PYTHONDONTWRITEBYTECODE=yes COVERAGE_FILE=$(COVERAGE_FILE) PYTHON='$(COVERAGE) run -p' $(SHELL) '$(TESTSUITE)' -C tests AUTOTEST_PATH=$(AUTOTEST_PATH) $(TESTSUITEFLAGS) @cd $(srcdir) && $(COVERAGE) combine && COVERAGE_FILE=$(COVERAGE_FILE) $(COVERAGE) annotate @echo @echo '----------------------------------------------------------------------' @echo 'Annotated coverage source has the ",cover" extension.' @echo '----------------------------------------------------------------------' @echo @COVERAGE_FILE=$(COVERAGE_FILE) $(COVERAGE) report # valgrind support valgrind_wrappers = \ tests/valgrind/ovs-appctl \ tests/valgrind/ovs-ofctl \ tests/valgrind/ovs-vsctl \ tests/valgrind/ovs-vswitchd \ tests/valgrind/ovsdb-client \ tests/valgrind/ovsdb-server \ tests/valgrind/ovsdb-tool \ tests/valgrind/test-aes128 \ tests/valgrind/test-atomic \ tests/valgrind/test-bundle \ tests/valgrind/test-byte-order \ tests/valgrind/test-classifier \ tests/valgrind/test-csum \ tests/valgrind/test-file_name \ tests/valgrind/test-flows \ tests/valgrind/test-hash \ tests/valgrind/test-heap \ tests/valgrind/test-hindex \ tests/valgrind/test-hmap \ tests/valgrind/test-json \ tests/valgrind/test-jsonrpc \ tests/valgrind/test-list \ tests/valgrind/test-lockfile \ tests/valgrind/test-multipath \ tests/valgrind/test-odp \ tests/valgrind/test-ovsdb \ tests/valgrind/test-packets \ tests/valgrind/test-random \ tests/valgrind/test-reconnect \ tests/valgrind/test-sha1 \ tests/valgrind/test-stp \ tests/valgrind/test-timeval \ tests/valgrind/test-type-props \ tests/valgrind/test-unix-socket \ tests/valgrind/test-uuid \ tests/valgrind/test-vconn $(valgrind_wrappers): tests/valgrind-wrapper.in @test -d tests/valgrind || 
mkdir tests/valgrind sed -e 's,[@]wrap_program[@],$@,' \ $(top_srcdir)/tests/valgrind-wrapper.in > $@.tmp chmod +x $@.tmp mv $@.tmp $@ CLEANFILES += $(valgrind_wrappers) EXTRA_DIST += tests/valgrind-wrapper.in VALGRIND = valgrind --log-file=valgrind.%p --leak-check=full \ --suppressions=$(abs_top_srcdir)/tests/glibc.supp \ --suppressions=$(abs_top_srcdir)/tests/openssl.supp --num-callers=20 EXTRA_DIST += tests/glibc.supp tests/openssl.supp check-valgrind: all tests/atconfig tests/atlocal $(TESTSUITE) \ $(valgrind_wrappers) $(check_DATA) $(SHELL) '$(TESTSUITE)' -C tests CHECK_VALGRIND=true VALGRIND='$(VALGRIND)' AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS) @echo @echo '----------------------------------------------------------------------' @echo 'Valgrind output can be found in tests/testsuite.dir/*/valgrind.*' @echo '----------------------------------------------------------------------' # OFTest support. check-oftest: all srcdir='$(srcdir)' $(SHELL) $(srcdir)/tests/run-oftest EXTRA_DIST += tests/run-oftest clean-local: test ! -f '$(TESTSUITE)' || $(SHELL) '$(TESTSUITE)' -C tests --clean AUTOTEST = $(AUTOM4TE) --language=autotest $(TESTSUITE): package.m4 $(TESTSUITE_AT) $(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at mv $@.tmp $@ # The `:;' works around a Bash 3.2 bug when the output is not writeable. $(srcdir)/package.m4: $(top_srcdir)/configure.ac :;{ \ echo '# Signature of the current package.' 
&& \ echo 'm4_define([AT_PACKAGE_NAME], [$(PACKAGE_NAME)])' && \ echo 'm4_define([AT_PACKAGE_TARNAME], [$(PACKAGE_TARNAME)])' && \ echo 'm4_define([AT_PACKAGE_VERSION], [$(PACKAGE_VERSION)])' && \ echo 'm4_define([AT_PACKAGE_STRING], [$(PACKAGE_STRING)])' && \ echo 'm4_define([AT_PACKAGE_BUGREPORT], [$(PACKAGE_BUGREPORT)])'; \ } >'$(srcdir)/package.m4' noinst_PROGRAMS += tests/test-aes128 tests_test_aes128_SOURCES = tests/test-aes128.c tests_test_aes128_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-atomic tests_test_atomic_SOURCES = tests/test-atomic.c tests_test_atomic_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-bundle tests_test_bundle_SOURCES = tests/test-bundle.c tests_test_bundle_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-classifier tests_test_classifier_SOURCES = tests/test-classifier.c tests_test_classifier_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-csum tests_test_csum_SOURCES = tests/test-csum.c tests_test_csum_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-file_name tests_test_file_name_SOURCES = tests/test-file_name.c tests_test_file_name_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-flows tests_test_flows_SOURCES = tests/test-flows.c tests_test_flows_LDADD = lib/libopenvswitch.a $(SSL_LIBS) dist_check_SCRIPTS = tests/flowgen.pl noinst_PROGRAMS += tests/test-hash tests_test_hash_SOURCES = tests/test-hash.c tests_test_hash_LDADD = lib/libopenvswitch.a noinst_PROGRAMS += tests/test-heap tests_test_heap_SOURCES = tests/test-heap.c tests_test_heap_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-hindex tests_test_hindex_SOURCES = tests/test-hindex.c tests_test_hindex_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-hmap tests_test_hmap_SOURCES = tests/test-hmap.c tests_test_hmap_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-json 
tests_test_json_SOURCES = tests/test-json.c tests_test_json_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-jsonrpc tests_test_jsonrpc_SOURCES = tests/test-jsonrpc.c tests_test_jsonrpc_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-list tests_test_list_SOURCES = tests/test-list.c tests_test_list_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-lockfile tests_test_lockfile_SOURCES = tests/test-lockfile.c tests_test_lockfile_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-multipath tests_test_multipath_SOURCES = tests/test-multipath.c tests_test_multipath_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-packets tests_test_packets_SOURCES = tests/test-packets.c tests_test_packets_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-random tests_test_random_SOURCES = tests/test-random.c tests_test_random_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-stp tests_test_stp_SOURCES = tests/test-stp.c tests_test_stp_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-sflow tests_test_sflow_SOURCES = tests/test-sflow.c tests_test_sflow_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-netflow tests_test_netflow_SOURCES = tests/test-netflow.c tests_test_netflow_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-unix-socket tests_test_unix_socket_SOURCES = tests/test-unix-socket.c tests_test_unix_socket_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-odp tests_test_odp_SOURCES = tests/test-odp.c tests_test_odp_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-ovsdb tests_test_ovsdb_SOURCES = \ tests/test-ovsdb.c \ tests/idltest.c \ tests/idltest.h EXTRA_DIST += tests/uuidfilt.pl tests/ovsdb-monitor-sort.pl tests_test_ovsdb_LDADD = ovsdb/libovsdb.a lib/libopenvswitch.a $(SSL_LIBS) # idltest schema and IDL OVSIDL_BUILT += 
tests/idltest.c tests/idltest.h tests/idltest.ovsidl IDLTEST_IDL_FILES = tests/idltest.ovsschema tests/idltest.ann EXTRA_DIST += $(IDLTEST_IDL_FILES) tests/idltest.ovsidl: $(IDLTEST_IDL_FILES) $(OVSDB_IDLC) -C $(srcdir) annotate $(IDLTEST_IDL_FILES) > $@.tmp mv $@.tmp $@ tests/idltest.c: tests/idltest.h noinst_PROGRAMS += tests/test-reconnect tests_test_reconnect_SOURCES = tests/test-reconnect.c tests_test_reconnect_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-sha1 tests_test_sha1_SOURCES = tests/test-sha1.c tests_test_sha1_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-timeval tests_test_timeval_SOURCES = tests/test-timeval.c tests_test_timeval_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-strtok_r tests_test_strtok_r_SOURCES = tests/test-strtok_r.c noinst_PROGRAMS += tests/test-type-props tests_test_type_props_SOURCES = tests/test-type-props.c noinst_PROGRAMS += tests/test-util tests_test_util_SOURCES = tests/test-util.c tests_test_util_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-uuid tests_test_uuid_SOURCES = tests/test-uuid.c tests_test_uuid_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-vconn tests_test_vconn_SOURCES = tests/test-vconn.c tests_test_vconn_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += tests/test-byte-order tests_test_byte_order_SOURCES = tests/test-byte-order.c tests_test_byte_order_LDADD = lib/libopenvswitch.a # Python tests. 
CHECK_PYFILES = \ tests/appctl.py \ tests/test-daemon.py \ tests/test-json.py \ tests/test-jsonrpc.py \ tests/test-ovsdb.py \ tests/test-reconnect.py \ tests/MockXenAPI.py \ tests/test-unix-socket.py \ tests/test-unixctl.py \ tests/test-vlog.py EXTRA_DIST += $(CHECK_PYFILES) PYCOV_CLEAN_FILES += $(CHECK_PYFILES:.py=.py,cover) .coverage if HAVE_OPENSSL TESTPKI_FILES = \ tests/testpki-cacert.pem \ tests/testpki-cert.pem \ tests/testpki-privkey.pem \ tests/testpki-req.pem \ tests/testpki-cert2.pem \ tests/testpki-privkey2.pem \ tests/testpki-req2.pem check_DATA += $(TESTPKI_FILES) CLEANFILES += $(TESTPKI_FILES) tests/testpki-cacert.pem: tests/pki/stamp; cp tests/pki/switchca/cacert.pem $@ tests/testpki-cert.pem: tests/pki/stamp; cp tests/pki/test-cert.pem $@ tests/testpki-req.pem: tests/pki/stamp; cp tests/pki/test-req.pem $@ tests/testpki-privkey.pem: tests/pki/stamp; cp tests/pki/test-privkey.pem $@ tests/testpki-cert2.pem: tests/pki/stamp; cp tests/pki/test2-cert.pem $@ tests/testpki-req2.pem: tests/pki/stamp; cp tests/pki/test2-req.pem $@ tests/testpki-privkey2.pem: tests/pki/stamp; cp tests/pki/test2-privkey.pem $@ OVS_PKI = $(SHELL) $(srcdir)/utilities/ovs-pki.in --dir=tests/pki --log=tests/ovs-pki.log tests/pki/stamp: rm -f tests/pki/stamp rm -rf tests/pki $(OVS_PKI) init $(OVS_PKI) req+sign tests/pki/test $(OVS_PKI) req+sign tests/pki/test2 : > tests/pki/stamp CLEANFILES += tests/ovs-pki.log CLEAN_LOCAL += clean-pki clean-pki: rm -f tests/pki/stamp rm -rf tests/pki endif openvswitch-2.0.1+git20140120/tests/bfd.at000066400000000000000000001072501226605124000177020ustar00rootroot00000000000000AT_BANNER([bfd]) m4_define([BFD_CHECK], [ AT_CHECK([ovs-appctl bfd/show $1 | sed -e '/Time:/d' | sed -e '/Discriminator/d' | sed -e '/Interval:/d'],[0], [dnl Forwarding: $2 Detect Multiplier: 3 Concatenated Path Down: $3 Local Flags: $4 Local Session State: $5 Local Diagnostic: $6 Remote Flags: $7 Remote Session State: $8 Remote Diagnostic: $9 ]) ]) 
m4_define([BFD_CHECK_TX], [ AT_CHECK([ovs-appctl bfd/show $1 | sed -n '/TX Interval/p'],[0], [dnl TX Interval: Approx $2 Local Minimum TX Interval: $3 Remote Minimum TX Interval: $4 ]) ]) m4_define([BFD_CHECK_RX], [ AT_CHECK([ovs-appctl bfd/show $1 | sed -n '/RX Interval/p'],[0], [dnl RX Interval: Approx $2 Local Minimum RX Interval: $3 Remote Minimum RX Interval: $4 ]) ]) AT_SETUP([bfd - basic config on different bridges]) #Create 2 bridges connected by patch ports and enable BFD OVS_VSWITCHD_START( [add-br br1 -- \ set bridge br1 datapath-type=dummy \ other-config:hwaddr=aa:55:aa:56:00:00 -- \ add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 -- \ add-port br0 p0 -- set Interface p0 type=patch \ options:peer=p1 -- \ set Interface p0 bfd:enable=true -- \ set Interface p1 bfd:enable=true ]) ovs-appctl time/stop for i in `seq 0 40`; do ovs-appctl time/warp 100; done #Verify that BFD has been enabled on both interfaces. BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) AT_CHECK([ ovs-vsctl set interface p0 bfd:enable=false]) for i in `seq 0 40`; do ovs-appctl time/warp 100; done BFD_CHECK([p1], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) AT_CHECK([ ovs-vsctl set interface p0 bfd:enable=true]) for i in `seq 0 40`; do ovs-appctl time/warp 100; done BFD_CHECK([p1], [true], [false], [none], [up], [Control Detection Time Expired], [none], [up], [No Diagnostic]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [Control Detection Time Expired]) ovs-vsctl del-br br0 AT_CHECK([ovs-appctl bfd/show p0], [2],[ignore], [no such bfd object ovs-appctl: ovs-vswitchd: server returned an error ]) ovs-vsctl del-br br1 #Check that the entries are gone. 
AT_CHECK([ovs-appctl bfd/show p1], [2],[ignore], [no such bfd object ovs-appctl: ovs-vswitchd: server returned an error ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([bfd - Verify tunnel down detection]) #Create 3 bridges - br-bfd0, br-bfd1 and br-sw which is midway between the two. br2 is #connected to br-bfd0 and br-bfd1 through patch ports p0-2 and p1-2. Enable BFD on #interfaces in br-bfd0 and br-bfd1. When br-sw is dropping all packets, BFD should detect # that the tunnel is down, and come back up when br-sw is working fine. OVS_VSWITCHD_START( [add-br br-bfd0 -- \ set bridge br-bfd0 datapath-type=dummy \ other-config:hwaddr=aa:55:aa:56:00:00 -- \ add-br br-bfd1 -- \ set bridge br-bfd1 datapath-type=dummy \ other-config:hwaddr=aa:55:aa:57:00:00 -- \ add-br br-sw -- \ set bridge br-sw datapath-type=dummy \ other-config:hwaddr=aa:55:aa:58:00:00 -- \ add-port br-sw p1-sw -- set Interface p1-sw type=patch \ options:peer=p1 -- \ add-port br-sw p0-sw -- set Interface p0-sw type=patch \ options:peer=p0 -- \ add-port br-bfd1 p1 -- set Interface p1 type=patch \ options:peer=p1-sw bfd:enable=true -- \ add-port br-bfd0 p0 -- set Interface p0 type=patch \ options:peer=p0-sw bfd:enable=true --]) ovs-appctl time/stop #Create 2 bridges connected by patch ports and enable BFD AT_CHECK([ovs-ofctl add-flow br-sw 'priority=0,actions=NORMAL']) #Verify that BFD is enabled. for i in `seq 0 40`; do ovs-appctl time/warp 100; done BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) #Drop all packets in the br-sw bridge so that the tunnel is down. 
AT_CHECK([ ovs-ofctl add-flow br-sw 'priority=5,actions=drop' ]) for i in `seq 0 40`; do ovs-appctl time/warp 100; done BFD_CHECK([p1], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) BFD_CHECK([p0], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) #Delete the added flow AT_CHECK([ovs-ofctl del-flows br-sw], [0]) AT_CHECK([ovs-ofctl add-flow br-sw 'priority=0,actions=NORMAL']) #Verify that BFD is back up again. for i in `seq 0 40`; do ovs-appctl time/warp 100; done BFD_CHECK([p1], [true], [false], [none], [up], [Control Detection Time Expired], [none], [up], [Control Detection Time Expired]) BFD_CHECK([p0], [true], [false], [none], [up], [Control Detection Time Expired], [none], [up], [Control Detection Time Expired]) #Now, Verify one-side tunnel down detection #When br-sw is dropping packets from one end, BFD should detect # that the tunnel is down, and come back up when br-sw is working fine. #Bring down the br-bfd1 - br-sw link. So BFD packets will be sent from p0, # but not received by p1. p0 will receive all BFD packets from p1. AT_CHECK([ ovs-ofctl add-flow br-sw 'in_port=1,priority=5,actions=drop']) for i in `seq 0 40`; do ovs-appctl time/warp 100; done # Make sure p1 BFD state is down since it received no BFD packets. BFD_CHECK([p1], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) for i in `seq 0 40`; do ovs-appctl time/warp 100; done # p0 will be in init state once it receives "down" BFD message from p1. BFD_CHECK([p0], [false], [false], [none], [init], [Neighbor Signaled Session Down], [none], [down], [Control Detection Time Expired]) AT_CHECK([ovs-ofctl del-flows br-sw]) AT_CHECK([ovs-ofctl add-flow br-sw 'priority=0,actions=NORMAL']) #Ensure that BFD is back up again. 
for i in `seq 0 10`; do ovs-appctl time/warp 100; done #Bring down the br-bfd0 - br-sw link AT_CHECK([ ovs-ofctl add-flow br-sw 'in_port=2,priority=5,actions=drop']) for i in `seq 0 40`; do ovs-appctl time/warp 100; done BFD_CHECK([p0], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) for i in `seq 0 40`; do ovs-appctl time/warp 100; done BFD_CHECK([p1], [false], [false], [none], [init], [Neighbor Signaled Session Down], [none], [down], [Control Detection Time Expired]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([bfd - concatenated path down]) #Create 2 bridges connected by patch ports and enable BFD OVS_VSWITCHD_START() ovs-appctl time/stop AT_CHECK([ ovs-vsctl -- add-br br1 -- \ set bridge br1 datapath-type=dummy \ other-config:hwaddr=aa:55:aa:56:00:00 ]) AT_CHECK([ ovs-vsctl -- add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 ]) AT_CHECK([ ovs-vsctl -- add-port br0 p0 -- set Interface p0 type=patch \ options:peer=p1 ]) AT_CHECK([ ovs-vsctl -- set interface p0 bfd:enable=true ]) AT_CHECK([ ovs-vsctl -- set interface p1 bfd:enable=true ]) for i in `seq 0 40`; do ovs-appctl time/warp 100; done #Verify that BFD has been enabled on both interfaces. BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) #Set cpath_down to true on one interface, make sure the remote interface updates its values. 
AT_CHECK([ovs-vsctl set interface p0 bfd:cpath_down=true]) for i in `seq 0 40`; do ovs-appctl time/warp 100; done BFD_CHECK([p1], [false], [false], [none], [up], [No Diagnostic], [none], [up], [Concatenated Path Down]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([bfd - Edit the Min Tx/Rx values]) #Create 2 bridges connected by patch ports and enable BFD OVS_VSWITCHD_START() ovs-appctl time/stop AT_CHECK([ ovs-vsctl -- add-br br1 -- \ set bridge br1 datapath-type=dummy ]) AT_CHECK([ ovs-vsctl -- add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 ]) AT_CHECK([ ovs-vsctl -- add-port br0 p0 -- set Interface p0 type=patch \ options:peer=p1 ]) AT_CHECK([ ovs-vsctl -- set interface p0 bfd:enable=true ]) AT_CHECK([ ovs-vsctl -- set interface p1 bfd:enable=true ]) for i in `seq 0 30`; do ovs-appctl time/warp 100; done #Verify that BFD has been enabled on both interfaces. BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) #Edit the min Tx value. AT_CHECK([ovs-vsctl set interface p0 bfd:min_tx=200]) for i in `seq 0 20`; do ovs-appctl time/warp 100; done BFD_CHECK_TX([p0], [1000ms], [200ms], [100ms]) BFD_CHECK_TX([p1], [1000ms], [100ms], [200ms]) #Edit the min Rx value. AT_CHECK([ovs-vsctl set interface p1 bfd:min_rx=300]) for i in `seq 0 20`; do ovs-appctl time/warp 100; done BFD_CHECK_RX([p1], [300ms], [300ms], [1000ms]) BFD_CHECK_RX([p0], [1000ms], [1000ms], [300ms]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([bfd - check_tnl_key]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ options:remote_ip=2.2.2.2 options:key=1 ofport_request=1 -- \ set interface p1 bfd:enable=true -- \ set bridge br0 fail-mode=standalone]) # by default check_tnl_key is false. so we should process a bfd packet with tun_id=1. 
AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x1,src=2.2.2.2,dst=2.2.2.1,tos=0x0,ttl=64,flags(key)),in_port(1),skb_mark(0/0),eth(src=00:11:22:33:44:55,dst=00:23:20:00:00:01),eth_type(0x0800),ipv4(src=169.254.1.0/0.0.0.0,dst=169.254.1.1/0.0.0.0,proto=17/0xff,tos=0/0,ttl=255/0,frag=no/0xff),udp(src=49152/0,dst=3784/0xffff)' -generate], [0], [stdout]) # check that the packet should be handled as BFD packet. AT_CHECK([tail -2 stdout], [0], [dnl This flow is handled by the userspace slow path because it: - Consists of BFD packets. ], []) # turn on the check_tnl_key. AT_CHECK([ovs-vsctl set interface p1 bfd:check_tnl_key=true]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x1,src=2.2.2.2,dst=2.2.2.1,tos=0x0,ttl=64,flags(key)),in_port(1),skb_mark(0/0),eth(src=00:11:22:33:44:55,dst=00:23:20:00:00:01),eth_type(0x0800),ipv4(src=169.254.1.0/0.0.0.0,dst=169.254.1.1/0.0.0.0,proto=17/0xff,tos=0/0,ttl=255/0,frag=no/0xff),udp(src=49152/0,dst=3784/0xffff)' -generate], [0], [stdout]) # check that the packet should be handled as normal packet. AT_CHECK([tail -1 stdout], [0],[dnl Datapath actions: 100 ], []) # set the tunnel key to 0. AT_CHECK([ovs-vsctl set interface p1 options:key=0]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=2.2.2.2,dst=2.2.2.1,tos=0x0,ttl=64,flags(key)),in_port(1),skb_mark(0/0),eth(src=00:11:22:33:44:55,dst=00:23:20:00:00:01),eth_type(0x0800),ipv4(src=169.254.1.0/0.0.0.0,dst=169.254.1.1/0.0.0.0,proto=17/0xff,tos=0/0,ttl=255/0,frag=no/0xff),udp(src=49152/0,dst=3784/0xffff)' -generate], [0], [stdout]) # check that the packet should be handled as BFD packet. AT_CHECK([tail -2 stdout], [0], [dnl This flow is handled by the userspace slow path because it: - Consists of BFD packets. ], []) OVS_VSWITCHD_STOP AT_CLEANUP # Tests below are for bfd decay features. 
AT_SETUP([bfd - bfd decay]) OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy -- \ add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 ofport_request=2 -- \ add-port br0 p0 -- set Interface p0 type=patch \ options:peer=p1 ofport_request=1 -- \ set Interface p0 bfd:enable=true bfd:min_tx=300 bfd:min_rx=300 bfd:decay_min_rx=3000 -- \ set Interface p1 bfd:enable=true bfd:min_tx=500 bfd:min_rx=500]) ovs-appctl time/stop # wait for local session state to go from down to up. for i in `seq 0 1`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [init], [No Diagnostic]) # Test-1 BFD decay: decay to decay_min_rx # bfd:decay_min_rx is set to 3000ms after the local state of p0 goes up, # so for the first 2500ms, there should be no change. for i in `seq 0 4`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) # advance the clock by 500ms. ovs-appctl time/warp 500 # now at 3000ms, min_rx should decay to 3000ms and there should be # poll sequence flags. BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [3000ms], [3000ms], [500ms]) # since the tx_min of p0 is still 500ms, after 500ms from decay, # the control message will be sent from p0 to p1, and p1 'flag' # will go back to none. ovs-appctl time/warp 500 BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) # the rx_min of p0 is 3000ms now, and p1 will send next control message # 3000ms after decay. so, advance clock by 2500ms to make that happen. 
for i in `seq 0 4`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [3000ms], [3000ms], [500ms]) # End of Test-1 ############################################################### # Test-2 BFD decay: go back to cfg_min_rx when there is traffic # receive packet at 1/100ms rate for 3000ms. for i in `seq 0 30` do ovs-appctl time/warp 100 AT_CHECK([ovs-ofctl packet-out br1 3 2 "90e2ba01475000101856b2e80806000108000604000100101856b2e80202020300000000000002020202"], [0], [stdout], []) done # after a decay interval (3000ms), the p0 min_rx will go back to # cfg_min_rx. there should be poll sequence flags. BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) # 500ms later, both direction will send control messages, # and their 'flag' will go back to none. ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) # End of Test-2 ############################################################### # Test-3 BFD decay: go back to cfg_min_rx when decay_min_rx is changed # advance the clock by 2500ms to 3000m after restore of # min_rx. p0 is decayed, and there should be the poll sequence flags. 
for i in `seq 0 4`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [3000ms], [3000ms], [500ms]) # advance the clock, to make 'flag' go back to none. for i in `seq 0 5`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) # change decay_min_rx to 1000ms. # for decay_min_rx < 2000ms, the decay detection time is set to 2000ms. # this should firstly reset the min_rx and start poll sequence. AT_CHECK([ovs-vsctl set Interface p0 bfd:decay_min_rx=1000]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) # for the following 1500ms, there should be no decay, # since the decay_detect_time is set to 2000ms. for i in `seq 0 2` do ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) done ovs-appctl time/warp 500 # at 2000ms, decay should happen and there should be the poll sequence flags. 
BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [1000ms], [1000ms], [500ms]) # advance the clock, so 'flag' go back to none. for i in `seq 0 4`; do ovs-appctl time/warp 500; done # End of Test-3 ############################################################### # Test-4 BFD decay: set min_rx to 800ms. # this should firstly reset the min_rx and then re-decay to 1000ms. AT_CHECK([ovs-vsctl set Interface p0 bfd:min_rx=800]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [800ms], [800ms], [500ms]) # for the following 1600ms, there should be no decay, # since the decay detection time is set to 2000ms. for i in `seq 0 1` do ovs-appctl time/warp 800 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [800ms], [800ms], [500ms]) done ovs-appctl time/warp 400 # at 2000ms, decay should happen and there should be the poll sequence flags. BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [1000ms], [1000ms], [500ms]) # advance the clock, so 'flag' go back to none. for i in `seq 0 4`; do ovs-appctl time/warp 500; done # End of Test-4 ############################################################### # Test-5 BFD decay: set min_rx to 300ms and decay_min_rx to 5000ms together. 
AT_CHECK([ovs-vsctl set Interface p0 bfd:min_rx=300 bfd:decay_min_rx=5000]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) # for decay_min_rx > 2000ms, the decay detection time is set to # decay_min_rx (5000ms). # for the following 4500ms, there should be no decay, # since the decay detection time is set to 5000ms. for i in `seq 0 8` do ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) done ovs-appctl time/warp 500 # at 5000ms, decay should happen and there should be the poll sequence flags. BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [5000ms], [5000ms], [500ms]) # advance the clock, to make 'flag' go back to none. for i in `seq 0 9`; do ovs-appctl time/warp 500; done # End of Test-5 ############################################################### # Test-6 BFD decay: set decay_min_rx to 0 to disable bfd decay. AT_CHECK([ovs-vsctl set Interface p0 bfd:decay_min_rx=0]) # min_rx is reset, and there should be the poll sequence flags. 
BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) for i in `seq 0 20` do ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) done # End of Test-6 ################################################################ # Test-7 BFD decay: rmt_min_tx is greater than decay_min_rx AT_CHECK([ovs-vsctl set Interface p0 bfd:decay_min_rx=3000 -- set interface p1 bfd:min_tx=5000]) # there will be poll sequences from both sides. and it is hard to determine the # order. so just skip 10000ms and check the RX/TX. at that time, p0 should be in decay already. for i in `seq 0 19`; do echo $i; ovs-appctl bfd/show; ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [5000ms]) BFD_CHECK_RX([p0], [5000ms], [3000ms], [500ms]) # then, there should be no change of status, for i in `seq 0 9` do ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [5000ms]) BFD_CHECK_RX([p0], [5000ms], [3000ms], [500ms]) done # reset the p1's min_tx to 500ms. AT_CHECK([ovs-vsctl set Interface p1 bfd:min_tx=500]) # check the poll sequence. since p0 has been in decay, now the RX will show 3000ms. 
BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [3000ms], [3000ms], [500ms]) # advance the clock by 3000ms, at that time, p1 will send the control packets. # then there will be no poll flags. for i in `seq 0 5`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [3000ms], [3000ms], [500ms]) # End of Test-7 ############################################################### # Test-8 BFD decay: state up->down->up. # turn bfd off on p1 AT_CHECK([ovs-vsctl set Interface p1 bfd:enable=false]) # check the state change of bfd on p0. After 9000 ms (3 min_rx intervals) for i in `seq 0 8`; do ovs-appctl time/warp 1000; done BFD_CHECK([p0], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [300ms], [300ms], [1ms]) # resume the bfd on p1. the bfd should not go to decay mode direclty. AT_CHECK([ovs-vsctl set Interface p1 bfd:enable=true]) for i in `seq 0 1`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [Control Detection Time Expired], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) # since the decay_min_rx is still 3000ms, so after 3000ms, there should be the decay and poll sequence. 
for i in `seq 0 5`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [Control Detection Time Expired], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [Control Detection Time Expired]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [3000ms], [3000ms], [500ms]) # End of Test-8 ################################################################ OVS_VSWITCHD_STOP AT_CLEANUP # Tests below are for bfd forwarding_if_rx feature. # forwarding_if_rx Test1: bfd is enabled on one end of link. AT_SETUP([bfd - bfd forwarding_if_rx 1]) OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy -- \ add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 ofport_request=2 -- \ add-port br0 p0 -- set Interface p0 type=patch \ options:peer=p1 ofport_request=1 -- \ set Interface p0 bfd:enable=true bfd:min_tx=500 bfd:min_rx=500 -- \ add-port br1 p2 -- set Interface p2 type=internal ofport_request=3]) ovs-appctl time/stop # check the inital status. BFD_CHECK([p0], [false], [false], [none], [down], [No Diagnostic], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [500ms], [500ms], [1ms]) # enable forwarding_if_rx. AT_CHECK([ovs-vsctl set Interface p0 bfd:forwarding_if_rx=true], [0]) # there should be no change of forwarding flag, since # there is no traffic. for i in `seq 0 3` do ovs-appctl time/warp 500 BFD_CHECK([p0], [false], [false], [none], [down], [No Diagnostic], [none], [down], [No Diagnostic]) done # receive one packet. AT_CHECK([ovs-ofctl packet-out br1 3 2 "90e2ba01475000101856b2e80806000108000604000100101856b2e80202020300000000000002020202"], [0], [stdout], []) for i in `seq 0 14` do ovs-appctl time/warp 100 # the forwarding flag should be true, since there is data received. 
BFD_CHECK([p0], [true], [false], [none], [down], [No Diagnostic], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [500ms], [500ms], [1ms]) done # Stop sending packets for 1000ms. for i in `seq 0 9`; do ovs-appctl time/warp 100; done BFD_CHECK([p0], [false], [false], [none], [down], [No Diagnostic], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [500ms], [500ms], [1ms]) # receive packet at 1/100ms rate for 1000ms. for i in `seq 0 9` do ovs-appctl time/warp 100 AT_CHECK([ovs-ofctl packet-out br1 3 2 "90e2ba01475000101856b2e80806000108000604000100101856b2e80202020300000000000002020202"], [0], [stdout], []) done # the forwarding flag should be true, since there is data received. BFD_CHECK([p0], [true], [false], [none], [down], [No Diagnostic], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [500ms], [500ms], [1ms]) # reset bfd forwarding_if_rx. AT_CHECK([ovs-vsctl set Interface p0 bfd:forwarding_if_rx=false], [0]) # forwarding flag should turn to false since the STATE is DOWN. BFD_CHECK([p0], [false], [false], [none], [down], [No Diagnostic], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [500ms], [500ms], [1ms]) AT_CHECK([ovs-vsctl del-br br1], [0], [ignore]) AT_CLEANUP # forwarding_if_rx Test2: bfd is enabled on both ends of link. 
AT_SETUP([bfd - bfd forwarding_if_rx 2]) OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy -- \ add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 ofport_request=2 -- \ add-port br0 p0 -- set Interface p0 type=patch \ options:peer=p1 ofport_request=1 -- \ set Interface p0 bfd:enable=true bfd:min_tx=500 bfd:min_rx=500 -- \ set Interface p1 bfd:enable=true bfd:min_tx=300 bfd:min_rx=300 -- \ add-port br1 p2 -- set Interface p2 type=internal ofport_request=3]) ovs-appctl time/stop # advance the clock, to stablize the states. for i in `seq 0 9`; do ovs-appctl time/warp 500; done # enable forwarding_if_rx. AT_CHECK([ovs-vsctl set Interface p0 bfd:forwarding_if_rx=true], [0]) # there should be no change of the forwarding flag, since # the bfd on both ends is already up. for i in `seq 0 5` do ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) done # stop the bfd on one side. AT_CHECK([ovs-vsctl set Interface p1 bfd:enable=false], [0]) # for within 1500ms, the detection timer is not out. # there is no change to status. for i in `seq 0 1` do ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) for i in `seq 0 5` do AT_CHECK([ovs-ofctl packet-out br1 3 2 "90e2ba01475000101856b2e80806000108000604000100101856b2e80202020300000000000002020202"], [0], [stdout], []) done done # at 1500ms, the STATE should go DOWN, due to Control Detection Time Expired. # but forwarding flag should be still true. ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) # receive packet at 1/100ms rate for 1000ms. 
for i in `seq 0 9` do AT_CHECK([ovs-ofctl packet-out br1 3 2 "90e2ba01475000101856b2e80806000108000604000100101856b2e80202020300000000000002020202"], [0], [stdout], []) ovs-appctl time/warp 100 # the forwarding flag should always be true during this time. BFD_CHECK([p0], [true], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) done # reset bfd forwarding_if_rx. AT_CHECK([ovs-vsctl set Interface p0 bfd:forwarding_if_rx=false], [0]) # forwarding flag should turn to false since the STATE is DOWN. BFD_CHECK([p0], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [500ms], [500ms], [1ms]) # re-enable bfd on the other end. the states should be up. AT_CHECK([ovs-vsctl set Interface p1 bfd:enable=true bfd:min_tx=300 bfd:min_rx=300]) # advance the clock, to stablize the states. for i in `seq 0 9`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [Control Detection Time Expired], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [Control Detection Time Expired]) BFD_CHECK_TX([p0], [500ms], [500ms], [300ms]) BFD_CHECK_RX([p0], [500ms], [500ms], [300ms]) AT_CHECK([ovs-vsctl del-br br1], [0], [ignore]) AT_CLEANUP # forwarding_if_rx Test3: bfd is enabled on both ends of link and decay is enabled. AT_SETUP([bfd - bfd forwarding_if_rx 3]) OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy -- \ add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 ofport_request=2 -- \ add-port br0 p0 -- set Interface p0 type=patch \ options:peer=p1 ofport_request=1 -- \ set Interface p0 bfd:enable=true bfd:min_tx=300 bfd:min_rx=300 bfd:decay_min_rx=3000 -- \ set Interface p1 bfd:enable=true bfd:min_tx=500 bfd:min_rx=500]) ovs-appctl time/stop # advance the clock, to stablize the states. 
for i in `seq 0 19`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [3000ms], [3000ms], [500ms]) # enable forwarding_if_rx. AT_CHECK([ovs-vsctl set Interface p0 bfd:forwarding_if_rx=true], [0]) # there should be no change of the forwarding flag, since # the bfd on both ends is already up. for i in `seq 0 9` do ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) done # reconfigure the decay_min_rx to 1000ms. check the poll sequence. AT_CHECK([ovs-vsctl set interface p0 bfd:decay_min_rx=1000]) BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [500ms], [300ms], [500ms]) # wait for 2000ms to decay. for i in `seq 0 3`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [final], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [poll], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [1000ms], [1000ms], [500ms]) # wait for 1000ms, so that the flags will go back to none. for i in `seq 0 1`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) BFD_CHECK_TX([p0], [500ms], [300ms], [500ms]) BFD_CHECK_RX([p0], [1000ms], [1000ms], [500ms]) # stop the bfd on one side. AT_CHECK([ovs-vsctl set Interface p1 bfd:enable=false], [0]) # for within 2500ms, the detection timer is not out. 
# there is no change to status. for i in `seq 0 4` do ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [up], [No Diagnostic], [none], [up], [No Diagnostic]) AT_CHECK([ovs-ofctl packet-out br1 3 2 "90e2ba01475000101856b2e80806000108000604000100101856b2e80202020300000000000002020202"], [0], [stdout], []) done # at 3000ms, the STATE should go DOWN, due to Control Detection Time Expired. # but forwarding flag should be still true. ovs-appctl time/warp 500 BFD_CHECK([p0], [true], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) # receive packet at 1/100ms rate for 1000ms. for i in `seq 0 9` do AT_CHECK([ovs-ofctl packet-out br1 3 2 "90e2ba01475000101856b2e80806000108000604000100101856b2e80202020300000000000002020202"], [0], [stdout], []) ovs-appctl time/warp 100 # the forwarding flag should always be true during this time. BFD_CHECK([p0], [true], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) done # stop receiving for 2000ms. for i in `seq 0 19`; do ovs-appctl time/warp 100; done BFD_CHECK([p0], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) # reset bfd forwarding_if_rx. AT_CHECK([ovs-vsctl set Interface p0 bfd:forwarding_if_rx=false]) # forwarding flag should turn to false since the STATE is DOWN. BFD_CHECK([p0], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [300ms], [300ms], [1ms]) # re-enable bfd forwarding_if_rx. AT_CHECK([ovs-vsctl set Interface p0 bfd:forwarding_if_rx=true]) # there should be no change. BFD_CHECK([p0], [false], [false], [none], [down], [Control Detection Time Expired], [none], [down], [No Diagnostic]) BFD_CHECK_TX([p0], [1000ms], [1000ms], [0ms]) BFD_CHECK_RX([p0], [300ms], [300ms], [1ms]) # re-enable bfd on the other end. the states should be up. 
AT_CHECK([ovs-vsctl set Interface p1 bfd:enable=true bfd:min_tx=300 bfd:min_rx=300]) # advance the clock, to stablize the states. for i in `seq 0 9`; do ovs-appctl time/warp 500; done BFD_CHECK([p0], [true], [false], [none], [up], [Control Detection Time Expired], [none], [up], [No Diagnostic]) BFD_CHECK([p1], [true], [false], [none], [up], [No Diagnostic], [none], [up], [Control Detection Time Expired]) BFD_CHECK_TX([p0], [300ms], [300ms], [300ms]) BFD_CHECK_RX([p0], [1000ms], [1000ms], [300ms]) AT_CHECK([ovs-vsctl del-br br1], [0], [ignore]) AT_CLEANUPopenvswitch-2.0.1+git20140120/tests/bundle.at000066400000000000000000000263251226605124000204230ustar00rootroot00000000000000AT_BANNER([bundle link selection]) # The test-bundle program prints a lot of output on stdout, but each of the # tests below ignores it because it will vary a bit depending on endianness and # floating point precision. test-bundle will output an error message on # stderr and return with exit code 1 if anything really goes wrong. In each # case, we list the (approximate) expected output in a comment to aid debugging # if the test does fail. 
AT_SETUP([hrw bundle link selection]) AT_CHECK([[test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5']], [0], [ignore]) # 100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 # 110000: disruption=0.50 (perfect=0.50) 0.50 0.50 0.00 0.00 0.00 0.00 # 010000: disruption=0.50 (perfect=0.50) 0.00 1.00 0.00 0.00 0.00 0.00 # 011000: disruption=0.50 (perfect=0.50) 0.00 0.50 0.50 0.00 0.00 0.00 # 111000: disruption=0.33 (perfect=0.33) 0.33 0.33 0.34 0.00 0.00 0.00 # 101000: disruption=0.33 (perfect=0.33) 0.50 0.00 0.50 0.00 0.00 0.00 # 001000: disruption=0.50 (perfect=0.50) 0.00 0.00 1.00 0.00 0.00 0.00 # 001100: disruption=0.50 (perfect=0.50) 0.00 0.00 0.50 0.50 0.00 0.00 # 101100: disruption=0.33 (perfect=0.33) 0.33 0.00 0.34 0.33 0.00 0.00 # 111100: disruption=0.25 (perfect=0.25) 0.25 0.25 0.25 0.25 0.00 0.00 # 011100: disruption=0.25 (perfect=0.25) 0.00 0.33 0.33 0.33 0.00 0.00 # 010100: disruption=0.33 (perfect=0.33) 0.00 0.50 0.00 0.50 0.00 0.00 # 110100: disruption=0.33 (perfect=0.33) 0.33 0.33 0.00 0.34 0.00 0.00 # 100100: disruption=0.33 (perfect=0.33) 0.50 0.00 0.00 0.50 0.00 0.00 # 000100: disruption=0.50 (perfect=0.50) 0.00 0.00 0.00 1.00 0.00 0.00 # 000110: disruption=0.50 (perfect=0.50) 0.00 0.00 0.00 0.50 0.50 0.00 # 100110: disruption=0.33 (perfect=0.33) 0.33 0.00 0.00 0.33 0.33 0.00 # 110110: disruption=0.25 (perfect=0.25) 0.25 0.25 0.00 0.25 0.25 0.00 # 010110: disruption=0.25 (perfect=0.25) 0.00 0.34 0.00 0.33 0.33 0.00 # 011110: disruption=0.25 (perfect=0.25) 0.00 0.25 0.25 0.25 0.25 0.00 # 111110: disruption=0.20 (perfect=0.20) 0.20 0.20 0.20 0.20 0.20 0.00 # 101110: disruption=0.20 (perfect=0.20) 0.25 0.00 0.25 0.25 0.25 0.00 # 001110: disruption=0.25 (perfect=0.25) 0.00 0.00 0.34 0.33 0.33 0.00 # 001010: disruption=0.33 (perfect=0.33) 0.00 0.00 0.50 0.00 0.50 0.00 # 101010: disruption=0.33 (perfect=0.33) 0.33 0.00 0.34 0.00 0.33 0.00 # 111010: disruption=0.25 (perfect=0.25) 0.25 0.25 0.25 0.00 0.25 0.00 # 011010: 
disruption=0.25 (perfect=0.25) 0.00 0.33 0.34 0.00 0.33 0.00 # 010010: disruption=0.34 (perfect=0.33) 0.00 0.50 0.00 0.00 0.50 0.00 # 110010: disruption=0.33 (perfect=0.33) 0.33 0.33 0.00 0.00 0.33 0.00 # 100010: disruption=0.33 (perfect=0.33) 0.50 0.00 0.00 0.00 0.50 0.00 # 000010: disruption=0.50 (perfect=0.50) 0.00 0.00 0.00 0.00 1.00 0.00 # 000011: disruption=0.50 (perfect=0.50) 0.00 0.00 0.00 0.00 0.50 0.50 # 100011: disruption=0.33 (perfect=0.33) 0.33 0.00 0.00 0.00 0.33 0.33 # 110011: disruption=0.25 (perfect=0.25) 0.25 0.25 0.00 0.00 0.25 0.25 # 010011: disruption=0.25 (perfect=0.25) 0.00 0.33 0.00 0.00 0.33 0.33 # 011011: disruption=0.25 (perfect=0.25) 0.00 0.25 0.25 0.00 0.25 0.25 # 111011: disruption=0.20 (perfect=0.20) 0.20 0.20 0.20 0.00 0.20 0.20 # 101011: disruption=0.20 (perfect=0.20) 0.25 0.00 0.25 0.00 0.25 0.25 # 001011: disruption=0.25 (perfect=0.25) 0.00 0.00 0.34 0.00 0.33 0.33 # 001111: disruption=0.25 (perfect=0.25) 0.00 0.00 0.25 0.25 0.25 0.25 # 101111: disruption=0.20 (perfect=0.20) 0.20 0.00 0.20 0.20 0.20 0.20 # 111111: disruption=0.17 (perfect=0.17) 0.17 0.17 0.17 0.17 0.17 0.17 # 011111: disruption=0.17 (perfect=0.17) 0.00 0.20 0.20 0.20 0.20 0.20 # 010111: disruption=0.20 (perfect=0.20) 0.00 0.25 0.00 0.25 0.25 0.25 # 110111: disruption=0.20 (perfect=0.20) 0.20 0.20 0.00 0.20 0.20 0.20 # 100111: disruption=0.20 (perfect=0.20) 0.25 0.00 0.00 0.25 0.25 0.25 # 000111: disruption=0.25 (perfect=0.25) 0.00 0.00 0.00 0.33 0.33 0.33 # 000101: disruption=0.33 (perfect=0.33) 0.00 0.00 0.00 0.50 0.00 0.50 # 100101: disruption=0.33 (perfect=0.33) 0.33 0.00 0.00 0.33 0.00 0.33 # 110101: disruption=0.25 (perfect=0.25) 0.25 0.25 0.00 0.25 0.00 0.25 # 010101: disruption=0.25 (perfect=0.25) 0.00 0.33 0.00 0.33 0.00 0.33 # 011101: disruption=0.25 (perfect=0.25) 0.00 0.25 0.25 0.25 0.00 0.25 # 111101: disruption=0.20 (perfect=0.20) 0.20 0.20 0.20 0.20 0.00 0.20 # 101101: disruption=0.20 (perfect=0.20) 0.25 0.00 0.25 0.25 0.00 0.25 # 001101: 
disruption=0.25 (perfect=0.25) 0.00 0.00 0.33 0.33 0.00 0.33 # 001001: disruption=0.33 (perfect=0.33) 0.00 0.00 0.50 0.00 0.00 0.50 # 101001: disruption=0.33 (perfect=0.33) 0.33 0.00 0.33 0.00 0.00 0.33 # 111001: disruption=0.25 (perfect=0.25) 0.25 0.25 0.25 0.00 0.00 0.25 # 011001: disruption=0.25 (perfect=0.25) 0.00 0.33 0.34 0.00 0.00 0.33 # 010001: disruption=0.34 (perfect=0.33) 0.00 0.50 0.00 0.00 0.00 0.50 # 110001: disruption=0.33 (perfect=0.33) 0.33 0.33 0.00 0.00 0.00 0.34 # 100001: disruption=0.33 (perfect=0.33) 0.50 0.00 0.00 0.00 0.00 0.50 # 000001: disruption=0.50 (perfect=0.50) 0.00 0.00 0.00 0.00 0.00 1.00 # 000000: disruption=1.00 (perfect=1.00) 0.00 0.00 0.00 0.00 0.00 0.00 # 100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 AT_CLEANUP AT_SETUP([active_backup bundle link selection]) AT_CHECK([[test-bundle 'symmetric_l4,60,active_backup,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5,6']], [0], [100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 110000: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 010000: disruption=1.00 (perfect=1.00) 0.00 1.00 0.00 0.00 0.00 0.00 011000: disruption=0.00 (perfect=0.00) 0.00 1.00 0.00 0.00 0.00 0.00 111000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 101000: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 001000: disruption=1.00 (perfect=1.00) 0.00 0.00 1.00 0.00 0.00 0.00 001100: disruption=0.00 (perfect=0.00) 0.00 0.00 1.00 0.00 0.00 0.00 101100: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 111100: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 011100: disruption=1.00 (perfect=1.00) 0.00 1.00 0.00 0.00 0.00 0.00 010100: disruption=0.00 (perfect=0.00) 0.00 1.00 0.00 0.00 0.00 0.00 110100: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 100100: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 000100: disruption=1.00 (perfect=1.00) 0.00 0.00 0.00 1.00 0.00 0.00 000110: disruption=0.00 
(perfect=0.00) 0.00 0.00 0.00 1.00 0.00 0.00 100110: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 110110: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 010110: disruption=1.00 (perfect=1.00) 0.00 1.00 0.00 0.00 0.00 0.00 011110: disruption=0.00 (perfect=0.00) 0.00 1.00 0.00 0.00 0.00 0.00 111110: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 101110: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 001110: disruption=1.00 (perfect=1.00) 0.00 0.00 1.00 0.00 0.00 0.00 001010: disruption=0.00 (perfect=0.00) 0.00 0.00 1.00 0.00 0.00 0.00 101010: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 111010: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 011010: disruption=1.00 (perfect=1.00) 0.00 1.00 0.00 0.00 0.00 0.00 010010: disruption=0.00 (perfect=0.00) 0.00 1.00 0.00 0.00 0.00 0.00 110010: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 100010: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 000010: disruption=1.00 (perfect=1.00) 0.00 0.00 0.00 0.00 1.00 0.00 000011: disruption=0.00 (perfect=0.00) 0.00 0.00 0.00 0.00 1.00 0.00 100011: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 110011: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 010011: disruption=1.00 (perfect=1.00) 0.00 1.00 0.00 0.00 0.00 0.00 011011: disruption=0.00 (perfect=0.00) 0.00 1.00 0.00 0.00 0.00 0.00 111011: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 101011: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 001011: disruption=1.00 (perfect=1.00) 0.00 0.00 1.00 0.00 0.00 0.00 001111: disruption=0.00 (perfect=0.00) 0.00 0.00 1.00 0.00 0.00 0.00 101111: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 111111: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 011111: disruption=1.00 (perfect=1.00) 0.00 1.00 0.00 0.00 0.00 0.00 010111: disruption=0.00 (perfect=0.00) 0.00 1.00 0.00 0.00 0.00 0.00 110111: 
disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 100111: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 000111: disruption=1.00 (perfect=1.00) 0.00 0.00 0.00 1.00 0.00 0.00 000101: disruption=0.00 (perfect=0.00) 0.00 0.00 0.00 1.00 0.00 0.00 100101: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 110101: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 010101: disruption=1.00 (perfect=1.00) 0.00 1.00 0.00 0.00 0.00 0.00 011101: disruption=0.00 (perfect=0.00) 0.00 1.00 0.00 0.00 0.00 0.00 111101: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 101101: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 001101: disruption=1.00 (perfect=1.00) 0.00 0.00 1.00 0.00 0.00 0.00 001001: disruption=0.00 (perfect=0.00) 0.00 0.00 1.00 0.00 0.00 0.00 101001: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 111001: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 011001: disruption=1.00 (perfect=1.00) 0.00 1.00 0.00 0.00 0.00 0.00 010001: disruption=0.00 (perfect=0.00) 0.00 1.00 0.00 0.00 0.00 0.00 110001: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 100001: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 000001: disruption=1.00 (perfect=1.00) 0.00 0.00 0.00 0.00 0.00 1.00 000000: disruption=1.00 (perfect=1.00) 0.00 0.00 0.00 0.00 0.00 0.00 100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 ]) AT_CLEANUP AT_SETUP([hrw bundle single link selection]) AT_CHECK([[test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:1']], [0], [ignore]) # 1: disruption=1.00 (perfect=1.00) 1.00 # 0: disruption=1.00 (perfect=1.00) 0.00 # 1: disruption=1.00 (perfect=1.00) 1.00 AT_CLEANUP AT_SETUP([hrw bundle no link selection]) AT_CHECK([[test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:']], [0], [ignore]) AT_CLEANUP #: disruption=0.00 (perfect=0.00) #: disruption=0.00 (perfect=0.00) AT_SETUP([bundle action missing argument]) 
AT_CHECK([ovs-ofctl parse-flow actions=bundle], [1], [], [ovs-ofctl: : not enough arguments to bundle action ]) AT_CLEANUP AT_SETUP([bundle action bad fields]) AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(xyzzy,60,hrw,ofport,slaves:1,2))'], [1], [], [ovs-ofctl: xyzzy,60,hrw,ofport,slaves:1,2: unknown fields `xyzzy' ]) AT_CLEANUP AT_SETUP([bundle action bad algorithm]) AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,fubar,ofport,slaves:1,2))'], [1], [], [ovs-ofctl: symmetric_l4,60,fubar,ofport,slaves:1,2: unknown algorithm `fubar' ]) AT_CLEANUP AT_SETUP([bundle action bad slave type]) AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,hrw,robot,slaves:1,2))'], [1], [], [ovs-ofctl: symmetric_l4,60,hrw,robot,slaves:1,2: unknown slave_type `robot' ]) AT_CLEANUP AT_SETUP([bundle action bad slave delimiter]) AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,hrw,ofport,robot:1,2))'], [1], [], [ovs-ofctl: symmetric_l4,60,hrw,ofport,robot:1,2: missing slave delimiter, expected `slaves' got `robot' ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/cfm.at000066400000000000000000000036641226605124000177200ustar00rootroot00000000000000AT_BANNER([cfm]) m4_define([CFM_CHECK_EXTENDED], [ AT_CHECK([ovs-appctl cfm/show $1 | sed -e '/next CCM tx:/d' | sed -e '/next fault check:/d' | sed -e '/recv since check:/d'],[0], [dnl ---- $1 ---- MPID $2: extended average health: $3 opstate: $4 remote_opstate: $5 interval: $6 Remote MPID $7 opstate: $8 ]) ]) # test cfm under demand mode. 
AT_SETUP([cfm - demand mode]) #Create 2 bridges connected by patch ports and enable BFD OVS_VSWITCHD_START([add-br br1 -- \ set bridge br1 datapath-type=dummy \ other-config:hwaddr=aa:55:aa:56:00:00 -- \ add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 -- \ add-port br0 p0 -- set Interface p0 type=patch \ options:peer=p1 -- \ set Interface p0 cfm_mpid=1 other_config:cfm_interval=300 other_config:cfm_extended=true -- \ set Interface p1 cfm_mpid=2 other_config:cfm_interval=300 other_config:cfm_extended=true ]) ovs-appctl time/stop # wait for a while to stablize cfm. for i in `seq 0 100`; do ovs-appctl time/warp 100; done CFM_CHECK_EXTENDED([p0], [1], [100], [up], [up], [300ms], [2], [up]) CFM_CHECK_EXTENDED([p1], [2], [100], [up], [up], [300ms], [1], [up]) # turn on demand mode on one end. AT_CHECK([ovs-vsctl set interface p0 other_config:cfm_demand=true]) # cfm should never go down. for i in `seq 0 100` do ovs-appctl time/warp 100 CFM_CHECK_EXTENDED([p0], [1], [100], [up], [up], [300ms], [2], [up]) CFM_CHECK_EXTENDED([p1], [2], [100], [up], [up], [300ms], [1], [up]) done # turn on demand mode on the other end. 
AT_CHECK([ovs-vsctl set interface p1 other_config:cfm_demand=true]) for i in `seq 0 100` do ovs-appctl time/warp 100 CFM_CHECK_EXTENDED([p0], [1], [100], [up], [up], [300ms], [2], [up]) CFM_CHECK_EXTENDED([p1], [2], [100], [up], [up], [300ms], [1], [up]) done OVS_VSWITCHD_STOP AT_CLEANUP openvswitch-2.0.1+git20140120/tests/check-structs.at000066400000000000000000000024241226605124000217260ustar00rootroot00000000000000AT_BANNER([struct alignment checker unit tests]) m4_define([check_structs], [$top_srcdir/build-aux/check-structs]) m4_define([RUN_STRUCT_CHECKER], [AT_KEYWORDS([check-structs]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_DATA([test.h], [$1 ]) AT_CHECK_UNQUOTED([$PYTHON check_structs test.h], [$2], [$3], [$4])]) AT_SETUP([check struct tail padding]) RUN_STRUCT_CHECKER( [struct xyz { ovs_be16 x; };], [1], [], [test.h:3: warning: struct xyz needs 2 bytes of tail padding ]) AT_CLEANUP AT_SETUP([check struct internal alignment]) RUN_STRUCT_CHECKER( [struct xyzzy { ovs_be16 x; ovs_be32 y; };], [1], [], [test.h:3: warning: struct xyzzy member y is 2 bytes short of 4-byte alignment ]) AT_CLEANUP AT_SETUP([check struct declared size]) RUN_STRUCT_CHECKER( [struct wibble { ovs_be64 z; }; OFP_ASSERT(sizeof(struct wibble) == 12); ], [1], [], [test.h:4: warning: struct wibble is 8 bytes long but declared as 12 ]) AT_CLEANUP AT_SETUP([check wrong struct's declared size]) RUN_STRUCT_CHECKER( [struct moo { ovs_be64 bar; }; OFP_ASSERT(sizeof(struct moo) == 8); struct wibble { ovs_be64 z; }; OFP_ASSERT(sizeof(struct moo) == 8); ], [1], [], [test.h:8: warning: checking size of struct moo but struct wibble was most recently defined ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/classifier.at000066400000000000000000000012141226605124000212640ustar00rootroot00000000000000AT_BANNER([flow classifier unit tests]) m4_foreach( [testname], [[empty], [destroy-null], [single-rule], [rule-replacement], [many-rules-in-one-list], [many-rules-in-one-table], [many-rules-in-two-tables], 
[many-rules-in-five-tables]], [AT_SETUP([flow classifier - m4_bpatsubst(testname, [-], [ ])]) AT_CHECK([test-classifier testname], [0], [], []) AT_CLEANUP])]) AT_BANNER([miniflow unit tests]) m4_foreach( [testname], [[miniflow], [minimask_has_extra], [minimask_combine]], [AT_SETUP([miniflow - m4_bpatsubst(testname, [-], [ ])]) AT_CHECK([test-classifier testname], [0], [], []) AT_CLEANUP])]) openvswitch-2.0.1+git20140120/tests/daemon-py.at000066400000000000000000000222151226605124000210350ustar00rootroot00000000000000AT_BANNER([daemon unit tests - Python]) AT_SETUP([daemon - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CAPTURE_FILE([pid]) AT_CAPTURE_FILE([expected]) # Start the daemon and wait for the pidfile to get created # and that its contents are the correct pid. AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/pid& echo $! > expected], [0]) OVS_WAIT_UNTIL([test -s pid], [kill `cat expected`]) AT_CHECK( [pid=`cat pid` && expected=`cat expected` && test "$pid" = "$expected"], [0], [], [], [kill `cat expected`]) AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat expected`]) # Kill the daemon and make sure that the pidfile gets deleted. kill `cat expected` OVS_WAIT_WHILE([kill -0 `cat expected`]) AT_CHECK([test ! -e pid]) AT_CLEANUP AT_SETUP([daemon --monitor - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CAPTURE_FILE([pid]) AT_CAPTURE_FILE([parent]) AT_CAPTURE_FILE([parentpid]) AT_CAPTURE_FILE([newpid]) # Start the daemon and wait for the pidfile to get created. AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/pid --monitor& echo $! > parent], [0]) OVS_WAIT_UNTIL([test -s pid], [kill `cat parent`]) # Check that the pidfile names a running process, # and that the parent process of that process is our child process. 
AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat parent`]) AT_CHECK([ps -o ppid= -p `cat pid` > parentpid], [0], [], [], [kill `cat parent`]) AT_CHECK( [parentpid=`cat parentpid` && parent=`cat parent` && test $parentpid = $parent], [0], [], [], [kill `cat parent`]) # Kill the daemon process, making it look like a segfault, # and wait for a new child process to get spawned. AT_CHECK([cp pid oldpid], [0], [], [], [kill `cat parent`]) AT_CHECK([kill -SEGV `cat pid`], [0], [], [ignore], [kill `cat parent`]) OVS_WAIT_WHILE([kill -0 `cat oldpid`], [kill `cat parent`]) OVS_WAIT_UNTIL([test -s pid && test `cat pid` != `cat oldpid`], [kill `cat parent`]) AT_CHECK([cp pid newpid], [0], [], [], [kill `cat parent`]) # Check that the pidfile names a running process, # and that the parent process of that process is our child process. AT_CHECK([ps -o ppid= -p `cat pid` > parentpid], [0], [], [], [kill `cat parent`]) AT_CHECK( [parentpid=`cat parentpid` && parent=`cat parent` && test $parentpid = $parent], [0], [], [], [kill `cat parent`]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. AT_CHECK([kill `cat pid`], [0], [], [ignore], [kill `cat parent`]) OVS_WAIT_WHILE([kill -0 `cat parent` || kill -0 `cat newpid` || test -e pid], [kill `cat parent`]) AT_CLEANUP AT_SETUP([daemon --monitor restart exit code - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CAPTURE_FILE([pid]) AT_CAPTURE_FILE([parent]) AT_CAPTURE_FILE([parentpid]) AT_CAPTURE_FILE([newpid]) # Start the daemon and wait for the pidfile to get created. AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/pid --monitor& echo $! > parent], [0]) OVS_WAIT_UNTIL([test -s pid], [kill `cat parent`]) # Check that the pidfile names a running process, # and that the parent process of that process is our child process. 
AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat parent`]) AT_CHECK([ps -o ppid= -p `cat pid` > parentpid], [0], [], [], [kill `cat parent`]) AT_CHECK( [parentpid=`cat parentpid` && parent=`cat parent` && test $parentpid = $parent], [0], [], [], [kill `cat parent`]) # HUP the daemon process causing it to throw an exception, # and wait for a new child process to get spawned. AT_CHECK([cp pid oldpid], [0], [], [], [kill `cat parent`]) AT_CHECK([kill -HUP `cat pid`], [0], [], [ignore], [kill `cat parent`]) OVS_WAIT_WHILE([kill -0 `cat oldpid`], [kill `cat parent`]) OVS_WAIT_UNTIL([test -s pid && test `cat pid` != `cat oldpid`], [kill `cat parent`]) AT_CHECK([cp pid newpid], [0], [], [], [kill `cat parent`]) # Check that the pidfile names a running process, # and that the parent process of that process is our child process. AT_CHECK([ps -o ppid= -p `cat pid` > parentpid], [0], [], [], [kill `cat parent`]) AT_CHECK( [parentpid=`cat parentpid` && parent=`cat parent` && test $parentpid = $parent], [0], [], [], [kill `cat parent`]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. AT_CHECK([kill `cat pid`], [0], [], [ignore], [kill `cat parent`]) OVS_WAIT_WHILE([kill -0 `cat parent` || kill -0 `cat newpid` || test -e pid], [kill `cat parent`]) AT_CLEANUP AT_SETUP([daemon --detach - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CAPTURE_FILE([pid]) # Start the daemon and make sure that the pidfile exists immediately. # We don't wait for the pidfile to get created because the daemon is # supposed to do so before the parent exits. AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/pid --detach], [0]) AT_CHECK([test -s pid]) AT_CHECK([kill -0 `cat pid`]) # Kill the daemon and make sure that the pidfile gets deleted. cp pid saved-pid kill `cat pid` OVS_WAIT_WHILE([kill -0 `cat saved-pid`]) AT_CHECK([test ! 
-e pid]) AT_CLEANUP AT_SETUP([daemon --detach --monitor - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) m4_define([CHECK], [AT_CHECK([$1], [$2], [$3], [$4], [kill `cat daemon monitor`])]) AT_CAPTURE_FILE([daemon]) AT_CAPTURE_FILE([olddaemon]) AT_CAPTURE_FILE([newdaemon]) AT_CAPTURE_FILE([monitor]) AT_CAPTURE_FILE([newmonitor]) AT_CAPTURE_FILE([init]) # Start the daemon and make sure that the pidfile exists immediately. # We don't wait for the pidfile to get created because the daemon is # supposed to do so before the parent exits. AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/daemon --detach --monitor], [0]) AT_CHECK([test -s daemon]) # Check that the pidfile names a running process, # and that the parent process of that process is a running process, # and that the parent process of that process is init. CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) CHECK([ps -o ppid= -p `cat monitor` > init]) CHECK([test `cat init` != $$]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) CHECK([kill -SEGV `cat daemon`], [0], [ignore], [ignore]) OVS_WAIT_WHILE([kill -0 `cat olddaemon`], [kill `cat olddaemon daemon`]) OVS_WAIT_UNTIL([test -s daemon && test `cat daemon` != `cat olddaemon`], [kill `cat olddaemon daemon`]) CHECK([cp daemon newdaemon]) # Check that the pidfile names a running process, # and that the parent process of that process is our child process. CHECK([kill -0 `cat daemon`]) CHECK([diff olddaemon newdaemon], [1], [ignore]) CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) CHECK([ps -o ppid= -p `cat newmonitor` > init]) CHECK([test `cat init` != $$]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. 
CHECK([kill `cat daemon`], [0], [], [ignore]) OVS_WAIT_WHILE( [kill -0 `cat monitor` || kill -0 `cat newdaemon` || test -e daemon], [kill `cat monitor newdaemon`]) m4_undefine([CHECK]) AT_CLEANUP AT_SETUP([daemon --detach startup errors - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CAPTURE_FILE([pid]) AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/pid --detach --bail], [1], [], [stderr]) AT_CHECK([grep 'test-daemon.py: exiting after daemonize_start() as requested' stderr], [0], [ignore], []) AT_CHECK([test ! -s pid]) AT_CLEANUP AT_SETUP([daemon --detach --monitor startup errors - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CAPTURE_FILE([pid]) AT_CHECK([$PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/pid --detach --monitor --bail], [1], [], [stderr]) AT_CHECK([grep 'test-daemon.py: exiting after daemonize_start() as requested' stderr], [0], [ignore], []) AT_CHECK([test ! -s pid]) AT_CLEANUP AT_SETUP([daemon --detach closes standard fds - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CAPTURE_FILE([pid]) AT_CAPTURE_FILE([status]) AT_CAPTURE_FILE([stderr]) AT_CHECK([(yes 2>stderr; echo $? > status) | $PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/pid --detach], [0], [], []) AT_CHECK([kill `cat pid`]) AT_CHECK([test -s status]) if grep '[[bB]]roken pipe' stderr >/dev/null 2>&1; then # Something in the environment caused SIGPIPE to be ignored, but # 'yes' at least told us that it got EPIPE. Good enough; we know # that stdout was closed. : else # Otherwise make sure that 'yes' died from SIGPIPE. AT_CHECK([kill -l `cat status`], [0], [PIPE ]) fi AT_CLEANUP AT_SETUP([daemon --detach --monitor closes standard fds]) AT_CAPTURE_FILE([pid]) AT_CAPTURE_FILE([status]) AT_CAPTURE_FILE([stderr]) OVSDB_INIT([db]) AT_CHECK([(yes 2>stderr; echo $? 
> status) | $PYTHON $srcdir/test-daemon.py --pidfile=`pwd`/pid --detach], [0], [], []) AT_CHECK([kill `cat pid`]) AT_CHECK([test -s status]) if grep '[[bB]]roken pipe' stderr >/dev/null 2>&1; then # Something in the environment caused SIGPIPE to be ignored, but # 'yes' at least told us that it got EPIPE. Good enough; we know # that stdout was closed. : else # Otherwise make sure that 'yes' died from SIGPIPE. AT_CHECK([kill -l `cat status`], [0], [PIPE ]) fi AT_CLEANUP openvswitch-2.0.1+git20140120/tests/daemon.at000066400000000000000000000142671226605124000204170ustar00rootroot00000000000000AT_BANNER([daemon unit tests - C]) AT_SETUP([daemon]) OVSDB_INIT([db]) AT_CAPTURE_FILE([pid]) AT_CAPTURE_FILE([expected]) # Start the daemon and wait for the pidfile to get created # and that its contents are the correct pid. AT_CHECK([ovsdb-server --pidfile="`pwd`"/pid --remote=punix:socket --unixctl="`pwd`"/unixctl db& echo $! > expected], [0]) OVS_WAIT_UNTIL([test -s pid], [kill `cat expected`]) AT_CHECK( [pid=`cat pid` && expected=`cat expected` && test "$pid" = "$expected"], [0], [], [], [kill `cat expected`]) AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat expected`]) # Kill the daemon and make sure that the pidfile gets deleted. kill `cat expected` OVS_WAIT_WHILE([kill -0 `cat expected`]) AT_CHECK([test ! -e pid]) AT_CLEANUP AT_SETUP([daemon --monitor]) OVSDB_INIT([db]) AT_CAPTURE_FILE([pid]) AT_CAPTURE_FILE([parent]) AT_CAPTURE_FILE([parentpid]) AT_CAPTURE_FILE([newpid]) # Start the daemon and wait for the pidfile to get created. AT_CHECK([ovsdb-server --monitor --pidfile="`pwd`"/pid --remote=punix:socket --unixctl="`pwd`"/unixctl db& echo $! > parent], [0]) OVS_WAIT_UNTIL([test -s pid], [kill `cat parent`]) # Check that the pidfile names a running process, # and that the parent process of that process is our child process. 
AT_CHECK([kill -0 `cat pid`], [0], [], [], [kill `cat parent`]) AT_CHECK([ps -o ppid= -p `cat pid` > parentpid], [0], [], [], [kill `cat parent`]) AT_CHECK( [parentpid=`cat parentpid` && parent=`cat parent` && test $parentpid = $parent], [0], [], [], [kill `cat parent`]) # Avoid a race between pidfile creation and notifying the parent, # which can easily trigger if ovsdb-server is slow (e.g. due to valgrind). OVS_WAIT_UNTIL( [ovs-appctl --timeout=10 -t "`pwd`/unixctl" version], [kill `cat pid`]) # Kill the daemon process, making it look like a segfault, # and wait for a new child process to get spawned. AT_CHECK([cp pid oldpid], [0], [], [], [kill `cat parent`]) AT_CHECK([kill -SEGV `cat pid`], [0], [], [ignore], [kill `cat parent`]) OVS_WAIT_WHILE([kill -0 `cat oldpid`], [kill `cat parent`]) OVS_WAIT_UNTIL([test -s pid && test `cat pid` != `cat oldpid`], [kill `cat parent`]) AT_CHECK([cp pid newpid], [0], [], [], [kill `cat parent`]) # Check that the pidfile names a running process, # and that the parent process of that process is our child process. AT_CHECK([ps -o ppid= -p `cat pid` > parentpid], [0], [], [], [kill `cat parent`]) AT_CHECK( [parentpid=`cat parentpid` && parent=`cat parent` && test $parentpid = $parent], [0], [], [], [kill `cat parent`]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. AT_CHECK([kill `cat pid`], [0], [], [ignore], [kill `cat parent`]) OVS_WAIT_WHILE([kill -0 `cat parent` || kill -0 `cat newpid` || test -e pid], [kill `cat parent`]) AT_CLEANUP AT_SETUP([daemon --detach]) AT_CAPTURE_FILE([pid]) OVSDB_INIT([db]) # Start the daemon and make sure that the pidfile exists immediately. # We don't wait for the pidfile to get created because the daemon is # supposed to do so before the parent exits. 
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/pid --remote=punix:socket --unixctl="`pwd`"/unixctl db], [0]) AT_CHECK([test -s pid]) AT_CHECK([kill -0 `cat pid`]) # Kill the daemon and make sure that the pidfile gets deleted. cp pid saved-pid kill `cat pid` OVS_WAIT_WHILE([kill -0 `cat saved-pid`]) AT_CHECK([test ! -e pid]) AT_CLEANUP AT_SETUP([daemon --detach --monitor]) m4_define([CHECK], [AT_CHECK([$1], [$2], [$3], [$4], [kill `cat daemon monitor`])]) OVSDB_INIT([db]) AT_CAPTURE_FILE([daemon]) AT_CAPTURE_FILE([olddaemon]) AT_CAPTURE_FILE([newdaemon]) AT_CAPTURE_FILE([monitor]) AT_CAPTURE_FILE([newmonitor]) AT_CAPTURE_FILE([init]) # Start the daemon and make sure that the pidfile exists immediately. # We don't wait for the pidfile to get created because the daemon is # supposed to do so before the parent exits. AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/daemon --monitor --remote=punix:socket --unixctl="`pwd`"/unixctl db], [0]) AT_CHECK([test -s daemon]) # Check that the pidfile names a running process, # and that the parent process of that process is a running process, # and that the parent process of that process is init. CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) CHECK([ps -o ppid= -p `cat monitor` > init]) CHECK([test `cat init` != $$]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) CHECK([kill -SEGV `cat daemon`], [0]) OVS_WAIT_WHILE([kill -0 `cat olddaemon`], [kill `cat olddaemon daemon`]) OVS_WAIT_UNTIL([test -s daemon && test `cat daemon` != `cat olddaemon`], [kill `cat olddaemon daemon`]) CHECK([cp daemon newdaemon]) # Check that the pidfile names a running process, # and that the parent process of that process is our child process. 
CHECK([kill -0 `cat daemon`]) CHECK([diff olddaemon newdaemon], [1], [ignore]) CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) CHECK([ps -o ppid= -p `cat newmonitor` > init]) CHECK([test `cat init` != $$]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) OVS_WAIT_WHILE( [kill -0 `cat monitor` || kill -0 `cat newdaemon` || test -e daemon], [kill `cat monitor newdaemon`]) m4_undefine([CHECK]) AT_CLEANUP AT_SETUP([daemon --detach startup errors]) AT_CAPTURE_FILE([pid]) OVSDB_INIT([db]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/pid --unixctl="`pwd`"/nonexistent/unixctl db], [1], [], [stderr]) AT_CHECK([grep 'ovsdb-server: could not initialize control socket' stderr], [0], [ignore], []) AT_CHECK([test ! -s pid]) AT_CLEANUP AT_SETUP([daemon --detach --monitor startup errors]) AT_CAPTURE_FILE([pid]) OVSDB_INIT([db]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/pid --monitor --unixctl="`pwd`"/nonexistent/unixctl db], [1], [], [stderr]) AT_CHECK([grep 'ovsdb-server: could not initialize control socket' stderr], [0], [ignore], []) AT_CHECK([test ! -s pid]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/file_name.at000066400000000000000000000061021226605124000210600ustar00rootroot00000000000000AT_BANNER([test dir_name and base_name functions]) m4_define([CHECK_FILE_NAME], [AT_SETUP([components of "$1" are "$2", "$3"]) AT_KEYWORDS([dir_name base_name]) AT_CHECK([test-file_name "AS_ESCAPE($1)"], [0], [$2 $3 ]) AT_CLEANUP]) # These are the test cases given in POSIX for dirname() and basename(). 
CHECK_FILE_NAME([/usr/lib], [/usr], [lib]) CHECK_FILE_NAME([/usr/], [/], [usr]) CHECK_FILE_NAME([usr], [.], [usr]) CHECK_FILE_NAME([/], [/], [/]) CHECK_FILE_NAME([.], [.], [.]) CHECK_FILE_NAME([..], [.], [..]) CHECK_FILE_NAME([//], [//], [//]) # / is also allowed CHECK_FILE_NAME([//foo], [//], [foo]) # / is also allowed for dirname CHECK_FILE_NAME([], [.], [.]) # Additional test cases. CHECK_FILE_NAME([dir/file], [dir], [file]) CHECK_FILE_NAME([dir/file/], [dir], [file]) CHECK_FILE_NAME([dir/file//], [dir], [file]) CHECK_FILE_NAME([///foo], [/], [foo]) AT_BANNER([test follow_symlinks function]) m4_define([CHECK_FOLLOW], [echo "check $1 -> $2" AT_CHECK_UNQUOTED([test-util follow-symlinks "$1"], [0], [$2 ]) echo]) AT_SETUP([follow_symlinks - relative symlinks]) : > target ln -s target source AT_SKIP_IF([test ! -h source]) CHECK_FOLLOW([source], [target]) mkdir dir ln -s target2 dir/source2 CHECK_FOLLOW([dir/source2], [dir/target2]) mkdir dir/dir2 ln -s dir/b a ln -s c dir/b ln -s dir2/d dir/c CHECK_FOLLOW([a], [dir/dir2/d]) AT_CLEANUP AT_SETUP([follow_symlinks - absolute symlinks]) : > target ln -s "`pwd`/target" source AT_SKIP_IF([test ! -h source]) CHECK_FOLLOW([source], [`pwd`/target]) mkdir dir ln -s "`pwd`/dir/target2" dir/source2 CHECK_FOLLOW([dir/source2], [`pwd`/dir/target2]) mkdir dir/dir2 ln -s "`pwd`/dir/b" a ln -s "`pwd`/dir/c" dir/b ln -s "`pwd`/dir/dir2/d" dir/c CHECK_FOLLOW([a], [`pwd`/dir/dir2/d]) AT_CLEANUP AT_SETUP([follow_symlinks - symlinks to directories]) mkdir target ln -s target source AT_SKIP_IF([test ! -h source]) ln -s target/ source2 CHECK_FOLLOW([source], [target]) CHECK_FOLLOW([source2], [target/]) # follow_symlinks() doesn't expand symlinks in the middle of a name. : > source/x CHECK_FOLLOW([source/x], [source/x]) AT_CLEANUP AT_SETUP([follow_symlinks - nonexistent targets]) ln -s target source AT_SKIP_IF([test ! 
-h source]) CHECK_FOLLOW([source], [target]) CHECK_FOLLOW([target], [target]) CHECK_FOLLOW([target], [target]) AT_CLEANUP AT_SETUP([follow_symlinks - regular files]) touch x CHECK_FOLLOW([x], [x]) AT_CLEANUP AT_SETUP([follow_symlinks - device targets]) AT_SKIP_IF([test ! -e /dev/null]) AT_SKIP_IF([test ! -e /dev/full]) ln -s /dev/null x ln -s /dev/full y CHECK_FOLLOW([x], [/dev/null]) CHECK_FOLLOW([y], [/dev/full]) AT_CLEANUP AT_SETUP([follow_symlinks - nonexistent files]) CHECK_FOLLOW([nonexistent], [nonexistent]) CHECK_FOLLOW([a/b/c], [a/b/c]) CHECK_FOLLOW([/a/b/c], [/a/b/c]) CHECK_FOLLOW([//a/b/c], [//a/b/c]) AT_CLEANUP AT_SETUP([follow_symlinks - symlink loop]) ln -s a b AT_SKIP_IF([test ! -h b]) ln -s b a AT_SKIP_IF([test ! -h a]) AT_CHECK([test-util follow-symlinks a], [0], [a ], [stderr]) AT_CHECK([sed 's/^[[^|]]*|//' stderr], [0], [00001|util|WARN|a: too many levels of symlinks ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/flowgen.pl000077500000000000000000000221611226605124000206170ustar00rootroot00000000000000#! /usr/bin/perl # Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. use strict; use warnings; open(FLOWS, ">&=3");# or die "failed to open fd 3 for writing: $!\n"; open(PACKETS, ">&=4");# or die "failed to open fd 4 for writing: $!\n"; # Print pcap file header. 
print PACKETS pack('NnnNNNN', 0xa1b2c3d4, # magic number 2, # major version 4, # minor version 0, # time zone offset 0, # time stamp accuracy 1518, # snaplen 1); # Ethernet output(DL_HEADER => '802.2'); for my $dl_header (qw(802.2+SNAP Ethernet)) { my %a = (DL_HEADER => $dl_header); for my $dl_vlan (qw(none zero nonzero)) { my %b = (%a, DL_VLAN => $dl_vlan); # Non-IP case. output(%b, DL_TYPE => 'non-ip'); for my $ip_options (qw(no yes)) { my %c = (%b, DL_TYPE => 'ip', IP_OPTIONS => $ip_options); for my $ip_fragment (qw(no first middle last)) { my %d = (%c, IP_FRAGMENT => $ip_fragment); for my $tp_proto (qw(TCP TCP+options UDP ICMP other)) { output(%d, TP_PROTO => $tp_proto); } } } } } sub output { my (%attrs) = @_; # Compose flow. my (%flow); $flow{DL_SRC} = "00:02:e3:0f:80:a4"; $flow{DL_DST} = "00:1a:92:40:ac:05"; $flow{NW_PROTO} = 0; $flow{NW_TOS} = 0; $flow{NW_SRC} = '0.0.0.0'; $flow{NW_DST} = '0.0.0.0'; $flow{TP_SRC} = 0; $flow{TP_DST} = 0; if (defined($attrs{DL_VLAN})) { my (%vlan_map) = ('none' => 0xffff, 'zero' => 0, 'nonzero' => 0x0123); $flow{DL_VLAN} = $vlan_map{$attrs{DL_VLAN}}; } else { $flow{DL_VLAN} = 0xffff; # OFP_VLAN_NONE } if ($attrs{DL_HEADER} eq '802.2') { $flow{DL_TYPE} = 0x5ff; # OFP_DL_TYPE_NOT_ETH_TYPE } elsif ($attrs{DL_TYPE} eq 'ip') { $flow{DL_TYPE} = 0x0800; # ETH_TYPE_IP $flow{NW_SRC} = '10.0.2.15'; $flow{NW_DST} = '192.168.1.20'; $flow{NW_TOS} = 44; if ($attrs{TP_PROTO} eq 'other') { $flow{NW_PROTO} = 42; } elsif ($attrs{TP_PROTO} eq 'TCP' || $attrs{TP_PROTO} eq 'TCP+options') { $flow{NW_PROTO} = 6; # IPPROTO_TCP $flow{TP_SRC} = 6667; $flow{TP_DST} = 9998; } elsif ($attrs{TP_PROTO} eq 'UDP') { $flow{NW_PROTO} = 17; # IPPROTO_UDP $flow{TP_SRC} = 1112; $flow{TP_DST} = 2223; } elsif ($attrs{TP_PROTO} eq 'ICMP') { $flow{NW_PROTO} = 1; # IPPROTO_ICMP $flow{TP_SRC} = 8; # echo request $flow{TP_DST} = 0; # code } else { die; } if ($attrs{IP_FRAGMENT} ne 'no' && $attrs{IP_FRAGMENT} ne 'first') { $flow{TP_SRC} = $flow{TP_DST} = 0; } } elsif 
($attrs{DL_TYPE} eq 'non-ip') { $flow{DL_TYPE} = 0x5678; } else { die; } # Compose packet. my $packet = ''; my $wildcards = 0; $packet .= pack_ethaddr($flow{DL_DST}); $packet .= pack_ethaddr($flow{DL_SRC}); if ($flow{DL_VLAN} != 0xffff) { $packet .= pack('nn', 0x8100, $flow{DL_VLAN}); } else { $wildcards |= 1 << 20; # OFPFW10_DL_VLAN_PCP } my $len_ofs = length($packet); $packet .= pack('n', 0) if $attrs{DL_HEADER} =~ /^802.2/; if ($attrs{DL_HEADER} eq '802.2') { $packet .= pack('CCC', 0x42, 0x42, 0x03); # LLC for 802.1D STP. } else { if ($attrs{DL_HEADER} eq '802.2+SNAP') { $packet .= pack('CCC', 0xaa, 0xaa, 0x03); # LLC for SNAP. $packet .= pack('CCC', 0, 0, 0); # SNAP OUI. } $packet .= pack('n', $flow{DL_TYPE}); if ($attrs{DL_TYPE} eq 'ip') { my $ip = pack('CCnnnCCnNN', (4 << 4) | 5, # version, hdrlen $flow{NW_TOS}, # type of service 0, # total length (filled in later) 65432, # id 0, # frag offset 64, # ttl $flow{NW_PROTO}, # protocol 0, # checksum 0x0a00020f, # source 0xc0a80114); # dest if ($attrs{IP_OPTIONS} eq 'yes') { substr($ip, 0, 1) = pack('C', (4 << 4) | 8); $ip .= pack('CCnnnCCCx', 130, # type 11, # length 0x6bc5, # top secret 0xabcd, 0x1234, 1, 2, 3); } if ($attrs{IP_FRAGMENT} ne 'no') { my (%frag_map) = ('first' => 0x2000, # more frags, ofs 0 'middle' => 0x2111, # more frags, ofs 0x888 'last' => 0x0222); # last frag, ofs 0x1110 substr($ip, 6, 2) = pack('n', $frag_map{$attrs{IP_FRAGMENT}}); } if ($attrs{TP_PROTO} =~ '^TCP') { my $tcp = pack('nnNNnnnn', $flow{TP_SRC}, # source port $flow{TP_DST}, # dest port 87123455, # seqno 712378912, # ackno (5 << 12) | 0x02 | 0x10, # hdrlen, SYN, ACK 5823, # window size 18923, # checksum 12893); # urgent pointer if ($attrs{TP_PROTO} eq 'TCP+options') { substr($tcp, 12, 2) = pack('n', (6 << 12) | 0x02 | 0x10); $tcp .= pack('CCn', 2, 4, 1975); # MSS option } $tcp .= 'payload'; $ip .= $tcp; } elsif ($attrs{TP_PROTO} eq 'UDP') { my $len = 15; my $udp = pack('nnnn', $flow{TP_SRC}, $flow{TP_DST}, $len, 0); $udp .= 
chr($len) while length($udp) < $len; $ip .= $udp; } elsif ($attrs{TP_PROTO} eq 'ICMP') { $ip .= pack('CCnnn', 8, # echo request 0, # code 0, # checksum 736, # identifier 931); # sequence number } elsif ($attrs{TP_PROTO} eq 'other') { $ip .= 'other header'; } else { die; } substr($ip, 2, 2) = pack('n', length($ip)); $packet .= $ip; } } if ($attrs{DL_HEADER} =~ /^802.2/) { my $len = length ($packet); $len -= 4 if $flow{DL_VLAN} != 0xffff; substr($packet, $len_ofs, 2) = pack('n', $len); } print join(' ', map("$_=$attrs{$_}", keys(%attrs))), "\n"; print join(' ', map("$_=$flow{$_}", keys(%flow))), "\n"; print "\n"; print FLOWS pack('Nn', $wildcards, # wildcards 1); # in_port print FLOWS pack_ethaddr($flow{DL_SRC}); print FLOWS pack_ethaddr($flow{DL_DST}); print FLOWS pack('nCxnCCxxNNnn', $flow{DL_VLAN}, 0, # DL_VLAN_PCP $flow{DL_TYPE}, $flow{NW_TOS}, $flow{NW_PROTO}, inet_aton($flow{NW_SRC}), inet_aton($flow{NW_DST}), $flow{TP_SRC}, $flow{TP_DST}); print PACKETS pack('NNNN', 0, # timestamp seconds 0, # timestamp microseconds length($packet), # bytes saved length($packet)), # total length $packet; } sub pack_ethaddr { local ($_) = @_; my $xx = '([0-9a-fA-F][0-9a-fA-F])'; my (@octets) = /$xx:$xx:$xx:$xx:$xx:$xx/; @octets == 6 or die $_; my ($out) = ''; $out .= pack('C', hex($_)) foreach @octets; return $out; } sub inet_aton { local ($_) = @_; my ($a, $b, $c, $d) = /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/; defined $d or die $_; return ($a << 24) | ($b << 16) | ($c << 8) | $d; } openvswitch-2.0.1+git20140120/tests/glibc.supp000066400000000000000000000003371226605124000206100ustar00rootroot00000000000000# suppress what appear to unavoidable error reports from glibc { timer_create Memcheck:Leak fun:malloc fun:timer_create } { timer_create Memcheck:Param timer_create(evp) ... 
fun:set_up_timer } openvswitch-2.0.1+git20140120/tests/heap.at000066400000000000000000000005611226605124000200610ustar00rootroot00000000000000AT_BANNER([heap library]) m4_define([TEST_HEAP], [AT_SETUP([heap library -- m4_bpatsubst([$1], [-], [ ])]) AT_CHECK([test-heap $1]) AT_CLEANUP]) TEST_HEAP([insert-delete-same-order]) TEST_HEAP([insert-delete-reverse-order]) TEST_HEAP([insert-delete-every-order]) TEST_HEAP([insert-delete-same-order-with-dups]) TEST_HEAP([raw-insert]) TEST_HEAP([raw-delete]) openvswitch-2.0.1+git20140120/tests/idltest.ann000066400000000000000000000005371226605124000207670ustar00rootroot00000000000000# -*- python -*- # This code, when invoked by "ovsdb-idlc annotate" (by the build # process), annotates idltest.ovsschema with additional data that give # the ovsdb-idl engine information about the types involved, so that # it can generate more programmer-friendly data structures. s["idlPrefix"] = "idltest_" s["idlHeader"] = "\"tests/idltest.h\"" openvswitch-2.0.1+git20140120/tests/idltest.ovsschema000066400000000000000000000037531226605124000222060ustar00rootroot00000000000000{ "name": "idltest", "version": "1.2.3", "tables": { "link1": { "columns": { "i": { "type": "integer" }, "k": { "type": { "key": { "type": "uuid", "refTable": "link1" } } }, "ka": { "type": { "key": { "type": "uuid", "refTable": "link1" }, "max": "unlimited", "min": 0 } }, "l2": { "type": { "key": { "type": "uuid", "refTable": "link2" }, "min": 0 } } } }, "link2": { "columns": { "i": { "type": "integer" }, "l1": { "type": { "key": { "type": "uuid", "refTable": "link1" }, "min": 0 } } } }, "simple": { "columns": { "b": { "type": "boolean" }, "ba": { "type": { "key": "boolean", "max": 1, "min": 0 } }, "i": { "type": "integer" }, "ia": { "type": { "key": "integer", "max": "unlimited", "min": 0 } }, "r": { "type": "real" }, "ra": { "type": { "key": "real", "max": "unlimited", "min": 0 } }, "s": { "type": "string" }, "sa": { "type": { "key": "string", "max": "unlimited", "min": 0 } 
}, "u": { "type": "uuid" }, "ua": { "type": { "key": "uuid", "max": "unlimited", "min": 0 } } } } } } openvswitch-2.0.1+git20140120/tests/interface-reconfigure.at000066400000000000000000000643671226605124000234300ustar00rootroot00000000000000m4_divert_push([PREPARE_TESTS]) [ # Creates a directory tree for use with "interface-reconfigure --root-dir". ifr_setup () { for script in \ interface-reconfigure \ InterfaceReconfigure.py \ InterfaceReconfigureBridge.py \ InterfaceReconfigureVswitch.py do cp $top_srcdir/xenserver/opt_xensource_libexec_$script $script done mkdir -p etc cat > etc/xensource-inventory < etc/xensource/network.conf for utility in \ sbin/ethtool \ sbin/ifconfig \ sbin/ifdown \ sbin/ifup \ sbin/ip \ sbin/update-issue \ sbin/vconfig \ usr/sbin/brctl \ usr/sbin/ovs-vlan-bug-workaround do mkdir -p `dirname $utility` cat > $utility <<'EOF' #! /bin/sh echo ${0} ${*} >&2 EOF chmod +x $utility done mkdir -p usr/bin cat > usr/bin/ovs-vsctl <<'EOF' #! /bin/sh echo ${0} ${*} >&2 while test ${#} -ge 4; do if test X"${1}" = Xget && \ test X"${2}" = Xinterface && \ test X"${4}" = Xofport; then if test X"${3}" = Xeth2; then echo 5 else echo -1 fi fi shift done EOF chmod +x usr/bin/ovs-vsctl cat > usr/bin/ovs-ofctl <<'EOF' #! /bin/sh echo ${0} ${*} >&2 # Check that the flow is properly formed. ovs-ofctl parse-flow "${3}" >/dev/null EOF chmod +x usr/bin/ovs-ofctl mkdir -p etc/sysconfig/network-scripts configure_netdev () { mkdir -p sys/class/net/${1} echo ${2} > sys/class/net/${1}/address echo ${3} > sys/class/net/${1}/tx_queue_len if test ${1} = eth1; then # No VLAN acceleration. echo 0x829 > sys/class/net/${1}/features else # Supports VLAN acceleration. 
echo 0x10b89 > sys/class/net/${1}/features fi mkdir sys/class/net/${1}/device ln -s ../../../bus/pci/drivers/e1000 sys/class/net/${1}/device/driver : >> etc/sysconfig/network-scripts/ifcfg-${1} } configure_netdev lo 00:00:00:00:00:00 0 configure_netdev eth0 00:22:19:22:4b:af 1000 configure_netdev eth1 00:22:19:22:4b:b1 1000 configure_netdev eth2 00:15:17:a0:29:80 1000 configure_netdev eth3 00:15:17:a0:29:81 1000 configure_netdev eth4 00:1b:21:29:ce:51 1000 mkdir -p var/xapi cat > var/xapi/network.dbcache <<'EOF' False OpaqueRef:86d81bcf-0d25-90b2-cb11-af2007bd586e bd62a141-091f-3909-e334-0334f67ff3be OpaqueRef:NULL -1 00:22:19:22:4b:af None eth0 False OpaqueRef:83e4a934-aeb5-e6f0-a743-d1c7ef7364c5 False OpaqueRef:NULL df8d35c2-cc3a-a623-7065-d987a29feb75 OpaqueRef:NULL -1 00:1b:21:29:ce:51 None eth4 False OpaqueRef:bf51b4d3-7bdc-ea55-ba21-539b150b0531 False OpaqueRef:86d81bcf-0d25-90b2-cb11-af2007bd586e 2f87fc95-5ab4-571a-2487-3f4ac1985663 OpaqueRef:NULL -1 00:22:19:22:4b:b1 None eth1 False OpaqueRef:92b41bf6-aa21-45d3-1c86-c87a5fa98f7d False OpaqueRef:NULL d2dfdab3-daf4-afea-f055-a25a0d24d714 OpaqueRef:4c0eb823-4d96-da1d-e75f-411b85badb0c 4 fe:ff:ff:ff:ff:ff None bond0 True OpaqueRef:a63afad0-fb4c-b4a4-3696-cbb3d88afc47 OpaqueRef:e623e1d6-cd02-be8d-820d-49d65c710297 False OpaqueRef:NULL f4ba396e-a993-a592-5fbc-a1d566afb59e 10.0.0.188 OpaqueRef:NULL -1 255.0.0.0 00:15:17:a0:29:81 Static eth3 True OpaqueRef:d9189da2-d00b-61ba-8a6d-ac42cc868e32 True OpaqueRef:NULL 646ca9a1-36ad-e2f9-3ecc-1e5622c201c2 172.18.3.188 OpaqueRef:NULL -1 255.255.0.0 00:15:17:a0:29:80 DHCP eth2 True OpaqueRef:6e7c6e81-6b5e-b91f-e1f9-9e028567bdfe False OpaqueRef:NULL 3941edd2-865b-8dd8-61f0-199f5e1fa652 OpaqueRef:e623e1d6-cd02-be8d-820d-49d65c710297 123 fe:ff:ff:ff:ff:ff None eth3 True OpaqueRef:240fb5f8-addc-6ea3-f921-2a42b42acd17 OpaqueRef:4c0eb823-4d96-da1d-e75f-411b85badb0c False OpaqueRef:NULL 6c0327a9-afa3-fc19-6798-a1bfe20095ed OpaqueRef:NULL -1 00:22:19:22:4b:af None bond0 
OpaqueRef:86d81bcf-0d25-90b2-cb11-af2007bd586e True OpaqueRef:ec1e5037-60ea-97e5-54b8-39bdb43c071a OpaqueRef:69c904bb-8da9-3424-485b-8b47c2d3ef11 dad825f1-6d81-386e-849c-5589281e53e1 OpaqueRef:e0955887-571f-17fc-a971-61c1ec7d81b6 OpaqueRef:2956e6c8-487e-981c-85ff-c84796418768 OpaqueRef:69c904bb-8da9-3424-485b-8b47c2d3ef11 841814da-d0d2-9da4-0b2e-b6143480bbfb OpaqueRef:d2d1e51e-4da9-3163-8f57-bb683429335e OpaqueRef:2bc0fab5-523a-4125-609d-212391f5f6fc 399279a2-5ccd-5368-9af3-8622a1f1ac82 OpaqueRef:8e3e37e6-ebb9-087e-0201-f6a56bf554c3 OpaqueRef:d2d1e51e-4da9-3163-8f57-bb683429335e xapi2 99be2da4-6c33-6f8e-49ea-3bc592fe3c85 OpaqueRef:2bc0fab5-523a-4125-609d-212391f5f6fc xenbr3 2902ae1b-8013-897a-b697-0b200ea3aaa5 OpaqueRef:69c904bb-8da9-3424-485b-8b47c2d3ef11 xapi1 45cbbb43-113d-a712-3231-c6463f253cef OpaqueRef:2956e6c8-487e-981c-85ff-c84796418768 xenbr1 99f8771a-645a-26a3-e06c-30a401f1d009 OpaqueRef:205d1186-2cd1-d5e6-45e4-ea1698ea6e15 xenbr2 d08c8749-0c8f-9e8d-ce25-fd364661ee99 OpaqueRef:e0955887-571f-17fc-a971-61c1ec7d81b6 xenbr0 true c9eecb03-560d-61de-b6a8-56dfc766f67e OpaqueRef:eea8da94-a5e6-18fc-34a7-5e9b5a235806 xenbr4 d2c14c89-29cc-51d4-7664-633eff02b2ad xapi0 dc0f0632-c2aa-1b78-2fea-0d3a23c51740 OpaqueRef:8e3e37e6-ebb9-087e-0201-f6a56bf554c3 xapi3 db7bdc03-074d-42ae-fc73-9b06de1d57f6 secure EOF } ifr_run () { ./interface-reconfigure --root-prefix="`pwd`" --no-syslog "$@" } ifr_filter () { sed -n -e "s,`pwd`,,g" -e 's/ -- /\ /g' -e '/^Running command:/!p' stderr }] m4_divert_pop([PREPARE_TESTS]) AT_BANNER([interface-reconfigure]) AT_SETUP([non-VLAN, non-bond]) AT_KEYWORDS([interface-reconfigure]) ifr_setup AT_CHECK([ifr_run --force xenbr2 up], [0], [], [stderr]) AT_CHECK([ifr_filter], [0], [[Force interface xenbr2 up Loading xapi database cache from /var/xapi/network.dbcache Configured for Vswitch datapath action_up: xenbr2 Writing network configuration for xenbr2 Configuring xenbr2 using DHCP configuration configure_datapath: bridge - xenbr2 
configure_datapath: physical - [u'eth2'] configure_datapath: extra ports - [] configure_datapath: extra bonds - [] /usr/bin/ovs-vsctl -vconsole:off get-fail-mode xenbr2 Applying changes to /etc/sysconfig/network-scripts/route-xenbr2 configuration Applying changes to /etc/sysconfig/network configuration Applying changes to /etc/sysconfig/network-scripts/ifcfg-xenbr2 configuration /sbin/ifconfig eth2 up mtu 1500 /sbin/ethtool -K eth2 gro off lro off /usr/sbin/ovs-vlan-bug-workaround eth2 on /usr/bin/ovs-vsctl --timeout=20 --with-iface --if-exists del-port eth2 --may-exist add-br xenbr2 --may-exist add-port xenbr2 eth2 set Bridge xenbr2 other-config:hwaddr="00:15:17:a0:29:80" set Bridge xenbr2 fail_mode=secure remove Bridge xenbr2 other_config disable-in-band br-set-external-id xenbr2 xs-network-uuids d08c8749-0c8f-9e8d-ce25-fd364661ee99 /usr/bin/ovs-vsctl -vconsole:off get interface eth2 ofport /usr/bin/ovs-ofctl add-flow xenbr2 idle_timeout=0,priority=0,in_port=5,arp,nw_proto=1,actions=local /usr/bin/ovs-ofctl add-flow xenbr2 idle_timeout=0,priority=0,in_port=local,arp,dl_src=00:15:17:a0:29:80,actions=5 /usr/bin/ovs-ofctl add-flow xenbr2 idle_timeout=0,priority=0,in_port=5,dl_dst=00:15:17:a0:29:80,actions=local /usr/bin/ovs-ofctl add-flow xenbr2 idle_timeout=0,priority=0,in_port=local,dl_src=00:15:17:a0:29:80,actions=5 /sbin/ifup xenbr2 /sbin/update-issue Committing changes to /etc/sysconfig/network-scripts/route-xenbr2 configuration Committing changes to /etc/sysconfig/network configuration Committing changes to /etc/sysconfig/network-scripts/ifcfg-xenbr2 configuration ]]) AT_CHECK([cat etc/sysconfig/network-scripts/ifcfg-xenbr2], [0], [# DO NOT EDIT: This file (ifcfg-xenbr2) was autogenerated by interface-reconfigure XEMANAGED=yes DEVICE=xenbr2 ONBOOT=no NOZEROCONF=yes TYPE=Ethernet BOOTPROTO=dhcp PERSISTENT_DHCLIENT=yes MTU=1500 ]) # Simulate interface-reconfigure creating xenbr2, so that we can tell # interface-reconfigure to take it back down. 
AT_CHECK([configure_netdev xenbr2 00:15:17:a0:29:80 0]) AT_CHECK([ifr_run --force xenbr2 down], [0], [], [stderr]) AT_CHECK([ifr_filter], [0], [[Force interface xenbr2 down Loading xapi database cache from /var/xapi/network.dbcache Configured for Vswitch datapath action_down: xenbr2 /sbin/ifdown xenbr2 deconfigure ipdev xenbr2 on xenbr2 deconfigure_bridge: bridge - xenbr2 action_down: bring down physical devices - [u'eth2'] /sbin/ifconfig eth2 down /usr/bin/ovs-vsctl --timeout=20 --with-iface --if-exists del-port xenbr2 --if-exists del-br xenbr2 ]]) AT_CLEANUP AT_SETUP([VLAN, non-bond]) AT_KEYWORDS([interface-reconfigure]) ifr_setup AT_CHECK([ifr_run --force xapi3 up], [0], [], [stderr]) AT_CHECK([ifr_filter], [0], [[Force interface xapi3 up Loading xapi database cache from /var/xapi/network.dbcache Configured for Vswitch datapath action_up: xapi3 Writing network configuration for xapi3 Configuring xapi3 using None configuration configure_datapath: bridge - xenbr3 configure_datapath: physical - [u'eth3'] configure_datapath: extra ports - [] configure_datapath: extra bonds - [] Applying changes to /etc/sysconfig/network-scripts/route-xapi3 configuration Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi3 configuration /sbin/ifconfig eth3 up mtu 1500 /sbin/ethtool -K eth3 gro off lro off /usr/sbin/ovs-vlan-bug-workaround eth3 on /usr/bin/ovs-vsctl --timeout=20 --with-iface --if-exists del-port eth3 --may-exist add-br xenbr3 --may-exist add-port xenbr3 eth3 set Bridge xenbr3 other-config:hwaddr="00:15:17:a0:29:81" set Bridge xenbr3 fail_mode=secure remove Bridge xenbr3 other_config disable-in-band br-set-external-id xenbr3 xs-network-uuids 2902ae1b-8013-897a-b697-0b200ea3aaa5;db7bdc03-074d-42ae-fc73-9b06de1d57f6 --if-exists del-br xapi3 --may-exist add-br xapi3 xenbr3 123 br-set-external-id xapi3 xs-network-uuids 2902ae1b-8013-897a-b697-0b200ea3aaa5;db7bdc03-074d-42ae-fc73-9b06de1d57f6 set Interface xapi3 MAC="00:15:17:a0:29:81" /sbin/ifup xapi3 
/sbin/update-issue Committing changes to /etc/sysconfig/network-scripts/route-xapi3 configuration Committing changes to /etc/sysconfig/network-scripts/ifcfg-xapi3 configuration ]]) AT_CHECK([cat etc/sysconfig/network-scripts/ifcfg-xapi3], [0], [# DO NOT EDIT: This file (ifcfg-xapi3) was autogenerated by interface-reconfigure XEMANAGED=yes DEVICE=xapi3 ONBOOT=no NOZEROCONF=yes TYPE=Ethernet BOOTPROTO=none MTU=1500 ]) # Simulate interface-reconfigure creating xapi3, so that we can tell # interface-reconfigure to take it back down. AT_CHECK([configure_netdev xapi3 00:23:20:AC:AF:02 0]) AT_CHECK([ifr_run --force xapi3 down], [0], [], [stderr]) AT_CHECK([ifr_filter], [0], [[Force interface xapi3 down Loading xapi database cache from /var/xapi/network.dbcache Configured for Vswitch datapath action_down: xapi3 /sbin/ifdown xapi3 deconfigure ipdev xapi3 on xenbr3 deconfigure_bridge: bridge - xapi3 action_down: no more masters, bring down slave xenbr3 deconfigure_bridge: bridge - xenbr3 action_down: bring down physical devices - [u'eth3'] /sbin/ifconfig eth3 down /usr/bin/ovs-vsctl --timeout=20 --with-iface --if-exists del-port xapi3 --if-exists del-br xapi3 --if-exists del-br xenbr3 ]]) AT_CLEANUP AT_SETUP([Bond, non-VLAN]) AT_KEYWORDS([interface-reconfigure]) ifr_setup # Pretend that bond0 exists, even though it would really be created by # a "create-bond" call in an ovs-vsctl invocation within # interface-reconfigure, because otherwise interface-reconfigure will # die with "failed to apply changes: netdev: up: device bond0 does not # exist" after it thinks it created bond0. 
AT_CHECK([configure_netdev bond0 00:23:20:e6:39:75 0]) AT_CHECK([ifr_run --force xapi1 up], [0], [], [stderr]) AT_CHECK([ifr_filter], [0], [[Force interface xapi1 up Loading xapi database cache from /var/xapi/network.dbcache Configured for Vswitch datapath action_up: xapi1 Writing network configuration for xapi1 Configuring xapi1 using None configuration configure_datapath: leaving bond bond0 up configure_datapath: leaving bond bond0 up configure_datapath: bridge - xapi1 configure_datapath: physical - [u'eth0', u'eth1'] configure_datapath: extra ports - [] configure_datapath: extra bonds - [] netdev: down: device xenbr0 does not exist, ignoring netdev: down: device xenbr1 does not exist, ignoring Applying changes to /etc/sysconfig/network-scripts/route-xapi1 configuration Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi1 configuration /sbin/ifconfig eth0 up mtu 1500 /sbin/ethtool -K eth0 gro off lro off /usr/sbin/ovs-vlan-bug-workaround eth0 on /sbin/ifconfig eth1 up mtu 1500 /sbin/ethtool -K eth1 gro off lro off /usr/sbin/ovs-vlan-bug-workaround eth1 off /usr/bin/ovs-vsctl --timeout=20 --if-exists del-br xenbr0 --if-exists del-br xenbr1 --with-iface --if-exists del-port eth0 --with-iface --if-exists del-port eth1 --may-exist add-br xapi1 --with-iface --if-exists del-port bond0 --fake-iface add-bond xapi1 bond0 eth0 eth1 set Port bond0 MAC="00:22:19:22:4b:af" other-config:bond-miimon-interval=100 bond_downdelay=200 bond_updelay=31000 other-config:bond-detect-mode=carrier lacp=off bond_mode=balance-slb set Bridge xapi1 other-config:hwaddr="00:22:19:22:4b:af" set Bridge xapi1 fail_mode=secure remove Bridge xapi1 other_config disable-in-band br-set-external-id xapi1 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85 /sbin/ifup xapi1 action_up: bring up bond0 /sbin/ifconfig bond0 up /sbin/update-issue Committing changes to /etc/sysconfig/network-scripts/route-xapi1 configuration Committing changes to 
/etc/sysconfig/network-scripts/ifcfg-xapi1 configuration ]]) AT_CHECK([cat etc/sysconfig/network-scripts/ifcfg-xapi1], [0], [# DO NOT EDIT: This file (ifcfg-xapi1) was autogenerated by interface-reconfigure XEMANAGED=yes DEVICE=xapi1 ONBOOT=no NOZEROCONF=yes TYPE=Ethernet BOOTPROTO=none MTU=1500 ]) # Simulate interface-reconfigure creating xapi1, so that we can tell # interface-reconfigure to take it back down. AT_CHECK([configure_netdev xapi1 00:22:19:22:4B:AF 0]) AT_CHECK([ifr_run --force xapi1 down], [0], [], [stderr]) AT_CHECK([ifr_filter], [0], [[Force interface xapi1 down Loading xapi database cache from /var/xapi/network.dbcache Configured for Vswitch datapath action_down: xapi1 /sbin/ifdown xapi1 deconfigure ipdev xapi1 on xapi1 deconfigure_bridge: bridge - xapi1 action_down: bring down physical devices - [u'eth0', u'eth1'] /sbin/ifconfig eth0 down /sbin/ifconfig eth1 down /usr/bin/ovs-vsctl --timeout=20 --with-iface --if-exists del-port xapi1 --if-exists del-br xapi1 ]]) AT_CLEANUP AT_SETUP([VLAN on bond]) AT_KEYWORDS([interface-reconfigure]) ifr_setup # Pretend that bond0 exists, even though it would really be created by # a "create-bond" call in an ovs-vsctl invocation within # interface-reconfigure, because otherwise interface-reconfigure will # die with "failed to apply changes: netdev: up: device bond0 does not # exist" after it thinks it created bond0. 
AT_CHECK([configure_netdev bond0 00:23:20:e6:39:75 0]) AT_CHECK([ifr_run --force xapi2 up], [0], [], [stderr]) AT_CHECK([ifr_filter], [0], [[Force interface xapi2 up Loading xapi database cache from /var/xapi/network.dbcache Configured for Vswitch datapath action_up: xapi2 Writing network configuration for xapi2 Configuring xapi2 using None configuration configure_datapath: leaving bond bond0 up configure_datapath: leaving bond bond0 up configure_datapath: bridge - xapi1 configure_datapath: physical - [u'eth0', u'eth1'] configure_datapath: extra ports - [] configure_datapath: extra bonds - [] netdev: down: device xenbr0 does not exist, ignoring netdev: down: device xenbr1 does not exist, ignoring Applying changes to /etc/sysconfig/network-scripts/route-xapi2 configuration Applying changes to /etc/sysconfig/network-scripts/ifcfg-xapi2 configuration /sbin/ifconfig eth0 up mtu 1500 /sbin/ethtool -K eth0 gro off lro off /usr/sbin/ovs-vlan-bug-workaround eth0 on /sbin/ifconfig eth1 up mtu 1500 /sbin/ethtool -K eth1 gro off lro off /usr/sbin/ovs-vlan-bug-workaround eth1 off /usr/bin/ovs-vsctl --timeout=20 --if-exists del-br xenbr0 --if-exists del-br xenbr1 --with-iface --if-exists del-port eth0 --with-iface --if-exists del-port eth1 --may-exist add-br xapi1 --with-iface --if-exists del-port bond0 --fake-iface add-bond xapi1 bond0 eth0 eth1 set Port bond0 MAC="00:22:19:22:4b:af" other-config:bond-miimon-interval=100 bond_downdelay=200 bond_updelay=31000 other-config:bond-detect-mode=carrier lacp=off bond_mode=balance-slb set Bridge xapi1 other-config:hwaddr="00:22:19:22:4b:af" set Bridge xapi1 fail_mode=secure remove Bridge xapi1 other_config disable-in-band br-set-external-id xapi1 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85 --if-exists del-br xapi2 --may-exist add-br xapi2 xapi1 4 br-set-external-id xapi2 xs-network-uuids 45cbbb43-113d-a712-3231-c6463f253cef;99be2da4-6c33-6f8e-49ea-3bc592fe3c85 set Interface xapi2 
MAC="00:22:19:22:4b:af" /sbin/ifup xapi2 action_up: bring up bond0 /sbin/ifconfig bond0 up /sbin/update-issue Committing changes to /etc/sysconfig/network-scripts/route-xapi2 configuration Committing changes to /etc/sysconfig/network-scripts/ifcfg-xapi2 configuration ]]) AT_CHECK([cat etc/sysconfig/network-scripts/ifcfg-xapi2], [0], [# DO NOT EDIT: This file (ifcfg-xapi2) was autogenerated by interface-reconfigure XEMANAGED=yes DEVICE=xapi2 ONBOOT=no NOZEROCONF=yes TYPE=Ethernet BOOTPROTO=none MTU=1500 ]) # Simulate interface-reconfigure creating xapi2, so that we can tell # interface-reconfigure to take it back down. AT_CHECK([configure_netdev xapi2 00:23:20:A4:71:C2 0]) AT_CHECK([ifr_run --force xapi2 down], [0], [], [stderr]) AT_CHECK([ifr_filter], [0], [[Force interface xapi2 down Loading xapi database cache from /var/xapi/network.dbcache Configured for Vswitch datapath action_down: xapi2 /sbin/ifdown xapi2 deconfigure ipdev xapi2 on xapi1 deconfigure_bridge: bridge - xapi2 action_down: no more masters, bring down slave xapi1 deconfigure_bridge: bridge - xapi1 action_down: bring down physical devices - [u'eth0', u'eth1'] /sbin/ifconfig eth0 down /sbin/ifconfig eth1 down /usr/bin/ovs-vsctl --timeout=20 --with-iface --if-exists del-port xapi2 --if-exists del-br xapi2 --if-exists del-br xapi1 ]]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/json.at000066400000000000000000000317661226605124000201300ustar00rootroot00000000000000m4_define([JSON_CHECK_POSITIVE_C], [AT_SETUP([$1]) AT_KEYWORDS([json positive]) AT_CHECK([printf %s "AS_ESCAPE([$2])" > input]) AT_CAPTURE_FILE([input]) AT_CHECK([test-json $4 input], [0], [stdout], []) AT_CHECK([cat stdout], [0], [$3 ]) AT_CLEANUP]) m4_define([JSON_CHECK_POSITIVE_PY], [AT_SETUP([$1]) AT_KEYWORDS([json positive Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CHECK([printf %s "AS_ESCAPE([$2])" > input]) AT_CAPTURE_FILE([input]) AT_CHECK([$PYTHON $srcdir/test-json.py $4 input], [0], [stdout], []) AT_CHECK([cat stdout], [0], 
[$3 ]) AT_CLEANUP]) m4_define([JSON_CHECK_POSITIVE_UCS4PY], [AT_SETUP([$1]) AT_KEYWORDS([json positive Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_XFAIL_IF([$PYTHON -c "exit(len(u'\U00010800'))"; test $? -ne 1]) AT_CHECK([printf %s "AS_ESCAPE([$2])" > input]) AT_CAPTURE_FILE([input]) AT_CHECK([$PYTHON $srcdir/test-json.py $4 input], [0], [stdout], []) AT_CHECK([cat stdout], [0], [$3 ]) AT_CLEANUP]) m4_define([JSON_CHECK_POSITIVE], [JSON_CHECK_POSITIVE_C([$1 - C], [$2], [$3], [$4]) JSON_CHECK_POSITIVE_PY([$1 - Python], [$2], [$3], [$4])]) m4_define([JSON_CHECK_NEGATIVE_C], [AT_SETUP([$1]) AT_KEYWORDS([json negative]) AT_CHECK([printf %s "AS_ESCAPE([$2])" > input]) AT_CAPTURE_FILE([input]) AT_CHECK([test-json $4 input], [1], [stdout], []) AT_CHECK([[sed 's/^error: [^:]*:/error:/' < stdout]], [0], [$3 ]) AT_CLEANUP]) m4_define([JSON_CHECK_NEGATIVE_PY], [AT_SETUP([$1]) AT_KEYWORDS([json negative Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CHECK([printf %s "AS_ESCAPE([$2])" > input]) AT_CAPTURE_FILE([input]) AT_CHECK([$PYTHON $srcdir/test-json.py $4 input], [1], [stdout], []) AT_CHECK([[sed 's/^error: [^:]*:/error:/' < stdout]], [0], [$3 ]) AT_CLEANUP]) m4_define([JSON_CHECK_NEGATIVE], [JSON_CHECK_NEGATIVE_C([$1 - C], [$2], [$3], [$4]) JSON_CHECK_NEGATIVE_PY([$1 - Python], [$2], [$3], [$4])]) AT_BANNER([JSON -- arrays]) JSON_CHECK_POSITIVE([empty array], [[ [ ] ]], [[[]]]) JSON_CHECK_POSITIVE([single-element array], [[ [ 1 ] ]], [[[1]]]) JSON_CHECK_POSITIVE([2-element array], [[ [ 1, 2 ] ]], [[[1,2]]]) JSON_CHECK_POSITIVE([many-element array], [[ [ 1, 2, 3, 4, 5 ] ]], [[[1,2,3,4,5]]]) JSON_CHECK_NEGATIVE([missing comma], [[ [ 1, 2, 3 4, 5 ] ]], [error: syntax error expecting '@:>@' or ',']) JSON_CHECK_NEGATIVE([trailing comma not allowed], [[[1,2,]]], [error: syntax error expecting value]) JSON_CHECK_NEGATIVE([doubled comma not allowed], [[[1,,2]]], [error: syntax error expecting value]) AT_BANNER([JSON -- strings]) JSON_CHECK_POSITIVE([empty string], [[[ "" 
]]], [[[""]]]) JSON_CHECK_POSITIVE([1-character strings], [[[ "a", "b", "c" ]]], [[["a","b","c"]]]) JSON_CHECK_POSITIVE([escape sequences], [[[ " \" \\ \/ \b \f \n \r \t" ]]], [[[" \" \\ / \b \f \n \r \t"]]]) JSON_CHECK_POSITIVE([Unicode escape sequences], [[[ " \u0022 \u005c \u002F \u0008 \u000c \u000A \u000d \u0009" ]]], [[[" \" \\ / \b \f \n \r \t"]]]) JSON_CHECK_POSITIVE_C([surrogate pairs - C], [[["\ud834\udd1e"]]], [[["𝄞"]]]) JSON_CHECK_POSITIVE_UCS4PY([surrogate pairs - Python], [[["\ud834\udd1e"]]], [[["𝄞"]]]) JSON_CHECK_NEGATIVE([a string by itself is not valid JSON], ["xxx"], [error: syntax error at beginning of input]) JSON_CHECK_NEGATIVE([end of line in quoted string], [[["xxx "]]], [error: U+000A must be escaped in quoted string]) JSON_CHECK_NEGATIVE([formfeed in quoted string], [[["xxx "]]], [error: U+000C must be escaped in quoted string]) JSON_CHECK_NEGATIVE([bad escape in quoted string], [[["\x12"]]], [error: bad escape \x]) JSON_CHECK_NEGATIVE([\u must be followed by 4 hex digits (1)], [[["\u1x"]]], [error: quoted string ends within \u escape]) JSON_CHECK_NEGATIVE([\u must be followed by 4 hex digits (2)], [[["\u1xyz"]]], [error: malformed \u escape]) JSON_CHECK_NEGATIVE([isolated leading surrogate not allowed], [[["\ud834xxx"]]], [error: malformed escaped surrogate pair]) JSON_CHECK_NEGATIVE([surrogatess must paired properly], [[["\ud834\u1234"]]], [error: second half of escaped surrogate pair is not trailing surrogate]) JSON_CHECK_NEGATIVE([null bytes not allowed], [[["\u0000"]]], [error: null bytes not supported in quoted strings]) AT_SETUP([end of input in quoted string - C]) AT_KEYWORDS([json negative]) AT_CHECK([printf '"xxx' | test-json -], [1], [error: line 0, column 4, byte 4: unexpected end of input in quoted string ]) AT_CLEANUP AT_SETUP([end of input in quoted string - Python]) AT_KEYWORDS([json negative Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CHECK([printf '"xxx' > input $PYTHON $srcdir/test-json.py input], [1], [error: line 
0, column 4, byte 4: unexpected end of input in quoted string ]) AT_CLEANUP AT_BANNER([JSON -- objects]) JSON_CHECK_POSITIVE([empty object], [[{ }]], [[{}]]) JSON_CHECK_POSITIVE([simple object], [[{"b": 2, "a": 1, "c": 3}]], [[{"a":1,"b":2,"c":3}]]) JSON_CHECK_NEGATIVE([bad value], [[{"a": }, "b": 2]], [error: syntax error expecting value]) JSON_CHECK_NEGATIVE([missing colon], [[{"b": 2, "a" 1, "c": 3}]], [error: syntax error parsing object expecting ':']) JSON_CHECK_NEGATIVE([missing comma], [[{"b": 2 "a" 1, "c": 3}]], [error: syntax error expecting '}' or ',']) JSON_CHECK_NEGATIVE([trailing comma not allowed], [[{"b": 2, "a": 1, "c": 3, }]], [[error: syntax error parsing object expecting string]]) JSON_CHECK_NEGATIVE([doubled comma not allowed], [[{"b": 2, "a": 1,, "c": 3}]], [[error: syntax error parsing object expecting string]]) JSON_CHECK_NEGATIVE([names must be strings], [[{1: 2}]], [[error: syntax error parsing object expecting string]]) AT_BANNER([JSON -- literal names]) JSON_CHECK_POSITIVE([null], [[[ null ]]], [[[null]]]) JSON_CHECK_POSITIVE([false], [[[ false ]]], [[[false]]]) JSON_CHECK_POSITIVE([true], [[[ true ]]], [[[true]]]) JSON_CHECK_NEGATIVE([a literal by itself is not valid JSON], [null], [error: syntax error at beginning of input]) JSON_CHECK_NEGATIVE([nullify is invalid], [[[ nullify ]]], [error: invalid keyword 'nullify']) JSON_CHECK_NEGATIVE([nubs is invalid], [[[ nubs ]]], [error: invalid keyword 'nubs']) JSON_CHECK_NEGATIVE([xxx is invalid], [[[ xxx ]]], [error: invalid keyword 'xxx']) AT_BANNER([JSON -- numbers]) JSON_CHECK_POSITIVE( [integers expressed as reals], [[[1.0000000000, 2.00000000000000000000000000000000000, 2e5, 2.1234e4, 2.1230e3, 0e-10000, 0e10000]]], [[[1,2,200000,21234,2123,0,0]]]) JSON_CHECK_POSITIVE( [large integers], [[[9223372036854775807, -9223372036854775808]]], [[[9223372036854775807,-9223372036854775808]]]) JSON_CHECK_POSITIVE( [large integers expressed as reals], [[[9223372036854775807.0, -9223372036854775808.0, 
92233720.36854775807e11, -9.223372036854775808e18]]], [[[9223372036854775807,-9223372036854775808,9223372036854775807,-9223372036854775808]]]) # It seems likely that the following test will fail on some system that # rounds slightly differently in arithmetic or in printf, but I'd like # to keep it this way until we run into such a system. JSON_CHECK_POSITIVE( [large integers that overflow to reals], [[[9223372036854775807000, -92233720368547758080000]]], [[[9.22337203685478e+21,-9.22337203685478e+22]]]) JSON_CHECK_POSITIVE( [negative zero], [[[-0, -0.0, 1e-9999, -1e-9999]]], [[[0,0,0,0]]]) JSON_CHECK_POSITIVE( [reals], [[[0.0, 1.0, 2.0, 3.0, 3.5, 81.250]]], [[[0,1,2,3,3.5,81.25]]]) JSON_CHECK_POSITIVE( [scientific notation], [[[1e3, 1E3, 2.5E2, 1e+3, 125e-3, 3.125e-2, 3125e-05, 1.525878906e-5]]], [[[1000,1000,250,1000,0.125,0.03125,0.03125,1.525878906e-05]]]) # It seems likely that the following test will fail on some system that # rounds slightly differently in arithmetic or in printf, but I'd like # to keep it this way until we run into such a system. 
JSON_CHECK_POSITIVE( [+/- DBL_MAX], [[[1.7976931348623157e+308, -1.7976931348623157e+308]]], [[[1.79769313486232e+308,-1.79769313486232e+308]]]) JSON_CHECK_POSITIVE( [negative reals], [[[-0, -1.0, -2.0, -3.0, -3.5, -8.1250]]], [[[0,-1,-2,-3,-3.5,-8.125]]]) JSON_CHECK_POSITIVE( [negative scientific notation], [[[-1e3, -1E3, -2.5E2, -1e+3, -125e-3, -3.125e-2, -3125e-05, -1.525878906e-5]]], [[[-1000,-1000,-250,-1000,-0.125,-0.03125,-0.03125,-1.525878906e-05]]]) JSON_CHECK_POSITIVE( [1e-9999 underflows to 0], [[[1e-9999]]], [[[0]]]) JSON_CHECK_NEGATIVE([a number by itself is not valid JSON], [1], [error: syntax error at beginning of input]) JSON_CHECK_NEGATIVE( [leading zeros not allowed], [[[0123]]], [error: leading zeros not allowed]) JSON_CHECK_NEGATIVE( [1e9999 is too big], [[[1e9999]]], [error: number outside valid range]) JSON_CHECK_NEGATIVE( [exponent bigger than INT_MAX], [[[1e9999999999999999999]]], [error: exponent outside valid range]) JSON_CHECK_NEGATIVE( [decimal point must be followed by digit], [[[1.]]], [error: decimal point must be followed by digit]) JSON_CHECK_NEGATIVE( [exponent must contain at least one digit (1)], [[[1e]]], [error: exponent must contain at least one digit]) JSON_CHECK_NEGATIVE( [exponent must contain at least one digit (2)], [[[1e+]]], [error: exponent must contain at least one digit]) JSON_CHECK_NEGATIVE( [exponent must contain at least one digit (3)], [[[1e-]]], [error: exponent must contain at least one digit]) AT_BANNER([JSON -- RFC 4627 examples]) JSON_CHECK_POSITIVE([RFC 4267 object example], [[{ "Image": { "Width": 800, "Height": 600, "Title": "View from 15th Floor", "Thumbnail": { "Url": "http://www.example.com/image/481989943", "Height": 125, "Width": "100" }, "IDs": [116, 943, 234, 38793] } }]], [[{"Image":{"Height":600,"IDs":[116,943,234,38793],"Thumbnail":{"Height":125,"Url":"http://www.example.com/image/481989943","Width":"100"},"Title":"View from 15th Floor","Width":800}}]]) JSON_CHECK_POSITIVE([RFC 4267 array 
example], [[[ { "precision": "zip", "Latitude": 37.7668, "Longitude": -122.3959, "Address": "", "City": "SAN FRANCISCO", "State": "CA", "Zip": "94107", "Country": "US" }, { "precision": "zip", "Latitude": 37.371991, "Longitude": -122.026020, "Address": "", "City": "SUNNYVALE", "State": "CA", "Zip": "94085", "Country": "US" } ]]], [[[{"Address":"","City":"SAN FRANCISCO","Country":"US","Latitude":37.7668,"Longitude":-122.3959,"State":"CA","Zip":"94107","precision":"zip"},{"Address":"","City":"SUNNYVALE","Country":"US","Latitude":37.371991,"Longitude":-122.02602,"State":"CA","Zip":"94085","precision":"zip"}]]]) AT_BANNER([JSON -- pathological cases]) JSON_CHECK_NEGATIVE([trailing garbage], [[[1]null]], [error: trailing garbage at end of input]) JSON_CHECK_NEGATIVE([formfeeds are not valid white space], [[[ ]]], [error: invalid character U+000c]) JSON_CHECK_NEGATIVE([';' is not a valid token], [;], [error: invalid character ';']) JSON_CHECK_NEGATIVE([arrays nesting too deep], [m4_for([i], [0], [1002], [1], [@<:@])dnl m4_for([i], [0], [1002], [1], [@:>@])], [error: input exceeds maximum nesting depth 1000]) JSON_CHECK_NEGATIVE([objects nesting too deep], [m4_for([i], [0], [1002], [1], [{"x":])dnl m4_for([i], [0], [1002], [1], [}])], [error: input exceeds maximum nesting depth 1000]) AT_SETUP([input may not be empty]) AT_KEYWORDS([json negative]) AT_CHECK([test-json /dev/null], [1], [error: line 0, column 0, byte 0: empty input stream ]) AT_CLEANUP AT_BANNER([JSON -- multiple inputs]) JSON_CHECK_POSITIVE([multiple adjacent objects], [[{}{}{}]], [[{} {} {}]], [--multiple]) JSON_CHECK_POSITIVE([multiple space-separated objects], [[{} {} {}]], [[{} {} {}]], [--multiple]) JSON_CHECK_POSITIVE([multiple objects on separate lines], [[{} {} {}]], [[{} {} {}]], [--multiple]) JSON_CHECK_POSITIVE([multiple objects and arrays], [[{}[]{}[]]], [[{} [] {} []]], [--multiple]) JSON_CHECK_NEGATIVE([garbage between multiple objects], [[{}x{}]], [[{} error: invalid keyword 'x' {}]], 
[--multiple]) JSON_CHECK_NEGATIVE([garbage after multiple objects], [[{}{}x]], [[{} {} error: invalid keyword 'x']], [--multiple]) openvswitch-2.0.1+git20140120/tests/jsonrpc-py.at000066400000000000000000000040041226605124000212440ustar00rootroot00000000000000AT_BANNER([JSON-RPC - Python]) AT_SETUP([JSON-RPC request and successful reply - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([$PYTHON $srcdir/test-jsonrpc.py --detach --pidfile=`pwd`/pid listen punix:socket]) AT_CHECK([test -s pid]) AT_CHECK([kill -0 `cat pid`]) AT_CHECK( [[$PYTHON $srcdir/test-jsonrpc.py request unix:socket echo '[{"a": "b", "x": null}]']], [0], [[{"error":null,"id":0,"result":[{"a":"b","x":null}]} ]], [], [test ! -e pid || kill `cat pid`]) AT_CHECK([kill `cat pid`]) AT_CLEANUP AT_SETUP([JSON-RPC request and error reply - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([$PYTHON $srcdir/test-jsonrpc.py --detach --pidfile=`pwd`/pid listen punix:socket]) AT_CHECK([test -s pid]) AT_CHECK([kill -0 `cat pid`]) AT_CHECK( [[$PYTHON $srcdir/test-jsonrpc.py request unix:socket bad-request '[]']], [0], [[{"error":{"error":"unknown method"},"id":0,"result":null} ]], [], [test ! -e pid || kill `cat pid`]) AT_CHECK([kill `cat pid`]) AT_CLEANUP AT_SETUP([JSON-RPC notification - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([$PYTHON $srcdir/test-jsonrpc.py --detach --pidfile=`pwd`/pid listen punix:socket]) AT_CHECK([test -s pid]) # When a daemon dies it deletes its pidfile, so make a copy. AT_CHECK([cp pid pid2]) AT_CHECK([kill -0 `cat pid2`]) AT_CHECK([[$PYTHON $srcdir/test-jsonrpc.py notify unix:socket shutdown '[]']], [0], [], [], [kill `cat pid2`]) AT_CHECK( [pid=`cat pid2` # First try a quick sleep, so that the test completes very quickly # in the normal case. POSIX doesn't require fractional times to # work, so this might not work. 
sleep 0.1; if kill -0 $pid; then :; else echo success; exit 0; fi # Then wait up to 2 seconds. sleep 1; if kill -0 $pid; then :; else echo success; exit 0; fi sleep 1; if kill -0 $pid; then :; else echo success; exit 0; fi echo failure; exit 1], [0], [success ], [ignore]) AT_CHECK([test ! -e pid]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/jsonrpc.at000066400000000000000000000034521226605124000206240ustar00rootroot00000000000000AT_BANNER([JSON-RPC - C]) AT_SETUP([JSON-RPC request and successful reply]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([test-jsonrpc --detach --no-chdir --pidfile="`pwd`"/pid listen punix:socket]) AT_CHECK([test -s pid]) AT_CHECK([kill -0 `cat pid`]) AT_CHECK( [[test-jsonrpc request unix:socket echo '[{"a": "b", "x": null}]']], [0], [[{"error":null,"id":0,"result":[{"a":"b","x":null}]} ]], [], [test ! -e pid || kill `cat pid`]) AT_CHECK([kill `cat pid`]) AT_CLEANUP AT_SETUP([JSON-RPC request and error reply]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([test-jsonrpc --detach --no-chdir --pidfile="`pwd`"/pid listen punix:socket]) AT_CHECK([test -s pid]) AT_CHECK([kill -0 `cat pid`]) AT_CHECK( [[test-jsonrpc request unix:socket bad-request '[]']], [0], [[{"error":{"error":"unknown method"},"id":0,"result":null} ]], [], [test ! -e pid || kill `cat pid`]) AT_CHECK([kill `cat pid`]) AT_CLEANUP AT_SETUP([JSON-RPC notification]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([test-jsonrpc --detach --no-chdir --pidfile="`pwd`"/pid listen punix:socket]) AT_CHECK([test -s pid]) # When a daemon dies it deletes its pidfile, so make a copy. AT_CHECK([cp pid pid2]) AT_CHECK([kill -0 `cat pid2`]) AT_CHECK([[test-jsonrpc notify unix:socket shutdown '[]']], [0], [], [], [kill `cat pid2`]) AT_CHECK( [pid=`cat pid2` # First try a quick sleep, so that the test completes very quickly # in the normal case. POSIX doesn't require fractional times to # work, so this might not work. 
sleep 0.1; if kill -0 $pid; then :; else echo success; exit 0; fi # Then wait up to 2 seconds. sleep 1; if kill -0 $pid; then :; else echo success; exit 0; fi sleep 1; if kill -0 $pid; then :; else echo success; exit 0; fi echo failure; exit 1], [0], [success ], [ignore]) AT_CHECK([test ! -e pid]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/lacp.at000066400000000000000000000323251226605124000200660ustar00rootroot00000000000000AT_BANNER([lacp]) AT_SETUP([lacp - config]) OVS_VSWITCHD_START([\ add-port br0 p1 --\ set Port p1 lacp=active --\ set Interface p1 type=dummy ]) AT_CHECK([ovs-appctl lacp/show], [0], [dnl ---- p1 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 sys_priority: 65535 aggregation key: 1 lacp_time: slow slave: p1: expired attached port_id: 1 port_priority: 65535 may_enable: false actor sys_id: aa:55:aa:55:00:00 actor sys_priority: 65535 actor port_id: 1 actor port_priority: 65535 actor key: 1 actor state: activity synchronized collecting distributing expired partner sys_id: 00:00:00:00:00:00 partner sys_priority: 0 partner port_id: 0 partner port_priority: 0 partner key: 0 partner state: timeout ]) AT_CHECK([ovs-appctl bond/show]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([lacp - multi port config]) OVS_VSWITCHD_START([dnl add-bond br0 bond p1 p2 --\ set Port bond lacp=active bond-mode=active-backup \ other_config:lacp-time="fast" \ other_config:lacp-system-id=11:22:33:44:55:66 \ other_config:lacp-system-priority=54321 --\ set Interface p1 type=dummy \ other_config:lacp-port-id=11 \ other_config:lacp-port-priority=111 \ other_config:lacp-aggregation-key=3333 --\ set Interface p2 type=dummy \ other_config:lacp-port-id=22 \ other_config:lacp-port-priority=222 \ other_config:lacp-aggregation-key=3333 ]) AT_CHECK([ovs-appctl lacp/show], [0], [stdout]) AT_CHECK([sed -e 's/aggregation key:.*/aggregation key: /' < stdout], [0], [dnl ---- bond ---- status: active negotiated sys_id: 11:22:33:44:55:66 sys_priority: 54321 aggregation key: lacp_time: 
fast slave: p1: expired attached port_id: 11 port_priority: 111 may_enable: false actor sys_id: 11:22:33:44:55:66 actor sys_priority: 54321 actor port_id: 11 actor port_priority: 111 actor key: 3333 actor state: activity timeout aggregation synchronized collecting distributing expired partner sys_id: 00:00:00:00:00:00 partner sys_priority: 0 partner port_id: 0 partner port_priority: 0 partner key: 0 partner state: timeout slave: p2: expired attached port_id: 22 port_priority: 222 may_enable: false actor sys_id: 11:22:33:44:55:66 actor sys_priority: 54321 actor port_id: 22 actor port_priority: 222 actor key: 3333 actor state: activity timeout aggregation synchronized collecting distributing expired partner sys_id: 00:00:00:00:00:00 partner sys_priority: 0 partner port_id: 0 partner port_priority: 0 partner key: 0 partner state: timeout ]) AT_CHECK([ovs-appctl bond/show], [0], [dnl ---- bond ---- bond_mode: active-backup bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated slave p1: disabled may_enable: false slave p2: disabled may_enable: false ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([lacp - negotiation]) # Create bond0 on br0 with interfaces p0 and p1 # and bond1 on br1 with interfaces p2 and p3 # with p0 patched to p2 and p1 patched to p3. 
OVS_VSWITCHD_START( [add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \ other-config:lacp-time=fast \ other-config:bond-rebalance-interval=0 -- \ set interface p0 type=patch options:peer=p2 ofport_request=1 -- \ set interface p1 type=patch options:peer=p3 ofport_request=2 -- \ add-br br1 -- \ set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ fail-mode=secure -- \ add-bond br1 bond1 p2 p3 bond_mode=balance-tcp lacp=active \ other-config:lacp-time=fast \ other-config:bond-rebalance-interval=0 -- \ set interface p2 type=patch options:peer=p0 ofport_request=3 -- \ set interface p3 type=patch options:peer=p1 ofport_request=4 --]) AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK ]) ovs-appctl time/stop # Wait for up to 5 (simulated) seconds, until LACP negotiation finishes. i=0 while :; do ovs-appctl lacp/show bond0 > bond0 AT_CAPTURE_FILE([bond0]) ovs-appctl lacp/show bond1 > bond1 AT_CAPTURE_FILE([bond1]) if grep negotiated bond0 && grep negotiated bond1; then if grep expired bond0 || grep expired bond1; then : else break fi fi i=`expr $i + 1` if test $i = 50; then AT_FAIL_IF([:]) fi ovs-appctl time/warp 100 done # Now check the correctly negotiated configuration. 
AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 ovs-appctl bond/show bond0 ovs-appctl bond/show bond1], [0], [stdout]) AT_CHECK([sed '/active slave/d' stdout], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 sys_priority: 65534 aggregation key: 2 lacp_time: fast slave: p0: current attached port_id: 1 port_priority: 65535 may_enable: true actor sys_id: aa:55:aa:55:00:00 actor sys_priority: 65534 actor port_id: 1 actor port_priority: 65535 actor key: 2 actor state: activity timeout aggregation synchronized collecting distributing partner sys_id: aa:66:aa:66:00:00 partner sys_priority: 65534 partner port_id: 3 partner port_priority: 65535 partner key: 4 partner state: activity timeout aggregation synchronized collecting distributing slave: p1: current attached port_id: 2 port_priority: 65535 may_enable: true actor sys_id: aa:55:aa:55:00:00 actor sys_priority: 65534 actor port_id: 2 actor port_priority: 65535 actor key: 2 actor state: activity timeout aggregation synchronized collecting distributing partner sys_id: aa:66:aa:66:00:00 partner sys_priority: 65534 partner port_id: 4 partner port_priority: 65535 partner key: 4 partner state: activity timeout aggregation synchronized collecting distributing ---- bond1 ---- status: active negotiated sys_id: aa:66:aa:66:00:00 sys_priority: 65534 aggregation key: 4 lacp_time: fast slave: p2: current attached port_id: 3 port_priority: 65535 may_enable: true actor sys_id: aa:66:aa:66:00:00 actor sys_priority: 65534 actor port_id: 3 actor port_priority: 65535 actor key: 4 actor state: activity timeout aggregation synchronized collecting distributing partner sys_id: aa:55:aa:55:00:00 partner sys_priority: 65534 partner port_id: 1 partner port_priority: 65535 partner key: 2 partner state: activity timeout aggregation synchronized collecting distributing slave: p3: current attached port_id: 4 port_priority: 65535 may_enable: true actor sys_id: aa:66:aa:66:00:00 actor sys_priority: 65534 
actor port_id: 4 actor port_priority: 65535 actor key: 4 actor state: activity timeout aggregation synchronized collecting distributing partner sys_id: aa:55:aa:55:00:00 partner sys_priority: 65534 partner port_id: 2 partner port_priority: 65535 partner key: 2 partner state: activity timeout aggregation synchronized collecting distributing ---- bond0 ---- bond_mode: balance-tcp bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated slave p0: enabled may_enable: true slave p1: enabled may_enable: true ---- bond1 ---- bond_mode: balance-tcp bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated slave p2: enabled may_enable: true slave p3: enabled may_enable: true ]) AT_CHECK([grep 'active slave' stdout], [0], [dnl active slave active slave ]) # Redirect the patch link between p0 and p2 so that no packets get # back and forth across them anymore. Then wait 4 simulated # seconds. The LACP state should become "expired" for p0 and p2. AT_CHECK([ovs-vsctl \ -- add-port br0 null0 -- set int null0 type=patch options:peer=p2 -- set int p2 options:peer=null0 \ -- add-port br1 null1 -- set int null1 type=patch options:peer=p0 -- set int p0 options:peer=null1]) for i in `seq 0 40`; do ovs-appctl time/warp 100; done AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 ovs-appctl bond/show bond0 ovs-appctl bond/show bond1], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 sys_priority: 65534 aggregation key: 2 lacp_time: fast slave: p0: expired attached port_id: 1 port_priority: 65535 may_enable: false actor sys_id: aa:55:aa:55:00:00 actor sys_priority: 65534 actor port_id: 1 actor port_priority: 65535 actor key: 2 actor state: activity timeout aggregation synchronized collecting distributing expired partner sys_id: aa:66:aa:66:00:00 partner sys_priority: 65534 partner port_id: 3 partner port_priority: 65535 partner key: 4 partner state: activity timeout aggregation collecting distributing slave: p1: 
current attached port_id: 2 port_priority: 65535 may_enable: true actor sys_id: aa:55:aa:55:00:00 actor sys_priority: 65534 actor port_id: 2 actor port_priority: 65535 actor key: 2 actor state: activity timeout aggregation synchronized collecting distributing partner sys_id: aa:66:aa:66:00:00 partner sys_priority: 65534 partner port_id: 4 partner port_priority: 65535 partner key: 4 partner state: activity timeout aggregation synchronized collecting distributing ---- bond1 ---- status: active negotiated sys_id: aa:66:aa:66:00:00 sys_priority: 65534 aggregation key: 4 lacp_time: fast slave: p2: expired attached port_id: 3 port_priority: 65535 may_enable: false actor sys_id: aa:66:aa:66:00:00 actor sys_priority: 65534 actor port_id: 3 actor port_priority: 65535 actor key: 4 actor state: activity timeout aggregation synchronized collecting distributing expired partner sys_id: aa:55:aa:55:00:00 partner sys_priority: 65534 partner port_id: 1 partner port_priority: 65535 partner key: 2 partner state: activity timeout aggregation collecting distributing slave: p3: current attached port_id: 4 port_priority: 65535 may_enable: true actor sys_id: aa:66:aa:66:00:00 actor sys_priority: 65534 actor port_id: 4 actor port_priority: 65535 actor key: 4 actor state: activity timeout aggregation synchronized collecting distributing partner sys_id: aa:55:aa:55:00:00 partner sys_priority: 65534 partner port_id: 2 partner port_priority: 65535 partner key: 2 partner state: activity timeout aggregation synchronized collecting distributing ---- bond0 ---- bond_mode: balance-tcp bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated slave p0: disabled may_enable: false slave p1: enabled active slave may_enable: true ---- bond1 ---- bond_mode: balance-tcp bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated slave p2: disabled may_enable: false slave p3: enabled active slave may_enable: true ]) # Wait 4 more simulated seconds. 
The LACP state should become # "defaulted" for p0 and p2. for i in `seq 0 40`; do ovs-appctl time/warp 100; done AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 ovs-appctl bond/show bond0 ovs-appctl bond/show bond1], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 sys_priority: 65534 aggregation key: 2 lacp_time: fast slave: p0: defaulted detached port_id: 1 port_priority: 65535 may_enable: false actor sys_id: aa:55:aa:55:00:00 actor sys_priority: 65534 actor port_id: 1 actor port_priority: 65535 actor key: 2 actor state: activity timeout aggregation defaulted partner sys_id: 00:00:00:00:00:00 partner sys_priority: 0 partner port_id: 0 partner port_priority: 0 partner key: 0 partner state: slave: p1: current attached port_id: 2 port_priority: 65535 may_enable: true actor sys_id: aa:55:aa:55:00:00 actor sys_priority: 65534 actor port_id: 2 actor port_priority: 65535 actor key: 2 actor state: activity timeout aggregation synchronized collecting distributing partner sys_id: aa:66:aa:66:00:00 partner sys_priority: 65534 partner port_id: 4 partner port_priority: 65535 partner key: 4 partner state: activity timeout aggregation synchronized collecting distributing ---- bond1 ---- status: active negotiated sys_id: aa:66:aa:66:00:00 sys_priority: 65534 aggregation key: 4 lacp_time: fast slave: p2: defaulted detached port_id: 3 port_priority: 65535 may_enable: false actor sys_id: aa:66:aa:66:00:00 actor sys_priority: 65534 actor port_id: 3 actor port_priority: 65535 actor key: 4 actor state: activity timeout aggregation defaulted partner sys_id: 00:00:00:00:00:00 partner sys_priority: 0 partner port_id: 0 partner port_priority: 0 partner key: 0 partner state: slave: p3: current attached port_id: 4 port_priority: 65535 may_enable: true actor sys_id: aa:66:aa:66:00:00 actor sys_priority: 65534 actor port_id: 4 actor port_priority: 65535 actor key: 4 actor state: activity timeout aggregation synchronized collecting distributing 
partner sys_id: aa:55:aa:55:00:00 partner sys_priority: 65534 partner port_id: 2 partner port_priority: 65535 partner key: 2 partner state: activity timeout aggregation synchronized collecting distributing ---- bond0 ---- bond_mode: balance-tcp bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated slave p0: disabled may_enable: false slave p1: enabled active slave may_enable: true ---- bond1 ---- bond_mode: balance-tcp bond-hash-basis: 0 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated slave p2: disabled may_enable: false slave p3: enabled active slave may_enable: true ]) OVS_VSWITCHD_STOP AT_CLEANUP openvswitch-2.0.1+git20140120/tests/learn.at000066400000000000000000000406071226605124000202520ustar00rootroot00000000000000AT_BANNER([learning action]) AT_SETUP([learning action - parsing and formatting]) AT_DATA([flows.txt], [[ actions=learn() actions=learn(NXM_OF_VLAN_TCI[0..11], NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], output:NXM_OF_IN_PORT[], load:10->NXM_NX_REG0[5..10]) actions=learn(table=1,idle_timeout=10, hard_timeout=20, fin_idle_timeout=5, fin_hard_timeout=10, priority=10, cookie=0xfedcba9876543210, in_port=99,NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],load:NXM_OF_IN_PORT[]->NXM_NX_REG1[16..31]) ]]) AT_CHECK([ovs-ofctl parse-flows flows.txt], [0], [[usable protocols: any chosen protocol: OpenFlow10-table_id OFPT_FLOW_MOD (xid=0x1): ADD actions=learn(table=1) OFPT_FLOW_MOD (xid=0x2): ADD actions=learn(table=1,NXM_OF_VLAN_TCI[0..11],NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],output:NXM_OF_IN_PORT[],load:0xa->NXM_NX_REG0[5..10]) OFPT_FLOW_MOD (xid=0x3): ADD actions=learn(table=1,idle_timeout=10,hard_timeout=20,fin_idle_timeout=5,fin_hard_timeout=10,priority=10,cookie=0xfedcba9876543210,in_port=99,NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],load:NXM_OF_IN_PORT[]->NXM_NX_REG1[16..31]) ]]) AT_CLEANUP AT_SETUP([learning action - parsing and formatting - illegal in_port_oxm]) AT_CHECK([[ovs-ofctl parse-flow 'actions=learn(table=1, in_port_oxm=123456)']], [1], [], [stderr]) 
AT_CHECK([sed -e 's/.*|ofp_util|WARN|//' < stderr], [0], [[port 123456 is outside the supported range 0 through ffff or 0xffffff00 through 0xffffffff ovs-ofctl: 123456: port value out of range for in_port_oxm ]], [[]]) AT_CLEANUP AT_SETUP([learning action - parsing and formatting - OXM]) AT_DATA([flows.txt], [[ actions=learn(output:OXM_OF_IN_PORT[]) actions=learn(table=1, in_port=1, load:OXM_OF_IN_PORT[]->NXM_NX_REG1[], load:0xfffffffe->OXM_OF_IN_PORT[]) ]]) AT_CHECK([ovs-ofctl -O OpenFlow12 parse-flows flows.txt], [0], [[usable protocols: any chosen protocol: OXM-OpenFlow12 OFPT_FLOW_MOD (OF1.2) (xid=0x1): ADD table:255 actions=learn(table=1,output:OXM_OF_IN_PORT[]) OFPT_FLOW_MOD (OF1.2) (xid=0x2): ADD table:255 actions=learn(table=1,in_port=1,load:OXM_OF_IN_PORT[]->NXM_NX_REG1[],load:0xfffffffe->OXM_OF_IN_PORT[]) ]]) AT_CLEANUP AT_SETUP([learning action - examples]) AT_DATA([flows.txt], [[ # These are the examples from nicira-ext.h. actions=learn(in_port=99,NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], load:NXM_OF_IN_PORT[]->NXM_NX_REG1[16..31]) actions=learn(NXM_OF_VLAN_TCI[0..11], NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],output:NXM_OF_IN_PORT[]) table=0 actions=learn(table=1,hard_timeout=10, NXM_OF_VLAN_TCI[0..11],output:NXM_OF_IN_PORT[]), resubmit(,1) table=1 priority=0 actions=flood ]]) AT_CHECK([ovs-ofctl parse-flows flows.txt], [0], [[usable protocols: OXM,OpenFlow10+table_id,NXM+table_id,OpenFlow11 chosen protocol: OpenFlow10+table_id OFPT_FLOW_MOD (xid=0x1): ADD table:255 actions=learn(table=1,in_port=99,NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],load:NXM_OF_IN_PORT[]->NXM_NX_REG1[16..31]) OFPT_FLOW_MOD (xid=0x2): ADD table:255 actions=learn(table=1,NXM_OF_VLAN_TCI[0..11],NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],output:NXM_OF_IN_PORT[]) OFPT_FLOW_MOD (xid=0x3): ADD actions=learn(table=1,hard_timeout=10,NXM_OF_VLAN_TCI[0..11],output:NXM_OF_IN_PORT[]),resubmit(,1) OFPT_FLOW_MOD (xid=0x4): ADD table:1 priority=0 actions=FLOOD ]]) AT_CLEANUP AT_SETUP([learning action - satisfied 
prerequisites]) AT_DATA([flows.txt], [[actions=learn(eth_type=0x800,load:5->NXM_OF_IP_DST[]) ip,actions=learn(load:NXM_OF_IP_DST[]->NXM_NX_REG1[]) ip,actions=learn(eth_type=0x800,OXM_OF_IPV4_DST[]) ]]) AT_CHECK([ovs-ofctl parse-flows flows.txt], [0], [[usable protocols: any chosen protocol: OpenFlow10-table_id OFPT_FLOW_MOD (xid=0x1): ADD actions=learn(table=1,eth_type=0x800,load:0x5->NXM_OF_IP_DST[]) OFPT_FLOW_MOD (xid=0x2): ADD ip actions=learn(table=1,load:NXM_OF_IP_DST[]->NXM_NX_REG1[]) OFPT_FLOW_MOD (xid=0x3): ADD ip actions=learn(table=1,eth_type=0x800,NXM_OF_IP_DST[]) ]]) AT_CLEANUP AT_SETUP([learning action - invalid prerequisites]) AT_CHECK([[ovs-ofctl parse-flow 'actions=learn(load:5->NXM_OF_IP_DST[])']], [1], [], [stderr]) AT_CHECK([sed -e 's/.*|meta_flow|WARN|//' < stderr], [0], [[destination field ip_dst lacks correct prerequisites ovs-ofctl: actions are invalid with specified match (OFPBAC_MATCH_INCONSISTENT) ]], [[]]) AT_CHECK([[ovs-ofctl parse-flow 'actions=learn(load:NXM_OF_IP_DST[]->NXM_NX_REG1[])']], [1], [], [stderr]) AT_CHECK([sed -e 's/.*|meta_flow|WARN|//' < stderr], [0], [[source field ip_dst lacks correct prerequisites ovs-ofctl: actions are invalid with specified match (OFPBAC_MATCH_INCONSISTENT) ]]) AT_CLEANUP AT_SETUP([learning action - standard VLAN+MAC learning]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 -- \ add-port br0 p2 -- set Interface p2 type=dummy ofport_request=2 -- \ add-port br0 p3 -- set Interface p3 type=dummy ofport_request=3]) # Set up flow table for VLAN+MAC learning. AT_DATA([flows.txt], [[ table=0 actions=learn(table=1, hard_timeout=60, NXM_OF_VLAN_TCI[0..11], NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], output:NXM_OF_IN_PORT[]), resubmit(,1) table=1 priority=0 actions=flood ]]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) # Trace an ARP packet arriving on port 3, to create a MAC learning entry. 
flow="in_port(3),eth(src=50:54:00:00:00:05,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=192.168.0.1,tip=192.168.0.2,op=1,sha=50:54:00:00:00:05,tha=00:00:00:00:00:00)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow" -generate], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` expected="1,2,100" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) # Check for the MAC learning entry. AT_CHECK([ovs-ofctl dump-flows br0 table=1 | ofctl_strip | sort], [0], [dnl table=1, hard_timeout=60, vlan_tci=0x0000/0x0fff,dl_dst=50:54:00:00:00:05 actions=output:3 table=1, priority=0 actions=FLOOD NXST_FLOW reply: ]) # Trace a packet arrival destined for the learned MAC. # (This will also learn a MAC.) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:06,dst=50:54:00:00:00:05),eth_type(0x0806),arp(sip=192.168.0.2,tip=192.168.0.1,op=2,sha=50:54:00:00:00:06,tha=50:54:00:00:00:05)' -generate], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 3 ]) # Check for both MAC learning entries. AT_CHECK([ovs-ofctl dump-flows br0 table=1 | ofctl_strip |sort], [0], [dnl table=1, hard_timeout=60, vlan_tci=0x0000/0x0fff,dl_dst=50:54:00:00:00:05 actions=output:3 table=1, hard_timeout=60, vlan_tci=0x0000/0x0fff,dl_dst=50:54:00:00:00:06 actions=output:1 table=1, priority=0 actions=FLOOD NXST_FLOW reply: ]) # Trace a packet arrival that updates the first learned MAC entry. 
flow="in_port(2),eth(src=50:54:00:00:00:05,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=192.168.0.1,tip=192.168.0.2,op=1,sha=50:54:00:00:00:05,tha=00:00:00:00:00:00)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow" -generate], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` expected="1,3,100" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) # Check that the MAC learning entry was updated. AT_CHECK([ovs-ofctl dump-flows br0 table=1 | ofctl_strip | sort], [0], [dnl table=1, hard_timeout=60, vlan_tci=0x0000/0x0fff,dl_dst=50:54:00:00:00:05 actions=output:2 table=1, hard_timeout=60, vlan_tci=0x0000/0x0fff,dl_dst=50:54:00:00:00:06 actions=output:1 table=1, priority=0 actions=FLOOD NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl This test checks that repeated uses of a "learn" action cause the dnl modified time of the learned flow to advance. Otherwise, the dnl learned flow will expire after its hard timeout even though it's dnl supposed to be refreshed. (The expiration can be hard to see since dnl it gets re-learned again the next time a packet appears, but dnl sometimes the expiration can cause temporary flooding etc.) AT_SETUP([learning action - learn refreshes hard_age]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 -- \ add-port br0 p2 -- set Interface p2 type=dummy ofport_request=2 -- \ add-port br0 p3 -- set Interface p3 type=dummy ofport_request=3]) ovs-appctl time/stop # Set up flow table for MAC learning. AT_DATA([flows.txt], [[ table=0 actions=learn(table=1, hard_timeout=10, NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], output:NXM_OF_IN_PORT[]), resubmit(,1) table=1 priority=0 actions=flood ]]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) # Trace an ICMP packet arriving on port 3, to create a MAC learning entry. 
flow="in_port(3),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow" -generate], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` expected="1,2,100" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) # Check that the MAC learning entry appeared. AT_CHECK([ovs-ofctl dump-flows br0 table=1 | ofctl_strip | sort], [0], [dnl table=1, hard_timeout=10, dl_dst=50:54:00:00:00:07 actions=output:3 table=1, priority=0 actions=FLOOD NXST_FLOW reply: ]) # For 25 seconds, make sure that the MAC learning entry doesn't # disappear as long as we refresh it every second. for i in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25; do ovs-appctl time/warp 1000 AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow" -generate], [0], [stdout]) # Check that the entry is there. AT_CHECK([ovs-ofctl dump-flows br0 table=1], [0], [stdout]) AT_CHECK([ofctl_strip < stdout | sort], [0], [dnl table=1, hard_timeout=10, dl_dst=50:54:00:00:00:07 actions=output:3 table=1, priority=0 actions=FLOOD NXST_FLOW reply: ]) if test $i != 1; then # Check that hard_age has appeared. We need to do this separately # from the above check because ofctl_strip removes it. dump-flows # only prints hard_age when it is different from the flow's duration # (that is, the number of seconds from the time it was created), # so we only check for it after we've refreshed the flow once. AT_CHECK([grep dl_dst=50:54:00:00:00:07 stdout | grep -c hard_age], [0], [1 ]) fi done # Make sure that 15 seconds without refreshing makes the flow time out. 
ovs-appctl time/warp 5000 ovs-appctl time/warp 5000 ovs-appctl time/warp 5000 AT_CHECK([ovs-ofctl dump-flows br0 table=1 | ofctl_strip | sort], [0], [dnl table=1, priority=0 actions=FLOOD NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([learning action - TCPv4 port learning]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy -- \ add-port br0 p2 -- set Interface p2 type=dummy -- \ add-port br0 p3 -- set Interface p3 type=dummy]) # Set up flow table for TCPv4 port learning. AT_CHECK([[ovs-ofctl add-flow br0 'table=0 tcp actions=learn(table=1, hard_timeout=60, eth_type=0x800, nw_proto=6, NXM_OF_IP_SRC[]=NXM_OF_IP_DST[], NXM_OF_IP_DST[]=NXM_OF_IP_SRC[], NXM_OF_TCP_SRC[]=NXM_OF_TCP_DST[], NXM_OF_TCP_DST[]=NXM_OF_TCP_SRC[]), flood']]) # Trace a TCPv4 packet arriving on port 3. flow="in_port(3),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:06),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=40000,dst=80)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow" -generate], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` expected="1,2,100" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) # Check for the learning entry. AT_CHECK([ovs-ofctl dump-flows br0 table=1 | ofctl_strip | sort], [0], [dnl table=1, hard_timeout=60, tcp,nw_src=192.168.0.1,nw_dst=192.168.0.2,tp_src=80,tp_dst=40000 actions=drop NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([learning action - TCPv6 port learning]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy -- \ add-port br0 p2 -- set Interface p2 type=dummy -- \ add-port br0 p3 -- set Interface p3 type=dummy]) # Set up flow table for TCPv6 port learning. # Also add a 128-bit-wide "load" action and a 128-bit literal match to check # that they work. 
AT_CHECK([[ovs-ofctl add-flow br0 'table=0 tcp6 actions=learn(table=1, hard_timeout=60, eth_type=0x86dd, nw_proto=6, NXM_NX_IPV6_SRC[]=NXM_NX_IPV6_DST[], ipv6_dst=2001:0db8:85a3:0000:0000:8a2e:0370:7334, NXM_OF_TCP_SRC[]=NXM_OF_TCP_DST[], NXM_OF_TCP_DST[]=NXM_OF_TCP_SRC[], load(0x20010db885a308d313198a2e03707348->NXM_NX_IPV6_DST[])), flood']]) # Trace a TCPv6 packet arriving on port 3. flow="in_port(3),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:06),eth_type(0x86dd),ipv6(src=fec0::2,dst=fec0::1,label=0,proto=6,tclass=0,hlimit=255,frag=no),tcp(src=40000,dst=80)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow" -generate], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` expected="1,2,100" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) # Check for the learning entry. AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl table=1, hard_timeout=60, tcp6,ipv6_src=fec0::1,ipv6_dst=2001:db8:85a3::8a2e:370:7334,tp_src=80,tp_dst=40000 actions=load:0x13198a2e03707348->NXM_NX_IPV6_DST[[0..63]],load:0x20010db885a308d3->NXM_NX_IPV6_DST[[64..127]] tcp6 actions=learn(table=1,hard_timeout=60,eth_type=0x86dd,nw_proto=6,NXM_NX_IPV6_SRC[[]]=NXM_NX_IPV6_DST[[]],ipv6_dst=2001:db8:85a3::8a2e:370:7334,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],NXM_OF_TCP_DST[[]]=NXM_OF_TCP_SRC[[]],load:0x20010db885a308d313198a2e03707348->NXM_NX_IPV6_DST[[]]),FLOOD NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP # In this use of a learn action, the first packet in the flow creates # a new flow that changes the behavior of subsequent packets in the # flow. AT_SETUP([learning action - self-modifying flow]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2, 3) # Set up flow table for TCPv4 port learning. 
AT_CHECK([[ovs-ofctl add-flow br0 'actions=load:3->NXM_NX_REG0[0..15],learn(table=0,priority=65535,NXM_OF_ETH_SRC[],NXM_OF_VLAN_TCI[0..11],output:NXM_NX_REG0[0..15]),output:2']]) # Trace some packets arriving. The particular packets don't matter. for i in 1 2 3 4 5 6 7 8 9 10; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)' ovs-appctl time/warp 10 done # Check for the learning entry. ovs-appctl time/warp 1000 AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [[ n_packets=1, n_bytes=60, actions=load:0x3->NXM_NX_REG0[0..15],learn(table=0,priority=65535,NXM_OF_ETH_SRC[],NXM_OF_VLAN_TCI[0..11],output:NXM_NX_REG0[0..15]),output:2 n_packets=9, n_bytes=540, priority=65535,vlan_tci=0x0000/0x0fff,dl_src=50:54:00:00:00:05 actions=output:3 NXST_FLOW reply: ]]) # Check that the first packet went out port 2 and the rest out port 3. AT_CHECK( [(ovs-ofctl dump-ports br0 2; ovs-ofctl dump-ports br0 3) | STRIP_XIDS], [0], [OFPST_PORT reply: 1 ports port 2: rx pkts=0, bytes=0, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=1, bytes=60, drop=0, errs=0, coll=0 OFPST_PORT reply: 1 ports port 3: rx pkts=0, bytes=0, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=9, bytes=540, drop=0, errs=0, coll=0 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([learning action - fin_timeout feature]) # This is a totally artificial use of the "learn" action. The only purpose # is to check that specifying fin_idle_timeout or fin_hard_timeout causes # a corresponding fin_timeout action to end up in the learned flows. 
OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1]) AT_CHECK([[ovs-ofctl add-flow br0 'actions=learn(fin_hard_timeout=10, fin_idle_timeout=5, NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], output:NXM_OF_IN_PORT[])']]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),arp(sip=192.168.0.1,tip=192.168.0.2,op=1,sha=50:54:00:00:00:05,tha=00:00:00:00:00:00)' -generate], [0], [ignore]) AT_CHECK([ovs-ofctl dump-flows br0 table=1 | ofctl_strip], [0], [NXST_FLOW reply: table=1, dl_dst=50:54:00:00:00:05 actions=fin_timeout(idle_timeout=5,hard_timeout=10),output:1 ]) OVS_VSWITCHD_STOP AT_CLEANUP openvswitch-2.0.1+git20140120/tests/library.at000066400000000000000000000117571226605124000206210ustar00rootroot00000000000000AT_BANNER([library unit tests]) AT_SETUP([test flow extractor]) AT_CHECK([$PERL `which flowgen.pl` >/dev/null 3>flows 4>pcap]) AT_CHECK([test-flows . We do not have a workaround dnl for other platforms, so we skip the test there. AT_SETUP([test unix socket, long pathname - C]) AT_SKIP_IF([test ! -d /proc/self/fd]) dnl Linux has a 108 byte limit; this is 150 bytes long. longname=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 mkdir $longname cd $longname AT_CHECK([test-unix-socket ../$longname/socket socket]) AT_CLEANUP AT_SETUP([test unix socket, short pathname - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CHECK([$PYTHON $srcdir/test-unix-socket.py x]) AT_CLEANUP dnl Unix sockets with long names are problematic because the name has to dnl go in a fixed-length field in struct sockaddr_un. Generally the limit dnl is about 100 bytes. On Linux, we work around this by indirecting through dnl a directory fd using /proc/self/fd/. We do not have a workaround dnl for other platforms, so we skip the test there. 
AT_SETUP([test unix socket, long pathname - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_SKIP_IF([test ! -d /proc/self/fd]) dnl Linux has a 108 byte limit; this is 150 bytes long. longname=012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 mkdir $longname cd $longname AT_CHECK([$PYTHON $abs_srcdir/test-unix-socket.py ../$longname/socket socket]) AT_CLEANUP AT_SETUP([ovs_assert]) OVS_LOGDIR=`pwd`; export OVS_LOGDIR AT_CHECK([test-util -voff -vfile:info '-vPATTERN:file:%c|%p|%m' --log-file assert || kill -l $?], [0], [ABRT ], [stderr]) AT_CHECK([sed 's/\(opened log file\) .*/\1/ s/|[[^|]]*: /|/' test-util.log], [0], [dnl vlog|INFO|opened log file util|EMER|assertion false failed in test_assert() ]) AT_CHECK([sed 's/.*: // 1q' stderr], [0], [assertion false failed in test_assert() ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/lockfile.at000066400000000000000000000035141226605124000207350ustar00rootroot00000000000000AT_BANNER([lockfile unit tests]) m4_define([CHECK_LOCKFILE], [AT_SETUP([m4_translit([$1], [_], [ ])]) AT_KEYWORDS([lockfile]) AT_CHECK([test-lockfile $1], [0], [$1: success (m4_if( [$2], [1], [$2 child], [$2 children])) ], [stderr]) AT_CHECK([sed 's/pid [[0-9]]*/pid /' stderr], [0], [$3]) AT_CLEANUP]) CHECK_LOCKFILE([lock_and_unlock], [0]) CHECK_LOCKFILE([lock_and_unlock_twice], [0]) CHECK_LOCKFILE([lock_blocks_same_process], [0], [lockfile|WARN|.file.~lock~: failed to lock file: Resource deadlock avoided ]) CHECK_LOCKFILE([lock_blocks_same_process_twice], [0], [lockfile|WARN|.file.~lock~: failed to lock file: Resource deadlock avoided lockfile|WARN|.file.~lock~: failed to lock file: Resource deadlock avoided ]) CHECK_LOCKFILE([lock_blocks_other_process], [1], [lockfile|WARN|.file.~lock~: child does not inherit lock lockfile|WARN|.file.~lock~: cannot lock file because it is already locked by pid ]) CHECK_LOCKFILE([lock_twice_blocks_other_process], 
[1], [lockfile|WARN|.file.~lock~: failed to lock file: Resource deadlock avoided lockfile|WARN|.file.~lock~: child does not inherit lock lockfile|WARN|.file.~lock~: cannot lock file because it is already locked by pid ]) CHECK_LOCKFILE([lock_and_unlock_allows_other_process], [1]) CHECK_LOCKFILE([lock_multiple], [0], [lockfile|WARN|.a.~lock~: failed to lock file: Resource deadlock avoided ]) CHECK_LOCKFILE([lock_symlink], [0], [lockfile|WARN|.a.~lock~: failed to lock file: Resource deadlock avoided lockfile|WARN|.b.~lock~: failed to lock file: Resource deadlock avoided lockfile|WARN|.b.~lock~: failed to lock file: Resource deadlock avoided lockfile|WARN|.a.~lock~: failed to lock file: Resource deadlock avoided ]) CHECK_LOCKFILE([lock_symlink_to_dir], [0], [lockfile|WARN|dir/.b.~lock~: failed to lock file: Resource deadlock avoided ]) openvswitch-2.0.1+git20140120/tests/multipath.at000066400000000000000000000447151226605124000211640ustar00rootroot00000000000000AT_BANNER([multipath link selection]) # The test-multipath program prints a lot of output on stdout, but each of the # tests below ignores it because it will vary a bit depending on endianness and # floating point precision. test-multipath will output an error message on # stderr and return with exit code 1 if anything really goes wrong. In each # case, we list the (approximate) expected output in a comment to aid debugging # if the test does fail. 
AT_SETUP([modulo_n multipath link selection]) AT_CHECK([[test-multipath 'eth_src,50,modulo_n,1,0,NXM_NX_REG0[]']], [0], [ignore]) # 1 -> 2: disruption=0.50 (perfect=0.50); stddev/expected=0.0000 # 2 -> 3: disruption=0.66 (perfect=0.33); stddev/expected=0.0023 # 3 -> 4: disruption=0.75 (perfect=0.25); stddev/expected=0.0061 # 4 -> 5: disruption=0.80 (perfect=0.20); stddev/expected=0.0082 # 5 -> 6: disruption=0.83 (perfect=0.17); stddev/expected=0.0083 # 6 -> 7: disruption=0.86 (perfect=0.14); stddev/expected=0.0061 # 7 -> 8: disruption=0.88 (perfect=0.12); stddev/expected=0.0103 # 8 -> 9: disruption=0.89 (perfect=0.11); stddev/expected=0.0129 # 9 -> 10: disruption=0.90 (perfect=0.10); stddev/expected=0.0091 #10 -> 11: disruption=0.91 (perfect=0.09); stddev/expected=0.0114 #11 -> 12: disruption=0.91 (perfect=0.08); stddev/expected=0.0073 #12 -> 13: disruption=0.92 (perfect=0.08); stddev/expected=0.0165 #13 -> 14: disruption=0.93 (perfect=0.07); stddev/expected=0.0149 #14 -> 15: disruption=0.93 (perfect=0.07); stddev/expected=0.0127 #15 -> 16: disruption=0.94 (perfect=0.06); stddev/expected=0.0142 #16 -> 17: disruption=0.94 (perfect=0.06); stddev/expected=0.0098 #17 -> 18: disruption=0.94 (perfect=0.06); stddev/expected=0.0159 #18 -> 19: disruption=0.95 (perfect=0.05); stddev/expected=0.0121 #19 -> 20: disruption=0.95 (perfect=0.05); stddev/expected=0.0195 #20 -> 21: disruption=0.95 (perfect=0.05); stddev/expected=0.0120 #21 -> 22: disruption=0.95 (perfect=0.05); stddev/expected=0.0181 #22 -> 23: disruption=0.96 (perfect=0.04); stddev/expected=0.0222 #23 -> 24: disruption=0.96 (perfect=0.04); stddev/expected=0.0164 #24 -> 25: disruption=0.96 (perfect=0.04); stddev/expected=0.0146 #25 -> 26: disruption=0.96 (perfect=0.04); stddev/expected=0.0175 #26 -> 27: disruption=0.96 (perfect=0.04); stddev/expected=0.0231 #27 -> 28: disruption=0.96 (perfect=0.04); stddev/expected=0.0172 #28 -> 29: disruption=0.97 (perfect=0.03); stddev/expected=0.0211 #29 -> 30: disruption=0.97 
(perfect=0.03); stddev/expected=0.0213 #30 -> 31: disruption=0.97 (perfect=0.03); stddev/expected=0.0253 #31 -> 32: disruption=0.97 (perfect=0.03); stddev/expected=0.0208 #32 -> 33: disruption=0.97 (perfect=0.03); stddev/expected=0.0223 #33 -> 34: disruption=0.97 (perfect=0.03); stddev/expected=0.0215 #34 -> 35: disruption=0.97 (perfect=0.03); stddev/expected=0.0201 #35 -> 36: disruption=0.97 (perfect=0.03); stddev/expected=0.0220 #36 -> 37: disruption=0.97 (perfect=0.03); stddev/expected=0.0221 #37 -> 38: disruption=0.97 (perfect=0.03); stddev/expected=0.0201 #38 -> 39: disruption=0.97 (perfect=0.03); stddev/expected=0.0215 #39 -> 40: disruption=0.97 (perfect=0.03); stddev/expected=0.0271 #40 -> 41: disruption=0.98 (perfect=0.02); stddev/expected=0.0272 #41 -> 42: disruption=0.98 (perfect=0.02); stddev/expected=0.0208 #42 -> 43: disruption=0.98 (perfect=0.02); stddev/expected=0.0226 #43 -> 44: disruption=0.98 (perfect=0.02); stddev/expected=0.0264 #44 -> 45: disruption=0.98 (perfect=0.02); stddev/expected=0.0233 #45 -> 46: disruption=0.98 (perfect=0.02); stddev/expected=0.0285 #46 -> 47: disruption=0.98 (perfect=0.02); stddev/expected=0.0246 #47 -> 48: disruption=0.98 (perfect=0.02); stddev/expected=0.0282 #48 -> 49: disruption=0.98 (perfect=0.02); stddev/expected=0.0233 #49 -> 50: disruption=0.98 (perfect=0.02); stddev/expected=0.0197 #50 -> 51: disruption=0.98 (perfect=0.02); stddev/expected=0.0317 #51 -> 52: disruption=0.98 (perfect=0.02); stddev/expected=0.0283 #52 -> 53: disruption=0.98 (perfect=0.02); stddev/expected=0.0282 #53 -> 54: disruption=0.98 (perfect=0.02); stddev/expected=0.0273 #54 -> 55: disruption=0.98 (perfect=0.02); stddev/expected=0.0283 #55 -> 56: disruption=0.98 (perfect=0.02); stddev/expected=0.0288 #56 -> 57: disruption=0.98 (perfect=0.02); stddev/expected=0.0263 #57 -> 58: disruption=0.98 (perfect=0.02); stddev/expected=0.0339 #58 -> 59: disruption=0.98 (perfect=0.02); stddev/expected=0.0262 #59 -> 60: disruption=0.98 (perfect=0.02); 
stddev/expected=0.0309 #60 -> 61: disruption=0.98 (perfect=0.02); stddev/expected=0.0285 #61 -> 62: disruption=0.98 (perfect=0.02); stddev/expected=0.0288 #62 -> 63: disruption=0.98 (perfect=0.02); stddev/expected=0.0298 #63 -> 64: disruption=0.98 (perfect=0.02); stddev/expected=0.0277 AT_CLEANUP AT_SETUP([hash_threshold multipath link selection]) AT_CHECK([[test-multipath 'eth_src,50,hash_threshold,1,0,NXM_NX_REG0[]']], [0], [ignore]) # 1 -> 2: disruption=0.50 (perfect=0.50); stddev/expected=0.0000 # 2 -> 3: disruption=0.50 (perfect=0.33); stddev/expected=0.0056 # 3 -> 4: disruption=0.50 (perfect=0.25); stddev/expected=0.0050 # 4 -> 5: disruption=0.50 (perfect=0.20); stddev/expected=0.0074 # 5 -> 6: disruption=0.50 (perfect=0.17); stddev/expected=0.0031 # 6 -> 7: disruption=0.50 (perfect=0.14); stddev/expected=0.0078 # 7 -> 8: disruption=0.50 (perfect=0.12); stddev/expected=0.0085 # 8 -> 9: disruption=0.50 (perfect=0.11); stddev/expected=0.0093 # 9 -> 10: disruption=0.50 (perfect=0.10); stddev/expected=0.0083 #10 -> 11: disruption=0.51 (perfect=0.09); stddev/expected=0.0110 #11 -> 12: disruption=0.50 (perfect=0.08); stddev/expected=0.0124 #12 -> 13: disruption=0.50 (perfect=0.08); stddev/expected=0.0143 #13 -> 14: disruption=0.50 (perfect=0.07); stddev/expected=0.0148 #14 -> 15: disruption=0.50 (perfect=0.07); stddev/expected=0.0099 #15 -> 16: disruption=0.50 (perfect=0.06); stddev/expected=0.0166 #16 -> 17: disruption=0.50 (perfect=0.06); stddev/expected=0.0099 #17 -> 18: disruption=0.50 (perfect=0.06); stddev/expected=0.0194 #18 -> 19: disruption=0.50 (perfect=0.05); stddev/expected=0.0169 #19 -> 20: disruption=0.50 (perfect=0.05); stddev/expected=0.0169 #20 -> 21: disruption=0.50 (perfect=0.05); stddev/expected=0.0185 #21 -> 22: disruption=0.50 (perfect=0.05); stddev/expected=0.0160 #22 -> 23: disruption=0.50 (perfect=0.04); stddev/expected=0.0236 #23 -> 24: disruption=0.50 (perfect=0.04); stddev/expected=0.0147 #24 -> 25: disruption=0.50 (perfect=0.04); 
stddev/expected=0.0195 #25 -> 26: disruption=0.50 (perfect=0.04); stddev/expected=0.0199 #26 -> 27: disruption=0.50 (perfect=0.04); stddev/expected=0.0227 #27 -> 28: disruption=0.50 (perfect=0.04); stddev/expected=0.0198 #28 -> 29: disruption=0.50 (perfect=0.03); stddev/expected=0.0216 #29 -> 30: disruption=0.50 (perfect=0.03); stddev/expected=0.0233 #30 -> 31: disruption=0.50 (perfect=0.03); stddev/expected=0.0266 #31 -> 32: disruption=0.51 (perfect=0.03); stddev/expected=0.0238 #32 -> 33: disruption=0.50 (perfect=0.03); stddev/expected=0.0194 #33 -> 34: disruption=0.50 (perfect=0.03); stddev/expected=0.0173 #34 -> 35: disruption=0.50 (perfect=0.03); stddev/expected=0.0223 #35 -> 36: disruption=0.50 (perfect=0.03); stddev/expected=0.0220 #36 -> 37: disruption=0.50 (perfect=0.03); stddev/expected=0.0237 #37 -> 38: disruption=0.50 (perfect=0.03); stddev/expected=0.0237 #38 -> 39: disruption=0.50 (perfect=0.03); stddev/expected=0.0251 #39 -> 40: disruption=0.50 (perfect=0.03); stddev/expected=0.0212 #40 -> 41: disruption=0.50 (perfect=0.02); stddev/expected=0.0267 #41 -> 42: disruption=0.50 (perfect=0.02); stddev/expected=0.0242 #42 -> 43: disruption=0.50 (perfect=0.02); stddev/expected=0.0222 #43 -> 44: disruption=0.50 (perfect=0.02); stddev/expected=0.0244 #44 -> 45: disruption=0.50 (perfect=0.02); stddev/expected=0.0231 #45 -> 46: disruption=0.50 (perfect=0.02); stddev/expected=0.0299 #46 -> 47: disruption=0.50 (perfect=0.02); stddev/expected=0.0263 #47 -> 48: disruption=0.50 (perfect=0.02); stddev/expected=0.0307 #48 -> 49: disruption=0.50 (perfect=0.02); stddev/expected=0.0253 #49 -> 50: disruption=0.50 (perfect=0.02); stddev/expected=0.0228 #50 -> 51: disruption=0.50 (perfect=0.02); stddev/expected=0.0273 #51 -> 52: disruption=0.50 (perfect=0.02); stddev/expected=0.0243 #52 -> 53: disruption=0.50 (perfect=0.02); stddev/expected=0.0268 #53 -> 54: disruption=0.50 (perfect=0.02); stddev/expected=0.0251 #54 -> 55: disruption=0.50 (perfect=0.02); 
stddev/expected=0.0297 #55 -> 56: disruption=0.50 (perfect=0.02); stddev/expected=0.0287 #56 -> 57: disruption=0.50 (perfect=0.02); stddev/expected=0.0299 #57 -> 58: disruption=0.50 (perfect=0.02); stddev/expected=0.0272 #58 -> 59: disruption=0.50 (perfect=0.02); stddev/expected=0.0295 #59 -> 60: disruption=0.50 (perfect=0.02); stddev/expected=0.0312 #60 -> 61: disruption=0.50 (perfect=0.02); stddev/expected=0.0361 #61 -> 62: disruption=0.50 (perfect=0.02); stddev/expected=0.0308 #62 -> 63: disruption=0.50 (perfect=0.02); stddev/expected=0.0283 #63 -> 64: disruption=0.50 (perfect=0.02); stddev/expected=0.0325 AT_CLEANUP AT_SETUP([hrw multipath link selection]) AT_CHECK([[test-multipath 'eth_src,50,hrw,1,0,NXM_NX_REG0[]']], [0], [ignore]) # 1 -> 2: disruption=0.50 (perfect=0.50); stddev/expected=0.0000 # 2 -> 3: disruption=0.33 (perfect=0.33); stddev/expected=0.0033 # 3 -> 4: disruption=0.25 (perfect=0.25); stddev/expected=0.0076 # 4 -> 5: disruption=0.20 (perfect=0.20); stddev/expected=0.0059 # 5 -> 6: disruption=0.17 (perfect=0.17); stddev/expected=0.0030 # 6 -> 7: disruption=0.14 (perfect=0.14); stddev/expected=0.0124 # 7 -> 8: disruption=0.13 (perfect=0.12); stddev/expected=0.0072 # 8 -> 9: disruption=0.11 (perfect=0.11); stddev/expected=0.0074 # 9 -> 10: disruption=0.10 (perfect=0.10); stddev/expected=0.0161 #10 -> 11: disruption=0.09 (perfect=0.09); stddev/expected=0.0055 #11 -> 12: disruption=0.08 (perfect=0.08); stddev/expected=0.0092 #12 -> 13: disruption=0.08 (perfect=0.08); stddev/expected=0.0134 #13 -> 14: disruption=0.07 (perfect=0.07); stddev/expected=0.0124 #14 -> 15: disruption=0.07 (perfect=0.07); stddev/expected=0.0156 #15 -> 16: disruption=0.06 (perfect=0.06); stddev/expected=0.0182 #16 -> 17: disruption=0.06 (perfect=0.06); stddev/expected=0.0150 #17 -> 18: disruption=0.06 (perfect=0.06); stddev/expected=0.0109 #18 -> 19: disruption=0.05 (perfect=0.05); stddev/expected=0.0162 #19 -> 20: disruption=0.05 (perfect=0.05); stddev/expected=0.0149 #20 
-> 21: disruption=0.05 (perfect=0.05); stddev/expected=0.0148 #21 -> 22: disruption=0.05 (perfect=0.05); stddev/expected=0.0230 #22 -> 23: disruption=0.04 (perfect=0.04); stddev/expected=0.0208 #23 -> 24: disruption=0.04 (perfect=0.04); stddev/expected=0.0210 #24 -> 25: disruption=0.04 (perfect=0.04); stddev/expected=0.0228 #25 -> 26: disruption=0.04 (perfect=0.04); stddev/expected=0.0155 #26 -> 27: disruption=0.04 (perfect=0.04); stddev/expected=0.0208 #27 -> 28: disruption=0.04 (perfect=0.04); stddev/expected=0.0218 #28 -> 29: disruption=0.03 (perfect=0.03); stddev/expected=0.0193 #29 -> 30: disruption=0.03 (perfect=0.03); stddev/expected=0.0169 #30 -> 31: disruption=0.03 (perfect=0.03); stddev/expected=0.0163 #31 -> 32: disruption=0.03 (perfect=0.03); stddev/expected=0.0192 #32 -> 33: disruption=0.03 (perfect=0.03); stddev/expected=0.0212 #33 -> 34: disruption=0.03 (perfect=0.03); stddev/expected=0.0240 #34 -> 35: disruption=0.03 (perfect=0.03); stddev/expected=0.0227 #35 -> 36: disruption=0.03 (perfect=0.03); stddev/expected=0.0230 #36 -> 37: disruption=0.03 (perfect=0.03); stddev/expected=0.0183 #37 -> 38: disruption=0.03 (perfect=0.03); stddev/expected=0.0227 #38 -> 39: disruption=0.03 (perfect=0.03); stddev/expected=0.0255 #39 -> 40: disruption=0.03 (perfect=0.03); stddev/expected=0.0247 #40 -> 41: disruption=0.02 (perfect=0.02); stddev/expected=0.0228 #41 -> 42: disruption=0.02 (perfect=0.02); stddev/expected=0.0247 #42 -> 43: disruption=0.02 (perfect=0.02); stddev/expected=0.0265 #43 -> 44: disruption=0.02 (perfect=0.02); stddev/expected=0.0250 #44 -> 45: disruption=0.02 (perfect=0.02); stddev/expected=0.0258 #45 -> 46: disruption=0.02 (perfect=0.02); stddev/expected=0.0196 #46 -> 47: disruption=0.02 (perfect=0.02); stddev/expected=0.0235 #47 -> 48: disruption=0.02 (perfect=0.02); stddev/expected=0.0314 #48 -> 49: disruption=0.02 (perfect=0.02); stddev/expected=0.0293 #49 -> 50: disruption=0.02 (perfect=0.02); stddev/expected=0.0241 #50 -> 51: 
disruption=0.02 (perfect=0.02); stddev/expected=0.0291 #51 -> 52: disruption=0.02 (perfect=0.02); stddev/expected=0.0304 #52 -> 53: disruption=0.02 (perfect=0.02); stddev/expected=0.0307 #53 -> 54: disruption=0.02 (perfect=0.02); stddev/expected=0.0250 #54 -> 55: disruption=0.02 (perfect=0.02); stddev/expected=0.0290 #55 -> 56: disruption=0.02 (perfect=0.02); stddev/expected=0.0284 #56 -> 57: disruption=0.02 (perfect=0.02); stddev/expected=0.0272 #57 -> 58: disruption=0.02 (perfect=0.02); stddev/expected=0.0272 #58 -> 59: disruption=0.02 (perfect=0.02); stddev/expected=0.0304 #59 -> 60: disruption=0.02 (perfect=0.02); stddev/expected=0.0345 #60 -> 61: disruption=0.02 (perfect=0.02); stddev/expected=0.0251 #61 -> 62: disruption=0.02 (perfect=0.02); stddev/expected=0.0249 #62 -> 63: disruption=0.02 (perfect=0.02); stddev/expected=0.0285 #63 -> 64: disruption=0.02 (perfect=0.02); stddev/expected=0.0285 AT_CLEANUP AT_SETUP([iter_hash multipath link selection]) AT_CHECK([[test-multipath 'eth_src,50,iter_hash,1,0,NXM_NX_REG0[]']], [0], [ignore]) # 1 -> 2: disruption=0.50 (perfect=0.50); stddev/expected=0.0000 # 2 -> 3: disruption=0.42 (perfect=0.33); stddev/expected=0.0034 # 3 -> 4: disruption=0.25 (perfect=0.25); stddev/expected=0.0082 # 4 -> 5: disruption=0.42 (perfect=0.20); stddev/expected=0.0073 # 5 -> 6: disruption=0.17 (perfect=0.17); stddev/expected=0.0040 # 6 -> 7: disruption=0.14 (perfect=0.14); stddev/expected=0.0069 # 7 -> 8: disruption=0.13 (perfect=0.12); stddev/expected=0.0131 # 8 -> 9: disruption=0.45 (perfect=0.11); stddev/expected=0.0093 # 9 -> 10: disruption=0.10 (perfect=0.10); stddev/expected=0.0127 #10 -> 11: disruption=0.09 (perfect=0.09); stddev/expected=0.0134 #11 -> 12: disruption=0.08 (perfect=0.08); stddev/expected=0.0101 #12 -> 13: disruption=0.08 (perfect=0.08); stddev/expected=0.0127 #13 -> 14: disruption=0.07 (perfect=0.07); stddev/expected=0.0115 #14 -> 15: disruption=0.07 (perfect=0.07); stddev/expected=0.0100 #15 -> 16: disruption=0.06 
(perfect=0.06); stddev/expected=0.0111 #16 -> 17: disruption=0.47 (perfect=0.06); stddev/expected=0.0137 #17 -> 18: disruption=0.05 (perfect=0.06); stddev/expected=0.0204 #18 -> 19: disruption=0.05 (perfect=0.05); stddev/expected=0.0082 #19 -> 20: disruption=0.05 (perfect=0.05); stddev/expected=0.0124 #20 -> 21: disruption=0.05 (perfect=0.05); stddev/expected=0.0203 #21 -> 22: disruption=0.05 (perfect=0.05); stddev/expected=0.0196 #22 -> 23: disruption=0.04 (perfect=0.04); stddev/expected=0.0183 #23 -> 24: disruption=0.04 (perfect=0.04); stddev/expected=0.0212 #24 -> 25: disruption=0.04 (perfect=0.04); stddev/expected=0.0176 #25 -> 26: disruption=0.04 (perfect=0.04); stddev/expected=0.0173 #26 -> 27: disruption=0.04 (perfect=0.04); stddev/expected=0.0159 #27 -> 28: disruption=0.03 (perfect=0.04); stddev/expected=0.0168 #28 -> 29: disruption=0.03 (perfect=0.03); stddev/expected=0.0190 #29 -> 30: disruption=0.03 (perfect=0.03); stddev/expected=0.0305 #30 -> 31: disruption=0.03 (perfect=0.03); stddev/expected=0.0282 #31 -> 32: disruption=0.03 (perfect=0.03); stddev/expected=0.0255 #32 -> 33: disruption=0.49 (perfect=0.03); stddev/expected=0.0220 #33 -> 34: disruption=0.03 (perfect=0.03); stddev/expected=0.0188 #34 -> 35: disruption=0.03 (perfect=0.03); stddev/expected=0.0203 #35 -> 36: disruption=0.03 (perfect=0.03); stddev/expected=0.0207 #36 -> 37: disruption=0.03 (perfect=0.03); stddev/expected=0.0261 #37 -> 38: disruption=0.03 (perfect=0.03); stddev/expected=0.0226 #38 -> 39: disruption=0.03 (perfect=0.03); stddev/expected=0.0233 #39 -> 40: disruption=0.03 (perfect=0.03); stddev/expected=0.0161 #40 -> 41: disruption=0.03 (perfect=0.02); stddev/expected=0.0303 #41 -> 42: disruption=0.02 (perfect=0.02); stddev/expected=0.0249 #42 -> 43: disruption=0.02 (perfect=0.02); stddev/expected=0.0262 #43 -> 44: disruption=0.02 (perfect=0.02); stddev/expected=0.0260 #44 -> 45: disruption=0.02 (perfect=0.02); stddev/expected=0.0266 #45 -> 46: disruption=0.02 (perfect=0.02); 
stddev/expected=0.0287 #46 -> 47: disruption=0.02 (perfect=0.02); stddev/expected=0.0213 #47 -> 48: disruption=0.02 (perfect=0.02); stddev/expected=0.0301 #48 -> 49: disruption=0.02 (perfect=0.02); stddev/expected=0.0230 #49 -> 50: disruption=0.02 (perfect=0.02); stddev/expected=0.0248 #50 -> 51: disruption=0.02 (perfect=0.02); stddev/expected=0.0203 #51 -> 52: disruption=0.02 (perfect=0.02); stddev/expected=0.0235 #52 -> 53: disruption=0.02 (perfect=0.02); stddev/expected=0.0340 #53 -> 54: disruption=0.02 (perfect=0.02); stddev/expected=0.0264 #54 -> 55: disruption=0.02 (perfect=0.02); stddev/expected=0.0292 #55 -> 56: disruption=0.02 (perfect=0.02); stddev/expected=0.0246 #56 -> 57: disruption=0.02 (perfect=0.02); stddev/expected=0.0270 #57 -> 58: disruption=0.02 (perfect=0.02); stddev/expected=0.0299 #58 -> 59: disruption=0.02 (perfect=0.02); stddev/expected=0.0307 #59 -> 60: disruption=0.02 (perfect=0.02); stddev/expected=0.0275 #60 -> 61: disruption=0.02 (perfect=0.02); stddev/expected=0.0289 #61 -> 62: disruption=0.02 (perfect=0.02); stddev/expected=0.0292 #62 -> 63: disruption=0.02 (perfect=0.02); stddev/expected=0.0292 #63 -> 64: disruption=0.02 (perfect=0.02); stddev/expected=0.0307 AT_CLEANUP AT_SETUP([multipath action missing argument]) AT_CHECK([ovs-ofctl parse-flow actions=multipath], [1], [], [ovs-ofctl: : not enough arguments to multipath action ]) AT_CLEANUP AT_SETUP([multipath action bad fields]) AT_CHECK([ovs-ofctl parse-flow 'actions=multipath(xyzzy,50,modulo_n,1,0,NXM_NX_REG0[[]])'], [1], [], [ovs-ofctl: xyzzy,50,modulo_n,1,0,NXM_NX_REG0[[]]: unknown fields `xyzzy' ]) AT_CLEANUP AT_SETUP([multipath action bad algorithm]) AT_CHECK([ovs-ofctl parse-flow 'actions=multipath(eth_src,50,fubar,1,0,NXM_NX_REG0[[]])'], [1], [], [ovs-ofctl: eth_src,50,fubar,1,0,NXM_NX_REG0[[]]: unknown algorithm `fubar' ]) AT_CLEANUP AT_SETUP([multipath action bad n_links]) AT_CHECK([ovs-ofctl parse-flow 'actions=multipath(eth_src,50,modulo_n,0,0,NXM_NX_REG0[[]])'], [1], 
[], [ovs-ofctl: eth_src,50,modulo_n,0,0,NXM_NX_REG0[[]]: n_links 0 is not in valid range 1 to 65536 ]) AT_CLEANUP AT_SETUP([multipath action destination too narrow]) AT_CHECK([ovs-ofctl parse-flow 'actions=multipath(eth_src,50,modulo_n,1024,0,NXM_NX_REG0[[0..7]])'], [1], [], [ovs-ofctl: eth_src,50,modulo_n,1024,0,NXM_NX_REG0[[0..7]]: 8-bit destination field has 256 possible values, less than specified n_links 1024 ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/odp.at000066400000000000000000000301451226605124000177270ustar00rootroot00000000000000AT_BANNER([datapath parsing and formatting]) AT_SETUP([OVS datapath key parsing and formatting - valid forms]) dnl We could add a test for invalid forms, but that's less important. AT_DATA([odp-base.txt], [dnl in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x1234) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x80,ttl=128,frag=no) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x81,ttl=128,frag=no) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x80,ttl=128,frag=first) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x80,ttl=128,frag=later) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0,ttl=128,frag=no),tcp(src=80,dst=8080) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,ttl=128,frag=no),udp(src=81,dst=6632) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0,ttl=128,frag=no),icmp(type=1,code=2) 
in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=128,frag=no) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x71,hlimit=128,frag=no) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=128,frag=first) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=128,frag=later) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=6,tclass=0,hlimit=128,frag=no),tcp(src=80,dst=8080) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=17,tclass=0,hlimit=128,frag=no),udp(src=6630,dst=22) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=1,code=2) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=135,code=0),nd(target=::3) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=135,code=0),nd(target=::3,sll=00:05:06:07:08:09) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3,tll=00:0a:0b:0c:0d:0e) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e) 
in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=3,ttl=64,bos=1) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=7,ttl=100,bos=1) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=7,ttl=100,bos=0) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8848),mpls(label=1000,tc=4,ttl=200,bos=1) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8848),mpls(label=1000,tc=4,ttl=200,bos=0) ]) (echo '# Valid forms without tun_id or VLAN header.' set 's/^/skb_priority(0),skb_mark(0),/' odp-base.txt set ' s/^/skb_priority(0),skb_mark(0),/ ' odp-base.txt echo echo '# Valid forms with tunnel header.' sed 's/^/skb_priority(0),tunnel(tun_id=0x7f10354,src=10.10.10.10,dst=20.20.20.20,tos=0x0,ttl=64,flags(csum,key)),skb_mark(0x1234),/' odp-base.txt echo echo '# Valid forms with VLAN header.' sed 's/^/skb_priority(0),skb_mark(0),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with MPLS header.' sed 's/^/skb_priority(0),skb_mark(0),/ s/\(eth([[^)]]*),?\)/\1,eth_type(0x8847),mpls(label=100,tc=7,ttl=64,bos=1)/' odp-base.txt echo echo '# Valid forms with MPLS multicast header.' sed 's/^/skb_priority(0),skb_mark(0),/ s/\(eth([[^)]]*),?\)/\1,eth_type(0x8848),mpls(label=100,tc=7,ttl=64,bos=1)/' odp-base.txt echo echo '# Valid forms with tunnel and VLAN headers.' 
sed 's/^/skb_priority(0),tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,flags(key)),skb_mark(0),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with QOS priority, tunnel, and VLAN headers.' sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,flags(key)),skb_mark(0),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with IP first fragment.' sed 's/^/skb_priority(0),skb_mark(0),/' odp-base.txt | sed -n 's/,frag=no),/,frag=first),/p' echo echo '# Valid forms with IP later fragment.' sed 's/^/skb_priority(0),skb_mark(0),/' odp-base.txt | sed -n 's/,frag=no),.*/,frag=later)/p' ) > odp.txt AT_CAPTURE_FILE([odp.txt]) AT_CHECK_UNQUOTED([test-odp parse-keys < odp.txt], [0], [`cat odp.txt` ]) AT_CLEANUP AT_SETUP([OVS datapath wildcarded key parsing and formatting - valid forms]) dnl We could add a test for invalid forms, but that's less important. 
AT_DATA([odp-base.txt], [dnl in_port(1/0xff),eth(src=00:01:02:03:04:05/ff:ff:ff:ff:ff:f0,dst=10:11:12:13:14:15/ff:ff:ff:ff:ff:f0) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x1234/0xfff0) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41/255.255.255.0,dst=172.16.0.20/255.255.255.0,proto=5/0xf0,tos=0x80/0xf0,ttl=128/0xf0,frag=no/0xf0) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=6,tos=0,ttl=128,frag=no),tcp(src=80/0xff00,dst=8080/0xff) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,ttl=128,frag=no),udp(src=81/0xff00,dst=6632/0xff) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=17,tos=0,ttl=128,frag=no),udp(src=81/0xff,dst=6632/0xff00) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0800),ipv4(src=35.8.2.41,dst=172.16.0.20,proto=1,tos=0,ttl=128,frag=no),icmp(type=1/0xf0,code=2/0xff) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1/::255,dst=::2/::255,label=0/0xf0,proto=10/0xf0,tclass=0x70/0xf0,hlimit=128/0xf0,frag=no/0xf0) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=6,tclass=0,hlimit=128,frag=no),tcp(src=80/0xff00,dst=8080/0xff) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=17,tclass=0,hlimit=128,frag=no),udp(src=6630/0xff00,dst=22/0xff) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=1/0xf0,code=2/0xff) 
in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=135,code=0),nd(target=::3/::250) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=135,code=0),nd(target=::3/::250,sll=00:05:06:07:08:09/ff:ff:ff:ff:ff:00) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3/::250,tll=00:0a:0b:0c:0d:0e/ff:ff:ff:ff:ff:00) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3/::250,sll=00:05:06:07:08:09/ff:ff:ff:ff:ff:00,tll=00:0a:0b:0c:0d:0e/ff:ff:ff:ff:ff:00) in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x0806),arp(sip=1.2.3.4/255.255.255.250,tip=5.6.7.8/255.255.255.250,op=1/0xf0,sha=00:0f:10:11:12:13/ff:ff:ff:ff:ff:00,tha=00:14:15:16:17:18/ff:ff:ff:ff:ff:00) skb_mark(0x1234/0xfff0),in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=58,tclass=0,hlimit=128,frag=no),icmpv6(type=136,code=0),nd(target=::3,sll=00:05:06:07:08:09,tll=00:0a:0b:0c:0d:0e) ]) (echo '# Valid forms without tun_id or VLAN header.' cat odp-base.txt echo echo '# Valid forms with tunnel header.' sed 's/^/tunnel(tun_id=0x7f10354\/0xff,src=10.10.10.10\/255.255.255.0,dst=20.20.20.20\/255.255.255.0,tos=0\/0xff,ttl=64\/0xff,flags(csum,key)),/' odp-base.txt echo echo '# Valid forms with VLAN header.' sed 's/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with MPLS header.' 
sed 's/\(eth([[^)]]*),?\)/\1,eth_type(0x8847),mpls(label=100\/0xff,tc=7\/7,ttl=64\/0xff,bos=1\/1)/' odp-base.txt echo echo '# Valid forms with QoS priority.' sed 's/^/skb_priority(0x1234\/0xff),/' odp-base.txt echo echo '# Valid forms with tunnel and VLAN headers.' sed 's/^/tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,flags(key)),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99/0xff0,pcp=7/0xe),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with QOS priority, tunnel, and VLAN headers.' sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,flags(key)),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with IP first fragment.' sed -n 's/,frag=no),/,frag=first),/p' odp-base.txt echo echo '# Valid forms with IP later fragment.' sed -n 's/,frag=no),.*/,frag=later)/p' odp-base.txt) > odp.txt AT_CAPTURE_FILE([odp.txt]) AT_CHECK_UNQUOTED([test-odp parse-wc-keys < odp.txt], [0], [`cat odp.txt` ]) AT_CLEANUP AT_SETUP([OVS datapath actions parsing and formatting - valid forms]) AT_DATA([actions.txt], [dnl 1,2,3 userspace(pid=555666777) userspace(pid=6633,sFlow(vid=9,pcp=7,output=10)) userspace(pid=9765,slow_path()) userspace(pid=9765,slow_path(cfm)) userspace(pid=1234567,userdata(0102030405060708090a0b0c0d0e0f)) userspace(pid=6633,flow_sample(probability=123,collector_set_id=1234,obs_domain_id=2345,obs_point_id=3456)) userspace(pid=6633,ipfix) set(in_port(2)) set(eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15)) set(eth_type(0x1234)) set(ipv4(src=35.8.2.41,dst=172.16.0.20,proto=5,tos=0x80,ttl=128,frag=no)) set(tcp(src=80,dst=8080)) set(udp(src=81,dst=6632)) set(icmp(type=1,code=2)) set(ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=128,frag=no)) set(icmpv6(type=1,code=2)) push_vlan(vid=12,pcp=0) push_vlan(vid=13,pcp=5,cfi=0) push_vlan(tpid=0x9100,vid=13,pcp=5) push_vlan(tpid=0x9100,vid=13,pcp=5,cfi=0) 
pop_vlan sample(sample=9.7%,actions(1,2,3,push_vlan(vid=1,pcp=2))) set(tunnel(tun_id=0xabcdef1234567890,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags(df,csum,key))) set(tunnel(tun_id=0xabcdef1234567890,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags(key))) ]) AT_CHECK_UNQUOTED([test-odp parse-actions < actions.txt], [0], [`cat actions.txt` ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ofp-actions.at000066400000000000000000000305071226605124000213710ustar00rootroot00000000000000AT_BANNER([OpenFlow actions]) AT_SETUP([OpenFlow 1.0 action translation]) AT_KEYWORDS([ofp-actions OF1.0]) AT_DATA([test-data], [dnl # actions=LOCAL 0000 0008 fffe 04d2 # actions=CONTROLLER:1234 0000 0008 fffd 04d2 # actions=mod_vlan_vid:9 0001 0008 0009 0000 # actions=mod_vlan_pcp:6 0002 0008 06 000000 # actions=strip_vlan 0003 0008 00000000 # actions=mod_dl_src:00:11:22:33:44:55 0004 0010 001122334455 000000000000 # actions=mod_dl_dst:10:20:30:40:50:60 0005 0010 102030405060 000000000000 # actions=mod_nw_src:1.2.3.4 0006 0008 01020304 # actions=mod_nw_dst:192.168.0.1 0007 0008 c0a80001 # actions=mod_nw_tos:48 0008 0008 30 000000 # actions=mod_tp_src:80 0009 0008 0050 0000 # actions=mod_tp_dst:443 000a 0008 01bb 0000 # actions=enqueue:10q55 000b 0010 000a 000000000000 00000037 # actions=resubmit:5 ffff 0010 00002320 0001 0005 00000000 # actions=set_tunnel:0x12345678 ffff 0010 00002320 0002 0000 12345678 # actions=set_queue:2309737729 ffff 0010 00002320 0004 0000 89abcd01 # actions=pop_queue ffff 0010 00002320 0005 000000000000 # actions=move:NXM_OF_IN_PORT[]->NXM_OF_VLAN_TCI[] ffff 0018 00002320 0006 0010 0000 0000 00000002 00000802 # actions=load:0xf009->NXM_OF_VLAN_TCI[] ffff 0018 00002320 0007 000f 00000802 000000000000f009 # actions=note:11.e9.9a.ad.67.f3 ffff 0010 00002320 0008 11e99aad67f3 # actions=set_tunnel64:0xc426384d49c53d60 ffff 0018 00002320 0009 000000000000 c426384d49c53d60 # actions=set_tunnel64:0x885f3298 ffff 0018 00002320 0009 000000000000 00000000885f3298 # 
actions=write_metadata:0xfedcba9876543210 ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffffffffffffffff # actions=write_metadata:0xfedcba9876543210/0xffff0000ffff0000 ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffff0000ffff0000 # actions=multipath(eth_src,50,modulo_n,1,0,NXM_NX_REG0[]) ffff 0020 00002320 000a 0000 0032 0000 0000 0000 0000 0000 0000 001f 00010004 # actions=bundle(eth_src,0,hrw,ofport,slaves:4,8) ffff 0028 00002320 000c 0001 0000 0000 00000002 0002 0000 00000000 00000000 dnl 0004 0008 00000000 # actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],slaves:4,8) ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl 0004 0008 00000000 # actions=resubmit(10,5) ffff 0010 00002320 000e 000a 05 000000 # actions=output:NXM_NX_REG1[5..10] ffff 0018 00002320 000f 0145 00010204 ffff 000000000000 # actions=learn(table=2,idle_timeout=10,hard_timeout=20,fin_idle_timeout=2,fin_hard_timeout=4,priority=80,cookie=0x123456789abcdef0,NXM_OF_VLAN_TCI[0..11],NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],output:NXM_OF_IN_PORT[]) ffff 0048 00002320 0010 000a 0014 0050 123456789abcdef0 0000 02 00 0002 0004 dnl 000c 00000802 0000 00000802 0000 dnl 0030 00000406 0000 00000206 0000 dnl 1010 00000002 0000 dnl 00000000 # actions=exit ffff 0010 00002320 0011 000000000000 # actions=dec_ttl ffff 0010 00002320 0012 000000000000 # actions=fin_timeout(idle_timeout=10,hard_timeout=20) ffff 0010 00002320 0013 000a 0014 0000 # actions=controller(reason=invalid_ttl,max_len=1234,id=5678) ffff 0010 00002320 0014 04d2 162e 02 00 # actions=dec_ttl(32768,12345,90,765,1024) ffff 0020 00002320 0015 000500000000 80003039005A02fd 0400000000000000 # actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ffff 0018 00002320 001d 3039 00005BA0 00008707 0000B26E ]) sed '/^[[#&]]/d' < test-data > input.txt sed -n 's/^# //p; /^$/p' < test-data > expout sed -n 's/^& //p' < test-data > experr AT_CAPTURE_FILE([input.txt]) 
AT_CAPTURE_FILE([expout]) AT_CAPTURE_FILE([experr]) AT_CHECK( [ovs-ofctl '-vPATTERN:console:%c|%p|%m' parse-ofp10-actions < input.txt], [0], [expout], [experr]) AT_CLEANUP AT_SETUP([OpenFlow 1.1 action translation]) AT_KEYWORDS([ofp-actions OF1.1]) AT_DATA([test-data], [dnl # actions=LOCAL 0000 0010 fffffffe 04d2 000000000000 # actions=CONTROLLER:1234 0000 0010 fffffffd 04d2 000000000000 # actions=mod_vlan_vid:9 0001 0008 0009 0000 # actions=mod_vlan_pcp:6 0002 0008 06 000000 # actions=mod_dl_src:00:11:22:33:44:55 0003 0010 001122334455 000000000000 # actions=mod_dl_dst:10:20:30:40:50:60 0004 0010 102030405060 000000000000 # actions=mod_nw_src:1.2.3.4 0005 0008 01020304 # actions=mod_nw_dst:192.168.0.1 0006 0008 c0a80001 # actions=mod_nw_tos:48 0007 0008 30 000000 # actions=mod_tp_src:80 0009 0008 0050 0000 # actions=mod_tp_dst:443 000a 0008 01bb 0000 # actions=strip_vlan 0012 0008 00000000 # actions=set_queue:2309737729 0015 0008 89abcd01 dnl 802.1ad isn't supported at the moment dnl # actions=push_vlan:0x88a8 dnl 0011 0008 88a8 0000 # actions=push_vlan:0x8100 0011 0008 8100 0000 # actions=resubmit:5 ffff 0010 00002320 0001 0005 00000000 # actions=set_tunnel:0x12345678 ffff 0010 00002320 0002 0000 12345678 # actions=pop_queue ffff 0010 00002320 0005 000000000000 # actions=move:NXM_OF_IN_PORT[]->NXM_OF_VLAN_TCI[] ffff 0018 00002320 0006 0010 0000 0000 00000002 00000802 # actions=load:0xf009->NXM_OF_VLAN_TCI[] ffff 0018 00002320 0007 000f 00000802 000000000000f009 # actions=note:11.e9.9a.ad.67.f3 ffff 0010 00002320 0008 11e99aad67f3 # actions=set_tunnel64:0xc426384d49c53d60 ffff 0018 00002320 0009 000000000000 c426384d49c53d60 # actions=set_tunnel64:0x885f3298 ffff 0018 00002320 0009 000000000000 00000000885f3298 dnl OpenFlow 1.1 uses OFPIT_WRITE_METADATA to express the NXAST_WRITE_METADATA dnl action instead, so parse-ofp11-actions will recognise and drop this action. 
# actions=write_metadata:0xfedcba9876543210 # 0: ff -> (none) # 1: ff -> (none) # 2: 00 -> (none) # 3: 20 -> (none) # 4: 00 -> (none) # 5: 00 -> (none) # 6: 23 -> (none) # 7: 20 -> (none) # 8: 00 -> (none) # 9: 16 -> (none) # 10: 00 -> (none) # 11: 00 -> (none) # 12: 00 -> (none) # 13: 00 -> (none) # 14: 00 -> (none) # 15: 00 -> (none) # 16: fe -> (none) # 17: dc -> (none) # 18: ba -> (none) # 19: 98 -> (none) # 20: 76 -> (none) # 21: 54 -> (none) # 22: 32 -> (none) # 23: 10 -> (none) # 24: ff -> (none) # 25: ff -> (none) # 26: ff -> (none) # 27: ff -> (none) # 28: ff -> (none) # 29: ff -> (none) # 30: ff -> (none) # 31: ff -> (none) ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffffffffffffffff dnl Write-Metadata duplicated. & ofp_actions|WARN|duplicate write_metadata instruction not allowed, for OpenFlow 1.1+ compatibility # bad OF1.1 actions: OFPBAC_UNSUPPORTED_ORDER ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffffffffffffffff ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffffffffffffffff dnl Write-Metadata in wrong position. 
& ofp_actions|WARN|invalid instruction ordering: apply_actions must appear before write_metadata, for OpenFlow 1.1+ compatibility # bad OF1.1 actions: OFPBAC_UNSUPPORTED_ORDER ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffffffffffffffff ffff 0010 00002320 0002 0000 12345678 # actions=multipath(eth_src,50,modulo_n,1,0,NXM_NX_REG0[]) ffff 0020 00002320 000a 0000 0032 0000 0000 0000 0000 0000 0000 001f 00010004 # actions=bundle(eth_src,0,hrw,ofport,slaves:4,8) ffff 0028 00002320 000c 0001 0000 0000 00000002 0002 0000 00000000 00000000 dnl 0004 0008 00000000 # actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],slaves:4,8) ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl 0004 0008 00000000 # actions=resubmit(10,5) ffff 0010 00002320 000e 000a 05 000000 # actions=output:NXM_NX_REG1[5..10] ffff 0018 00002320 000f 0145 00010204 ffff 000000000000 # actions=learn(table=2,idle_timeout=10,hard_timeout=20,fin_idle_timeout=2,fin_hard_timeout=4,priority=80,cookie=0x123456789abcdef0,NXM_OF_VLAN_TCI[0..11],NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],output:NXM_OF_IN_PORT[]) ffff 0048 00002320 0010 000a 0014 0050 123456789abcdef0 0000 02 00 0002 0004 dnl 000c 00000802 0000 00000802 0000 dnl 0030 00000406 0000 00000206 0000 dnl 1010 00000002 0000 dnl 00000000 # actions=exit ffff 0010 00002320 0011 000000000000 dnl NXAST_DEC_TTL # actions=dec_ttl ffff 0010 00002320 0012 000000000000 dnl OpenFlow 1.1 OFPAT_DEC_TTL # actions=dec_ttl 0018 0008 00000000 # actions=fin_timeout(idle_timeout=10,hard_timeout=20) ffff 0010 00002320 0013 000a 0014 0000 # actions=controller(reason=invalid_ttl,max_len=1234,id=5678) ffff 0010 00002320 0014 04d2 162e 02 00 # actions=dec_ttl(32768,12345,90,765,1024) ffff 0020 00002320 0015 000500000000 80003039005A02fd 0400000000000000 # actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ffff 0018 00002320 001d 3039 00005BA0 00008707 0000B26E ]) sed '/^[[#&]]/d' < test-data > input.txt 
sed -n 's/^# //p; /^$/p' < test-data > expout sed -n 's/^& //p' < test-data > experr AT_CAPTURE_FILE([input.txt]) AT_CAPTURE_FILE([expout]) AT_CAPTURE_FILE([experr]) AT_CHECK( [ovs-ofctl '-vPATTERN:console:%c|%p|%m' parse-ofp11-actions < input.txt], [0], [expout], [experr]) AT_CLEANUP AT_SETUP([OpenFlow 1.1 instruction translation]) AT_KEYWORDS([OF1.1 instruction ofp-actions]) AT_DATA([test-data], [dnl # actions=LOCAL 0004 0018 00000000 dnl 0000 0010 fffffffe 04d2 000000000000 dnl Apply-Actions non-zero padding # actions=drop # 0: 00 -> (none) # 1: 04 -> (none) # 2: 00 -> (none) # 3: 08 -> (none) # 4: 00 -> (none) # 5: 00 -> (none) # 6: 00 -> (none) # 7: 01 -> (none) 0004 0008 00000001 dnl Check that an empty Apply-Actions instruction gets dropped. # actions=drop # 0: 00 -> (none) # 1: 04 -> (none) # 2: 00 -> (none) # 3: 08 -> (none) # 4: 00 -> (none) # 5: 00 -> (none) # 6: 00 -> (none) # 7: 00 -> (none) 0004 0008 00000000 dnl Duplicate instruction type: # bad OF1.1 instructions: ONFBIC_DUP_INSTRUCTION 0004 0008 00000000 0004 0008 00000000 dnl Instructions not multiple of 8 in length. & ofp_actions|WARN|OpenFlow message instructions length 9 is not a multiple of 8 # bad OF1.1 instructions: OFPBIC_BAD_LEN 0004 0009 01 00000000 dnl Goto-Table instruction too long. # bad OF1.1 instructions: OFPBIC_BAD_LEN 0001 0010 01 000000 0000000000000000 dnl Goto-Table 1 instruction non-zero padding # actions=goto_table:1 # 7: 01 -> 00 0001 0008 01 000001 dnl Goto-Table 1 instruction go back to the previous table. # bad OF1.1 instructions: OFPBRC_BAD_TABLE_ID 2,0001 0008 01 000000 dnl Goto-Table 1 # actions=goto_table:1 0001 0008 01 000000 dnl Write-Metadata. # actions=write_metadata:0xfedcba9876543210 0002 0018 00000000 fedcba9876543210 ffffffffffffffff dnl Write-Metadata with mask. # actions=write_metadata:0xfedcba9876543210/0xff00ff00ff00ff00 0002 0018 00000000 fedcba9876543210 ff00ff00ff00ff00 dnl Write-Metadata too short. 
# bad OF1.1 instructions: OFPBIC_BAD_LEN 0002 0010 00000000 fedcba9876543210 dnl Write-Metadata too long. # bad OF1.1 instructions: OFPBIC_BAD_LEN 0002 0020 00000000 fedcba9876543210 ffffffffffffffff 0000000000000000 dnl Write-Metadata duplicated. # bad OF1.1 instructions: ONFBIC_DUP_INSTRUCTION 0002 0018 00000000 fedcba9876543210 ff00ff00ff00ff00 0002 0018 00000000 fedcba9876543210 ff00ff00ff00ff00 dnl Write-Metadata in wrong position (OpenFlow 1.1+ disregards the order dnl and OVS reorders it to the canonical order) # actions=write_metadata:0xfedcba9876543210,goto_table:1 # 1: 01 -> 02 # 3: 08 -> 18 # 4: 01 -> 00 # 8: 00 -> fe # 9: 02 -> dc # 10: 00 -> ba # 11: 18 -> 98 # 12: 00 -> 76 # 13: 00 -> 54 # 14: 00 -> 32 # 15: 00 -> 10 # 16: fe -> ff # 17: dc -> ff # 18: ba -> ff # 19: 98 -> ff # 20: 76 -> ff # 21: 54 -> ff # 22: 32 -> ff # 23: 10 -> ff # 24: ff -> 00 # 25: ff -> 01 # 26: ff -> 00 # 27: ff -> 08 # 28: ff -> 01 # 29: ff -> 00 # 30: ff -> 00 # 31: ff -> 00 0001 0008 01 000000 0002 0018 00000000 fedcba9876543210 ffffffffffffffff dnl Write-Actions not supported yet. # bad OF1.1 instructions: OFPBIC_UNSUP_INST 0003 0008 01 000000 dnl Clear-Actions too-long # bad OF1.1 instructions: OFPBIC_BAD_LEN 0005 0010 00000000 0000000000000000 dnl Clear-Actions non-zero padding # actions=clear_actions # 7: 01 -> 00 0005 0008 00000001 dnl Clear-Actions non-zero padding # actions=clear_actions # 4: 01 -> 00 0005 0008 01 000000 dnl Clear-Actions # actions=clear_actions 0005 0008 00000000 dnl Experimenter actions not supported yet. # bad OF1.1 instructions: OFPBIC_BAD_EXPERIMENTER ffff 0008 01 000000 dnl Bad instruction number (0 not assigned). 
# bad OF1.1 instructions: OFPBIC_UNKNOWN_INST 0000 0008 01 000000 ]) sed '/^[[#&]]/d' < test-data > input.txt sed -n 's/^# //p; /^$/p' < test-data > expout sed -n 's/^& //p' < test-data > experr AT_CAPTURE_FILE([input.txt]) AT_CAPTURE_FILE([expout]) AT_CAPTURE_FILE([experr]) AT_CHECK( [ovs-ofctl '-vPATTERN:console:%c|%p|%m' parse-ofp11-instructions < input.txt], [0], [expout], [experr]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ofp-errors.at000066400000000000000000000154401226605124000212440ustar00rootroot00000000000000AT_BANNER([ofp-errors tests]) AT_SETUP([OFPT_ERROR with type OFPET_HELLO_FAILED - OF1.0]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print 010100170000000000000001657874726120646174610a], [0], [dnl OFPT_ERROR (xid=0x0): OFPHFC_EPERM extra data\012 ]) AT_CLEANUP AT_SETUP([OFPT_ERROR with type OFPET_HELLO_FAILED - OF1.1]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print 020100170000000000000001657874726120646174610a], [0], [dnl OFPT_ERROR (OF1.1) (xid=0x0): OFPHFC_EPERM extra data\012 ]) AT_CLEANUP AT_SETUP([OFPT_ERROR with type OFPET_BAD_REQUEST - OF1.0]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print 0101001400000000000100060105ccddeeff0011], [0], [dnl OFPT_ERROR (xid=0x0): OFPBRC_BAD_LEN OFPT_FEATURES_REQUEST (xid=0xeeff0011): (***truncated to 8 bytes from 52445***) 00000000 01 05 cc dd ee ff 00 11- |........ 
| ]) AT_CLEANUP AT_SETUP([OFPT_ERROR prints type of truncated inner messages]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print "0101004c092529d500010006 \ 01 06 00 e0 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 02 00 00 00 00 00 00 87 00 00 0f ff \ ff fe 50 54 00 00 00 01 62 72 30 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 01"], [0], [dnl OFPT_ERROR (xid=0x92529d5): OFPBRC_BAD_LEN OFPT_FEATURES_REPLY (xid=0x1): (***truncated to 64 bytes from 224***) 00000000 01 06 00 e0 00 00 00 01-00 00 50 54 00 00 00 01 |..........PT....| 00000010 00 00 01 00 02 00 00 00-00 00 00 87 00 00 0f ff |................| 00000020 ff fe 50 54 00 00 00 01-62 72 30 00 00 00 00 00 |..PT....br0.....| 00000030 00 00 00 00 00 00 00 00-00 00 00 01 00 00 00 01 |................| ]) AT_CLEANUP AT_SETUP([OFPT_ERROR with code OFPBMC_BAD_PREREQ - OF1.0]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print '0101001c55555555 b0c20000 0000232000010104 0102000811111111'], [0], [dnl OFPT_ERROR (xid=0x55555555): OFPBMC_BAD_PREREQ OFPT_ECHO_REQUEST (xid=0x11111111): 0 bytes of payload ]) AT_CLEANUP AT_SETUP([OFPT_ERROR with code OFPBMC_BAD_PREREQ - OF1.1]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print '0201001c55555555 b0c20000 0000232000010104 0102000811111111'], [0], [dnl OFPT_ERROR (OF1.1) (xid=0x55555555): OFPBMC_BAD_PREREQ OFPT_ECHO_REQUEST (xid=0x11111111): 0 bytes of payload ]) AT_CLEANUP dnl Error type 3, code 1 is OFPFMFC_OVERLAP in OF1.0 dnl and OFPBIC_UNSUP_INST in OF1.1, so check that value in both versions. AT_SETUP([OFPT_ERROR with type OFPFMFC_OVERLAP - OF1.0]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print 0101001400000000000300010106ccddeeff0011], [0], [dnl OFPT_ERROR (xid=0x0): OFPFMFC_OVERLAP OFPT_FEATURES_REPLY (xid=0xeeff0011): (***truncated to 8 bytes from 52445***) 00000000 01 06 cc dd ee ff 00 11- |........ 
| ]) AT_CLEANUP AT_SETUP([OFPT_ERROR with type OFPBIC_UNSUP_INST - OF1.1]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print 0201001400000000000300010206ccddeeff0011], [0], [dnl OFPT_ERROR (OF1.1) (xid=0x0): OFPBIC_UNSUP_INST OFPT_FEATURES_REPLY (OF1.1) (xid=0xeeff0011): (***truncated to 8 bytes from 52445***) 00000000 02 06 cc dd ee ff 00 11- |........ | ]) AT_CLEANUP dnl OF1.1 had OFPBIC_UNSUP_EXP_INST as 3,5. dnl OF1.2 broke it into OFPBIC_BAD_EXPERIMENTER as 3,5 dnl and OFPBIC_BAD_EXT_TYPE as 3,6. dnl Thus, for OF1.1 we translate both of the latter error codes into 3,5. AT_SETUP([encoding OFPBIC_* experimenter errors]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl print-error OFPBIC_BAD_EXPERIMENTER], [0], [dnl OpenFlow 1.1: vendor 0, type 3, code 5 OpenFlow 1.2: vendor 0, type 3, code 5 OpenFlow 1.3: vendor 0, type 3, code 5 ]) AT_CHECK([ovs-ofctl print-error OFPBIC_BAD_EXP_TYPE], [0], [dnl OpenFlow 1.1: vendor 0, type 3, code 5 OpenFlow 1.2: vendor 0, type 3, code 6 OpenFlow 1.3: vendor 0, type 3, code 6 ]) AT_CLEANUP dnl The "bad role" error was a Nicira extension in OpenFlow 1.0 and 1.1. dnl It was adopted as an official error code in OpenFlow 1.2. 
AT_SETUP([encoding errors extension that became official]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK( [ovs-ofctl encode-error-reply OFPRRFC_BAD_ROLE 0100000812345678], [0], [dnl 00000000 01 01 00 1c 12 34 56 78-b0 c2 00 00 00 00 23 20 @&t@ 00000010 00 01 02 01 01 00 00 08-12 34 56 78 @&t@ ]) AT_CHECK( [ovs-ofctl encode-error-reply OFPRRFC_BAD_ROLE 0200000812345678], [0], [dnl 00000000 02 01 00 1c 12 34 56 78-b0 c2 00 00 00 00 23 20 @&t@ 00000010 00 01 02 01 02 00 00 08-12 34 56 78 @&t@ ]) AT_CHECK( [ovs-ofctl encode-error-reply OFPRRFC_BAD_ROLE 0300000812345678], [0], [dnl 00000000 03 01 00 14 12 34 56 78-00 0b 00 02 03 00 00 08 @&t@ 00000010 12 34 56 78 @&t@ ]) AT_CLEANUP AT_SETUP([decoding OFPBIC_* experimenter errors]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print '0201001455555555 00030005 0102000811111111'], [0], [dnl OFPT_ERROR (OF1.1) (xid=0x55555555): OFPBIC_BAD_EXPERIMENTER OFPT_ECHO_REQUEST (xid=0x11111111): 0 bytes of payload ]) AT_CHECK([ovs-ofctl ofp-print '0301001455555555 00030005 0102000811111111'], [0], [dnl OFPT_ERROR (OF1.2) (xid=0x55555555): OFPBIC_BAD_EXPERIMENTER OFPT_ECHO_REQUEST (xid=0x11111111): 0 bytes of payload ]) AT_CHECK([ovs-ofctl ofp-print '0301001455555555 00030006 0102000811111111'], [0], [dnl OFPT_ERROR (OF1.2) (xid=0x55555555): OFPBIC_BAD_EXP_TYPE OFPT_ECHO_REQUEST (xid=0x11111111): 0 bytes of payload ]) AT_CLEANUP AT_SETUP([decoding experimenter errors]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK([ovs-ofctl ofp-print '0101001c55555555 b0c20000 0000232000010203 0102000811111111'], [0], [dnl OFPT_ERROR (xid=0x55555555): NXBRC_MUST_BE_ZERO OFPT_ECHO_REQUEST (xid=0x11111111): 0 bytes of payload ]) AT_CHECK([ovs-ofctl ofp-print '0201001c55555555 b0c20000 0000232000010203 0102000811111111'], [0], [dnl OFPT_ERROR (OF1.1) (xid=0x55555555): NXBRC_MUST_BE_ZERO OFPT_ECHO_REQUEST (xid=0x11111111): 0 bytes of payload ]) AT_CHECK([ovs-ofctl ofp-print '0301001855555555 ffff0004 00002320 0102000811111111'], [0], [dnl 
OFPT_ERROR (OF1.2) (xid=0x55555555): NXBRC_MUST_BE_ZERO OFPT_ECHO_REQUEST (xid=0x11111111): 0 bytes of payload ]) AT_CLEANUP AT_SETUP([encoding experimenter errors]) AT_KEYWORDS([ofp-print ofp-errors]) AT_CHECK( [ovs-ofctl encode-error-reply NXBRC_MUST_BE_ZERO 0100000812345678], [0], [dnl 00000000 01 01 00 1c 12 34 56 78-b0 c2 00 00 00 00 23 20 @&t@ 00000010 00 01 02 03 01 00 00 08-12 34 56 78 @&t@ ]) AT_CHECK( [ovs-ofctl encode-error-reply NXBRC_MUST_BE_ZERO 0200000812345678], [0], [dnl 00000000 02 01 00 1c 12 34 56 78-b0 c2 00 00 00 00 23 20 @&t@ 00000010 00 01 02 03 02 00 00 08-12 34 56 78 @&t@ ]) AT_CHECK( [ovs-ofctl encode-error-reply NXBRC_MUST_BE_ZERO 0300000812345678], [0], [dnl 00000000 03 01 00 18 12 34 56 78-ff ff 00 04 00 00 23 20 @&t@ 00000010 03 00 00 08 12 34 56 78- ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ofp-print.at000066400000000000000000002704411226605124000210700ustar00rootroot00000000000000AT_BANNER([ofp-print]) AT_SETUP([empty]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print ''], [0], [OpenFlow message is empty ]) AT_CLEANUP AT_SETUP([too short]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print aabb], [0], [dnl OpenFlow packet too short (only 2 bytes): 00000000 aa bb |.. | ]) AT_CLEANUP AT_SETUP([wrong OpenFlow version]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print 00bb0008eeff0011], [0], [dnl ***decode error: OFPBRC_BAD_TYPE*** 00000000 00 bb 00 08 ee ff 00 11- |........ | ], [ofp_msgs|WARN|unknown OpenFlow message (version 0, type 187) ]) AT_CLEANUP AT_SETUP([truncated message]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print 0110ccddeeff0011], [0], [dnl (***truncated to 8 bytes from 52445***) 00000000 01 10 cc dd ee ff 00 11- |........ 
| ]) AT_CLEANUP AT_SETUP([message only uses part of buffer]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print 01100009eeff00112233], [0], [dnl (***only uses 9 bytes out of 10***) 00000000 01 10 00 09 ee ff 00 11-22 33 |........"3 | ]) # " AT_CLEANUP AT_SETUP([OFPT_HELLO - ordinary]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print 0100000800000000], [0], [dnl OFPT_HELLO (xid=0x0): version bitmap: 0x01 ]) AT_CLEANUP AT_SETUP([OFPT_HELLO with extra data]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print 0100001300000000657874726120646174610a], [0], [dnl OFPT_HELLO (xid=0x0): version bitmap: 0x01 unknown data in hello: 00000000 01 00 00 13 00 00 00 00-65 78 74 72 61 20 64 61 |........extra da| 00000010 74 61 0a |ta. | ]) AT_CLEANUP AT_SETUP([OFPT_HELLO with version bitmap]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "01 00 00 10 00 00 00 00 00 01 00 08 00 00 00 f0"], [0], [dnl OFPT_HELLO (xid=0x0): version bitmap: 0x04, 0x05, 0x06, 0x07 ]) AT_CLEANUP AT_SETUP([OFPT_HELLO with version bitmap and extra data]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 00 00 1b 00 00 00 00 ff ff 00 06 01 02 00 00 \ 00 01 00 08 00 00 00 f0 61 62 63"], [0], [dnl OFPT_HELLO (xid=0x0): version bitmap: 0x04, 0x05, 0x06, 0x07 unknown data in hello: 00000000 01 00 00 1b 00 00 00 00-ff ff 00 06 01 02 00 00 |................| 00000010 00 01 00 08 00 00 00 f0-61 62 63 |........abc | ]) AT_CLEANUP AT_SETUP([OFPT_HELLO with higher than supported version]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "0f 00 00 08 00 00 00 00"], [0], [dnl OFPT_HELLO (OF 0x0f) (xid=0x0): version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f ]) AT_CHECK([ovs-ofctl ofp-print "40 00 00 08 00 00 00 00"], [0], [dnl OFPT_HELLO (OF 0x40) (xid=0x0): version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 
0x1b, 0x1c, 0x1d, 0x1e, 0x1f ]) AT_CHECK([ovs-ofctl ofp-print "3f 00 00 18 00 00 00 00 00 01 00 0c aa aa aa aa aa aa aa aa 00 00 00 00"], [0], [dnl OFPT_HELLO (OF 0x3f) (xid=0x0): version bitmap: 0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f, 0x11, 0x13, 0x15, 0x17, 0x19, 0x1b, 0x1d, 0x1f ]) AT_CLEANUP AT_SETUP([OFPT_HELLO with contradictory version bitmaps]) AT_KEYWORDS([ofp-print]) dnl Bitmap claims support for no versions at all. AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "01 00 00 10 00 00 00 00 00 01 00 08 00 00 00 00"], [0], [OFPT_HELLO (xid=0x0): version bitmap: 0x01 unknown data in hello: 00000000 01 00 00 10 00 00 00 00-00 01 00 08 00 00 00 00 |................| ], [dnl ofp_util|WARN|peer does not support any OpenFlow version (between 0x01 and 0x1f) ]) dnl Bitmap claims support for only versions above 0x1f. AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "3f 00 00 18 00 00 00 00 00 01 00 0c 00 00 00 00 aa aa aa aa 00 00 00 00"], [0], [OFPT_HELLO (OF 0x3f) (xid=0x0): version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f unknown data in hello: 00000000 3f 00 00 18 00 00 00 00-00 01 00 0c 00 00 00 00 |?...............| 00000010 aa aa aa aa 00 00 00 00- |........ | ], [dnl ofp_util|WARN|peer does not support any OpenFlow version (between 0x01 and 0x1f) ]) dnl Bitmap claims support for nonexistent version 0x00. AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "01 00 00 10 00 00 00 00 00 01 00 08 00 00 00 f1"], [0], [dnl OFPT_HELLO (xid=0x0): version bitmap: 0x04, 0x05, 0x06, 0x07 ], [dnl ofp_util|WARN|peer claims to support invalid OpenFlow version 0x00 ]) dnl Bitmap claims support for only nonexistent version 0x00. 
AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "01 00 00 10 00 00 00 00 00 01 00 08 00 00 00 01"], [0], [dnl OFPT_HELLO (xid=0x0): version bitmap: 0x01 unknown data in hello: 00000000 01 00 00 10 00 00 00 00-00 01 00 08 00 00 00 01 |................| ], [dnl ofp_util|WARN|peer claims to support invalid OpenFlow version 0x00 ofp_util|WARN|peer does not support any OpenFlow version (between 0x01 and 0x1f) ]) AT_CLEANUP dnl OFPT_ERROR tests are in ofp-errors.at. AT_SETUP([OFPT_ECHO_REQUEST, empty payload]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '01 02 00 08 00 00 00 01'], [0], [dnl OFPT_ECHO_REQUEST (xid=0x1): 0 bytes of payload ]) AT_CLEANUP AT_SETUP([OFPT_ECHO_REQUEST, 5-byte payload]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '0102000d00000001 25 53 54 1a 9d'], [0], [dnl OFPT_ECHO_REQUEST (xid=0x1): 5 bytes of payload 00000000 25 53 54 1a 9d |%ST.. | ]) AT_CLEANUP AT_SETUP([OFPT_ECHO_REPLY, empty payload]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '01 03 00 08 00 00 00 01'], [0], [dnl OFPT_ECHO_REPLY (xid=0x1): 0 bytes of payload ]) AT_CLEANUP AT_SETUP([OFPT_ECHO_REPLY, 5-byte payload]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '0103000d0000000ba330efaf9e'], [0], [dnl OFPT_ECHO_REPLY (xid=0xb): 5 bytes of payload 00000000 a3 30 ef af 9e |.0... 
| ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REQUEST]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '0105000800000001'], [0], [dnl OFPT_FEATURES_REQUEST (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REPLY - OF1.0]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 06 00 e0 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 02 00 00 00 00 00 00 87 00 00 0f ff \ ff fe 50 54 00 00 00 01 62 72 30 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 01 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 03 50 54 00 00 00 01 65 74 68 30 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 02 08 00 00 02 8f 00 00 02 8f 00 00 00 00 \ 00 02 50 54 00 00 00 03 65 74 68 32 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 02 08 00 00 02 8f 00 00 02 8f 00 00 00 00 \ 00 01 50 54 00 00 00 02 65 74 68 31 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 02 08 00 00 02 8f 00 00 02 8f 00 00 00 00 \ "], [0], [dnl OFPT_FEATURES_REPLY (xid=0x1): dpid:0000505400000001 n_tables:2, n_buffers:256 capabilities: FLOW_STATS TABLE_STATS PORT_STATS ARP_MATCH_IP actions: OUTPUT SET_VLAN_VID SET_VLAN_PCP STRIP_VLAN SET_DL_SRC SET_DL_DST SET_NW_SRC SET_NW_DST SET_NW_TOS SET_TP_SRC SET_TP_DST ENQUEUE 1(eth1): addr:50:54:00:00:00:02 config: 0 state: 0 current: 100MB-FD AUTO_NEG advertised: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG supported: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG speed: 100 Mbps now, 100 Mbps max 2(eth2): addr:50:54:00:00:00:03 config: 0 state: 0 current: 100MB-FD AUTO_NEG advertised: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG supported: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG speed: 100 Mbps now, 100 Mbps max 3(eth0): addr:50:54:00:00:00:01 config: 0 state: 0 current: 100MB-FD AUTO_NEG advertised: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG supported: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG speed: 100 Mbps now, 100 Mbps max LOCAL(br0): 
addr:50:54:00:00:00:01 config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REPLY cut off mid-port - OF1.0]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 06 00 dc 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 02 00 00 00 00 00 00 87 00 00 0f ff \ ff fe 50 54 00 00 00 01 62 72 30 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 01 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 03 50 54 00 00 00 01 65 74 68 30 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 02 08 00 00 02 8f 00 00 02 8f 00 00 00 00 \ 00 02 50 54 00 00 00 03 65 74 68 32 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 02 08 00 00 02 8f 00 00 02 8f 00 00 00 00 \ 00 01 50 54 00 00 00 02 65 74 68 31 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 02 08 00 00 02 8f 00 00 02 8f \ "], [0], [dnl ***decode error: OFPBRC_BAD_LEN*** 00000000 01 06 00 dc 00 00 00 01-00 00 50 54 00 00 00 01 |..........PT....| 00000010 00 00 01 00 02 00 00 00-00 00 00 87 00 00 0f ff |................| 00000020 ff fe 50 54 00 00 00 01-62 72 30 00 00 00 00 00 |..PT....br0.....| 00000030 00 00 00 00 00 00 00 00-00 00 00 01 00 00 00 01 |................| 00000040 00 00 00 00 00 00 00 00-00 00 00 00 00 00 00 00 |................| 00000050 00 03 50 54 00 00 00 01-65 74 68 30 00 00 00 00 |..PT....eth0....| 00000060 00 00 00 00 00 00 00 00-00 00 00 00 00 00 00 00 |................| 00000070 00 00 02 08 00 00 02 8f-00 00 02 8f 00 00 00 00 |................| 00000080 00 02 50 54 00 00 00 03-65 74 68 32 00 00 00 00 |..PT....eth2....| 00000090 00 00 00 00 00 00 00 00-00 00 00 00 00 00 00 00 |................| 000000a0 00 00 02 08 00 00 02 8f-00 00 02 8f 00 00 00 00 |................| 000000b0 00 01 50 54 00 00 00 02-65 74 68 31 00 00 00 00 |..PT....eth1....| 000000c0 00 00 00 00 00 00 00 00-00 00 00 00 00 00 00 00 |................| 000000d0 00 00 02 08 00 00 02 8f-00 00 02 8f 
|............ | ], [stderr]) AT_CHECK([sed 's/.*|//' stderr], [0], [dnl received OFPT_FEATURES_REPLY with incorrect length 220 (must be exactly 32 bytes or longer by an integer multiple of 48 bytes) ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REPLY - OF1.1]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 02 06 00 a0 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 02 00 00 00 00 00 00 87 00 00 00 00 \ ff ff ff fe 00 00 00 00 50 54 00 00 00 01 00 00 \ 62 72 30 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 01 86 a0 00 01 86 a0 \ 00 00 00 03 00 00 00 00 50 54 00 00 00 01 00 00 \ 65 74 68 30 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 20 08 00 00 28 0f \ 00 00 28 0f 00 00 00 00 00 01 86 a0 00 01 86 a0 \ "], [0], [dnl OFPT_FEATURES_REPLY (OF1.1) (xid=0x1): dpid:0000505400000001 n_tables:2, n_buffers:256 capabilities: FLOW_STATS TABLE_STATS PORT_STATS ARP_MATCH_IP 3(eth0): addr:50:54:00:00:00:01 config: 0 state: 0 current: 100MB-FD AUTO_NEG advertised: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG supported: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG speed: 100 Mbps now, 100 Mbps max LOCAL(br0): addr:50:54:00:00:00:01 config: PORT_DOWN state: LINK_DOWN speed: 100 Mbps now, 100 Mbps max ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REPLY cut off mid-port - OF1.1]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 02 06 00 90 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 02 00 00 00 00 00 00 87 00 00 00 00 \ ff ff ff fe 00 00 00 00 50 54 00 00 00 01 00 00 \ 62 72 30 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 01 86 a0 00 01 86 a0 \ 00 00 00 03 00 00 00 00 50 54 00 00 00 01 00 00 \ 65 74 68 30 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 20 08 00 00 28 0f \ "], [0], [dnl ***decode error: OFPBRC_BAD_LEN*** 00000000 02 06 00 90 00 00 00 01-00 00 50 54 00 00 00 01 
|..........PT....| 00000010 00 00 01 00 02 00 00 00-00 00 00 87 00 00 00 00 |................| 00000020 ff ff ff fe 00 00 00 00-50 54 00 00 00 01 00 00 |........PT......| 00000030 62 72 30 00 00 00 00 00-00 00 00 00 00 00 00 00 |br0.............| 00000040 00 00 00 01 00 00 00 01-00 00 00 00 00 00 00 00 |................| 00000050 00 00 00 00 00 00 00 00-00 01 86 a0 00 01 86 a0 |................| 00000060 00 00 00 03 00 00 00 00-50 54 00 00 00 01 00 00 |........PT......| 00000070 65 74 68 30 00 00 00 00-00 00 00 00 00 00 00 00 |eth0............| 00000080 00 00 00 00 00 00 00 00-00 00 20 08 00 00 28 0f |.......... ...@{:@.| ], [stderr]) AT_CHECK([sed 's/.*|//' stderr], [0], [dnl received OFPT_FEATURES_REPLY with incorrect length 144 (must be exactly 32 bytes or longer by an integer multiple of 64 bytes) ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REPLY - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 06 00 a0 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 ff 00 00 00 00 00 01 77 00 00 00 00 \ ff ff ff fe 00 00 00 00 50 54 00 00 00 01 00 00 \ 62 72 30 0a 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 01 86 a0 00 01 86 a0 \ 00 00 00 03 00 00 00 00 50 54 00 00 00 01 00 00 \ 65 74 68 30 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 20 08 00 00 28 0f \ 00 00 28 0f 00 00 00 00 00 01 86 a0 00 01 86 a0 \ "], [0], [dnl OFPT_FEATURES_REPLY (OF1.2) (xid=0x1): dpid:0000505400000001 n_tables:255, n_buffers:256 capabilities: FLOW_STATS TABLE_STATS PORT_STATS IP_REASM QUEUE_STATS PORT_BLOCKED 3(eth0): addr:50:54:00:00:00:01 config: 0 state: 0 current: 100MB-FD AUTO_NEG advertised: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG supported: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG speed: 100 Mbps now, 100 Mbps max LOCAL(br0): addr:50:54:00:00:00:01 config: PORT_DOWN state: LINK_DOWN speed: 100 Mbps now, 100 Mbps max ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REPLY cut 
off mid-port - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 06 00 a0 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 ff 00 00 00 00 00 01 77 00 00 00 00 \ ff ff ff fe 00 00 00 00 50 54 00 00 00 01 00 00 \ 62 72 30 0a 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 01 86 a0 00 01 86 a0 \ 00 00 00 03 00 00 00 00 50 54 00 00 00 01 00 00 \ 65 74 68 30 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 20 08 00 00 28 0f \ "], [0], [dnl OFPT_FEATURES_REPLY (OF1.2) (xid=0x1): (***truncated to 144 bytes from 160***) 00000000 03 06 00 a0 00 00 00 01-00 00 50 54 00 00 00 01 |..........PT....| 00000010 00 00 01 00 ff 00 00 00-00 00 01 77 00 00 00 00 |...........w....| 00000020 ff ff ff fe 00 00 00 00-50 54 00 00 00 01 00 00 |........PT......| 00000030 62 72 30 0a 00 00 00 00-00 00 00 00 00 00 00 00 |br0.............| 00000040 00 00 00 01 00 00 00 01-00 00 00 00 00 00 00 00 |................| 00000050 00 00 00 00 00 00 00 00-00 01 86 a0 00 01 86 a0 |................| 00000060 00 00 00 03 00 00 00 00-50 54 00 00 00 01 00 00 |........PT......| 00000070 65 74 68 30 00 00 00 00-00 00 00 00 00 00 00 00 |eth0............| 00000080 00 00 00 00 00 00 00 00-00 00 20 08 00 00 28 0f |.......... 
...@{:@.| ], [stderr]) AT_CHECK([sed 's/.*|//' stderr], [0], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REPLY - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 06 00 20 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 ff 00 00 00 00 00 01 77 00 00 00 00 \ "], [0], [dnl OFPT_FEATURES_REPLY (OF1.3) (xid=0x1): dpid:0000505400000001 n_tables:255, n_buffers:256 capabilities: FLOW_STATS TABLE_STATS PORT_STATS IP_REASM QUEUE_STATS PORT_BLOCKED ]) AT_CLEANUP AT_SETUP([OFPT_FEATURES_REPLY - with auxiliary_id - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 06 00 20 00 00 00 01 00 00 50 54 00 00 00 01 \ 00 00 01 00 ff 01 00 00 00 00 01 77 00 00 00 00 \ "], [0], [dnl OFPT_FEATURES_REPLY (OF1.3) (xid=0x1): dpid:0000505400000001 n_tables:255, n_buffers:256, auxiliary_id:1 capabilities: FLOW_STATS TABLE_STATS PORT_STATS IP_REASM QUEUE_STATS PORT_BLOCKED ]) AT_CLEANUP AT_SETUP([OFPT_GET_CONFIG_REQUEST]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '0107000800000001'], [0], [dnl OFPT_GET_CONFIG_REQUEST (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_GET_CONFIG_REPLY, most common form]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '01 08 00 0c 00 00 00 03 00 00 00 00'], [0], [dnl OFPT_GET_CONFIG_REPLY (xid=0x3): frags=normal miss_send_len=0 ]) AT_CLEANUP AT_SETUP([OFPT_GET_CONFIG_REPLY, frags and miss_send_len]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '01 08 00 0c 00 00 00 03 00 02 00 ff'], [0], [dnl OFPT_GET_CONFIG_REPLY (xid=0x3): frags=reassemble miss_send_len=255 ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_IN - OF1.0]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 0a 00 4e 00 00 00 00 00 00 01 11 00 3c 00 03 \ 00 00 50 54 00 00 00 06 50 54 00 00 00 05 08 00 \ 45 00 00 28 bd 12 00 00 40 06 3c 6a c0 a8 00 01 \ c0 a8 00 02 27 2f 00 00 78 50 cc 5b 57 af 42 1e \ 50 00 02 00 26 e8 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=3 (via no_match) data_len=60 
buffer=0x00000111 tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=10031,tp_dst=0 tcp_csum:26e8 ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_IN - OF1.0, with hex output of packet data)]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 0a 00 4e 00 00 00 00 00 00 01 11 00 3c 00 03 \ 00 00 50 54 00 00 00 06 50 54 00 00 00 05 08 00 \ 45 00 00 28 bd 12 00 00 40 06 3c 6a c0 a8 00 01 \ c0 a8 00 02 27 2f 00 00 78 50 cc 5b 57 af 42 1e \ 50 00 02 00 26 e8 00 00 00 00 00 00 00 00 \ " 3], [0], [dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=3 (via no_match) data_len=60 buffer=0x00000111 tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=10031,tp_dst=0 tcp_csum:26e8 00000000 50 54 00 00 00 06 50 54-00 00 00 05 08 00 45 00 00000010 00 28 bd 12 00 00 40 06-3c 6a c0 a8 00 01 c0 a8 00000020 00 02 27 2f 00 00 78 50-cc 5b 57 af 42 1e 50 00 00000030 02 00 26 e8 00 00 00 00-00 00 00 00 ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_IN - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 0a 00 4c 00 00 00 00 ff ff ff 00 00 2a 00 00 \ 00 01 00 0c 80 00 00 04 ff ff ff fe 00 00 00 00 \ 00 00 ff ff ff ff ff ff 00 23 20 83 c1 5f 80 35 \ 00 01 08 00 06 04 00 01 00 23 20 83 c1 5f 00 00 \ 00 00 00 23 20 83 c1 5f 00 00 00 00 \ "], [0], [dnl OFPT_PACKET_IN (OF1.2) (xid=0x0): total_len=42 in_port=LOCAL (via no_match) data_len=42 buffer=0xffffff00 rarp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:23:20:83:c1:5f,dl_dst=ff:ff:ff:ff:ff:ff,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_op=1,arp_sha=00:23:20:83:c1:5f,arp_tha=00:23:20:83:c1:5f ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_IN - OF1.2, with hex output of packet data]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 0a 00 4c 00 00 00 00 ff ff ff 00 00 2a 00 00 \ 00 01 00 0c 80 00 00 04 ff ff ff fe 00 00 00 
00 \ 00 00 ff ff ff ff ff ff 00 23 20 83 c1 5f 80 35 \ 00 01 08 00 06 04 00 03 00 23 20 83 c1 5f 00 00 \ 00 00 00 23 20 83 c1 5f 00 00 00 00 \ " 3], [0], [dnl OFPT_PACKET_IN (OF1.2) (xid=0x0): total_len=42 in_port=LOCAL (via no_match) data_len=42 buffer=0xffffff00 rarp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:23:20:83:c1:5f,dl_dst=ff:ff:ff:ff:ff:ff,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_op=3,arp_sha=00:23:20:83:c1:5f,arp_tha=00:23:20:83:c1:5f 00000000 ff ff ff ff ff ff 00 23-20 83 c1 5f 80 35 00 01 00000010 08 00 06 04 00 03 00 23-20 83 c1 5f 00 00 00 00 00000020 00 23 20 83 c1 5f 00 00-00 00 ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_IN - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 0a 00 54 00 00 00 00 ff ff ff 00 00 2a 00 00 \ 01 02 03 04 05 06 07 08 00 01 00 0c 80 00 00 04 \ ff ff ff fe 00 00 00 00 00 00 ff ff ff ff ff ff \ 00 23 20 83 c1 5f 80 35 00 01 08 00 06 04 00 03 \ 00 23 20 83 c1 5f 00 00 00 00 00 23 20 83 c1 5f \ 00 00 00 00 \ "], [0], [dnl OFPT_PACKET_IN (OF1.3) (xid=0x0): cookie=0x102030405060708 total_len=42 in_port=LOCAL (via no_match) data_len=42 buffer=0xffffff00 rarp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:23:20:83:c1:5f,dl_dst=ff:ff:ff:ff:ff:ff,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_op=3,arp_sha=00:23:20:83:c1:5f,arp_tha=00:23:20:83:c1:5f ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_IN - OF1.3, with hex output of packet data]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 0a 00 54 00 00 00 00 ff ff ff 00 00 2a 00 00 \ 01 02 03 04 05 06 07 08 00 01 00 0c 80 00 00 04 \ ff ff ff fe 00 00 00 00 00 00 ff ff ff ff ff ff \ 00 23 20 83 c1 5f 80 35 00 01 08 00 06 04 00 03 \ 00 23 20 83 c1 5f 00 00 00 00 00 23 20 83 c1 5f \ 00 00 00 00 \ " 3], [0], [dnl OFPT_PACKET_IN (OF1.3) (xid=0x0): cookie=0x102030405060708 total_len=42 in_port=LOCAL (via no_match) data_len=42 buffer=0xffffff00 
rarp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:23:20:83:c1:5f,dl_dst=ff:ff:ff:ff:ff:ff,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_op=3,arp_sha=00:23:20:83:c1:5f,arp_tha=00:23:20:83:c1:5f 00000000 ff ff ff ff ff ff 00 23-20 83 c1 5f 80 35 00 01 00000010 08 00 06 04 00 03 00 23-20 83 c1 5f 00 00 00 00 00000020 00 23 20 83 c1 5f 00 00-00 00 ]) AT_CLEANUP AT_SETUP([OFPT_FLOW_REMOVED - OF1.0]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 0b 00 58 00 00 00 00 00 00 00 00 00 03 50 54 \ 00 00 00 05 50 54 00 00 00 06 ff ff 00 00 08 06 \ 00 02 00 00 c0 a8 00 01 c0 a8 00 02 00 00 00 00 \ 00 00 00 00 00 00 00 00 ff ff 00 00 00 00 00 05 \ 30 e0 35 00 00 05 00 00 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c \ "], [0], [dnl OFPT_FLOW_REMOVED (xid=0x0): priority=65535,arp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,arp_spa=192.168.0.1,arp_tpa=192.168.0.2,arp_op=2,nw_tos=0,tp_src=0,tp_dst=0 reason=idle duration5.82s idle5 pkts1 bytes60 ]) AT_CLEANUP AT_SETUP([OFPT_FLOW_REMOVED - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 0b 00 40 00 00 00 00 fe dc ba 98 76 54 32 10 \ 80 00 01 05 00 00 00 01 00 98 96 80 00 3c 00 78 \ 00 00 00 00 00 12 d6 87 00 00 00 00 6f 68 ba 66 \ 00 01 00 0a 80 00 0c 02 10 09 00 00 00 00 00 00"], [0], [dnl OFPT_FLOW_REMOVED (OF1.2) (xid=0x0): dl_vlan=9 reason=hard table_id=5 cookie:0xfedcba9876543210 duration1.01s idle60 hard120 pkts1234567 bytes1869134438 ]) AT_CLEANUP AT_SETUP([OFPT_FLOW_REMOVED - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 0b 00 40 00 00 00 00 fe dc ba 98 76 54 32 10 \ 80 00 01 05 00 00 00 01 00 98 96 80 00 3c 00 78 \ 00 00 00 00 00 12 d6 87 00 00 00 00 6f 68 ba 66 \ 00 01 00 0a 80 00 0c 02 10 09 00 00 00 00 00 00"], [0], [dnl OFPT_FLOW_REMOVED (OF1.3) (xid=0x0): dl_vlan=9 reason=hard table_id=5 cookie:0xfedcba9876543210 duration1.01s idle60 hard120 pkts1234567 bytes1869134438 ]) AT_CLEANUP AT_SETUP([OFPT_PORT_STATUS - OF1.0]) 
AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 0c 00 40 00 00 00 00 02 00 00 00 00 00 00 00 \ 00 03 50 54 00 00 00 01 65 74 68 30 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 01 \ 00 00 02 08 00 00 02 8f 00 00 02 8f 00 00 00 00 \ "], [0], [dnl OFPT_PORT_STATUS (xid=0x0): MOD: 3(eth0): addr:50:54:00:00:00:01 config: PORT_DOWN state: LINK_DOWN current: 100MB-FD AUTO_NEG advertised: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG supported: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG speed: 100 Mbps now, 100 Mbps max ]) AT_CLEANUP AT_SETUP([OFPT_PORT_STATUS - OF1.1]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 02 0c 00 50 00 00 00 00 02 00 00 00 00 00 00 00 \ 00 00 00 03 00 00 00 00 50 54 00 00 00 01 00 00 \ 65 74 68 30 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 20 08 00 00 28 0f \ 00 00 28 0f 00 00 00 00 00 01 86 a0 00 01 86 a0 \ "], [0], [dnl OFPT_PORT_STATUS (OF1.1) (xid=0x0): MOD: 3(eth0): addr:50:54:00:00:00:01 config: 0 state: 0 current: 100MB-FD AUTO_NEG advertised: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG supported: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG speed: 100 Mbps now, 100 Mbps max ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_OUT - OF1.0]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 0d 00 54 00 00 00 00 00 00 01 14 00 01 00 08 \ 00 00 00 08 00 03 00 00 50 54 00 00 00 05 50 54 \ 00 00 00 06 08 00 45 00 00 28 00 00 40 00 40 06 \ b9 7c c0 a8 00 02 c0 a8 00 01 00 00 2b 60 00 00 \ 00 00 6a 4f 2b 58 50 14 00 00 6d 75 00 00 00 00 \ 00 00 00 00 \ "], [0], [dnl OFPT_PACKET_OUT (xid=0x0): in_port=1 actions=output:3 buffer=0x00000114 ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_OUT - OF1.0, with packet]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 0d 00 54 00 00 00 00 ff ff ff ff 00 01 00 08 \ 00 00 00 08 00 03 00 00 50 54 00 00 00 05 50 54 \ 00 00 00 06 08 00 45 00 00 28 00 00 40 00 40 06 \ b9 7c c0 a8 00 02 c0 a8 00 01 00 00 2b 60 00 00 \ 00 00 6a 4f 2b 
58 50 14 00 00 6d 75 00 00 00 00 \ 00 00 00 00 \ "], [0], [dnl OFPT_PACKET_OUT (xid=0x0): in_port=1 actions=output:3 data_len=60 tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=11104 tcp_csum:6d75 ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_OUT - OF1.0, with hex output of packet data]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 0d 00 54 00 00 00 00 ff ff ff ff 00 01 00 08 \ 00 00 00 08 00 03 00 00 50 54 00 00 00 05 50 54 \ 00 00 00 06 08 00 45 00 00 28 00 00 40 00 40 06 \ b9 7c c0 a8 00 02 c0 a8 00 01 00 00 2b 60 00 00 \ 00 00 6a 4f 2b 58 50 14 00 00 6d 75 00 00 00 00 \ 00 00 00 00 \ " 3], [0], [dnl OFPT_PACKET_OUT (xid=0x0): in_port=1 actions=output:3 data_len=60 tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=11104 tcp_csum:6d75 00000000 50 54 00 00 00 05 50 54-00 00 00 06 08 00 45 00 00000010 00 28 00 00 40 00 40 06-b9 7c c0 a8 00 02 c0 a8 00000020 00 01 00 00 2b 60 00 00-00 00 6a 4f 2b 58 50 14 00000030 00 00 6d 75 00 00 00 00-00 00 00 00 ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_OUT - OF1.1]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 0d 00 28 88 58 df c5 ff ff ff 00 ff ff ff fe \ 00 10 00 00 00 00 00 00 00 00 00 10 ff ff ff fb \ 05 dc 00 00 00 00 00 00 \ "], [0], [dnl OFPT_PACKET_OUT (OF1.2) (xid=0x8858dfc5): in_port=LOCAL actions=FLOOD buffer=0xffffff00 ]) AT_CLEANUP AT_SETUP([OFPT_PACKET_OUT - OF1.1, with packet]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 0d 00 64 88 58 df c5 ff ff ff ff ff ff ff fe \ 00 10 00 00 00 00 00 00 00 00 00 10 ff ff ff fb \ 05 dc 00 00 00 00 00 00 50 54 00 00 00 05 50 54 \ 00 00 00 06 08 00 45 00 00 28 00 00 40 00 40 06 \ b9 7c c0 a8 00 02 c0 a8 00 01 00 00 2b 60 00 00 \ 00 00 6a 4f 2b 58 50 14 00 00 6d 75 00 00 00 00 \ 00 00 00 00 \ "], [0], 
[dnl OFPT_PACKET_OUT (OF1.2) (xid=0x8858dfc5): in_port=LOCAL actions=FLOOD data_len=60 tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=11104 tcp_csum:6d75 ]) AT_CLEANUP # The flow is formatted with cls_rule_format() for the low-verbosity case. AT_SETUP([OFPT_FLOW_MOD - OF1.0 - low verbosity]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 01 0e 00 50 00 00 00 00 00 00 00 00 00 01 50 54 \ 00 00 00 06 50 54 00 00 00 05 ff ff 00 00 08 06 \ 00 02 00 00 c0 a8 00 02 c0 a8 00 01 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 05 00 00 00 00 \ 00 00 01 0e 00 00 00 00 00 00 00 08 00 03 00 00 \ " 2], [0], [dnl OFPT_FLOW_MOD (xid=0x0): ADD priority=65535,arp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=2 idle:5 buf:0x10e out_port:0 actions=output:3 ], [dnl ofp_util|INFO|normalization changed ofp_match, details: ofp_util|INFO| pre: arp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=2,nw_tos=0,tp_src=0,tp_dst=0 ofp_util|INFO|post: arp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=2 ]) AT_CLEANUP # The flow is formatted with cls_rule_format() for the low-verbosity case. 
AT_SETUP([OFPT_FLOW_MOD - OF1.1 - low verbosity]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 020e 0090 01020304 \ da1aa3e035d87158 ffffffffffffffff \ 02 01 003c 0078 9c40 ffffffff ffffffff ffffffff 0003 \ 0000 \ \ 0000 0058 00000000 000003f7 \ 000000000000ffffffffffff 000000000000ffffffffffff \ 0000 00 00 0806 00 00 c0a88000000000ff 00000000ffffffff 0000 0000 \ 00000000 00 000000 0000000000000000ffffffffffffffff \ \ 0001 0008 03 000000 \ " 2], [0], [dnl OFPT_FLOW_MOD (OF1.1) (xid=0x1020304): MOD table:2 priority=40000,arp,arp_spa=192.168.128.0/24 cookie:0xda1aa3e035d87158/0xffffffffffffffff idle:60 hard:120 send_flow_rem check_overlap actions=goto_table:3 ]) AT_CLEANUP # The flow is formatted with cls_rule_format() for the low-verbosity case. AT_SETUP([OFPT_FLOW_MOD - OF1.2 - low verbosity]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 0e 00 90 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 ff 00 00 00 00 00 ff ff \ ff ff ff ff ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 01 00 42 80 00 00 04 00 00 00 01 80 00 08 06 \ 50 54 00 00 00 06 80 00 06 06 50 54 00 00 00 05 \ 80 00 0a 02 08 06 80 00 0c 02 00 00 80 00 2a 02 \ 00 02 80 00 2c 04 c0 a8 00 02 80 00 2e 04 c0 a8 \ 00 01 00 00 00 00 00 00 00 04 00 18 00 00 00 00 \ 00 00 00 10 00 00 00 03 00 00 00 00 00 00 00 00 \ " 2], [0], [dnl OFPT_FLOW_MOD (OF1.2) (xid=0x2): ADD table:255 priority=65535,arp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=2 actions=output:3 ], [dnl ]) AT_CLEANUP # The flow is formatted with ofp10_match_to_string() for the # high-verbosity case. 
AT_SETUP([OFPT_FLOW_MOD - OF1.0 - high verbosity]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 01 0e 00 50 00 00 00 00 00 00 00 00 00 01 50 54 \ 00 00 00 06 50 54 00 00 00 05 ff ff 00 00 08 06 \ 00 02 00 00 c0 a8 00 02 c0 a8 00 01 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 05 00 00 00 00 \ 00 00 01 0e 00 00 00 00 00 00 00 08 00 03 00 00 \ " 3], [0], [dnl OFPT_FLOW_MOD (xid=0x0): ADD arp,in_port=1,dl_vlan=65535,dl_vlan_pcp=0,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,arp_op=2,nw_tos=0,tp_src=0,tp_dst=0 idle:5 pri:65535 buf:0x10e out_port:0 actions=output:3 ], [dnl ofp_util|INFO|normalization changed ofp_match, details: ofp_util|INFO| pre: arp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=2,nw_tos=0,tp_src=0,tp_dst=0 ofp_util|INFO|post: arp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=2 ]) AT_CLEANUP # The flow is formatted with cls_rule_format() for the low-verbosity case. 
AT_SETUP([OFPT_FLOW_MOD - OF1.2 - low verbosity]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 0e 00 90 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 ff 00 00 00 00 00 ff ff \ ff ff ff ff ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 01 00 42 80 00 00 04 00 00 00 01 80 00 08 06 \ 50 54 00 00 00 06 80 00 06 06 50 54 00 00 00 05 \ 80 00 0a 02 08 06 80 00 0c 02 00 00 80 00 2a 02 \ 00 02 80 00 2c 04 c0 a8 00 02 80 00 2e 04 c0 a8 \ 00 01 00 00 00 00 00 00 00 04 00 18 00 00 00 00 \ 00 00 00 10 00 00 00 03 00 00 00 00 00 00 00 00 \ " 2], [0], [dnl OFPT_FLOW_MOD (OF1.2) (xid=0x2): ADD table:255 priority=65535,arp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=2 actions=output:3 ], [dnl ]) AT_CLEANUP # The flow is formatted with cls_rule_format() for the low-verbosity case. AT_SETUP([OFPT_FLOW_MOD - OF1.3 - flags - low verbosity]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 04 0e 00 90 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 ff 00 00 00 00 00 ff ff \ ff ff ff ff ff ff ff ff ff ff ff ff 00 1f 00 00 \ 00 01 00 42 80 00 00 04 00 00 00 01 80 00 08 06 \ 50 54 00 00 00 06 80 00 06 06 50 54 00 00 00 05 \ 80 00 0a 02 08 06 80 00 0c 02 00 00 80 00 2a 02 \ 00 02 80 00 2c 04 c0 a8 00 02 80 00 2e 04 c0 a8 \ 00 01 00 00 00 00 00 00 00 04 00 18 00 00 00 00 \ 00 00 00 10 00 00 00 03 00 00 00 00 00 00 00 00 \ " 2], [0], [dnl OFPT_FLOW_MOD (OF1.3) (xid=0x2): ADD table:255 priority=65535,arp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=2 send_flow_rem check_overlap reset_counts no_packet_counts no_byte_counts actions=output:3 ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FLOW_MOD - OF1.2 - set-field ip_src]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 0e 00 58 52 33 45 02 
00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff \ ff ff ff ff ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 01 00 0a 80 00 0a 02 08 00 00 00 00 00 00 00 \ 00 04 00 18 00 00 00 00 00 19 00 10 80 00 16 04 \ c0 a8 03 5c 00 00 00 00 \ " 2], [0], [dnl OFPT_FLOW_MOD (OF1.2) (xid=0x52334502): ADD priority=255,ip actions=set_field:192.168.3.92->ip_src ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FLOW_MOD - OF1.2 - set-field ip_dst]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 0e 00 58 52 33 45 07 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff \ ff ff ff ff ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 01 00 0a 80 00 0a 02 08 00 00 00 00 00 00 00 \ 00 04 00 18 00 00 00 00 00 19 00 10 80 00 18 04 \ c0 a8 4a 7a 00 00 00 00 \ " 2], [0], [dnl OFPT_FLOW_MOD (OF1.2) (xid=0x52334507): ADD priority=255,ip actions=set_field:192.168.74.122->ip_dst ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FLOW_MOD - OF1.2 - set-field sctp_src]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 0e 00 58 52 33 45 07 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff \ ff ff ff ff ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 01 00 0f 80 00 0a 02 08 00 80 00 14 01 84 00 \ 00 04 00 18 00 00 00 00 00 19 00 10 80 00 22 02 \ 0d 06 00 00 00 00 00 00 \ " 2], [0], [dnl OFPT_FLOW_MOD (OF1.2) (xid=0x52334507): ADD priority=255,sctp actions=set_field:3334->sctp_src ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FLOW_MOD - OF1.2 - set-field sctp_dst]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 0e 00 58 52 33 45 07 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ff \ ff ff ff ff ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 01 00 0f 80 00 0a 02 08 00 80 00 14 01 84 00 \ 00 04 00 18 00 00 00 00 00 19 00 10 80 00 24 02 \ 11 5d 00 00 00 00 00 00 \ " 2], [0], [dnl OFPT_FLOW_MOD (OF1.2) (xid=0x52334507): ADD priority=255,sctp 
actions=set_field:4445->sctp_dst ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FLOW reply - OF1.2 - set-field ip_src]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 13 00 68 52 33 45 04 00 01 00 00 00 00 00 00 \ 00 58 00 00 00 00 00 00 00 00 00 00 00 ff 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 01 00 0a 80 00 0a 02 08 00 00 00 00 00 00 00 \ 00 04 00 18 00 00 00 00 00 19 00 10 80 00 16 04 \ c0 a8 03 5c 00 00 00 00 \ " 2], [0], [dnl OFPST_FLOW reply (OF1.2) (xid=0x52334504): cookie=0x0, duration=0s, table=0, n_packets=0, n_bytes=0, priority=255,ip actions=set_field:192.168.3.92->ip_src ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FLOW reply - OF1.2 - set-field ip_dst]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 13 00 68 52 33 45 09 00 01 00 00 00 00 00 00 \ 00 58 00 00 00 00 00 00 00 00 00 00 00 ff 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 01 00 0a 80 00 0a 02 08 00 00 00 00 00 00 00 \ 00 04 00 18 00 00 00 00 00 19 00 10 80 00 18 04 \ c0 a8 4a 7a 00 00 00 00 \ " 2], [0], [dnl OFPST_FLOW reply (OF1.2) (xid=0x52334509): cookie=0x0, duration=0s, table=0, n_packets=0, n_bytes=0, priority=255,ip actions=set_field:192.168.74.122->ip_dst ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FLOW reply - OF1.2 - set-field sctp_src]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 13 00 68 52 33 45 04 00 01 00 00 00 00 00 00 \ 00 58 00 00 00 00 00 00 00 00 00 00 00 ff 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 01 00 0f 80 00 0a 02 08 00 80 00 14 01 84 00 \ 00 04 00 18 00 00 00 00 00 19 00 10 80 00 22 02 \ 0d 06 00 00 00 00 00 00 \ " 2], [0], [dnl OFPST_FLOW reply (OF1.2) (xid=0x52334504): cookie=0x0, duration=0s, table=0, n_packets=0, n_bytes=0, priority=255,sctp 
actions=set_field:3334->sctp_src ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_FLOW reply - OF1.2 - set-field sctp_dst]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' ofp-print "\ 03 13 00 68 52 33 45 09 00 01 00 00 00 00 00 00 \ 00 58 00 00 00 00 00 00 00 00 00 00 00 ff 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 01 00 0f 80 00 0a 02 08 00 80 00 14 01 84 00 \ 00 04 00 18 00 00 00 00 00 19 00 10 80 00 24 02 \ 11 5d 00 00 00 00 00 00 \ " 2], [0], [dnl OFPST_FLOW reply (OF1.2) (xid=0x52334509): cookie=0x0, duration=0s, table=0, n_packets=0, n_bytes=0, priority=255,sctp actions=set_field:4445->sctp_dst ], [dnl ]) AT_CLEANUP AT_SETUP([OFPT_PORT_MOD - OF1.0]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 0f 00 20 00 00 00 03 00 03 50 54 00 00 00 01 \ 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 \ " 3], [0], [dnl OFPT_PORT_MOD (xid=0x3):port: 3: addr:50:54:00:00:00:01 config: PORT_DOWN mask: PORT_DOWN advertise: UNCHANGED ]) AT_CLEANUP AT_SETUP([OFPT_PORT_MOD - OF1.1]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 02 10 00 28 00 00 00 03 00 00 00 03 00 00 00 00 \ 50 54 00 00 00 01 00 00 00 00 00 01 00 00 00 01 \ 00 00 00 00 00 00 00 00 \ " 3], [0], [dnl OFPT_PORT_MOD (OF1.1) (xid=0x3):port: 3: addr:50:54:00:00:00:01 config: PORT_DOWN mask: PORT_DOWN advertise: UNCHANGED ]) AT_CLEANUP AT_SETUP([OFPT_PORT_MOD - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 10 00 28 00 00 00 03 00 00 00 03 00 00 00 00 \ 50 54 00 00 00 01 00 00 00 00 00 01 00 00 00 01 \ 00 00 00 00 00 00 00 00 \ " 3], [0], [dnl OFPT_PORT_MOD (OF1.2) (xid=0x3):port: 3: addr:50:54:00:00:00:01 config: PORT_DOWN mask: PORT_DOWN advertise: UNCHANGED ]) AT_CLEANUP AT_SETUP([OFPT_PORT_MOD - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 10 00 28 00 00 00 03 00 00 00 03 00 00 00 00 \ 50 54 00 00 00 01 00 00 00 00 00 01 00 00 00 01 \ 00 00 00 00 00 00 00 00 \ 
" 3], [0], [dnl OFPT_PORT_MOD (OF1.3) (xid=0x3):port: 3: addr:50:54:00:00:00:01 config: PORT_DOWN mask: PORT_DOWN advertise: UNCHANGED ]) AT_CLEANUP AT_SETUP([OFPST_DESC request]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "0110000c0000000100000000"], [0], [dnl OFPST_DESC request (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPST_DESC reply]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 04 2c 00 00 00 01 00 00 00 00 4e 69 63 69 \ 72 61 2c 20 49 6e 63 2e 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 4f 70 65 6e \ 20 76 53 77 69 74 63 68 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 
00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 31 2e 31 2e \ 30 70 72 65 32 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 4e 6f 6e 65 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 4e 6f 6e 65 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_DESC reply (xid=0x1): Manufacturer: Nicira, Inc. 
Hardware: Open vSwitch Software: 1.1.0pre2 Serial Num: None DP Description: None ]) AT_CLEANUP AT_SETUP([OFPST_FLOW request - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 01 10 00 38 00 00 00 04 00 01 00 00 00 38 20 ff \ ff fe 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 ff 00 ff ff \ "], [0], [dnl OFPST_FLOW request (xid=0x4): @&t@ ]) AT_CLEANUP AT_SETUP([OFPST_FLOW request - OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 03 12 00 38 00 00 00 02 00 01 00 00 00 00 00 00 \ ff 00 00 00 ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 01 00 04 00 00 00 00 \ "], [0], [dnl OFPST_FLOW request (OF1.2) (xid=0x2): @&t@ ]) AT_CLEANUP AT_SETUP([OFPST_FLOW request - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 04 12 00 38 00 00 00 02 00 01 00 00 00 00 00 00 \ ff 00 00 00 ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 01 00 04 00 00 00 00 \ "], [0], [dnl OFPST_FLOW request (OF1.3) (xid=0x2): @&t@ ]) AT_CLEANUP AT_SETUP([OFPST_FLOW reply - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 01 e4 00 00 00 04 00 01 00 00 00 60 00 00 \ 00 00 00 00 00 03 50 54 00 00 00 05 50 54 00 00 \ 00 06 ff ff 00 00 08 06 00 02 00 00 c0 a8 00 01 \ c0 a8 00 02 00 00 00 00 00 00 00 04 0b eb c2 00 \ ff ff 00 05 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 \ 00 00 00 3c 00 00 00 08 00 01 00 00 00 60 00 00 \ 00 00 00 00 00 01 50 54 00 00 00 06 50 54 00 00 \ 00 05 ff ff 00 00 08 00 00 01 00 00 c0 a8 00 02 \ c0 a8 00 01 00 00 00 00 00 00 00 08 35 a4 e9 00 \ ff ff 00 05 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 0d 00 00 00 00 \ 00 00 04 fa 00 00 00 08 00 03 00 00 00 60 00 00 \ 00 00 00 00 00 01 50 54 00 00 00 06 50 54 00 00 \ 00 05 ff ff 
00 00 08 06 00 01 00 00 c0 a8 00 02 \ c0 a8 00 01 00 00 00 00 00 00 00 04 10 b0 76 00 \ ff ff 00 05 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 00 \ 00 00 00 3c 00 00 00 08 00 03 00 00 00 60 00 00 \ 00 00 00 01 00 03 50 54 00 00 00 05 50 54 00 00 \ 00 06 ff ff 00 00 08 00 00 01 00 00 c0 a8 00 01 \ c0 a8 00 02 00 08 00 00 00 00 00 09 05 b8 d8 00 \ 80 00 00 05 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 0d 00 00 00 00 \ 00 00 04 fa 00 00 00 08 00 01 00 00 \ 00 58 02 00 00 3f ff ff 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 80 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_FLOW reply (xid=0x4): cookie=0x0, duration=4.2s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,arp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,arp_spa=192.168.0.1,arp_tpa=192.168.0.2,arp_op=2,nw_tos=0,tp_src=0,tp_dst=0 actions=output:1 cookie=0x0, duration=8.9s, table=0, n_packets=13, n_bytes=1274, idle_timeout=5, priority=65535,icmp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,icmp_type=0,icmp_code=0 actions=output:3 cookie=0x0, duration=4.28s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,arp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,arp_spa=192.168.0.2,arp_tpa=192.168.0.1,arp_op=1,nw_tos=0,tp_src=0,tp_dst=0 actions=output:3 cookie=0x0, duration=9.096s, table=0, n_packets=13, n_bytes=1274, idle_timeout=5, icmp,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,icmp_type=8,icmp_code=0 actions=output:1 cookie=0x0, duration=0s, table=2, n_packets=0, n_bytes=0, actions=drop ]) AT_CLEANUP AT_SETUP([OFPST_FLOW reply - 
OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 03 13 01 78 00 00 00 02 00 01 00 00 00 00 00 00 \ 00 78 00 00 00 00 00 03 01 5e f3 c0 80 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 62 \ 00 01 00 2d 80 00 00 04 00 00 00 02 80 00 06 06 \ ca da ad d6 0d 37 80 00 0a 02 08 00 80 00 10 01 \ 00 80 00 04 08 00 00 00 00 00 00 00 00 00 00 00 \ 00 04 00 18 00 00 00 00 00 00 00 10 00 00 00 02 \ 05 dc 00 00 00 00 00 00 00 78 00 00 00 00 00 04 \ 20 7c 0a 40 80 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 \ 00 00 00 00 00 00 00 8c 00 01 00 2d 80 00 00 04 \ 00 00 00 02 80 00 06 06 52 54 00 c3 00 89 80 00 \ 0a 02 08 00 80 00 10 01 00 80 00 04 08 00 00 00 \ 00 00 00 00 00 00 00 00 00 04 00 18 00 00 00 00 \ 00 00 00 10 00 00 00 02 05 dc 00 00 00 00 00 00 \ 00 78 00 00 00 00 00 04 20 a9 d1 00 80 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 2a \ 00 01 00 2d 80 00 00 04 00 00 00 02 80 00 06 06 \ 52 54 00 97 00 69 80 00 0a 02 08 00 80 00 10 01 \ 00 80 00 04 08 00 00 00 00 00 00 00 00 00 00 00 \ 00 04 00 18 00 00 00 00 00 00 00 10 00 00 00 02 \ 05 dc 00 00 00 00 00 00 \ "], [0], [dnl OFPST_FLOW reply (OF1.2) (xid=0x2): cookie=0x0, duration=3.023s, table=0, n_packets=1, n_bytes=98, ip,metadata=0,in_port=2,dl_dst=ca:da:ad:d6:0d:37,nw_tos=0 actions=output:2 cookie=0x0, duration=4.545s, table=0, n_packets=2, n_bytes=140, ip,metadata=0,in_port=2,dl_dst=52:54:00:c3:00:89,nw_tos=0 actions=output:2 cookie=0x0, duration=4.548s, table=0, n_packets=1, n_bytes=42, ip,metadata=0,in_port=2,dl_dst=52:54:00:97:00:69,nw_tos=0 actions=output:2 ]) AT_CLEANUP AT_SETUP([OFPST_AGGREGATE request - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 01 10 00 38 00 00 00 04 00 02 00 00 00 38 20 ff \ ff fe 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 
00 00 00 \ 00 00 00 00 ff 00 ff ff \ "], [0], [dnl OFPST_AGGREGATE request (xid=0x4): @&t@ ]) AT_CLEANUP AT_SETUP([OFPST_AGGREGATE request - OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 03 12 00 38 00 00 00 02 00 02 00 00 00 00 00 00 \ ff 00 00 00 ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 01 00 04 00 00 00 00 \ "], [0], [dnl OFPST_AGGREGATE request (OF1.2) (xid=0x2): @&t@ ]) AT_CLEANUP AT_SETUP([OFPST_AGGREGATE request - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 04 12 00 38 00 00 00 02 00 02 00 00 00 00 00 00 \ ff 00 00 00 ff ff ff ff ff ff ff ff 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 01 00 04 00 00 00 00 \ "], [0], [dnl OFPST_AGGREGATE request (OF1.3) (xid=0x2): @&t@ ]) AT_CLEANUP AT_SETUP([OFPST_AGGREGATE reply - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 00 24 00 00 00 04 00 02 00 00 00 00 00 00 \ 00 00 01 82 00 00 00 00 00 00 93 78 00 00 00 04 \ 00 00 00 00 \ "], [0], [dnl OFPST_AGGREGATE reply (xid=0x4): packet_count=386 byte_count=37752 flow_count=4 ]) AT_CLEANUP AT_SETUP([OFPST_AGGREGATE reply - OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 03 13 00 28 00 00 00 02 00 02 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 79 00 00 00 00 00 00 4b 4f \ 00 00 00 03 00 00 00 00 \ "], [0], [dnl OFPST_AGGREGATE reply (OF1.2) (xid=0x2): packet_count=121 byte_count=19279 flow_count=3 ]) AT_CLEANUP AT_SETUP([OFPST_AGGREGATE reply - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 04 13 00 28 00 00 00 02 00 02 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 79 00 00 00 00 00 00 4b 4f \ 00 00 00 03 00 00 00 00 \ "], [0], [dnl OFPST_AGGREGATE reply (OF1.3) (xid=0x2): packet_count=121 byte_count=19279 flow_count=3 ]) AT_CLEANUP AT_SETUP([OFPST_TABLE request - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) 
AT_CHECK([ovs-ofctl ofp-print "0110000c0000000100030000"], [0], [dnl OFPST_TABLE request (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPST_TABLE request - OF1.1]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "02120010000000020003000000000000"], [0], [dnl OFPST_TABLE request (OF1.1) (xid=0x2): ]) AT_CLEANUP AT_SETUP([OFPST_TABLE request - OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "03120010000000020003000000000000"], [0], [dnl OFPST_TABLE request (OF1.2) (xid=0x2): ]) AT_CLEANUP AT_SETUP([OFPST_TABLE request - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "04120010000000020003000000000000"], [0], [dnl OFPST_TABLE request (OF1.3) (xid=0x2): ]) AT_CLEANUP AT_SETUP([OFPST_TABLE reply - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 00 4c 00 00 00 01 00 03 00 00 00 00 00 00 \ 63 6c 61 73 73 69 66 69 65 72 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 3f ff ff 00 10 00 00 00 00 00 0b 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_TABLE reply (xid=0x1): 1 tables 0: classifier: wild=0x3fffff, max=1048576, active=11 lookup=0, matched=0 ]) AT_CLEANUP AT_SETUP([OFPST_TABLE reply - OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) (mid="wild=0xfffffffff, max=1000000," tail=" match=0xfffffffff, instructions=0x00000007, config=0x00000000 write_actions=0x00000000, apply_actions=0x00000000 write_setfields=0x0000000fffffffff apply_setfields=0x0000000fffffffff metadata_match=0x0000000000000000 metadata_write=0x0000000000000000" echo "OFPST_TABLE reply (OF1.2) (xid=0x2): 255 tables 0: classifier: $mid active=1 lookup=74614, matched=106024$tail" x=1 while test $x -lt 254; do printf " %d: %-8s: $mid active=0 lookup=0, matched=0$tail " $x table$x x=`expr $x + 1` done echo " 254: table254: $mid active=2 lookup=0, matched=0$tail") > expout 
(pad32="\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" pad7="00 00 00 00 00 00 00 " mid="00 00 00 0f ff ff ff ff \ 00 00 00 0f ff ff ff ff 00 00 00 00 00 00 00 00 \ 00 00 00 0f ff ff ff ff 00 00 00 0f ff ff ff ff \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 07 00 00 00 00 00 0f 42 40 " tail="00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" echo -n "03 13 7f 90 00 00 00 02 00 03 00 00 00 00 00 00 " x=0 printf "%02x $pad7" $x printf "%s$pad32" "classifier" | od -A n -t x1 -v -N 32 | tr '\n' ' ' echo -n "$mid 00 00 00 01 " echo -n "00 00 00 00 00 01 23 76 00 00 00 00 00 01 9e 28 " x=1 while test $x -lt 254; do printf "%02x $pad7" $x printf "%s$pad32" "table$x" | od -A n -t x1 -v -N 32 | tr '\n' ' ' echo -n "$mid 00 00 00 00 $tail " x=`expr $x + 1` done x=254 printf "%02x $pad7" $x printf "%s$pad32" "table$x" | od -A n -t x1 -v -N 32 | tr '\n' ' ' echo -n "$mid 00 00 00 02 $tail") > in AT_CHECK([ovs-ofctl ofp-print "$(cat in)"], [0], [expout]) AT_CLEANUP AT_SETUP([OFPST_TABLE reply - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 04 13 00 40 00 00 00 01 00 03 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 0b 00 00 00 00 00 00 02 00 \ 00 00 00 00 00 00 01 00 01 00 00 00 00 00 00 0c \ 00 00 00 00 00 00 02 01 00 00 00 00 00 00 01 01 \ "], [0], [dnl OFPST_TABLE reply (OF1.3) (xid=0x1): 2 tables 0: active=11, lookup=512, matched=256 1: active=12, lookup=513, matched=257 ]) AT_CLEANUP AT_SETUP([OFPST_PORT request - 1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 01 10 00 14 00 00 00 01 00 04 00 00 ff ff 00 00 \ 00 00 00 00 \ "], [0], [dnl OFPST_PORT request (xid=0x1): port_no=ANY ]) AT_CLEANUP AT_SETUP([OFPST_PORT request - 1.1]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 02 12 00 18 00 00 00 02 00 04 00 00 00 00 00 00 \ ff ff ff ff 00 00 00 00 \ "], [0], [dnl OFPST_PORT request (OF1.1) (xid=0x2): port_no=ANY ]) AT_CLEANUP AT_SETUP([OFPST_PORT 
request - 1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 03 12 00 18 00 00 00 02 00 04 00 00 00 00 00 00 \ ff ff ff ff 00 00 00 00 \ "], [0], [dnl OFPST_PORT request (OF1.2) (xid=0x2): port_no=ANY ]) AT_CLEANUP AT_SETUP([OFPST_PORT request - 1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 04 12 00 18 00 00 00 02 00 04 00 00 00 00 00 00 \ ff ff ff ff 00 00 00 00 \ "], [0], [dnl OFPST_PORT request (OF1.3) (xid=0x2): port_no=ANY ]) AT_CLEANUP AT_SETUP([OFPST_PORT reply - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 01 ac 00 00 00 01 00 04 00 00 00 03 00 00 \ 00 00 00 00 00 00 00 00 00 00 4d 20 00 00 00 00 \ 00 00 14 32 00 00 00 00 00 0f 60 4e 00 00 00 00 \ 00 05 71 bc 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 ff fe 00 00 00 00 00 00 00 00 00 00 \ 00 00 02 ac 00 00 00 00 00 00 01 f5 00 00 00 00 \ 00 01 0c 8c 00 00 00 00 00 00 db 1c 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 02 00 00 \ 00 00 00 00 00 00 00 00 00 00 06 be 00 00 00 00 \ 00 00 05 84 00 00 00 00 00 02 34 b4 00 00 00 00 \ 00 02 23 d4 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 \ 00 00 14 12 00 00 00 00 00 00 14 66 00 00 00 00 \ 00 04 a2 54 00 00 00 00 00 05 8a 1e 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_PORT reply 
(xid=0x1): 4 ports port 3: rx pkts=19744, bytes=1007694, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=5170, bytes=356796, drop=0, errs=0, coll=0 port LOCAL: rx pkts=684, bytes=68748, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=501, bytes=56092, drop=0, errs=0, coll=0 port 2: rx pkts=1726, bytes=144564, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=1412, bytes=140244, drop=0, errs=0, coll=0 port 1: rx pkts=5138, bytes=303700, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=5222, bytes=363038, drop=0, errs=0, coll=0 ]) AT_CLEANUP AT_SETUP([OFPST_PORT reply - OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 03 13 01 48 00 00 00 02 00 04 00 00 00 00 00 00 \ 00 00 00 02 00 00 00 00 00 00 00 00 00 01 95 56 \ 00 00 00 00 00 00 00 88 00 00 00 00 02 5d 08 98 \ 00 00 00 00 00 00 2c f8 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 ff ff ff fe 00 00 00 00 \ 00 00 00 00 00 00 00 44 00 00 00 00 00 00 9d 2c \ 00 00 00 00 00 00 16 7c 00 00 00 00 01 1e 36 44 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 44 \ 00 00 00 00 00 00 9d 2c 00 00 00 00 00 00 16 7c \ 00 00 00 00 01 1e 36 44 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_PORT reply (OF1.2) (xid=0x2): 3 ports port 2: rx pkts=103766, bytes=39651480, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=136, bytes=11512, drop=0, errs=0, coll=0 port LOCAL: rx pkts=68, bytes=5756, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=40236, bytes=18757188, drop=0, errs=0, coll=0 port 1: 
rx pkts=68, bytes=5756, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=40236, bytes=18757188, drop=0, errs=0, coll=0 ]) AT_CLEANUP AT_SETUP([OFPST_PORT reply - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 04 13 01 60 00 00 00 02 00 04 00 00 00 00 00 00 \ 00 00 00 02 00 00 00 00 00 00 00 00 00 01 95 56 \ 00 00 00 00 00 00 00 88 00 00 00 00 02 5d 08 98 \ 00 00 00 00 00 00 2c f8 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 01 00 0f 42 40 \ ff ff ff fe 00 00 00 00 \ 00 00 00 00 00 00 00 44 00 00 00 00 00 00 9d 2c \ 00 00 00 00 00 00 16 7c 00 00 00 00 01 1e 36 44 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ ff ff ff ff ff ff ff ff \ 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 44 \ 00 00 00 00 00 00 9d 2c 00 00 00 00 00 00 16 7c \ 00 00 00 00 01 1e 36 44 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 07 54 d4 c0 \ "], [0], [dnl OFPST_PORT reply (OF1.3) (xid=0x2): 3 ports port 2: rx pkts=103766, bytes=39651480, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=136, bytes=11512, drop=0, errs=0, coll=0 duration=1.001s port LOCAL: rx pkts=68, bytes=5756, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=40236, bytes=18757188, drop=0, errs=0, coll=0 port 1: rx pkts=68, bytes=5756, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=40236, bytes=18757188, drop=0, errs=0, coll=0 duration=0.123s ]) AT_CLEANUP AT_SETUP([OFPST_QUEUE request - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 01 10 00 14 00 00 00 01 00 05 00 00 ff fc 00 00 \ 
ff ff ff ff \ "], [0], [dnl OFPST_QUEUE request (xid=0x1):port=ANY queue=ALL ]) AT_CLEANUP AT_SETUP([OFPST_QUEUE request - OF1.1]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 02 12 00 18 00 00 00 02 00 05 00 00 00 00 00 00 \ ff ff ff ff ff ff ff ff \ "], [0], [dnl OFPST_QUEUE request (OF1.1) (xid=0x2):port=ANY queue=ALL ]) AT_CLEANUP AT_SETUP([OFPST_QUEUE request - OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 03 12 00 18 00 00 00 02 00 05 00 00 00 00 00 00 \ ff ff ff ff ff ff ff ff \ "], [0], [dnl OFPST_QUEUE request (OF1.2) (xid=0x2):port=ANY queue=ALL ]) AT_CLEANUP AT_SETUP([OFPST_QUEUE request - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 04 12 00 18 00 00 00 02 00 05 00 00 00 00 00 00 \ ff ff ff ff ff ff ff ff \ "], [0], [dnl OFPST_QUEUE request (OF1.3) (xid=0x2):port=ANY queue=ALL ]) AT_CLEANUP AT_SETUP([OFPST_QUEUE reply - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 00 cc 00 00 00 01 00 05 00 00 00 03 00 00 \ 00 00 00 01 00 00 00 00 00 00 01 2e 00 00 00 00 \ 00 00 00 01 00 00 00 00 00 00 00 00 00 03 00 00 \ 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 02 00 00 \ 00 00 00 01 00 00 00 00 00 00 08 34 00 00 00 00 \ 00 00 00 14 00 00 00 00 00 00 00 00 00 02 00 00 \ 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 01 00 00 \ 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 01 00 00 \ 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_QUEUE reply (xid=0x1): 6 queues port 3 queue 1: bytes=302, pkts=1, errors=0, duration=? port 3 queue 2: bytes=0, pkts=0, errors=0, duration=? port 2 queue 1: bytes=2100, pkts=20, errors=0, duration=? port 2 queue 2: bytes=0, pkts=0, errors=0, duration=? 
port 1 queue 1: bytes=0, pkts=0, errors=0, duration=? port 1 queue 2: bytes=0, pkts=0, errors=0, duration=? ]) AT_CLEANUP AT_SETUP([OFPST_PORT_DESC request - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "0110000c00000001000d0000"], [0], [dnl OFPST_PORT_DESC request (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPST_QUEUE reply - OF1.1]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 02 13 00 d0 00 00 00 01 00 05 00 00 00 00 00 00 \ 00 00 00 03 00 00 00 01 00 00 00 00 00 00 01 2e \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 03 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 02 00 00 00 01 00 00 00 00 00 00 08 34 \ 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 00 \ 00 00 00 02 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_QUEUE reply (OF1.1) (xid=0x1): 6 queues port 3 queue 1: bytes=302, pkts=1, errors=0, duration=? port 3 queue 2: bytes=0, pkts=0, errors=0, duration=? port 2 queue 1: bytes=2100, pkts=20, errors=0, duration=? port 2 queue 2: bytes=0, pkts=0, errors=0, duration=? port 1 queue 1: bytes=0, pkts=0, errors=0, duration=? port 1 queue 2: bytes=0, pkts=0, errors=0, duration=? 
]) AT_CLEANUP AT_SETUP([OFPST_QUEUE reply - OF1.2]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 03 13 00 d0 00 00 00 01 00 05 00 00 00 00 00 00 \ 00 00 00 03 00 00 00 01 00 00 00 00 00 00 01 2e \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 03 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 02 00 00 00 01 00 00 00 00 00 00 08 34 \ 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 00 \ 00 00 00 02 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 01 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_QUEUE reply (OF1.2) (xid=0x1): 6 queues port 3 queue 1: bytes=302, pkts=1, errors=0, duration=? port 3 queue 2: bytes=0, pkts=0, errors=0, duration=? port 2 queue 1: bytes=2100, pkts=20, errors=0, duration=? port 2 queue 2: bytes=0, pkts=0, errors=0, duration=? port 1 queue 1: bytes=0, pkts=0, errors=0, duration=? port 1 queue 2: bytes=0, pkts=0, errors=0, duration=? 
]) AT_CLEANUP AT_SETUP([OFPST_QUEUE reply - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 04 13 01 00 00 00 00 01 00 05 00 00 00 00 00 00 \ 00 00 00 03 00 00 00 01 00 00 00 00 00 00 01 2e \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 64 1d cd 65 00 \ 00 00 00 03 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 64 1d cd 65 00 \ 00 00 00 02 00 00 00 01 00 00 00 00 00 00 08 34 \ 00 00 00 00 00 00 00 14 00 00 00 00 00 00 00 00 \ 00 00 00 64 1d cd 65 00 \ 00 00 00 02 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 64 1d cd 65 00 \ 00 00 00 01 00 00 00 01 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 64 1d cd 65 00 \ 00 00 00 01 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ ff ff ff ff ff ff ff ff \ "], [0], [dnl OFPST_QUEUE reply (OF1.3) (xid=0x1): 6 queues port 3 queue 1: bytes=302, pkts=1, errors=0, duration=100.5s port 3 queue 2: bytes=0, pkts=0, errors=0, duration=100.5s port 2 queue 1: bytes=2100, pkts=20, errors=0, duration=100.5s port 2 queue 2: bytes=0, pkts=0, errors=0, duration=100.5s port 1 queue 1: bytes=0, pkts=0, errors=0, duration=100.5s port 1 queue 2: bytes=0, pkts=0, errors=0, duration=? 
]) AT_CLEANUP AT_SETUP([OFPST_PORT_DESC reply - OF1.0]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 00 3c 00 00 00 00 00 0d 00 00 00 03 50 54 \ 00 00 00 01 65 74 68 30 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 01 00 00 02 08 \ 00 00 02 8f 00 00 02 8f 00 00 00 00 \ "], [0], [dnl OFPST_PORT_DESC reply (xid=0x0): 3(eth0): addr:50:54:00:00:00:01 config: PORT_DOWN state: LINK_DOWN current: 100MB-FD AUTO_NEG advertised: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG supported: 10MB-HD 10MB-FD 100MB-HD 100MB-FD COPPER AUTO_NEG speed: 100 Mbps now, 100 Mbps max ]) AT_CLEANUP AT_SETUP([OFPT_METER_MOD request - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 1d 00 20 00 00 00 02 00 00 00 0d 00 00 00 05 \ 00 01 00 10 00 00 04 00 00 00 00 80 00 00 00 00 \ "], [0], [dnl OFPT_METER_MOD (OF1.3) (xid=0x2): ADD meter=5 kbps burst stats bands= type=drop rate=1024 burst_size=128 ]) AT_CLEANUP AT_SETUP([OFPST_METER request - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "041200180000000200090000000000000000000100000000"], [0], [dnl OFPST_METER request (OF1.3) (xid=0x2): meter=1 ]) AT_CLEANUP AT_SETUP([OFPST_METER_CONFIG request - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "0412001800000002000a0000000000000000000100000000"], [0], [dnl OFPST_METER_CONFIG request (OF1.3) (xid=0x2): meter=1 ]) AT_CLEANUP AT_SETUP([OFPST_METER_FEATURES request - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "0412001000000002000b000000000000"], [0], [dnl OFPST_METER_FEATURES request (OF1.3) (xid=0x2): ]) AT_CLEANUP AT_SETUP([OFPST_METER_FEATURES reply - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 04 13 00 20 00 00 00 02 00 0b 00 00 00 00 00 00 \ 00 01 00 00 00 00 00 06 00 00 00 0F 10 02 00 00 \ "], [0], [dnl OFPST_METER_FEATURES reply (OF1.3) (xid=0x2): max_meter:65536 max_bands:16 
max_color:2 band_types: drop dscp_remark capabilities: kbps pktps burst stats ]) AT_CLEANUP AT_SETUP([OFPST_METER_CONFIG reply - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 04 13 00 50 00 00 00 02 00 0a 00 00 00 00 00 00 \ 00 28 00 05 00 00 00 01 \ 00 01 00 10 00 01 00 00 00 00 05 00 00 00 00 00 \ 00 02 00 10 00 10 00 00 00 00 f0 00 00 00 00 00 \ 00 18 00 09 00 00 00 02 \ 00 01 00 10 00 02 00 00 00 00 00 00 00 00 00 00 \ "], [0], [dnl OFPST_METER_CONFIG reply (OF1.3) (xid=0x2): meter=1 kbps burst bands= type=drop rate=65536 burst_size=1280 type=dscp_remark rate=1048576 burst_size=61440 prec_level=0 meter=2 kbps stats bands= type=drop rate=131072 ]) AT_CLEANUP AT_SETUP([OFPST_METER reply - OF1.3]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 04 13 00 90 00 00 00 02 00 09 00 00 00 00 00 00 \ 00 00 00 01 00 48 00 00 00 00 00 00 00 00 00 05 \ 00 00 00 00 00 00 10 00 00 00 00 00 00 02 30 00 \ 00 00 01 8a 0a 6e 23 44 \ 00 00 00 00 00 00 00 7e 00 00 00 00 00 00 34 33 \ 00 00 00 00 00 00 00 e7 00 00 00 00 00 00 94 2e \ 00 00 00 02 00 38 00 00 00 00 00 00 00 00 00 02 \ 00 00 00 00 00 00 02 00 00 00 00 00 00 00 30 00 \ 00 00 01 87 0a 23 6e 44 \ 00 00 00 00 00 00 00 2a 00 00 00 00 00 00 04 33 \ "], [0], [dnl OFPST_METER reply (OF1.3) (xid=0x2): meter:1 flow_count:5 packet_in_count:4096 byte_in_count:143360 duration:394.174990148s bands: 0: packet_count:126 byte_count:13363 1: packet_count:231 byte_count:37934 meter:2 flow_count:2 packet_in_count:512 byte_in_count:12288 duration:391.170094148s bands: 0: packet_count:42 byte_count:1075 ]) AT_CLEANUP AT_SETUP([OFPT_BARRIER_REQUEST - OF1.0]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '01 12 00 08 00 00 00 01'], [0], [dnl OFPT_BARRIER_REQUEST (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_BARRIER_REQUEST - OF1.1]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '02 14 00 08 00 00 00 01'], [0], [dnl OFPT_BARRIER_REQUEST (OF1.1) (xid=0x1): ]) AT_CLEANUP 
AT_SETUP([OFPT_BARRIER_REQUEST - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '03 14 00 08 00 00 00 01'], [0], [dnl OFPT_BARRIER_REQUEST (OF1.2) (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_BARRIER_REQUEST - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '04 14 00 08 00 00 00 01'], [0], [dnl OFPT_BARRIER_REQUEST (OF1.3) (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_BARRIER_REPLY - OF1.0]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '01 13 00 08 00 00 00 01'], [0], [dnl OFPT_BARRIER_REPLY (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_BARRIER_REPLY - OF1.1]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '02 15 00 08 00 00 00 01'], [0], [dnl OFPT_BARRIER_REPLY (OF1.1) (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_BARRIER_REPLY - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '03 15 00 08 00 00 00 01'], [0], [dnl OFPT_BARRIER_REPLY (OF1.2) (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_BARRIER_REPLY - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print '04 15 00 08 00 00 00 01'], [0], [dnl OFPT_BARRIER_REPLY (OF1.3) (xid=0x1): ]) AT_CLEANUP AT_SETUP([OFPT_SET_ASYNC - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 1c 00 20 00 00 00 00 00 00 10 05 00 00 10 07 \ 00 00 00 03 00 00 00 07 00 00 00 00 00 00 00 03 \ "], [0], [dnl OFPT_SET_ASYNC (OF1.3) (xid=0x0): master: PACKET_IN: no_match invalid_ttl 12 PORT_STATUS: add delete FLOW_REMOVED: (off) slave: PACKET_IN: no_match action invalid_ttl 12 PORT_STATUS: add delete modify FLOW_REMOVED: idle hard ]) AT_CLEANUP AT_SETUP([OFPT_ROLE_REQUEST - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 18 00 18 00 00 00 02 00 00 00 02 00 00 00 00 \ 00 00 00 00 00 00 00 03 \ "], [0], [dnl OFPT_ROLE_REQUEST (OF1.2) (xid=0x2): role=master generation_id=3 ]) AT_CLEANUP AT_SETUP([OFPT_ROLE_REQUEST - nochange - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 18 00 18 00 00 00 02 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 \ 
"], [0], [dnl OFPT_ROLE_REQUEST (OF1.2) (xid=0x2): role=nochange ]) AT_CLEANUP AT_SETUP([NXT_ROLE_REQUEST]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 14 00 00 00 02 00 00 23 20 00 00 00 0a \ 00 00 00 01 \ "], [0], [dnl NXT_ROLE_REQUEST (xid=0x2): role=master ]) AT_CLEANUP AT_SETUP([OFPT_ROLE_REPLY - OF1.2]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 03 19 00 18 00 00 00 02 00 00 00 03 00 00 00 00 \ 12 34 56 78 ab cd ef 90 \ "], [0], [dnl OFPT_ROLE_REPLY (OF1.2) (xid=0x2): role=slave generation_id=1311768467750121360 ]) AT_CLEANUP AT_SETUP([NXT_ROLE_REPLY]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 14 00 00 00 02 00 00 23 20 00 00 00 0b \ 00 00 00 02 \ "], [0], [dnl NXT_ROLE_REPLY (xid=0x2): role=slave ]) AT_CLEANUP AT_SETUP([NXT_SET_PACKET_IN]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 14 00 00 00 02 00 00 23 20 00 00 00 10 \ 00 00 00 01 \ "], [0], [dnl NXT_SET_PACKET_IN_FORMAT (xid=0x2): format=nxm ]) AT_CLEANUP AT_SETUP([NXT_PACKET_IN]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 ba 00 00 00 00 00 00 23 20 00 00 00 11 \ ff ff ff ff 00 40 01 07 00 00 00 00 00 00 00 09 \ 00 4e 00 00 00 00 00 00 00 00 00 02 00 01 00 01 \ 20 08 00 00 00 00 00 00 00 06 00 01 00 04 00 00 \ 00 01 00 01 02 04 00 00 00 02 00 01 04 04 00 00 \ 00 03 00 01 06 04 00 00 00 04 00 01 08 04 00 00 \ 00 05 80 00 05 10 5a 5a 5a 5a 5a 5a 5a 5a ff ff \ ff ff ff ff ff ff 00 00 00 00 82 82 82 82 82 82 \ 80 81 81 81 81 81 81 00 00 50 08 00 45 00 00 28 \ 00 00 00 00 00 06 32 05 53 53 53 53 54 54 54 54 \ 00 55 00 56 00 00 00 00 00 00 00 00 50 00 00 00 \ 31 6d 00 00 00 00 00 00 00 00 \ "], [0], [dnl NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 metadata=0x5a5a5a5a5a5a5a5a reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) 
tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 tcp_csum:316d ]) AT_CLEANUP AT_SETUP([NXT_PACKET_IN, with hex output of packet data]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 ba 00 00 00 00 00 00 23 20 00 00 00 11 \ ff ff ff ff 00 40 01 07 00 00 00 00 00 00 00 09 \ 00 4e 00 00 00 00 00 00 00 00 00 02 00 01 00 01 \ 20 08 00 00 00 00 00 00 00 06 00 01 00 04 00 00 \ 00 01 00 01 02 04 00 00 00 02 00 01 04 04 00 00 \ 00 03 00 01 06 04 00 00 00 04 00 01 08 04 00 00 \ 00 05 80 00 05 10 5a 5a 5a 5a 5a 5a 5a 5a ff ff \ ff ff ff ff ff ff 00 00 00 00 82 82 82 82 82 82 \ 80 81 81 81 81 81 81 00 00 50 08 00 45 00 00 28 \ 00 00 00 00 00 06 32 05 53 53 53 53 54 54 54 54 \ 00 55 00 56 00 00 00 00 00 00 00 00 50 00 00 00 \ 31 6d 00 00 00 00 00 00 00 00 \ " 3], [0], [dnl NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 metadata=0x5a5a5a5a5a5a5a5a reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 tcp_csum:316d 00000000 82 82 82 82 82 82 80 81-81 81 81 81 81 00 00 50 00000010 08 00 45 00 00 28 00 00-00 00 00 06 32 05 53 53 00000020 53 53 54 54 54 54 00 55-00 56 00 00 00 00 00 00 00000030 00 00 50 00 00 00 31 6d-00 00 00 00 00 00 00 00 ]) AT_CLEANUP AT_SETUP([NXT_SET_ASYNC_CONFIG]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 28 00 00 00 00 00 00 23 20 00 00 00 13 \ 00 00 10 05 00 00 10 07 00 00 00 03 00 00 00 07 \ 00 00 00 00 00 00 00 03 \ "], [0], [dnl NXT_SET_ASYNC_CONFIG (xid=0x0): master: PACKET_IN: no_match invalid_ttl 12 PORT_STATUS: add delete FLOW_REMOVED: (off) slave: PACKET_IN: no_match action invalid_ttl 12 PORT_STATUS: add delete modify 
FLOW_REMOVED: idle hard ]) AT_CLEANUP AT_SETUP([NXT_SET_CONTROLLER_ID]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 18 00 00 00 03 00 00 23 20 00 00 00 14 \ 00 00 00 00 00 00 00 7b \ "], [0], [dnl NXT_SET_CONTROLLER_ID (xid=0x3): id=123 ]) AT_CLEANUP AT_SETUP([NXT_FLOW_MONITOR_CANCEL]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 14 00 00 00 03 00 00 23 20 00 00 00 15 \ 01 02 30 40 \ "], [0], [dnl NXT_FLOW_MONITOR_CANCEL (xid=0x3): id=16920640 ]) AT_CLEANUP AT_SETUP([NXT_FLOW_MONITOR_PAUSED]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 10 00 00 00 03 00 00 23 20 00 00 00 16 \ "], [0], [dnl NXT_FLOW_MONITOR_PAUSED (xid=0x3): ]) AT_CLEANUP AT_SETUP([NXT_FLOW_MONITOR_RESUMED]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 10 00 00 00 03 00 00 23 20 00 00 00 17 \ "], [0], [dnl NXT_FLOW_MONITOR_RESUMED (xid=0x3): ]) AT_CLEANUP AT_SETUP([NXT_SET_FLOW_FORMAT]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 14 00 00 00 02 00 00 23 20 00 00 00 0c \ 00 00 00 02 \ "], [0], [dnl NXT_SET_FLOW_FORMAT (xid=0x2): format=nxm ]) AT_CLEANUP # The flow is formatted with cls_rule_format() for the low-verbosity case. AT_SETUP([NXT_FLOW_MOD, low verbosity]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 60 00 00 00 02 00 00 23 20 00 00 00 0d \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 00 \ ff ff ff ff 00 10 00 00 00 14 00 00 00 00 00 00 \ 00 01 20 08 00 00 00 00 00 00 01 c8 00 01 00 04 \ 00 00 00 7b 00 00 00 00 ff ff 00 18 00 00 23 20 \ 00 07 00 1f 00 01 00 04 00 00 00 00 00 00 00 05 \ " 2], [0], [dnl NXT_FLOW_MOD (xid=0x2): ADD reg0=0x7b,tun_id=0x1c8 out_port:16 actions=load:0x5->NXM_NX_REG0[[]] ]) AT_CLEANUP # The flow is formatted with ofp10_match_to_string() for the # low-verbosity case. 
AT_SETUP([NXT_FLOW_MOD, high verbosity]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 60 00 00 00 02 00 00 23 20 00 00 00 0d \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80 00 \ ff ff ff ff 01 00 00 00 00 14 00 00 00 00 00 00 \ 00 01 20 08 00 00 00 00 00 00 01 c8 00 01 00 04 \ 00 00 00 7b 00 00 00 00 ff ff 00 18 00 00 23 20 \ 00 07 00 1f 00 01 00 04 00 00 00 00 00 00 00 05 \ " 3], [0], [dnl NXT_FLOW_MOD (xid=0x2): ADD NXM_NX_TUN_ID(00000000000001c8), NXM_NX_REG0(0000007b) out_port:256 actions=load:0x5->NXM_NX_REG0[[]] ]) AT_CLEANUP AT_SETUP([NXT_FLOW_REMOVED]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 78 00 00 00 00 00 00 23 20 00 00 00 0e \ 00 00 00 00 00 00 00 00 ff ff 00 02 00 00 00 06 \ 01 6e 36 00 00 05 00 3c 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c 00 00 00 02 00 03 00 00 \ 02 06 50 54 00 00 00 06 00 00 04 06 50 54 00 00 \ 00 05 00 00 06 02 08 06 00 00 08 02 00 00 00 00 \ 1e 02 00 02 00 00 20 04 c0 a8 00 01 00 00 22 04 \ c0 a8 00 02 00 00 00 00 \ "], [0], [dnl NXT_FLOW_REMOVED (xid=0x0): priority=65535,arp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,arp_spa=192.168.0.1,arp_tpa=192.168.0.2,arp_op=2 reason=idle table_id=1 duration6.024s idle5 pkts1 bytes60 ]) AT_CLEANUP AT_SETUP([NXT_FLOW_MOD_TABLE_ID]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 18 01 02 03 04 00 00 23 20 00 00 00 0f \ 01 00 00 00 00 00 00 00 \ "], [0], [dnl NXT_FLOW_MOD_TABLE_ID (xid=0x1020304): enable ]) AT_CLEANUP AT_SETUP([NXST_FLOW request]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 01 10 00 20 00 00 00 04 ff ff 00 00 00 00 23 20 \ 00 00 00 00 00 00 00 00 ff ff 00 00 ff 00 00 00 \ "], [0], [dnl NXST_FLOW request (xid=0x4): @&t@ ]) AT_CLEANUP AT_SETUP([NXST_FLOW reply]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 08 18 00 00 00 04 ff ff 00 00 00 00 23 20 \ 00 00 00 00 00 00 00 00 00 88 00 00 00 00 00 01 \ 02 dc 
6c 00 ff ff 00 05 00 00 00 4c 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c 00 00 00 02 00 03 00 00 \ 02 06 50 54 00 00 00 06 00 00 04 06 50 54 00 00 \ 00 05 00 00 06 02 08 00 00 00 08 02 00 00 00 00 \ 0a 01 00 00 00 0e 04 c0 a8 00 01 00 00 10 04 c0 \ a8 00 02 00 00 0c 01 06 00 00 12 02 09 e7 00 00 \ 14 02 00 00 00 00 00 00 00 00 00 08 00 01 00 00 \ 00 88 00 00 00 00 00 03 32 11 62 00 ff ff 00 05 \ 00 00 00 4c 00 03 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 3c \ 00 00 00 02 00 03 00 00 02 06 50 54 00 00 00 06 \ 00 00 04 06 50 54 00 00 00 05 00 00 06 02 08 00 \ 00 00 08 02 00 00 00 00 0a 01 00 00 00 0e 04 c0 \ a8 00 01 00 00 10 04 c0 a8 00 02 00 00 0c 01 06 \ 00 00 12 02 09 e4 00 00 14 02 00 00 00 00 00 00 \ 00 00 00 08 00 01 00 00 00 88 00 00 00 00 00 02 \ 33 f9 aa 00 ff ff 00 05 00 00 00 4c 00 05 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c 00 00 00 02 00 01 00 00 \ 02 06 50 54 00 00 00 05 00 00 04 06 50 54 00 00 \ 00 06 00 00 06 02 08 00 00 00 08 02 00 00 00 00 \ 0a 01 00 00 00 0e 04 c0 a8 00 02 00 00 10 04 c0 \ a8 00 01 00 00 0c 01 06 00 00 12 02 00 00 00 00 \ 14 02 09 e5 00 00 00 00 00 00 00 08 00 03 00 00 \ 00 88 00 00 00 00 00 04 2d 0f a5 00 ff ff 00 05 \ 00 00 00 4c 00 01 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 3c \ 00 00 00 02 00 03 00 00 02 06 50 54 00 00 00 06 \ 00 00 04 06 50 54 00 00 00 05 00 00 06 02 08 00 \ 00 00 08 02 00 00 00 00 0a 01 00 00 00 0e 04 c0 \ a8 00 01 00 00 10 04 c0 a8 00 02 00 00 0c 01 06 \ 00 00 12 02 09 e3 00 00 14 02 00 00 00 00 00 00 \ 00 00 00 08 00 01 00 00 00 88 00 00 00 00 00 02 \ 34 73 bc 00 ff ff 00 05 00 0a 00 4c 00 03 00 03 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c 00 00 00 02 00 03 00 00 \ 02 06 50 54 00 00 00 06 00 00 04 06 50 54 00 00 \ 00 05 00 00 06 02 08 00 00 00 08 02 00 00 00 00 \ 0a 01 00 00 00 0e 04 c0 a8 00 01 00 00 10 04 c0 \ a8 00 
02 00 00 0c 01 06 00 00 12 02 09 e5 00 00 \ 14 02 00 00 00 00 00 00 00 00 00 08 00 01 00 00 \ 00 88 00 00 00 00 00 05 28 0d e8 00 ff ff 00 05 \ 00 00 00 4c 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 3c \ 00 00 00 02 00 03 00 00 02 06 50 54 00 00 00 06 \ 00 00 04 06 50 54 00 00 00 05 00 00 06 02 08 00 \ 00 00 08 02 00 00 00 00 0a 01 00 00 00 0e 04 c0 \ a8 00 01 00 00 10 04 c0 a8 00 02 00 00 0c 01 06 \ 00 00 12 02 09 e2 00 00 14 02 00 00 00 00 00 00 \ 00 00 00 08 00 01 00 00 00 88 00 00 00 00 00 01 \ 02 62 5a 00 ff ff 00 05 00 00 00 4c 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c 00 00 00 02 00 01 00 00 \ 02 06 50 54 00 00 00 05 00 00 04 06 50 54 00 00 \ 00 06 00 00 06 02 08 00 00 00 08 02 00 00 00 00 \ 0a 01 00 00 00 0e 04 c0 a8 00 02 00 00 10 04 c0 \ a8 00 01 00 00 0c 01 06 00 00 12 02 00 00 00 00 \ 14 02 09 e7 00 00 00 00 00 00 00 08 00 03 00 00 \ 00 88 00 00 00 00 00 01 38 be 5e 00 ff ff 00 05 \ 00 00 00 4c 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 3c \ 00 00 00 02 00 01 00 00 02 06 50 54 00 00 00 05 \ 00 00 04 06 50 54 00 00 00 06 00 00 06 02 08 00 \ 00 00 08 02 00 00 00 00 0a 01 00 00 00 0e 04 c0 \ a8 00 02 00 00 10 04 c0 a8 00 01 00 00 0c 01 06 \ 00 00 12 02 00 00 00 00 14 02 09 e6 00 00 00 00 \ 00 00 00 08 00 03 00 00 00 88 00 00 00 00 00 04 \ 27 d0 df 00 ff ff 00 05 00 00 00 4c 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c 00 00 00 02 00 01 00 00 \ 02 06 50 54 00 00 00 05 00 00 04 06 50 54 00 00 \ 00 06 00 00 06 02 08 00 00 00 08 02 00 00 00 00 \ 0a 01 00 00 00 0e 04 c0 a8 00 02 00 00 10 04 c0 \ a8 00 01 00 00 0c 01 06 00 00 12 02 00 00 00 00 \ 14 02 09 e3 00 00 00 00 00 00 00 08 00 03 00 00 \ 00 88 00 00 00 00 00 03 2c d2 9c 00 ff ff 00 05 \ 00 00 00 4c 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 3c \ 00 00 00 02 00 01 00 00 02 06 50 54 00 00 00 05 \ 00 00 
04 06 50 54 00 00 00 06 00 00 06 02 08 00 \ 00 00 08 02 00 00 00 00 0a 01 00 00 00 0e 04 c0 \ a8 00 02 00 00 10 04 c0 a8 00 01 00 00 0c 01 06 \ 00 00 12 02 00 00 00 00 14 02 09 e4 00 00 00 00 \ 00 00 00 08 00 03 00 00 00 88 00 00 00 00 00 00 \ 0a 40 83 00 ff ff 00 05 00 00 00 4c 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c 00 00 00 02 00 03 00 00 \ 02 06 50 54 00 00 00 06 00 00 04 06 50 54 00 00 \ 00 05 00 00 06 02 08 00 00 00 08 02 00 00 00 00 \ 0a 01 00 00 00 0e 04 c0 a8 00 01 00 00 10 04 c0 \ a8 00 02 00 00 0c 01 06 00 00 12 02 09 e8 00 00 \ 14 02 00 00 00 00 00 00 00 00 00 08 00 01 00 00 \ 00 88 00 00 00 00 00 05 25 31 7c 00 ff ff 00 05 \ 00 00 00 4c 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 3c \ 00 00 00 02 00 01 00 00 02 06 50 54 00 00 00 05 \ 00 00 04 06 50 54 00 00 00 06 00 00 06 02 08 00 \ 00 00 08 02 00 00 00 00 0a 01 00 00 00 0e 04 c0 \ a8 00 02 00 00 10 04 c0 a8 00 01 00 00 0c 01 06 \ 00 00 12 02 00 00 00 00 14 02 09 e2 00 00 00 00 \ 00 00 00 08 00 03 00 00 00 88 00 00 00 00 00 00 \ 04 c4 b4 00 ff ff 00 05 00 00 00 4c 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \ 00 00 00 00 00 00 00 3c 00 00 00 02 00 01 00 00 \ 02 06 50 54 00 00 00 05 00 00 04 06 50 54 00 00 \ 00 06 00 00 06 02 08 00 00 00 08 02 00 00 00 00 \ 0a 01 00 00 00 0e 04 c0 a8 00 02 00 00 10 04 c0 \ a8 00 01 00 00 0c 01 06 00 00 12 02 00 00 00 00 \ 14 02 09 e8 00 00 00 00 00 00 00 08 00 03 00 00 \ 00 88 00 00 00 00 00 01 39 38 70 00 ff ff 00 05 \ 00 00 00 4c 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 3c \ 00 00 00 02 00 03 00 00 02 06 50 54 00 00 00 06 \ 00 00 04 06 50 54 00 00 00 05 00 00 06 02 08 00 \ 00 00 08 02 00 00 00 00 0a 01 00 00 00 0e 04 c0 \ a8 00 01 00 00 10 04 c0 a8 00 02 00 00 0c 01 06 \ 00 00 12 02 09 e6 00 00 14 02 00 00 00 00 00 00 \ 00 00 00 08 00 01 00 00 00 60 00 00 00 00 00 e4 \ 2e 7d db 00 80 00 00 00 00 00 00 14 00 00 00 00 \ 00 00 
00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 00 00 01 20 08 00 00 00 00 \ 00 00 01 c8 00 01 00 04 00 00 00 7b 00 00 00 00 \ ff ff 00 18 00 00 23 20 00 07 00 1f 00 01 00 04 \ 00 00 00 00 00 00 00 05 \ 00 30 01 00 00 00 0e 10 00 07 a1 20 80 00 00 00 \ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ 00 00 00 00 00 00 00 64 00 00 00 00 00 00 19 00 \ "], [0], [[NXST_FLOW reply (xid=0x4): cookie=0x0, duration=1.048s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,tp_src=2535,tp_dst=0 actions=output:1 cookie=0x0, duration=3.84s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, idle_age=2, priority=65535,tcp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,tp_src=2532,tp_dst=0 actions=output:1 cookie=0x0, duration=2.872s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, idle_age=4, priority=65535,tcp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,tp_src=0,tp_dst=2533 actions=output:3 cookie=0x0, duration=4.756s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, idle_age=0, priority=65535,tcp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,tp_src=2531,tp_dst=0 actions=output:1 cookie=0x0, duration=2.88s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, hard_timeout=10, idle_age=2, priority=65535,tcp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,tp_src=2533,tp_dst=0 actions=output:1 cookie=0x0, duration=5.672s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, 
priority=65535,tcp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,tp_src=2530,tp_dst=0 actions=output:1 cookie=0x0, duration=1.04s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,tp_src=0,tp_dst=2535 actions=output:3 cookie=0x0, duration=1.952s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,tp_src=0,tp_dst=2534 actions=output:3 cookie=0x0, duration=4.668s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,tp_src=0,tp_dst=2531 actions=output:3 cookie=0x0, duration=3.752s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,tp_src=0,tp_dst=2532 actions=output:3 cookie=0x0, duration=0.172s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,tp_src=2536,tp_dst=0 actions=output:1 cookie=0x0, duration=5.624s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,tp_src=0,tp_dst=2530 actions=output:3 cookie=0x0, duration=0.08s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:06,dl_dst=50:54:00:00:00:05,nw_src=192.168.0.2,nw_dst=192.168.0.1,nw_tos=0,tp_src=0,tp_dst=2536 actions=output:3 
cookie=0x0, duration=1.96s, table=0, n_packets=1, n_bytes=60, idle_timeout=5, priority=65535,tcp,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,tp_src=2534,tp_dst=0 actions=output:1 cookie=0x0, duration=228.78s, table=0, n_packets=0, n_bytes=0, reg0=0x7b,tun_id=0x1c8 actions=load:0x5->NXM_NX_REG0[] cookie=0x0, duration=3600.0005s, table=1, n_packets=100, n_bytes=6400, actions=drop ]]) AT_CLEANUP AT_SETUP([NXST_AGGREGATE request]) AT_KEYWORDS([ofp-print OFPT_STATS_REQUEST]) AT_CHECK([ovs-ofctl ofp-print "\ 01 10 00 20 00 00 00 04 ff ff 00 00 00 00 23 20 \ 00 00 00 01 00 00 00 00 ff ff 00 00 ff 00 00 00 \ "], [0], [dnl NXST_AGGREGATE request (xid=0x4): @&t@ ]) AT_CLEANUP AT_SETUP([NXST_AGGREGATE reply]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 00 30 00 00 00 04 ff ff 00 00 00 00 23 20 \ 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 07 \ 00 00 00 00 00 00 01 a4 00 00 00 07 00 00 00 00 \ "], [0], [dnl NXST_AGGREGATE reply (xid=0x4): packet_count=7 byte_count=420 flow_count=7 ]) AT_CLEANUP AT_SETUP([NXST_FLOW_MONITOR request]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 10 00 40 00 00 00 04 ff ff 00 00 00 00 23 20 00 00 00 02 00 00 00 00 \ 00 00 40 00 00 3f ff fe 00 00 01 00 00 00 00 00 \ 00 00 20 00 00 04 ff ff 00 06 02 00 00 00 00 00 00 00 00 02 00 01 00 00 \ "], [0], [dnl NXST_FLOW_MONITOR request (xid=0x4): id=16384 flags=initial,add,delete,modify,actions,own out_port=LOCAL table=1 id=8192 flags=delete table=2 in_port=1 ]) AT_CLEANUP AT_SETUP([NXST_FLOW_MONITOR reply]) AT_KEYWORDS([ofp-print OFPT_STATS_REPLY]) AT_CHECK([ovs-ofctl ofp-print "\ 01 11 00 40 00 00 00 04 ff ff 00 00 00 00 23 20 00 00 00 02 00 00 00 00 \ 00 20 00 01 00 05 80 00 00 05 00 10 00 06 01 00 12 34 56 78 9a bc de f0 \ 00 00 00 02 00 01 00 00 \ 00 08 00 03 00 01 86 a0 \ "], [0], [dnl NXST_FLOW_MONITOR reply (xid=0x4): event=DELETED reason=eviction 
table=1 idle_timeout=5 hard_timeout=16 cookie=0x123456789abcdef0 in_port=1 event=ABBREV xid=0x186a0 ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ofp-util.at000066400000000000000000000030751226605124000207060ustar00rootroot00000000000000AT_BANNER([OpenFlow utilities]) AT_SETUP([encoding hellos]) dnl All versions up to a max version supported: AT_CHECK([ovs-ofctl encode-hello 0x2], [0], [dnl 00000000 01 00 00 08 00 00 00 01- OFPT_HELLO (xid=0x1): version bitmap: 0x01 ]) AT_CHECK([ovs-ofctl encode-hello 0x6], [0], [dnl 00000000 02 00 00 08 00 00 00 01- OFPT_HELLO (OF1.1) (xid=0x1): version bitmap: 0x01, 0x02 ]) AT_CHECK([ovs-ofctl encode-hello 0xe], [0], [dnl 00000000 03 00 00 08 00 00 00 01- OFPT_HELLO (OF1.2) (xid=0x1): version bitmap: 0x01, 0x02, 0x03 ]) AT_CHECK([ovs-ofctl encode-hello 0x1e], [0], [dnl 00000000 04 00 00 08 00 00 00 01- OFPT_HELLO (OF1.3) (xid=0x1): version bitmap: 0x01, 0x02, 0x03, 0x04 ]) AT_CHECK([ovs-ofctl encode-hello 0x3e], [0], [dnl 00000000 05 00 00 08 00 00 00 01- OFPT_HELLO (OF 0x05) (xid=0x1): version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05 ]) dnl Some versions below max version missing. 
AT_CHECK([ovs-ofctl encode-hello 0xc], [0], [dnl 00000000 03 00 00 10 00 00 00 01-00 01 00 08 00 00 00 0c @&t@ OFPT_HELLO (OF1.2) (xid=0x1): version bitmap: 0x02, 0x03 ]) AT_CHECK([ovs-ofctl encode-hello 0xa], [0], [dnl 00000000 03 00 00 10 00 00 00 01-00 01 00 08 00 00 00 0a @&t@ OFPT_HELLO (OF1.2) (xid=0x1): version bitmap: 0x01, 0x03 ]) AT_CHECK([ovs-ofctl encode-hello 0x8], [0], [dnl 00000000 03 00 00 10 00 00 00 01-00 01 00 08 00 00 00 08 @&t@ OFPT_HELLO (OF1.2) (xid=0x1): version bitmap: 0x03 ]) AT_CHECK([ovs-ofctl encode-hello 0x4], [0], [dnl 00000000 02 00 00 10 00 00 00 01-00 01 00 08 00 00 00 04 @&t@ OFPT_HELLO (OF1.1) (xid=0x1): version bitmap: 0x02 ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ofproto-dpif.at000066400000000000000000004022161226605124000215570ustar00rootroot00000000000000AT_BANNER([ofproto-dpif]) AT_SETUP([ofproto-dpif - resubmit]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21]) AT_DATA([flows.txt], [dnl table=0 in_port=1 priority=1000 icmp actions=output(10),resubmit(2),output(19),resubmit(3),output(21) table=0 in_port=2 priority=1500 icmp actions=output(11),resubmit(,1),output(16),resubmit(2,1),output(18) table=0 in_port=3 priority=2000 icmp actions=output(20) table=1 in_port=1 priority=1000 icmp actions=output(12),resubmit(4,1),output(13),resubmit(3),output(15) table=1 in_port=2 priority=1500 icmp actions=output(17),resubmit(,2) table=1 in_port=3 priority=1500 icmp actions=output(14),resubmit(,2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=1,nw_tos=0,nw_ttl=128,icmp_type=8,icmp_code=0'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 10,11,12,13,14,15,16,17,18,19,20,21 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - goto table]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], 
[10], [11]) echo "table=0 in_port=1 actions=output(10),goto_table(1)" > flows.txt for i in `seq 1 63`; do echo "table=$i actions=goto_table($(($i+1)))"; done >> flows.txt echo "table=64 actions=output(11)" >> flows.txt AT_CHECK([ovs-ofctl -O OpenFlow12 add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=1,nw_tos=0,nw_ttl=128,icmp_type=8,icmp_code=0'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 10,11 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - registers]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [20], [21], [22], [33], [90]) AT_DATA([flows.txt], [dnl in_port=90 actions=resubmit:2,resubmit:3,resubmit:4,resubmit:91 in_port=91 actions=resubmit:5,resubmit:6,resubmit:7,resubmit:92 in_port=92 actions=resubmit:8,resubmit:9,resubmit:10,resubmit:11,resubmit:93 in_port=93 actions=resubmit:12,resubmit:13,resubmit:14,resubmit:15 in_port=2 actions=load:0x000db000->NXM_NX_REG0[[]] in_port=3 actions=load:0xdea->NXM_NX_REG0[[20..31]] in_port=4 actions=load:0xeef->NXM_NX_REG0[[0..11]] in_port=5 actions=move:NXM_NX_REG0[[]]->NXM_NX_REG1[[]] in_port=6 actions=load:0x22222222->NXM_NX_REG2[[]] in_port=7 actions=move:NXM_NX_REG1[[20..31]]->NXM_NX_REG2[[0..11]] in_port=8 actions=move:NXM_NX_REG1[[0..11]]->NXM_NX_REG2[[20..31]] in_port=9,reg0=0xdeadbeef actions=output:20 in_port=10,reg1=0xdeadbeef actions=output:21 in_port=11,reg2=0xeef22dea actions=output:22 dnl Sanilty check all registers in_port=12 actions=load:0x10->NXM_NX_REG0[[]],load:0x11->NXM_NX_REG1[[]],load:0x12->NXM_NX_REG2[[]] in_port=13 actions=load:0x13->NXM_NX_REG3[[]],load:0x14->NXM_NX_REG4[[]],load:0x15->NXM_NX_REG5[[]] in_port=14 actions=load:0x16->NXM_NX_REG6[[]],load:0x17->NXM_NX_REG7[[]] in_port=15,reg0=0x10,reg1=0x11,reg2=0x12,reg3=0x13,reg4=0x14,reg5=0x15,reg6=0x16,reg7=0x17 actions=output:33 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) 
AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 20,21,22,33 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - push-pop]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [20], [21], [22], [33], [90]) AT_DATA([flows.txt], [dnl in_port=90 actions=load:20->NXM_NX_REG0[[0..7]],load:21->NXM_NX_REG1[[0..7]],load:22->NXM_NX_REG2[[0..7]], load:33->NXM_NX_REG3[[0..7]], push:NXM_NX_REG0[[]], push:NXM_NX_REG1[[0..7]],push:NXM_NX_REG2[[0..15]], push:NXM_NX_REG3[[]], resubmit:2, resubmit:3, resubmit:4, resubmit:5 in_port=2 actions=pop:NXM_NX_REG0[[0..7]],output:NXM_NX_REG0[[]] in_port=3 actions=pop:NXM_NX_REG1[[0..7]],output:NXM_NX_REG1[[]] in_port=4 actions=pop:NXM_NX_REG2[[0..15]],output:NXM_NX_REG2[[]] in_port=5 actions=pop:NXM_NX_REG3[[]],output:NXM_NX_REG3[[]] ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 33,22,21,20 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - output]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [9], [10], [11], [55], [66], [77], [88]) AT_DATA([flows.txt], [dnl in_port=1 actions=resubmit:2,resubmit:3,resubmit:4,resubmit:5,resubmit:6,resubmit:7,resubmit:8 in_port=2 actions=output:9 in_port=3 actions=load:55->NXM_NX_REG0[[]],output:NXM_NX_REG0[[]],load:66->NXM_NX_REG1[[]] in_port=4 actions=output:10,output:NXM_NX_REG0[[]],output:NXM_NX_REG1[[]],output:11 in_port=5 actions=load:77->NXM_NX_REG0[[0..15]],load:88->NXM_NX_REG0[[16..31]] in_port=6 actions=output:NXM_NX_REG0[[0..15]],output:NXM_NX_REG0[[16..31]] in_port=7 
actions=load:0x110000ff->NXM_NX_REG0[[]],output:NXM_NX_REG0[[]] in_port=8 actions=1,9,load:9->NXM_OF_IN_PORT[[]],1,9 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 9,55,10,55,66,11,77,88,9,1 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - dec_ttl]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2], [3], [4]) AT_DATA([flows.txt], [dnl table=0 in_port=1 action=dec_ttl,output:2,resubmit(1,1),output:4 table=1 in_port=1 action=dec_ttl,output:3 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=2,frag=no)' -generate], [0], [stdout]) AT_CHECK([tail -3 stdout], [0], [Datapath actions: set(ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=1,frag=no)),2,4 This flow is handled by the userspace slow path because it: - Sends "packet-in" messages to the OpenFlow controller. 
]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=3,frag=no)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=2,frag=no)),2,set(ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=1,frag=no)),3,4 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=128,frag=no)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=127,frag=no)),2,set(ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=126,frag=no)),3,4 ]) AT_CAPTURE_FILE([ofctl_monitor.log]) AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl --detach --no-chdir --pidfile 2> ofctl_monitor.log]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=2,frag=no)' -generate], [0], [stdout]) OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): table_id=1 total_len=42 in_port=1 (via invalid_ttl) data_len=42 (unbuffered) icmp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=1,icmp_type=0,icmp_code=0 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - output, OFPP_NONE ingress port]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_CHECK([ovs-ofctl add-flow br0 action=normal]) # "in_port" defaults to OFPP_NONE if it's not specified. 
flow="icmp,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,icmp_type=8,icmp_code=0" AT_CHECK([ovs-appctl ofproto/trace br0 "$flow"], [0], [stdout]) AT_CHECK([tail -1 stdout | sed 's/Datapath actions: //' | tr "," "\n" | sort -n], [0], [dnl 1 2 100 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - DSCP]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy]) ADD_OF_PORTS([br0], [9]) AT_DATA([flows.txt], [dnl actions=output:LOCAL,enqueue:1:1,enqueue:1:2,enqueue:1:2,enqueue:1:1,output:1,mod_nw_tos:0,output:1,output:LOCAL ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-vsctl -- \ set Port p1 qos=@newqos --\ --id=@newqos create QoS type=linux-htb queues=1=@q1,2=@q2 --\ --id=@q1 create Queue dscp=1 --\ --id=@q2 create Queue dscp=2], [0], [ignore]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(9),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=1.1.1.1,dst=2.2.2.2,proto=1,tos=0xff,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl 100,dnl set(ipv4(src=1.1.1.1,dst=2.2.2.2,proto=1,tos=0x7,ttl=128,frag=no)),set(skb_priority(0x1)),1,dnl set(ipv4(src=1.1.1.1,dst=2.2.2.2,proto=1,tos=0xb,ttl=128,frag=no)),set(skb_priority(0x2)),1,dnl 1,dnl set(ipv4(src=1.1.1.1,dst=2.2.2.2,proto=1,tos=0x7,ttl=128,frag=no)),set(skb_priority(0x1)),1,dnl set(ipv4(src=1.1.1.1,dst=2.2.2.2,proto=1,tos=0xff,ttl=128,frag=no)),set(skb_priority(0)),1,dnl set(ipv4(src=1.1.1.1,dst=2.2.2.2,proto=1,tos=0x3,ttl=128,frag=no)),1,dnl 100 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - output/flood flags]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2], [3], [4], [5], [6], [7]) AT_DATA([flows.txt], [dnl in_port=local actions=local,flood in_port=1 actions=flood in_port=2 actions=all in_port=3 actions=output:LOCAL,output:1,output:2,output:3,output:4,output:5,output:6,output:7 in_port=4 
actions=enqueue:LOCAL:1,enqueue:1:1,enqueue:2:1,enqueue:3:2,enqueue:4:1,enqueue:5:1,enqueue:6:1,enqueue:7:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-ofctl mod-port br0 5 noforward]) AT_CHECK([ovs-ofctl mod-port br0 6 noflood]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x0900)'], [0], [stdout]) AT_CHECK([tail -1 stdout \ | sed -e 's/Datapath actions: //' | tr ',' '\n' | sort], [0], [dnl 1 2 3 4 7 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x0900)'], [0], [stdout]) AT_CHECK([tail -1 stdout \ | sed -e 's/Datapath actions: //' | tr ',' '\n' | sort], [0], [dnl 100 2 3 4 7 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x0900)'], [0], [stdout]) AT_CHECK([tail -1 stdout \ | sed -e 's/Datapath actions: //' | tr ',' '\n' | sort], [0], [dnl 1 100 3 4 6 7 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(3),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x0900)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 100,1,2,4,6,7 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(4),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x0900)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(skb_priority(0x1)),100,1,2,set(skb_priority(0x2)),3,set(skb_priority(0x1)),6,7 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - controller]) OVS_VSWITCHD_START([dnl add-port br0 p1 -- set Interface p1 type=dummy ]) ON_EXIT([kill `cat ovs-ofctl.pid`]) AT_CAPTURE_FILE([ofctl_monitor.log]) AT_DATA([flows.txt], [dnl cookie=0x0 dl_src=10:11:11:11:11:11 actions=controller cookie=0x1 dl_src=20:22:22:22:22:22 actions=controller,resubmit(80,1) cookie=0x2 dl_src=30:33:33:33:33:33 actions=mod_vlan_vid:15,controller cookie=0x3 table=1 in_port=80 
actions=load:1->NXM_NX_REG0[[]],mod_vlan_vid:80,controller,resubmit(81,2) cookie=0x4 table=2 in_port=81 actions=load:2->NXM_NX_REG1[[]],mod_dl_src:80:81:81:81:81:81,controller,resubmit(82,3) cookie=0x5 table=3 in_port=82 actions=load:3->NXM_NX_REG2[[]],mod_dl_dst:82:82:82:82:82:82,controller,resubmit(83,4) cookie=0x6 table=4 in_port=83 actions=load:4->NXM_NX_REG3[[]],mod_nw_src:83.83.83.83,controller,resubmit(84,5) cookie=0x7 table=5 in_port=84 actions=load:5->NXM_NX_REG4[[]],load:6->NXM_NX_TUN_ID[[]],mod_nw_dst:84.84.84.84,controller,resubmit(85,6) cookie=0x8 table=6 in_port=85 actions=mod_tp_src:85,controller,resubmit(86,7) cookie=0x9 table=7 in_port=86 actions=mod_tp_dst:86,controller,controller cookie=0xa dl_src=40:44:44:44:44:41 actions=mod_vlan_vid:99,mod_vlan_pcp:1,controller cookie=0xa dl_src=40:44:44:44:44:42 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller cookie=0xa dl_src=40:44:44:44:44:43 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller cookie=0xa dl_src=40:44:44:44:44:44 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],controller cookie=0xa dl_src=40:44:44:44:44:45 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],dec_mpls_ttl,controller cookie=0xa dl_src=40:44:44:44:44:46 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],set_mpls_ttl(10),controller cookie=0xa dl_src=40:44:44:44:44:47 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],dec_mpls_ttl,set_mpls_ttl(10),controller cookie=0xa dl_src=40:44:44:44:44:48 actions=push_mpls:0x8847,load:10->OXM_OF_MPLS_LABEL[[]],load:3->OXM_OF_MPLS_TC[[]],set_mpls_ttl(10),dec_mpls_ttl,controller cookie=0xb dl_src=50:55:55:55:55:55 dl_type=0x8847 actions=load:1000->OXM_OF_MPLS_LABEL[[]],controller cookie=0xd dl_src=60:66:66:66:66:66 actions=pop_mpls:0x0800,controller cookie=0xc 
dl_src=70:77:77:77:77:77 actions=push_mpls:0x8848,load:1000->OXM_OF_MPLS_LABEL[[]],load:7->OXM_OF_MPLS_TC[[]],controller ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) dnl Flow miss. AT_CHECK([ovs-ofctl monitor -P openflow10 br0 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) for i in 1 2 3 ; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=1 (via no_match) data_len=60 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=9 tcp_csum:0 dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=1 (via no_match) data_len=60 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=9 tcp_csum:0 dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=1 (via no_match) data_len=60 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=9 tcp_csum:0 ]) dnl Singleton controller action. 
AT_CHECK([ovs-ofctl monitor -P openflow10 br0 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) for i in 1 2 3 ; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=10:11:11:11:11:11,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=10)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=1 (via action) data_len=60 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=10:11:11:11:11:11,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=10 tcp_csum:0 dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=1 (via action) data_len=60 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=10:11:11:11:11:11,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=10 tcp_csum:0 dnl OFPT_PACKET_IN (xid=0x0): total_len=60 in_port=1 (via action) data_len=60 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=10:11:11:11:11:11,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=10 tcp_csum:0 ]) dnl Modified controller action. 
AT_CHECK([ovs-ofctl monitor -P openflow10 br0 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) for i in 1 2 3 ; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=30:33:33:33:33:33,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=10)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl OFPT_PACKET_IN (xid=0x0): total_len=64 in_port=1 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=15,dl_vlan_pcp=0,dl_src=30:33:33:33:33:33,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=10 tcp_csum:0 dnl OFPT_PACKET_IN (xid=0x0): total_len=64 in_port=1 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=15,dl_vlan_pcp=0,dl_src=30:33:33:33:33:33,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=10 tcp_csum:0 dnl OFPT_PACKET_IN (xid=0x0): total_len=64 in_port=1 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=15,dl_vlan_pcp=0,dl_src=30:33:33:33:33:33,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=10 tcp_csum:0 ]) dnl Modified VLAN controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:44:41,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:44:41,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:44:41,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=1,dl_src=40:44:44:44:44:41,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64 tcp_csum:0 ]) dnl Modified MPLS controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:44:42,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:42,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:42,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:42,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1 ]) dnl Modified MPLS controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) dnl in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8847),mpls(label=100,tc=3,ttl=64,bos=1) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:44:43,dst=50:54:00:00:00:07),eth_type(0x8847),mpls(label=11,tc=3,ttl=64,bos=1)' done OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:43,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=0 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:43,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=0 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:43,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=0 ]) dnl Modified MPLS controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:44:44,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no))' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=7,dl_src=40:44:44:44:44:44,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=7,dl_src=40:44:44:44:44:44,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,dl_vlan=99,dl_vlan_pcp=7,dl_src=40:44:44:44:44:44,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=64,mpls_bos=1 ]) dnl Modified MPLS controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:44:45,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:45,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=63,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:45,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=63,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:45,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=63,mpls_bos=1 ]) dnl Modified MPLS controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:44:46,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:46,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=10,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:46,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=10,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:46,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=10,mpls_bos=1 ]) dnl Modified MPLS controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:44:47,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:47,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=10,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:47,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=10,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:47,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=10,mpls_bos=1 ]) dnl Modified MPLS controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=40:44:44:44:44:48,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:48,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=9,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:48,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=9,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xa total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=40:44:44:44:44:48,dl_dst=50:54:00:00:00:07,mpls_label=10,mpls_tc=3,mpls_ttl=9,mpls_bos=1 ]) dnl Modified MPLS actions. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:55:55:55:55:55,dst=50:54:00:00:00:07),eth_type(0x8847),mpls(label=100,tc=7,ttl=64,bos=1)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=60 in_port=1 (via action) data_len=60 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:55:55:55:55:55,dl_dst=50:54:00:00:00:07,mpls_label=1000,mpls_tc=7,mpls_ttl=64,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=60 in_port=1 (via action) data_len=60 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:55:55:55:55:55,dl_dst=50:54:00:00:00:07,mpls_label=1000,mpls_tc=7,mpls_ttl=64,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xb total_len=60 in_port=1 (via action) data_len=60 (unbuffered) mpls,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:55:55:55:55:55,dl_dst=50:54:00:00:00:07,mpls_label=1000,mpls_tc=7,mpls_ttl=64,mpls_bos=1 ]) dnl Modified MPLS ipv6 controller action. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=70:77:77:77:77:77,dst=50:54:00:00:00:07),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=128,frag=no)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xc total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mplsm,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=70:77:77:77:77:77,dl_dst=50:54:00:00:00:07,mpls_label=1000,mpls_tc=7,mpls_ttl=64,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xc total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mplsm,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=70:77:77:77:77:77,dl_dst=50:54:00:00:00:07,mpls_label=1000,mpls_tc=7,mpls_ttl=64,mpls_bos=1 dnl NXT_PACKET_IN (xid=0x0): cookie=0xc total_len=64 in_port=1 (via action) data_len=64 (unbuffered) mplsm,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=70:77:77:77:77:77,dl_dst=50:54:00:00:00:07,mpls_label=1000,mpls_tc=7,mpls_ttl=64,mpls_bos=1 ]) dnl Modified MPLS pop action. 
dnl The input is a frame with a single MPLS header (one label stack entry with the bottom-of-stack bit set) which tcpdump -vve shows as: dnl 60:66:66:66:66:66 > 50:54:00:00:00:07, ethertype MPLS unicast (0x8847), length 62: MPLS (label 20, exp 0, ttl 32) dnl (tos 0x0, ttl 64, id 0, offset 0, flags [none], proto TCP (6), length 44) AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --pidfile 2> ofctl_monitor.log]) for i in 1 2 3; do ovs-appctl netdev-dummy/receive p1 '50 54 00 00 00 07 60 66 66 66 66 66 88 47 00 01 41 20 45 00 00 2c 00 00 00 00 40 06 3b 78 c0 a8 00 01 c0 a8 00 02 00 50 00 00 00 00 00 2a 00 00 00 2a 50 00 27 10 77 44 00 00 48 4f 47 45' done #for i in 2 3; do # ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=60:66:66:66:66:66,dst=50:54:00:00:00:07),eth_type(0x8847),mpls(label=10,tc=3,ttl=100,bos=1)' #done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0xd total_len=58 in_port=1 (via action) data_len=58 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=60:66:66:66:66:66,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=80,tp_dst=0 tcp_csum:7744 dnl NXT_PACKET_IN (xid=0x0): cookie=0xd total_len=58 in_port=1 (via action) data_len=58 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=60:66:66:66:66:66,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=80,tp_dst=0 tcp_csum:7744 dnl NXT_PACKET_IN (xid=0x0): cookie=0xd total_len=58 in_port=1 (via action) data_len=58 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=60:66:66:66:66:66,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=80,tp_dst=0 tcp_csum:7744 ]) dnl Checksum TCP. 
AT_CHECK([ovs-ofctl monitor br0 65534 -P nxm --detach --no-chdir --pidfile 2> ofctl_monitor.log]) for i in 1 ; do ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=20:22:22:22:22:22,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=11)' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 18]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0x1 total_len=60 in_port=1 (via action) data_len=60 (unbuffered) tcp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11 tcp_csum:0 dnl NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=64 in_port=1 reg0=0x1 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11 tcp_csum:0 dnl NXT_PACKET_IN (xid=0x0): table_id=2 cookie=0x4 total_len=64 in_port=1 reg0=0x1 reg1=0x2 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11 tcp_csum:0 dnl NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 total_len=64 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11 tcp_csum:0 dnl NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=64 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 (via action) data_len=64 (unbuffered) 
tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11 tcp_csum:1a03 dnl NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=8,tp_dst=11 tcp_csum:3205 dnl NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=85,tp_dst=11 tcp_csum:31b8 dnl NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=85,tp_dst=86 tcp_csum:316d dnl NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) tcp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=85,tp_dst=86 tcp_csum:316d ]) dnl Checksum UDP. 
AT_CHECK([ovs-ofctl monitor br0 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) for i in 1 ; do ovs-appctl netdev-dummy/receive p1 '50 54 00 00 00 07 20 22 22 22 22 22 08 00 45 00 00 1C 00 00 00 00 00 11 00 00 C0 A8 00 01 C0 A8 00 02 00 08 00 0B 00 00 12 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 18]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0x1 total_len=60 in_port=1 (via action) data_len=60 (unbuffered) udp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:1234 dnl NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=64 in_port=1 reg0=0x1 (via action) data_len=64 (unbuffered) udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:1234 dnl NXT_PACKET_IN (xid=0x0): table_id=2 cookie=0x4 total_len=64 in_port=1 reg0=0x1 reg1=0x2 (via action) data_len=64 (unbuffered) udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:1234 dnl NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 total_len=64 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 (via action) data_len=64 (unbuffered) udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:1234 dnl NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=64 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 (via action) data_len=64 (unbuffered) 
udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:2c37 dnl NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=8,tp_dst=11 udp_csum:4439 dnl NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=11 udp_csum:43ec dnl NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 udp_csum:43a1 dnl NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=64 (unbuffered) udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 udp_csum:43a1 ]) AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore]) dnl Checksum SCTP. 
dnl Wait until all 9 packet-ins (2 log lines each, 18 in total) are logged before stopping the monitor, as the TCP and UDP checks do; exiting first would race with the log comparison below.
AT_CHECK([ovs-ofctl monitor br0 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) for i in 1 ; do ovs-appctl netdev-dummy/receive p1 '50 54 00 00 00 07 20 22 22 22 22 22 08 00 45 00 00 24 00 00 00 00 00 84 00 00 C0 A8 00 01 C0 A8 00 02 04 58 08 af 00 00 00 00 d9 d7 91 57 01 00 00 34 cf 28 ec 4e 00 01 40 00 00 0a ff ff b7 53 24 19 00 05 00 08 7f 00 00 01 00 05 00 08 c0 a8 02 07 00 0c 00 06 00 05 00 00 80 00 00 04 c0 00 00 04' done OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 18]) ovs-appctl -t ovs-ofctl exit AT_CHECK([cat ofctl_monitor.log], [0], [dnl NXT_PACKET_IN (xid=0x0): cookie=0x1 total_len=98 in_port=1 (via action) data_len=98 (unbuffered) sctp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x3 total_len=102 in_port=1 reg0=0x1 (via action) data_len=102 (unbuffered) sctp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=20:22:22:22:22:22,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl NXT_PACKET_IN (xid=0x0): table_id=2 cookie=0x4 total_len=102 in_port=1 reg0=0x1 reg1=0x2 (via action) data_len=102 (unbuffered) sctp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl NXT_PACKET_IN (xid=0x0): table_id=3 cookie=0x5 total_len=102 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 (via action) data_len=102 (unbuffered) sctp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl NXT_PACKET_IN (xid=0x0): table_id=4 cookie=0x6 total_len=102 in_port=1 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 (via action) data_len=102 
(unbuffered) sctp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=192.168.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl NXT_PACKET_IN (xid=0x0): table_id=5 cookie=0x7 total_len=102 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=102 (unbuffered) sctp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=1112,tp_dst=2223 sctp_csum:d9d79157 dnl NXT_PACKET_IN (xid=0x0): table_id=6 cookie=0x8 total_len=102 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=102 (unbuffered) sctp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=2223 sctp_csum:7f12662e dnl NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=102 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=102 (unbuffered) sctp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 sctp_csum:a7e86f67 dnl NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=102 in_port=1 tun_id=0x6 reg0=0x1 reg1=0x2 reg2=0x3 reg3=0x4 reg4=0x5 (via action) data_len=102 (unbuffered) sctp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 sctp_csum:a7e86f67 ]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, n_packets=3, n_bytes=218, dl_src=20:22:22:22:22:22 actions=CONTROLLER:65535,resubmit(80,1) cookie=0x2, n_packets=3, n_bytes=180, dl_src=30:33:33:33:33:33 
actions=mod_vlan_vid:15,CONTROLLER:65535 cookie=0x3, table=1, n_packets=3, n_bytes=218, in_port=80 actions=load:0x1->NXM_NX_REG0[[]],mod_vlan_vid:80,CONTROLLER:65535,resubmit(81,2) cookie=0x4, table=2, n_packets=3, n_bytes=218, in_port=81 actions=load:0x2->NXM_NX_REG1[[]],mod_dl_src:80:81:81:81:81:81,CONTROLLER:65535,resubmit(82,3) cookie=0x5, table=3, n_packets=3, n_bytes=218, in_port=82 actions=load:0x3->NXM_NX_REG2[[]],mod_dl_dst:82:82:82:82:82:82,CONTROLLER:65535,resubmit(83,4) cookie=0x6, table=4, n_packets=3, n_bytes=218, in_port=83 actions=load:0x4->NXM_NX_REG3[[]],mod_nw_src:83.83.83.83,CONTROLLER:65535,resubmit(84,5) cookie=0x7, table=5, n_packets=3, n_bytes=218, in_port=84 actions=load:0x5->NXM_NX_REG4[[]],load:0x6->NXM_NX_TUN_ID[[]],mod_nw_dst:84.84.84.84,CONTROLLER:65535,resubmit(85,6) cookie=0x8, table=6, n_packets=3, n_bytes=218, in_port=85 actions=mod_tp_src:85,CONTROLLER:65535,resubmit(86,7) cookie=0x9, table=7, n_packets=3, n_bytes=218, in_port=86 actions=mod_tp_dst:86,CONTROLLER:65535,CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:41 actions=mod_vlan_vid:99,mod_vlan_pcp:1,CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:42 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:43 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:44 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:45 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],dec_mpls_ttl,CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:46 
actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],set_mpls_ttl(10),CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:47 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],dec_mpls_ttl,set_mpls_ttl(10),CONTROLLER:65535 cookie=0xa, n_packets=3, n_bytes=180, dl_src=40:44:44:44:44:48 actions=push_mpls:0x8847,load:0xa->OXM_OF_MPLS_LABEL[[]],load:0x3->OXM_OF_MPLS_TC[[]],set_mpls_ttl(10),dec_mpls_ttl,CONTROLLER:65535 cookie=0xb, n_packets=3, n_bytes=180, mpls,dl_src=50:55:55:55:55:55 actions=load:0x3e8->OXM_OF_MPLS_LABEL[[]],CONTROLLER:65535 cookie=0xc, n_packets=3, n_bytes=180, dl_src=70:77:77:77:77:77 actions=push_mpls:0x8848,load:0x3e8->OXM_OF_MPLS_LABEL[[]],load:0x7->OXM_OF_MPLS_TC[[]],CONTROLLER:65535 cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:66:66 actions=pop_mpls:0x0800,CONTROLLER:65535 n_packets=3, n_bytes=180, dl_src=10:11:11:11:11:11 actions=CONTROLLER:65535 NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - VLAN handling]) OVS_VSWITCHD_START( [set Bridge br0 fail-mode=standalone -- \ add-port br0 p1 trunks=10,12 -- \ add-port br0 p2 tag=10 -- \ add-port br0 p3 tag=12 \ other-config:priority-tags=true -- \ add-port br0 p4 tag=12 -- \ add-port br0 p5 vlan_mode=native-tagged tag=10 -- \ add-port br0 p6 vlan_mode=native-tagged tag=10 trunks=10,12 -- \ add-port br0 p7 vlan_mode=native-untagged tag=12 -- \ add-port br0 p8 vlan_mode=native-untagged tag=12 trunks=10,12 \ other-config:priority-tags=true -- \ set Interface p1 type=dummy -- \ set Interface p2 type=dummy -- \ set Interface p3 type=dummy -- \ set Interface p4 type=dummy -- \ set Interface p5 type=dummy -- \ set Interface p6 type=dummy -- \ set Interface p7 type=dummy -- \ set Interface p8 type=dummy --]) dnl Each of these specifies an in_port by number, a VLAN VID (or "none"), dnl a VLAN PCP (used if the VID isn't "none") and the expected set of datapath dnl actions. 
for tuple in \ "100 none 0 drop" \ "100 0 0 drop" \ "100 0 1 drop" \ "100 10 0 1,5,6,7,8,pop_vlan,2" \ "100 10 1 1,5,6,7,8,pop_vlan,2" \ "100 11 0 5,7" \ "100 11 1 5,7" \ "100 12 0 1,5,6,pop_vlan,3,4,7,8" \ "100 12 1 1,5,6,pop_vlan,4,7,push_vlan(vid=0,pcp=1),3,8" \ "1 none 0 drop" \ "1 0 0 drop" \ "1 0 1 drop" \ "1 10 0 5,6,7,8,100,pop_vlan,2" \ "1 10 1 5,6,7,8,100,pop_vlan,2" \ "1 11 0 drop" \ "1 11 1 drop" \ "1 12 0 5,6,100,pop_vlan,3,4,7,8" \ "1 12 1 5,6,100,pop_vlan,4,7,push_vlan(vid=0,pcp=1),3,8" \ "2 none 0 push_vlan(vid=10,pcp=0),1,5,6,7,8,100" \ "2 0 0 pop_vlan,push_vlan(vid=10,pcp=0),1,5,6,7,8,100" \ "2 0 1 pop_vlan,push_vlan(vid=10,pcp=1),1,5,6,7,8,100" \ "2 10 0 drop" \ "2 10 1 drop" \ "2 11 0 drop" \ "2 11 1 drop" \ "2 12 0 drop" \ "2 12 1 drop" \ "3 none 0 4,7,8,push_vlan(vid=12,pcp=0),1,5,6,100" \ "3 0 0 pop_vlan,4,7,8,push_vlan(vid=12,pcp=0),1,5,6,100" \ "3 0 1 8,pop_vlan,4,7,push_vlan(vid=12,pcp=1),1,5,6,100" \ "3 10 0 drop" \ "3 10 1 drop" \ "3 11 0 drop" \ "3 11 1 drop" \ "3 12 0 drop" \ "3 12 1 drop" \ "4 none 0 3,7,8,push_vlan(vid=12,pcp=0),1,5,6,100" \ "4 0 0 pop_vlan,3,7,8,push_vlan(vid=12,pcp=0),1,5,6,100" \ "4 0 1 3,8,pop_vlan,7,push_vlan(vid=12,pcp=1),1,5,6,100" \ "4 10 0 drop" \ "4 10 1 drop" \ "4 11 0 drop" \ "4 11 1 drop" \ "4 12 0 drop" \ "4 12 1 drop" \ "5 none 0 2,push_vlan(vid=10,pcp=0),1,6,7,8,100" \ "5 0 0 pop_vlan,2,push_vlan(vid=10,pcp=0),1,6,7,8,100" \ "5 0 1 pop_vlan,2,push_vlan(vid=10,pcp=1),1,6,7,8,100" \ "5 10 0 1,6,7,8,100,pop_vlan,2" \ "5 10 1 1,6,7,8,100,pop_vlan,2" \ "5 11 0 7,100" \ "5 11 1 7,100" \ "5 12 0 1,6,100,pop_vlan,3,4,7,8" \ "5 12 1 1,6,100,pop_vlan,4,7,push_vlan(vid=0,pcp=1),3,8" \ "6 none 0 2,push_vlan(vid=10,pcp=0),1,5,7,8,100" \ "6 0 0 pop_vlan,2,push_vlan(vid=10,pcp=0),1,5,7,8,100" \ "6 0 1 pop_vlan,2,push_vlan(vid=10,pcp=1),1,5,7,8,100" \ "6 10 0 1,5,7,8,100,pop_vlan,2" \ "6 10 1 1,5,7,8,100,pop_vlan,2" \ "6 11 0 drop" \ "6 11 1 drop" \ "6 12 0 1,5,100,pop_vlan,3,4,7,8" \ "6 12 1 
1,5,100,pop_vlan,4,7,push_vlan(vid=0,pcp=1),3,8" \ "7 none 0 3,4,8,push_vlan(vid=12,pcp=0),1,5,6,100" \ "7 0 0 pop_vlan,3,4,8,push_vlan(vid=12,pcp=0),1,5,6,100" \ "7 0 1 3,8,pop_vlan,4,push_vlan(vid=12,pcp=1),1,5,6,100" \ "7 10 0 1,5,6,8,100,pop_vlan,2" \ "7 10 1 1,5,6,8,100,pop_vlan,2" \ "7 11 0 5,100" \ "7 11 1 5,100" \ "7 12 0 1,5,6,100,pop_vlan,3,4,8" \ "7 12 1 1,5,6,100,pop_vlan,4,push_vlan(vid=0,pcp=1),3,8" \ "8 none 0 3,4,7,push_vlan(vid=12,pcp=0),1,5,6,100" \ "8 0 0 pop_vlan,3,4,7,push_vlan(vid=12,pcp=0),1,5,6,100" \ "8 0 1 3,pop_vlan,4,7,push_vlan(vid=12,pcp=1),1,5,6,100" \ "8 10 0 1,5,6,7,100,pop_vlan,2" \ "8 10 1 1,5,6,7,100,pop_vlan,2" \ "8 11 0 drop" \ "8 11 1 drop" \ "8 12 0 1,5,6,100,pop_vlan,3,4,7" \ "8 12 1 1,5,6,100,pop_vlan,4,7,push_vlan(vid=0,pcp=1),3" do set $tuple in_port=$1 vlan=$2 pcp=$3 expected=$4 if test $vlan = none; then flow="in_port($in_port),eth(src=50:54:00:00:00:01,dst=ff:ff:ff:ff:ff:ff),eth_type(0xabcd)" else flow="in_port($in_port),eth(src=50:54:00:00:00:01,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8100),vlan(vid=$vlan,pcp=$pcp),encap(eth_type(0xabcd))" fi echo "----------------------------------------------------------------------" echo "in_port=$in_port vlan=$vlan pcp=$pcp" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) done OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - fragment handling]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2], [3], [4], [5], [6], [90]) AT_DATA([flows.txt], [dnl priority=75 tcp ip_frag=no tp_dst=80 actions=output:1 priority=75 tcp ip_frag=first tp_dst=80 actions=output:2 priority=75 tcp ip_frag=later tp_dst=80 actions=output:3 priority=50 tcp ip_frag=no actions=output:4 priority=50 tcp ip_frag=first actions=output:5 priority=50 tcp ip_frag=later actions=output:6 
]) AT_CHECK([ovs-ofctl replace-flows br0 flows.txt]) base_flow="in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=128" no_flow="$base_flow,frag=no),tcp(src=12345,dst=80)" first_flow="$base_flow,frag=first),tcp(src=12345,dst=80)" later_flow="$base_flow,frag=later)" # mode no first later for tuple in \ 'normal 1 5 6' \ 'drop 1 drop drop' \ 'nx-match 1 2 6' do set $tuple mode=$1 no=$2 first=$3 later=$4 AT_CHECK([ovs-ofctl set-frags br0 $mode]) for type in no first later; do eval flow=\$${type}_flow exp_output=\$$type printf "\n%s\n" "----$mode $type-----" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) : > expout if test $mode = drop && test $type != no; then echo 'Packets dropped because they are IP fragments and the fragment handling mode is "drop".' >> expout fi echo "Datapath actions: $exp_output" >> expout AT_CHECK([grep 'IP fragments' stdout; tail -1 stdout], [0], [expout]) done done OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - exit]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2], [3], [10], [11], [12], [13], [14]) AT_DATA([flows.txt], [dnl in_port=1 actions=output:10,exit,output:11 in_port=2 actions=output:12,resubmit:1,output:12 in_port=3 actions=output:13,resubmit:2,output:14 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 10 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 12,10 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'in_port(3),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 13,12,10 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - mirroring, select_all]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2, 3)
dnl Run the mirror setup under AT_CHECK so that a failing ovs-vsctl stops the test here; only the exit status is checked, stdout and stderr are ignored.
AT_CHECK([ovs-vsctl set Bridge br0 mirrors=@m -- --id=@p3 get Port p3 -- --id=@m create Mirror name=mymirror select_all=true output_port=@p3], [0], [ignore], [ignore]) AT_DATA([flows.txt], [dnl in_port=1 actions=output:2 in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2,3 ]) flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 1,3 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - mirroring, select_src]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2, 3)
dnl Mirror setup is checked for a zero exit status; stdout and stderr are ignored.
AT_CHECK([ovs-vsctl set Bridge br0 mirrors=@m -- --id=@p1 get Port p1 -- --id=@p3 get Port p3 -- --id=@m create Mirror name=mymirror select_src_port=@p1 output_port=@p3], [0], [ignore], [ignore]) AT_DATA([flows.txt], [dnl in_port=1 actions=output:2 in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2,3 ]) 
flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 1 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - mirroring, OFPP_NONE ingress port]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2)
dnl Run the mirror setup under AT_CHECK so that a failing ovs-vsctl stops the test here; only the exit status is checked, stdout and stderr are ignored.
AT_CHECK([ovs-vsctl set Bridge br0 mirrors=@m -- --id=@p2 get Port p2 -- --id=@m create Mirror name=mymirror select_all=true output_port=@p2], [0], [ignore], [ignore]) AT_CHECK([ovs-ofctl add-flow br0 action=output:1]) # "in_port" defaults to OFPP_NONE if it's not specified. flow="icmp,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_ttl=128,icmp_type=8,icmp_code=0" AT_CHECK([ovs-appctl ofproto/trace br0 "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 1,2 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - mirroring, select_dst]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2, 3)
dnl Mirror setup is checked for a zero exit status; stdout and stderr are ignored.
AT_CHECK([ovs-vsctl set Bridge br0 mirrors=@m -- --id=@p2 get Port p2 -- --id=@p3 get Port p3 -- --id=@m create Mirror name=mymirror select_dst_port=@p2 output_port=@p3], [0], [ignore], [ignore]) AT_DATA([flows.txt], [dnl in_port=1 actions=output:2 in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2,3 ]) flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 1 ]) 
OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - mirroring, select_vlan]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2, 3) ovs-vsctl \ set Bridge br0 mirrors=@m --\ --id=@p2 get Port p2 -- --id=@p3 get Port p3 --\ --id=@m create Mirror name=mymirror select_all=true select_vlan=11 output_port=@p3 AT_DATA([flows.txt], [dnl in_port=1, actions=output:2 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2 ]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=10,pcp=0),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0))" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2 ]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x8100),vlan(vid=11,pcp=0),encap(eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0))" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2,3 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - mirroring, output_port]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2, 3) ovs-vsctl \ set Bridge br0 mirrors=@m --\ --id=@p3 get Port p3 --\ --id=@m create Mirror name=mymirror select_all=true output_port=@p3 AT_DATA([flows.txt], [dnl in_port=1 actions=mod_vlan_vid:17,output:2 in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" 
AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: push_vlan(vid=17,pcp=0),2,pop_vlan,3 ]) flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 1,3 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - mirroring, output_vlan]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2) ovs-vsctl \ set Bridge br0 mirrors=@m --\ --id=@m create Mirror name=mymirror select_all=true output_vlan=12 AT_DATA([flows.txt], [dnl in_port=1 actions=output:2 in_port=2 actions=mod_vlan_vid:17,output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` expected="2,push_vlan(vid=12,pcp=0),1,2,100" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` expected="push_vlan(vid=17,pcp=0),1,pop_vlan,push_vlan(vid=12,pcp=0),1,2,100" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) OVS_VSWITCHD_STOP AT_CLEANUP # Two testcases below are for the ofproto/trace command # The 
first one tests all correct syntax: # ofproto/trace [dp_name] odp_flow [-generate|packet] # ofproto/trace br_name br_flow [-generate|packet] AT_SETUP([ofproto-dpif - ofproto/trace command 1]) OVS_VSWITCHD_START([set bridge br0 fail-mode=standalone]) ADD_OF_PORTS([br0], 1, 2, 3) AT_DATA([flows.txt], [dnl in_port=1 actions=output:2 in_port=2 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) odp_flow="in_port(1)" br_flow="in_port=1" # Test command: ofproto/trace odp_flow AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 2 ]) # Test command: ofproto/trace dp_name odp_flow AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$odp_flow"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 2 ]) # Test command: ofproto/trace br_name br_flow AT_CHECK([ovs-appctl ofproto/trace br0 "$br_flow"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 2 ]) # Delete the inserted flows AT_CHECK([ovs-ofctl del-flows br0 "in_port=1"], [0], [stdout]) AT_CHECK([ovs-ofctl del-flows br0 "in_port=2"], [0], [stdout]) # This section below tests the [-generate] option odp_flow="in_port(3),eth(src=50:54:00:00:00:05,dst=ff:ff:ff:ff:ff:ff)" br_flow="arp,metadata=0,in_port=3,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=ff:ff:ff:ff:ff:ff" # Test command: ofproto/trace odp_flow AT_CHECK([ovs-appctl ofproto/trace "$odp_flow"], [0], [stdout]) # Check for no MAC learning entry AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age ]) # Test command: ofproto/trace br_name br_flow AT_CHECK([ovs-appctl ofproto/trace br0 "$br_flow"], [0], [stdout]) # Check for no MAC learning entry AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age ]) # Test command: ofproto/trace odp_flow -generate AT_CHECK([ovs-appctl ofproto/trace "$odp_flow" -generate], [0], [stdout]) # Check for the MAC learning entry 
AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 3 0 50:54:00:00:00:05 ? ]) # Test command: ofproto/trace dp_name odp_flow -generate AT_CHECK([ovs-appctl ofproto/trace ovs-dummy \ "in_port(1),eth(src=50:54:00:00:00:06,dst=50:54:00:00:00:05)" \ -generate], [0], [stdout]) # Check for both MAC learning entries AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 3 0 50:54:00:00:00:05 ? 1 0 50:54:00:00:00:06 ? ]) # Test command: ofproto/trace br_name br_flow -generate AT_CHECK([ovs-appctl ofproto/trace br0 \ "in_port=2,dl_src=50:54:00:00:00:07,dl_dst=50:54:00:00:00:06" \ -generate], [0], [stdout]) # Check for all three MAC learning entries. AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 3 0 50:54:00:00:00:05 ? 1 0 50:54:00:00:00:06 ? 2 0 50:54:00:00:00:07 ? ]) # This section below tests the [packet] option # The ovs-tcpundump of packets between port1 and port2 pkt1to2="50540000000250540000000108064500001C000100004001F98CC0A80001C0A800020800F7FF00000000" pkt2to1="50540000000150540000000208064500001C000100004001F98CC0A80002C0A800010800F7FF00000000" # Construct the MAC learning table AT_CHECK([ovs-appctl ofproto/trace ovs-dummy \ "in_port(1),eth(src=50:54:00:00:00:01,dst=ff:ff:ff:ff:ff:ff)" \ -generate], [0], [stdout]) # Construct the MAC learning table AT_CHECK([ovs-appctl ofproto/trace ovs-dummy \ "in_port(2),eth(src=50:54:00:00:00:02,dst=ff:ff:ff:ff:ff:ff)" \ -generate], [0], [stdout]) # Test command: ofproto/trace odp_flow packet AT_CHECK([ovs-appctl ofproto/trace \ "in_port(1),skb_priority(1),skb_mark(2)" "$pkt1to2"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 2 ]) AT_CHECK([head -n 3 stdout], [0], [dnl Bridge: br0 Packet: 
arp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:01,dl_dst=50:54:00:00:00:02,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_sha=00:00:00:00:00:00,arp_tha=00:00:00:00:00:00 Flow: pkt_mark=0x2,skb_priority=0x1,arp,metadata=0,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:01,dl_dst=50:54:00:00:00:02,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_sha=00:00:00:00:00:00,arp_tha=00:00:00:00:00:00 ]) # Test command: ofproto/trace dp_name odp_flow packet AT_CHECK([ovs-appctl ofproto/trace ovs-dummy \ "in_port(1),skb_priority(1),skb_mark(2)" "$pkt1to2"], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 2 ]) AT_CHECK([head -n 3 stdout], [0], [dnl Bridge: br0 Packet: arp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:01,dl_dst=50:54:00:00:00:02,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_sha=00:00:00:00:00:00,arp_tha=00:00:00:00:00:00 Flow: pkt_mark=0x2,skb_priority=0x1,arp,metadata=0,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:01,dl_dst=50:54:00:00:00:02,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_sha=00:00:00:00:00:00,arp_tha=00:00:00:00:00:00 ]) # Test command: ofproto/trace br_name br_flow packet AT_CHECK([ovs-appctl ofproto/trace br0 \ "in_port=2,skb_priority=2,pkt_mark=1" "$pkt2to1"], [0], [stdout],[stderr]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 1 ]) AT_CHECK([head -n 2 stdout], [0], [dnl Packet: arp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=50:54:00:00:00:02,dl_dst=50:54:00:00:00:01,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_sha=00:00:00:00:00:00,arp_tha=00:00:00:00:00:00 Flow: pkt_mark=0x1,skb_priority=0x2,arp,metadata=0,in_port=2,vlan_tci=0x0000,dl_src=50:54:00:00:00:02,dl_dst=50:54:00:00:00:01,arp_spa=0.0.0.0,arp_tpa=0.0.0.0,arp_sha=00:00:00:00:00:00,arp_tha=00:00:00:00:00:00 ]) OVS_VSWITCHD_STOP AT_CLEANUP # The second test tests the corner cases AT_SETUP([ofproto-dpif - ofproto/trace command 2]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], 1, 2) # Define flows odp_flow="in_port(1),eth(src=50:54:00:00:00:01,dst=50:54:00:00:00:02)" 
br_flow="in_port=1,dl_src=50:54:00:00:00:01,dl_dst=50:54:00:00:00:02" # Define options generate="-generate" pkt="50540000000250540000000108064500001C000100004001F98CC0A80001C0A800020800F7FF00000000" # Test incorrect command: ofproto/trace wrong_name odp_flow [-generate|packet] m4_foreach( [option], [[], ["$generate"], ["$pkt"]], [AT_CHECK([ovs-appctl ofproto/trace wrong_name "$odp_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl Cannot find datapath of this name ovs-appctl: ovs-vswitchd: server returned an error ])]) # Test incorrect command: ofproto/trace empty_string odp_flow [-generate|packet] m4_foreach( [option], [[], ["$generate"], ["$pkt"]], [AT_CHECK([ovs-appctl ofproto/trace "" "$odp_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl Cannot find datapath of this name ovs-appctl: ovs-vswitchd: server returned an error ])]) # Test incorrect command: ofproto/trace nonexist_dp_name odp_flow [-generate|packet] m4_foreach( [option], [[], ["$generate"], ["$pkt"]], [AT_CHECK([ovs-appctl ofproto/trace ovs-system "$odp_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl Cannot find datapath of this name ovs-appctl: ovs-vswitchd: server returned an error ])]) # Test incorrect command: ofproto/trace br_name odp_flow [-generate|packet] m4_foreach( [option], [[], ["$generate"], ["$pkt"]], [AT_CHECK([ovs-appctl ofproto/trace br0 "$odp_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl Cannot find datapath of this name ovs-appctl: ovs-vswitchd: server returned an error ])]) # Test incorrect command: ofproto/trace dp_name br_flow [-generate|packet] m4_foreach( [option], [[], ["$generate"], ["$pkt"]], [AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$br_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl Unknown bridge name ovs-appctl: ovs-vswitchd: server returned an error ])]) # Test incorrect command: ofproto/trace br_flow [-generate|packet] m4_foreach( [option], [[], 
["$generate"], ["$pkt"]], [AT_CHECK([ovs-appctl ofproto/trace "$br_flow" option], [2], [], [stderr]) AT_CHECK([tail -2 stderr], [0], [dnl Must specify bridge name ovs-appctl: ovs-vswitchd: server returned an error ])]) # Test incorrect command: ofproto/trace dp_name odp_flow garbage_option AT_CHECK([ovs-appctl ofproto/trace \ ovs-dummy "$odp_flow" garbage_option], [2], [stdout],[stderr]) AT_CHECK([tail -2 stderr], [0], [dnl Trailing garbage in packet data ovs-appctl: ovs-vswitchd: server returned an error ]) # Test incorrect command: ofproto/trace with 4 arguments AT_CHECK([ovs-appctl ofproto/trace \ arg1, arg2, arg3, arg4], [2], [stdout],[stderr]) AT_CHECK([tail -2 stderr], [0], [dnl "ofproto/trace" command takes at most 3 arguments ovs-appctl: ovs-vswitchd: server returned an error ]) # Test incorrect command: ofproto/trace with 0 argument AT_CHECK([ovs-appctl ofproto/trace ], [2], [stdout],[stderr]) AT_CHECK([tail -2 stderr], [0], [dnl "ofproto/trace" command requires at least 1 arguments ovs-appctl: ovs-vswitchd: server returned an error ]) OVS_VSWITCHD_STOP AT_CLEANUP m4_define([OFPROTO_TRACE], [flow="$2" AT_CHECK([ovs-appctl ofproto/trace $1 "$flow" $3], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` expected="$4" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected" $5], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual" $5], [0], [expout])]) AT_SETUP([ofproto-dpif - MAC learning]) OVS_VSWITCHD_START([set bridge br0 fail-mode=standalone]) ADD_OF_PORTS([br0], 1, 2, 3) arp='eth_type(0x0806),arp(sip=192.168.0.1,tip=192.168.0.2,op=1,sha=50:54:00:00:00:05,tha=00:00:00:00:00:00)' # Trace an ARP packet arriving on p3, to create a MAC learning entry. OFPROTO_TRACE( [ovs-dummy], [in_port(3),eth(src=50:54:00:00:00:05,dst=ff:ff:ff:ff:ff:ff),$arp], [-generate], [1,2,100]) # Check for the MAC learning entry. 
AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 3 0 50:54:00:00:00:05 ? ]) # Trace a packet arrival destined for the learned MAC. # (This will also learn a MAC.) OFPROTO_TRACE( [ovs-dummy], [in_port(1),eth(src=50:54:00:00:00:06,dst=50:54:00:00:00:05),$arp], [-generate], [3]) # Check for both MAC learning entries. AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 3 0 50:54:00:00:00:05 ? 1 0 50:54:00:00:00:06 ? ]) # Trace a packet arrival that updates the first learned MAC entry. OFPROTO_TRACE( [ovs-dummy], [in_port(2),eth(src=50:54:00:00:00:05,dst=ff:ff:ff:ff:ff:ff),$arp], [-generate], [1,3,100]) # Check that the MAC learning entry was updated. AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 1 0 50:54:00:00:00:06 ? 2 0 50:54:00:00:00:05 ? ]) # Add another bridge. AT_CHECK( [ovs-vsctl \ -- add-br br1 \ -- set bridge br1 datapath-type=dummy]) ADD_OF_PORTS([br1], 4, 5) # Trace some packet arrivals in br1 to create MAC learning entries there too. OFPROTO_TRACE( [ovs-dummy], [in_port(4),eth(src=50:54:00:00:00:06,dst=ff:ff:ff:ff:ff:ff),$arp], [-generate], [5,101]) OFPROTO_TRACE( [ovs-dummy], [in_port(5),eth(src=50:54:00:00:00:07,dst=ff:ff:ff:ff:ff:ff),$arp], [-generate], [4,101]) # Check that the MAC learning entries were added. AT_CHECK_UNQUOTED([ovs-appctl fdb/show br1 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 4 0 50:54:00:00:00:06 ? 5 0 50:54:00:00:00:07 ? ]) # Delete port p1 and see that its MAC learning entry disappeared, and # that the MAC learning entry for the same MAC was also deleted from br1. AT_CHECK([ovs-vsctl del-port p1]) AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 2 0 50:54:00:00:00:05 ? ]) AT_CHECK_UNQUOTED([ovs-appctl fdb/show br1 | sed 's/[[0-9]]\{1,\}$/?/'], [0], [dnl port VLAN MAC Age 5 0 50:54:00:00:00:07 ? 
]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - MAC table overflow]) OVS_VSWITCHD_START( [set bridge br0 fail-mode=standalone other-config:mac-table-size=10]) ADD_OF_PORTS([br0], 1, 2, 3) arp='eth_type(0x0806),arp(sip=192.168.0.1,tip=192.168.0.2,op=1,sha=50:54:00:00:00:05,tha=00:00:00:00:00:00)' AT_CHECK([ovs-appctl time/stop]) # Trace 10 ARP packets arriving on p3, to create MAC learning entries. for i in 0 1 2 3 4 5 6 7 8 9; do OFPROTO_TRACE( [ovs-dummy], [in_port(3),eth(src=50:54:00:00:00:0$i,dst=ff:ff:ff:ff:ff:ff),$arp], [-generate], [1,2,100]) ovs-appctl time/warp 1000 done # Check for the MAC learning entries. AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/ *[[0-9]]\{1,\}$//' | sort], [0], [dnl 3 0 50:54:00:00:00:00 3 0 50:54:00:00:00:01 3 0 50:54:00:00:00:02 3 0 50:54:00:00:00:03 3 0 50:54:00:00:00:04 3 0 50:54:00:00:00:05 3 0 50:54:00:00:00:06 3 0 50:54:00:00:00:07 3 0 50:54:00:00:00:08 3 0 50:54:00:00:00:09 port VLAN MAC Age ]) # Trace another ARP packet on another MAC. OFPROTO_TRACE( [ovs-dummy], [in_port(3),eth(src=50:54:00:00:00:10,dst=ff:ff:ff:ff:ff:ff),$arp], [-generate], [1,2,100]) # Check that the new one chased the oldest one out of the table. AT_CHECK_UNQUOTED([ovs-appctl fdb/show br0 | sed 's/[[0-9]]\{1,\}$/?/' | sort], [0], [dnl 3 0 50:54:00:00:00:01 ? 3 0 50:54:00:00:00:02 ? 3 0 50:54:00:00:00:03 ? 3 0 50:54:00:00:00:04 ? 3 0 50:54:00:00:00:05 ? 3 0 50:54:00:00:00:06 ? 3 0 50:54:00:00:00:07 ? 3 0 50:54:00:00:00:08 ? 3 0 50:54:00:00:00:09 ? 3 0 50:54:00:00:00:10 ? port VLAN MAC Age ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl Test that sFlow samples packets correctly. 
AT_SETUP([ofproto-dpif - sFlow packet sampling]) OVS_VSWITCHD_START([set Bridge br0 fail-mode=standalone]) ON_EXIT([kill `cat test-sflow.pid`]) AT_CHECK([test-sflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > sflow.log], [0], [], [ignore]) AT_CAPTURE_FILE([sflow.log]) SFLOW_PORT=`parse_listening_port < test-sflow.log` ovs-appctl time/stop ADD_OF_PORTS([br0], 1, 2) ovs-vsctl \ set Interface br0 options:ifindex=1002 -- \ set Interface p1 options:ifindex=1004 -- \ set Interface p2 options:ifindex=1003 -- \ set Bridge br0 sflow=@sf -- \ --id=@sf create sflow targets=\"127.0.0.1:$SFLOW_PORT\" \ header=128 sampling=1 polling=1 dnl open with ARP packets to seed the bridge-learning. The output dnl ifIndex numbers should be reported predictably after that. dnl Since we set sampling=1 we should see all of these packets dnl reported. Sorting the output by data-source and seqNo makes dnl it deterministic. Ensuring that we send at least two packets dnl into each port means we get to check the seq nos are dnl incrementing correctly. 
ovs-appctl netdev-dummy/receive p1 'in_port(2),eth(src=50:54:00:00:00:05,dst=FF:FF:FF:FF:FF:FF),eth_type(0x0806),arp(sip=192.168.0.2,tip=192.168.0.1,op=1,sha=50:54:00:00:00:05,tha=00:00:00:00:00:00)' ovs-appctl netdev-dummy/receive p2 'in_port(1),eth(src=50:54:00:00:00:07,dst=FF:FF:FF:FF:FF:FF),eth_type(0x0806),arp(sip=192.168.0.1,tip=192.168.0.2,op=1,sha=50:54:00:00:00:07,tha=00:00:00:00:00:00)' ovs-appctl netdev-dummy/receive p1 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' ovs-appctl netdev-dummy/receive p2 'in_port(1),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)' ovs-appctl netdev-dummy/receive p2 'in_port(1),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x86dd),ipv6(src=fe80::1,dst=fe80::2,label=0,proto=10,tclass=0x70,hlimit=128,frag=no)' dnl sleep long enough to get more than one counter sample dnl from each datasource so we can check sequence numbers for i in `seq 1 30`; do ovs-appctl time/warp 100 done OVS_VSWITCHD_STOP ovs-appctl -t test-sflow exit AT_CHECK([[sort sflow.log | $EGREP 'HEADER|ERROR' | sed 's/ /\ /g']], [0], [dnl HEADER dgramSeqNo=1 ds=127.0.0.1>2:1000 fsSeqNo=1 in_vlan=0 in_priority=0 out_vlan=0 out_priority=0 meanSkip=1 samplePool=1 dropEvents=0 in_ifindex=1004 in_format=0 out_ifindex=2 out_format=2 hdr_prot=1 pkt_len=64 stripped=4 hdr_len=60 hdr=FF-FF-FF-FF-FF-FF-50-54-00-00-00-05-08-06-00-01-08-00-06-04-00-01-50-54-00-00-00-05-C0-A8-00-02-00-00-00-00-00-00-C0-A8-00-01-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 HEADER dgramSeqNo=1 ds=127.0.0.1>2:1000 fsSeqNo=2 in_vlan=0 in_priority=0 out_vlan=0 out_priority=0 meanSkip=1 samplePool=2 dropEvents=0 in_ifindex=1003 in_format=0 out_ifindex=2 out_format=2 hdr_prot=1 pkt_len=64 stripped=4 hdr_len=60 
hdr=FF-FF-FF-FF-FF-FF-50-54-00-00-00-07-08-06-00-01-08-00-06-04-00-01-50-54-00-00-00-07-C0-A8-00-01-00-00-00-00-00-00-C0-A8-00-02-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 HEADER dgramSeqNo=1 ds=127.0.0.1>2:1000 fsSeqNo=3 in_vlan=0 in_priority=0 out_vlan=0 out_priority=0 meanSkip=1 samplePool=3 dropEvents=0 in_ifindex=1004 in_format=0 out_ifindex=1003 out_format=0 hdr_prot=1 pkt_len=64 stripped=4 hdr_len=60 hdr=50-54-00-00-00-07-50-54-00-00-00-05-08-00-45-00-00-1C-00-00-00-00-40-01-F9-8D-C0-A8-00-01-C0-A8-00-02-08-00-F7-FF-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 HEADER dgramSeqNo=1 ds=127.0.0.1>2:1000 fsSeqNo=4 in_vlan=0 in_priority=0 out_vlan=0 out_priority=0 meanSkip=1 samplePool=4 dropEvents=0 in_ifindex=1003 in_format=0 out_ifindex=1004 out_format=0 hdr_prot=1 pkt_len=64 stripped=4 hdr_len=60 hdr=50-54-00-00-00-05-50-54-00-00-00-07-08-00-45-00-00-1C-00-00-00-00-40-01-F9-8D-C0-A8-00-02-C0-A8-00-01-00-00-FF-FF-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 HEADER dgramSeqNo=1 ds=127.0.0.1>2:1000 fsSeqNo=5 in_vlan=0 in_priority=0 out_vlan=0 out_priority=0 meanSkip=1 samplePool=5 dropEvents=0 in_ifindex=1003 in_format=0 out_ifindex=1004 out_format=0 hdr_prot=1 pkt_len=64 stripped=4 hdr_len=60 hdr=50-54-00-00-00-05-50-54-00-00-00-07-86-DD-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00 ]) AT_CHECK([[sort sflow.log | $EGREP 'IFCOUNTERS|ERROR' | head -6 | sed 's/ /\ /g']], [0], [dnl IFCOUNTERS dgramSeqNo=2 ds=127.0.0.1>0:1002 csSeqNo=1 ifindex=1002 type=6 ifspeed=100000000 direction=0 status=3 in_octets=0 in_unicasts=0 in_multicasts=0 in_broadcasts=4294967295 in_discards=0 in_errors=0 in_unknownprotos=4294967295 out_octets=120 out_unicasts=2 out_multicasts=4294967295 out_broadcasts=4294967295 out_discards=0 out_errors=0 promiscuous=0 IFCOUNTERS dgramSeqNo=2 ds=127.0.0.1>0:1003 csSeqNo=1 ifindex=1003 type=6 ifspeed=100000000 
direction=0 status=0 in_octets=98 in_unicasts=3 in_multicasts=0 in_broadcasts=4294967295 in_discards=0 in_errors=0 in_unknownprotos=4294967295 out_octets=120 out_unicasts=2 out_multicasts=4294967295 out_broadcasts=4294967295 out_discards=0 out_errors=0 promiscuous=0 IFCOUNTERS dgramSeqNo=2 ds=127.0.0.1>0:1004 csSeqNo=1 ifindex=1004 type=6 ifspeed=100000000 direction=0 status=0 in_octets=84 in_unicasts=2 in_multicasts=0 in_broadcasts=4294967295 in_discards=0 in_errors=0 in_unknownprotos=4294967295 out_octets=180 out_unicasts=3 out_multicasts=4294967295 out_broadcasts=4294967295 out_discards=0 out_errors=0 promiscuous=0 IFCOUNTERS dgramSeqNo=3 ds=127.0.0.1>0:1002 csSeqNo=2 ifindex=1002 type=6 ifspeed=100000000 direction=0 status=3 in_octets=0 in_unicasts=0 in_multicasts=0 in_broadcasts=4294967295 in_discards=0 in_errors=0 in_unknownprotos=4294967295 out_octets=120 out_unicasts=2 out_multicasts=4294967295 out_broadcasts=4294967295 out_discards=0 out_errors=0 promiscuous=0 IFCOUNTERS dgramSeqNo=3 ds=127.0.0.1>0:1003 csSeqNo=2 ifindex=1003 type=6 ifspeed=100000000 direction=0 status=0 in_octets=98 in_unicasts=3 in_multicasts=0 in_broadcasts=4294967295 in_discards=0 in_errors=0 in_unknownprotos=4294967295 out_octets=120 out_unicasts=2 out_multicasts=4294967295 out_broadcasts=4294967295 out_discards=0 out_errors=0 promiscuous=0 IFCOUNTERS dgramSeqNo=3 ds=127.0.0.1>0:1004 csSeqNo=2 ifindex=1004 type=6 ifspeed=100000000 direction=0 status=0 in_octets=84 in_unicasts=2 in_multicasts=0 in_broadcasts=4294967295 in_discards=0 in_errors=0 in_unknownprotos=4294967295 out_octets=180 out_unicasts=3 out_multicasts=4294967295 out_broadcasts=4294967295 out_discards=0 out_errors=0 promiscuous=0 ]) AT_CLEANUP dnl Test that basic NetFlow reports flow statistics correctly: dnl - The initial packet of a flow is correctly accounted for. dnl - Later packets within a flow are correctly accounted for. dnl - Flow actions changing (in this case, due to MAC learning) dnl cause a record to be sent. 
AT_SETUP([ofproto-dpif - NetFlow flow expiration]) OVS_VSWITCHD_START([set Bridge br0 fail-mode=standalone]) ADD_OF_PORTS([br0], 1, 2) ON_EXIT([kill `cat test-netflow.pid`]) AT_CHECK([test-netflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > netflow.log], [0], [], [ignore]) AT_CAPTURE_FILE([netflow.log]) NETFLOW_PORT=`parse_listening_port < test-netflow.log` ovs-vsctl \ set Bridge br0 netflow=@nf -- \ --id=@nf create NetFlow targets=\"127.0.0.1:$NETFLOW_PORT\" \ engine_id=1 engine_type=2 active_timeout=30 add-id-to-interface=false for delay in 1000 30000; do ovs-appctl netdev-dummy/receive p1 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' ovs-appctl netdev-dummy/receive p2 'in_port(1),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)' ovs-appctl time/warp $delay done sleep 1 OVS_VSWITCHD_STOP ovs-appctl -t test-netflow exit AT_CHECK([[sed -e 's/, uptime [0-9]*// s/, now [0-9.]*// s/time \([0-9]*\)\.\.\.\1$/time / s/time [0-9]*\.\.\.[0-9]*/time / ' netflow.log | sort]], [0], [ header: v5, seq 0, engine 2,1 header: v5, seq 1, engine 2,1 seq 0: 192.168.0.1 > 192.168.0.2, if 1 > 65535, 1 pkts, 60 bytes, ICMP 8:0, time seq 1: 192.168.0.1 > 192.168.0.2, if 1 > 2, 1 pkts, 60 bytes, ICMP 8:0, time seq 1: 192.168.0.2 > 192.168.0.1, if 2 > 1, 2 pkts, 120 bytes, ICMP 0:0, time ]) AT_CLEANUP dnl Test that basic NetFlow reports active expirations correctly. 
AT_SETUP([ofproto-dpif - NetFlow active expiration]) OVS_VSWITCHD_START([set Bridge br0 fail-mode=standalone]) ADD_OF_PORTS([br0], 1, 2) ON_EXIT([kill `cat test-netflow.pid`]) AT_CHECK([test-netflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > netflow.log], [0], [], [ignore]) AT_CAPTURE_FILE([netflow.log]) NETFLOW_PORT=`parse_listening_port < test-netflow.log` ovs-vsctl \ set Bridge br0 netflow=@nf -- \ --id=@nf create NetFlow targets=\"127.0.0.1:$NETFLOW_PORT\" \ engine_id=1 engine_type=2 active_timeout=10 add-id-to-interface=false AT_CHECK([ovs-appctl time/stop]) n=1 while test $n -le 60; do n=`expr $n + 1` ovs-appctl netdev-dummy/receive p1 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=1234,dst=80)' ovs-appctl netdev-dummy/receive p2 'in_port(1),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=80,dst=1234)' ovs-appctl time/warp 1000 done ovs-appctl time/warp 10000 sleep 1 OVS_VSWITCHD_STOP ovs-appctl -t test-netflow exit # Count the number of reported packets: # - From source to destination before MAC learning kicks in (just one). # - From source to destination after that. # - From destination to source. 
n_learn=0 n_in=0 n_out=0 n_other=0 n_recs=0 none=0 while read line; do pkts=`echo "$line" | sed 's/.*, \([[0-9]]*\) pkts,.*/\1/'` case $pkts in [[0-9]]*) ;; *) continue ;; esac case $line in "seq "*": 192.168.0.1 > 192.168.0.2, if 1 > 65535, "*" pkts, "*" bytes, TCP 1234 > 80, time "*) counter=n_learn ;; "seq "*": 192.168.0.1 > 192.168.0.2, if 1 > 2, "*" pkts, "*" bytes, TCP 1234 > 80, time "*) counter=n_in ;; "seq "*": 192.168.0.2 > 192.168.0.1, if 2 > 1, "*" pkts, "*" bytes, TCP 80 > 1234, time "*) counter=n_out ;; *) counter=n_other ;; esac eval $counter=\`expr \$$counter + \$pkts\` n_recs=`expr $n_recs + 1` done < netflow.log # There should be exactly 1 MAC learning packet, # exactly 59 other packets in that direction, # and exactly 60 packets in the other direction. AT_CHECK([echo $n_learn $n_in $n_out $n_other], [0], [1 59 60 0 ]) # There should be 1 expiration for MAC learning, # at least 5 active and a final expiration in one direction, # and at least 5 active and a final expiration in the other direction. echo $n_recs AT_CHECK([test $n_recs -ge 13]) AT_CLEANUP AT_SETUP([idle_age and hard_age increase over time]) OVS_VSWITCHD_START # get_ages DURATION HARD IDLE # # Fetch the flow duration, hard age, and idle age into the variables # whose names are given as arguments. Rounds DURATION down to the # nearest integer. If hard_age doesn't appear in the output, sets # HARD to "none". If idle_age doesn't appear in the output, sets IDLE # to 0. 
get_ages () { AT_CHECK([ovs-ofctl dump-flows br0], [0], [stdout]) duration=`sed -n 's/.*duration=\([[0-9]]*\)\(\.[[0-9]]*\)\{0,1\}s.*/\1/p' stdout` AT_CHECK([[expr X"$duration" : 'X[0-9][0-9]*$']], [0], [ignore]) AS_VAR_COPY([$1], [duration]) hard=`sed -n 's/.*hard_age=\([[0-9]]*\),.*/\1/p' stdout` if test X"$hard" = X; then hard=none else AT_CHECK([[expr X"$hard" : 'X[0-9][0-9]*$']], [0], [ignore]) fi AS_VAR_COPY([$2], [hard]) idle=`sed -n 's/.*idle_age=\([[0-9]]*\),.*/\1/p' stdout` if test X"$idle" = X; then idle=0 else AT_CHECK([[expr X"$idle" : 'X[0-9][0-9]*$']], [0], [ignore]) fi AS_VAR_COPY([$3], [idle]) } # Add a flow and get its initial hard and idle age. AT_CHECK([ovs-ofctl add-flow br0 hard_timeout=199,idle_timeout=188,actions=drop]) get_ages duration1 hard1 idle1 # Warp time forward by 10 seconds, then modify the flow's actions. ovs-appctl time/warp 10000 get_ages duration2 hard2 idle2 AT_CHECK([ovs-ofctl mod-flows br0 actions=flood]) # Warp time forward by 10 seconds. ovs-appctl time/warp 10000 get_ages duration3 hard3 idle3 # Warp time forward 10 more seconds, then pass some packets through the flow, # then warp forward a few more times because idle times are only updated # occasionally. ovs-appctl time/warp 10000 ovs-appctl netdev-dummy/receive br0 'in_port(0),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=80,dst=1234)' ovs-appctl time/warp 1000 ovs-appctl time/warp 1000 ovs-appctl time/warp 1000 get_ages duration4 hard4 idle4 printf "duration: %4s => %4s => %4s => %4s\n" $duration1 $duration2 $duration3 $duration4 printf "hard_age: %4s => %4s => %4s => %4s\n" $hard1 $hard2 $hard3 $hard4 printf "idle_age: %4s => %4s => %4s => %4s\n" $idle1 $idle2 $idle3 $idle4 # Duration should increase steadily over time. 
AT_CHECK([test $duration1 -lt $duration2]) AT_CHECK([test $duration2 -lt $duration3]) AT_CHECK([test $duration3 -lt $duration4]) # Hard age should be "none" initially because it's the same as flow_duration, # then it should increase. AT_CHECK([test $hard1 = none]) AT_CHECK([test $hard2 = none]) AT_CHECK([test $hard3 != none]) AT_CHECK([test $hard4 != none]) AT_CHECK([test $hard3 -lt $hard4]) # Idle age should increase from 1 to 2 to 3, then decrease. AT_CHECK([test $idle1 -lt $idle2]) AT_CHECK([test $idle2 -lt $idle3]) AT_CHECK([test $idle3 -gt $idle4]) # Check some invariant relationships. AT_CHECK([test $duration1 = $idle1]) AT_CHECK([test $duration2 = $idle2]) AT_CHECK([test $duration3 = $idle3]) AT_CHECK([test $idle3 -gt $hard3]) AT_CHECK([test $idle4 -lt $hard4]) AT_CHECK([test $hard4 -lt $duration4]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - fin_timeout]) OVS_VSWITCHD_START AT_DATA([flows.txt], [dnl in_port=1 actions=output:2 in_port=2 actions=mod_vlan_vid:17,output:1 ]) AT_CHECK([ovs-ofctl add-flow br0 'idle_timeout=60,actions=fin_timeout(idle_timeout=5)']) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [NXST_FLOW reply: idle_timeout=60, actions=fin_timeout(idle_timeout=5) ]) # Check that a TCP SYN packet does not change the timeout. (Because # flow stats updates are mainly what implements the fin_timeout # feature, we warp forward a couple of times to ensure that flow stats # run before re-checking the flow table.) AT_CHECK([ovs-appctl netdev-dummy/receive br0 0021853763af0026b98cb0f908004500003c2e2440004006465dac11370dac11370b828b0016751e267b00000000a00216d017360000020405b40402080a2d25085f0000000001030307]) AT_CHECK([ovs-appctl time/warp 1000 && ovs-appctl time/warp 1000], [0], [warped warped ]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [NXST_FLOW reply: n_packets=1, n_bytes=74, idle_timeout=60, actions=fin_timeout(idle_timeout=5) ]) # Check that a TCP FIN packet does change the timeout. 
AT_CHECK([ovs-appctl netdev-dummy/receive br0 0021853763af0026b98cb0f90800451000342e3e40004006463bac11370dac11370b828b0016751e319dfc96399b801100717ae800000101080a2d250a9408579588]) AT_CHECK([ovs-appctl time/warp 1000 && ovs-appctl time/warp 1000], [0], [warped warped ]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [NXST_FLOW reply: n_packets=2, n_bytes=140, idle_timeout=5, actions=fin_timeout(idle_timeout=5) ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - ovs-appctl dpif/dump-dps]) OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy]) ADD_OF_PORTS([br0], [1], [2]) ADD_OF_PORTS([br1], [3]) AT_CHECK([ovs-appctl dpif/dump-dps], [0], [dnl dummy@br0 dummy@br1 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - ovs-appctl dpif/show]) OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy]) ADD_OF_PORTS([br0], [1], [2]) ADD_OF_PORTS([br1], [3]) AT_CHECK([ovs-appctl dpif/show], [0], [dnl dummy@ovs-dummy: hit:0 missed:0 flows: cur: 0, avg: 0, max: 0, life span: 0ms overall avg: add rate: 0.000/min, del rate: 0.000/min br0: hit:0 missed:0 br0 65534/100: (dummy) p1 1/1: (dummy) p2 2/2: (dummy) br1: hit:0 missed:0 br1 65534/101: (dummy) p3 3/3: (dummy) ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - ovs-appctl dpif/dump-flows]) OVS_VSWITCHD_START([add-br br1 -- \ set bridge br1 datapath-type=dummy fail-mode=secure]) ADD_OF_PORTS([br0], [1], [2]) ADD_OF_PORTS([br1], [3]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p3 
'in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-flows br0 | sort | STRIP_USED], [0], [dnl skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller)) skb_priority(0),in_port(2),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller)) ]) AT_CHECK([ovs-appctl dpif/dump-flows br1 | sort | STRIP_USED], [0], [dnl skb_priority(0),in_port(3),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller)) ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - ovs-appctl dpif/del-flows]) OVS_VSWITCHD_START([add-br br1 -- \ set bridge br1 datapath-type=dummy fail-mode=secure]) ADD_OF_PORTS([br0], [1], [2]) ADD_OF_PORTS([br1], [3]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=0,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-flows br0 | sort | STRIP_USED], [0], [dnl 
skb_priority(0),in_port(1),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller)) skb_priority(0),in_port(2),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller)) ]) AT_CHECK([ovs-appctl dpif/dump-flows br1 | sort | STRIP_USED], [0], [dnl skb_priority(0),in_port(3),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller)) ]) AT_CHECK([ovs-appctl dpif/del-flows br0]) AT_CHECK([ovs-appctl dpif/dump-flows br0 | sort | STRIP_USED], [0], [dnl ]) AT_CHECK([ovs-appctl dpif/dump-flows br1 | sort | STRIP_USED], [0], [dnl skb_priority(0),in_port(3),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:0, bytes:0, used:0.0s, actions:userspace(pid=0,slow_path(controller)) ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - patch ports]) OVS_VSWITCHD_START([add-br br1 \ -- set bridge br1 datapath-type=dummy fail-mode=secure \ -- add-port br1 pbr1 -- set int pbr1 type=patch options:peer=pbr0 \ -- add-port br0 pbr0 -- set int pbr0 type=patch options:peer=pbr1]) ADD_OF_PORTS([br0], [2]) ADD_OF_PORTS([br1], [3]) AT_CHECK([ovs-appctl time/stop]) dnl Make life span averages consistent. 
# Install one flow per bridge, inject 10 packets into br0 and 5 into br1, then check datapath stats, datapath flows and patch port counters.
AT_CHECK([ovs-ofctl add-flow br0 actions=LOCAL,output:1,output:2]) AT_CHECK([ovs-ofctl add-flow br1 actions=LOCAL,output:1,output:3]) for i in $(seq 1 10); do ovs-appctl netdev-dummy/receive br0 'in_port(100),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' done for i in $(seq 1 5); do ovs-appctl netdev-dummy/receive br1 'in_port(101),eth(src=50:54:00:00:00:07,dst=50:54:00:00:00:05),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' done AT_CHECK([ovs-appctl time/warp 1000 && ovs-appctl time/warp 1000], [0], [warped warped ]) AT_CHECK([ovs-appctl dpif/show], [0], [dnl dummy@ovs-dummy: hit:13 missed:2 flows: cur: 2, avg: 1, max: 2, life span: 1250ms overall avg: add rate: 0.000/min, del rate: 0.000/min br0: hit:9 missed:1 br0 65534/100: (dummy) p2 2/2: (dummy) pbr0 1/none: (patch: peer=pbr1) br1: hit:4 missed:1 br1 65534/101: (dummy) p3 3/3: (dummy) pbr1 1/none: (patch: peer=pbr0) ]) AT_CHECK([ovs-appctl dpif/dump-flows br0 | STRIP_USED], [0], [dnl skb_priority(0),in_port(100),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:9, bytes:540, used:0.0s, actions:101,3,2 ]) AT_CHECK([ovs-appctl dpif/dump-flows br1 | STRIP_USED], [0], [dnl skb_priority(0),in_port(101),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no/0xff), packets:4, bytes:240, used:0.0s, actions:100,2,3 ]) AT_CHECK([ovs-ofctl dump-ports br0 pbr0], [0], [dnl OFPST_PORT reply (xid=0x4): 1 ports port 1: rx pkts=5, bytes=300, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=10, bytes=600, drop=0, errs=0, coll=0 ]) AT_CHECK([ovs-ofctl dump-ports br1 pbr1], [0], [dnl OFPST_PORT reply (xid=0x4): 1 ports port 1: rx pkts=10, bytes=600, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=5, bytes=300, drop=0, errs=0, coll=0 ])
OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - ovs-appctl dpif/show rates]) OVS_VSWITCHD_START([set Bridge br0 fail-mode=secure]) ADD_OF_PORTS([br0], 1, 2) AT_CHECK([ovs-appctl time/stop]) dnl Make life span averages consistent. AT_CHECK([ovs-ofctl add-flow br0 actions=LOCAL,output:1,output:2]) for i in $(seq 1 61); do ovs-appctl netdev-dummy/receive br0 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' ovs-appctl time/warp 10000 ovs-appctl time/warp 50000 done AT_CHECK([ovs-appctl time/warp 10000], [0], [warped ]) AT_CHECK([ovs-appctl dpif/show | sed 's/ 10[[0-9]]\{3\}(ms)$/ 10000(ms)/'], [0], [dnl dummy@ovs-dummy: hit:0 missed:61 flows: cur: 0, avg: 0, max: 1, life span: 1666ms hourly avg: add rate: 0.641/min, del rate: 0.641/min overall avg: add rate: 1.000/min, del rate: 1.000/min br0: hit:0 missed:61 br0 65534/100: (dummy) p1 1/1: (dummy) p2 2/2: (dummy) ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - port duration]) OVS_VSWITCHD_START([set Bridge br0 protocols=OpenFlow13]) ADD_OF_PORTS([br0], 1, 2) ovs-appctl time/warp 10000 AT_CHECK([ovs-ofctl -O openflow13 dump-ports br0], [0], [stdout]) AT_CHECK([sed 's/=[[0-9]][[0-9]]\(\.[[0-9]][[0-9]]*\)\{0,1\}s/=?s/' stdout], [0], [dnl OFPST_PORT reply (OF1.3) (xid=0x2): 3 ports port 1: rx pkts=0, bytes=0, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=0, bytes=0, drop=0, errs=0, coll=0 duration=?s port 2: rx pkts=0, bytes=0, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=0, bytes=0, drop=0, errs=0, coll=0 duration=?s port LOCAL: rx pkts=0, bytes=0, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=0, bytes=0, drop=0, errs=0, coll=0 duration=?s ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl ---------------------------------------------------------------------- AT_BANNER([ofproto-dpif -- megaflows]) # Strips out uninteresting parts of megaflow output, as well as parts # that vary from one run to another 
(e.g., timing and bond actions). m4_define([STRIP_USED], [[sed ' s/used:[0-9]*\.[0-9]*/used:0.0/ ' | sort]]) m4_define([STRIP_XOUT], [[sed ' s/used:[0-9]*\.[0-9]*/used:0.0/ s/Datapath actions:.*/Datapath actions: / ' | sort]]) AT_SETUP([ofproto-dpif megaflow - port classification]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1 actions=output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,nw_frag=no, n_subfacets:2, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - L2 classification]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1,dl_src=50:54:00:00:00:09 actions=output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:09,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:0b,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) 
OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - L3 classification]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1,icmp,nw_src=10.0.0.4 actions=output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,icmp,in_port=1,nw_src=10.0.0.2,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,icmp,in_port=1,nw_src=10.0.0.4,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - L4 classification]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1,icmp,icmp_type=8 actions=output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,icmp,in_port=1,nw_frag=no,icmp_type=8, n_subfacets:2, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - normal]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_CHECK([ovs-ofctl add-flow br0 action=normal]) AT_CHECK([ovs-appctl 
netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - mpls]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 dl_src=50:54:00:00:00:09 actions=push_mpls:0x8847,2 table=0 dl_src=50:54:00:00:00:0b actions=pop_mpls:0x0800,2 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x8847),mpls(label=11,tc=3,ttl=64,bos=1)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0a),eth_type(0x8847),mpls(label=11,tc=3,ttl=64,bos=1)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,mpls,in_port=1,dl_src=50:54:00:00:00:09,mpls_label=11,mpls_tc=3,mpls_ttl=64,mpls_bos=1,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,mpls,in_port=1,dl_src=50:54:00:00:00:0b,mpls_label=11,mpls_tc=3,mpls_ttl=64,mpls_bos=1,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - netflow]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) dnl NetFlow configuration disables wildcarding relevant fields ON_EXIT([kill 
`cat test-netflow.pid`]) AT_CHECK([test-netflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > netflow.log], [0], [], [ignore]) AT_CAPTURE_FILE([netflow.log]) NETFLOW_PORT=`parse_listening_port < test-netflow.log` ovs-vsctl \ set Bridge br0 netflow=@nf -- \ --id=@nf create NetFlow targets=\"127.0.0.1:$NETFLOW_PORT\" \ engine_id=1 engine_type=2 active_timeout=30 add-id-to-interface=false AT_CHECK([ovs-ofctl add-flow br0 action=normal]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,icmp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_frag=no,icmp_type=8,icmp_code=0, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,icmp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_src=10.0.0.4,nw_dst=10.0.0.3,nw_tos=0,nw_frag=no,icmp_type=8,icmp_code=0, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - normal, active-backup bonding]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 -- \ add-bond br0 bond0 p2 p3 bond_mode=active-backup -- \ set interface p2 type=dummy ofport_request=2 -- \ set interface p3 type=dummy ofport_request=3]) AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK ]) AT_CHECK([ovs-ofctl add-flow br0 action=normal]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 
'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - normal, balance-slb bonding]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 -- \ add-bond br0 bond0 p2 p3 bond_mode=balance-slb -- \ set interface p2 type=dummy ofport_request=2 -- \ set interface p3 type=dummy ofport_request=3]) AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK ]) AT_CHECK([ovs-ofctl add-flow br0 action=normal]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: 
]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - normal, balance-tcp bonding]) # Create bond0 on br0 with interfaces p0 and p1 # and bond1 on br1 with interfaces p2 and p3 # with p0 patched to p2 and p1 patched to p3. OVS_VSWITCHD_START( [add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \ other-config:lacp-time=fast \ other-config:bond-rebalance-interval=0 -- \ set interface p0 type=patch options:peer=p2 ofport_request=1 -- \ set interface p1 type=patch options:peer=p3 ofport_request=2 -- \ add-br br1 -- \ set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ fail-mode=secure -- \ add-bond br1 bond1 p2 p3 bond_mode=balance-tcp lacp=active \ other-config:lacp-time=fast \ other-config:bond-rebalance-interval=0 -- \ set interface p2 type=patch options:peer=p0 ofport_request=3 -- \ set interface p3 type=patch options:peer=p1 ofport_request=4 --]) AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK ]) ADD_OF_PORTS([br0], [7]) AT_CHECK([ovs-ofctl add-flow br0 action=normal]) AT_CHECK([ovs-ofctl add-flow br1 action=normal]) ovs-appctl time/warp 5000 AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,icmp,in_port=7,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_frag=no,icmp_type=8,icmp_code=0, n_subfacets:1, used:0.0s, Datapath actions: 
skb_priority=0,icmp,in_port=7,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_src=10.0.0.4,nw_dst=10.0.0.3,nw_frag=no,icmp_type=8,icmp_code=0, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - resubmit port action]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1,ip actions=resubmit(90) table=0 in_port=90,dl_src=50:54:00:00:00:09 actions=output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:09,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:0b,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - resubmit table action]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1,ip actions=resubmit(,1) table=1 dl_src=50:54:00:00:00:09 actions=output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows
br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:09,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:0b,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - goto_table action]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1,ip actions=goto_table(1) table=1 dl_src=50:54:00:00:00:09 actions=output(2) ]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:09,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:0b,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - mirroring, select_all]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2], [3]) ovs-vsctl \ set Bridge br0 mirrors=@m --\ --id=@p3 get Port p3 --\ --id=@m create Mirror name=mymirror select_all=true output_port=@p3 AT_DATA([flows.txt], [dnl in_port=1 actions=output:2 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 
'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,nw_frag=no, n_subfacets:2, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - mirroring, select_vlan]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2], [3]) ovs-vsctl \ set Bridge br0 mirrors=@m --\ --id=@p2 get Port p2 -- --id=@p3 get Port p3 --\ --id=@m create Mirror name=mymirror select_all=true select_vlan=11 output_port=@p3 AT_DATA([flows.txt], [dnl in_port=1 actions=output:2 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x8100),vlan(vid=11,pcp=7),encap(eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0))']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,dl_vlan=11,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - move action]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1 ip,actions=move:NXM_OF_IP_SRC[[]]->NXM_NX_REG0[[]],resubmit(90) table=0 in_port=90 ip,actions=move:NXM_NX_REG0[[]]->NXM_NX_REG1[[]],resubmit(91) table=0 in_port=91 reg0=0x0a000002,actions=output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 
'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,nw_src=10.0.0.2,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,nw_src=10.0.0.4,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - push action]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1 ip,actions=push:NXM_OF_IP_SRC[[]],output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,nw_src=10.0.0.2,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,nw_src=10.0.0.4,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - learning]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1 actions=load:2->NXM_NX_REG0[[0..15]],learn(table=1,priority=65535,NXM_OF_ETH_SRC[[]],NXM_OF_VLAN_TCI[[0..11]],output:NXM_NX_REG0[[0..15]]),output:2 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) # We send each packet twice because the 
first packet in each flow causes the # flow table to change and thus revalidations, which (depending on timing) # can keep a megaflow from being installed. The revalidations are done by # the second iteration, allowing the flows to be installed. for i in 1 2; do AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) ovs-appctl time/warp 100 done dnl The original flow is missing due to a revalidation. AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x0fff,dl_src=50:54:00:00:00:09,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=1,vlan_tci=0x0000/0x0fff,dl_src=50:54:00:00:00:0b,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - tunnels]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 \ ofport_request=1 -- \ add-port br0 p2 -- set Interface p2 type=gre options:remote_ip=1.1.1.1 \ ofport_request=2 options:key=flow -- \ add-port br0 p3 -- set Interface p3 type=dummy ofport_request=3 \ ofport_request=3 -- \ add-port br0 p4 -- set Interface p4 type=gre options:remote_ip=1.1.1.2 \ options:tos=inherit options:ttl=inherit ofport_request=4 options:key=flow]) AT_DATA([flows.txt], [dnl in_port=1,actions=output(2) in_port=3,actions=output(4) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) dnl ECN bits are always copied out, but don't use 0x3 (CE), since that dnl will cause the packet to be dropped. 
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0xfd,ttl=128,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0x1,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0xfd,ttl=128,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0x1,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl skb_priority=0,ip,in_port=1,nw_ecn=1,nw_frag=no, n_subfacets:2, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=3,nw_tos=0,nw_ecn=1,nw_ttl=64,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,ip,in_port=3,nw_tos=252,nw_ecn=1,nw_ttl=128,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - dec_ttl]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1,icmp,nw_src=10.0.0.4 actions=dec_ttl,output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_XOUT], [0], [dnl 
skb_priority=0,icmp,in_port=1,nw_src=10.0.0.2,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: skb_priority=0,icmp,in_port=1,nw_src=10.0.0.4,nw_dst=10.0.0.3,nw_tos=0,nw_ecn=0,nw_ttl=64, n_subfacets:1, used:0.0s, Datapath actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - set dl_dst]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1], [2]) AT_DATA([flows.txt], [dnl table=0 in_port=1 actions=mod_dl_dst(50:54:00:00:00:0a),output(2) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) dnl The megaflows do not match the same fields, since the first packet dnl is essentially a no-op. (The new destination MAC is the same as the dnl original.) The ofproto-dpif library un-wildcards the destination MAC dnl so that a packet that doesn't need its MAC address changed doesn't dnl hide one that does. Since the first entry doesn't need to change, dnl only the destination MAC address is matched (as decided by dnl ofproto-dpif). The second entry actually updates the destination dnl MAC, so both the source and destination MAC addresses are dnl un-wildcarded, since the ODP commit functions update both the source dnl and destination MAC addresses. 
AT_CHECK([ovs-appctl dpif/dump-megaflows br0 | STRIP_USED], [0], [dnl skb_priority=0,ip,in_port=1,dl_dst=50:54:00:00:00:0a,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: 2 skb_priority=0,ip,in_port=1,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_frag=no, n_subfacets:1, used:0.0s, Datapath actions: set(eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0a)),2 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - datapath port number change]) OVS_VSWITCHD_START([set Bridge br0 fail-mode=standalone]) ADD_OF_PORTS([br0], 1) # Trace a flow that should output to p1. AT_CHECK([ovs-appctl ofproto/trace br0 in_port=LOCAL,dl_src=10:20:30:40:50:60], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 1 ]) # Change p1's port number to 5. AT_CHECK([ovs-appctl dpif-dummy/change-port-number ovs-dummy p1 5]) # Trace a flow that should output to p1 in its new location. AT_CHECK([ovs-appctl ofproto/trace br0 in_port=LOCAL,dl_src=10:20:30:40:50:60], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 5 ]) OVS_VSWITCHD_STOP AT_CLEANUP # Tests the bundling with various bfd and cfm configurations. AT_SETUP([ofproto - bundle with variable bfd/cfm config]) OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy -- \ add-bond br0 br0bond p0 p2 bond-mode=active-backup -- \ add-bond br1 br1bond p1 p3 bond-mode=active-backup -- \ set Interface p1 type=patch options:peer=p0 ofport_request=2 -- \ set Interface p3 type=patch options:peer=p2 ofport_request=4 -- \ set Interface p0 type=patch options:peer=p1 ofport_request=1 -- \ set Interface p2 type=patch options:peer=p3 ofport_request=3 -- \ set Interface p0 bfd:enable=true bfd:min_tx=300 bfd:min_rx=300 -- \ set Interface p0 cfm_mpid=1 -- \ set Interface p1 bfd:enable=true bfd:min_tx=500 bfd:min_rx=500]) ovs-appctl time/stop # advance the clock to stablize everything. for i in `seq 0 49`; do ovs-appctl time/warp 100; done # cfm/show should show 'recv' fault. 
AT_CHECK([ovs-appctl cfm/show | sed -n '/^.*fault:.*/p'], [0], [dnl fault: recv ]) # bfd/show should show 'up'. AT_CHECK([ovs-appctl bfd/show | sed -n '/^.*Session State:.*/p'], [0], [dnl Local Session State: up Remote Session State: up Local Session State: up Remote Session State: up ]) # bond/show should show 'may-enable: true' for all slaves. AT_CHECK([ovs-appctl bond/show | sed -n '/^.*may_enable:.*/p'], [0], [dnl may_enable: true may_enable: true may_enable: true may_enable: true ]) # now disable the bfd on p1. AT_CHECK([ovs-vsctl set Interface p1 bfd:enable=false]) # advance the clock to stablize everything. for i in `seq 0 49`; do ovs-appctl time/warp 100; done # cfm/show should show 'recv' fault. AT_CHECK([ovs-appctl cfm/show | sed -n '/^.*fault:.*/p'], [0], [dnl fault: recv ]) # bfd/show should show 'down'. AT_CHECK([ovs-appctl bfd/show | sed -n '/^.*Session State:.*/p'], [0], [dnl Local Session State: down Remote Session State: down ]) # bond/show should show 'may-enable: false' for p0. AT_CHECK([ovs-appctl bond/show | sed -n '/^.*may_enable:.*/p'], [0], [dnl may_enable: false may_enable: true may_enable: true may_enable: true ]) # now enable the bfd on p1 and disable bfd on p0. AT_CHECK([ovs-vsctl set Interface p1 bfd:enable=true]) AT_CHECK([ovs-vsctl set Interface p0 bfd:enable=false]) # advance the clock to stablize everything. for i in `seq 0 49`; do ovs-appctl time/warp 100; done # cfm/show should show 'recv' fault. AT_CHECK([ovs-appctl cfm/show | sed -n '/^.*fault:.*/p'], [0], [dnl fault: recv ]) # bfd/show should show 'down'. AT_CHECK([ovs-appctl bfd/show | sed -n '/^.*Session State:.*/p'], [0], [dnl Local Session State: down Remote Session State: down ]) # bond/show should show 'may-enable: false' for p0 and p1. 
AT_CHECK([ovs-appctl bond/show | sed -n '/^.*may_enable:.*/p'], [0], [dnl may_enable: false may_enable: true may_enable: false may_enable: true ]) OVS_VSWITCHD_STOP AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ofproto-macros.at000066400000000000000000000111121226605124000221100ustar00rootroot00000000000000m4_divert_push([PREPARE_TESTS]) [ # Strips out uninteresting parts of ovs-ofctl output, as well as parts # that vary from one run to another. ofctl_strip () { sed ' s/ (xid=0x[0-9a-fA-F]*)// s/ duration=[0-9.]*s,// s/ cookie=0x0,// s/ table=0,// s/ n_packets=0,// s/ n_bytes=0,// s/ idle_age=[0-9]*,// s/ hard_age=[0-9]*,// ' } # parse_listening_port [SERVER] # # Parses the TCP or SSL port on which a server is listening from the # log, given that the server was told to listen on a kernel-chosen # port, file provided on stdin, and prints the port number on stdout. # You should specify the listening remote as ptcp:0:127.0.0.1 or # pssl:0:127.0.0.1. # # Here's an example of how to use this with ovsdb-server: # # OVS_LOGDIR=`pwd`; export OVS_LOGDIR # ovsdb-server --log-file --remote=ptcp:0:127.0.0.1 ... # TCP_PORT=`parse_listening_port < ovsdb-server.log` parse_listening_port () { sed -n 's/.*0:127\.0\.0\.1: listening on port \([0-9]*\)$/\1/p' }] m4_divert_pop([PREPARE_TESTS]) m4_define([STRIP_XIDS], [[sed 's/ (xid=0x[0-9a-fA-F]*)//']]) m4_define([STRIP_DURATION], [[sed 's/\bduration=[0-9.]*s/duration=?s/']]) m4_define([STRIP_USED], [[sed 's/used:[0-9]\.[0-9]*/used:0.0/']]) m4_define([TESTABLE_LOG], [-vPATTERN:ANY:'%c|%p|%m']) # OVS_VSWITCHD_START([vsctl-args], [vsctl-output], [=override]) # # Creates a database and starts ovsdb-server, starts ovs-vswitchd # connected to that database, calls ovs-vsctl to create a bridge named # br0 with predictable settings, passing 'vsctl-args' as additional # commands to ovs-vsctl. If 'vsctl-args' causes ovs-vsctl to provide # output (e.g. 
because it includes "create" commands) then 'vsctl-output' # specifies the expected output after filtering through uuidfilt.pl. # # If a test needs to use "system" devices (as dummies), then specify # =override (literally) as the third argument. Otherwise, system devices # won't work at all (which makes sense because tests should not access a # system's real Ethernet devices). m4_define([OVS_VSWITCHD_START], [OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_SYSCONFDIR=`pwd`; export OVS_SYSCONFDIR ON_EXIT([kill `cat ovsdb-server.pid ovs-vswitchd.pid`]) dnl Create database. touch .conf.db.~lock~ AT_CHECK([ovsdb-tool create conf.db $abs_top_srcdir/vswitchd/vswitch.ovsschema]) dnl Start ovsdb-server. AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --log-file --remote=punix:$OVS_RUNDIR/db.sock], [0], [], [stderr]) AT_CHECK([[sed < stderr ' /vlog|INFO|opened log file/d /ovsdb_server|INFO|ovsdb-server (Open vSwitch)/d']]) AT_CAPTURE_FILE([ovsdb-server.log]) dnl Initialize database. AT_CHECK([ovs-vsctl --no-wait init]) dnl Start ovs-vswitchd. AT_CHECK([ovs-vswitchd --detach --no-chdir --pidfile --enable-dummy$3 --disable-system --log-file -vvconn -vofproto_dpif], [0], [], [stderr]) AT_CAPTURE_FILE([ovs-vswitchd.log]) AT_CHECK([[sed < stderr ' /vlog|INFO|opened log file/d /vswitchd|INFO|ovs-vswitchd (Open vSwitch)/d /reconnect|INFO|/d /ofproto|INFO|using datapath ID/d /ofproto|INFO|datapath ID changed to fedcba9876543210/d']]) dnl Add bridges, ports, etc. 
AT_CHECK([ovs-vsctl -- add-br br0 -- set bridge br0 datapath-type=dummy other-config:datapath-id=fedcba9876543210 other-config:hwaddr=aa:55:aa:55:00:00 protocols=[[OpenFlow10,OpenFlow11,OpenFlow12,OpenFlow13]] fail-mode=secure -- $1 m4_if([$2], [], [], [| ${PERL} $srcdir/uuidfilt.pl])], [0], [$2]) ]) m4_divert_push([PREPARE_TESTS]) check_logs () { sed -n "$1 /|WARN|/p /|ERR|/p /|EMER|/p" ovs-vswitchd.log ovsdb-server.log } m4_divert_pop([PREPARE_TESTS]) # OVS_VSWITCHD_STOP([WHITELIST]) # # Gracefully stops ovs-vswitchd and ovsdb-server, checking their log files # for messages with severity WARN or higher and signaling an error if any # is present. The optional WHITELIST may contain shell-quoted "sed" # commands to delete any warnings that are actually expected, e.g.: # # OVS_VSWITCHD_STOP(["/expected error/d"]) m4_define([OVS_VSWITCHD_STOP], [AT_CHECK([check_logs $1]) AT_CHECK([ovs-appctl -t ovs-vswitchd exit]) AT_CHECK([ovs-appctl -t ovsdb-server exit])]) # ADD_OF_PORTS(BRIDGE, OF_PORT[, OF_PORT...]) # # Creates a dummy interface with an OpenFlow port number of OF_PORT and # name of p{OF_PORT}. The dummy implementation will treat the OF_PORT # as the datapath port number, which as the effect of making the # OpenFlow and datapath numbers the same. 
m4_define([ADD_OF_PORTS], [ovs-vsctl m4_foreach([of_port], m4_cdr($@), [ \ -- add-port $1 p[]of_port -- set Interface p[]of_port type=dummy ofport_request=of_port])]) openvswitch-2.0.1+git20140120/tests/ofproto.at000066400000000000000000002247161226605124000206460ustar00rootroot00000000000000AT_BANNER([ofproto]) AT_SETUP([ofproto - echo request]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -vwarn probe br0]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - feature request, config request]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -vwarn show br0], [0], [stdout]) AT_CHECK([STRIP_XIDS stdout], [0], [dnl OFPT_FEATURES_REPLY: dpid:fedcba9876543210 n_tables:254, n_buffers:256 capabilities: FLOW_STATS TABLE_STATS PORT_STATS QUEUE_STATS ARP_MATCH_IP actions: OUTPUT SET_VLAN_VID SET_VLAN_PCP STRIP_VLAN SET_DL_SRC SET_DL_DST SET_NW_SRC SET_NW_DST SET_NW_TOS SET_TP_SRC SET_TP_DST ENQUEUE LOCAL(br0): addr:aa:55:aa:55:00:00 config: 0 state: 0 speed: 0 Mbps now, 0 Mbps max OFPT_GET_CONFIG_REPLY: frags=normal miss_send_len=0 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - set OpenFlow port number]) OVS_VSWITCHD_START( [add-port br0 p1 -- set Interface p1 type=dummy --\ add-port br0 p2 -- set Interface p2 type=dummy ofport_request=99]) AT_CHECK([ovs-ofctl -vwarn show br0], [0], [stdout]) AT_CHECK([[sed ' s/ (xid=0x[0-9a-fA-F]*)// s/00:0.$/00:0x/' < stdout]], [0], [dnl OFPT_FEATURES_REPLY: dpid:fedcba9876543210 n_tables:254, n_buffers:256 capabilities: FLOW_STATS TABLE_STATS PORT_STATS QUEUE_STATS ARP_MATCH_IP actions: OUTPUT SET_VLAN_VID SET_VLAN_PCP STRIP_VLAN SET_DL_SRC SET_DL_DST SET_NW_SRC SET_NW_DST SET_NW_TOS SET_TP_SRC SET_TP_DST ENQUEUE 1(p1): addr:aa:55:aa:55:00:0x config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max 99(p2): addr:aa:55:aa:55:00:0x config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max LOCAL(br0): addr:aa:55:aa:55:00:0x config: 0 state: 0 speed: 0 Mbps now, 0 Mbps max OFPT_GET_CONFIG_REPLY: frags=normal miss_send_len=0 ]) 
OVS_VSWITCHD_STOP AT_CLEANUP dnl This is really bare-bones. dnl It at least checks request and reply serialization and deserialization. AT_SETUP([ofproto - port stats - (OpenFlow 1.0)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -vwarn dump-ports br0], [0], [stdout]) AT_CHECK([STRIP_XIDS stdout], [0], [dnl OFPST_PORT reply: 1 ports port LOCAL: rx pkts=0, bytes=0, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=0, bytes=0, drop=0, errs=0, coll=0 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - port stats - (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn dump-ports br0], [0], [stdout]) AT_CHECK([STRIP_XIDS stdout], [0], [dnl OFPST_PORT reply (OF1.2): 1 ports port LOCAL: rx pkts=0, bytes=0, drop=0, errs=0, frame=0, over=0, crc=0 tx pkts=0, bytes=0, drop=0, errs=0, coll=0 ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl This is really bare-bones. dnl It at least checks request and reply serialization and deserialization. AT_SETUP([ofproto - port-desc stats (OpenFlow 1.0)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -vwarn dump-ports-desc br0], [0], [stdout]) AT_CHECK([STRIP_XIDS stdout], [0], [dnl OFPST_PORT_DESC reply: LOCAL(br0): addr:aa:55:aa:55:00:00 config: 0 state: 0 speed: 0 Mbps now, 0 Mbps max ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl This is really bare-bones. dnl It at least checks request and reply serialization and deserialization. AT_SETUP([ofproto - port-desc stats (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn dump-ports-desc br0], [0], [stdout]) AT_CHECK([STRIP_XIDS stdout], [0], [dnl OFPST_PORT_DESC reply (OF1.2): LOCAL(br0): addr:aa:55:aa:55:00:00 config: 0 state: 0 speed: 0 Mbps now, 0 Mbps max ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl This is really bare-bones. dnl It at least checks request and reply serialization and deserialization. 
AT_SETUP([ofproto - queue stats - (OpenFlow 1.0)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -vwarn queue-stats br0], [0], [stdout]) AT_CHECK([STRIP_XIDS stdout], [0], [dnl OFPST_QUEUE reply: 0 queues ]) AT_CHECK([ovs-ofctl -vwarn queue-stats br0 ANY 5], [0], [OFPT_ERROR (xid=0x2): OFPQOFC_BAD_QUEUE OFPST_QUEUE request (xid=0x2):port=ANY queue=5 ]) AT_CHECK([ovs-ofctl -vwarn queue-stats br0 10], [0], [OFPT_ERROR (xid=0x2): OFPQOFC_BAD_PORT OFPST_QUEUE request (xid=0x2):port=10 queue=ALL ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - queue stats - (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn queue-stats br0], [0], [stdout]) AT_CHECK([STRIP_XIDS stdout], [0], [dnl OFPST_QUEUE reply (OF1.2): 0 queues ]) AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn queue-stats br0 ALL 5], [0], [OFPT_ERROR (OF1.2) (xid=0x2): OFPQOFC_BAD_QUEUE OFPST_QUEUE request (OF1.2) (xid=0x2):port=ANY queue=5 ]) AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn queue-stats br0 10], [0], [OFPT_ERROR (OF1.2) (xid=0x2): OFPQOFC_BAD_PORT OFPST_QUEUE request (OF1.2) (xid=0x2):port=10 queue=ALL ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod-port (OpenFlow 1.0)]) OVS_VSWITCHD_START for command_config_state in \ 'up 0 0' \ 'noflood NO_FLOOD 0' \ 'down PORT_DOWN,NO_FLOOD LINK_DOWN' \ 'flood PORT_DOWN LINK_DOWN' \ 'no-receive PORT_DOWN,NO_RECV LINK_DOWN' \ 'no-forward PORT_DOWN,NO_RECV,NO_FWD LINK_DOWN' \ 'no-packet-in PORT_DOWN,NO_RECV,NO_FWD,NO_PACKET_IN LINK_DOWN' \ 'forward PORT_DOWN,NO_RECV,NO_PACKET_IN LINK_DOWN' \ 'packet-in PORT_DOWN,NO_RECV LINK_DOWN' \ 'up NO_RECV 0' \ 'receive 0 0' do set $command_config_state command=$[1] config=`echo $[2] | sed 's/,/ /g'` state=$[3] AT_CHECK([ovs-ofctl -vwarn mod-port br0 br0 $command]) AT_CHECK([ovs-ofctl -vwarn show br0], [0], [stdout]) AT_CHECK_UNQUOTED([STRIP_XIDS stdout], [0], [dnl OFPT_FEATURES_REPLY: dpid:fedcba9876543210 n_tables:254, n_buffers:256 capabilities: FLOW_STATS TABLE_STATS PORT_STATS QUEUE_STATS ARP_MATCH_IP 
actions: OUTPUT SET_VLAN_VID SET_VLAN_PCP STRIP_VLAN SET_DL_SRC SET_DL_DST SET_NW_SRC SET_NW_DST SET_NW_TOS SET_TP_SRC SET_TP_DST ENQUEUE LOCAL(br0): addr:aa:55:aa:55:00:00 config: $config state: $state speed: 0 Mbps now, 0 Mbps max OFPT_GET_CONFIG_REPLY: frags=normal miss_send_len=0 ]) done OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod-port (OpenFlow 1.2)]) OVS_VSWITCHD_START for command_config_state in \ 'up 0 0' \ 'down PORT_DOWN LINK_DOWN' \ 'no-receive PORT_DOWN,NO_RECV LINK_DOWN' \ 'no-forward PORT_DOWN,NO_RECV,NO_FWD LINK_DOWN' \ 'no-packet-in PORT_DOWN,NO_RECV,NO_FWD,NO_PACKET_IN LINK_DOWN' \ 'forward PORT_DOWN,NO_RECV,NO_PACKET_IN LINK_DOWN' \ 'packet-in PORT_DOWN,NO_RECV LINK_DOWN' \ 'up NO_RECV 0' \ 'receive 0 0' do set $command_config_state command=$[1] config=`echo $[2] | sed 's/,/ /g'` state=$[3] AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn mod-port br0 br0 $command]) AT_CHECK([ovs-ofctl -O OpenFlow12 -vwarn show br0], [0], [stdout]) AT_CHECK_UNQUOTED([STRIP_XIDS stdout], [0], [dnl OFPT_FEATURES_REPLY (OF1.2): dpid:fedcba9876543210 n_tables:254, n_buffers:256 capabilities: FLOW_STATS TABLE_STATS PORT_STATS QUEUE_STATS LOCAL(br0): addr:aa:55:aa:55:00:00 config: $config state: $state speed: 0 Mbps now, 0 Mbps max OFPT_GET_CONFIG_REPLY (OF1.2): frags=normal miss_send_len=0 ]) done OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - basic flow_mod commands (NXM)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [NXST_FLOW reply: ]) AT_CHECK([echo 'in_port=2,actions=1' | ovs-ofctl add-flows br0 -]) AT_CHECK([ovs-ofctl add-flow br0 in_port=1,actions=2]) AT_CHECK([ovs-ofctl -F nxm add-flow br0 table=1,in_port=4,actions=3]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=output:2 in_port=2 actions=output:1 table=1, in_port=4 actions=output:3 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl dump-aggregate br0 table=0 | STRIP_XIDS], [0], [dnl NXST_AGGREGATE reply: packet_count=0 byte_count=0 flow_count=2 
]) AT_CHECK([ovs-ofctl del-flows br0]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - basic flow_mod commands (OpenFlow 1.0)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -F openflow10 dump-flows br0 | ofctl_strip], [0], [OFPST_FLOW reply: ]) AT_CHECK([echo 'in_port=2,actions=1' | ovs-ofctl -F openflow10 add-flows br0 -]) AT_CHECK([ovs-ofctl -F openflow10 add-flow br0 in_port=1,actions=2]) AT_CHECK([ovs-ofctl -F openflow10 add-flow br0 table=1,in_port=4,actions=3]) AT_CHECK([ovs-ofctl -F openflow10 dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=output:2 in_port=2 actions=output:1 table=1, in_port=4 actions=output:3 OFPST_FLOW reply: ]) AT_CHECK([ovs-ofctl -F openflow10 dump-aggregate br0 table=0 | STRIP_XIDS], [0], [dnl OFPST_AGGREGATE reply: packet_count=0 byte_count=0 flow_count=2 ]) AT_CHECK([ovs-ofctl -F openflow10 del-flows br0]) AT_CHECK([ovs-ofctl -F openflow10 dump-flows br0 | ofctl_strip], [0], [OFPST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - basic flow_mod commands (OpenFlow 1.1)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip], [0], [OFPST_FLOW reply (OF1.1): ]) AT_CHECK([echo 'in_port=2,actions=1' | ovs-ofctl -O OpenFlow11 add-flows br0 -]) AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 in_port=1,actions=2]) AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 table=1,in_port=4,actions=3]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=output:2 in_port=2 actions=output:1 table=1, in_port=4 actions=output:3 OFPST_FLOW reply (OF1.1): ]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-aggregate br0 table=0 | STRIP_XIDS], [0], [dnl OFPST_AGGREGATE reply (OF1.1): packet_count=0 byte_count=0 flow_count=2 ]) AT_CHECK([ovs-ofctl -O OpenFlow11 del-flows br0]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip], [0], [OFPST_FLOW reply (OF1.1): ]) OVS_VSWITCHD_STOP 
AT_CLEANUP AT_SETUP([ofproto - set-field flow_mod commands (NXM)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 ipv6,table=1,in_port=3,actions=drop]) AT_CHECK([ovs-ofctl add-flow br0 ipv6,table=1,in_port=3,actions=set_field:fe80:0123:4567:890a:a6ba:dbff:fefe:59fa-\>ipv6_src]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl table=1, ipv6,in_port=3 actions=load:0xa6badbfffefe59fa->NXM_NX_IPV6_SRC[[0..63]],load:0xfe8001234567890a->NXM_NX_IPV6_SRC[[64..127]] NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - basic flow_mod commands (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip], [0], [OFPST_FLOW reply (OF1.2): ]) AT_CHECK([echo 'in_port=2,actions=1' | ovs-ofctl -O OpenFlow12 add-flows br0 -]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 in_port=1,actions=2]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 table=1,in_port=4,actions=3]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=output:2 in_port=2 actions=output:1 table=1, in_port=4 actions=output:3 OFPST_FLOW reply (OF1.2): ]) AT_CHECK([ovs-ofctl -O OpenFlow12 del-flows br0]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip], [0], [OFPST_FLOW reply (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - dump flows with cookie]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x2,in_port=2,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x3,in_port=3,actions=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, in_port=2 actions=output:1 cookie=0x3, in_port=3 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl dump-aggregate br0 table=0 | STRIP_XIDS], [0], [dnl NXST_AGGREGATE reply: packet_count=0 byte_count=0 flow_count=3 ]) AT_CHECK([ovs-ofctl dump-flows br0 cookie=0x3/-1 | ofctl_strip | sort], 
[0], [dnl cookie=0x3, in_port=3 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl dump-aggregate br0 cookie=0x3/-1 | STRIP_XIDS], [0], [dnl NXST_AGGREGATE reply: packet_count=0 byte_count=0 flow_count=1 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flow with cookie change (OpenFlow 1.0)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -F openflow10 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -F openflow10 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 OFPST_FLOW reply: ]) AT_CHECK([ovs-ofctl -F openflow10 mod-flows br0 cookie=0x2,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -F openflow10 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x2, in_port=1 actions=output:1 OFPST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flow with cookie change (NXM)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -F nxm add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -F nxm dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl -F nxm mod-flows br0 cookie=0x2,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -F nxm dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x2, in_port=1 actions=output:1 NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - no mod flow with cookie change (OpenFlow 1.1)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 OFPST_FLOW reply (OF1.1): ]) AT_CHECK([ovs-ofctl -O OpenFlow11 mod-flows br0 cookie=0x2,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 OFPST_FLOW reply (OF1.1): ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl The OpenFlow 1.2 spec states that the cookie may not be modified AT_SETUP([ofproto - no mod flow 
with cookie change (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 OFPST_FLOW reply (OF1.2): ]) AT_CHECK([ovs-ofctl -O OpenFlow12 mod-flows br0 cookie=0x2,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 OFPST_FLOW reply (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flows based on cookie mask (OpenFlow 1.0)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=2,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x2,in_port=3,actions=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x1, in_port=2 actions=output:1 cookie=0x2, in_port=3 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl -F nxm mod-flows br0 cookie=0x1/0xff,actions=4]) AT_CHECK([ovs-ofctl -F nxm dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:4 cookie=0x1, in_port=2 actions=output:4 cookie=0x2, in_port=3 actions=output:1 NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flows based on cookie mask (OpenFlow 1.1)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 cookie=0x1,in_port=2,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 cookie=0x2,in_port=3,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x1, in_port=2 actions=output:1 cookie=0x2, in_port=3 actions=output:1 OFPST_FLOW reply (OF1.1): ]) AT_CHECK([ovs-ofctl -O OpenFlow11 mod-flows br0 cookie=0x1/0xff,actions=4]) AT_CHECK([ovs-ofctl -O 
OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:4 cookie=0x1, in_port=2 actions=output:4 cookie=0x2, in_port=3 actions=output:1 OFPST_FLOW reply (OF1.1): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flows based on cookie mask (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 cookie=0x1,in_port=2,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 cookie=0x2,in_port=3,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x1, in_port=2 actions=output:1 cookie=0x2, in_port=3 actions=output:1 OFPST_FLOW reply (OF1.2): ]) AT_CHECK([ovs-ofctl -O OpenFlow12 mod-flows br0 cookie=0x1/0xff,actions=4]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:4 cookie=0x1, in_port=2 actions=output:4 cookie=0x2, in_port=3 actions=output:1 OFPST_FLOW reply (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl The OpenFlow 1.2 spec states that the cookie may not be modified AT_SETUP([ofproto - mod flows based on cookie mask with cookie change]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=2,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x2,in_port=3,actions=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x1, in_port=2 actions=output:1 cookie=0x2, in_port=3 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl -F nxm mod-flows br0 cookie=1/-1,cookie=4,actions=4]) AT_CHECK([ovs-ofctl -F nxm dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x2, in_port=3 actions=output:1 cookie=0x4, in_port=1 actions=output:4 cookie=0x4, in_port=2 actions=output:4 NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP 
AT_SETUP([ofproto - mod flow with cookie miss (mask==0) - NXM]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -F nxm mod-flows br0 in_port=1,actions=1]) AT_CHECK([ovs-ofctl -F nxm dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=output:1 NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flow with cookie miss (mask==0) - OF1.1]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O openflow11 mod-flows br0 in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O openflow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=output:1 OFPST_FLOW reply (OF1.1): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flow with cookie miss (mask==0) - OF1.2]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O openflow12 mod-flows br0 in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O openflow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl OFPST_FLOW reply (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flow with cookie miss (mask!=0) - NXM]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -F nxm mod-flows br0 cookie=1/1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -F nxm dump-flows br0 | ofctl_strip | sort], [0], [dnl NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flow with cookie miss (mask!=0) - OF1.1]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O openflow11 mod-flows br0 cookie=1/1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O openflow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl OFPST_FLOW reply (OF1.1): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - mod flow with cookie miss (mask!=0) - OF1.2]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O openflow12 mod-flows br0 cookie=1/1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O openflow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl OFPST_FLOW reply (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - del flows with cookies]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 
cookie=0x2,in_port=2,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x3,in_port=3,actions=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, in_port=2 actions=output:1 cookie=0x3, in_port=3 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl del-flows br0]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - del flows based on cookie]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x2,in_port=2,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x3,in_port=3,actions=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, in_port=2 actions=output:1 cookie=0x3, in_port=3 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl del-flows br0 cookie=0x3/-1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, in_port=2 actions=output:1 NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - del flows based on cookie mask]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x2,in_port=2,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x3,in_port=3,actions=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, in_port=2 actions=output:1 cookie=0x3, in_port=3 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl del-flows br0 cookie=0x3/0x1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x2, in_port=2 actions=output:1 NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - del flows based on table id (NXM)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=1,actions=1]) 
AT_CHECK([ovs-ofctl add-flow br0 cookie=0x2,in_port=2,table=1,actions=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, table=1, in_port=2 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl del-flows br0 table=0]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x2, table=1, in_port=2 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl del-flows br0 table=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl add-flow br0 cookie=0x2,in_port=2,table=1,actions=1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, table=1, in_port=2 actions=output:1 NXST_FLOW reply: ]) AT_CHECK([ovs-ofctl del-flows br0]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - del flows based on table id (OpenFlow 1.1)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 cookie=0x2,in_port=2,table=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, table=1, in_port=2 actions=output:1 OFPST_FLOW reply (OF1.1): ]) AT_CHECK([ovs-ofctl -O OpenFlow11 del-flows br0 table=0]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x2, table=1, in_port=2 actions=output:1 OFPST_FLOW reply (OF1.1): ]) AT_CHECK([ovs-ofctl -O OpenFlow11 del-flows br0 table=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl OFPST_FLOW reply (OF1.1): ]) AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 add-flow br0 
cookie=0x2,in_port=2,table=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, table=1, in_port=2 actions=output:1 OFPST_FLOW reply (OF1.1): ]) AT_CHECK([ovs-ofctl -O OpenFlow11 del-flows br0]) AT_CHECK([ovs-ofctl -O OpenFlow11 dump-flows br0 | ofctl_strip | sort], [0], [dnl OFPST_FLOW reply (OF1.1): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - del flows based on table id (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 cookie=0x2,in_port=2,table=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, table=1, in_port=2 actions=output:1 OFPST_FLOW reply (OF1.2): ]) AT_CHECK([ovs-ofctl -O OpenFlow12 del-flows br0 table=0]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x2, table=1, in_port=2 actions=output:1 OFPST_FLOW reply (OF1.2): ]) AT_CHECK([ovs-ofctl -O OpenFlow12 del-flows br0 table=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl OFPST_FLOW reply (OF1.2): ]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 cookie=0x1,in_port=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 cookie=0x2,in_port=2,table=1,actions=1]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0x1, in_port=1 actions=output:1 cookie=0x2, table=1, in_port=2 actions=output:1 OFPST_FLOW reply (OF1.2): ]) AT_CHECK([ovs-ofctl -O OpenFlow12 del-flows br0]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl OFPST_FLOW reply (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - flow table configuration (OpenFlow 1.0)]) OVS_VSWITCHD_START # Check the default configuration. 
(echo "OFPST_TABLE reply (xid=0x2): 254 tables 0: classifier: wild=0x3fffff, max=1000000, active=0 lookup=0, matched=0" x=1 while test $x -lt 254; do printf " %d: %-8s: wild=0x3fffff, max=1000000, active=0 lookup=0, matched=0 " $x table$x x=`expr $x + 1` done) > expout AT_CHECK([ovs-ofctl dump-tables br0], [0], [expout]) # Change the configuration. AT_CHECK( [ovs-vsctl \ -- --id=@t0 create Flow_Table name=main \ -- --id=@t1 create Flow_Table flow-limit=1024 \ -- set bridge br0 'flow_tables={1=@t1,0=@t0}' \ | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> <1> ]) # Check that the configuration was updated. mv expout orig-expout (echo "OFPST_TABLE reply (xid=0x2): 254 tables 0: main : wild=0x3fffff, max=1000000, active=0 lookup=0, matched=0 1: table1 : wild=0x3fffff, max= 1024, active=0 lookup=0, matched=0" tail -n +6 orig-expout) > expout AT_CHECK([ovs-ofctl dump-tables br0], [0], [expout]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - flow table configuration (OpenFlow 1.2)]) OVS_VSWITCHD_START # Check the default configuration. (mid="wild=0xffffffffff, max=1000000," tail=" lookup=0, matched=0 match=0xffffffffff, instructions=0x00000007, config=0x00000003 write_actions=0x00000000, apply_actions=0x00000000 write_setfields=0x000000ffffffffff apply_setfields=0x000000ffffffffff metadata_match=0xffffffffffffffff metadata_write=0xffffffffffffffff" echo "OFPST_TABLE reply (OF1.2) (xid=0x2): 254 tables 0: classifier: $mid active=0$tail" x=1 while test $x -lt 254; do printf " %d: %-8s: $mid active=0$tail " $x table$x x=`expr $x + 1` done) > expout AT_CHECK([ovs-ofctl -O OpenFlow12 dump-tables br0], [0], [expout]) # Change the configuration. AT_CHECK( [ovs-vsctl \ -- --id=@t0 create Flow_Table name=main \ -- --id=@t1 create Flow_Table flow-limit=1024 \ -- set bridge br0 'flow_tables={1=@t1,0=@t0}' \ | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> <1> ]) # Check that the configuration was updated. 
mv expout orig-expout (echo "OFPST_TABLE reply (OF1.2) (xid=0x2): 254 tables 0: main : wild=0xffffffffff, max=1000000, active=0" tail -n +3 orig-expout | head -7 echo " 1: table1 : wild=0xffffffffff, max= 1024, active=0" tail -n +11 orig-expout) > expout AT_CHECK([ovs-ofctl -O OpenFlow12 dump-tables br0], [0], [expout]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - hard limits on flow table size (OpenFLow 1.0)]) OVS_VSWITCHD_START # Configure a maximum of 4 flows. AT_CHECK( [ovs-vsctl \ -- --id=@t0 create Flow_Table flow-limit=4 \ -- set bridge br0 flow_tables:0=@t0 \ | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> ]) # Add 4 flows. for in_port in 1 2 3 4; do ovs-ofctl add-flow br0 in_port=$in_port,actions=drop done AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=drop in_port=2 actions=drop in_port=3 actions=drop in_port=4 actions=drop NXST_FLOW reply: ]) # Adding another flow will be refused. AT_CHECK([ovs-ofctl add-flow br0 in_port=5,actions=drop], [1], [], [stderr]) AT_CHECK([head -n 1 stderr | ofctl_strip], [0], [OFPT_ERROR: OFPFMFC_TABLE_FULL ]) # Also a mod-flow that would add a flow will be refused. AT_CHECK([ovs-ofctl mod-flows br0 in_port=5,actions=drop], [1], [], [stderr]) AT_CHECK([head -n 1 stderr | ofctl_strip], [0], [OFPT_ERROR: OFPFMFC_TABLE_FULL ]) # Replacing or modifying an existing flow is allowed. AT_CHECK([ovs-ofctl add-flow br0 in_port=4,actions=normal]) AT_CHECK([ovs-ofctl mod-flows br0 in_port=3,actions=output:1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=drop in_port=2 actions=drop in_port=3 actions=output:1 in_port=4 actions=NORMAL NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - hard limits on flow table size (OpenFLow 1.2)]) OVS_VSWITCHD_START # Configure a maximum of 4 flows. AT_CHECK( [ovs-vsctl \ -- --id=@t0 create Flow_Table flow-limit=4 \ -- set bridge br0 flow_tables:0=@t0 \ | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> ]) # Add 4 flows. 
for in_port in 1 2 3 4; do ovs-ofctl -O OpenFlow12 add-flow br0 in_port=$in_port,actions=drop done AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=drop in_port=2 actions=drop in_port=3 actions=drop in_port=4 actions=drop OFPST_FLOW reply (OF1.2): ]) # Adding another flow will be refused. AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 in_port=5,actions=drop], [1], [], [stderr]) AT_CHECK([head -n 1 stderr | ofctl_strip], [0], [OFPT_ERROR (OF1.2): OFPFMFC_TABLE_FULL ]) # Replacing or modifying an existing flow is allowed. AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 in_port=4,actions=normal]) AT_CHECK([ovs-ofctl -O OpenFlow12 mod-flows br0 in_port=3,actions=output:1]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=1 actions=drop in_port=2 actions=drop in_port=3 actions=output:1 in_port=4 actions=NORMAL OFPST_FLOW reply (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - eviction upon table overflow (OpenFlow 1.0)]) OVS_VSWITCHD_START # Configure a maximum of 4 flows. AT_CHECK( [ovs-vsctl \ -- --id=@t0 create Flow_Table flow-limit=4 overflow-policy=evict \ -- set bridge br0 flow_tables:0=@t0 \ | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> ]) # Add 4 flows. for in_port in 4 3 2 1; do ovs-ofctl add-flow br0 idle_timeout=${in_port}0,in_port=$in_port,actions=drop done AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl idle_timeout=10, in_port=1 actions=drop idle_timeout=20, in_port=2 actions=drop idle_timeout=30, in_port=3 actions=drop idle_timeout=40, in_port=4 actions=drop NXST_FLOW reply: ]) # Adding another flow will cause the one that expires soonest to be evicted. 
AT_CHECK([ovs-ofctl add-flow br0 in_port=5,actions=drop]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl idle_timeout=20, in_port=2 actions=drop idle_timeout=30, in_port=3 actions=drop idle_timeout=40, in_port=4 actions=drop in_port=5 actions=drop NXST_FLOW reply: ]) # A mod-flow that adds a flow also causes eviction, but replacing or # modifying an existing flow doesn't. AT_CHECK([ovs-ofctl mod-flows br0 in_port=6,actions=drop]) AT_CHECK([ovs-ofctl add-flow br0 in_port=4,actions=normal]) AT_CHECK([ovs-ofctl mod-flows br0 in_port=3,actions=output:1]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl idle_timeout=30, in_port=3 actions=output:1 in_port=4 actions=NORMAL in_port=5 actions=drop in_port=6 actions=drop NXST_FLOW reply: ]) # Flows with no timeouts at all cannot be evicted. AT_CHECK([ovs-ofctl add-flow br0 in_port=7,actions=normal]) AT_CHECK([ovs-ofctl add-flow br0 in_port=8,actions=drop], [1], [], [stderr]) AT_CHECK([head -n 1 stderr | ofctl_strip], [0], [OFPT_ERROR: OFPFMFC_TABLE_FULL ]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=4 actions=NORMAL in_port=5 actions=drop in_port=6 actions=drop in_port=7 actions=NORMAL NXST_FLOW reply: ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - eviction upon table overflow (OpenFlow 1.2)]) OVS_VSWITCHD_START # Configure a maximum of 4 flows. AT_CHECK( [ovs-vsctl \ -- --id=@t0 create Flow_Table flow-limit=4 overflow-policy=evict \ -- set bridge br0 flow_tables:0=@t0 \ | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> ]) # Add 4 flows. 
for in_port in 4 3 2 1; do ovs-ofctl -O OpenFlow12 add-flow br0 idle_timeout=${in_port}0,in_port=$in_port,actions=drop done AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl idle_timeout=10, in_port=1 actions=drop idle_timeout=20, in_port=2 actions=drop idle_timeout=30, in_port=3 actions=drop idle_timeout=40, in_port=4 actions=drop OFPST_FLOW reply (OF1.2): ]) # Adding another flow will cause the one that expires soonest to be evicted. AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 in_port=5,actions=drop]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl idle_timeout=20, in_port=2 actions=drop idle_timeout=30, in_port=3 actions=drop idle_timeout=40, in_port=4 actions=drop in_port=5 actions=drop OFPST_FLOW reply (OF1.2): ]) # In Open Flow 1.2 a mod-flow does not ever add a flow and thus # has no effect on eviction AT_CHECK([ovs-ofctl -O OpenFlow12 mod-flows br0 in_port=6,actions=drop]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 in_port=4,actions=normal]) AT_CHECK([ovs-ofctl -O OpenFlow12 mod-flows br0 in_port=3,actions=output:1]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl idle_timeout=20, in_port=2 actions=drop idle_timeout=30, in_port=3 actions=output:1 in_port=4 actions=NORMAL in_port=5 actions=drop OFPST_FLOW reply (OF1.2): ]) # Flows with no timeouts at all cannot be evicted. 
AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 in_port=6,actions=drop]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 in_port=7,actions=normal]) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flow br0 in_port=8,actions=drop], [1], [], [stderr]) AT_CHECK([head -n 1 stderr | ofctl_strip], [0], [OFPT_ERROR (OF1.2): OFPFMFC_TABLE_FULL ]) AT_CHECK([ovs-ofctl -O OpenFlow12 dump-flows br0 | ofctl_strip | sort], [0], [dnl in_port=4 actions=NORMAL in_port=5 actions=drop in_port=6 actions=drop in_port=7 actions=NORMAL OFPST_FLOW reply (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - eviction upon table overflow, with fairness (OpenFlow 1.0)]) OVS_VSWITCHD_START # Configure a maximum of 4 flows. AT_CHECK( [ovs-vsctl \ -- --id=@t0 create Flow_Table name=evict flow-limit=4 \ overflow-policy=evict \ groups='"NXM_OF_IN_PORT[[]]"' \ -- set bridge br0 flow_tables:0=@t0 \ | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> ]) # Add 4 flows. ovs-ofctl add-flows br0 - < ]) # Add 4 flows. ovs-ofctl -O OpenFlow12 add-flows br0 - < expout # OFPT_PACKET_IN, OFPR_ACTION (controller_id=0) ovs-ofctl -v packet-out br0 controller controller '0001020304050010203040501234' if test X"$1" = X"OFPR_ACTION"; then shift; echo >>expout "OFPT_PACKET_IN: total_len=14 in_port=CONTROLLER (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234" fi # OFPT_PACKET_IN, OFPR_NO_MATCH (controller_id=123) ovs-ofctl -v packet-out br0 controller 'controller(reason=no_match,id=123)' '0001020304050010203040501234' if test X"$1" = X"OFPR_NO_MATCH"; then shift; echo >>expout "OFPT_PACKET_IN: total_len=14 in_port=CONTROLLER (via no_match) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234" fi # OFPT_PACKET_IN, OFPR_INVALID_TTL (controller_id=0) ovs-ofctl packet-out br0 controller dec_ttl 
'002583dfb4000026b98cb0f908004500003fb7e200000011339bac11370dac100002d7730035002b8f6d86fb0100000100000000000006626c702d7873066e696369726103636f6d00000f00' if test X"$1" = X"OFPR_INVALID_TTL"; then shift; echo >>expout "OFPT_PACKET_IN: total_len=76 in_port=CONTROLLER (via invalid_ttl) data_len=76 (unbuffered) udp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:26:b9:8c:b0:f9,dl_dst=00:25:83:df:b4:00,nw_src=172.17.55.13,nw_dst=172.16.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=55155,tp_dst=53 udp_csum:8f6d" fi # OFPT_PORT_STATUS, OFPPR_ADD ovs-vsctl add-port br0 test -- set Interface test type=dummy ofport_request=1 if test X"$1" = X"OFPPR_ADD"; then shift; echo >>expout "OFPT_PORT_STATUS: ADD: 1(test): addr:aa:55:aa:55:00:0x config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max" fi # OFPT_PORT_STATUS, OFPPR_DELETE ovs-vsctl del-port br0 test if test X"$1" = X"OFPPR_DELETE"; then shift; echo >>expout "OFPT_PORT_STATUS: DEL: 1(test): addr:aa:55:aa:55:00:0x config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max" fi # OFPT_FLOW_REMOVED, OFPRR_DELETE ovs-ofctl add-flow br0 send_flow_rem,actions=drop ovs-ofctl --strict del-flows br0 '' if test X"$1" = X"OFPRR_DELETE"; then shift; echo >>expout "OFPT_FLOW_REMOVED: reason=delete" fi AT_FAIL_IF([test X"$1" != X]) ovs-appctl -t ovs-ofctl ofctl/barrier echo >>expout "OFPT_BARRIER_REPLY:" AT_CHECK( [[sed ' s/ (xid=0x[0-9a-fA-F]*)// s/ *duration.*// s/00:0.$/00:0x/' < monitor.log]], [0], [expout]) } # It's a service connection so initially there should be no async messages. check_async 1 # Set miss_send_len to 128, turning on packet-ins for our service connection. ovs-appctl -t ovs-ofctl ofctl/send 0109000c0123456700000080 check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE # Set miss_send_len to 128 and enable invalid_ttl. 
ovs-appctl -t ovs-ofctl ofctl/send 0109000c0123456700040080 check_async 3 OFPR_ACTION OFPR_INVALID_TTL OFPPR_ADD OFPPR_DELETE OFPRR_DELETE # Become slave, which should disable everything except port status. ovs-appctl -t ovs-ofctl ofctl/send 0104001400000002000023200000000a00000002 check_async 4 OFPPR_ADD OFPPR_DELETE # Use NXT_SET_ASYNC_CONFIG to enable a patchwork of asynchronous messages. ovs-appctl -t ovs-ofctl ofctl/send 01040028000000020000232000000013000000020000000500000005000000020000000200000005 check_async 5 OFPR_INVALID_TTL OFPPR_DELETE OFPRR_DELETE # Set controller ID 123. ovs-appctl -t ovs-ofctl ofctl/send 01040018000000030000232000000014000000000000007b check_async 6 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE # Restore controller ID 0. ovs-appctl -t ovs-ofctl ofctl/send 010400180000000300002320000000140000000000000000 # Become master. ovs-appctl -t ovs-ofctl ofctl/send 0104001400000002000023200000000a00000001 check_async 7 OFPR_ACTION OFPPR_ADD ovs-appctl -t ovs-ofctl exit OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - asynchronous message control (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 monitor br0 --detach --no-chdir --pidfile]) check_async () { printf '\n\n--- check_async %d ---\n\n\n' $1 INDEX=$1 shift ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log : > expout # OFPT_PACKET_IN, OFPR_ACTION (controller_id=0) ovs-ofctl -O OpenFlow12 -v packet-out br0 none controller '0001020304050010203040501234' if test X"$1" = X"OFPR_ACTION"; then shift; echo >>expout "OFPT_PACKET_IN (OF1.2): total_len=14 in_port=ANY (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234" fi # OFPT_PACKET_IN, OFPR_NO_MATCH (controller_id=123) ovs-ofctl -O OpenFlow12 -v packet-out br0 none 'controller(reason=no_match,id=123)' '0001020304050010203040501234' if test X"$1" = X"OFPR_NO_MATCH"; then shift; echo >>expout 
"OFPT_PACKET_IN (OF1.2): total_len=14 in_port=ANY (via no_match) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234" fi # OFPT_PACKET_IN, OFPR_INVALID_TTL (controller_id=0) ovs-ofctl -O OpenFlow12 packet-out br0 none dec_ttl '002583dfb4000026b98cb0f908004500003fb7e200000011339bac11370dac100002d7730035002b8f6d86fb0100000100000000000006626c702d7873066e696369726103636f6d00000f00' if test X"$1" = X"OFPR_INVALID_TTL"; then shift; echo >>expout "OFPT_PACKET_IN (OF1.2): total_len=76 in_port=ANY (via invalid_ttl) data_len=76 (unbuffered) udp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:26:b9:8c:b0:f9,dl_dst=00:25:83:df:b4:00,nw_src=172.17.55.13,nw_dst=172.16.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=55155,tp_dst=53 udp_csum:8f6d" fi # OFPT_PORT_STATUS, OFPPR_ADD ovs-vsctl add-port br0 test -- set Interface test type=dummy if test X"$1" = X"OFPPR_ADD"; then shift; echo >>expout "OFPT_PORT_STATUS (OF1.2): ADD: ${INDEX}(test): addr:aa:55:aa:55:00:0x config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max" fi # OFPT_PORT_STATUS, OFPPR_DELETE ovs-vsctl del-port br0 test if test X"$1" = X"OFPPR_DELETE"; then shift; echo >>expout "OFPT_PORT_STATUS (OF1.2): DEL: ${INDEX}(test): addr:aa:55:aa:55:00:0x config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max" fi # OFPT_FLOW_REMOVED, OFPRR_DELETE ovs-ofctl -O OpenFlow12 add-flow br0 send_flow_rem,actions=drop ovs-ofctl -O OpenFlow12 --strict del-flows br0 '' if test X"$1" = X"OFPRR_DELETE"; then shift; echo >>expout "OFPT_FLOW_REMOVED (OF1.2): reason=delete table_id=0" fi AT_FAIL_IF([test X"$1" != X]) ovs-appctl -t ovs-ofctl ofctl/barrier echo >>expout "OFPT_BARRIER_REPLY (OF1.2):" AT_CHECK( [[sed ' s/ (xid=0x[0-9a-fA-F]*)// s/ *duration.*// s/00:0.$/00:0x/' < monitor.log]], [0], [expout]) } # It's a service connection so initially there should be no async messages. 
check_async 1 # Set miss_send_len to 128, turning on packet-ins for our service connection. ovs-appctl -t ovs-ofctl ofctl/send 0309000c0123456700000080 check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE # Set miss_send_len to 128 and enable invalid_ttl. ovs-appctl -t ovs-ofctl ofctl/send 0309000c0123456700040080 check_async 3 OFPR_ACTION OFPR_INVALID_TTL OFPPR_ADD OFPPR_DELETE OFPRR_DELETE # Become slave (OF 1.2), which should disable everything except port status. ovs-appctl -t ovs-ofctl ofctl/send 031800180000000200000003000000000000000000000001 check_async 4 OFPPR_ADD OFPPR_DELETE # Use NXT_SET_ASYNC_CONFIG to enable a patchwork of asynchronous messages. ovs-appctl -t ovs-ofctl ofctl/send 03040028000000020000232000000013000000020000000500000005000000020000000200000005 check_async 5 OFPR_INVALID_TTL OFPPR_DELETE OFPRR_DELETE # Set controller ID 123. ovs-appctl -t ovs-ofctl ofctl/send 03040018000000030000232000000014000000000000007b check_async 6 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE # Restore controller ID 0. ovs-appctl -t ovs-ofctl ofctl/send 030400180000000300002320000000140000000000000000 # Become master (OF 1.2). 
ovs-appctl -t ovs-ofctl ofctl/send 031800180000000400000002000000000000000000000002 check_async 7 OFPR_ACTION OFPPR_ADD ovs-appctl -t ovs-ofctl exit OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - asynchronous message control (OpenFlow 1.3)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow13 monitor br0 --detach --no-chdir --pidfile]) check_async () { printf '\n\n--- check_async %d ---\n\n\n' $1 INDEX=$1 shift ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log : > expout # OFPT_PACKET_IN, OFPR_ACTION (controller_id=0) ovs-ofctl -O OpenFlow13 -v packet-out br0 none controller '0001020304050010203040501234' if test X"$1" = X"OFPR_ACTION"; then shift; echo >>expout "OFPT_PACKET_IN (OF1.3): total_len=14 in_port=ANY (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234" fi # OFPT_PACKET_IN, OFPR_NO_MATCH (controller_id=123) ovs-ofctl -O OpenFlow13 -v packet-out br0 none 'controller(reason=no_match,id=123)' '0001020304050010203040501234' if test X"$1" = X"OFPR_NO_MATCH"; then shift; echo >>expout "OFPT_PACKET_IN (OF1.3): total_len=14 in_port=ANY (via no_match) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234" fi # OFPT_PACKET_IN, OFPR_INVALID_TTL (controller_id=0) ovs-ofctl -O OpenFlow13 packet-out br0 none dec_ttl '002583dfb4000026b98cb0f908004500003fb7e200000011339bac11370dac100002d7730035002b8f6d86fb0100000100000000000006626c702d7873066e696369726103636f6d00000f00' if test X"$1" = X"OFPR_INVALID_TTL"; then shift; echo >>expout "OFPT_PACKET_IN (OF1.3): total_len=76 in_port=ANY (via invalid_ttl) data_len=76 (unbuffered) udp,metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:26:b9:8c:b0:f9,dl_dst=00:25:83:df:b4:00,nw_src=172.17.55.13,nw_dst=172.16.0.2,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=55155,tp_dst=53 udp_csum:8f6d" fi # OFPT_PORT_STATUS, OFPPR_ADD ovs-vsctl add-port 
br0 test -- set Interface test type=dummy if test X"$1" = X"OFPPR_ADD"; then shift; echo >>expout "OFPT_PORT_STATUS (OF1.3): ADD: ${INDEX}(test): addr:aa:55:aa:55:00:0x config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max" fi # OFPT_PORT_STATUS, OFPPR_DELETE ovs-vsctl del-port br0 test if test X"$1" = X"OFPPR_DELETE"; then shift; echo >>expout "OFPT_PORT_STATUS (OF1.3): DEL: ${INDEX}(test): addr:aa:55:aa:55:00:0x config: PORT_DOWN state: LINK_DOWN speed: 0 Mbps now, 0 Mbps max" fi # OFPT_FLOW_REMOVED, OFPRR_DELETE ovs-ofctl -O OpenFlow13 add-flow br0 send_flow_rem,actions=drop ovs-ofctl -O OpenFlow13 --strict del-flows br0 '' if test X"$1" = X"OFPRR_DELETE"; then shift; echo >>expout "OFPT_FLOW_REMOVED (OF1.3): reason=delete table_id=0" fi AT_FAIL_IF([test X"$1" != X]) ovs-appctl -t ovs-ofctl ofctl/barrier echo >>expout "OFPT_BARRIER_REPLY (OF1.3):" AT_CHECK( [[sed ' s/ (xid=0x[0-9a-fA-F]*)// s/ *duration.*// s/00:0.$/00:0x/' < monitor.log]], [0], [expout]) } # It's a service connection so initially there should be no async messages. check_async 1 # Set miss_send_len to 128, turning on packet-ins for our service connection. ovs-appctl -t ovs-ofctl ofctl/send 0409000c0123456700000080 check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE # Become slave (OF 1.3), which should disable everything except port status. ovs-appctl -t ovs-ofctl ofctl/send 041800180000000200000003000000000000000000000001 check_async 3 OFPPR_ADD OFPPR_DELETE # Use OF 1.3 OFPT_SET_ASYNC to enable a patchwork of asynchronous messages. ovs-appctl -t ovs-ofctl ofctl/send 041c002000000002000000020000000500000005000000020000000200000005 check_async 4 OFPR_INVALID_TTL OFPPR_DELETE OFPRR_DELETE # Set controller ID 123. ovs-appctl -t ovs-ofctl ofctl/send 04040018000000030000232000000014000000000000007b check_async 5 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE # Restore controller ID 0. 
ovs-appctl -t ovs-ofctl ofctl/send 040400180000000300002320000000140000000000000000 # Become master (OF 1.3). ovs-appctl -t ovs-ofctl ofctl/send 041800180000000400000002000000000000000000000002 check_async 6 OFPR_ACTION OFPPR_ADD ovs-appctl -t ovs-ofctl exit OVS_VSWITCHD_STOP AT_CLEANUP dnl This test checks that the role request/response messaging works dnl and that generation_id is handled properly. AT_SETUP([ofproto - controller role (OpenFlow 1.2)]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -O OpenFlow12 monitor br0 --detach --no-chdir --pidfile]) ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log : > expout # find out current role ovs-appctl -t ovs-ofctl ofctl/send 031800180000000200000000000000000000000000000000 echo >>expout "send: OFPT_ROLE_REQUEST (OF1.2) (xid=0x2): role=nochange" echo >>expout "OFPT_ROLE_REPLY (OF1.2) (xid=0x2): role=equal" # Become slave (generation_id is initially undefined, so 2^63+2 should not be stale) ovs-appctl -t ovs-ofctl ofctl/send 031800180000000300000003000000008000000000000002 echo >>expout "send: OFPT_ROLE_REQUEST (OF1.2) (xid=0x3): role=slave generation_id=9223372036854775810" echo >>expout "OFPT_ROLE_REPLY (OF1.2) (xid=0x3): role=slave generation_id=9223372036854775810" # Try to become the master using a stale generation ID ovs-appctl -t ovs-ofctl ofctl/send 031800180000000400000002000000000000000000000002 echo >>expout "send: OFPT_ROLE_REQUEST (OF1.2) (xid=0x4): role=master generation_id=2" echo >>expout "OFPT_ERROR (OF1.2) (xid=0x4): OFPRRFC_STALE" echo >>expout "OFPT_ROLE_REQUEST (OF1.2) (xid=0x4): role=master generation_id=2" # Become master using a valid generation ID ovs-appctl -t ovs-ofctl ofctl/send 031800180000000500000002000000000000000000000001 echo >>expout "send: OFPT_ROLE_REQUEST (OF1.2) (xid=0x5): role=master generation_id=1" echo >>expout "OFPT_ROLE_REPLY (OF1.2) (xid=0x5): role=master generation_id=1" ovs-appctl -t ovs-ofctl ofctl/barrier echo >>expout 
"OFPT_BARRIER_REPLY (OF1.2) (xid=0x3):" AT_CHECK([cat monitor.log], [0], [expout]) ovs-appctl -t ovs-ofctl exit OVS_VSWITCHD_STOP AT_CLEANUP dnl This test checks that OFPT_PACKET_OUT accepts both OFPP_NONE (as dnl specified by OpenFlow 1.0) and OFPP_CONTROLLER (used by some dnl controllers despite the spec) as meaning a packet that was generated dnl by the controller. AT_SETUP([ofproto - packet-out from controller (OpenFlow 1.0)]) OVS_VSWITCHD_START ADD_OF_PORTS([br0], [1]) # Start a monitor listening for packet-ins. AT_CHECK([ovs-ofctl -P openflow10 monitor br0 --detach --no-chdir --pidfile]) ovs-appctl -t ovs-ofctl ofctl/send 0109000c0123456700000080 ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log AT_CAPTURE_FILE([monitor.log]) # Send some packet-outs with OFPP_NONE and OFPP_CONTROLLER (65533) as in_port. AT_CHECK([ovs-ofctl packet-out br0 none controller,1 '0001020304050010203040501234']) AT_CHECK([ovs-ofctl packet-out br0 controller controller,1 '0001020304050010203040505678']) # Stop the monitor and check its output. ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit ovs-ofctl dump-ports br0 AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl OFPT_PACKET_IN: total_len=14 in_port=ANY (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_PACKET_IN: total_len=14 in_port=CONTROLLER (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x5678 OFPT_BARRIER_REPLY: ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl This test checks that OFPT_PACKET_OUT accepts both OFPP_NONE (as dnl specified by OpenFlow 1.2) and OFPP_CONTROLLER (used by some dnl controllers despite the spec) as meaning a packet that was generated dnl by the controller. 
AT_SETUP([ofproto - packet-out from controller (OpenFlow 1.2)]) OVS_VSWITCHD_START # Start a monitor listening for packet-ins. AT_CHECK([ovs-ofctl -O OpenFlow12 monitor br0 --detach --no-chdir --pidfile]) ovs-appctl -t ovs-ofctl ofctl/send 0309000c0123456700000080 ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log AT_CAPTURE_FILE([monitor.log]) # Send some packet-outs with OFPP_NONE and OFPP_CONTROLLER (65533) as in_port. AT_CHECK([ovs-ofctl -O OpenFlow12 packet-out br0 none controller '0001020304050010203040501234']) AT_CHECK([ovs-ofctl -O OpenFlow12 packet-out br0 4294967293 controller '0001020304050010203040505678']) # Stop the monitor and check its output. ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl OFPT_PACKET_IN (OF1.2): total_len=14 in_port=ANY (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_PACKET_IN (OF1.2): total_len=14 in_port=CONTROLLER (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x5678 OFPT_BARRIER_REPLY (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl This test checks that metadata is encoded in packet_in structures, dnl supported by NXAST. AT_SETUP([ofproto - packet-out with metadata (NXM)]) OVS_VSWITCHD_START # Start a monitor listening for packet-ins. 
AT_CHECK([ovs-ofctl -P nxm monitor br0 --detach --no-chdir --pidfile]) ovs-appctl -t ovs-ofctl ofctl/send 0109000c0123456700000080 ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log AT_CAPTURE_FILE([monitor.log]) # Send a packet-out with a load action to set some metadata, and forward to controller AT_CHECK([ovs-ofctl packet-out br0 controller 'load(0xfafafafa5a5a5a5a->OXM_OF_METADATA[[0..63]]), load(0xaa->NXM_NX_PKT_MARK[[]]), controller' '0001020304050010203040501234']) # Stop the monitor and check its output. ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl NXT_PACKET_IN: total_len=14 in_port=CONTROLLER metadata=0xfafafafa5a5a5a5a pkt_mark=0xaa (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_BARRIER_REPLY: ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl This test checks that metadata is encoded in packet_in structures, dnl supported by NXAST. AT_SETUP([ofproto - packet-out with metadata (OpenFlow 1.2)]) OVS_VSWITCHD_START # Start a monitor listening for packet-ins. AT_CHECK([ovs-ofctl -O OpenFlow12 monitor br0 --detach --no-chdir --pidfile]) ovs-appctl -t ovs-ofctl ofctl/send 0309000c0123456700000080 ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log AT_CAPTURE_FILE([monitor.log]) # Send a packet-out with a load action to set some metadata, and forward to controller AT_CHECK([ovs-ofctl -O OpenFlow12 packet-out br0 none 'load(0xfafafafa5a5a5a5a->OXM_OF_METADATA[[0..63]]), controller' '0001020304050010203040501234']) # Stop the monitor and check its output. 
ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl OFPT_PACKET_IN (OF1.2): total_len=14 in_port=ANY metadata=0xfafafafa5a5a5a5a (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_BARRIER_REPLY (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl This test checks that tunnel metadata is encoded in packet_in structures. AT_SETUP([ofproto - packet-out with tunnel metadata (OpenFlow 1.2)]) OVS_VSWITCHD_START # Start a monitor listening for packet-ins. AT_CHECK([ovs-ofctl -O OpenFlow12 monitor br0 --detach --no-chdir --pidfile]) ovs-appctl -t ovs-ofctl ofctl/send 0309000c0123456700000080 ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log AT_CAPTURE_FILE([monitor.log]) # Send a packet-out with set field actions to set some tunnel metadata, and forward to controller AT_CHECK([ovs-ofctl -O OpenFlow12 packet-out br0 none 'set_field:127.0.0.1->tun_src,set_field:0x01020304->tun_id,set_field:192.168.0.1->tun_dst, controller' '0001020304050010203040501234']) # Stop the monitor and check its output. ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit AT_CHECK([sed 's/ (xid=0x[[0-9a-fA-F]]*)//' monitor.log], [0], [dnl OFPT_PACKET_IN (OF1.2): total_len=14 in_port=ANY tun_id=0x1020304 tun_src=127.0.0.1 tun_dst=192.168.0.1 (via action) data_len=14 (unbuffered) metadata=0,in_port=0,vlan_tci=0x0000,dl_src=00:10:20:30:40:50,dl_dst=00:01:02:03:04:05,dl_type=0x1234 OFPT_BARRIER_REPLY (OF1.2): ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - flow monitoring]) AT_KEYWORDS([monitor]) OVS_VSWITCHD_START ovs-ofctl add-flow br0 in_port=0,dl_vlan=123,actions=output:1 # Start a monitor watching the flow table and check the initial reply. 
ovs-ofctl monitor br0 watch: --detach --no-chdir --pidfile >monitor.log 2>&1 AT_CAPTURE_FILE([monitor.log]) ovs-appctl -t ovs-ofctl ofctl/barrier AT_CHECK([sed 's/ (xid=0x[[1-9a-fA-F]][[0-9a-fA-F]]*)//' monitor.log], [0], [NXST_FLOW_MONITOR reply: event=ADDED table=0 cookie=0 in_port=0,dl_vlan=123 actions=output:1 OFPT_BARRIER_REPLY: ]) # Add, delete, and modify some flows and check the updates. ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log ovs-ofctl add-flow br0 in_port=0,dl_vlan=124,actions=output:2 ovs-ofctl add-flow br0 in_port=0,dl_vlan=123,actions=output:5 ovs-ofctl add-flow br0 in_port=0,dl_vlan=123,dl_vlan_pcp=0,actions=output:6 ovs-ofctl add-flow br0 in_port=0,dl_vlan=123,dl_vlan_pcp=1,actions=output:7 ovs-ofctl add-flow br0 in_port=0,dl_vlan=123,actions=output:8 ovs-ofctl add-flow br0 in_port=0,dl_vlan=65535,dl_vlan_pcp=0,actions=output:9 ovs-ofctl add-flow br0 in_port=0,dl_vlan=65535,dl_vlan_pcp=1,actions=output:10 ovs-ofctl add-flow br0 in_port=0,dl_vlan=65535,actions=output:11 ovs-ofctl add-flow br0 in_port=0,dl_vlan=8191,dl_vlan_pcp=0,actions=output:12 ovs-ofctl add-flow br0 in_port=0,dl_vlan=8191,dl_vlan_pcp=1,actions=output:13 ovs-ofctl add-flow br0 in_port=0,dl_vlan=8191,actions=output:14 ovs-ofctl add-flow br0 in_port=0,dl_vlan=0,dl_vlan_pcp=0,actions=output:15 ovs-ofctl add-flow br0 in_port=0,dl_vlan=0,dl_vlan_pcp=1,actions=output:16 ovs-ofctl add-flow br0 in_port=0,dl_vlan=0,actions=output:17 ovs-ofctl add-flow br0 in_port=0,dl_vlan=0,dl_vlan_pcp=0,actions=output:18 ovs-ofctl add-flow br0 in_port=0,dl_vlan=0,dl_vlan_pcp=1,actions=output:19 ovs-ofctl add-flow br0 in_port=0,dl_vlan=0,actions=output:20 ovs-ofctl add-flow br0 in_port=0,dl_vlan_pcp=0,actions=output:21 ovs-ofctl add-flow br0 in_port=0,dl_vlan_pcp=1,actions=output:22 ovs-ofctl add-flow br0 in_port=0,actions=output:23 ovs-ofctl mod-flows br0 cookie=5,dl_vlan=123,actions=output:3 ovs-ofctl del-flows br0 dl_vlan=123 ovs-ofctl del-flows br0 ovs-appctl -t ovs-ofctl ofctl/barrier 
sort=' # Reads stdin and sorts groups of lines that start with a space, without moving them # past the nearest line that does not start with a space. use warnings; use strict; my @buffer = (); while (defined($_ = readline(*STDIN))) { if (/^ /) { push(@buffer, $_); } else { print $_ foreach sort(@buffer); print $_; @buffer = (); } } print $_ foreach sort(@buffer); ' AT_CHECK([sed 's/ (xid=0x[[1-9a-fA-F]][[0-9a-fA-F]]*)//' monitor.log | ${PERL} -e "$sort"], [0], [NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=124 actions=output:2 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=123 actions=output:5 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=123,dl_vlan_pcp=0 actions=output:6 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=123,dl_vlan_pcp=1 actions=output:7 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=123 actions=output:8 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=0,dl_vlan_pcp=0 actions=output:9 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=0,dl_vlan_pcp=1 actions=output:10 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,vlan_tci=0x0000 actions=output:11 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=4095,dl_vlan_pcp=0 actions=output:12 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=4095,dl_vlan_pcp=1 actions=output:13 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=4095 actions=output:14 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=0,dl_vlan_pcp=0 actions=output:15 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=0,dl_vlan_pcp=1 actions=output:16 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=0 actions=output:17 NXST_FLOW_MONITOR 
reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=0,dl_vlan_pcp=0 actions=output:18 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=0,dl_vlan_pcp=1 actions=output:19 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan=0 actions=output:20 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan_pcp=0 actions=output:21 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0,dl_vlan_pcp=1 actions=output:22 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=0 actions=output:23 NXST_FLOW_MONITOR reply (xid=0x0): event=MODIFIED table=0 cookie=0x5 in_port=0,dl_vlan=123 actions=output:3 event=MODIFIED table=0 cookie=0x5 in_port=0,dl_vlan=123,dl_vlan_pcp=0 actions=output:3 event=MODIFIED table=0 cookie=0x5 in_port=0,dl_vlan=123,dl_vlan_pcp=1 actions=output:3 NXST_FLOW_MONITOR reply (xid=0x0): event=DELETED reason=delete table=0 cookie=0x5 in_port=0,dl_vlan=123 actions=output:3 event=DELETED reason=delete table=0 cookie=0x5 in_port=0,dl_vlan=123,dl_vlan_pcp=0 actions=output:3 event=DELETED reason=delete table=0 cookie=0x5 in_port=0,dl_vlan=123,dl_vlan_pcp=1 actions=output:3 NXST_FLOW_MONITOR reply (xid=0x0): event=DELETED reason=delete table=0 cookie=0 in_port=0 actions=output:23 event=DELETED reason=delete table=0 cookie=0 in_port=0,dl_vlan=0 actions=output:20 event=DELETED reason=delete table=0 cookie=0 in_port=0,dl_vlan=0,dl_vlan_pcp=0 actions=output:18 event=DELETED reason=delete table=0 cookie=0 in_port=0,dl_vlan=0,dl_vlan_pcp=1 actions=output:19 event=DELETED reason=delete table=0 cookie=0 in_port=0,dl_vlan=124 actions=output:2 event=DELETED reason=delete table=0 cookie=0 in_port=0,dl_vlan=4095 actions=output:14 event=DELETED reason=delete table=0 cookie=0 in_port=0,dl_vlan=4095,dl_vlan_pcp=0 actions=output:12 event=DELETED reason=delete table=0 cookie=0 in_port=0,dl_vlan=4095,dl_vlan_pcp=1 actions=output:13 event=DELETED 
reason=delete table=0 cookie=0 in_port=0,dl_vlan_pcp=0 actions=output:21 event=DELETED reason=delete table=0 cookie=0 in_port=0,dl_vlan_pcp=1 actions=output:22 event=DELETED reason=delete table=0 cookie=0 in_port=0,vlan_tci=0x0000 actions=output:11 OFPT_BARRIER_REPLY: ]) # Check that our own changes are reported as abbreviations. ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log ovs-ofctl add-flow br0 in_port=1,actions=output:2 ovs-ofctl add-flow br0 in_port=2,actions=output:1 ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl ofctl/send 010e004812345678003fffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003000000000000ffffffffffff0000 ovs-appctl -t ovs-ofctl ofctl/barrier AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [NXST_FLOW reply: ]) AT_CHECK([sed 's/ (xid=0x[[1-9a-fA-F]][[0-9a-fA-F]]*)//' monitor.log], [0], [NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=1 actions=output:2 NXST_FLOW_MONITOR reply (xid=0x0): event=ADDED table=0 cookie=0 in_port=2 actions=output:1 OFPT_BARRIER_REPLY: send: OFPT_FLOW_MOD: DEL priority=0 actions=drop NXST_FLOW_MONITOR reply (xid=0x0): event=ABBREV xid=0x12345678 OFPT_BARRIER_REPLY: ]) ovs-appctl -t ovs-ofctl exit OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto - flow monitoring pause and resume]) AT_KEYWORDS([monitor]) # The maximum socket receive buffer size is important for this test, which # tests behavior when the receive buffer overflows. if test -e /proc/sys/net/core/rmem_max; then # Linux rmem_max=`cat /proc/sys/net/core/rmem_max` elif rmem_max=`sysctl -n net.inet.tcp.recvbuf_max 2>/dev/null`; then : # FreeBSD, NetBSD else # Don't know how to get maximum socket receive buffer on this OS AT_SKIP_IF([:]) fi # Calculate the total amount of queuing: rmem_max in the kernel, 128 kB # in ofproto sending userspace (see ofmonitor_flush() in connmgr.c). 
queue_size=`expr $rmem_max + 128 \* 1024` echo rmem_max=$rmem_max queue_size=$queue_size # Each flow update message takes up at least 48 bytes of space in queues # and in practice more than that. n_msgs=`expr $queue_size / 48` echo n_msgs=$n_msgs OVS_VSWITCHD_START # Start a monitor watching the flow table, then make it block. ON_EXIT([kill `cat ovs-ofctl.pid`]) ovs-ofctl monitor br0 watch: --detach --no-chdir --pidfile >monitor.log 2>&1 AT_CAPTURE_FILE([monitor.log]) ovs-appctl -t ovs-ofctl ofctl/block # Add $n_msgs flows. (echo "in_port=2,actions=output:2" ${PERL} -e ' for ($i = 0; $i < '$n_msgs'; $i++) { print "cookie=1,reg1=$i,actions=drop\n"; } ') > flows.txt AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) # Check that multipart flow dumps work properly: AT_CHECK([ovs-ofctl diff-flows br0 flows.txt]) AT_CHECK([ovs-ofctl add-flow br0 in_port=1,cookie=3,actions=drop]) AT_CHECK([ovs-ofctl mod-flows br0 in_port=2,cookie=2,actions=output:2]) AT_CHECK([ovs-ofctl del-flows br0 cookie=1/-1]) ovs-appctl -t ovs-ofctl ofctl/unblock ovs-appctl -t ovs-ofctl ofctl/barrier ovs-appctl -t ovs-ofctl exit # Check that the flow monitor reported the same number of flows # added and deleted, but fewer than we actually added and deleted. adds=`grep -c 'ADDED.*reg1=' monitor.log` deletes=`grep -c 'DELETED.*reg1=' monitor.log` echo adds=$adds deletes=$deletes AT_CHECK([test $adds -gt 100 && test $adds -lt $n_msgs]) AT_CHECK([test $adds = $deletes]) # Check that the flow monitor reported everything in the expected order: # # event=ADDED table=0 cookie=0x1 reg1=0x22 # ... # NXT_FLOW_MONITOR_PAUSED: # ... # event=DELETED reason=delete table=0 cookie=0x1 reg1=0x22 # ... 
# event=ADDED table=0 cookie=0x3 in_port=1 # event=MODIFIED table=0 cookie=0x2 in_port=2 actions=output:2 # NXT_FLOW_MONITOR_RESUMED: # # except that, between the PAUSED and RESUMED, the order of the ADDED # and MODIFIED lines depends on hash order, that is, it varies # as we change the hash function or change architecture. Therefore, # we use a couple of tests below to accept both orders. AT_CHECK([ofctl_strip < monitor.log | sed -n -e ' /reg1=0x22$/p /cookie=0x[[23]]/p /NXT_FLOW_MONITOR_PAUSED:/p /NXT_FLOW_MONITOR_RESUMED:/p ' > monitor.log.subset]) AT_CHECK([grep -v MODIFIED monitor.log.subset], [0], [dnl event=ADDED table=0 cookie=0x1 reg1=0x22 NXT_FLOW_MONITOR_PAUSED: event=DELETED reason=delete table=0 cookie=0x1 reg1=0x22 event=ADDED table=0 cookie=0x3 in_port=1 NXT_FLOW_MONITOR_RESUMED: ]) AT_CHECK([grep -v ADDED monitor.log.subset], [0], [dnl NXT_FLOW_MONITOR_PAUSED: event=DELETED reason=delete table=0 cookie=0x1 reg1=0x22 event=MODIFIED table=0 cookie=0x2 in_port=2 actions=output:2 NXT_FLOW_MONITOR_RESUMED: ]) OVS_VSWITCHD_STOP AT_CLEANUP openvswitch-2.0.1+git20140120/tests/openssl.supp000066400000000000000000000002241226605124000212060ustar00rootroot00000000000000# suppress OpenSSL errors from valgrind { BN_mod_inverse Memcheck:Cond fun:BN_mod_inverse } { BN_div Memcheck:Cond fun:BN_div } openvswitch-2.0.1+git20140120/tests/ovs-monitor-ipsec.at000066400000000000000000000210001226605124000225300ustar00rootroot00000000000000AT_BANNER([ovs-monitor-ipsec]) AT_SETUP([ovs-monitor-ipsec]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_PKGDATADIR=`pwd`; export OVS_PKGDATADIR cp "$top_srcdir/vswitchd/vswitch.ovsschema" . ON_EXIT([kill `cat pid ovs-monitor-ipsec.pid`]) mkdir etc etc/init.d etc/racoon etc/racoon/certs mkdir usr usr/sbin AT_DATA([etc/init.d/racoon], [dnl #! /bin/sh echo "racoon: $@" >&3 exit 0 ]) chmod +x etc/init.d/racoon AT_DATA([usr/sbin/setkey], [dnl #! 
/bin/sh exec >&3 echo "setkey:" while read line; do echo "> $line" done ]) chmod +x usr/sbin/setkey touch etc/racoon/certs/ovs-stale.pem ovs_vsctl () { ovs-vsctl --no-wait -vreconnect:emer --db=unix:socket "$@" } trim () { # Removes blank lines and lines starting with # from input. sed -e '/^#/d' -e '/^[ ]*$/d' "$@" } ### ### Start ovsdb-server. ### OVS_VSCTL_SETUP ### ### Start ovs-monitor-ipsec and wait for it to delete the stale cert. ### AT_CHECK( [$PYTHON $top_srcdir/debian/ovs-monitor-ipsec "--root-prefix=`pwd`" \ "--pidfile=`pwd`/ovs-monitor-ipsec.pid" \ unix:socket 2>log 3>actions &]) AT_CAPTURE_FILE([log]) AT_CAPTURE_FILE([actions]) OVS_WAIT_UNTIL([test ! -f etc/racoon/certs/ovs-stale.pem]) ### ### Add an ipsec_gre psk interface and check what ovs-monitor-ipsec does ### AT_CHECK([ovs_vsctl \ -- add-br br0 \ -- add-port br0 gre0 \ -- set interface gre0 type=ipsec_gre \ options:remote_ip=1.2.3.4 \ options:psk=swordfish]) OVS_WAIT_UNTIL([test -f actions && grep 'spdadd 1.2.3.4' actions >/dev/null]) AT_CHECK([cat actions], [0], [dnl setkey: > flush; setkey: > spdflush; racoon: reload racoon: reload setkey: > spdadd 0.0.0.0/0 1.2.3.4 gre -P out ipsec esp/transport//require; > spdadd 1.2.3.4 0.0.0.0/0 gre -P in ipsec esp/transport//require; ]) AT_CHECK([trim etc/racoon/psk.txt], [0], [1.2.3.4 swordfish ]) AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl path pre_shared_key "/etc/racoon/psk.txt"; path certificate "/etc/racoon/certs"; remote 1.2.3.4 { exchange_mode main; nat_traversal on; proposal { encryption_algorithm aes; hash_algorithm sha1; authentication_method pre_shared_key; dh_group 2; } } sainfo anonymous { pfs_group 2; lifetime time 1 hour; encryption_algorithm aes; authentication_algorithm hmac_sha1, hmac_md5; compression_algorithm deflate; } ]) ### ### Delete the ipsec_gre interface and check what ovs-monitor-ipsec does ### AT_CHECK([ovs_vsctl del-port gre0]) OVS_WAIT_UNTIL([test `wc -l < actions` -ge 17]) AT_CHECK([sed '1,9d' actions], [0], [dnl 
racoon: reload setkey: > spddelete 0.0.0.0/0 1.2.3.4 gre -P out; > spddelete 1.2.3.4 0.0.0.0/0 gre -P in; setkey: > dump ; setkey: > dump ; ]) AT_CHECK([trim etc/racoon/psk.txt], [0], []) AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl path pre_shared_key "/etc/racoon/psk.txt"; path certificate "/etc/racoon/certs"; sainfo anonymous { pfs_group 2; lifetime time 1 hour; encryption_algorithm aes; authentication_algorithm hmac_sha1, hmac_md5; compression_algorithm deflate; } ]) ### ### Add ipsec_gre certificate interface and check what ovs-monitor-ipsec does ### AT_DATA([cert.pem], [dnl -----BEGIN CERTIFICATE----- (not a real certificate) -----END CERTIFICATE----- ]) AT_DATA([key.pem], [dnl -----BEGIN RSA PRIVATE KEY----- (not a real private key) -----END RSA PRIVATE KEY----- ]) AT_CHECK([ovs_vsctl \ -- add-port br0 gre1 \ -- set Interface gre1 type=ipsec_gre \ options:remote_ip=2.3.4.5 \ options:peer_cert='"-----BEGIN CERTIFICATE----- (not a real peer certificate) -----END CERTIFICATE----- "' \ options:certificate='"/cert.pem"' \ options:private_key='"/key.pem"']) OVS_WAIT_UNTIL([test `wc -l < actions` -ge 21]) AT_CHECK([sed '1,17d' actions], [0], [dnl racoon: reload setkey: > spdadd 0.0.0.0/0 2.3.4.5 gre -P out ipsec esp/transport//require; > spdadd 2.3.4.5 0.0.0.0/0 gre -P in ipsec esp/transport//require; ]) AT_CHECK([trim etc/racoon/psk.txt], [0], []) AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl path pre_shared_key "/etc/racoon/psk.txt"; path certificate "/etc/racoon/certs"; remote 2.3.4.5 { exchange_mode main; nat_traversal on; ike_frag on; certificate_type x509 "/cert.pem" "/key.pem"; my_identifier asn1dn; peers_identifier asn1dn; peers_certfile x509 "/etc/racoon/certs/ovs-2.3.4.5.pem"; verify_identifier on; proposal { encryption_algorithm aes; hash_algorithm sha1; authentication_method rsasig; dh_group 2; } } sainfo anonymous { pfs_group 2; lifetime time 1 hour; encryption_algorithm aes; authentication_algorithm hmac_sha1, hmac_md5; compression_algorithm 
deflate; } ]) AT_CHECK([cat etc/racoon/certs/ovs-2.3.4.5.pem], [0], [dnl -----BEGIN CERTIFICATE----- (not a real peer certificate) -----END CERTIFICATE----- ]) ### ### Delete the ipsec_gre certificate interface. ### AT_CHECK([ovs_vsctl del-port gre1]) OVS_WAIT_UNTIL([test `wc -l < actions` -ge 29]) AT_CHECK([sed '1,21d' actions], [0], [dnl racoon: reload setkey: > spddelete 0.0.0.0/0 2.3.4.5 gre -P out; > spddelete 2.3.4.5 0.0.0.0/0 gre -P in; setkey: > dump ; setkey: > dump ; ]) AT_CHECK([trim etc/racoon/psk.txt], [0], []) AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl path pre_shared_key "/etc/racoon/psk.txt"; path certificate "/etc/racoon/certs"; sainfo anonymous { pfs_group 2; lifetime time 1 hour; encryption_algorithm aes; authentication_algorithm hmac_sha1, hmac_md5; compression_algorithm deflate; } ]) AT_CHECK([test ! -f etc/racoon/certs/ovs-2.3.4.5.pem]) ### ### Add an SSL certificate interface. ### cp cert.pem ssl-cert.pem cp key.pem ssl-key.pem AT_DATA([ssl-cacert.pem], [dnl -----BEGIN CERTIFICATE----- (not a real CA certificate) -----END CERTIFICATE----- ]) AT_CHECK([ovs_vsctl set-ssl /ssl-key.pem /ssl-cert.pem /ssl-cacert.pem \ -- add-port br0 gre2 \ -- set Interface gre2 type=ipsec_gre \ options:remote_ip=3.4.5.6 \ options:peer_cert='"-----BEGIN CERTIFICATE----- (not a real peer certificate) -----END CERTIFICATE----- "' \ options:use_ssl_cert='"true"']) OVS_WAIT_UNTIL([test `wc -l < actions` -ge 33]) AT_CHECK([sed '1,29d' actions], [0], [dnl racoon: reload setkey: > spdadd 0.0.0.0/0 3.4.5.6 gre -P out ipsec esp/transport//require; > spdadd 3.4.5.6 0.0.0.0/0 gre -P in ipsec esp/transport//require; ]) AT_CHECK([trim etc/racoon/psk.txt], [0], []) AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl path pre_shared_key "/etc/racoon/psk.txt"; path certificate "/etc/racoon/certs"; remote 3.4.5.6 { exchange_mode main; nat_traversal on; ike_frag on; certificate_type x509 "/ssl-cert.pem" "/ssl-key.pem"; my_identifier asn1dn; peers_identifier asn1dn; 
peers_certfile x509 "/etc/racoon/certs/ovs-3.4.5.6.pem"; verify_identifier on; proposal { encryption_algorithm aes; hash_algorithm sha1; authentication_method rsasig; dh_group 2; } } sainfo anonymous { pfs_group 2; lifetime time 1 hour; encryption_algorithm aes; authentication_algorithm hmac_sha1, hmac_md5; compression_algorithm deflate; } ]) AT_CHECK([cat etc/racoon/certs/ovs-3.4.5.6.pem], [0], [dnl -----BEGIN CERTIFICATE----- (not a real peer certificate) -----END CERTIFICATE----- ]) ### ### Delete the SSL certificate interface. ### AT_CHECK([ovs_vsctl del-port gre2]) OVS_WAIT_UNTIL([test `wc -l < actions` -ge 41]) AT_CHECK([sed '1,33d' actions], [0], [dnl racoon: reload setkey: > spddelete 0.0.0.0/0 3.4.5.6 gre -P out; > spddelete 3.4.5.6 0.0.0.0/0 gre -P in; setkey: > dump ; setkey: > dump ; ]) AT_CHECK([trim etc/racoon/psk.txt], [0], []) AT_CHECK([trim etc/racoon/racoon.conf], [0], [dnl path pre_shared_key "/etc/racoon/psk.txt"; path certificate "/etc/racoon/certs"; sainfo anonymous { pfs_group 2; lifetime time 1 hour; encryption_algorithm aes; authentication_algorithm hmac_sha1, hmac_md5; compression_algorithm deflate; } ]) AT_CHECK([test ! -f etc/racoon/certs/ovs-3.4.5.6.pem]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ovs-ofctl.at000066400000000000000000003014021226605124000210560ustar00rootroot00000000000000AT_BANNER([ovs-ofctl]) AT_SETUP([ovs-ofctl parse-flows choice of protocol]) # This doesn't cover some potential vlan_tci test cases. 
for test_case in \ 'tun_id=0 NXM,OXM' \ 'tun_src=1.2.3.4 NXM,OXM' \ 'tun_dst=1.2.3.4 NXM,OXM' \ 'tun_flags=0 none' \ 'tun_tos=0 none' \ 'tun_ttl=0 none' \ 'metadata=0 NXM,OXM,OpenFlow11' \ 'in_port=1 any' \ 'skb_priority=0 none' \ 'pkt_mark=1 NXM,OXM' \ 'reg0=0 NXM,OXM' \ 'reg1=1 NXM,OXM' \ 'reg2=2 NXM,OXM' \ 'reg3=3 NXM,OXM' \ 'reg4=4 NXM,OXM' \ 'reg5=5 NXM,OXM' \ 'reg6=6 NXM,OXM' \ 'reg7=7 NXM,OXM' \ 'dl_src=00:11:22:33:44:55 any' \ 'dl_src=00:11:22:33:44:55/00:ff:ff:ff:ff:ff NXM,OXM,OpenFlow11' \ 'dl_dst=00:11:22:33:44:55 any' \ 'dl_dst=00:11:22:33:44:55/00:ff:ff:ff:ff:ff NXM,OXM,OpenFlow11' \ 'dl_type=0x1234 any' \ 'dl_type=0x0800 any' \ 'dl_type=0x0806 any' \ 'dl_type=0x86dd any' \ 'vlan_tci=0 any' \ 'vlan_tci=0x1009 any' \ 'dl_vlan=9 any' \ 'vlan_vid=11 any' \ 'dl_vlan_pcp=6 any' \ 'vlan_pcp=5 any' \ 'mpls,mpls_label=5 NXM,OXM,OpenFlow11' \ 'mpls,mpls_tc=1 NXM,OXM,OpenFlow11' \ 'mpls,mpls_bos=0 NXM,OXM' \ 'ip,ip_src=1.2.3.4 any' \ 'ip,ip_src=192.168.0.0/24 any' \ 'ip,ip_src=192.0.168.0/255.0.255.0 NXM,OXM,OpenFlow11' \ 'ip,ip_dst=1.2.3.4 any' \ 'ip,ip_dst=192.168.0.0/24 any' \ 'ip,ip_dst=192.0.168.0/255.0.255.0 NXM,OXM,OpenFlow11' \ 'ipv6,ipv6_src=::1 NXM,OXM' \ 'ipv6,ipv6_dst=::1 NXM,OXM' \ 'ipv6,ipv6_label=5 NXM,OXM' \ 'ip,nw_proto=1 any' \ 'ipv6,nw_proto=1 NXM,OXM' \ 'ip,nw_tos=0xf0 any' \ 'ipv6,nw_tos=0xf0 NXM,OXM' \ 'ip,nw_tos_shifted=0x3c any' \ 'ipv6,nw_tos_shifted=0x3c NXM,OXM' \ 'ip,nw_ecn=1 NXM,OXM' \ 'ipv6,nw_ecn=1 NXM,OXM' \ 'ip,nw_ttl=5 NXM,OXM' \ 'ipv6,nw_ttl=5 NXM,OXM' \ 'ip,ip_frag=no NXM,OXM' \ 'ipv6,ip_frag=no NXM,OXM' \ 'arp,arp_op=0 any' \ 'arp,arp_spa=1.2.3.4 any' \ 'arp,arp_tpa=1.2.3.4 any' \ 'arp,arp_sha=00:11:22:33:44:55 NXM,OXM' \ 'arp,arp_tha=00:11:22:33:44:55 NXM,OXM' \ 'tcp,tcp_src=80 any' \ 'tcp,tcp_src=0x1000/0x1000 NXM,OXM' \ 'tcp6,tcp_src=80 NXM,OXM' \ 'tcp6,tcp_src=0x1000/0x1000 NXM,OXM' \ 'tcp,tcp_dst=80 any' \ 'tcp,tcp_dst=0x1000/0x1000 NXM,OXM' \ 'tcp6,tcp_dst=80 NXM,OXM' \ 'tcp6,tcp_dst=0x1000/0x1000 NXM,OXM' \ 
'udp,udp_src=80 any' \ 'udp,udp_src=0x1000/0x1000 NXM,OXM' \ 'udp6,udp_src=80 NXM,OXM' \ 'udp6,udp_src=0x1000/0x1000 NXM,OXM' \ 'udp,udp_dst=80 any' \ 'udp,udp_dst=0x1000/0x1000 NXM,OXM' \ 'udp6,udp_dst=80 NXM,OXM' \ 'udp6,udp_dst=0x1000/0x1000 NXM,OXM' \ 'icmp,icmp_type=1 any' \ 'icmp,icmp_code=2 any' \ 'icmp6,icmpv6_type=1 NXM,OXM' \ 'icmp6,icmpv6_code=2 NXM,OXM' do set $test_case echo echo "### test case: '$1' should have usable protocols '$2'" if test "$2" = none; then AT_CHECK([ovs-ofctl parse-flow "$1,actions=drop"], [1], [usable protocols: none ], [ovs-ofctl: no usable protocol ]) else AT_CHECK_UNQUOTED([ovs-ofctl parse-flow "$1,actions=drop" | sed 1q], [0], [usable protocols: $2 ]) fi done AT_CLEANUP AT_SETUP([ovs-ofctl parse-flows (OpenFlow 1.0)]) AT_DATA([flows.txt], [[ # comment tcp,tp_src=123,out_port=5,actions=flood in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller actions=note:41.42.43,note:00.01.02.03.04.05.06.07,note ip,actions=set_field:10.4.3.77->ip_src sctp actions=drop sctp actions=drop in_port=0 actions=resubmit:0 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]]) AT_CHECK([ovs-ofctl parse-flows flows.txt ], [0], [stdout]) AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [[usable protocols: any chosen protocol: OpenFlow10-table_id OFPT_FLOW_MOD: ADD tcp,tp_src=123 out_port:5 actions=FLOOD OFPT_FLOW_MOD: ADD in_port=LOCAL,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0 actions=drop OFPT_FLOW_MOD: ADD udp,dl_vlan_pcp=7 idle:5 actions=strip_vlan,output:0 OFPT_FLOW_MOD: ADD tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 OFPT_FLOW_MOD: ADD udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 OFPT_FLOW_MOD: ADD 
priority=60000 cookie:0x123456789abcdef hard:10 actions=CONTROLLER:65535 OFPT_FLOW_MOD: ADD actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.00.00.00.00.00.00,note:00.00.00.00.00.00 OFPT_FLOW_MOD: ADD ip actions=load:0xa04034d->NXM_OF_IP_SRC[] OFPT_FLOW_MOD: ADD sctp actions=drop OFPT_FLOW_MOD: ADD sctp actions=drop OFPT_FLOW_MOD: ADD in_port=0 actions=resubmit:0 OFPT_FLOW_MOD: ADD actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-flows (OpenFlow 1.2)]) AT_DATA([flows.txt], [[ # comment tcp,tp_src=123,actions=flood in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller actions=note:41.42.43,note:00.01.02.03.04.05.06.07,note ipv6,actions=set_field:fe80:0123:4567:890a:a6ba:dbff:fefe:59fa->ipv6_src sctp actions=set_field:3334->sctp_src sctp actions=set_field:4445->sctp_dst in_port=0 actions=resubmit:0 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]]) AT_CHECK([ovs-ofctl --protocols OpenFlow12 parse-flows flows.txt ], [0], [stdout]) AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [[usable protocols: NXM,OXM chosen protocol: OXM-OpenFlow12 OFPT_FLOW_MOD (OF1.2): ADD table:255 tcp,tp_src=123 actions=FLOOD OFPT_FLOW_MOD (OF1.2): ADD table:255 in_port=LOCAL,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0 actions=drop OFPT_FLOW_MOD (OF1.2): ADD table:255 udp,dl_vlan_pcp=7 idle:5 actions=strip_vlan,output:0 OFPT_FLOW_MOD (OF1.2): ADD table:255 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 OFPT_FLOW_MOD (OF1.2): ADD table:255 udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 OFPT_FLOW_MOD (OF1.2): ADD table:255 priority=60000 
cookie:0x123456789abcdef hard:10 actions=CONTROLLER:65535 OFPT_FLOW_MOD (OF1.2): ADD table:255 actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.00.00.00.00.00.00,note:00.00.00.00.00.00 OFPT_FLOW_MOD (OF1.2): ADD table:255 ipv6 actions=set_field:fe80:123:4567:890a:a6ba:dbff:fefe:59fa->ipv6_src OFPT_FLOW_MOD (OF1.2): ADD table:255 sctp actions=set_field:3334->sctp_src OFPT_FLOW_MOD (OF1.2): ADD table:255 sctp actions=set_field:4445->sctp_dst OFPT_FLOW_MOD (OF1.2): ADD table:255 in_port=0 actions=resubmit:0 OFPT_FLOW_MOD (OF1.2): ADD table:255 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-flows (With Tunnel-Parameters)]) AT_DATA([flows.txt], [[ tun_id=0x1234000056780000/0xffff0000ffff0000,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=0x3,tun_ttl=20,tun_flags=key|csum actions=drop ]]) AT_CHECK([ovs-ofctl parse-flows flows.txt ], [1], [usable protocols: none ], [stderr]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-flows (skb_priority)]) AT_DATA([flows.txt], [[ skb_priority=0x12341234,tcp,tp_src=123,actions=flood ]]) AT_CHECK([ovs-ofctl parse-flows flows.txt ], [1], [usable protocols: none ], [stderr]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-flows (NXM)]) AT_DATA([flows.txt], [[ # comment tcp,tp_src=123,actions=flood in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop pkt_mark=0xbb,actions=set_field:0xaa->pkt_mark udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller actions=note:41.42.43,note:00.01.02.03.04.05.06.07,note tcp,tp_src=0x1230/0xfff0,tun_id=0x1234,cookie=0x5678,actions=flood actions=set_tunnel:0x1234,set_tunnel64:0x9876,set_tunnel:0x123456789 actions=multipath(eth_src, 50, hrw, 12, 0, NXM_NX_REG0[0..3]),multipath(symmetric_l4, 1024, iter_hash, 5000, 5050, 
NXM_NX_REG0[0..12]) table=1,actions=drop tun_id=0x1234000056780000/0xffff0000ffff0000,actions=drop metadata=0x1234ffff5678ffff/0xffff0000ffff0000,actions=drop actions=bundle(eth_src,50,active_backup,ofport,slaves:1) actions=bundle(symmetric_l4,60,hrw,ofport,slaves:2,3) actions=bundle(symmetric_l4,60,hrw,ofport,slaves:) actions=output:1,bundle(eth_src,0,hrw,ofport,slaves:1),output:2 actions=bundle_load(eth_src,50,active_backup,ofport,NXM_NX_REG0[],slaves:1) actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:[2,3]) actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:) actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],slaves:1),output:2 actions=resubmit:1,resubmit(2),resubmit(,3),resubmit(2,3) send_flow_rem,actions=output:1,output:NXM_NX_REG0[],output:2,output:NXM_NX_REG1[16..31],output:3 check_overlap,actions=output:1,exit,output:2 actions=fin_timeout(idle_timeout=5,hard_timeout=15) actions=controller(max_len=123,reason=invalid_ttl,id=555) actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]]) AT_CHECK([ovs-ofctl parse-flows flows.txt ], [0], [stdout]) AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [[usable protocols: OXM,NXM+table_id chosen protocol: NXM+table_id NXT_FLOW_MOD: ADD table:255 tcp,tp_src=123 actions=FLOOD NXT_FLOW_MOD: ADD table:255 in_port=LOCAL,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0 actions=drop NXT_FLOW_MOD: ADD table:255 pkt_mark=0xbb actions=load:0xaa->NXM_NX_PKT_MARK[] NXT_FLOW_MOD: ADD table:255 udp,dl_vlan_pcp=7 idle:5 actions=strip_vlan,output:0 NXT_FLOW_MOD: ADD table:255 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 NXT_FLOW_MOD: ADD table:255 udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 NXT_FLOW_MOD: ADD table:255 priority=60000 cookie:0x123456789abcdef hard:10 actions=CONTROLLER:65535 NXT_FLOW_MOD: ADD table:255 
actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.00.00.00.00.00.00,note:00.00.00.00.00.00 NXT_FLOW_MOD: ADD table:255 tcp,tun_id=0x1234,tp_src=0x1230/0xfff0 cookie:0x5678 actions=FLOOD NXT_FLOW_MOD: ADD table:255 actions=set_tunnel:0x1234,set_tunnel64:0x9876,set_tunnel64:0x123456789 NXT_FLOW_MOD: ADD table:255 actions=multipath(eth_src,50,hrw,12,0,NXM_NX_REG0[0..3]),multipath(symmetric_l4,1024,iter_hash,5000,5050,NXM_NX_REG0[0..12]) NXT_FLOW_MOD: ADD table:1 actions=drop NXT_FLOW_MOD: ADD table:255 tun_id=0x1234000056780000/0xffff0000ffff0000 actions=drop NXT_FLOW_MOD: ADD table:255 metadata=0x1234000056780000/0xffff0000ffff0000 actions=drop NXT_FLOW_MOD: ADD table:255 actions=bundle(eth_src,50,active_backup,ofport,slaves:1) NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,slaves:2,3) NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,slaves:) NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle(eth_src,0,hrw,ofport,slaves:1),output:2 NXT_FLOW_MOD: ADD table:255 actions=bundle_load(eth_src,50,active_backup,ofport,NXM_NX_REG0[],slaves:1) NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:) NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],slaves:1),output:2 NXT_FLOW_MOD: ADD table:255 actions=resubmit:1,resubmit:2,resubmit(,3),resubmit(2,3) NXT_FLOW_MOD: ADD table:255 send_flow_rem actions=output:1,output:NXM_NX_REG0[],output:2,output:NXM_NX_REG1[16..31],output:3 NXT_FLOW_MOD: ADD table:255 check_overlap actions=output:1,exit,output:2 NXT_FLOW_MOD: ADD table:255 actions=fin_timeout(idle_timeout=5,hard_timeout=15) NXT_FLOW_MOD: ADD table:255 actions=controller(reason=invalid_ttl,max_len=123,id=555) NXT_FLOW_MOD: 
ADD table:255 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]]) AT_CLEANUP AT_SETUP([ovs-ofctl -F nxm parse-flows]) AT_DATA([flows.txt], [ # comment tcp,tp_src=123,actions=flood in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop arp,dl_src=00:0A:E4:25:6B:B0,arp_sha=00:0A:E4:25:6B:B0 actions=drop ipv6,ipv6_label=0x12345 actions=2 ipv6,ipv6_src=2001:db8:3c4d:1:2:3:4:5 actions=3 ipv6,ipv6_src=2001:db8:3c4d:1:2:3:4:5/64 actions=4 ipv6,ipv6_dst=2001:db8:3c4d:1:2:3:4:5/127 actions=5 tcp6,ipv6_src=2001:db8:3c4d:1::1,tp_dst=80 actions=drop udp6,ipv6_src=2001:db8:3c4d:1::3,tp_dst=53 actions=drop in_port=3 icmp6,ipv6_src=2001:db8:3c4d:1::1,icmp_type=134 actions=drop udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 icmp6,icmp_type=135,nd_target=FEC0::1234:F045:8FFF:1111:FE4E:0571 actions=drop icmp6,icmp_type=135,nd_target=FEC0::1234:F045:8FFF:1111:FE4F:0571/112 actions=drop icmp6,icmp_type=135,nd_sll=00:0A:E4:25:6B:B0 actions=drop icmp6,icmp_type=136,nd_target=FEC0::1234:F045:8FFF:1111:FE4E:0571,nd_tll=00:0A:E4:25:6B:B1 actions=drop icmp6,icmp_type=136,nd_target=FEC0::1234:F045:8FFF:1111:FE00:0000/96,nd_tll=00:0A:E4:25:6B:B1 actions=drop cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller actions=note:41.42.43,note:00.01.02.03.04.05.06.07,note tun_id=0x1234,cookie=0x5678,actions=flood actions=drop tun_id=0x1234000056780000/0xffff0000ffff0000,actions=drop dl_dst=01:00:00:00:00:00/01:00:00:00:00:00,actions=drop dl_dst=00:00:00:00:00:00/01:00:00:00:00:00,actions=drop dl_dst=aa:bb:cc:dd:ee:ff/fe:ff:ff:ff:ff:ff,actions=drop dl_dst=aa:bb:cc:dd:ee:ff/00:00:00:00:00:00,actions=drop actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]) AT_CHECK([ovs-ofctl -F nxm parse-flows flows.txt], [0], [stdout]) AT_CHECK([[sed 's/ 
(xid=0x[0-9a-fA-F]*)//' stdout]], [0], [dnl usable protocols: NXM,OXM chosen protocol: NXM-table_id NXT_FLOW_MOD: ADD tcp,tp_src=123 actions=FLOOD NXT_FLOW_MOD: ADD in_port=LOCAL,dl_vlan=9,dl_src=00:0a:e4:25:6b:b0 actions=drop NXT_FLOW_MOD: ADD arp,dl_src=00:0a:e4:25:6b:b0,arp_sha=00:0a:e4:25:6b:b0 actions=drop NXT_FLOW_MOD: ADD ipv6,ipv6_label=0x12345 actions=output:2 NXT_FLOW_MOD: ADD ipv6,ipv6_src=2001:db8:3c4d:1:2:3:4:5 actions=output:3 NXT_FLOW_MOD: ADD ipv6,ipv6_src=2001:db8:3c4d:1::/64 actions=output:4 NXT_FLOW_MOD: ADD ipv6,ipv6_dst=2001:db8:3c4d:1:2:3:4:4/127 actions=output:5 NXT_FLOW_MOD: ADD tcp6,ipv6_src=2001:db8:3c4d:1::1,tp_dst=80 actions=drop NXT_FLOW_MOD: ADD udp6,ipv6_src=2001:db8:3c4d:1::3,tp_dst=53 actions=drop NXT_FLOW_MOD: ADD icmp6,in_port=3,ipv6_src=2001:db8:3c4d:1::1,icmp_type=134 actions=drop NXT_FLOW_MOD: ADD udp,dl_vlan_pcp=7 idle:5 actions=strip_vlan,output:0 NXT_FLOW_MOD: ADD tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 NXT_FLOW_MOD: ADD udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 NXT_FLOW_MOD: ADD icmp6,icmp_type=135,nd_target=fec0:0:1234:f045:8fff:1111:fe4e:571 actions=drop NXT_FLOW_MOD: ADD icmp6,icmp_type=135,nd_target=fec0:0:1234:f045:8fff:1111:fe4f:0/112 actions=drop NXT_FLOW_MOD: ADD icmp6,icmp_type=135,nd_sll=00:0a:e4:25:6b:b0 actions=drop NXT_FLOW_MOD: ADD icmp6,icmp_type=136,nd_target=fec0:0:1234:f045:8fff:1111:fe4e:571,nd_tll=00:0a:e4:25:6b:b1 actions=drop NXT_FLOW_MOD: ADD icmp6,icmp_type=136,nd_target=fec0:0:1234:f045:8fff:1111::/96,nd_tll=00:0a:e4:25:6b:b1 actions=drop NXT_FLOW_MOD: ADD priority=60000 cookie:0x123456789abcdef hard:10 actions=CONTROLLER:65535 NXT_FLOW_MOD: ADD actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.00.00.00.00.00.00,note:00.00.00.00.00.00 NXT_FLOW_MOD: ADD tun_id=0x1234 cookie:0x5678 actions=FLOOD NXT_FLOW_MOD: ADD actions=drop NXT_FLOW_MOD: ADD tun_id=0x1234000056780000/0xffff0000ffff0000 actions=drop NXT_FLOW_MOD: ADD 
dl_dst=01:00:00:00:00:00/01:00:00:00:00:00 actions=drop NXT_FLOW_MOD: ADD dl_dst=00:00:00:00:00:00/01:00:00:00:00:00 actions=drop NXT_FLOW_MOD: ADD dl_dst=aa:bb:cc:dd:ee:ff/fe:ff:ff:ff:ff:ff actions=drop NXT_FLOW_MOD: ADD actions=drop NXT_FLOW_MOD: ADD actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]) AT_CLEANUP AT_SETUP([ovs-ofctl -F nxm -mmm parse-flows]) AT_DATA([flows.txt], [[ # comment tcp,tp_src=123,actions=flood in_port=LOCAL dl_vlan=9 dl_src=00:0A:E4:25:6B:B0 actions=drop arp,dl_src=00:0A:E4:25:6B:B0,arp_sha=00:0A:E4:25:6B:B0 actions=drop ipv6,ipv6_label=0x12345 actions=2 ipv6,ipv6_src=2001:db8:3c4d:1:2:3:4:5 actions=3 ipv6,ipv6_src=2001:db8:3c4d:1:2:3:4:5/64 actions=4 ipv6,ipv6_dst=2001:db8:3c4d:1:2:3:4:5/127 actions=5 tcp6,ipv6_src=2001:db8:3c4d:1::1,tp_dst=80 actions=drop udp6,ipv6_src=2001:db8:3c4d:1::3,tp_dst=53 actions=drop sctp6,ipv6_src=2001:db8:3c4d:1::5,tp_dst=309 actions=drop in_port=3 icmp6,ipv6_src=2001:db8:3c4d:1::1,icmp_type=134 actions=drop udp dl_vlan_pcp=7 idle_timeout=5 actions=strip_vlan output:0 tcp,nw_src=192.168.0.3,tp_dst=80 actions=set_queue:37,output:1 udp,nw_src=192.168.0.3,tp_dst=53 actions=pop_queue,output:1 sctp,nw_src=192.168.0.3,tp_dst=309 actions=pop_queue,output:1 icmp6,icmp_type=135,nd_target=FEC0::1234:F045:8FFF:1111:FE4E:0571 actions=drop icmp6,icmp_type=135,nd_sll=00:0A:E4:25:6B:B0 actions=drop icmp6,icmp_type=136,nd_target=FEC0::1234:F045:8FFF:1111:FE4E:0571,nd_tll=00:0A:E4:25:6B:B1 actions=drop cookie=0x123456789abcdef hard_timeout=10 priority=60000 actions=controller actions=note:41.42.43,note:00.01.02.03.04.05.06.07,note tun_id=0x1234,cookie=0x5678,actions=flood actions=drop reg0=123,actions=move:NXM_NX_REG0[0..5]->NXM_NX_REG1[26..31],load:55->NXM_NX_REG2[0..31],move:NXM_NX_REG0[0..31]->NXM_NX_TUN_ID[0..31],move:NXM_NX_REG0[0..15]->NXM_OF_VLAN_TCI[] actions=move:OXM_OF_ETH_DST[]->OXM_OF_ETH_SRC[] actions=push:NXM_NX_REG0[0..31],pop:NXM_NX_REG0[] 
vlan_tci=0x1123/0x1fff,actions=drop actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]]) AT_CHECK([ovs-ofctl -F nxm -mmm parse-flows flows.txt], [0], [stdout], [stderr]) AT_CHECK([[sed 's/ (xid=0x[0-9a-fA-F]*)//' stdout]], [0], [[usable protocols: NXM,OXM chosen protocol: NXM-table_id NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06), NXM_OF_TCP_SRC(007b) actions=FLOOD NXT_FLOW_MOD: ADD NXM_OF_IN_PORT(fffe), NXM_OF_ETH_SRC(000ae4256bb0), NXM_OF_VLAN_TCI_W(1009/1fff) actions=drop NXT_FLOW_MOD: ADD NXM_OF_ETH_SRC(000ae4256bb0), NXM_OF_ETH_TYPE(0806), NXM_NX_ARP_SHA(000ae4256bb0) actions=drop NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_LABEL(00012345) actions=output:2 NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC(20010db83c4d00010002000300040005) actions=output:3 NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) actions=output:4 NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_DST_W(20010db83c4d00010002000300040004/fffffffffffffffffffffffffffffffe) actions=output:5 NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC(20010db83c4d00010000000000000001), NXM_OF_IP_PROTO(06), NXM_OF_TCP_DST(0050) actions=drop NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC(20010db83c4d00010000000000000003), NXM_OF_IP_PROTO(11), NXM_OF_UDP_DST(0035) actions=drop NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC(20010db83c4d00010000000000000005), NXM_OF_IP_PROTO(84), OXM_OF_SCTP_DST(0135) actions=drop NXT_FLOW_MOD: ADD NXM_OF_IN_PORT(0003), NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC(20010db83c4d00010000000000000001), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(86) actions=drop NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(0800), NXM_OF_VLAN_TCI_W(f000/f000), NXM_OF_IP_PROTO(11) idle:5 actions=strip_vlan,output:0 NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(0800), NXM_OF_IP_SRC(c0a80003), NXM_OF_IP_PROTO(06), NXM_OF_TCP_DST(0050) 
actions=set_queue:37,output:1 NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(0800), NXM_OF_IP_SRC(c0a80003), NXM_OF_IP_PROTO(11), NXM_OF_UDP_DST(0035) actions=pop_queue,output:1 NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(0800), NXM_OF_IP_SRC(c0a80003), NXM_OF_IP_PROTO(84), OXM_OF_SCTP_DST(0135) actions=pop_queue,output:1 NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(87), NXM_NX_ND_TARGET(fec000001234f0458fff1111fe4e0571) actions=drop NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(87), NXM_NX_ND_SLL(000ae4256bb0) actions=drop NXT_FLOW_MOD: ADD NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(88), NXM_NX_ND_TARGET(fec000001234f0458fff1111fe4e0571), NXM_NX_ND_TLL(000ae4256bb1) actions=drop NXT_FLOW_MOD: ADD cookie:0x123456789abcdef hard:10 pri:60000 actions=CONTROLLER:65535 NXT_FLOW_MOD: ADD actions=note:41.42.43.00.00.00,note:00.01.02.03.04.05.06.07.00.00.00.00.00.00,note:00.00.00.00.00.00 NXT_FLOW_MOD: ADD NXM_NX_TUN_ID(0000000000001234) cookie:0x5678 actions=FLOOD NXT_FLOW_MOD: ADD actions=drop NXT_FLOW_MOD: ADD NXM_NX_REG0(0000007b) actions=move:NXM_NX_REG0[0..5]->NXM_NX_REG1[26..31],load:0x37->NXM_NX_REG2[],move:NXM_NX_REG0[]->NXM_NX_TUN_ID[0..31],move:NXM_NX_REG0[0..15]->NXM_OF_VLAN_TCI[] NXT_FLOW_MOD: ADD actions=move:NXM_OF_ETH_DST[]->NXM_OF_ETH_SRC[] NXT_FLOW_MOD: ADD actions=push:NXM_NX_REG0[],pop:NXM_NX_REG0[] NXT_FLOW_MOD: ADD NXM_OF_VLAN_TCI_W(1123/1fff) actions=drop NXT_FLOW_MOD: ADD actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) ]]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-nx-match]) AT_KEYWORDS([nx-match]) AT_DATA([nx-match.txt], [dnl # in port NXM_OF_IN_PORT(0000) NXM_OF_IN_PORT(fffe) # eth dst NXM_OF_ETH_DST(0002e30f80a4) NXM_OF_ETH_DST_W(010000000000/010000000000) NXM_OF_ETH_DST_W(000000000000/010000000000) NXM_OF_ETH_DST_W(ffffffffffff/010000000000) NXM_OF_ETH_DST_W(0002e30f80a4/ffffffffffff) NXM_OF_ETH_DST_W(60175619848f/000000000000) 
NXM_OF_ETH_DST_W(0002e30f80a4/feffffffffff) NXM_OF_ETH_DST_W(60175619848f/5a5a5a5a5a5a) # eth src NXM_OF_ETH_SRC(020898456ddb) NXM_OF_ETH_SRC_W(012345abcdef/ffffff555555) NXM_OF_ETH_SRC_W(020898456ddb/ffffffffffff) NXM_OF_ETH_SRC_W(020898456ddb/000000000000) # eth type NXM_OF_ETH_TYPE(0800) NXM_OF_ETH_TYPE(0800) NXM_OF_IN_PORT(0012) # vlan tci NXM_OF_VLAN_TCI(f009) NXM_OF_VLAN_TCI(f009) NXM_OF_VLAN_TCI(f009) NXM_OF_VLAN_TCI(0000) # Packets without 802.1Q header. NXM_OF_VLAN_TCI(3123) # Packets with VID=123, PCP=1. NXM_OF_VLAN_TCI(0123) # Does not make sense (but supported anyway) NXM_OF_VLAN_TCI_W(1123/1fff) # Packets with VID=123, any PCP. NXM_OF_VLAN_TCI_W(1123/ffff) # Packets with VID=123, PCP=0 NXM_OF_VLAN_TCI_W(1123/0000) # Packets with or without 802.1Q header NXM_OF_VLAN_TCI_W(f000/f000) # Packets with any VID, PCP=7. NXM_OF_VLAN_TCI_W(0000/e000) # No 802.1Q or with VID=0 # IP TOS NXM_OF_ETH_TYPE(0800) NXM_OF_IP_TOS(f0) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_TOS(41) NXM_OF_IP_TOS(f0) # IP ECN NXM_OF_ETH_TYPE(0800) NXM_NX_IP_ECN(03) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_ECN(06) NXM_NX_IP_ECN(03) # IP protocol NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(01) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(05) NXM_OF_IP_PROTO(05) # IP TTL NXM_OF_ETH_TYPE(0800) NXM_NX_IP_TTL(80) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_TTL(ff) NXM_NX_IP_TTL(80) # IP source NXM_OF_ETH_TYPE(0800) NXM_OF_IP_SRC(ac100014) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_SRC_W(C0a80000/FFFF0000) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_SRC_W(C0a80000/5a5a5a5a) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_SRC_W(C0a80000/ffffffff) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_SRC_W(C0a80000/00000000) NXM_OF_ETH_TYPE(0806) NXM_OF_IP_SRC(ac100014) NXM_OF_IP_SRC_W(C0D80000/FFFF0000) # IP destination NXM_OF_ETH_TYPE(0800) NXM_OF_IP_DST(ac100014) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_DST_W(C0a88012/FFFF0000) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_DST_W(C0a80000/5a5a5a5a) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_DST_W(C0a80000/ffffffff) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_DST_W(C0a80000/00000000) 
NXM_OF_IP_DST(ac100014) NXM_OF_ETH_TYPE(0806) NXM_OF_IP_DST_W(C0D80000/FFFF0000) # TCP source port NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_TCP_SRC(4231) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_TCP_SRC_W(5050/F0F0) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_TCP_SRC_W(5050/ffff) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_TCP_SRC_W(5050/0000) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(07) NXM_OF_TCP_SRC(4231) # TCP destination port NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_TCP_DST(4231) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_TCP_DST_W(FDE0/FFF0) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_TCP_DST_W(FDE0/ffff) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_TCP_DST_W(FDE0/0000) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(07) NXM_OF_TCP_DST(4231) # UDP source port NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(11) NXM_OF_UDP_SRC(8732) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(11) NXM_OF_UDP_SRC_W(0132/01FF) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(11) NXM_OF_UDP_SRC_W(0132/ffff) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(11) NXM_OF_UDP_SRC_W(0132/0000) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(06) NXM_OF_UDP_SRC(7823) # UDP destination port NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(11) NXM_OF_UDP_DST(1782) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(11) NXM_OF_UDP_DST_W(5005/F00F) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(11) NXM_OF_UDP_DST_W(5005/FFFF) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(11) NXM_OF_UDP_DST_W(5005/0000) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(02) NXM_OF_UDP_DST(1293) # ICMP type NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(01) NXM_OF_ICMP_TYPE(12) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(00) NXM_OF_ICMP_TYPE(10) # ICMP code NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(01) NXM_OF_ICMP_CODE(12) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(00) NXM_OF_ICMP_CODE(10) NXM_OF_ETH_TYPE(0800) NXM_OF_ICMP_CODE(10) NXM_OF_ICMP_CODE(00) # ARP opcode NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_OP(0001) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_OP(1111) NXM_OF_ETH_TYPE(0000) NXM_OF_ARP_OP(0001) 
NXM_OF_ARP_OP(0001) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_OP(0001) NXM_OF_ARP_OP(0001) # ARP source protocol address NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_SPA(ac100014) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_SPA_W(C0a81234/FFFFFF00) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_SPA_W(C0a81234/aaaaaa00) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_SPA_W(C0a81234/ffffffff) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_SPA_W(C0a81234/00000000) NXM_OF_ETH_TYPE(0800) NXM_OF_ARP_SPA(ac100014) NXM_OF_ARP_SPA_W(C0D8fedc/FFFF0000) # ARP destination protocol address NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_TPA(ac100014) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_TPA_W(C0a812fe/FFFFFF00) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_TPA_W(C0a81234/77777777) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_TPA_W(C0a81234/ffffffff) NXM_OF_ETH_TYPE(0806) NXM_OF_ARP_TPA_W(C0a81234/00000000) NXM_OF_ETH_TYPE(0800) NXM_OF_ARP_TPA(ac100014) NXM_OF_ARP_TPA_W(C0D80000/FFFF0000) # ARP source hardware address NXM_OF_ETH_TYPE(0806) NXM_NX_ARP_SHA(0002e30f80a4) NXM_OF_ETH_TYPE(0800) NXM_NX_ARP_SHA(0002e30f80a4) NXM_NX_ARP_SHA(0002e30f80a4) # ARP destination hardware address NXM_OF_ETH_TYPE(0806) NXM_NX_ARP_THA(0002e30f80a4) NXM_OF_ETH_TYPE(0800) NXM_NX_ARP_THA(0002e30f80a4) NXM_NX_ARP_THA(0002e30f80a4) # RARP opcode NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_OP(0003) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_OP(1111) NXM_OF_ETH_TYPE(0000) NXM_OF_ARP_OP(0003) NXM_OF_ARP_OP(0003) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_OP(0003) NXM_OF_ARP_OP(0003) # RARP source protocol address NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_SPA(ac100014) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_SPA_W(C0a81234/FFFFFF00) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_SPA_W(C0a81234/aaaaaa00) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_SPA_W(C0a81234/ffffffff) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_SPA_W(C0a81234/00000000) NXM_OF_ETH_TYPE(0800) NXM_OF_ARP_SPA(ac100014) NXM_OF_ARP_SPA_W(C0D8fedc/FFFF0000) # RARP destination protocol address NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_TPA(ac100014) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_TPA_W(C0a812fe/FFFFFF00) NXM_OF_ETH_TYPE(8035) 
NXM_OF_ARP_TPA_W(C0a81234/77777777) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_TPA_W(C0a81234/ffffffff) NXM_OF_ETH_TYPE(8035) NXM_OF_ARP_TPA_W(C0a81234/00000000) NXM_OF_ETH_TYPE(0800) NXM_OF_ARP_TPA(ac100014) NXM_OF_ARP_TPA_W(C0D80000/FFFF0000) # RARP source hardware address NXM_OF_ETH_TYPE(8035) NXM_NX_ARP_SHA(0002e30f80a4) NXM_OF_ETH_TYPE(0800) NXM_NX_ARP_SHA(0002e30f80a4) NXM_NX_ARP_SHA(0002e30f80a4) # RARP destination hardware address NXM_OF_ETH_TYPE(8035) NXM_NX_ARP_THA(0002e30f80a4) NXM_OF_ETH_TYPE(0800) NXM_NX_ARP_THA(0002e30f80a4) NXM_NX_ARP_THA(0002e30f80a4) # IPv6 source NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_SRC(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(0800) NXM_NX_IPV6_SRC(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_SRC_W(20010db83c4d00010000000000000000/5a5a5a5a5a5a5a5a0000000000000000) NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffffffffffffffffffff) NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_SRC_W(20010db83c4d00010000000000000000/00000000000000000000000000000000) NXM_OF_ETH_TYPE(0800) NXM_NX_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) # IPv6 destination NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_DST(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(0800) NXM_NX_IPV6_DST(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_DST_W(20010db83c4d00010000000000000000/77777777777777777777777777777777) NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_DST_W(20010db83c4d00010000000000000000/ffffffffffffffffffffffffffffffff) NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_DST_W(20010db83c4d00010000000000000000/00000000000000000000000000000000) NXM_OF_ETH_TYPE(0800) NXM_NX_IPV6_DST_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) # IPv6 Flow Label NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_LABEL(1000000f) NXM_NX_IPV6_LABEL(0000000f) NXM_OF_ETH_TYPE(86dd) NXM_NX_IPV6_LABEL(0000000f) # ND 
target address NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(88) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET_W(20010db83c4d00010002000300040005/0123456789abcdeffedcba9876543210) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET_W(20010db83c4d00010002000300040005/ffffffffffffffffffffffffffffffff) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET_W(20010db83c4d00010002000300040005/00000000000000000000000000000000) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(88) NXM_NX_ND_TARGET_W(20010db83c4d00010002000300040005/fedcba98765432100123456789abcdef) # ND source hardware address NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_SLL(0002e30f80a4) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(88) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_SLL(0002e30f80a4) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3b) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_SLL(0002e30f80a4) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_SLL(0002e30f80a4) # ND destination hardware address NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(88) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4) NXM_OF_ETH_TYPE(86dd) NXM_OF_IP_PROTO(3b) NXM_NX_ICMPV6_TYPE(87) NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4) NXM_OF_ETH_TYPE(0800) NXM_OF_IP_PROTO(3a) NXM_NX_ICMPV6_TYPE(88) 
NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_NX_ND_TLL(0002e30f80a4) # IPv4 fragments. NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(00) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(01) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(02) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(03) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/03) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/fd) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(00/02) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(01/01) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(02/02) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(03/03) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(03/ff) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG_W(03/00) NXM_OF_ETH_TYPE(0800) NXM_NX_IP_FRAG(f3) # IPv6 fragments. NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(00) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(01) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(02) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(03) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/03) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/01) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(00/02) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(01/01) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(02/02) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(03/03) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(03/00) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG_W(03/ff) NXM_OF_ETH_TYPE(86dd) NXM_NX_IP_FRAG(f3) # Flow cookie. NXM_NX_COOKIE(00000000abcdef01) NXM_NX_COOKIE_W(84200000abcdef01/84200000FFFFFFFF) NXM_NX_COOKIE_W(84200000abcdef01/ffffffffffffffff) NXM_NX_COOKIE_W(84200000abcdef01/0000000000000000) # Tunnel ID. NXM_NX_TUN_ID(00000000abcdef01) NXM_NX_TUN_ID_W(84200000abcdef01/84200000FFFFFFFF) NXM_NX_TUN_ID_W(84200000abcdef01/FFFFFFFFFFFFFFFF) NXM_NX_TUN_ID_W(84200000abcdef01/0000000000000000) # Register 0. NXM_NX_REG0(acebdf56) NXM_NX_REG0_W(a0e0d050/f0f0f0f0) NXM_NX_REG0_W(a0e0d050/ffffffff) NXM_NX_REG0_W(a0e0d050/00000000) # Invalid field number. 01020304(1111/2222) # Unimplemented registers. # # This test assumes that at least two registers, but fewer than 16, # registers are implemented. 
00010004(12345678) 00010108(12345678/12345678) 00011e04(12345678) 00011f08(12345678/12345678) ]) AT_CHECK([ovs-ofctl -vPATTERN:'console:%c|%p|%m' --strict parse-nx-match < nx-match.txt], [0], [dnl # in port NXM_OF_IN_PORT(0000) NXM_OF_IN_PORT(fffe) # eth dst NXM_OF_ETH_DST(0002e30f80a4) NXM_OF_ETH_DST_W(010000000000/010000000000) NXM_OF_ETH_DST_W(000000000000/010000000000) NXM_OF_ETH_DST_W(010000000000/010000000000) NXM_OF_ETH_DST(0002e30f80a4) NXM_OF_ETH_DST_W(0002e30f80a4/feffffffffff) NXM_OF_ETH_DST_W(40125218000a/5a5a5a5a5a5a) # eth src NXM_OF_ETH_SRC(020898456ddb) NXM_OF_ETH_SRC_W(012345014545/ffffff555555) NXM_OF_ETH_SRC(020898456ddb) # eth type NXM_OF_ETH_TYPE(0800) NXM_OF_IN_PORT(0012), NXM_OF_ETH_TYPE(0800) # vlan tci NXM_OF_VLAN_TCI(f009) nx_pull_match() returned error OFPBMC_DUP_FIELD NXM_OF_VLAN_TCI(0000) NXM_OF_VLAN_TCI(3123) NXM_OF_VLAN_TCI(0123) NXM_OF_VLAN_TCI_W(1123/1fff) NXM_OF_VLAN_TCI(1123) NXM_OF_VLAN_TCI_W(f000/f000) NXM_OF_VLAN_TCI_W(0000/e000) # IP TOS NXM_OF_ETH_TYPE(0800), NXM_OF_IP_TOS(f0) nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP ECN NXM_OF_ETH_TYPE(0800), NXM_NX_IP_ECN(03) nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP protocol NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(01) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(05) nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP TTL NXM_OF_ETH_TYPE(0800), NXM_NX_IP_TTL(80) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_TTL(ff) nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP source NXM_OF_ETH_TYPE(0800), NXM_OF_IP_SRC(ac100014) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_SRC_W(c0a80000/ffff0000) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_SRC_W(40080000/5a5a5a5a) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_SRC(c0a80000) NXM_OF_ETH_TYPE(0800) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP destination NXM_OF_ETH_TYPE(0800), NXM_OF_IP_DST(ac100014) NXM_OF_ETH_TYPE(0800), 
NXM_OF_IP_DST_W(c0a80000/ffff0000) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_DST_W(40080000/5a5a5a5a) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_DST(c0a80000) NXM_OF_ETH_TYPE(0800) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # TCP source port NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06), NXM_OF_TCP_SRC(4231) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06), NXM_OF_TCP_SRC_W(5050/f0f0) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06), NXM_OF_TCP_SRC(5050) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06) nx_pull_match() returned error OFPBMC_BAD_PREREQ # TCP destination port NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06), NXM_OF_TCP_DST(4231) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06), NXM_OF_TCP_DST_W(fde0/fff0) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06), NXM_OF_TCP_DST(fde0) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(06) nx_pull_match() returned error OFPBMC_BAD_PREREQ # UDP source port NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(11), NXM_OF_UDP_SRC(8732) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(11), NXM_OF_UDP_SRC_W(0132/01ff) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(11), NXM_OF_UDP_SRC(0132) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(11) nx_pull_match() returned error OFPBMC_BAD_PREREQ # UDP destination port NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(11), NXM_OF_UDP_DST(1782) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(11), NXM_OF_UDP_DST_W(5005/f00f) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(11), NXM_OF_UDP_DST(5005) NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(11) nx_pull_match() returned error OFPBMC_BAD_PREREQ # ICMP type NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(01), NXM_OF_ICMP_TYPE(12) nx_pull_match() returned error OFPBMC_BAD_PREREQ # ICMP code NXM_OF_ETH_TYPE(0800), NXM_OF_IP_PROTO(01), NXM_OF_ICMP_CODE(12) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ARP opcode NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_OP(0001) nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned 
error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_DUP_FIELD # ARP source protocol address NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_SPA(ac100014) NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_SPA_W(c0a81200/ffffff00) NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_SPA_W(80a80200/aaaaaa00) NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_SPA(c0a81234) NXM_OF_ETH_TYPE(0806) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ARP destination protocol address NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_TPA(ac100014) NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_TPA_W(c0a81200/ffffff00) NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_TPA_W(40201234/77777777) NXM_OF_ETH_TYPE(0806), NXM_OF_ARP_TPA(c0a81234) NXM_OF_ETH_TYPE(0806) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ARP source hardware address NXM_OF_ETH_TYPE(0806), NXM_NX_ARP_SHA(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ARP destination hardware address NXM_OF_ETH_TYPE(0806), NXM_NX_ARP_THA(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # RARP opcode NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_OP(0003) nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_DUP_FIELD # RARP source protocol address NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_SPA(ac100014) NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_SPA_W(c0a81200/ffffff00) NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_SPA_W(80a80200/aaaaaa00) NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_SPA(c0a81234) NXM_OF_ETH_TYPE(8035) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # RARP destination protocol address NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_TPA(ac100014) NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_TPA_W(c0a81200/ffffff00) 
NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_TPA_W(40201234/77777777) NXM_OF_ETH_TYPE(8035), NXM_OF_ARP_TPA(c0a81234) NXM_OF_ETH_TYPE(8035) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # RARP source hardware address NXM_OF_ETH_TYPE(8035), NXM_NX_ARP_SHA(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # RARP destination hardware address NXM_OF_ETH_TYPE(8035), NXM_NX_ARP_THA(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # IPv6 source NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC(20010db83c4d00010002000300040005) nx_pull_match() returned error OFPBMC_BAD_PREREQ NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC_W(00000818184800000000000000000000/5a5a5a5a5a5a5a5a0000000000000000) NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_SRC(20010db83c4d00010000000000000000) NXM_OF_ETH_TYPE(86dd) nx_pull_match() returned error OFPBMC_BAD_PREREQ # IPv6 destination NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_DST(20010db83c4d00010002000300040005) nx_pull_match() returned error OFPBMC_BAD_PREREQ NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_DST_W(20010530344500010000000000000000/77777777777777777777777777777777) NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_DST(20010db83c4d00010000000000000000) NXM_OF_ETH_TYPE(86dd) nx_pull_match() returned error OFPBMC_BAD_PREREQ # IPv6 Flow Label nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_PREREQ NXM_OF_ETH_TYPE(86dd), NXM_NX_IPV6_LABEL(0000000f) # ND target address NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(87), NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(88), NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(87), 
NXM_NX_ND_TARGET_W(00010520080900010000000000040000/0123456789abcdeffedcba9876543210) NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(87), NXM_NX_ND_TARGET(20010db83c4d00010002000300040005) NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(87) NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(88), NXM_NX_ND_TARGET_W(20000898344400000002000300000005/fedcba98765432100123456789abcdef) # ND source hardware address NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(87), NXM_NX_ND_TARGET(20010db83c4d00010002000300040005), NXM_NX_ND_SLL(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ND destination hardware address NXM_OF_ETH_TYPE(86dd), NXM_OF_IP_PROTO(3a), NXM_NX_ICMPV6_TYPE(88), NXM_NX_ND_TARGET(20010db83c4d00010002000300040005), NXM_NX_ND_TLL(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # IPv4 fragments. NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(01) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(02) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(00/01) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(00/02) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(01/01) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG_W(02/02) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(03) NXM_OF_ETH_TYPE(0800), NXM_NX_IP_FRAG(00) nx_pull_match() returned error OFPBMC_BAD_VALUE # IPv6 fragments. 
NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(01) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(02) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(00/01) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(00/02) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(01/01) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG_W(02/02) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(00) NXM_OF_ETH_TYPE(86dd), NXM_NX_IP_FRAG(03) nx_pull_match() returned error OFPBMC_BAD_VALUE # Flow cookie. NXM_NX_COOKIE(00000000abcdef01) NXM_NX_COOKIE_W(84200000abcdef01/84200000ffffffff) NXM_NX_COOKIE(84200000abcdef01) # Tunnel ID. NXM_NX_TUN_ID(00000000abcdef01) NXM_NX_TUN_ID_W(84200000abcdef01/84200000ffffffff) NXM_NX_TUN_ID(84200000abcdef01) # Register 0. NXM_NX_REG0(acebdf56) NXM_NX_REG0_W(a0e0d050/f0f0f0f0) NXM_NX_REG0(a0e0d050) # Invalid field number. nx_pull_match() returned error OFPBMC_BAD_FIELD # Unimplemented registers. # # This test assumes that at least two registers, but fewer than 16, # registers are implemented. NXM_NX_REG0(12345678) NXM_NX_REG0_W(12345678/12345678) nx_pull_match() returned error OFPBMC_BAD_FIELD nx_pull_match() returned error OFPBMC_BAD_FIELD ], [stderr]) # Check that at least the first warning made it. (It's rate-limited # so a variable number could show up, especially under valgrind etc.) AT_CHECK([grep 'has 1-bits in value' stderr | sed 1q], [0], [dnl nx_match|WARN|NXM/OXM entry NXM_OF_ETH_DST_W(ffffffffffff/010000000000) has 1-bits in value for bits wildcarded by the mask. (Future versions of OVS may report this as an OpenFlow error.) ]) # Check that there wasn't any other stderr output. 
AT_CHECK([grep -v 'has 1-bits in value' stderr], [1]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-ofp10-match]) AT_KEYWORDS([OF1.0]) AT_DATA([test-data], [dnl # in_port=LOCAL 003820fe fffe xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # dl_src=00:01:02:03:04:05 003820fb xxxx 000102030405 xxxxxxxxxxxx xxxx xx xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # dl_dst=10:20:30:40:50:60 003820f7 xxxx xxxxxxxxxxxx 102030405060 xxxx xx xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # dl_vlan=291 003820fd xxxx xxxxxxxxxxxx xxxxxxxxxxxx 0123 xx xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # dl_vlan_pcp=5 002820ff xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx 05 xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # dl_vlan=291,dl_vlan_pcp=4 002820fd xxxx xxxxxxxxxxxx xxxxxxxxxxxx 0123 04 xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # vlan_tci=0x0000 003820fd xxxx xxxxxxxxxxxx xxxxxxxxxxxx ffff xx xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx dnl dl_vlan_pcp doesn't make sense when dl_vlan is "none", so dnl OVS ignores it and drops it on output. 
# vlan_tci=0x0000 # 1: 28 -> 38 # 20: 05 -> 00 002820fd xxxx xxxxxxxxxxxx xxxxxxxxxxxx ffff 05 xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx dnl Invalid VID and PCP discards out-of-range bits: # dl_vlan=256,dl_vlan_pcp=7 # 18: f1 -> 01 # 20: ff -> 07 002820fd xxxx xxxxxxxxxxxx xxxxxxxxxxxx f100 ff xx xxxx xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # dl_type=0x1234 003820ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 1234 xx xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # ip,nw_proto=5 003820cf xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 05 xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx dnl Ignore nw_proto if not IP or ARP: # dl_type=0x1234,nw_proto=5 # normal: 3: cf -> ef # normal: 25: 05 -> 00 & ofp_util|INFO|normalization changed ofp_match, details: & ofp_util|INFO| pre: dl_type=0x1234,nw_proto=5 & ofp_util|INFO|post: dl_type=0x1234 003820cf xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 1234 xx 05 xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # ip,nw_tos=252 001820ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 fc xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx dnl Ignore nw_tos if not IP: # arp,nw_tos=4 # 24: 05 -> 04 # normal: 1: 18 -> 38 # normal: 24: 04 -> 00 & ofp_util|INFO|normalization changed ofp_match, details: & ofp_util|INFO| pre: arp,nw_tos=4 & ofp_util|INFO|post: arp 001820ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0806 05 xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx dnl Low 2 bits of invalid TOS are forced to 0: # ip,nw_tos=48 # 24: 31 -> 30 001820ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 31 xx xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # arp,arp_op=2 003820cf xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0806 xx 02 xxxx dnl xxxxxxxx xxxxxxxx xxxx xxxx # ip,nw_src=192.168.128.85 003800ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx xx xxxx dnl c0a88055 xxxxxxxx xxxx xxxx # ip,nw_src=192.168.128.0/24 # 31: 55 -> 00 003808ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx xx xxxx dnl c0a88055 xxxxxxxx xxxx xxxx # ip,nw_dst=192.168.128.85 003020ef xxxx xxxxxxxxxxxx 
xxxxxxxxxxxx xxxx xx xx 0800 xx xx xxxx dnl xxxxxxxx c0a88055 xxxx xxxx # ip,nw_dst=192.168.128.0/24 # 35: 55 -> 00 003220ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx xx xxxx dnl xxxxxxxx c0a88055 xxxx xxxx # arp,arp_spa=192.168.128.85 003800ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0806 xx xx xxxx dnl c0a88055 xxxxxxxx xxxx xxxx # arp,arp_spa=192.168.128.0/24 # 31: 55 -> 00 003808ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0806 xx xx xxxx dnl c0a88055 xxxxxxxx xxxx xxxx # arp,arp_tpa=192.168.128.85 003020ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0806 xx xx xxxx dnl xxxxxxxx c0a88055 xxxx xxxx # arp,arp_tpa=192.168.128.0/24 # 35: 55 -> 00 003220ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0806 xx xx xxxx dnl xxxxxxxx c0a88055 xxxx xxxx dnl Ignore nw_src if not IP or ARP: # dl_type=0x1234,nw_src=192.168.128.0/24 # 31: 55 -> 00 # normal: 2: 08 -> 20 # normal: 28: c0 -> 00 # normal: 29: a8 -> 00 # normal: 30: 80 -> 00 & ofp_util|INFO|normalization changed ofp_match, details: & ofp_util|INFO| pre: dl_type=0x1234,nw_src=192.168.128.0/24 & ofp_util|INFO|post: dl_type=0x1234 003808ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 1234 xx xx xxxx dnl c0a88055 xxxxxxxx xxxx xxxx dnl Ignore nw_dst if not IP or ARP: # dl_type=0x1234,nw_dst=192.168.128.0/24 # 35: 55 -> 00 # normal: 1: 32 -> 38 # normal: 32: c0 -> 00 # normal: 33: a8 -> 00 # normal: 34: 80 -> 00 & ofp_util|INFO|normalization changed ofp_match, details: & ofp_util|INFO| pre: dl_type=0x1234,nw_dst=192.168.128.0/24 & ofp_util|INFO|post: dl_type=0x1234 003220ef xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 1234 xx xx xxxx dnl xxxxxxxx c0a88055 xxxx xxxx # tcp,tp_src=443 0038208f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 06 xxxx dnl xxxxxxxx xxxxxxxx 01bb xxxx # tcp,tp_dst=443 0038204f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 06 xxxx dnl xxxxxxxx xxxxxxxx xxxx 01bb # udp,tp_src=443 0038208f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 11 xxxx dnl xxxxxxxx xxxxxxxx 01bb xxxx # 
udp,tp_dst=443 0038204f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 11 xxxx dnl xxxxxxxx xxxxxxxx xxxx 01bb # sctp,tp_src=443 0038208f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 84 xxxx dnl xxxxxxxx xxxxxxxx 01bb xxxx # sctp,tp_dst=443 0038204f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 84 xxxx dnl xxxxxxxx xxxxxxxx xxxx 01bb # icmp,icmp_type=5 0038208f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 01 xxxx dnl xxxxxxxx xxxxxxxx 0005 xxxx # icmp,icmp_code=8 0038204f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 01 xxxx dnl xxxxxxxx xxxxxxxx xxxx 0008 dnl Ignore tp_src if not TCP/UDP/SCTP: # ip,nw_proto=21,tp_src=443 # normal: 3: 8f -> cf # normal: 36: 01 -> 00 # normal: 37: bb -> 00 & ofp_util|INFO|normalization changed ofp_match, details: & ofp_util|INFO| pre: ip,nw_proto=21,tp_src=443 & ofp_util|INFO|post: ip,nw_proto=21 0038208f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 15 xxxx dnl xxxxxxxx xxxxxxxx 01bb xxxx dnl Ignore tp_dst if not TCP/UDP/SCTP: # ip,nw_proto=21,tp_dst=443 # normal: 3: 4f -> cf # normal: 38: 01 -> 00 # normal: 39: bb -> 00 dnl The normalization details are suppressed here due to rate-limiting. 
0038204f xxxx xxxxxxxxxxxx xxxxxxxxxxxx xxxx xx xx 0800 xx 15 xxxx dnl xxxxxxxx xxxxxxxx xxxx 01bb ]) sed '/^[[#&]]/d' < test-data > input.txt sed -n 's/^# //p; /^$/p' < test-data > expout sed -n 's/^& //p' < test-data > experr AT_CAPTURE_FILE([input.txt]) AT_CAPTURE_FILE([expout]) AT_CAPTURE_FILE([experr]) AT_CHECK( [ovs-ofctl '-vPATTERN:console:%c|%p|%m' parse-ofp10-match < input.txt], [0], [expout], [experr]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-ofp11-match]) AT_KEYWORDS([OF1.1]) AT_DATA([test-data], [dnl # in_port=LOCAL 0000 0058 fffffffe 000003fe dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # bad ofp11_match: OFPBMC_BAD_VALUE & ofp_util|WARN|port 305419896 is outside the supported range 0 through 65279 or 0xffffff00 through 0xffffffff 0000 0058 12345678 000003fe dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_src=00:01:02:03:04:05 0000 0058 00000000 000003ff dnl 000102030405000000000000 000000000000ffffffffffff dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_src=55:55:55:55:55:55/55:55:55:55:55:55 0000 0058 00000000 000003ff dnl 555555555555aaaaaaaaaaaa 000000000000ffffffffffff dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_dst=00:01:02:03:04:05 0000 0058 00000000 000003ff dnl 000000000000ffffffffffff 000102030405000000000000 dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_dst=01:00:00:00:00:00/01:00:00:00:00:00 0000 0058 00000000 000003ff dnl 000000000000ffffffffffff 010000000000feffffffffff dnl 0000 00 00 0000 00 00 00000000ffffffff 
00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_dst=00:01:02:03:04:05/fe:ff:ff:ff:ff:ff 0000 0058 00000000 000003ff dnl 000000000000ffffffffffff 000102030405010000000000 dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_dst=55:55:55:55:55:55/55:55:55:55:55:55 0000 0058 00000000 000003ff dnl 000000000000ffffffffffff 555555555555aaaaaaaaaaaa dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff dnl dl_vlan_pcp is ignored if dl_vlan is wildcarded, which causes the dnl the wildcard bit and the dl_vlan_pcp to be dropped for output: # in_port=1 # 11: fa -> fe # 38: 03 -> 00 0000 0058 00000001 000003fa dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 03 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_vlan=291 0000 0058 00000000 000003fd dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0123 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff dnl OFPVID_NONE: # vlan_tci=0x0000 0000 0058 00000000 000003fd dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl ffff 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff dnl OFPVID_NONE ignores dl_vlan_pcp even if not wildcarded, which causes dnl the wildcard bit and the dl_vlan_pcp to be dropped for output: # vlan_tci=0x0000 # 11: f9 -> fd # 38: 05 -> 00 0000 0058 00000000 000003f9 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl ffff 05 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # vlan_tci=0x1000/0x1000 0000 0058 00000000 000003fd dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl fffe 00 00 0000 00 00 00000000ffffffff 
00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff dnl Try invalid VID: # bad ofp11_match: OFPBMC_BAD_VALUE 0000 0058 00000000 000003fd dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 1234 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_vlan_pcp=4 0000 0058 00000000 000003f9 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl fffe 04 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_vlan=10,dl_vlan_pcp=6 0000 0058 00000000 000003f9 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 000a 06 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # dl_type=0x1234 0000 0058 00000000 000003f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 1234 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # ip,nw_tos=252 0000 0058 00000000 000003e7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 fc 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff dnl Try invalid TOS: # bad ofp11_match: OFPBMC_BAD_VALUE 0000 0058 00000000 000003e7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 01 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # ip,nw_proto=5 0000 0058 00000000 000003d7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 05 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # arp,arp_op=2 0000 0058 00000000 000003d7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0806 00 02 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # ip,nw_src=192.168.128.0/24 
0000 0058 00000000 000003f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 00 c0a88000000000ff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # ip,nw_src=128.160.128.0/165.165.165.165 # 44: c0 -> 80 # 45: a8 -> a0 0000 0058 00000000 000003f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 00 c0a880005a5a5a5a 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # ip,nw_dst=192.168.128.0/24 0000 0058 00000000 000003f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 00 00000000ffffffff c0a88000000000ff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # ip,nw_dst=128.160.128.0/165.165.165.165 # 52: c0 -> 80 # 53: a8 -> a0 0000 0058 00000000 000003f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 00 00000000ffffffff c0a880005a5a5a5a 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # arp,arp_spa=192.168.128.0/24 0000 0058 00000000 000003f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0806 00 00 c0a88000000000ff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # arp,arp_tpa=192.168.128.0/24 0000 0058 00000000 000003f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0806 00 00 00000000ffffffff c0a88000000000ff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # tcp,tp_src=443 0000 0058 00000000 00000397 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 06 00000000ffffffff 00000000ffffffff 01bb 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # tcp,tp_dst=443 0000 0058 00000000 00000357 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 06 00000000ffffffff 00000000ffffffff 0000 01bb dnl 00000000 00 000000 0000000000000000ffffffffffffffff # udp,tp_src=443 0000 0058 00000000 00000397 dnl 
000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 11 00000000ffffffff 00000000ffffffff 01bb 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # icmp,icmp_type=5 0000 0058 00000000 00000397 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 01 00000000ffffffff 00000000ffffffff 0005 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # icmp,icmp_code=8 0000 0058 00000000 00000357 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 01 00000000ffffffff 00000000ffffffff 0000 0008 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # udp,tp_src=443 0000 0058 00000000 00000397 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 11 00000000ffffffff 00000000ffffffff 01bb 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # udp,tp_dst=443 0000 0058 00000000 00000357 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 11 00000000ffffffff 00000000ffffffff 0000 01bb dnl 00000000 00 000000 0000000000000000ffffffffffffffff # sctp 0000 0058 00000000 000003d7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 84 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # sctp,tp_src=443 0000 0058 00000000 00000397 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 84 00000000ffffffff 00000000ffffffff 01bb 0000 dnl 00000000 00 000000 0000000000000000ffffffffffffffff # sctp,tp_dst=443 0000 0058 00000000 00000357 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 84 00000000ffffffff 00000000ffffffff 0000 01bb dnl 00000000 00 000000 0000000000000000ffffffffffffffff dnl Ignore tp_src if not TCP/UDP/SCTP: # ip,nw_proto=21 # 11: 97 -> d7 # 60: 01 -> 00 # 61: bb -> 00 0000 0058 00000000 00000397 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 15 00000000ffffffff 00000000ffffffff 01bb 0000 
dnl 00000000 00 000000 0000000000000000ffffffffffffffff dnl Ignore tp_dst if not TCP/UDP/SCTP: # ip,nw_proto=22 # 11: 57 -> d7 # 62: 01 -> 00 # 63: bb -> 00 0000 0058 00000000 00000357 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0800 00 16 00000000ffffffff 00000000ffffffff 0000 01bb dnl 00000000 00 000000 0000000000000000ffffffffffffffff dnl mpls_label not yet supported: # bad ofp11_match: OFPBMC_BAD_TAG 0000 0058 00000000 000002f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 8847 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 12345678 00 000000 0000000000000000ffffffffffffffff dnl mpls_tc not yet supported: # bad ofp11_match: OFPBMC_BAD_TAG 0000 0058 00000000 000001f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 8848 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 5a 000000 0000000000000000ffffffffffffffff dnl mpls_label and mpls_tc must be ignored if dl_type is not MPLS: # dl_type=0x1234 # 10: 00 -> 03 # 64: 12 -> 00 # 65: 34 -> 00 # 66: 56 -> 00 # 67: 78 -> 00 # 68: 5a -> 00 0000 0058 00000000 000000f7 dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 1234 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 12345678 5a 000000 0000000000000000ffffffffffffffff dnl metadata match: # metadata=0x1234567890abcdef 0000 0058 00000000 000003ff dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 1234567890abcdef0000000000000000 dnl metadata match: # metadata=0x5555555555555555/0x5555555555555555 0000 0058 00000000 000003ff dnl 000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 5555555555555555aaaaaaaaaaaaaaaa dnl metadata match: # metadata=0x1234000090ab0000/0xffff0000ffff0000 # 74: 56 -> 00 # 75: 78 -> 00 # 78: cd -> 00 # 79: ef -> 00 0000 0058 00000000 000003ff dnl 
000000000000ffffffffffff 000000000000ffffffffffff dnl 0000 00 00 0000 00 00 00000000ffffffff 00000000ffffffff 0000 0000 dnl 00000000 00 000000 1234567890abcdef0000ffff0000ffff ]) sed '/^[[#&]]/d' < test-data > input.txt sed -n 's/^# //p; /^$/p' < test-data > expout sed -n 's/^& //p' < test-data > experr AT_CAPTURE_FILE([input.txt]) AT_CAPTURE_FILE([expout]) AT_CAPTURE_FILE([experr]) AT_CHECK( [ovs-ofctl '-vPATTERN:console:%c|%p|%m' parse-ofp11-match < input.txt], [0], [expout], [experr]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-nx-match loose]) AT_KEYWORDS([nx-match]) AT_DATA([nx-match.txt], [dnl NXM_OF_IN_PORT(0001), 01020304(1111/2222), NXM_OF_ETH_TYPE(0800) ]) AT_CHECK([ovs-ofctl --strict parse-nx-match < nx-match.txt], [0], [dnl nx_pull_match() returned error OFPBMC_BAD_FIELD ]) AT_CHECK([ovs-ofctl parse-nx-match < nx-match.txt], [0], [dnl NXM_OF_IN_PORT(0001), NXM_OF_ETH_TYPE(0800) ]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-oxm]) AT_KEYWORDS([oxm]) AT_DATA([oxm.txt], [dnl # in port OXM_OF_IN_PORT(00000000) OXM_OF_IN_PORT(fffffffe) # metadata OXM_OF_METADATA(5a5a5a5a5a5a5a5a) OXM_OF_METADATA_W(0000000000000000/00000000ffffffff) OXM_OF_METADATA_W(1234567890abcdef/ffff0000ffff0000) OXM_OF_METADATA_W(1234567890abcdef/ffffffffffffffff) OXM_OF_METADATA_W(1234567890abcdef/0000000000000000) # eth dst OXM_OF_ETH_DST(0002e30f80a4) OXM_OF_ETH_DST_W(010000000000/010000000000) OXM_OF_ETH_DST_W(000000000000/010000000000) OXM_OF_ETH_DST_W(ffffffffffff/010000000000) OXM_OF_ETH_DST_W(0002e30f80a4/ffffffffffff) OXM_OF_ETH_DST_W(0002e30f80a4/000000000000) OXM_OF_ETH_DST_W(0002e30f80a4/feffffffffff) # eth src OXM_OF_ETH_SRC(020898456ddb) # eth type OXM_OF_ETH_TYPE(0800) OXM_OF_ETH_TYPE(0800) OXM_OF_IN_PORT(00000012) # vlan OXM_OF_VLAN_VID(1009) OXM_OF_VLAN_VID(1009) # Duplicate Field OXM_OF_VLAN_VID(f009) # Bad Value OXM_OF_VLAN_PCP(00) # Bad Pre-Requisite OXM_OF_VLAN_VID(0000) # Packets without 802.1Q header or with VID=0 OXM_OF_VLAN_VID(1123) # Packets with VID=123, any PCP 
OXM_OF_VLAN_VID(1123) OXM_OF_VLAN_PCP(01) # Packets with VID=123, PCP=1. OXM_OF_VLAN_VID(0123) # Does not make sense (but supported anyway) OXM_OF_VLAN_VID_W(0123/0123) # Does not make sense (but supported anyway) OXM_OF_VLAN_VID_W(1123/0123) # Does not make sense (but supported anyway) OXM_OF_VLAN_VID_W(0123/1123) # Does not make sense (but supported anyway) OXM_OF_VLAN_VID(0123) OXM_OF_VLAN_PCP(01) #Bad Pre-Requisite OXM_OF_VLAN_VID_W(1123/1fff) # Packets with VID=123, any PCP. OXM_OF_VLAN_VID_W(1123/ffff) # Packets with VID=123, any PCP. OXM_OF_VLAN_VID_W(1123/0000) # Packets with or without 802.1Q header OXM_OF_VLAN_VID_W(1123/1f0f), # Packets with # VID=123 (masked) OXM_OF_VLAN_VID_W(1123/1f0f), OXM_OF_VLAN_PCP(01) # Packets with VID=123 (masked), any PCP. OXM_OF_VLAN_VID_W(1000/1000) # Packets with any VID, any PCP OXM_OF_VLAN_VID_W(1000/1000), OXM_OF_VLAN_PCP(01) # Packets with any VID, PCP=1. # IP TOS OXM_OF_ETH_TYPE(0800) OXM_OF_IP_DSCP(f0) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_DSCP(41) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_DSCP(3f) OXM_OF_IP_DSCP(f0) # IP ECN OXM_OF_ETH_TYPE(0800) OXM_OF_IP_ECN(03) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_ECN(06) OXM_OF_IP_ECN(03) # IP protocol OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(01) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(05) OXM_OF_IP_PROTO(05) # IP source OXM_OF_ETH_TYPE(0800) OXM_OF_IPV4_SRC(ac100014) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV4_SRC_W(C0a80000/FFFF0000) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV4_SRC_W(C0a80000/FFFFFFFF) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV4_SRC_W(C0a80000/00000000) OXM_OF_ETH_TYPE(0806) OXM_OF_IPV4_SRC(ac100014) OXM_OF_IPV4_SRC_W(C0D80000/FFFF0000) # IP destination OXM_OF_ETH_TYPE(0800) OXM_OF_IPV4_DST(ac100014) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV4_DST_W(C0a88012/FFFF0000) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV4_DST_W(C0a88012/FFFFFFFF) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV4_DST_W(C0a88012/00000000) OXM_OF_IPV4_DST(ac100014) OXM_OF_ETH_TYPE(0806) OXM_OF_IPV4_DST_W(C0D80000/FFFF0000) # TCP source port OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) 
OXM_OF_TCP_SRC(4231) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_TCP_SRC_W(5050/F0F0) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_TCP_SRC_W(5050/FFFF) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_TCP_SRC_W(5050/0000) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(07) OXM_OF_TCP_SRC(4231) # TCP destination port OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_TCP_DST(4231) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_TCP_DST_W(FDE0/FFF0) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_TCP_DST_W(FDE0/FFFF) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_TCP_DST_W(FDE0/0000) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(07) OXM_OF_TCP_DST(4231) # UDP source port OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(11) OXM_OF_UDP_SRC(8732) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(11) OXM_OF_UDP_SRC_W(0132/01FF) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(11) OXM_OF_UDP_SRC_W(0132/FFFF) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(11) OXM_OF_UDP_SRC_W(0132/0000) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_UDP_SRC(7823) # UDP destination port OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(11) OXM_OF_UDP_DST(1782) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(11) OXM_OF_UDP_DST_W(5005/F00F) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(11) OXM_OF_UDP_DST_W(5005/FFFF) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(11) OXM_OF_UDP_DST_W(5005/0000) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(02) OXM_OF_UDP_DST(1293) # SCTP source port OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(84) OXM_OF_SCTP_SRC(8732) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(84) OXM_OF_SCTP_SRC_W(0132/01FF) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(84) OXM_OF_SCTP_SRC_W(0132/FFFF) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(84) OXM_OF_SCTP_SRC_W(0132/0000) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(06) OXM_OF_SCTP_SRC(7823) # SCTP destination port OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(84) OXM_OF_SCTP_DST(1782) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(84) OXM_OF_SCTP_DST_W(5005/F00F) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(84) OXM_OF_SCTP_DST_W(5005/FFFF) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(84) 
OXM_OF_SCTP_DST_W(5005/0000) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(02) OXM_OF_SCTP_DST(1293) # ICMP type OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(01) OXM_OF_ICMPV4_TYPE(12) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(00) OXM_OF_ICMPV4_TYPE(10) # ICMP code OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(01) OXM_OF_ICMPV4_CODE(12) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(00) OXM_OF_ICMPV4_CODE(10) OXM_OF_ETH_TYPE(0800) OXM_OF_ICMPV4_CODE(10) OXM_OF_ICMPV4_CODE(00) # ARP opcode OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_OP(0001) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_OP(1111) OXM_OF_ETH_TYPE(0000) OXM_OF_ARP_OP(0001) OXM_OF_ARP_OP(0001) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_OP(0001) OXM_OF_ARP_OP(0001) # ARP source protocol address OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_SPA(ac100014) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_SPA_W(C0a81234/FFFFFF00) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_SPA_W(C0a81234/FFFFFFFF) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_SPA_W(C0a81234/00000000) OXM_OF_ETH_TYPE(0800) OXM_OF_ARP_SPA(ac100014) OXM_OF_ARP_SPA_W(C0D8fedc/FFFF0000) # ARP destination protocol address OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_TPA(ac100014) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_TPA_W(C0a812fe/FFFFFF00) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_TPA_W(C0a812fe/FFFFFFFF) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_TPA_W(C0a812fe/00000000) OXM_OF_ETH_TYPE(0800) OXM_OF_ARP_TPA(ac100014) OXM_OF_ARP_TPA_W(C0D80000/FFFF0000) # ARP source hardware address OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_SHA(0002e30f80a4) OXM_OF_ETH_TYPE(0800) OXM_OF_ARP_SHA(0002e30f80a4) OXM_OF_ARP_SHA(0002e30f80a4) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_SHA_W(0002e30f80a4/ffffffffffff) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_SHA_W(0002e30f80a4/000000000000) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_SHA_W(0002e30f80a4/00000000000f) # ARP destination hardware address OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_THA(0002e30f80a4) OXM_OF_ETH_TYPE(0800) OXM_OF_ARP_THA(0002e30f80a4) OXM_OF_ARP_THA(0002e30f80a4) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_THA_W(0002e30f80a4/ffffffffffff) OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_THA_W(0002e30f80a4/000000000000) 
OXM_OF_ETH_TYPE(0806) OXM_OF_ARP_THA_W(0002e30f80a4/00000000000f) # IPv6 source OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_SRC(20010db83c4d00010002000300040005) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV6_SRC(20010db83c4d00010002000300040005) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffffffffffffffffffff) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_SRC_W(20010db83c4d00010000000000000000/00000000000000000000000000000000) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) # IPv6 destination OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_DST(20010db83c4d00010002000300040005) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV6_DST(20010db83c4d00010002000300040005) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_DST_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_DST_W(20010db83c4d00010000000000000000/ffffffffffffffffffffffffffffffff) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_DST_W(20010db83c4d00010000000000000000/00000000000000000000000000000000) OXM_OF_ETH_TYPE(0800) OXM_OF_IPV6_DST_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) # IPv6 Flow Label OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_FLABEL(1000000f) OXM_OF_IPV6_FLABEL(0000000f) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_FLABEL(0000000f) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_FLABEL_W(0000000f/0000000f) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_FLABEL_W(0000000f/000fffff) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_FLABEL_W(0000000f/000ffff0) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_FLABEL_W(0000000f/100fffff) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_FLABEL_W(0000000f/ffffffff) OXM_OF_ETH_TYPE(86dd) OXM_OF_IPV6_FLABEL_W(0000000f/00000000) # ND source hardware address OXM_OF_ETH_TYPE(86dd) OXM_OF_IP_PROTO(3a) OXM_OF_ICMPV6_TYPE(87) OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005) OXM_OF_IPV6_ND_SLL(0002e30f80a4) OXM_OF_ETH_TYPE(86dd) OXM_OF_IP_PROTO(3a) 
OXM_OF_ICMPV6_TYPE(88) OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005) OXM_OF_IPV6_ND_SLL(0002e30f80a4) OXM_OF_ETH_TYPE(86dd) OXM_OF_IP_PROTO(3b) OXM_OF_ICMPV6_TYPE(87) OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005) OXM_OF_IPV6_ND_SLL(0002e30f80a4) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(3a) OXM_OF_ICMPV6_TYPE(87) OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005) OXM_OF_IPV6_ND_SLL(0002e30f80a4) # ND destination hardware address OXM_OF_ETH_TYPE(86dd) OXM_OF_IP_PROTO(3a) OXM_OF_ICMPV6_TYPE(88) OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005) OXM_OF_IPV6_ND_TLL(0002e30f80a4) OXM_OF_ETH_TYPE(86dd) OXM_OF_IP_PROTO(3a) OXM_OF_ICMPV6_TYPE(87) OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005) OXM_OF_IPV6_ND_TLL(0002e30f80a4) OXM_OF_ETH_TYPE(86dd) OXM_OF_IP_PROTO(3b) OXM_OF_ICMPV6_TYPE(87) OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005) OXM_OF_IPV6_ND_TLL(0002e30f80a4) OXM_OF_ETH_TYPE(0800) OXM_OF_IP_PROTO(3a) OXM_OF_ICMPV6_TYPE(88) OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005) OXM_OF_IPV6_ND_TLL(0002e30f80a4) # Invalid field number. 
01020304(1111/2222) ]) AT_CHECK([ovs-ofctl '-vPATTERN:console:%c|%p|%m' --strict parse-oxm < oxm.txt], [0], [dnl # in port OXM_OF_IN_PORT(00000000) OXM_OF_IN_PORT(fffffffe) # metadata OXM_OF_METADATA(5a5a5a5a5a5a5a5a) OXM_OF_METADATA_W(0000000000000000/00000000ffffffff) OXM_OF_METADATA_W(1234000090ab0000/ffff0000ffff0000) OXM_OF_METADATA(1234567890abcdef) # eth dst OXM_OF_ETH_DST(0002e30f80a4) OXM_OF_ETH_DST_W(010000000000/010000000000) OXM_OF_ETH_DST_W(000000000000/010000000000) OXM_OF_ETH_DST_W(010000000000/010000000000) OXM_OF_ETH_DST(0002e30f80a4) OXM_OF_ETH_DST_W(0002e30f80a4/feffffffffff) # eth src OXM_OF_ETH_SRC(020898456ddb) # eth type OXM_OF_ETH_TYPE(0800) OXM_OF_IN_PORT(00000012), OXM_OF_ETH_TYPE(0800) # vlan nx_pull_match() returned error OFPBMC_DUP_FIELD nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_PREREQ OXM_OF_VLAN_VID(0000) OXM_OF_VLAN_VID(1123) OXM_OF_VLAN_VID(1123), OXM_OF_VLAN_PCP(01) OXM_OF_VLAN_VID(0123) OXM_OF_VLAN_VID_W(0123/0123) OXM_OF_VLAN_VID_W(0123/0123) OXM_OF_VLAN_VID_W(0123/1123) nx_pull_match() returned error OFPBMC_BAD_PREREQ OXM_OF_VLAN_VID(1123) OXM_OF_VLAN_VID(1123) OXM_OF_VLAN_VID_W(1103/1f0f) OXM_OF_VLAN_VID_W(1103/1f0f), OXM_OF_VLAN_PCP(01) OXM_OF_VLAN_VID_W(1000/1000) OXM_OF_VLAN_VID_W(1000/1000), OXM_OF_VLAN_PCP(01) # IP TOS nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_VALUE OXM_OF_ETH_TYPE(0800), OXM_OF_IP_DSCP(3f) nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP ECN OXM_OF_ETH_TYPE(0800), OXM_OF_IP_ECN(03) nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP protocol OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(01) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(05) nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP source OXM_OF_ETH_TYPE(0800), OXM_OF_IPV4_SRC(ac100014) OXM_OF_ETH_TYPE(0800), OXM_OF_IPV4_SRC_W(c0a80000/ffff0000) OXM_OF_ETH_TYPE(0800), OXM_OF_IPV4_SRC(c0a80000) 
OXM_OF_ETH_TYPE(0800) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # IP destination OXM_OF_ETH_TYPE(0800), OXM_OF_IPV4_DST(ac100014) OXM_OF_ETH_TYPE(0800), OXM_OF_IPV4_DST_W(c0a80000/ffff0000) OXM_OF_ETH_TYPE(0800), OXM_OF_IPV4_DST(c0a88012) OXM_OF_ETH_TYPE(0800) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # TCP source port OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(06), OXM_OF_TCP_SRC(4231) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(06), OXM_OF_TCP_SRC_W(5050/f0f0) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(06), OXM_OF_TCP_SRC(5050) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(06) nx_pull_match() returned error OFPBMC_BAD_PREREQ # TCP destination port OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(06), OXM_OF_TCP_DST(4231) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(06), OXM_OF_TCP_DST_W(fde0/fff0) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(06), OXM_OF_TCP_DST(fde0) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(06) nx_pull_match() returned error OFPBMC_BAD_PREREQ # UDP source port OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(11), OXM_OF_UDP_SRC(8732) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(11), OXM_OF_UDP_SRC_W(0132/01ff) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(11), OXM_OF_UDP_SRC(0132) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(11) nx_pull_match() returned error OFPBMC_BAD_PREREQ # UDP destination port OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(11), OXM_OF_UDP_DST(1782) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(11), OXM_OF_UDP_DST_W(5005/f00f) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(11), OXM_OF_UDP_DST(5005) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(11) nx_pull_match() returned error OFPBMC_BAD_PREREQ # SCTP source port OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(84), OXM_OF_SCTP_SRC(8732) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(84), OXM_OF_SCTP_SRC_W(0132/01ff) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(84), OXM_OF_SCTP_SRC(0132) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(84) nx_pull_match() returned error OFPBMC_BAD_PREREQ # SCTP 
destination port OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(84), OXM_OF_SCTP_DST(1782) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(84), OXM_OF_SCTP_DST_W(5005/f00f) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(84), OXM_OF_SCTP_DST(5005) OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(84) nx_pull_match() returned error OFPBMC_BAD_PREREQ # ICMP type OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(01), OXM_OF_ICMPV4_TYPE(12) nx_pull_match() returned error OFPBMC_BAD_PREREQ # ICMP code OXM_OF_ETH_TYPE(0800), OXM_OF_IP_PROTO(01), OXM_OF_ICMPV4_CODE(12) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ARP opcode OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_OP(0001) nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_DUP_FIELD # ARP source protocol address OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_SPA(ac100014) OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_SPA_W(c0a81200/ffffff00) OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_SPA(c0a81234) OXM_OF_ETH_TYPE(0806) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ARP destination protocol address OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_TPA(ac100014) OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_TPA_W(c0a81200/ffffff00) OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_TPA(c0a812fe) OXM_OF_ETH_TYPE(0806) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ARP source hardware address OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_SHA(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_SHA(0002e30f80a4) OXM_OF_ETH_TYPE(0806) OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_SHA_W(000000000004/00000000000f) # ARP destination hardware address OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_THA(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ 
nx_pull_match() returned error OFPBMC_BAD_PREREQ OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_THA(0002e30f80a4) OXM_OF_ETH_TYPE(0806) OXM_OF_ETH_TYPE(0806), OXM_OF_ARP_THA_W(000000000004/00000000000f) # IPv6 source OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_SRC(20010db83c4d00010002000300040005) nx_pull_match() returned error OFPBMC_BAD_PREREQ OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_SRC_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_SRC(20010db83c4d00010000000000000000) OXM_OF_ETH_TYPE(86dd) nx_pull_match() returned error OFPBMC_BAD_PREREQ # IPv6 destination OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_DST(20010db83c4d00010002000300040005) nx_pull_match() returned error OFPBMC_BAD_PREREQ OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_DST_W(20010db83c4d00010000000000000000/ffffffffffffffff0000000000000000) OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_DST(20010db83c4d00010000000000000000) OXM_OF_ETH_TYPE(86dd) nx_pull_match() returned error OFPBMC_BAD_PREREQ # IPv6 Flow Label nx_pull_match() returned error OFPBMC_BAD_VALUE nx_pull_match() returned error OFPBMC_BAD_PREREQ OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_FLABEL(0000000f) OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_FLABEL_W(0000000f/0000000f) OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_FLABEL(0000000f) OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_FLABEL_W(00000000/000ffff0) OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_FLABEL(0000000f) OXM_OF_ETH_TYPE(86dd), OXM_OF_IPV6_FLABEL(0000000f) OXM_OF_ETH_TYPE(86dd) # ND source hardware address OXM_OF_ETH_TYPE(86dd), OXM_OF_IP_PROTO(3a), OXM_OF_ICMPV6_TYPE(87), OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005), OXM_OF_IPV6_ND_SLL(0002e30f80a4) nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # ND destination hardware address OXM_OF_ETH_TYPE(86dd), OXM_OF_IP_PROTO(3a), OXM_OF_ICMPV6_TYPE(88), OXM_OF_IPV6_ND_TARGET(20010db83c4d00010002000300040005), OXM_OF_IPV6_ND_TLL(0002e30f80a4) nx_pull_match() returned error 
OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ nx_pull_match() returned error OFPBMC_BAD_PREREQ # Invalid field number. nx_pull_match() returned error OFPBMC_BAD_FIELD ], [stderr]) # Check that at least the first warning made it. (It's rate-limited # so a variable number could show up, especially under valgrind etc.) AT_CHECK([grep 'has 1-bits in value' stderr | sed 1q], [0], [dnl nx_match|WARN|NXM/OXM entry OXM_OF_METADATA_W(1234567890abcdef/ffff0000ffff0000) has 1-bits in value for bits wildcarded by the mask. (Future versions of OVS may report this as an OpenFlow error.) ]) # Check that there wasn't any other stderr output. AT_CHECK([grep -v 'has 1-bits in value' stderr], [1]) AT_CLEANUP AT_SETUP([ovs-ofctl parse-oxm loose]) AT_KEYWORDS([oxm]) AT_DATA([oxm.txt], [dnl OXM_OF_IN_PORT(00000001), 01020304(1111/2222), OXM_OF_ETH_TYPE(0800) ]) AT_CHECK([ovs-ofctl --strict parse-oxm < oxm.txt], [0], [dnl nx_pull_match() returned error OFPBMC_BAD_FIELD ]) AT_CHECK([ovs-ofctl parse-oxm < oxm.txt], [0], [dnl OXM_OF_IN_PORT(00000001), OXM_OF_ETH_TYPE(0800) ]) AT_CLEANUP dnl Check all of the patterns mentioned in the "VLAN Matching" section dnl in the DESIGN file at top level. 
AT_SETUP([ovs-ofctl check-vlan]) AT_KEYWORDS([VLAN]) dnl [1] AT_CHECK([ovs-ofctl check-vlan 0000 0000], [0], [dnl -> 0000/0000 NXM: -> 0000/0000 OXM: -> 0000/0000,-- OF1.0: 0000/1,00/1 -> 0000/0000 OF1.1: 0000/1,00/1 -> 0000/0000 ]) dnl [2] AT_CHECK([ovs-ofctl check-vlan 0000 ffff], [0], [dnl vlan_tci=0x0000 -> 0000/ffff NXM: NXM_OF_VLAN_TCI(0000) -> 0000/ffff OXM: OXM_OF_VLAN_VID(0000) -> 0000/1fff,-- OF1.0: ffff/0,00/1 -> 0000/ffff OF1.1: ffff/0,00/1 -> 0000/ffff ]) dnl [3] AT_CHECK([ovs-ofctl check-vlan 1abc 1fff], [0], [dnl dl_vlan=2748 -> 1abc/1fff NXM: NXM_OF_VLAN_TCI_W(1abc/1fff) -> 1abc/1fff OXM: OXM_OF_VLAN_VID(1abc) -> 1abc/1fff,-- OF1.0: 0abc/0,00/1 -> 1abc/1fff OF1.1: 0abc/0,00/1 -> 1abc/1fff ]) dnl [4] AT_CHECK([ovs-ofctl check-vlan b000 f000], [0], [dnl dl_vlan_pcp=5 -> b000/f000 NXM: NXM_OF_VLAN_TCI_W(b000/f000) -> b000/f000 OXM: OXM_OF_VLAN_VID_W(1000/1000), OXM_OF_VLAN_PCP(05) -> 1000/1000,05 OF1.0: 0000/1,05/0 -> b000/f000 OF1.1: fffe/0,05/0 -> b000/f000 ]) dnl [5] AT_CHECK([ovs-ofctl check-vlan babc ffff], [0], [dnl dl_vlan=2748,dl_vlan_pcp=5 -> babc/ffff NXM: NXM_OF_VLAN_TCI(babc) -> babc/ffff OXM: OXM_OF_VLAN_VID(1abc), OXM_OF_VLAN_PCP(05) -> 1abc/1fff,05 OF1.0: 0abc/0,05/0 -> babc/ffff OF1.1: 0abc/0,05/0 -> babc/ffff ]) dnl [6] AT_CHECK([ovs-ofctl check-vlan 0000 0fff], [0], [dnl vlan_tci=0x0000/0x0fff -> 0000/0fff NXM: NXM_OF_VLAN_TCI_W(0000/0fff) -> 0000/0fff OXM: OXM_OF_VLAN_VID_W(0000/0fff) -> 0000/0fff,-- OF1.0: 0000/0,00/1 -> 1000/1fff OF1.1: 0000/0,00/1 -> 1000/1fff ]) dnl [7] AT_CHECK([ovs-ofctl check-vlan 0000 f000], [0], [dnl vlan_tci=0x0000/0xf000 -> 0000/f000 NXM: NXM_OF_VLAN_TCI_W(0000/f000) -> 0000/f000 OXM: OXM_OF_VLAN_VID_W(0000/1000) -> 0000/1000,-- OF1.0: ffff/0,00/1 -> 0000/ffff OF1.1: ffff/0,00/1 -> 0000/ffff ]) dnl [8] AT_CHECK([ovs-ofctl check-vlan 0000 efff], [0], [dnl vlan_tci=0x0000/0xefff -> 0000/efff NXM: NXM_OF_VLAN_TCI_W(0000/efff) -> 0000/efff OXM: OXM_OF_VLAN_VID_W(0000/0fff) -> 0000/0fff,-- OF1.0: 0000/0,00/0 -> 
1000/ffff OF1.1: 0000/0,00/0 -> 1000/ffff ]) dnl [9] AT_CHECK([ovs-ofctl check-vlan 1001 1001], [0], [dnl vlan_tci=0x1001/0x1001 -> 1001/1001 NXM: NXM_OF_VLAN_TCI_W(1001/1001) -> 1001/1001 OXM: OXM_OF_VLAN_VID_W(1001/1001) -> 1001/1001,-- OF1.0: 0001/0,00/1 -> 1001/1fff OF1.1: 0001/0,00/1 -> 1001/1fff ]) dnl [10] AT_CHECK([ovs-ofctl check-vlan 3000 3000], [0], [dnl vlan_tci=0x3000/0x3000 -> 3000/3000 NXM: NXM_OF_VLAN_TCI_W(3000/3000) -> 3000/3000 OXM: OXM_OF_VLAN_VID_W(1000/1000), OXM_OF_VLAN_PCP(01) -> 1000/1000,01 OF1.0: 0000/1,01/0 -> 3000/f000 OF1.1: fffe/0,01/0 -> 3000/f000 ]) AT_CLEANUP dnl Check that "-F openflow10" rejects a flow_mod with unsupported features, dnl such as tunnels and metadata. AT_SETUP([ovs-ofctl -F option and NXM features]) AT_CHECK([ovs-ofctl -F openflow10 add-flow dummy tun_id=123,actions=drop], [1], [], [ovs-ofctl: none of the usable flow formats (NXM,OXM) is among the allowed flow formats (OpenFlow10) ]) AT_CHECK([ovs-ofctl -F openflow10 add-flow dummy metadata=123,actions=drop], [1], [], [ovs-ofctl: none of the usable flow formats (NXM,OXM,OpenFlow11) is among the allowed flow formats (OpenFlow10) ]) AT_CLEANUP dnl Check that "-F nxm" really forces add-flow to use the NXM flow format. dnl (If it doesn't, then either the tun_id won't show up at all, or it will dnl additionally show up as the top 32 bits of the cookie.) This checks dnl for regression against bug #4566. AT_SETUP([ovs-ofctl -F option with flow_mods]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -F nxm add-flow br0 tun_id=0x12345678,actions=drop]) AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0], [dnl NXST_FLOW reply: tun_id=0x12345678 actions=drop ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl Check that "-F openflow10" is really honored on dump-flows. dnl (If it isn't, then dump-flows will show the register match.)
AT_SETUP([ovs-ofctl dump-flows honors -F option]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl add-flow br0 reg0=0x12345,actions=drop]) AT_CHECK([ovs-ofctl -F openflow10 dump-flows br0 | ofctl_strip], [0], [dnl OFPST_FLOW reply: actions=drop ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl Check that "-F openflow10" fails on dump-flows if the requested match dnl can't be represented in OpenFlow 1.0. AT_SETUP([ovs-ofctl dump-flows rejects bad -F option]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl -F openflow10 dump-flows unix:br0.mgmt reg0=0xabcdef], [1], [], [ovs-ofctl: none of the usable flow formats (NXM,OXM) is among the allowed flow formats (OpenFlow10) ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl Check that add-flow reports non-normalized flows (feature #5029). AT_SETUP([ovs-ofctl add-flow reports non-normalized flows]) OVS_VSWITCHD_START AT_CHECK([ovs-ofctl TESTABLE_LOG add-flow br0 nw_src=1.2.3.4,actions=5], [0], [], [dnl ofp_util|INFO|normalization changed ofp_match, details: ofp_util|INFO| pre: nw_src=1.2.3.4 ofp_util|INFO|post: @&t@ ]) OVS_VSWITCHD_STOP AT_CLEANUP dnl Check that --sort and --rsort works with dump-flows dnl Default field is 'priority'. Flow entries are displayed based dnl on field to sort. 
AT_SETUP([ovs-ofctl dump-flows with sorting]) OVS_VSWITCHD_START AT_KEYWORDS([sort]) AT_DATA([allflows.txt], [[ priority=4,in_port=23213 actions=output:42 priority=5,in_port=1029 actions=output:43 priority=7,in_port=1029 actions=output:43 priority=3,in_port=1028 actions=output:44 priority=1,in_port=1026 actions=output:45 priority=6,in_port=1027 actions=output:64 priority=2,in_port=1025 actions=output:47 priority=8,tcp,tp_src=5 actions=drop priority=9,tcp,tp_src=6 actions=drop ]]) AT_CHECK([ovs-ofctl add-flows br0 allflows.txt ], [0], [ignore]) AT_CHECK([ovs-ofctl --sort dump-flows br0 | ofctl_strip], [0], [dnl priority=1,in_port=1026 actions=output:45 priority=2,in_port=1025 actions=output:47 priority=3,in_port=1028 actions=output:44 priority=4,in_port=23213 actions=output:42 priority=5,in_port=1029 actions=output:43 priority=6,in_port=1027 actions=output:64 priority=7,in_port=1029 actions=output:43 priority=8,tcp,tp_src=5 actions=drop priority=9,tcp,tp_src=6 actions=drop ]) AT_CHECK([ovs-ofctl --rsort dump-flows br0 | ofctl_strip], [0], [dnl priority=9,tcp,tp_src=6 actions=drop priority=8,tcp,tp_src=5 actions=drop priority=7,in_port=1029 actions=output:43 priority=6,in_port=1027 actions=output:64 priority=5,in_port=1029 actions=output:43 priority=4,in_port=23213 actions=output:42 priority=3,in_port=1028 actions=output:44 priority=2,in_port=1025 actions=output:47 priority=1,in_port=1026 actions=output:45 ]) AT_CHECK([ovs-ofctl --sort=in_port dump-flows br0 | ofctl_strip], [0], [dnl priority=2,in_port=1025 actions=output:47 priority=1,in_port=1026 actions=output:45 priority=6,in_port=1027 actions=output:64 priority=3,in_port=1028 actions=output:44 priority=7,in_port=1029 actions=output:43 priority=5,in_port=1029 actions=output:43 priority=4,in_port=23213 actions=output:42 priority=9,tcp,tp_src=6 actions=drop priority=8,tcp,tp_src=5 actions=drop ]) AT_CHECK([ovs-ofctl --rsort=in_port dump-flows br0 | ofctl_strip], [0], [dnl priority=4,in_port=23213 actions=output:42 
priority=7,in_port=1029 actions=output:43 priority=5,in_port=1029 actions=output:43 priority=3,in_port=1028 actions=output:44 priority=6,in_port=1027 actions=output:64 priority=1,in_port=1026 actions=output:45 priority=2,in_port=1025 actions=output:47 priority=9,tcp,tp_src=6 actions=drop priority=8,tcp,tp_src=5 actions=drop ]) AT_CHECK([ovs-ofctl --sort=tcp_src dump-flows br0 | ofctl_strip], [0], [dnl priority=8,tcp,tp_src=5 actions=drop priority=9,tcp,tp_src=6 actions=drop priority=7,in_port=1029 actions=output:43 priority=6,in_port=1027 actions=output:64 priority=5,in_port=1029 actions=output:43 priority=4,in_port=23213 actions=output:42 priority=3,in_port=1028 actions=output:44 priority=2,in_port=1025 actions=output:47 priority=1,in_port=1026 actions=output:45 ]) AT_CHECK( [ovs-ofctl --sort=in_port --sort=tcp_src dump-flows br0 | ofctl_strip], [0], [ priority=2,in_port=1025 actions=output:47 priority=1,in_port=1026 actions=output:45 priority=6,in_port=1027 actions=output:64 priority=3,in_port=1028 actions=output:44 priority=7,in_port=1029 actions=output:43 priority=5,in_port=1029 actions=output:43 priority=4,in_port=23213 actions=output:42 priority=8,tcp,tp_src=5 actions=drop priority=9,tcp,tp_src=6 actions=drop ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ovs-ofctl diff-flows]) OVS_VSWITCHD_START # Add tons of flows to br0. for i in `seq 0 1023`; do echo "dl_vlan=$i,actions=drop"; done > add-flows.txt AT_CHECK([ovs-ofctl add-flows br0 add-flows.txt]) # Dump them and compare against what we expect by hand, then with diff-flows. for i in `seq 0 1023`; do echo " dl_vlan=$i actions=drop"; done | sort > expout AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sed '/NXST_FLOW/d' | sort], [0], [expout]) AT_CHECK([ovs-ofctl diff-flows br0 add-flows.txt]) # Remove even-numbered flows, compare again. 
# seq takes FIRST INCREMENT LAST, so the even numbers in 0..1023 are 0 2 1023.
for i in `seq 0 2 1023`; do echo "dl_vlan=$i"; done > del-flows.txt AT_CHECK([ovs-ofctl del-flows br0 - < del-flows.txt]) for i in `seq 0 2 1023`; do echo "+dl_vlan=$i actions=drop"; done | sort > expout AT_CHECK([ovs-ofctl diff-flows br0 add-flows.txt | sort], [0], [expout]) for i in `seq 0 2 1023`; do echo "-dl_vlan=$i actions=drop"; done | sort > expout AT_CHECK([ovs-ofctl diff-flows add-flows.txt br0 | sort], [0], [expout]) OVS_VSWITCHD_STOP AT_CLEANUP dnl ofpacts that differ bytewise don't necessarily differ when dnl converted to another representation, such as OpenFlow 1.0 dnl or to a string. "resubmit(,1)" is an example of an action dnl of this type: "ofpact_resubmit"s can differ in their "compat" dnl values even though this doesn't affect the string format. dnl dnl This test checks that "ovs-ofctl diff-flows" doesn't report dnl false ofpacts differences. AT_SETUP([ovs-ofctl diff-flows - suppress false differences]) OVS_VSWITCHD_START AT_DATA([flows.txt], [actions=resubmit(,1) ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-ofctl diff-flows br0 flows.txt]) AT_CHECK([ovs-ofctl add-flow br0 idle_timeout=60,dl_vlan=9,actions=output:1]) AT_CHECK([ovs-ofctl diff-flows br0 flows.txt], [2], [dnl -dl_vlan=9 idle_timeout=60 actions=output:1 ]) AT_CHECK([ovs-ofctl add-flow br0 hard_timeout=120,cookie=1234,dl_vlan=9,actions=output:1]) AT_CHECK([ovs-ofctl diff-flows flows.txt br0], [2], [dnl +dl_vlan=9 cookie=0x4d2 hard_timeout=120 actions=output:1 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ovs-ofctl -F and -O interaction]) AT_CHECK([ovs-ofctl -F oxm -O openflow10], [1], [], [ovs-ofctl: None of the enabled OpenFlow versions (OpenFlow10) supports any of the enabled flow formats (OXM). (Use -O to enable additional OpenFlow versions or -F to enable additional flow formats.) ]) AT_CHECK([ovs-ofctl -F oxm -O openflow11], [1], [], [ovs-ofctl: None of the enabled OpenFlow versions (OpenFlow11) supports any of the enabled flow formats (OXM).
(Use -O to enable additional OpenFlow versions or -F to enable additional flow formats.) ]) AT_CHECK([ovs-ofctl -F oxm -O openflow10,openflow11], [1], [], [ovs-ofctl: None of the enabled OpenFlow versions (OpenFlow10, OpenFlow11) supports any of the enabled flow formats (OXM). (Use -O to enable additional OpenFlow versions or -F to enable additional flow formats.) ]) AT_CHECK([ovs-ofctl -F oxm -O openflow10,openflow12], [1], [], [ovs-ofctl: missing command name; use --help for help ]) AT_CHECK([ovs-ofctl -F oxm -O openflow12], [1], [], [ovs-ofctl: missing command name; use --help for help ]) AT_CHECK([ovs-ofctl -F oxm -O openflow13], [1], [], [ovs-ofctl: missing command name; use --help for help ]) AT_CLEANUP AT_SETUP([ovs-ofctl ofp-parse]) # Test the echo request/reply messages (0 payload). AT_CHECK([printf '\1\2\0\10\0\0\0\0\1\3\0\10\0\0\0\0' > binary_ofp_msg]) AT_CHECK([ovs-ofctl ofp-parse binary_ofp_msg], [0], [dnl OFPT_ECHO_REQUEST (xid=0x0): 0 bytes of payload OFPT_ECHO_REPLY (xid=0x0): 0 bytes of payload ]) # Test the hello (xid:1 3-byte payload). AT_CHECK([printf '\1\0\0\13\0\0\0\1\101\102\103' > binary_ofp_msg]) AT_CHECK([ovs-ofctl ofp-parse - < binary_ofp_msg], [0], [dnl OFPT_HELLO (xid=0x1): version bitmap: 0x01 unknown data in hello: 00000000 01 00 00 0b 00 00 00 01-41 42 43 |........ABC | ]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ovs-vsctl.at000066400000000000000000001226771226605124000211210ustar00rootroot00000000000000dnl OVS_VSCTL_SETUP dnl dnl Creates an empty database in the current directory and then starts dnl an ovsdb-server on it for ovs-vsctl to connect to. m4_define([OVS_VSCTL_SETUP], [OVSDB_INIT([db]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/pid --remote=punix:socket --unixctl="`pwd`"/unixctl db >/dev/null 2>&1], [0], [ignore], [ignore])]) dnl OVS_VSCTL_CLEANUP dnl dnl Kills off the database server. m4_define([OVS_VSCTL_CLEANUP], [OVSDB_SERVER_SHUTDOWN]) dnl RUN_OVS_VSCTL(COMMAND, ...) 
dnl dnl Executes each ovs-vsctl COMMAND. m4_define([RUN_OVS_VSCTL], [m4_foreach([command], [$@], [ovs-vsctl --no-wait -vreconnect:emer --db=unix:socket command ])]) m4_define([RUN_OVS_VSCTL_ONELINE], [m4_foreach([command], [$@], [ovs-vsctl --no-wait -vreconnect:emer --db=unix:socket --oneline -- command ])]) dnl RUN_OVS_VSCTL_TOGETHER(COMMAND, ...) dnl dnl Executes each ovs-vsctl COMMAND in a single run of ovs-vsctl. m4_define([RUN_OVS_VSCTL_TOGETHER], [ovs-vsctl --no-wait -vreconnect:emer --db=unix:socket --oneline dnl m4_foreach([command], [$@], [ -- command])]) dnl CHECK_BRIDGES([BRIDGE, PARENT, VLAN], ...) dnl dnl Verifies that "ovs-vsctl list-br" prints the specified list of bridges, dnl which must be in alphabetical order. Also checks that each BRIDGE has the dnl specified PARENT and is on the given VLAN. m4_define([_CHECK_BRIDGE], [AT_CHECK([RUN_OVS_VSCTL([br-to-parent $1])], [0], [$2 ], [], [OVS_VSCTL_CLEANUP]) # Check br-to-vlan, without --oneline. AT_CHECK([RUN_OVS_VSCTL([br-to-vlan $1])], [0], [$3 ], [], [OVS_VSCTL_CLEANUP]) # Check br-to-vlan, with --oneline. # (This particular test is interesting with --oneline because it returns # an integer instead of a string and that can cause type mismatches inside # python if not done carefully.) AT_CHECK([RUN_OVS_VSCTL_ONELINE([br-to-vlan $1])], [0], [$3 ], [], [OVS_VSCTL_CLEANUP]) # Check multiple queries in a single run. AT_CHECK([RUN_OVS_VSCTL_TOGETHER([br-to-parent $1], [br-to-vlan $1])], [0], [$2 $3 ], [], [OVS_VSCTL_CLEANUP])]) m4_define([CHECK_BRIDGES], [dnl Check that the bridges appear on list-br, without --oneline. AT_CHECK( [RUN_OVS_VSCTL([list-br])], [0], [m4_foreach([brinfo], [$@], [m4_car(brinfo) ])], [], [OVS_VSCTL_CLEANUP]) dnl Check that the bridges appear on list-br, with --oneline. 
AT_CHECK( [RUN_OVS_VSCTL_ONELINE([list-br])], [0], [m4_join([\n], m4_foreach([brinfo], [$@], [m4_car(brinfo),])) ], [], [OVS_VSCTL_CLEANUP]) dnl Check that each bridge exists according to br-exists and that dnl a bridge that should not exist does not. m4_foreach([brinfo], [$@], [AT_CHECK([RUN_OVS_VSCTL([br-exists m4_car(brinfo)])], [0], [], [], [OVS_VSCTL_CLEANUP])]) AT_CHECK([RUN_OVS_VSCTL([br-exists nonexistent])], [2], [], [], [OVS_VSCTL_CLEANUP]) dnl Check that each bridge has the expected parent and VLAN. m4_map([_CHECK_BRIDGE], [$@])]) dnl CHECK_PORTS(BRIDGE, PORT[, PORT...]) dnl dnl Verifies that "ovs-vsctl list-ports BRIDGE" prints the specified dnl list of ports, which must be in alphabetical order. Also checks dnl that "ovs-vsctl port-to-br" reports that each port is dnl in BRIDGE. m4_define([CHECK_PORTS], [dnl Check ports without --oneline. AT_CHECK( [RUN_OVS_VSCTL([list-ports $1])], [0], [m4_foreach([port], m4_cdr($@), [port ])], [], [OVS_VSCTL_CLEANUP]) dnl Check ports with --oneline. AT_CHECK( [RUN_OVS_VSCTL_ONELINE([list-ports $1])], [0], [m4_join([\n], m4_shift($@)) ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([port-to-br $1])], [1], [], [ovs-vsctl: no port named $1 ], [OVS_VSCTL_CLEANUP]) m4_foreach( [port], m4_cdr($@), [AT_CHECK([RUN_OVS_VSCTL([[port-to-br] port])], [0], [$1 ], [], [OVS_VSCTL_CLEANUP])])]) dnl CHECK_IFACES(BRIDGE, IFACE[, IFACE...]) dnl dnl Verifies that "ovs-vsctl list-ifaces BRIDGE" prints the specified dnl list of ifaces, which must be in alphabetical order. Also checks dnl that "ovs-vsctl iface-to-br" reports that each interface is dnl in BRIDGE. 
m4_define([CHECK_IFACES], [AT_CHECK( [RUN_OVS_VSCTL([list-ifaces $1])], [0], [m4_foreach([iface], m4_cdr($@), [iface ])], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([iface-to-br $1])], [1], [], [ovs-vsctl: no interface named $1 ], [OVS_VSCTL_CLEANUP]) m4_foreach( [iface], m4_cdr($@), [AT_CHECK([RUN_OVS_VSCTL([[iface-to-br] iface])], [0], [$1 ], [], [OVS_VSCTL_CLEANUP])])]) dnl ---------------------------------------------------------------------- AT_BANNER([ovs-vsctl unit tests]) AT_SETUP([ovs-vsctl connection retry]) OVS_RUNDIR=$PWD; export OVS_RUNDIR dnl Without --retry, there should be no retry for active connections. AT_CHECK([ovs-vsctl --db=unix:foo --timeout=10 -vreconnect:emer -- init], [1], [], [stderr]) AT_CHECK([[sed 's/([^()]*)/(...reason...)/' stderr]], [0], [ovs-vsctl: unix:foo: database connection failed (...reason...) ]) dnl With --retry, we should retry for active connections. AT_CHECK( [ovs-vsctl --db=unix:foo --timeout=1 --retry -vreconnect:emer -vPATTERN:console:'%c|%p|%m' -- init echo $? > status], [0], [], [stderr]) AT_CHECK([grep -c 'terminating with signal' stderr], [0], [1 ]) AT_CHECK([kill -l `cat status`], [0], [ALRM ]) dnl Without --retry, we should retry for passive connections. AT_CHECK( [ovs-vsctl --db=punix:foo --timeout=1 -vreconnect:emer -vPATTERN:console:'%c|%p|%m' -- init echo $? 
> status], [0], [], [stderr]) AT_CHECK([grep -c 'terminating with signal' stderr], [0], [1 ]) AT_CHECK([kill -l `cat status`], [0], [ALRM ]) AT_CLEANUP dnl ---------------------------------------------------------------------- AT_BANNER([ovs-vsctl unit tests -- real bridges]) AT_SETUP([add-br a]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL([add-br a])], [0], [], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0]) CHECK_PORTS([a]) CHECK_IFACES([a]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a, add-br a]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL([add-br a])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([add-br a])], [1], [], [ovs-vsctl: cannot create a bridge named a because a bridge named a already exists ], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a, add-br b]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL([add-br a], [add-br b])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--may-exist add-br a b 9])], [1], [], [ovs-vsctl: "--may-exist add-br a b 9" but a is not a VLAN bridge ], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0], [b, b, 0]) CHECK_PORTS([a]) CHECK_IFACES([a]) CHECK_PORTS([b]) CHECK_IFACES([b]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a, add-br b, del-br a]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL([add-br a], [add-br b], [del-br a])], [0], [], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([b, b, 0]) CHECK_PORTS([b]) CHECK_IFACES([b]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a, del-br a, add-br a]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL_TOGETHER( [add-br a], [del-br a], [add-br a], [set Interface a other_config:key=value], [get Interface a other_config:key])], [0], [ value ], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0]) CHECK_PORTS([a]) CHECK_IFACES([a]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a, add-port a a1, add-port a a2]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP 
AT_CHECK([RUN_OVS_VSCTL( [add-br a], [--if-exists del-br b], [add-port a a1], [add-port a a2])], [0], [], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0]) CHECK_PORTS([a], [a1], [a2]) CHECK_IFACES([a], [a1], [a2]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a, add-port a a1, add-port a a1]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL( [add-br a], [add-port a a1])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([add-port a a1])], [1], [], [ovs-vsctl: cannot create a port named a1 because a port named a1 already exists on bridge a ], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a b, add-port a a1, add-port b b1, del-br a]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL_TOGETHER( [add-br a], [add-br b], [add-port a a1], [add-port b b1], [--if-exists del-port b b2], [del-br a])], [0], [ ], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([b, b, 0]) CHECK_PORTS([b], [b1]) CHECK_IFACES([b], [b1]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a, add-bond a bond0 a1 a2 a3]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL( [add-br a], [add-bond a bond0 a1 a2 a3])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--may-exist add-bond a bond0 a3 a1 a2])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--may-exist add-bond a bond0 a2 a1])], [1], [], [ovs-vsctl: "--may-exist add-bond a bond0 a2 a1" but bond0 actually has interface(s) a1, a2, a3 ], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0]) CHECK_PORTS([a], [bond0]) CHECK_IFACES([a], [a1], [a2], [a3]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a b, add-port a a1, add-port b b1, del-port a a1]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL( [add-br a], [add-br b], [add-port a a1 tag=9], [get port a1 tag], [--may-exist add-port b b1], [del-port a a1])], [0], [9 ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--may-exist add-port b b1])], [0], [], [], [OVS_VSCTL_CLEANUP]) 
AT_CHECK([RUN_OVS_VSCTL([del-port a])], [1], [], [ovs-vsctl: cannot delete port a because it is the local port for bridge a (deleting this port requires deleting the entire bridge) ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--if-exists del-port a])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--may-exist add-port a b1])], [1], [], [ovs-vsctl: "--may-exist add-port a b1" but b1 is actually attached to bridge b ], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0], [b, b, 0]) CHECK_PORTS([a]) CHECK_IFACES([a]) CHECK_PORTS([b], [b1]) CHECK_IFACES([b], [b1]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([add-br a, add-bond a bond0 a1 a2 a3, del-port bond0]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL_TOGETHER( [add-br a], [add-bond a bond0 a1 a2 a3 tag=9], [get Port bond0 tag], [del-port bond0])], [0], [ 9 ], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0]) CHECK_PORTS([a]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([external IDs]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL_ONELINE( [add-br a], [add-port a a1], [add-bond a bond0 a2 a3], [br-set-external-id a key0 value0], [set port a1 external-ids:key1=value1], [set interface a2 external-ids:key2=value2], [set interface a2 external-ids:key3=value3], [set interface a3 external-ids:key4=value4], [br-get-external-id a], [br-get-external-id a key0], [br-get-external-id a key1], [br-set-external-id a key0 othervalue], [br-get-external-id a], [br-set-external-id a key0], [br-get-external-id a], [get port a1 external-ids], [get interface a2 external-ids], [get interface a3 external-ids])], [0], [ key0=value0 value0 key0=othervalue {"key1"="value1"} {"key2"="value2", "key3"="value3"} {"key4"="value4"} ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL_TOGETHER( [br-get-external-id a], [get port a1 external-ids], [get interface a2 external-ids], [get interface a3 external-ids])], [0], [ {"key1"="value1"} {"key2"="value2", "key3"="value3"} {"key4"="value4"} ], [], 
[OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0]) CHECK_PORTS([a], [a1], [bond0]) CHECK_IFACES([a], [a1], [a2], [a3]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([controllers]) AT_KEYWORDS([controller ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL_TOGETHER( [add-br br0], [get-controller br0], [set-controller br0 tcp:4.5.6.7], [get-controller br0], [del-controller br0], [get-controller br0], [set-controller br0 tcp:8.9.10.11 tcp:5.4.3.2], [get-controller br0])], [0], [ tcp:4.5.6.7 tcp:5.4.3.2\ntcp:8.9.10.11 ], [], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP dnl ---------------------------------------------------------------------- dnl OVS_VSCTL_SETUP_SIMPLE_FAKE_CONF([VLAN]) m4_define([OVS_VSCTL_SETUP_SIMPLE_FAKE_CONF], [AT_CHECK( [RUN_OVS_VSCTL( [add-br xenbr0], [--may-exist add-br xenbr0], [add-port xenbr0 eth0], [--may-exist add-port xenbr0 eth0], [add-br xapi1 xenbr0 $1], [--may-exist add-br xapi1 xenbr0 $1], [add-port xapi1 eth0.$1])], [0], [], [], [OVS_VSCTL_CLEANUP])]) dnl OVS_VSCTL_FAKE_BRIDGE_TESTS([VLAN]) m4_define([OVS_VSCTL_FAKE_BRIDGE_TESTS], [ AT_BANNER([ovs-vsctl unit tests -- fake bridges (VLAN $1)]) AT_SETUP([simple fake bridge (VLAN $1)]) AT_KEYWORDS([ovs-vsctl fake-bridge]) OVS_VSCTL_SETUP OVS_VSCTL_SETUP_SIMPLE_FAKE_CONF([$1]) AT_CHECK([RUN_OVS_VSCTL([--may-exist add-br xapi1])], [1], [], [ovs-vsctl: "--may-exist add-br xapi1" but xapi1 is a VLAN bridge for VLAN $1 ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--may-exist add-br xapi1 xxx $1])], [1], [], [ovs-vsctl: "--may-exist add-br xapi1 xxx $1" but xapi1 has the wrong parent xenbr0 ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--may-exist add-br xapi1 xenbr0 10])], [1], [], [ovs-vsctl: "--may-exist add-br xapi1 xenbr0 10" but xapi1 is a VLAN bridge for the wrong VLAN $1 ], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([xapi1, xenbr0, $1], [xenbr0, xenbr0, 0]) CHECK_PORTS([xenbr0], [eth0]) CHECK_IFACES([xenbr0], [eth0]) CHECK_PORTS([xapi1], [eth0.$1]) CHECK_IFACES([xapi1], [eth0.$1]) 
OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([list bridges -- real and fake (VLAN $1)]) AT_KEYWORDS([ovs-vsctl fake-bridge]) OVS_VSCTL_SETUP OVS_VSCTL_SETUP_SIMPLE_FAKE_CONF([$1]) AT_CHECK([RUN_OVS_VSCTL_ONELINE([-- list-br])], [0], [xapi1\nxenbr0 ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL_ONELINE([-- --real list-br])], [0], [xenbr0 ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL_ONELINE([-- --fake list-br])], [0], [xapi1 ], [], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([simple fake bridge + del-br fake bridge (VLAN $1)]) AT_KEYWORDS([ovs-vsctl fake-bridge]) OVS_VSCTL_SETUP OVS_VSCTL_SETUP_SIMPLE_FAKE_CONF([$1]) AT_CHECK([RUN_OVS_VSCTL([del-br xapi1])], [0], [], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([xenbr0, xenbr0, 0]) CHECK_PORTS([xenbr0], [eth0]) CHECK_IFACES([xenbr0], [eth0]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([simple fake bridge + del-br real bridge (VLAN $1)]) AT_KEYWORDS([ovs-vsctl fake-bridge]) OVS_VSCTL_SETUP OVS_VSCTL_SETUP_SIMPLE_FAKE_CONF([$1]) AT_CHECK([RUN_OVS_VSCTL([del-br xenbr0])], [0], [], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([simple fake bridge + external IDs (VLAN $1)]) AT_KEYWORDS([ovs-vsctl fake-bridge]) OVS_VSCTL_SETUP OVS_VSCTL_SETUP_SIMPLE_FAKE_CONF([$1]) AT_CHECK([RUN_OVS_VSCTL_TOGETHER( [br-set-external-id xenbr0 key0 value0], [br-set-external-id xapi1 key1 value1], [br-get-external-id xenbr0], [br-get-external-id xenbr0 key0], [br-get-external-id xapi1], [br-get-external-id xapi1 key1])], [0], [ key0=value0 value0 key1=value1 value1 ], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([xapi1, xenbr0, $1], [xenbr0, xenbr0, 0]) CHECK_PORTS([xenbr0], [eth0]) CHECK_IFACES([xenbr0], [eth0]) CHECK_PORTS([xapi1], [eth0.$1]) CHECK_IFACES([xapi1], [eth0.$1]) OVS_VSCTL_CLEANUP AT_CLEANUP ]) # OVS_VSCTL_FAKE_BRIDGE_TESTS OVS_VSCTL_FAKE_BRIDGE_TESTS([9]) OVS_VSCTL_FAKE_BRIDGE_TESTS([0]) dnl OVS_VSCTL_SETUP_BOND_FAKE_CONF([VLAN]) m4_define([OVS_VSCTL_SETUP_BOND_FAKE_CONF], [AT_CHECK( 
[RUN_OVS_VSCTL( [add-br xapi1], [add-bond xapi1 bond0 eth0 eth1], [add-br xapi2 xapi1 $1], [add-port xapi2 bond0.$1])], [0], [], [], [OVS_VSCTL_CLEANUP])]) AT_SETUP([fake bridge on bond]) AT_KEYWORDS([ovs-vsctl fake-bridge]) OVS_VSCTL_SETUP OVS_VSCTL_SETUP_BOND_FAKE_CONF([11]) CHECK_BRIDGES([xapi1, xapi1, 0], [xapi2, xapi1, 11]) CHECK_PORTS([xapi1], [bond0]) CHECK_IFACES([xapi1], [eth0], [eth1]) CHECK_PORTS([xapi2], [bond0.11]) CHECK_IFACES([xapi2], [bond0.11]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([fake bridge on bond + del-br fake bridge]) AT_KEYWORDS([ovs-vsctl fake-bridge]) OVS_VSCTL_SETUP OVS_VSCTL_SETUP_BOND_FAKE_CONF([11]) AT_CHECK([RUN_OVS_VSCTL_ONELINE([del-br xapi2])], [0], [ ], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([xapi1, xapi1, 0]) CHECK_PORTS([xapi1], [bond0]) CHECK_IFACES([xapi1], [eth0], [eth1]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([fake bridge on bond + del-br real bridge]) AT_KEYWORDS([ovs-vsctl fake-bridge]) OVS_VSCTL_SETUP OVS_VSCTL_SETUP_BOND_FAKE_CONF([11]) AT_CHECK([RUN_OVS_VSCTL([del-br xapi1])]) CHECK_BRIDGES OVS_VSCTL_CLEANUP AT_CLEANUP dnl ---------------------------------------------------------------------- AT_BANNER([ovs-vsctl unit tests -- manager commands]) AT_SETUP([managers]) AT_KEYWORDS([manager ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL_TOGETHER( [del-manager], [get-manager], [set-manager tcp:4.5.6.7], [get-manager], [set-manager tcp:8.9.10.11 tcp:5.4.3.2], [get-manager], [del-manager], [get-manager])], [0], [ tcp:4.5.6.7 tcp:5.4.3.2\ntcp:8.9.10.11 ], [], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP dnl ---------------------------------------------------------------------- AT_BANNER([ovs-vsctl unit tests -- database commands]) AT_SETUP([database commands -- positive checks]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK( [RUN_OVS_VSCTL_TOGETHER([--id=@br0 create b name=br0], [set o . 
bridges=@br0])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) cp stdout out1 AT_CHECK([RUN_OVS_VSCTL([list b], [get b br0 _uuid])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) cp stdout out2 AT_CHECK([${PERL} $srcdir/uuidfilt.pl out1 out2], [0], [[<0> _uuid : <0> controller : [] datapath_id : [] datapath_type : "" external_ids : {} fail_mode : [] flood_vlans : [] flow_tables : {} ipfix : [] mirrors : [] name : "br0" netflow : [] other_config : {} ports : [] protocols : [] sflow : [] status : {} stp_enable : false <0> ]], [ignore], [test ! -e pid || kill `cat pid`]) AT_CHECK( [RUN_OVS_VSCTL([--columns=fail_mode,name,datapath_type list b])], [0], [[fail_mode : [] name : "br0" datapath_type : "" ]], [ignore], [test ! -e pid || kill `cat pid`]) AT_CHECK( [RUN_OVS_VSCTL([--columns=fail_mode,name,datapath_type find b])], [0], [[fail_mode : [] name : "br0" datapath_type : "" ]], [ignore], [test ! -e pid || kill `cat pid`]) AT_CHECK([ RUN_OVS_VSCTL_TOGETHER([--id=@br1 create b name=br1 datapath_type="foo"], [--id=@br2 create b name=br2 external-ids:bar=quux], [add o . bridges @br1 @br2])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) AT_CHECK( [RUN_OVS_VSCTL([--columns=name find b datapath_type!=foo])], [0], [stdout], [ignore], [test ! 
-e pid || kill `cat pid`]) AT_CHECK([sed -n '/./p' stdout | sort], [0], [[name : "br0" name : "br2" ]]) AT_CHECK( [RUN_OVS_VSCTL( [set bridge br0 \ 'other_config:datapath_id="0123456789ab"' \ 'other_config:hwaddr="00:11:22:33:44:55"' \ 'external-ids={"uuids"="9c45f225-a7cf-439d-976d-83db6271fda1"}' -- \ add bridge br0 external_ids '"roles"="local; remote; cloud"'])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL_ONELINE([get bridge br0 other_config external-ids])], [0], [{datapath_id="0123456789ab", hwaddr="00:11:22:33:44:55"}\n{roles="local; remote; cloud", uuids="9c45f225-a7cf-439d-976d-83db6271fda1"} ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get bridge br0 other_config:hwaddr -- --if-exists get bridge br0 other-config:nonexistent])], [0], ["00:11:22:33:44:55" ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([remove br br0 other_config hwaddr 'datapath_id=""' -- get br br0 other_config])], [0], [{datapath_id="0123456789ab"} ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([remove br br0 other_config 'datapath_id="0123456789ab"' -- get br br0 other_config])], [0], [{} ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([clear br br0 external-ids -- get br br0 external_ids])], [0], [{} ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL_TOGETHER([destroy b br0], [destroy b br1], [destroy b br2], [clear o . 
bridges])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([list b])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--if-exists get b x datapath_id])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--if-exists list b x])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([--if-exists set controller x connection_mode=standalone])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK( [RUN_OVS_VSCTL([--if-exists remove netflow x targets '"1.2.3.4:567"'])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK( [RUN_OVS_VSCTL([--if-exists clear netflow x targets])], [0], [], [], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([database commands -- negative checks]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([ovs-vsctl --may-exist], [1], [ignore], [ovs-vsctl: missing command name (use --help for help) ], [OVS_VSCTL_CLEANUP]) AT_CHECK([ovs-vsctl --may-exist --], [1], [ignore], [ovs-vsctl: missing command name (use --help for help) ], [OVS_VSCTL_CLEANUP]) AT_CHECK([ovs-vsctl -- --may-exist], [1], [ignore], [ovs-vsctl: missing command name (use --help for help) ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([add-br br0])], [0], [ignore], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([add-br br1])], [0], [ignore], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([set-controller br1 tcp:127.0.0.1])], [0], [ignore], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([ RUN_OVS_VSCTL_TOGETHER([--id=@n create netflow targets='"1.2.3.4:567"'], [set bridge br0 netflow=@n])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) cp stdout netflow-uuid AT_CHECK([RUN_OVS_VSCTL([list netflow `cat netflow-uuid`])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl netflow-uuid stdout], [0], [[<0> _uuid : <0> active_timeout : 0 add_id_to_interface : false engine_id : [] engine_type : [] external_ids : {} targets : ["1.2.3.4:567"] ]], [ignore], [test ! 
-e pid || kill `cat pid`]) AT_CHECK([RUN_OVS_VSCTL([list interx x])], [1], [], [ovs-vsctl: unknown table "interx" ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([list b x])], [1], [], [ovs-vsctl: no row "x" in table Bridge ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get b x datapath_id])], [1], [], [ovs-vsctl: no row "x" in table Bridge ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get b br0 d])], [1], [], [ovs-vsctl: Bridge contains more than one column whose name matches "d" ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get b br0 x])], [1], [], [ovs-vsctl: Bridge does not contain a column whose name matches "x" ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get b br0 :y=z])], [1], [], [ovs-vsctl: :y=z: missing column name ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get b br0 datapath_id:y=z])], [1], [], [ovs-vsctl: datapath_id:y=z: trailing garbage "=z" in argument ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([set b br0 'datapath_id:y>=z'])], [1], [], [ovs-vsctl: datapath_id:y>=z: argument does not end in "=" followed by a value. ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([set controller x connection_mode=standalone])], [1], [], [ovs-vsctl: no row "x" in table Controller ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([wait-until b br0 datapath_id:y,z])], [1], [], [ovs-vsctl: datapath_id:y,z: argument does not end in "=", "!=", "<", ">", "<=", ">=", "{=}", "{!=}", "{<}", "{>}", "{<=}", or "{>=}" followed by a value. 
], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get b br0 datapath_id::])], [1], [], [ovs-vsctl: datapath_id::: trailing garbage ":" in argument ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get b br0 datapath_id:x])], [1], [], [ovs-vsctl: cannot specify key to get for non-map column datapath_id ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([get b br0 external_ids:x])], [1], [], [ovs-vsctl: no key "x" in Bridge record "br0" column external_ids ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([set b br0 flood_vlans=-1])], [1], [], [ovs-vsctl: constraint violation: -1 is not in the valid range 0 to 4095 (inclusive) ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([set b br0 flood_vlans=4096])], [1], [], [ovs-vsctl: constraint violation: 4096 is not in the valid range 0 to 4095 (inclusive) ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([set c br1 'connection-mode=xyz'])], [1], [], [[ovs-vsctl: constraint violation: xyz is not one of the allowed values ([in-band, out-of-band]) ]], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([set c br1 connection-mode:x=y])], [1], [], [ovs-vsctl: cannot specify key to set for non-map column connection_mode ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([add b br1 datapath_id x y])], [1], [], [ovs-vsctl: "add" operation would put 2 values in column datapath_id of table Bridge but the maximum number is 1 ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([remove netflow `cat netflow-uuid` targets '"1.2.3.4:567"'])], [1], [], [ovs-vsctl: "remove" operation would put 0 values in column targets of table NetFlow but the minimum number is 1 ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([remove netflow x targets '"1.2.3.4:567"'])], [1], [], [ovs-vsctl: no row "x" in table NetFlow ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([clear netflow x targets])], [1], [], [ovs-vsctl: no row "x" in table NetFlow ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([clear netflow `cat netflow-uuid` targets])], [1], [], [ovs-vsctl: "clear" operation 
cannot be applied to column targets of table NetFlow, which is not allowed to be empty ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([destroy b br2])], [1], [], [ovs-vsctl: no row "br2" in table Bridge ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([add in br1 name x])], [1], [], [ovs-vsctl: cannot modify read-only column name in table Interface ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([set port br1 name br2])], [1], [], [ovs-vsctl: cannot modify read-only column name in table Port ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([remove b br1 name br1])], [1], [], [ovs-vsctl: cannot modify read-only column name in table Bridge ], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([clear b br1 name])], [1], [], [ovs-vsctl: cannot modify read-only column name in table Bridge ], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([database commands -- conditions]) AT_KEYWORDS([ovs-vsctl]) ON_EXIT([kill `cat pid`]) OVS_VSCTL_SETUP AT_CHECK( [RUN_OVS_VSCTL_TOGETHER( [add-br br0], [add-br br1], [set bridge br1 flood_vlans=0 other-config:x='""'], [add-br br2], [set bridge br2 flood_vlans=1 other-config:x=y], [add-br br3], [set bridge br3 flood_vlans=0,1 other-config:x=z], [add-br br4], [set bridge br4 flood_vlans=2], [add-br br5], [set bridge br5 flood_vlans=0,2], [add-br br6], [set bridge br6 flood_vlans=1,2], [add-br br7], [set bridge br7 flood_vlans=0,1,2])], [0], [ ]) m4_define([VSCTL_CHECK_FIND], [AT_CHECK([echo `ovs-vsctl --bare --no-wait -vreconnect:emer --db=unix:socket -- --columns=name find bridge '$1' | sort`], [0], [$2 ])]) # Arithmetic relational operators without keys. 
VSCTL_CHECK_FIND([flood_vlans=0], [br1]) VSCTL_CHECK_FIND([flood_vlans=1], [br2]) VSCTL_CHECK_FIND([flood_vlans=0,2], [br5]) VSCTL_CHECK_FIND([flood_vlans=0,1,2], [br7]) VSCTL_CHECK_FIND([flood_vlans=3], []) VSCTL_CHECK_FIND([flood_vlans!=0], [br0 br2 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans!=1], [br0 br1 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans!=0,2], [br0 br1 br2 br3 br4 br6 br7]) VSCTL_CHECK_FIND([flood_vlans!=0,1,2], [br0 br1 br2 br3 br4 br5 br6]) VSCTL_CHECK_FIND([flood_vlans!=3], [br0 br1 br2 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans<2], [br0 br1 br2]) VSCTL_CHECK_FIND([flood_vlans<0,2], [br0 br1 br2 br3 br4]) VSCTL_CHECK_FIND([flood_vlans>1], [br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans>0,1], [br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans<=2], [br0 br1 br2 br4]) VSCTL_CHECK_FIND([flood_vlans<=0,2], [br0 br1 br2 br3 br4 br5]) VSCTL_CHECK_FIND([flood_vlans>=1], [br2 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans>=0,1], [br3 br5 br6 br7]) # Set relational operators without keys. 
VSCTL_CHECK_FIND([flood_vlans{=}0], [br1]) VSCTL_CHECK_FIND([flood_vlans{=}1], [br2]) VSCTL_CHECK_FIND([flood_vlans{=}0,2], [br5]) VSCTL_CHECK_FIND([flood_vlans{=}0,1,2], [br7]) VSCTL_CHECK_FIND([flood_vlans{=}3], []) VSCTL_CHECK_FIND([flood_vlans{!=}0], [br0 br2 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans{!=}1], [br0 br1 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans{!=}0,2], [br0 br1 br2 br3 br4 br6 br7]) VSCTL_CHECK_FIND([flood_vlans{!=}0,1,2], [br0 br1 br2 br3 br4 br5 br6]) VSCTL_CHECK_FIND([flood_vlans{!=}3], [br0 br1 br2 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans{<}[[]]], []) VSCTL_CHECK_FIND([flood_vlans{<=}[[]]], [br0]) VSCTL_CHECK_FIND([flood_vlans{<}0], [br0]) VSCTL_CHECK_FIND([flood_vlans{<=}0], [br0 br1]) VSCTL_CHECK_FIND([flood_vlans{<}1,2], [br0 br2 br4]) VSCTL_CHECK_FIND([flood_vlans{<=}1,2], [br0 br2 br4 br6]) VSCTL_CHECK_FIND([flood_vlans{>}[[]]], [br1 br2 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans{>=}[[]]], [br0 br1 br2 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([flood_vlans{>}0], [br3 br5 br7]) VSCTL_CHECK_FIND([flood_vlans{>=}0], [br1 br3 br5 br7]) VSCTL_CHECK_FIND([flood_vlans{>}0,2], [br7]) VSCTL_CHECK_FIND([flood_vlans{>=}1,2], [br6 br7]) VSCTL_CHECK_FIND([flood_vlans{>=}0,2], [br5 br7]) # Arithmetic relational operators with keys. 
VSCTL_CHECK_FIND([other-config:x=""], [br1]) VSCTL_CHECK_FIND([other-config:x=y], [br2]) VSCTL_CHECK_FIND([other-config:x=z], [br3]) VSCTL_CHECK_FIND([other-config:x!=""], [br2 br3]) VSCTL_CHECK_FIND([other-config:x!=y], [br1 br3]) VSCTL_CHECK_FIND([other-config:x!=z], [br1 br2]) VSCTL_CHECK_FIND([other-config:x>y], [br3]) VSCTL_CHECK_FIND([other-config:x>=y], [br2 br3]) VSCTL_CHECK_FIND([other-config:x=}[[]]], [br0 br1 br2 br3 br4 br5 br6 br7]) VSCTL_CHECK_FIND([other-config:x{>=}x], []) VSCTL_CHECK_FIND([other-config:x{>=}""], [br1]) VSCTL_CHECK_FIND([other-config:x{>=}y], [br2]) VSCTL_CHECK_FIND([other-config:x{>=}z], [br3]) VSCTL_CHECK_FIND([other-config:x{>}[[]]], [br1 br2 br3]) VSCTL_CHECK_FIND([other-config:x{>}x], []) VSCTL_CHECK_FIND([other-config:x{>}""], []) VSCTL_CHECK_FIND([other-config:x{>}y], []) VSCTL_CHECK_FIND([other-config:x{>}z], []) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([database commands -- wait-until immediately true]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL( [add-br br0], [add-bond br0 bond0 eth0 eth1], [set port bond0 bond_updelay=500 other-config:abc=def])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([[wait-until Open_vSwitch . manager_options=[]]])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([[wait-until Open_vSwitch . 
bridges!=[]]])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([[wait-until Port bond0 other-config:abc=def]])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL([[wait-until port bond0 'bond_updelay>50' 'other-config:abc>d' 'other-config:abc stdout1 & (RUN_OVS_VSCTL([[wait-until bridge br1 -- get bridge br1 other-config:abc]])) > stdout2 & (RUN_OVS_VSCTL([[wait-until b br1 other-config={abc=def} -- get bridge br1 other-config]])) > stdout3 & (RUN_OVS_VSCTL([[wait-until port bond0 'bond_updelay>50' -- get port bond0 bond-updelay]])) > stdout4 & # Give the ovs-vsctls a chance to read the database sleep 1 AT_CHECK([RUN_OVS_VSCTL([add-br br10 -- set bridge br10 other-config:abc=quux]) RUN_OVS_VSCTL([add-br br1 -- set bridge br1 other-config:abc=def -- add-bond br1 bond0 eth0 eth1 -- set port bond0 bond_updelay=500])], [0], [], [], [OVS_VSCTL_CLEANUP]) # Wait for the ovs-vsctls to finish. wait # Check output AT_CHECK([cat stdout1], [0], [quux ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([cat stdout2], [0], [def ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([cat stdout3], [0], [{abc=def} ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([cat stdout4], [0], [500 ], [], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([--id option on create, get commands]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL([add-br br0], [add-port br0 eth0], [add-port br0 eth1])]) AT_CHECK( [RUN_OVS_VSCTL_TOGETHER( [set bridge br0 mirrors=@m], [--id=@eth0 get port eth0], [--id=@eth1 get port eth1], [--id=@m create mirror name=mymirror select-dst-port=@eth0 select-src-port=@eth0 output-port=@eth1])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) AT_CHECK( [${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl <0> ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK( [RUN_OVS_VSCTL( [list port eth0 eth1], [list mirror], [list bridge br0])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) AT_CHECK( [sed -n -e '/uuid/p' -e '/name/p' -e '/mirrors/p' -e '/select/p' -e '/output/p' < stdout | ${PERL} 
$srcdir/uuidfilt.pl], [0], [dnl [_uuid : <0> name : "eth0" _uuid : <1> name : "eth1" _uuid : <2> name : mymirror output_port : <1> output_vlan : [] select_all : false select_dst_port : [<0>] select_src_port : [<0>] select_vlan : [] _uuid : <3> mirrors : [<2>] name : "br0" ]], [], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP AT_SETUP([unreferenced record warnings]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK( [ovs-vsctl -vPATTERN:console:'%c|%p|%m' --no-wait -vreconnect:emer --db=unix:socket \ -- create Bridge name=br0 | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> ], [vsctl|WARN|applying "create" command to table Bridge without --id option will have no effect ], [OVS_VSCTL_CLEANUP]) AT_CHECK( [ovs-vsctl -vPATTERN:console:'%c|%p|%m' --no-wait -vreconnect:emer --db=unix:socket \ -- --id=@br0 create Bridge name=br0 | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> ], [vsctl|WARN|row id "@br0" was created but no reference to it was inserted, so it will not actually appear in the database ], [OVS_VSCTL_CLEANUP]) AT_CHECK( [ovs-vsctl -vPATTERN:console:'%c|%p|%m' --no-wait -vreconnect:emer --db=unix:socket \ -- --id=@eth0_iface create Interface name=eth0 \ -- --id=@eth0 create Port name=eth0 interfaces=@eth0_iface \ -- --id=@m0 create Mirror name=m0 output_port=@eth0 \ -- --id=@br0 create Bridge name=br0 mirrors=@m0 \ -- set Open_vSwitch . bridges=@br0 | ${PERL} $srcdir/uuidfilt.pl], [0], [<0> <1> <2> <3> ], [vsctl|WARN|row id "@eth0" was created but only a weak reference to it was inserted, so it will not actually appear in the database ], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP dnl This test really shows a bug -- "create" followed by "list" in dnl the same execution shows the wrong UUID on the "list" command. dnl The bug is documented in ovs-vsctl.8. AT_SETUP([created row UUID is wrong in same execution]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL([--id=@br0 create Bridge name=br0 -- add Open_vSwitch . 
bridges @br0 -- list b])], [0], [stdout], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [[<0> _uuid : <1> controller : [] datapath_id : [] datapath_type : "" external_ids : {} fail_mode : [] flood_vlans : [] flow_tables : {} ipfix : [] mirrors : [] name : "br0" netflow : [] other_config : {} ports : [] protocols : [] sflow : [] status : {} stp_enable : false ]], [ignore], [test ! -e pid || kill `cat pid`]) OVS_VSCTL_CLEANUP AT_CLEANUP dnl This test will create a linux-htb QoS record that dnl points to a few queues and use it on a1 and a2 port. dnl It also destroys all records from Qos and Queue table. AT_SETUP([--all option on destroy command]) AT_KEYWORDS([ovs-vsctl]) OVS_VSCTL_SETUP AT_CHECK([RUN_OVS_VSCTL( [add-br a], [add-port a a1], [add-port a a2])], [0], [], [], [OVS_VSCTL_CLEANUP]) CHECK_BRIDGES([a, a, 0]) CHECK_PORTS([a], [a1], [a2]) CHECK_IFACES([a], [a1], [a2]) AT_CHECK([RUN_OVS_VSCTL_TOGETHER( [set Port a1 qos=@newqos], [set Port a2 qos=@newqos], [--id=@newqos create QoS type=linux-htb other-config:max-rate=1000000000 queues=0=@q0,1=@q1], [--id=@q0 create Queue other-config:min-rate=100000000 other-config:max-rate=100000000], [--id=@q1 create Queue other-config:min-rate=500000000])], [0], [ignore], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL( [--columns=other_config,type list Qos])], [0], [other_config : {max-rate="1000000000"} type : linux-htb ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL( [--columns=other_config list Queue | sort | xargs echo])], [0], [other_config : {max-rate=100000000, min-rate=100000000} other_config : {min-rate=500000000} ], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL( [clear Port a1 qos], [clear Port a2 qos])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL( [--columns=qos list Port a1 a2])], [0], [[qos : [] qos : [] ]], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL( [--all destroy Qos])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL( [-- list Qos])], [0], 
[], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL( [--all destroy Queue])], [0], [], [], [OVS_VSCTL_CLEANUP]) AT_CHECK([RUN_OVS_VSCTL( [-- list Queue])], [0], [], [], [OVS_VSCTL_CLEANUP]) OVS_VSCTL_CLEANUP AT_CLEANUP dnl ---------------------------------------------------------------------- AT_BANNER([ovs-vsctl add-port -- reserved port names]) AT_SETUP([add-port -- reserved names 1]) OVS_VSWITCHD_START # Test creating all reserved port names m4_foreach( [reserved_name], [[ovs-netdev], [ovs-dummy], [gre_system], [gre64_system], [lisp_system], [vxlan_system]], [ # Try creating the port AT_CHECK([ovs-vsctl add-port br0 reserved_name], [0], [], []) # Detect the warning log message AT_CHECK([sed -n "s/^.*\(|bridge|WARN|.*\)$/\1/p" ovs-vswitchd.log], [0], [dnl |bridge|WARN|could not create interface reserved_name, name is reserved ]) # Delete the warning log message AT_CHECK([sed "/|bridge|WARN|/d" ovs-vswitchd.log > ovs-vswitchd.log], [0], [], []) # Delete the port AT_CHECK([ovs-vsctl del-port br0 reserved_name], [0], [], [])]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([add-port -- reserved names 2]) # Creates all type of tunnel ports OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ options:remote_ip=1.1.1.1 ofport_request=1\ -- add-port br0 p2 -- set Interface p2 type=gre64 \ options:local_ip=2.2.2.2 options:remote_ip=1.1.1.1 \ ofport_request=2 \ -- add-port br0 p3 -- set Interface p3 type=lisp \ options:remote_ip=2.2.2.2 ofport_request=3 \ -- add-port br0 p4 -- set Interface p4 type=vxlan \ options:remote_ip=2.2.2.2 ofport_request=4]) # Test creating all reserved tunnel port names m4_foreach( [reserved_name], [[gre_system], [gre64_system], [lisp_system], [vxlan_system]], [ # Try creating the port AT_CHECK([ovs-vsctl add-port br0 reserved_name], [0], [], []) # Detect the warning log message AT_CHECK([sed -n "s/^.*\(|bridge|WARN|.*\)$/\1/p" ovs-vswitchd.log], [0], [dnl |bridge|WARN|could not create interface reserved_name, name is reserved ]) # Delete 
the warning log message AT_CHECK([sed "/|bridge|WARN|/d" ovs-vswitchd.log > ovs-vswitchd.log], [0], [], []) # Delete the port AT_CHECK([ovs-vsctl del-port br0 reserved_name], [0], [], [])]) OVS_VSWITCHD_STOP AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ovs-vswitchd.at000066400000000000000000000022571226605124000216100ustar00rootroot00000000000000AT_BANNER([ovs-vswitchd]) dnl The OVS initscripts never make an empty database (one without even an dnl Open_vSwitch record) visible to ovs-vswitchd, but hand-rolled scripts dnl sometimes do. At one point, "ovs-vswitchd --detach" would never detach dnl and use 100% CPU if this happened, so this test checks for regression. AT_SETUP([ovs-vswitchd detaches correctly with empty db]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_SYSCONFDIR=`pwd`; export OVS_SYSCONFDIR ON_EXIT([kill `cat ovsdb-server.pid ovs-vswitchd.pid`]) dnl Create database. touch .conf.db.~lock~ AT_CHECK([ovsdb-tool create conf.db $abs_top_srcdir/vswitchd/vswitch.ovsschema]) dnl Start ovsdb-server. *Don't* initialize database. AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --log-file --remote=punix:$OVS_RUNDIR/db.sock], [0], [ignore], [ignore]) AT_CAPTURE_FILE([ovsdb-server.log]) dnl Start ovs-vswitchd. AT_CHECK([ovs-vswitchd --detach --no-chdir --pidfile --enable-dummy --disable-system --log-file], [0], [], [stderr]) AT_CAPTURE_FILE([ovs-vswitchd.log]) dnl ovs-vswitchd detached OK or we wouldn't have made it this far. Success. AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ovs-xapi-sync.at000066400000000000000000000066471226605124000216770ustar00rootroot00000000000000AT_BANNER([ovs-xapi-sync]) AT_SETUP([ovs-xapi-sync]) AT_SKIP_IF([test $HAVE_PYTHON = no]) # Mock up the XenAPI. 
cp "$top_srcdir/tests/MockXenAPI.py" XenAPI.py PYTHONPATH=`pwd`:$PYTHONPATH export PYTHONPATH OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_PKGDATADIR=`pwd`; export OVS_PKGDATADIR cp "$top_srcdir/vswitchd/vswitch.ovsschema" . cp "$top_srcdir/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync" \ ovs-xapi-sync ON_EXIT([kill `cat pid ovs-xapi-sync.pid`]) mkdir var var/run touch var/run/xapi_init_complete.cookie ovs_vsctl () { ovs-vsctl --no-wait -vreconnect:emer --db=unix:socket "$@" } # Start ovsdb-server. OVS_VSCTL_SETUP # Start ovs-xapi-sync. AT_CHECK([$PYTHON ./ovs-xapi-sync "--pidfile=`pwd`/ovs-xapi-sync.pid" \ "--root-prefix=`pwd`" unix:socket >log 2>&1 &]) AT_CAPTURE_FILE([log]) # Add bridges and check ovs-xapi-sync's work. AT_CHECK([ovs_vsctl -- add-br xenbr0 -- add-br xenbr1]) OVS_WAIT_UNTIL([test "X`ovs_vsctl get bridge xenbr0 fail-mode`" != "X[[]]"]) AT_CHECK([ovs_vsctl \ -- get bridge xenbr0 fail-mode other-config external-ids \ -- get bridge xenbr1 fail-mode other-config external-ids], [0], [[secure {} {bridge-id="custom bridge ID"} secure {disable-in-band="true"} {} ]]) # Add vif and check daemon's work. AT_CHECK([ovs_vsctl \ -- add-port xenbr0 vif1.0 \ -- set Interface vif1.0 'external-ids={attached-mac="00:11:22:33:44:55", xs-network-uuid="9b66c68b-a74e-4d34-89a5-20a8ab352d1e", xs-vif-uuid="6ab1b260-398e-49ba-827b-c7696108964c", xs-vm-uuid="fcb8a3f6-dc04-41d2-8b8a-55afd2b755b8"'}]) OVS_WAIT_UNTIL([ovs_vsctl get interface vif1.0 external-ids:iface-id >/dev/null 2>&1]) AT_CHECK([ovs_vsctl get interface vif1.0 external-ids], [0], [{attached-mac="00:11:22:33:44:55", iface-id="custom iface ID", iface-status=active, vm-id="custom vm ID", xs-network-uuid="9b66c68b-a74e-4d34-89a5-20a8ab352d1e", xs-vif-uuid="6ab1b260-398e-49ba-827b-c7696108964c", xs-vm-uuid="fcb8a3f6-dc04-41d2-8b8a-55afd2b755b8"} ]) # Add corresponding tap and check daemon's work. 
AT_CHECK([ovs_vsctl add-port xenbr0 tap1.0]) OVS_WAIT_UNTIL([ovs_vsctl get interface tap1.0 external-ids:iface-id >/dev/null 2>&1]) AT_CHECK([ovs_vsctl \ -- get interface vif1.0 external-ids \ -- get interface tap1.0 external-ids], [0], [{attached-mac="00:11:22:33:44:55", iface-id="custom iface ID", iface-status=inactive, vm-id="custom vm ID", xs-network-uuid="9b66c68b-a74e-4d34-89a5-20a8ab352d1e", xs-vif-uuid="6ab1b260-398e-49ba-827b-c7696108964c", xs-vm-uuid="fcb8a3f6-dc04-41d2-8b8a-55afd2b755b8"} {attached-mac="00:11:22:33:44:55", iface-id="custom iface ID", iface-status=active, vm-id="custom vm ID", xs-network-uuid="9b66c68b-a74e-4d34-89a5-20a8ab352d1e", xs-vif-uuid="6ab1b260-398e-49ba-827b-c7696108964c", xs-vm-uuid="fcb8a3f6-dc04-41d2-8b8a-55afd2b755b8"} ]) # Remove corresponding tap and check daemon's work. AT_CHECK([ovs_vsctl del-port tap1.0]) OVS_WAIT_UNTIL([test `ovs_vsctl get interface vif1.0 external-ids:iface-status` = active]) AT_CHECK([ovs_vsctl get interface vif1.0 external-ids], [0], [{attached-mac="00:11:22:33:44:55", iface-id="custom iface ID", iface-status=active, vm-id="custom vm ID", xs-network-uuid="9b66c68b-a74e-4d34-89a5-20a8ab352d1e", xs-vif-uuid="6ab1b260-398e-49ba-827b-c7696108964c", xs-vm-uuid="fcb8a3f6-dc04-41d2-8b8a-55afd2b755b8"} ]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ovsdb-column.at000066400000000000000000000006771226605124000215640ustar00rootroot00000000000000AT_BANNER([OVSDB -- columns]) OVSDB_CHECK_POSITIVE_CPY([ordinary column], [[parse-column mycol '{"type": "integer"}']], [[{"type":"integer"}]]) OVSDB_CHECK_POSITIVE_CPY([immutable column], [[parse-column mycol '{"type": "real", "mutable": false}']], [[{"mutable":false,"type":"real"}]]) OVSDB_CHECK_POSITIVE_CPY([ephemeral column], [[parse-column mycol '{"type": "uuid", "ephemeral": true}']], [[{"ephemeral":true,"type":"uuid"}]]) 
openvswitch-2.0.1+git20140120/tests/ovsdb-condition.at000066400000000000000000000526261226605124000222560ustar00rootroot00000000000000AT_BANNER([OVSDB -- conditions]) OVSDB_CHECK_POSITIVE([null condition], [[parse-conditions \ '{"columns": {"name": {"type": "string"}}}' \ '[]']], [[[]]]) OVSDB_CHECK_POSITIVE([conditions on scalars], [[parse-conditions \ '{"columns": {"i": {"type": "integer"}, "r": {"type": "real"}, "b": {"type": "boolean"}, "s": {"type": "string"}, "u": {"type": "uuid"}}}' \ '[["i", "==", 0]]' \ '[["i", "!=", 1]]' \ '[["i", "<", 2]]' \ '[["i", "<=", 3]]' \ '[["i", ">", 4]]' \ '[["i", ">=", 5]]' \ '[["i", "includes", 6]]' \ '[["i", "excludes", 7]]' \ '[["r", "==", 0.5]]' \ '[["r", "!=", 1.5]]' \ '[["r", "<", 2.5]]' \ '[["r", "<=", 3.5]]' \ '[["r", ">", 4.5]]' \ '[["r", ">=", 5.5]]' \ '[["r", "includes", 6.5]]' \ '[["r", "excludes", 7.5]]' \ '[["b", "==", true]]' \ '[["b", "!=", false]]' \ '[["b", "includes", false]]' \ '[["b", "excludes", true]]' \ '[["s", "==", "a"]]' \ '[["s", "!=", "b"]]' \ '[["s", "includes", "c"]]' \ '[["s", "excludes", "d"]]' \ '[["u", "==", ["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]]]' \ '[["u", "!=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]' \ '[["u", "includes", ["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]]]' \ '[["u", "excludes", ["uuid", "62315898-64e0-40b9-b26f-ff74225303e6"]]]']], [[[["i","==",0]] [["i","!=",1]] [["i","<",2]] [["i","<=",3]] [["i",">",4]] [["i",">=",5]] [["i","includes",6]] [["i","excludes",7]] [["r","==",0.5]] [["r","!=",1.5]] [["r","<",2.5]] [["r","<=",3.5]] [["r",">",4.5]] [["r",">=",5.5]] [["r","includes",6.5]] [["r","excludes",7.5]] [["b","==",true]] [["b","!=",false]] [["b","includes",false]] [["b","excludes",true]] [["s","==","a"]] [["s","!=","b"]] [["s","includes","c"]] [["s","excludes","d"]] [["u","==",["uuid","b10d28f7-af18-4a67-9e78-2a6394516c59"]]] [["u","!=",["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"]]] 
[["u","includes",["uuid","ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]]] [["u","excludes",["uuid","62315898-64e0-40b9-b26f-ff74225303e6"]]]]], [condition]) AT_SETUP([disallowed conditions on scalars]) AT_KEYWORDS([ovsdb negative condition]) AT_CHECK([[test-ovsdb parse-conditions \ '{"columns": {"i": {"type": "integer"}, "r": {"type": "real"}, "b": {"type": "boolean"}, "s": {"type": "string"}, "u": {"type": "uuid"}}}' \ '[["b", ">", true]]' \ '[["b", ">=", false]]' \ '[["b", "<", false]]' \ '[["b", "<=", false]]' \ '[["s", ">", "a"]]' \ '[["s", ">=", "b"]]' \ '[["s", "<", "c"]]' \ '[["s", "<=", "d"]]' \ '[["u", ">", ["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]]]' \ '[["u", ">=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]' \ '[["u", "<", ["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]]]' \ '[["u", "<=", ["uuid", "62315898-64e0-40b9-b26f-ff74225303e6"]]]']], [1], [], [[test-ovsdb: syntax "["b",">",true]": syntax error: Type mismatch: ">" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b",">=",false]": syntax error: Type mismatch: ">=" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b","<",false]": syntax error: Type mismatch: "<" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b","<=",false]": syntax error: Type mismatch: "<=" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["s",">","a"]": syntax error: Type mismatch: ">" operator may not be applied to column s of type string. test-ovsdb: syntax "["s",">=","b"]": syntax error: Type mismatch: ">=" operator may not be applied to column s of type string. test-ovsdb: syntax "["s","<","c"]": syntax error: Type mismatch: "<" operator may not be applied to column s of type string. test-ovsdb: syntax "["s","<=","d"]": syntax error: Type mismatch: "<=" operator may not be applied to column s of type string. 
test-ovsdb: syntax "["u",">",["uuid","b10d28f7-af18-4a67-9e78-2a6394516c59"]]": syntax error: Type mismatch: ">" operator may not be applied to column u of type uuid. test-ovsdb: syntax "["u",">=",["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"]]": syntax error: Type mismatch: ">=" operator may not be applied to column u of type uuid. test-ovsdb: syntax "["u","<",["uuid","ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]]": syntax error: Type mismatch: "<" operator may not be applied to column u of type uuid. test-ovsdb: syntax "["u","<=",["uuid","62315898-64e0-40b9-b26f-ff74225303e6"]]": syntax error: Type mismatch: "<=" operator may not be applied to column u of type uuid. ]]) AT_CLEANUP OVSDB_CHECK_POSITIVE([conditions on sets], [[parse-conditions \ '{"columns": {"i": {"type": {"key": "integer", "min": 0, "max": "unlimited"}}, "r": {"type": {"key": "real", "min": 0, "max": "unlimited"}}, "b": {"type": {"key": "boolean", "min": 0, "max": "unlimited"}}, "s": {"type": {"key": "string", "min": 0, "max": "unlimited"}}, "u": {"type": {"key": "uuid", "min": 0, "max": "unlimited"}}}}' \ '[["i", "==", ["set", []]]]' \ '[["i", "!=", ["set", [1]]]]' \ '[["i", "includes", ["set", [1, 2]]]]' \ '[["i", "excludes", ["set", [1, 2, 3]]]]' \ '[["r", "==", ["set", []]]]' \ '[["r", "!=", ["set", [1.5]]]]' \ '[["r", "includes", ["set", [1.5, 2.5]]]]' \ '[["r", "excludes", ["set", [1.5, 2.5, 3.5]]]]' \ '[["b", "==", ["set", [true]]]]' \ '[["b", "!=", ["set", [false]]]]' \ '[["b", "includes", ["set", [false]]]]' \ '[["b", "excludes", ["set", [true, false]]]]' \ '[["s", "==", ["set", ["a"]]]]' \ '[["s", "!=", ["set", ["a", "b"]]]]' \ '[["s", "includes", ["set", ["c"]]]]' \ '[["s", "excludes", ["set", ["c", "d"]]]]' \ '[["u", "==", ["set", [["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]]]]]' \ '[["u", "==", ["set", [["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"], ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]]]' \ '[["u", "includes", ["set", [["uuid", 
"b10d28f7-af18-4a67-9e78-2a6394516c59"], ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"], ["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]]]]]' \ '[["u", "excludes", ["set", [["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"], ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"], ["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"], ["uuid", "62315898-64e0-40b9-b26f-ff74225303e6"]]]]]' \ ]], [[[["i","==",["set",[]]]] [["i","!=",1]] [["i","includes",["set",[1,2]]]] [["i","excludes",["set",[1,2,3]]]] [["r","==",["set",[]]]] [["r","!=",1.5]] [["r","includes",["set",[1.5,2.5]]]] [["r","excludes",["set",[1.5,2.5,3.5]]]] [["b","==",true]] [["b","!=",false]] [["b","includes",false]] [["b","excludes",["set",[false,true]]]] [["s","==","a"]] [["s","!=",["set",["a","b"]]]] [["s","includes","c"]] [["s","excludes",["set",["c","d"]]]] [["u","==",["uuid","b10d28f7-af18-4a67-9e78-2a6394516c59"]]] [["u","==",["set",[["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"],["uuid","b10d28f7-af18-4a67-9e78-2a6394516c59"]]]]] [["u","includes",["set",[["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"],["uuid","ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"],["uuid","b10d28f7-af18-4a67-9e78-2a6394516c59"]]]]] [["u","excludes",["set",[["uuid","62315898-64e0-40b9-b26f-ff74225303e6"],["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"],["uuid","ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"],["uuid","b10d28f7-af18-4a67-9e78-2a6394516c59"]]]]]]], [condition]) OVSDB_CHECK_POSITIVE([condition sorting], [[parse-conditions \ '{"columns": {"i": {"type": "integer"}}}' \ '[["i", "excludes", 7], ["i", "!=", 8], ["i", "==", 1], ["i", "includes", 2], ["i", "<=", 3], ["i", "<", 4], ["i", ">", 6], ["i", ">=", 5], ["_uuid", "==", ["uuid", "d50e85c6-8ae7-4b16-b69e-4395928bd9be"]]]']], [[[["_uuid","==",["uuid","d50e85c6-8ae7-4b16-b69e-4395928bd9be"]],["i","==",1],["i","includes",2],["i","<=",3],["i","<",4],["i",">=",5],["i",">",6],["i","excludes",7],["i","!=",8]]]]) OVSDB_CHECK_POSITIVE([evaluating null condition], [[evaluate-conditions \ 
'{"columns": {"i": {"type": "integer"}}}' \ '[[]]' \ '[{"i": 0}, {"i": 1}, {"i": 2}']]], [condition 0: TTT]) OVSDB_CHECK_POSITIVE([evaluating conditions on integers], [[evaluate-conditions \ '{"columns": {"i": {"type": "integer"}}}' \ '[[["i", "<", 1]], [["i", "<=", 1]], [["i", "==", 1]], [["i", "!=", 1]], [["i", ">=", 1]], [["i", ">", 1]], [["i", "includes", 1]], [["i", "excludes", 1]], [["i", ">", 0], ["i", "<", 2]]]' \ '[{"i": 0}, {"i": 1}, {"i": 2}']]], [condition 0: T-- condition 1: TT- condition 2: -T- condition 3: T-T condition 4: -TT condition 5: --T condition 6: -T- condition 7: T-T condition 8: -T-], [condition]) OVSDB_CHECK_POSITIVE([evaluating conditions on reals], [[evaluate-conditions \ '{"columns": {"r": {"type": "real"}}}' \ '[[["r", "<", 5.0]], [["r", "<=", 5.0]], [["r", "==", 5.0]], [["r", "!=", 5.0]], [["r", ">=", 5.0]], [["r", ">", 5.0]], [["r", "includes", 5.0]], [["r", "excludes", 5.0]], [["r", "!=", 0], ["r", "!=", 5.1]]]' \ '[{"r": 0}, {"r": 5.0}, {"r": 5.1}']]], [condition 0: T-- condition 1: TT- condition 2: -T- condition 3: T-T condition 4: -TT condition 5: --T condition 6: -T- condition 7: T-T condition 8: -T-], [condition]) OVSDB_CHECK_POSITIVE([evaluating conditions on booleans], [[evaluate-conditions \ '{"columns": {"b": {"type": "boolean"}}}' \ '[[["b", "==", true]], [["b", "!=", true]], [["b", "includes", true]], [["b", "excludes", true]], [["b", "==", false]], [["b", "!=", false]], [["b", "includes", false]], [["b", "excludes", false]], [["b", "==", true], ["b", "==", false]]]' \ '[{"b": true}, {"b": false}']]], [condition 0: T- condition 1: -T condition 2: T- condition 3: -T condition 4: -T condition 5: T- condition 6: -T condition 7: T- condition 8: --], [condition]) OVSDB_CHECK_POSITIVE([evaluating conditions on strings], [[evaluate-conditions \ '{"columns": {"s": {"type": "string"}}}' \ '[[["s", "==", ""]], [["s", "!=", ""]], [["s", "includes", ""]], [["s", "excludes", ""]], [["s", "==", "foo"]], [["s", "!=", "foo"]], [["s", 
"includes", "foo"]], [["s", "excludes", "foo"]], [["s", "!=", "foo"], ["s", "!=", ""]]]' \ '[{"s": ""}, {"s": "foo"}, {"s": "xxx"}']]], [condition 0: T-- condition 1: -TT condition 2: T-- condition 3: -TT condition 4: -T- condition 5: T-T condition 6: -T- condition 7: T-T condition 8: --T], [condition]) OVSDB_CHECK_POSITIVE([evaluating conditions on UUIDs], [[evaluate-conditions \ '{"columns": {"u": {"type": "uuid"}}}' \ '[[["u", "==", ["uuid", "8a1dbdb8-416f-4ce9-affa-3332691714b6"]]], [["u", "!=", ["uuid", "8a1dbdb8-416f-4ce9-affa-3332691714b6"]]], [["u", "includes", ["uuid", "8a1dbdb8-416f-4ce9-affa-3332691714b6"]]], [["u", "excludes", ["uuid", "8a1dbdb8-416f-4ce9-affa-3332691714b6"]]], [["u", "==", ["uuid", "06151f9d-62d6-4f59-8504-e9765107faa9"]]], [["u", "!=", ["uuid", "06151f9d-62d6-4f59-8504-e9765107faa9"]]], [["u", "includes", ["uuid", "06151f9d-62d6-4f59-8504-e9765107faa9"]]], [["u", "excludes", ["uuid", "06151f9d-62d6-4f59-8504-e9765107faa9"]]], [["u", "!=", ["uuid", "06151f9d-62d6-4f59-8504-e9765107faa9"]], ["u", "!=", ["uuid", "cb160ed6-92a6-4503-a6aa-a09a09e01f0d"]]]]' \ '[{"u": ["uuid", "8a1dbdb8-416f-4ce9-affa-3332691714b6"]}, {"u": ["uuid", "06151f9d-62d6-4f59-8504-e9765107faa9"]}, {"u": ["uuid", "00000000-0000-0000-0000-000000000000"]}']]], [condition 0: T-- condition 1: -TT condition 2: T-- condition 3: -TT condition 4: -T- condition 5: T-T condition 6: -T- condition 7: T-T condition 8: T-T], [condition]) OVSDB_CHECK_POSITIVE([evaluating conditions on sets], [[evaluate-conditions \ '{"columns": {"i": {"type": {"key": "integer", "min": 0, "max": "unlimited"}}}}' \ '[[["i", "==", ["set", []]]], [["i", "==", ["set", [0]]]], [["i", "==", ["set", [1]]]], [["i", "==", ["set", [0, 1]]]], [["i", "==", ["set", [2]]]], [["i", "==", ["set", [2, 0]]]], [["i", "==", ["set", [2, 1]]]], [["i", "==", ["set", [2, 1, 0]]]], [["i", "!=", ["set", []]]], [["i", "!=", ["set", [0]]]], [["i", "!=", ["set", [1]]]], [["i", "!=", ["set", [0, 1]]]], [["i", "!=", ["set", 
[2]]]], [["i", "!=", ["set", [2, 0]]]], [["i", "!=", ["set", [2, 1]]]], [["i", "!=", ["set", [2, 1, 0]]]], [["i", "includes", ["set", []]]], [["i", "includes", ["set", [0]]]], [["i", "includes", ["set", [1]]]], [["i", "includes", ["set", [0, 1]]]], [["i", "includes", ["set", [2]]]], [["i", "includes", ["set", [2, 0]]]], [["i", "includes", ["set", [2, 1]]]], [["i", "includes", ["set", [2, 1, 0]]]], [["i", "excludes", ["set", []]]], [["i", "excludes", ["set", [0]]]], [["i", "excludes", ["set", [1]]]], [["i", "excludes", ["set", [0, 1]]]], [["i", "excludes", ["set", [2]]]], [["i", "excludes", ["set", [2, 0]]]], [["i", "excludes", ["set", [2, 1]]]], [["i", "excludes", ["set", [2, 1, 0]]]], [["i", "includes", ["set", [0]]], ["i", "includes", ["set", [1]]]]]' \ '[{"i": ["set", []]}, {"i": ["set", [0]]}, {"i": ["set", [1]]}, {"i": ["set", [0, 1]]}, {"i": ["set", [2]]}, {"i": ["set", [2, 0]]}, {"i": ["set", [2, 1]]}, {"i": ["set", [2, 1, 0]]}]']], [dnl condition 0: T---- --- condition 1: -T--- --- condition 2: --T-- --- condition 3: ---T- --- condition 4: ----T --- condition 5: ----- T-- condition 6: ----- -T- condition 7: ----- --T condition 8: -TTTT TTT condition 9: T-TTT TTT condition 10: TT-TT TTT condition 11: TTT-T TTT condition 12: TTTT- TTT condition 13: TTTTT -TT condition 14: TTTTT T-T condition 15: TTTTT TT- condition 16: TTTTT TTT condition 17: -T-T- T-T condition 18: --TT- -TT condition 19: ---T- --T condition 20: ----T TTT condition 21: ----- T-T condition 22: ----- -TT condition 23: ----- --T condition 24: TTTTT TTT condition 25: T-T-T -T- condition 26: TT--T T-- condition 27: T---T --- condition 28: TTTT- --- condition 29: T-T-- --- condition 30: TT--- --- condition 31: T---- --- condition 32: ---T- --T], [condition]) # This is the same as the "set" test except that it adds values, # all of which always match. 
OVSDB_CHECK_POSITIVE([evaluating conditions on maps (1)], [[evaluate-conditions \ '{"columns": {"i": {"type": {"key": "integer", "value": "boolean", "min": 0, "max": "unlimited"}}}}' \ '[[["i", "==", ["map", []]]], [["i", "==", ["map", [[0, true]]]]], [["i", "==", ["map", [[1, false]]]]], [["i", "==", ["map", [[0, true], [1, false]]]]], [["i", "==", ["map", [[2, true]]]]], [["i", "==", ["map", [[2, true], [0, true]]]]], [["i", "==", ["map", [[2, true], [1, false]]]]], [["i", "==", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "!=", ["map", []]]], [["i", "!=", ["map", [[0, true]]]]], [["i", "!=", ["map", [[1, false]]]]], [["i", "!=", ["map", [[0, true], [1, false]]]]], [["i", "!=", ["map", [[2, true]]]]], [["i", "!=", ["map", [[2, true], [0, true]]]]], [["i", "!=", ["map", [[2, true], [1, false]]]]], [["i", "!=", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "includes", ["map", []]]], [["i", "includes", ["map", [[0, true]]]]], [["i", "includes", ["map", [[1, false]]]]], [["i", "includes", ["map", [[0, true], [1, false]]]]], [["i", "includes", ["map", [[2, true]]]]], [["i", "includes", ["map", [[2, true], [0, true]]]]], [["i", "includes", ["map", [[2, true], [1, false]]]]], [["i", "includes", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "excludes", ["map", []]]], [["i", "excludes", ["map", [[0, true]]]]], [["i", "excludes", ["map", [[1, false]]]]], [["i", "excludes", ["map", [[0, true], [1, false]]]]], [["i", "excludes", ["map", [[2, true]]]]], [["i", "excludes", ["map", [[2, true], [0, true]]]]], [["i", "excludes", ["map", [[2, true], [1, false]]]]], [["i", "excludes", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "includes", ["map", [[0, true]]]], ["i", "includes", ["map", [[1, false]]]]]]' \ '[{"i": ["map", []]}, {"i": ["map", [[0, true]]]}, {"i": ["map", [[1, false]]]}, {"i": ["map", [[0, true], [1, false]]]}, {"i": ["map", [[2, true]]]}, {"i": ["map", [[2, true], [0, true]]]}, {"i": ["map", [[2, true], [1, false]]]}, {"i": 
["map", [[2, true], [1, false], [0, true]]]}]']], [dnl condition 0: T---- --- condition 1: -T--- --- condition 2: --T-- --- condition 3: ---T- --- condition 4: ----T --- condition 5: ----- T-- condition 6: ----- -T- condition 7: ----- --T condition 8: -TTTT TTT condition 9: T-TTT TTT condition 10: TT-TT TTT condition 11: TTT-T TTT condition 12: TTTT- TTT condition 13: TTTTT -TT condition 14: TTTTT T-T condition 15: TTTTT TT- condition 16: TTTTT TTT condition 17: -T-T- T-T condition 18: --TT- -TT condition 19: ---T- --T condition 20: ----T TTT condition 21: ----- T-T condition 22: ----- -TT condition 23: ----- --T condition 24: TTTTT TTT condition 25: T-T-T -T- condition 26: TT--T T-- condition 27: T---T --- condition 28: TTTT- --- condition 29: T-T-- --- condition 30: TT--- --- condition 31: T---- --- condition 32: ---T- --T], [condition]) # This is the same as the "set" test except that it adds values, # and those values don't always match. OVSDB_CHECK_POSITIVE([evaluating conditions on maps (2)], [[evaluate-conditions \ '{"columns": {"i": {"type": {"key": "integer", "value": "boolean", "min": 0, "max": "unlimited"}}}}' \ '[[["i", "==", ["map", []]]], [["i", "==", ["map", [[0, true]]]]], [["i", "==", ["map", [[1, false]]]]], [["i", "==", ["map", [[0, true], [1, false]]]]], [["i", "==", ["map", [[2, true]]]]], [["i", "==", ["map", [[2, true], [0, true]]]]], [["i", "==", ["map", [[2, true], [1, false]]]]], [["i", "==", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "!=", ["map", []]]], [["i", "!=", ["map", [[0, true]]]]], [["i", "!=", ["map", [[1, false]]]]], [["i", "!=", ["map", [[0, true], [1, false]]]]], [["i", "!=", ["map", [[2, true]]]]], [["i", "!=", ["map", [[2, true], [0, true]]]]], [["i", "!=", ["map", [[2, true], [1, false]]]]], [["i", "!=", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "includes", ["map", []]]], [["i", "includes", ["map", [[0, true]]]]], [["i", "includes", ["map", [[1, false]]]]], [["i", "includes", ["map", [[0, true], [1, 
false]]]]], [["i", "includes", ["map", [[2, true]]]]], [["i", "includes", ["map", [[2, true], [0, true]]]]], [["i", "includes", ["map", [[2, true], [1, false]]]]], [["i", "includes", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "excludes", ["map", []]]], [["i", "excludes", ["map", [[0, true]]]]], [["i", "excludes", ["map", [[1, false]]]]], [["i", "excludes", ["map", [[0, true], [1, false]]]]], [["i", "excludes", ["map", [[2, true]]]]], [["i", "excludes", ["map", [[2, true], [0, true]]]]], [["i", "excludes", ["map", [[2, true], [1, false]]]]], [["i", "excludes", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "includes", ["map", [[0, true]]]], ["i", "includes", ["map", [[1, false]]]]]]' \ '[{"i": ["map", []]}, {"i": ["map", [[0, true]]]}, {"i": ["map", [[0, false]]]}, {"i": ["map", [[1, false]]]}, {"i": ["map", [[1, true]]]}, {"i": ["map", [[0, true], [1, false]]]}, {"i": ["map", [[0, true], [1, true]]]}, {"i": ["map", [[2, true]]]}, {"i": ["map", [[2, false]]]}, {"i": ["map", [[2, true], [0, true]]]}, {"i": ["map", [[2, false], [0, true]]]}, {"i": ["map", [[2, true], [1, false]]]}, {"i": ["map", [[2, true], [1, true]]]}, {"i": ["map", [[2, true], [1, false], [0, true]]]}, {"i": ["map", [[2, true], [1, false], [0, false]]]}]']], [dnl condition 0: T---- ----- ----- condition 1: -T--- ----- ----- condition 2: ---T- ----- ----- condition 3: ----- T---- ----- condition 4: ----- --T-- ----- condition 5: ----- ----T ----- condition 6: ----- ----- -T--- condition 7: ----- ----- ---T- condition 8: -TTTT TTTTT TTTTT condition 9: T-TTT TTTTT TTTTT condition 10: TTT-T TTTTT TTTTT condition 11: TTTTT -TTTT TTTTT condition 12: TTTTT TT-TT TTTTT condition 13: TTTTT TTTT- TTTTT condition 14: TTTTT TTTTT T-TTT condition 15: TTTTT TTTTT TTT-T condition 16: TTTTT TTTTT TTTTT condition 17: -T--- TT--T T--T- condition 18: ---T- T---- -T-TT condition 19: ----- T---- ---T- condition 20: ----- --T-T -TTTT condition 21: ----- ----T ---T- condition 22: ----- ----- -T-TT 
condition 23: ----- ----- ---T- condition 24: TTTTT TTTTT TTTTT condition 25: T-TTT --TT- -TT-T condition 26: TTT-T -TTTT T-T-- condition 27: T-T-T --TT- --T-- condition 28: TTTTT TT-T- T---- condition 29: T-TTT ---T- ----- condition 30: TTT-T -T-T- T---- condition 31: T-T-T ---T- ----- condition 32: ----- T---- ---T-], [condition]) openvswitch-2.0.1+git20140120/tests/ovsdb-data.at000066400000000000000000000673341226605124000212030ustar00rootroot00000000000000AT_BANNER([OVSDB -- default values]) OVSDB_CHECK_POSITIVE_CPY([default atoms], [default-atoms], [[integer: OK real: OK boolean: OK string: OK uuid: OK]]) OVSDB_CHECK_POSITIVE_CPY([default data], [default-data], [[key integer, value void, n_min 0: OK key integer, value integer, n_min 0: OK key integer, value real, n_min 0: OK key integer, value boolean, n_min 0: OK key integer, value string, n_min 0: OK key integer, value uuid, n_min 0: OK key real, value void, n_min 0: OK key real, value integer, n_min 0: OK key real, value real, n_min 0: OK key real, value boolean, n_min 0: OK key real, value string, n_min 0: OK key real, value uuid, n_min 0: OK key boolean, value void, n_min 0: OK key boolean, value integer, n_min 0: OK key boolean, value real, n_min 0: OK key boolean, value boolean, n_min 0: OK key boolean, value string, n_min 0: OK key boolean, value uuid, n_min 0: OK key string, value void, n_min 0: OK key string, value integer, n_min 0: OK key string, value real, n_min 0: OK key string, value boolean, n_min 0: OK key string, value string, n_min 0: OK key string, value uuid, n_min 0: OK key uuid, value void, n_min 0: OK key uuid, value integer, n_min 0: OK key uuid, value real, n_min 0: OK key uuid, value boolean, n_min 0: OK key uuid, value string, n_min 0: OK key uuid, value uuid, n_min 0: OK key integer, value void, n_min 1: OK key integer, value integer, n_min 1: OK key integer, value real, n_min 1: OK key integer, value boolean, n_min 1: OK key integer, value string, n_min 1: OK key integer, value 
uuid, n_min 1: OK key real, value void, n_min 1: OK key real, value integer, n_min 1: OK key real, value real, n_min 1: OK key real, value boolean, n_min 1: OK key real, value string, n_min 1: OK key real, value uuid, n_min 1: OK key boolean, value void, n_min 1: OK key boolean, value integer, n_min 1: OK key boolean, value real, n_min 1: OK key boolean, value boolean, n_min 1: OK key boolean, value string, n_min 1: OK key boolean, value uuid, n_min 1: OK key string, value void, n_min 1: OK key string, value integer, n_min 1: OK key string, value real, n_min 1: OK key string, value boolean, n_min 1: OK key string, value string, n_min 1: OK key string, value uuid, n_min 1: OK key uuid, value void, n_min 1: OK key uuid, value integer, n_min 1: OK key uuid, value real, n_min 1: OK key uuid, value boolean, n_min 1: OK key uuid, value string, n_min 1: OK key uuid, value uuid, n_min 1: OK]]) AT_BANNER([OVSDB -- atoms without constraints]) OVSDB_CHECK_POSITIVE_CPY([integer atom from JSON], [[parse-atoms '["integer"]' \ '[0]' \ '[-1]' \ '[1e3]' \ '[9223372036854775807]' \ '[-9223372036854775808]' ]], [0 -1 1000 9223372036854775807 -9223372036854775808]) OVSDB_CHECK_POSITIVE([integer atom from string], [[parse-atom-strings -- '["integer"]' \ '0' \ '-1' \ '+1000' \ '9223372036854775807' \ '-9223372036854775808' ]], [0 -1 1000 9223372036854775807 -9223372036854775808]) OVSDB_CHECK_POSITIVE_CPY([real atom from JSON], [[parse-atoms '["real"]' \ '[0]' \ '[0.0]' \ '[-0.0]' \ '[-1.25]' \ '[1e3]' \ '[1e37]' \ '[0.00390625]' ]], [0 0 0 -1.25 1000 1e+37 0.00390625]) OVSDB_CHECK_POSITIVE([real atom from string], [[parse-atom-strings -- '["real"]' \ '0' \ '0.0' \ '-0.0' \ '-1.25' \ '1e3' \ '1e37' \ '0.00390625' ]], [0 0 0 -1.25 1000 1e+37 0.00390625]) OVSDB_CHECK_POSITIVE_CPY([boolean atom from JSON], [[parse-atoms '["boolean"]' '[true]' '[false]' ]], [true false]) OVSDB_CHECK_POSITIVE([boolean atom from string], [[parse-atom-strings '["boolean"]' 'true' 'false' ]], [true false]) 
OVSDB_CHECK_POSITIVE_CPY([string atom from JSON], [[parse-atoms '["string"]' '[""]' '["true"]' '["\"\\\/\b\f\n\r\t"]']], ["" "true" "\"\\/\b\f\n\r\t"]) OVSDB_CHECK_POSITIVE([string atom from string], [[parse-atom-strings '["string"]' \ 'unquoted' \ '"quoted-string"' \ '"needs quotes"' \ '""' \ '"true"' \ '"\"\\\/\b\f\n\r\t"']], [unquoted quoted-string "needs quotes" "" "true" "\"\\/\b\f\n\r\t"]) OVSDB_CHECK_POSITIVE_CPY([uuid atom from JSON], [[parse-atoms '["uuid"]' '["uuid", "550e8400-e29b-41d4-a716-446655440000"]']], [[["uuid","550e8400-e29b-41d4-a716-446655440000"]]]) OVSDB_CHECK_POSITIVE([uuid atom from string], [[parse-atom-strings '["uuid"]' '550e8400-e29b-41d4-a716-446655440000']], [550e8400-e29b-41d4-a716-446655440000]) OVSDB_CHECK_POSITIVE_CPY([integer atom sorting], [[sort-atoms '["integer"]' '[55,0,-1,2,1]']], [[[-1,0,1,2,55]]]) OVSDB_CHECK_POSITIVE_CPY([real atom sorting], [[sort-atoms '["real"]' '[1.25,1.23,0.0,-0.0,-1e99]']], [[[-1e+99,0,0,1.23,1.25]]]) OVSDB_CHECK_POSITIVE_CPY([boolean atom sorting], [[sort-atoms '["boolean"]' '[true,false,true,false,false]']], [[[false,false,false,true,true]]]) OVSDB_CHECK_POSITIVE_CPY([string atom sorting], [[sort-atoms '["string"]' '["abd","abc","\b","xxx"]']], [[["\b","abc","abd","xxx"]]]) OVSDB_CHECK_POSITIVE_CPY([uuid atom sorting], [[sort-atoms '["uuid"]' '[ ["uuid", "00000000-0000-0000-0000-000000000001"], ["uuid", "00000000-1000-0000-0000-000000000000"], ["uuid", "00000000-0000-1000-0000-000000000000"], ["uuid", "00010000-0000-0000-0000-000000000000"], ["uuid", "00000000-0000-0000-0000-000000000100"], ["uuid", "00000000-0000-0000-0000-000100000000"], ["uuid", "00000000-0000-0010-0000-000000000000"], ["uuid", "00000100-0000-0000-0000-000000000000"], ["uuid", "00000000-0000-0001-0000-000000000000"], ["uuid", "00000000-0000-0000-0000-000001000000"], ["uuid", "01000000-0000-0000-0000-000000000000"], ["uuid", "00000000-0000-0000-0000-000000001000"], ["uuid", "00000000-0000-0000-0000-000010000000"], ["uuid", 
"00000000-0000-0000-0000-010000000000"], ["uuid", "00000000-0000-0100-0000-000000000000"], ["uuid", "10000000-0000-0000-0000-000000000000"], ["uuid", "00000000-0000-0000-0000-000000000010"], ["uuid", "00000000-0100-0000-0000-000000000000"], ["uuid", "00000000-0000-0000-0100-000000000000"], ["uuid", "00000000-0000-0000-0001-000000000000"], ["uuid", "00000010-0000-0000-0000-000000000000"], ["uuid", "00000000-0000-0000-0010-000000000000"], ["uuid", "00000000-0000-0000-0000-000000010000"], ["uuid", "00000000-0000-0000-1000-000000000000"], ["uuid", "00000000-0000-0000-0000-100000000000"], ["uuid", "00000000-0000-0000-0000-001000000000"], ["uuid", "00000000-0000-0000-0000-000000100000"], ["uuid", "00000000-0000-0000-0000-000000000000"], ["uuid", "00000000-0010-0000-0000-000000000000"], ["uuid", "00100000-0000-0000-0000-000000000000"], ["uuid", "00000000-0001-0000-0000-000000000000"], ["uuid", "00000001-0000-0000-0000-000000000000"], ["uuid", "00001000-0000-0000-0000-000000000000"]]']], [[[["uuid","00000000-0000-0000-0000-000000000000"],["uuid","00000000-0000-0000-0000-000000000001"],["uuid","00000000-0000-0000-0000-000000000010"],["uuid","00000000-0000-0000-0000-000000000100"],["uuid","00000000-0000-0000-0000-000000001000"],["uuid","00000000-0000-0000-0000-000000010000"],["uuid","00000000-0000-0000-0000-000000100000"],["uuid","00000000-0000-0000-0000-000001000000"],["uuid","00000000-0000-0000-0000-000010000000"],["uuid","00000000-0000-0000-0000-000100000000"],["uuid","00000000-0000-0000-0000-001000000000"],["uuid","00000000-0000-0000-0000-010000000000"],["uuid","00000000-0000-0000-0000-100000000000"],["uuid","00000000-0000-0000-0001-000000000000"],["uuid","00000000-0000-0000-0010-000000000000"],["uuid","00000000-0000-0000-0100-000000000000"],["uuid","00000000-0000-0000-1000-000000000000"],["uuid","00000000-0000-0001-0000-000000000000"],["uuid","00000000-0000-0010-0000-000000000000"],["uuid","00000000-0000-0100-0000-000000000000"],["uuid","00000000-0000-1000-0000-000000000
000"],["uuid","00000000-0001-0000-0000-000000000000"],["uuid","00000000-0010-0000-0000-000000000000"],["uuid","00000000-0100-0000-0000-000000000000"],["uuid","00000000-1000-0000-0000-000000000000"],["uuid","00000001-0000-0000-0000-000000000000"],["uuid","00000010-0000-0000-0000-000000000000"],["uuid","00000100-0000-0000-0000-000000000000"],["uuid","00001000-0000-0000-0000-000000000000"],["uuid","00010000-0000-0000-0000-000000000000"],["uuid","00100000-0000-0000-0000-000000000000"],["uuid","01000000-0000-0000-0000-000000000000"],["uuid","10000000-0000-0000-0000-000000000000"]]]]) OVSDB_CHECK_POSITIVE_CPY([real not acceptable integer JSON atom], [[parse-atoms '["integer"]' '[0.5]' ]], [syntax "0.5": syntax error: expected integer]) dnl is not allowed anywhere in a UTF-8 string. dnl is a surrogate and not allowed in UTF-8. OVSDB_CHECK_POSITIVE([no invalid UTF-8 sequences in strings], [parse-atoms '[["string"]]' \ '@<:@"m4_esyscmd([printf "\300"])"@:>@' \ '@<:@"m4_esyscmd([printf "\355\240\200"])"@:>@' \ ], [constraint violation: not a valid UTF-8 string: invalid UTF-8 sequence 0xc0 constraint violation: not a valid UTF-8 string: invalid UTF-8 sequence 0xed 0xa0]) dnl Python won't let invalid UTF-8 (its idea of invalid UTF-8, anyway) into it dnl at all, so this test never gets as far as a constraint violation. It's dnl just a JSON parse error. dnl dnl is not allowed anywhere in a UTF-8 string. dnl ( is not allowed in UTF-8 but Python doesn't care.) dnl is not allowed in UTF-8. 
OVSDB_CHECK_POSITIVE_PY([no invalid UTF-8 sequences in strings - Python], [parse-atoms '[["string"]]' \ '@<:@"m4_esyscmd([printf "\300"])"@:>@' \ '@<:@"m4_esyscmd([printf "\355\200\177"])"@:>@' \ ], ["not a valid UTF-8 string: invalid UTF-8 sequence 0xc0" "not a valid UTF-8 string: invalid UTF-8 sequence 0xed 0x80"]) OVSDB_CHECK_NEGATIVE([real not acceptable integer string atom], [[parse-atom-strings '["integer"]' '0.5' ]], ["0.5" is not a valid integer]) OVSDB_CHECK_POSITIVE_CPY([string "true" not acceptable boolean JSON atom], [[parse-atoms '["boolean"]' '["true"]' ]], [syntax ""true"": syntax error: expected boolean]) OVSDB_CHECK_NEGATIVE([string "true" not acceptable boolean string atom], [[parse-atom-strings '["boolean"]' '"true"' ]], [""true"" is not a valid boolean (use "true" or "false")]) OVSDB_CHECK_POSITIVE_CPY([integer not acceptable string JSON atom], [[parse-atoms '["string"]' '[1]']], [syntax "1": syntax error: expected string]) OVSDB_CHECK_POSITIVE_CPY([uuid atom must be expressed as JSON array], [[parse-atoms '["uuid"]' '["550e8400-e29b-41d4-a716-446655440000"]']], [[syntax ""550e8400-e29b-41d4-a716-446655440000"": syntax error: expected ["uuid", ]]]) OVSDB_CHECK_POSITIVE_CPY([named-uuid requires symbol table], [parse-atoms '[["uuid"]]' '[["named-uuid", "x"]]'], [[syntax "["named-uuid","x"]": syntax error: expected ["uuid", ]]]) OVSDB_CHECK_NEGATIVE([empty string atom must be quoted], [[parse-atom-strings '["string"]' '']], [An empty string is not valid as input; use "" to represent the empty string]) OVSDB_CHECK_NEGATIVE([quotes must be balanced], [parse-atom-strings '[["string"]]' '"asdf'], ["asdf: missing quote at end of quoted string]) OVSDB_CHECK_NEGATIVE([uuids must be valid], [parse-atom-strings '[["uuid"]]' '1234-5678'], ["1234-5678" is not a valid UUID]) AT_BANNER([OVSDB -- atoms with enum constraints]) OVSDB_CHECK_POSITIVE_CPY([integer atom enum], [[parse-atoms '[{"type": "integer", "enum": ["set", [1, 6, 8, 10]]}]' \ '[0]' \ '[1]' \ 
'[2]' \ '[3]' \ '[6]' \ '[7]' \ '[8]' \ '[9]' \ '[10]' \ '[11]']], [[constraint violation: 0 is not one of the allowed values ([1, 6, 8, 10]) 1 constraint violation: 2 is not one of the allowed values ([1, 6, 8, 10]) constraint violation: 3 is not one of the allowed values ([1, 6, 8, 10]) 6 constraint violation: 7 is not one of the allowed values ([1, 6, 8, 10]) 8 constraint violation: 9 is not one of the allowed values ([1, 6, 8, 10]) 10 constraint violation: 11 is not one of the allowed values ([1, 6, 8, 10])]]) OVSDB_CHECK_POSITIVE_CPY([real atom enum], [[parse-atoms '[{"type": "real", "enum": ["set", [-1.5, 1.5]]}]' \ '[-2]' \ '[-1]' \ '[-1.5]' \ '[0]' \ '[1]' \ '[1.5]' \ '[2]']], [[constraint violation: -2 is not one of the allowed values ([-1.5, 1.5]) constraint violation: -1 is not one of the allowed values ([-1.5, 1.5]) -1.5 constraint violation: 0 is not one of the allowed values ([-1.5, 1.5]) constraint violation: 1 is not one of the allowed values ([-1.5, 1.5]) 1.5 constraint violation: 2 is not one of the allowed values ([-1.5, 1.5])]]) OVSDB_CHECK_POSITIVE_CPY([boolean atom enum], [[parse-atoms '[{"type": "boolean", "enum": false}]' \ '[false]' \ '[true]']], [[false constraint violation: true is not one of the allowed values ([false])]]) OVSDB_CHECK_POSITIVE_CPY([string atom enum], [[parse-atoms '[{"type": "string", "enum": ["set", ["abc", "def"]]}]' \ '[""]' \ '["ab"]' \ '["abc"]' \ '["def"]' \ '["defg"]' \ '["DEF"]']], [[constraint violation: "" is not one of the allowed values ([abc, def]) constraint violation: ab is not one of the allowed values ([abc, def]) "abc" "def" constraint violation: defg is not one of the allowed values ([abc, def]) constraint violation: DEF is not one of the allowed values ([abc, def])]]) OVSDB_CHECK_POSITIVE_CPY([uuid atom enum], [[parse-atoms '[{"type": "uuid", "enum": ["set", [["uuid", "6d53a6dd-2da7-4924-9927-97f613812382"], ["uuid", "52cbc842-137a-4db5-804f-9f34106a0ba3"]]]}]' \ '["uuid", 
"6d53a6dd-2da7-4924-9927-97f613812382"]' \ '["uuid", "52cbc842-137a-4db5-804f-9f34106a0ba3"]' \ '["uuid", "dab2a6b2-6094-4f43-a7ef-4c0f0608f176"]']], [[["uuid","6d53a6dd-2da7-4924-9927-97f613812382"] ["uuid","52cbc842-137a-4db5-804f-9f34106a0ba3"] constraint violation: dab2a6b2-6094-4f43-a7ef-4c0f0608f176 is not one of the allowed values ([52cbc842-137a-4db5-804f-9f34106a0ba3, 6d53a6dd-2da7-4924-9927-97f613812382])]]) AT_BANNER([OVSDB -- atoms with other constraints]) OVSDB_CHECK_POSITIVE_CPY([integers >= 5], [[parse-atoms '[{"type": "integer", "minInteger": 5}]' \ '[0]' \ '[4]' \ '[5]' \ '[6]' \ '[12345]']], [constraint violation: 0 is less than minimum allowed value 5 constraint violation: 4 is less than minimum allowed value 5 5 6 12345]) OVSDB_CHECK_POSITIVE_CPY([integers <= -1], [[parse-atoms '[{"type": "integer", "maxInteger": -1}]' \ '[0]' \ '[-1]' \ '[-2]' \ '[-123]']], [constraint violation: 0 is greater than maximum allowed value -1 -1 -2 -123]) OVSDB_CHECK_POSITIVE_CPY([integers in range -10 to 10], [[parse-atoms '[{"type": "integer", "minInteger": -10, "maxInteger": 10}]' \ '[-20]' \ '[-11]' \ '[-10]' \ '[-9]' \ '[1]' \ '[9]' \ '[10]' \ '[11]' \ '[123576]']], [constraint violation: -20 is not in the valid range -10 to 10 (inclusive) constraint violation: -11 is not in the valid range -10 to 10 (inclusive) -10 -9 1 9 10 constraint violation: 11 is not in the valid range -10 to 10 (inclusive) constraint violation: 123576 is not in the valid range -10 to 10 (inclusive)]) OVSDB_CHECK_POSITIVE_CPY([reals >= 5], [[parse-atoms '[{"type": "real", "minReal": 5}]' \ '[0]' \ '[4]' \ '[5]' \ '[6]' \ '[12345]']], [constraint violation: 0 is less than minimum allowed value 5 constraint violation: 4 is less than minimum allowed value 5 5 6 12345]) OVSDB_CHECK_POSITIVE_CPY([reals <= -1], [[parse-atoms '[{"type": "real", "maxReal": -1}]' \ '[0]' \ '[-1]' \ '[-2]' \ '[-123]']], [constraint violation: 0 is greater than maximum allowed value -1 -1 -2 -123]) 
OVSDB_CHECK_POSITIVE_CPY([reals in range -10 to 10], [[parse-atoms '[{"type": "real", "minReal": -10, "maxReal": 10}]' \ '[-20]' \ '[-11]' \ '[-10]' \ '[-9]' \ '[1]' \ '[9]' \ '[10]' \ '[11]' \ '[123576]']], [constraint violation: -20 is not in the valid range -10 to 10 (inclusive) constraint violation: -11 is not in the valid range -10 to 10 (inclusive) -10 -9 1 9 10 constraint violation: 11 is not in the valid range -10 to 10 (inclusive) constraint violation: 123576 is not in the valid range -10 to 10 (inclusive)]) OVSDB_CHECK_POSITIVE_CPY([strings at least 2 characters long], [[parse-atoms '{"type": "string", "minLength": 2}' \ '[""]' \ '["a"]' \ '["ab"]' \ '["abc"]' \ '["\ud834\udd1e"]']], [[constraint violation: "" length 0 is less than minimum allowed length 2 constraint violation: "a" length 1 is less than minimum allowed length 2 "ab" "abc" constraint violation: "𝄞" length 1 is less than minimum allowed length 2]], [], [], [dnl This test requires a wide build of Python. AT_CHECK([$PYTHON -c 'unichr(0x10000)' || exit 77], [0], [ignore], [ignore])]) OVSDB_CHECK_POSITIVE_CPY([strings no more than 2 characters long], [[parse-atoms '{"type": "string", "maxLength": 2}' \ '[""]' \ '["a"]' \ '["ab"]' \ '["abc"]' \ '["de"]']], [["" "a" "ab" constraint violation: "abc" length 3 is greater than maximum allowed length 2 "de"]]) AT_BANNER([OSVDB -- simple data]) OVSDB_CHECK_POSITIVE_CPY([integer JSON datum], [[parse-data '["integer"]' '[0]' '["set",[1]]' '[-1]']], [0 1 -1]) OVSDB_CHECK_POSITIVE([integer string datum], [[parse-data-strings -- '["integer"]' '0' '1' '-1' '+1']], [0 1 -1 1]) OVSDB_CHECK_POSITIVE_CPY([real JSON datum], [[parse-data '["real"]' '[0]' '["set",[1.0]]' '[-1.25]']], [0 1 -1.25]) OVSDB_CHECK_POSITIVE([real string datum], [[parse-data-strings -- '["real"]' '0' '1.0' '-1.25']], [0 1 -1.25]) OVSDB_CHECK_POSITIVE_CPY([boolean JSON datum], [[parse-data '["boolean"]' '["set", [true]]' '[false]' ]], [true false]) OVSDB_CHECK_POSITIVE([boolean string 
datum], [[parse-data-strings '["boolean"]' 'true' 'false' ]], [true false]) OVSDB_CHECK_POSITIVE_CPY([string JSON datum], [[parse-data '["string"]' '["set",[""]]' '["true"]' '["\"\\\/\b\f\n\r\t"]']], ["" "true" "\"\\/\b\f\n\r\t"]) OVSDB_CHECK_POSITIVE([string string datum], [[parse-data-strings '["string"]' '"x"' '""' '"true"' '"\"\\\/\b\f\n\r\t"']], [x "" "true" "\"\\/\b\f\n\r\t"]) AT_BANNER([OVSDB -- set data]) OVSDB_CHECK_POSITIVE_CPY([JSON optional boolean], [[parse-data '{"key": "boolean", "min": 0}' \ '[true]' \ '["set", [false]]' \ '["set", []]']], [[true false ["set",[]]]], [set]) OVSDB_CHECK_POSITIVE([string optional boolean], [[parse-data-strings '{"key": "boolean", "min": 0}' \ 'true' \ 'false' \ '[]']], [[true false []]], [set]) OVSDB_CHECK_POSITIVE_CPY([JSON set of 0 or more integers], [[parse-data '{"key": "integer", "min": 0, "max": "unlimited"}' \ '["set", [0]]' \ '[1]' \ '["set", [0, 1]]' \ '["set", [0, 1, 2]]' \ '["set", [0, 1, 2, 3, 4, 5]]' \ '["set", [0, 1, 2, 3, 4, 5, 6, 7, 8]]' \ '["set", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]']], [[0 1 ["set",[0,1]] ["set",[0,1,2]] ["set",[0,1,2,3,4,5]] ["set",[0,1,2,3,4,5,6,7,8]] ["set",[0,1,2,3,4,5,6,7,8,9,10]]]]) OVSDB_CHECK_POSITIVE([string set of 0 or more integers], [[parse-data-strings '{"key": "integer", "min": 0, "max": "unlimited"}' \ '0' \ '0,1' \ '0, 1, 2' \ '[0, 1,2, 3, 4, 5]' \ '0, 1,2, 3,4, 5, 6, 7, 8' \ '[0, 1, 2, 3, 4,5, 6,7, 8, 9, 10]']], [[[0] [0, 1] [0, 1, 2] [0, 1, 2, 3, 4, 5] [0, 1, 2, 3, 4, 5, 6, 7, 8] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]]) OVSDB_CHECK_POSITIVE_CPY([JSON set of 1 to 3 uuids], [[parse-data '{"key": "uuid", "min": 1, "max": 3}' \ '["set", [["uuid", "550e8400-e29b-41d4-a716-446655440000"]]]' \ '["uuid", "b5078be0-7664-4299-b836-8bcc03ef941f"]' \ '["set", [["uuid", "c5051240-30ff-43ed-b4b9-93cf3f050813"], ["uuid", "90558331-09af-4d2f-a572-509cad2e9088"], ["uuid", "550e8400-e29b-41d4-a716-446655440000"]]]']], [[["uuid","550e8400-e29b-41d4-a716-446655440000"] 
["uuid","b5078be0-7664-4299-b836-8bcc03ef941f"] ["set",[["uuid","550e8400-e29b-41d4-a716-446655440000"],["uuid","90558331-09af-4d2f-a572-509cad2e9088"],["uuid","c5051240-30ff-43ed-b4b9-93cf3f050813"]]]]]) OVSDB_CHECK_POSITIVE([string set of 1 to 3 uuids], [[parse-data-strings '{"key": "uuid", "min": 1, "max": 3}' \ '[550e8400-e29b-41d4-a716-446655440000]' \ '[c5051240-30ff-43ed-b4b9-93cf3f050813, 90558331-09af-4d2f-a572-509cad2e9088, 550e8400-e29b-41d4-a716-446655440000]']], [[[550e8400-e29b-41d4-a716-446655440000] [550e8400-e29b-41d4-a716-446655440000, 90558331-09af-4d2f-a572-509cad2e9088, c5051240-30ff-43ed-b4b9-93cf3f050813]]]) OVSDB_CHECK_POSITIVE_CPY([JSON set of 0 to 3 strings], [[parse-data '{"key": "string", "min": 0, "max": 3}' \ '["set", []]' \ '["a longer string"]' \ '["set", ["a relatively long string"]]' \ '["set", ["short string", "a relatively long string"]]' \ '["set", ["zzz", "short string", "a relatively long string"]]']], [[["set",[]] "a longer string" "a relatively long string" ["set",["a relatively long string","short string"]] ["set",["a relatively long string","short string","zzz"]]]]) OVSDB_CHECK_POSITIVE([string set of 0 to 3 strings], [[parse-data-strings '{"key": "string", "min": 0, "max": 3}' \ '[]' \ '"a relatively long string"' \ '["short string", "a relatively long string"]' \ '"zzz","short string","a relatively long string"']], [[[] ["a relatively long string"] ["a relatively long string", "short string"] ["a relatively long string", "short string", zzz]]]) OVSDB_CHECK_NEGATIVE_CPY([duplicate boolean not allowed in JSON set], [[parse-data '{"key": "boolean", "max": 5}' '["set", [true, true]]']], [ovsdb error: set contains duplicate]) OVSDB_CHECK_NEGATIVE([duplicate boolean not allowed in string set], [[parse-data-strings '{"key": "boolean", "max": 5}' 'true, true']], [set contains duplicate value]) OVSDB_CHECK_NEGATIVE_CPY([duplicate integer not allowed in JSON set], [[parse-data '{"key": "integer", "max": 5}' '["set", [1, 2, 3, 
1]]']], [ovsdb error: set contains duplicate]) OVSDB_CHECK_NEGATIVE([duplicate integer not allowed in string set], [[parse-data-strings '{"key": "integer", "max": 5}' '[1, 2, 3, 1]']], [set contains duplicate value]) OVSDB_CHECK_NEGATIVE_CPY([duplicate real not allowed in JSON set], [[parse-data '{"key": "real", "max": 5}' '["set", [0.0, -0.0]]']], [ovsdb error: set contains duplicate]) OVSDB_CHECK_NEGATIVE([duplicate real not allowed in string set], [[parse-data-strings '{"key": "real", "max": 5}' '0.0, -0.0']], [set contains duplicate value]) OVSDB_CHECK_NEGATIVE_CPY([duplicate string not allowed in JSON set], [[parse-data '{"key": "string", "max": 5}' '["set", ["asdf", "ASDF", "asdf"]]']], [ovsdb error: set contains duplicate]) OVSDB_CHECK_NEGATIVE([duplicate string not allowed in string set], [[parse-data-strings '{"key": "string", "max": 5}' 'asdf, ASDF, "asdf"']], [set contains duplicate value]) OVSDB_CHECK_NEGATIVE_CPY([duplicate uuid not allowed in JSON set], [[parse-data '{"key": "uuid", "max": 5}' \ '["set", [["uuid", "7ef21525-0088-4a28-a418-5518413e43ea"], ["uuid", "355ad037-f1da-40aa-b47c-ff9c7e8c6a38"], ["uuid", "7ef21525-0088-4a28-a418-5518413e43ea"]]]']], [ovsdb error: set contains duplicate]) OVSDB_CHECK_NEGATIVE([duplicate uuid not allowed in string set], [[parse-data-strings '{"key": "uuid", "max": 5}' \ '7ef21525-0088-4a28-a418-5518413e43ea, 355ad037-f1da-40aa-b47c-ff9c7e8c6a38, 7ef21525-0088-4a28-a418-5518413e43ea']], [set contains duplicate value]) AT_BANNER([OVSDB -- map data]) OVSDB_CHECK_POSITIVE_CPY([JSON map of 1 integer to boolean], [[parse-data '{"key": "integer", "value": "boolean"}' \ '["map", [[1, true]]]']], [[["map",[[1,true]]]]]) OVSDB_CHECK_POSITIVE([string map of 1 integer to boolean], [[parse-data-strings '{"key": "integer", "value": "boolean"}' \ '1=true']], [[1=true]]) OVSDB_CHECK_POSITIVE_CPY([JSON map of at least 1 integer to boolean], [[parse-data '{"key": "integer", "value": "boolean", "max": "unlimited"}' \ '["map", [[1, 
true]]]' \ '["map", [[0, true], [1, false], [2, true], [3, true], [4, true]]]' \ '["map", [[3, false], [0, true], [4, false]]]']], [[["map",[[1,true]]] ["map",[[0,true],[1,false],[2,true],[3,true],[4,true]]] ["map",[[0,true],[3,false],[4,false]]]]]) OVSDB_CHECK_POSITIVE([string map of at least 1 integer to boolean], [[parse-data-strings '{"key": "integer", "value": "boolean", "max": "unlimited"}' \ '1=true' \ '0=true 1=false 2=true, 3=true 4=true,' \ '3=false,0=true ,4=false']], [[{1=true} {0=true, 1=false, 2=true, 3=true, 4=true} {0=true, 3=false, 4=false}]]) OVSDB_CHECK_POSITIVE_CPY([JSON map of 1 boolean to integer], [[parse-data '{"key": "boolean", "value": "integer"}' \ '["map", [[true, 1]]]']], [[["map",[[true,1]]]]]) OVSDB_CHECK_POSITIVE([string map of 1 boolean to integer], [[parse-data-strings '{"key": "boolean", "value": "integer"}' \ 'true=1']], [[true=1]]) OVSDB_CHECK_POSITIVE_CPY([JSON map of 1 uuid to real], [[parse-data '{"key": "uuid", "value": "real", "min": 1, "max": 5}' \ '["map", [[["uuid", "cad8542b-6ee1-486b-971b-7dcbf6e14979"], 1.0], [["uuid", "6b94b968-2702-4f64-9457-314a34d69b8c"], 2.0], [["uuid", "d2c4a168-24de-47eb-a8a3-c1abfc814979"], 3.0], [["uuid", "25bfa475-d072-4f60-8be1-00f48643e9cb"], 4.0], [["uuid", "1c92b8ca-d5e4-4628-a85d-1dc2d099a99a"], 5.0]]]']], [[["map",[[["uuid","1c92b8ca-d5e4-4628-a85d-1dc2d099a99a"],5],[["uuid","25bfa475-d072-4f60-8be1-00f48643e9cb"],4],[["uuid","6b94b968-2702-4f64-9457-314a34d69b8c"],2],[["uuid","cad8542b-6ee1-486b-971b-7dcbf6e14979"],1],[["uuid","d2c4a168-24de-47eb-a8a3-c1abfc814979"],3]]]]]) OVSDB_CHECK_POSITIVE([string map of 1 uuid to real], [[parse-data-strings '{"key": "uuid", "value": "real", "min": 1, "max": 5}' \ 'cad8542b-6ee1-486b-971b-7dcbf6e14979=1.0, 6b94b968-2702-4f64-9457-314a34d69b8c=2.0, d2c4a168-24de-47eb-a8a3-c1abfc814979=3.0, 25bfa475-d072-4f60-8be1-00f48643e9cb=4.0, 1c92b8ca-d5e4-4628-a85d-1dc2d099a99a=5.0']], [[{1c92b8ca-d5e4-4628-a85d-1dc2d099a99a=5, 
25bfa475-d072-4f60-8be1-00f48643e9cb=4, 6b94b968-2702-4f64-9457-314a34d69b8c=2, cad8542b-6ee1-486b-971b-7dcbf6e14979=1, d2c4a168-24de-47eb-a8a3-c1abfc814979=3}]]) OVSDB_CHECK_POSITIVE_CPY([JSON map of 10 string to string], [[parse-data '{"key": "string", "value": "string", "min": 1, "max": 10}' \ '["map", [["2 gills", "1 chopin"], ["2 chopins", "1 pint"], ["2 pints", "1 quart"], ["2 quarts", "1 pottle"], ["2 pottles", "1 gallon"], ["2 gallons", "1 peck"], ["2 pecks", "1 demibushel"], ["2 demibushel", "1 firkin"], ["2 firkins", "1 kilderkin"], ["2 kilderkins", "1 barrel"]]]']], [[["map",[["2 chopins","1 pint"],["2 demibushel","1 firkin"],["2 firkins","1 kilderkin"],["2 gallons","1 peck"],["2 gills","1 chopin"],["2 kilderkins","1 barrel"],["2 pecks","1 demibushel"],["2 pints","1 quart"],["2 pottles","1 gallon"],["2 quarts","1 pottle"]]]]]) OVSDB_CHECK_POSITIVE([string map of 10 string to string], [[parse-data-strings '{"key": "string", "value": "string", "min": 1, "max": 10}' \ '{"2 gills"="1 chopin", "2 chopins"= "1 pint", "2 pints"= "1 quart", "2 quarts"= "1 pottle", "2 pottles"= "1 gallon", "2 gallons"= "1 peck", "2 pecks"= "1 demibushel", "2 demibushel"= "1 firkin", "2 firkins"= "1 kilderkin", "2 kilderkins"= "1 barrel"}']], [[{"2 chopins"="1 pint", "2 demibushel"="1 firkin", "2 firkins"="1 kilderkin", "2 gallons"="1 peck", "2 gills"="1 chopin", "2 kilderkins"="1 barrel", "2 pecks"="1 demibushel", "2 pints"="1 quart", "2 pottles"="1 gallon", "2 quarts"="1 pottle"}]]) OVSDB_CHECK_NEGATIVE_CPY([duplicate integer key not allowed in JSON map], [[parse-data '{"key": "integer", "value": "boolean", "max": 5}' \ '["map", [[1, true], [2, false], [1, false]]]']], [ovsdb error: map contains duplicate key]) OVSDB_CHECK_NEGATIVE([duplicate integer key not allowed in string map], [[parse-data-strings '{"key": "integer", "value": "boolean", "max": 5}' \ '1=true 2=false 1=false']], [map contains duplicate key]) 
openvswitch-2.0.1+git20140120/tests/ovsdb-execution.at000066400000000000000000001136411226605124000222660ustar00rootroot00000000000000AT_BANNER([OVSDB -- execution]) m4_divert_push([PREPARE_TESTS]) [ ordinal_schema () { cat <<'EOF' {"name": "ordinals", "tables": { "ordinals": { "columns": { "number": {"type": "integer"}, "name": {"type": "string"}}, "indexes": [["number"]]}}, "version": "5.1.3", "cksum": "12345678 9"} EOF } constraint_schema () { cat << 'EOF' {"name": "constraints", "tables": { "a": { "columns": { "a": {"type": "integer"}, "a2a": {"type": {"key": {"type": "uuid", "refTable": "a"}, "min": 0, "max": "unlimited"}}, "a2b": {"type": {"key": {"type": "uuid", "refTable": "b"}, "min": 0, "max": "unlimited"}}}}, "b": { "columns": { "b": {"type": "integer"}, "b2a": {"type": {"key": {"type": "uuid", "refTable": "a"}, "min": 0, "max": "unlimited"}}, "b2b": {"type": {"key": {"type": "uuid", "refTable": "b"}, "min": 0, "max": "unlimited"}}, "x": {"type": {"key": "integer", "min": 1, "max": 2}}}}, "constrained": { "columns": { "positive": {"type": {"key": {"type": "integer", "minInteger": 1}}}}, "maxRows": 1}}} EOF } weak_schema () { cat <<'EOF' {"name": "weak", "tables": { "a": { "columns": { "a": {"type": "integer"}, "a2a": {"type": {"key": {"type": "uuid", "refTable": "a", "refType": "weak"}, "min": 0, "max": "unlimited"}}, "a2a1": {"type": {"key": {"type": "uuid", "refTable": "a", "refType": "weak"}}}, "a2b": {"type": {"key": {"type": "uuid", "refTable": "b", "refType": "weak"}}}}}, "b": { "columns": { "b": {"type": "integer"}, "b2a": {"type": {"key": {"type": "uuid", "refTable": "a", "refType": "weak"}, "min": 0, "max": "unlimited"}}}}}} EOF } gc_schema () { cat <<'EOF' {"name": "gc", "tables": { "root": { "columns": { "a": {"type": {"key": {"type": "uuid", "refTable": "a"}, "min": 0, "max": "unlimited"}}}, "isRoot": true}, "a": { "columns": { "a": {"type": "integer"}, "a2a": {"type": {"key": {"type": "uuid", "refTable": "a"}, "min": 0, "max": "unlimited"}}, 
"a2b": {"type": {"key": {"type": "uuid", "refTable": "b"}, "min": 0, "max": "unlimited"}}, "wa2a": {"type": {"key": {"type": "uuid", "refTable": "a", "refType": "weak"}, "min": 0, "max": "unlimited"}}, "wa2b": {"type": {"key": {"type": "uuid", "refTable": "b", "refType": "weak"}, "min": 0, "max": "unlimited"}}}}, "b": { "columns": { "b": {"type": "integer"}, "b2a": {"type": {"key": {"type": "uuid", "refTable": "a"}, "min": 0, "max": "unlimited"}}, "wb2a": {"type": {"key": {"type": "uuid", "refTable": "a", "refType": "weak"}, "min": 0, "max": "unlimited"}}}, "isRoot": false}}} EOF } immutable_schema () { cat <<'EOF' {"name": "immutable", "tables": { "a": { "columns": {"i": {"type": "integer", "mutable": false}}}}} EOF } ] m4_divert_pop([PREPARE_TESTS]) # OVSDB_CHECK_EXECUTION(TITLE, SCHEMA, TRANSACTIONS, OUTPUT, [KEYWORDS]) # # Runs "test-ovsdb execute" with the given SCHEMA and each of the # TRANSACTIONS (which should be a quoted list of quoted strings). # # Checks that the overall output is OUTPUT, but UUIDs in the output # are replaced by markers of the form where N is a number. The # first unique UUID is replaced by <0>, the next by <1>, and so on. # If a given UUID appears more than once it is always replaced by the # same marker. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. 
m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb execute execution positive $5]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([test-ovsdb execute "`$2`" m4_foreach([txn], [$3], [ 'txn'])], [0], [stdout], []) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [$4]) AT_CLEANUP]) OVSDB_CHECK_EXECUTION([uuid-name must be <id>], [constraint_schema], [[[["constraints", {"op": "insert", "table": "a", "row": {}, "uuid-name": "0"}]]]], [[[{"details":"Parsing ovsdb operation 1 of 1 failed: Type mismatch for member 'uuid-name'.","error":"syntax error","syntax":"{\"op\":\"insert\",\"row\":{},\"table\":\"a\",\"uuid-name\":\"0\"}"}] ]]) OVSDB_CHECK_EXECUTION([named-uuid must be <id>], [constraint_schema], [[[["constraints", {"op": "insert", "table": "a", "row": {"a2a": ["named-uuid", "0"]}}]]]], [[[{"details":"named-uuid string is not a valid <id>","error":"syntax error","syntax":"[\"named-uuid\",\"0\"]"}] ]]) OVSDB_CHECK_EXECUTION([duplicate uuid-name not allowed], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {}, "uuid-name": "x"}, {"op": "insert", "table": "ordinals", "row": {}, "uuid-name": "x"}]]]], [[[{"uuid":["uuid","<0>"]},{"details":"This \"uuid-name\" appeared on an earlier \"insert\" operation.","error":"duplicate uuid-name","syntax":"\"x\""}] ]]) m4_define([EXECUTION_EXAMPLES], [ dnl At one point the "commit" code ignored new rows with all-default values, dnl so this checks for that problem. 
OVSDB_CHECK_EXECUTION([insert default row, query table], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {}}]]], [[["ordinals", {"op": "select", "table": "ordinals", "where": []}]]]], [[[{"uuid":["uuid","<0>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<1>"],"name":"","number":0}]}] ]]) OVSDB_CHECK_EXECUTION([insert row, query table], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}]]], [[["ordinals", {"op": "select", "table": "ordinals", "where": []}]]]], [[[{"uuid":["uuid","<0>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<1>"],"name":"zero","number":0}]}] ]]) OVSDB_CHECK_EXECUTION([insert rows, query by value], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}]]], [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}]]], [[["ordinals", {"op": "select", "table": "ordinals", "where": [["name", "==", "zero"]]}]]], [[["ordinals", {"op": "select", "table": "ordinals", "where": [["name", "==", "one"]]}]]]], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0}]}] [{"rows":[{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] ]]) OVSDB_CHECK_EXECUTION([insert rows, query by named-uuid], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}, "uuid-name": "first"}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}, "uuid-name": "second"}, {"op": "select", "table": "ordinals", "where": [["_uuid", "==", ["named-uuid", "first"]]]}, {"op": "select", "table": "ordinals", "where": [["_uuid", "==", ["named-uuid", "second"]]]}]]]], 
[[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0}]},{"rows":[{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] ]]) OVSDB_CHECK_EXECUTION([insert rows, update rows by value], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}, "uuid-name": "first"}]]], [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}, "uuid-name": "first"}]]], [[["ordinals", {"op": "update", "table": "ordinals", "where": [["name", "==", "zero"]], "row": {"name": "nought"}}]]], [[["ordinals", {"op": "select", "table": "ordinals", "where": [], "sort": ["number"]}]]]], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"count":1}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"nought","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] ]]) OVSDB_CHECK_EXECUTION([insert rows, mutate rows], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}, "uuid-name": "first"}]]], [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}, "uuid-name": "first"}]]], [[["ordinals", {"op": "mutate", "table": "ordinals", "where": [["name", "==", "zero"]], "mutations": [["number", "+=", 2]]}]]], [[["ordinals", {"op": "select", "table": "ordinals", "where": [], "sort": ["number"]}]]]], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"count":1}] [{"rows":[{"_uuid":["uuid","<1>"],"_version":["uuid","<2>"],"name":"one","number":1},{"_uuid":["uuid","<0>"],"_version":["uuid","<3>"],"name":"zero","number":2}]}] ]]) OVSDB_CHECK_EXECUTION([insert rows, delete by named-uuid], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}, "uuid-name": "first"}, {"op": "insert", "table": "ordinals", "row": {"number": 1, 
"name": "one"}, "uuid-name": "second"}, {"op": "delete", "table": "ordinals", "where": [["_uuid", "==", ["named-uuid", "first"]]]}, {"op": "select", "table": "ordinals", "where": [], "columns": ["name","number"]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"count":1},{"rows":[{"name":"one","number":1}]}] ]]) OVSDB_CHECK_EXECUTION([insert rows, delete rows by value], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}, "uuid-name": "first"}]]], [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}, "uuid-name": "first"}]]], [[["ordinals", {"op": "delete", "table": "ordinals", "where": [["name", "==", "zero"]]}]]], [[["ordinals", {"op": "select", "table": "ordinals", "where": []}]]]], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"count":1}] [{"rows":[{"_uuid":["uuid","<1>"],"_version":["uuid","<2>"],"name":"one","number":1}]}] ]]) OVSDB_CHECK_EXECUTION([insert rows, delete by (non-matching) value], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}, "uuid-name": "first"}]]], [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}, "uuid-name": "first"}]]], [[["ordinals", {"op": "delete", "table": "ordinals", "where": [["name", "==", "nought"]]}]]], [[["ordinals", {"op": "select", "table": "ordinals", "where": [], "sort": ["number"]}]]]], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"count":0}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] ]]) OVSDB_CHECK_EXECUTION([insert rows, delete all], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}, "uuid-name": "first"}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}, "uuid-name": "second"}, {"op": "delete", 
"table": "ordinals", "where": []}, {"op": "select", "table": "ordinals", "where": [], "columns": ["name","number"]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"count":2},{"rows":[]}] ]]) OVSDB_CHECK_EXECUTION([insert row, query table, commit], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "select", "table": "ordinals", "where": []}, {"op": "commit", "durable": false}]]]], [[[{"uuid":["uuid","<0>"]},{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<1>"],"name":"zero","number":0}]},{}] ]]) OVSDB_CHECK_EXECUTION([insert row, query table, commit durably], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "select", "table": "ordinals", "where": []}, {"op": "commit", "durable": true}]]]], [[[{"uuid":["uuid","<0>"]},{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<1>"],"name":"zero","number":0}]},{}] ]]) OVSDB_CHECK_EXECUTION([equality wait with correct rows], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "wait", "timeout": 0, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{}] ]]) OVSDB_CHECK_EXECUTION([equality wait with extra row], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "wait", "timeout": 0, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}, {"name": "two", "number": 2}]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"details":"\"wait\" timed 
out","error":"timed out"}] ]]) OVSDB_CHECK_EXECUTION([equality wait with missing row], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "wait", "timeout": 0, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "one", "number": 1}]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"details":"\"wait\" timed out","error":"timed out"}] ]]) OVSDB_CHECK_EXECUTION([inequality wait with correct rows], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "wait", "timeout": 0, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "!=", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"details":"\"wait\" timed out","error":"timed out"}] ]]) OVSDB_CHECK_EXECUTION([inequality wait with extra row], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "wait", "timeout": 0, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "!=", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}, {"name": "two", "number": 2}]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{}] ]]) OVSDB_CHECK_EXECUTION([inequality wait with missing row], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "wait", "timeout": 0, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "!=", "rows": [{"name": "one", "number": 1}]}]]]], 
[[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{}] ]]) OVSDB_CHECK_EXECUTION([insert and update constraints], [constraint_schema], [[[["constraints", {"op": "insert", "table": "constrained", "row": {}}]]], [[["constraints", {"op": "insert", "table": "constrained", "row": {"positive": -1}}]]], [[["constraints", {"op": "update", "table": "constrained", "where": [], "row": {"positive": -2}}]]], [[["constraints", {"op": "insert", "table": "constrained", "row": {"positive": 1}}]]], [[["constraints", {"op": "insert", "table": "constrained", "row": {"positive": 2}}]]]], [[[{"details":"0 is less than minimum allowed value 1","error":"constraint violation"}] [{"details":"-1 is less than minimum allowed value 1","error":"constraint violation"}] [{"details":"-2 is less than minimum allowed value 1","error":"constraint violation"}] [{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]},{"details":"transaction causes \"constrained\" table to contain 2 rows, greater than the schema-defined limit of 1 row(s)","error":"constraint violation"}] ]]) OVSDB_CHECK_EXECUTION([index uniqueness checking], [ordinal_schema], dnl Insert initial row. [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}]]], dnl Try to insert row with identical value (fails). [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "another one"}}]]], dnl Remove initial row and insert new row with identical value in a single dnl transaction (succeeds). [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "another one"}}, {"op": "delete", "table": "ordinals", "where": [["name", "==", "one"]]}]]], dnl Remove row and insert two new rows with identical value in a single dnl transaction (fails). 
[[["ordinals", {"op": "delete", "table": "ordinals", "where": []}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "still another one"}}]]], dnl Add new row with different value (succeeds). [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 2, "name": "two"}}]]], dnl Change rows so values collide (fails). [[["ordinals", {"op": "update", "table": "ordinals", "where": [], "row": {"number": 3}}]]], dnl Swap rows' values (succeeds). [[["ordinals", {"op": "update", "table": "ordinals", "where": [["number", "==", 1]], "row": {"number": 2, "name": "old two"}}, {"op": "update", "table": "ordinals", "where": [["name", "==", "two"]], "row": {"number": 1, "name": "old one"}}]]], dnl Change all rows' values to values not used before and insert values that dnl collide (only) with their previous values (succeeds). [[["ordinals", {"op": "mutate", "table": "ordinals", "where": [], "mutations": [["number", "*=", 10]]}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "new one"}}, {"op": "insert", "table": "ordinals", "row": {"number": 2, "name": "new two"}}, {"op": "select", "table": "ordinals", "where": [], "columns": ["number", "name"], "sort": ["number"]}]]]], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]},{"details":"Transaction causes multiple rows in \"ordinals\" table to have identical values (1) for index on column \"number\". First row, with UUID <0>, existed in the database before this transaction and was not modified by the transaction. Second row, with UUID <1>, was inserted by this transaction.","error":"constraint violation"}] [{"uuid":["uuid","<2>"]},{"count":1}] [{"count":1},{"uuid":["uuid","<3>"]},{"uuid":["uuid","<4>"]},{"details":"Transaction causes multiple rows in \"ordinals\" table to have identical values (1) for index on column \"number\". First row, with UUID <4>, was inserted by this transaction. 
Second row, with UUID <3>, was inserted by this transaction.","error":"constraint violation"}] [{"uuid":["uuid","<5>"]}] [{"count":2},{"details":"Transaction causes multiple rows in \"ordinals\" table to have identical values (3) for index on column \"number\". First row, with UUID <5>, had the following index values before the transaction: 2. Second row, with UUID <2>, had the following index values before the transaction: 1.","error":"constraint violation"}] [{"count":1},{"count":1}] [{"count":2},{"uuid":["uuid","<6>"]},{"uuid":["uuid","<7>"]},{"rows":[{"name":"new one","number":1},{"name":"new two","number":2},{"name":"old one","number":10},{"name":"old two","number":20}]}] ]]) OVSDB_CHECK_EXECUTION([referential integrity -- simple], [constraint_schema], [[[["constraints", {"op": "insert", "table": "b", "row": {"b": 1}, "uuid-name": "brow"}, {"op": "insert", "table": "a", "row": {"a": 0, "a2b": ["set", [["named-uuid", "brow"]]]}}, {"op": "insert", "table": "a", "row": {"a": 1, "a2b": ["set", [["named-uuid", "brow"]]]}}, {"op": "insert", "table": "a", "row": {"a": 2, "a2b": ["set", [["named-uuid", "brow"]]]}}]]], [[["constraints", {"op": "delete", "table": "b", "where": []}]]], dnl Check that "mutate" honors number-of-elements constraints on sets and maps. 
[[["constraints", {"op": "mutate", "table": "b", "where": [], "mutations": [["x", "delete", 0]]}]]], [[["constraints", {"op": "delete", "table": "a", "where": [["a", "==", 0]]}]]], [[["constraints", {"op": "delete", "table": "b", "where": []}]]], [[["constraints", {"op": "delete", "table": "a", "where": [["a", "==", 1]]}]]], [[["constraints", {"op": "delete", "table": "b", "where": []}]]], [[["constraints", {"op": "delete", "table": "a", "where": [["a", "==", 2]]}]]], [[["constraints", {"op": "delete", "table": "b", "where": []}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]}] [{"count":1},{"details":"cannot delete b row <0> because of 3 remaining reference(s)","error":"referential integrity violation"}] [{"details":"Attempted to store 0 elements in set of 1 to 2 integers.","error":"constraint violation"}] [{"count":1}] [{"count":1},{"details":"cannot delete b row <0> because of 2 remaining reference(s)","error":"referential integrity violation"}] [{"count":1}] [{"count":1},{"details":"cannot delete b row <0> because of 1 remaining reference(s)","error":"referential integrity violation"}] [{"count":1}] [{"count":1}] ]]) OVSDB_CHECK_EXECUTION([referential integrity -- mutual references], [constraint_schema], [[[["constraints", {"op": "insert", "table": "a", "row": {"a": 0, "a2b": ["set", [["named-uuid", "row2"]]], "a2a": ["set", [["named-uuid", "row1"]]]}, "uuid-name": "row1"}, {"op": "insert", "table": "b", "row": {"b": 1, "b2b": ["set", [["named-uuid", "row2"]]], "b2a": ["set", [["named-uuid", "row1"]]]}, "uuid-name": "row2"}]]], [[["constraints", {"op": "insert", "table": "a", "row": {"a2b": ["set", [["uuid", "b516b960-5b19-4fc2-bb82-fe1cbd6d0241"]]]}}]]], [[["constraints", {"op": "delete", "table": "a", "where": [["a", "==", 0]]}]]], [[["constraints", {"op": "delete", "table": "b", "where": [["b", "==", 1]]}]]], dnl Try the deletions again to make sure that the refcounts got rolled back. 
[[["constraints", {"op": "delete", "table": "a", "where": [["a", "==", 0]]}]]], [[["constraints", {"op": "delete", "table": "b", "where": [["b", "==", 1]]}]]], [[["constraints", {"op": "delete", "table": "a", "where": [["a", "==", 0]]}, {"op": "delete", "table": "b", "where": [["b", "==", 1]]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}] [{"uuid":["uuid","<2>"]},{"details":"Table a column a2b row <2> references nonexistent row <3> in table b.","error":"referential integrity violation"}] [{"count":1},{"details":"cannot delete a row <0> because of 1 remaining reference(s)","error":"referential integrity violation"}] [{"count":1},{"details":"cannot delete b row <1> because of 1 remaining reference(s)","error":"referential integrity violation"}] [{"count":1},{"details":"cannot delete a row <0> because of 1 remaining reference(s)","error":"referential integrity violation"}] [{"count":1},{"details":"cannot delete b row <1> because of 1 remaining reference(s)","error":"referential integrity violation"}] [{"count":1},{"count":1}] ]]) OVSDB_CHECK_EXECUTION([weak references], [weak_schema], [[[["weak", {"op": "insert", "table": "a", "row": {"a": 0, "a2a": ["set", [["named-uuid", "row1"], ["named-uuid", "row2"], ["uuid", "0e767b36-6822-4044-8307-d58467e04669"]]], "a2a1": ["named-uuid", "row1"], "a2b": ["named-uuid", "row3"]}, "uuid-name": "row1"}, {"op": "insert", "table": "a", "row": {"a": 1, "a2a": ["set", [["named-uuid", "row1"], ["named-uuid", "row2"]]], "a2a1": ["named-uuid", "row2"], "a2b": ["named-uuid", "row3"]}, "uuid-name": "row2"}, {"op": "insert", "table": "a", "row": {"a": 2, "a2a": ["set", [["named-uuid", "row1"], ["named-uuid", "row2"]]], "a2a1": ["named-uuid", "row2"], "a2b": ["named-uuid", "row4"]}}, {"op": "insert", "table": "b", "row": {"b": 2, "b2a": ["named-uuid", "row1"]}, "uuid-name": "row3"}, {"op": "insert", "table": "b", "row": {"b": 3, "b2a": ["named-uuid", "row2"]}, "uuid-name": "row4"}]]], dnl Check that the nonexistent row UUID we 
added to row a0 was deleted, dnl and that other rows were inserted as requested. [[["weak", {"op": "select", "table": "a", "where": [], "columns": ["_uuid", "a2a", "a2a1", "a2b"], "sort": ["a"]}]]], [[["weak", {"op": "select", "table": "b", "where": [], "columns": ["_uuid", "b", "b2a"], "sort": ["b"]}]]], dnl Try to insert invalid all-zeros weak reference (the default) into dnl "a2b", which requires exactly one value. [[["weak", {"op": "insert", "table": "a", "row": {"a2a1": ["named-uuid", "me"]}, "uuid-name": "me"}]]], dnl Try to delete row from "b" that is referred to by weak references dnl from "a" table "a2b" column that requires exactly one value. [[["weak", {"op": "delete", "table": "b", "where": [["b", "==", 3]]}]]], dnl Try to delete row from "a" that is referred to by weak references dnl from "a" table "a2a1" column that requires exactly one value. [[["weak", {"op": "delete", "table": "a", "where": [["a", "==", 1]]}]]], dnl Delete the row that had the reference that caused the previous dnl deletion to fail, then check that other rows are unchanged. [[["weak", {"op": "delete", "table": "a", "where": [["a", "==", 2]]}]]], [[["weak", {"op": "select", "table": "a", "where": [], "columns": ["_uuid", "a2a", "a2a1", "a2b"], "sort": ["a"]}]]], [[["weak", {"op": "select", "table": "b", "where": [], "columns": ["_uuid", "b", "b2a"], "sort": ["b"]}]]], dnl Delete row a0 then check that references to it were removed. [[["weak", {"op": "delete", "table": "a", "where": [["a", "==", 0]]}]]], [[["weak", {"op": "select", "table": "a", "where": [], "columns": ["_uuid", "a2a", "a2a1", "a2b"], "sort": ["a"]}]]], [[["weak", {"op": "select", "table": "b", "where": [], "columns": ["_uuid", "b", "b2a"], "sort": ["b"]}]]], dnl Delete row a1 then check that references to it were removed. 
[[["weak", {"op": "delete", "table": "a", "where": [["a", "==", 1]]}]]], [[["weak", {"op": "select", "table": "a", "where": [], "columns": ["_uuid", "a2a", "a2a1", "a2b"], "sort": ["a"]}]]], [[["weak", {"op": "select", "table": "b", "where": [], "columns": ["_uuid", "b", "b2a"], "sort": ["b"]}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]},{"uuid":["uuid","<4>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"a2a":["set",[["uuid","<0>"],["uuid","<1>"]]],"a2a1":["uuid","<0>"],"a2b":["uuid","<3>"]},{"_uuid":["uuid","<1>"],"a2a":["set",[["uuid","<0>"],["uuid","<1>"]]],"a2a1":["uuid","<1>"],"a2b":["uuid","<3>"]},{"_uuid":["uuid","<2>"],"a2a":["set",[["uuid","<0>"],["uuid","<1>"]]],"a2a1":["uuid","<1>"],"a2b":["uuid","<4>"]}]}] [{"rows":[{"_uuid":["uuid","<3>"],"b":2,"b2a":["uuid","<0>"]},{"_uuid":["uuid","<4>"],"b":3,"b2a":["uuid","<1>"]}]}] [{"uuid":["uuid","<5>"]},{"details":"Weak reference column \"a2b\" in \"a\" row <5> (inserted within this transaction) contained all-zeros UUID (probably as the default value for this column) but deleting this value caused a constraint volation because this column is not allowed to be empty.","error":"constraint violation"}] [{"count":1},{"details":"Deletion of 1 weak reference(s) to deleted (or never-existing) rows from column \"a2b\" in \"a\" row <2> caused this column to become empty, but constraints on this column disallow an empty column.","error":"constraint violation"}] [{"count":1},{"details":"Deletion of 1 weak reference(s) to deleted (or never-existing) rows from column \"a2a1\" in \"a\" row <2> caused this column to become empty, but constraints on this column disallow an empty column.","error":"constraint violation"}] [{"count":1}] [{"rows":[{"_uuid":["uuid","<0>"],"a2a":["set",[["uuid","<0>"],["uuid","<1>"]]],"a2a1":["uuid","<0>"],"a2b":["uuid","<3>"]},{"_uuid":["uuid","<1>"],"a2a":["set",[["uuid","<0>"],["uuid","<1>"]]],"a2a1":["uuid","<1>"],"a2b":["uuid","<3>"]}]}] 
[{"rows":[{"_uuid":["uuid","<3>"],"b":2,"b2a":["uuid","<0>"]},{"_uuid":["uuid","<4>"],"b":3,"b2a":["uuid","<1>"]}]}] [{"count":1}] [{"rows":[{"_uuid":["uuid","<1>"],"a2a":["uuid","<1>"],"a2a1":["uuid","<1>"],"a2b":["uuid","<3>"]}]}] [{"rows":[{"_uuid":["uuid","<3>"],"b":2,"b2a":["set",[]]},{"_uuid":["uuid","<4>"],"b":3,"b2a":["uuid","<1>"]}]}] [{"count":1}] [{"rows":[]}] [{"rows":[{"_uuid":["uuid","<3>"],"b":2,"b2a":["set",[]]},{"_uuid":["uuid","<4>"],"b":3,"b2a":["set",[]]}]}] ]]) OVSDB_CHECK_EXECUTION([immutable columns], [immutable_schema], [[[["immutable", {"op": "insert", "table": "a", "row": {"i": 5}, "uuid-name": "row1"}]]], [[["immutable", {"op": "update", "table": "a", "row": {"i": 10}, "where": []}]]], [[["immutable", {"op": "update", "table": "a", "row": {"i": 5}, "where": []}]]], [[["immutable", {"op": "mutate", "table": "a", "where": [], "mutations": [["i", "-=", 5]]}]]], [[["immutable", {"op": "mutate", "table": "a", "where": [], "mutations": [["i", "*=", 1]]}]]]], [[[{"uuid":["uuid","<0>"]}] [{"details":"Cannot update immutable column i in table a.","error":"constraint violation","syntax":"{\"op\":\"update\",\"row\":{\"i\":10},\"table\":\"a\",\"where\":[]}"}] [{"details":"Cannot update immutable column i in table a.","error":"constraint violation","syntax":"{\"op\":\"update\",\"row\":{\"i\":5},\"table\":\"a\",\"where\":[]}"}] [{"details":"Cannot mutate immutable column i in table a.","error":"constraint violation","syntax":"[\"i\",\"-=\",5]"}] [{"details":"Cannot mutate immutable column i in table a.","error":"constraint violation","syntax":"[\"i\",\"*=\",1]"}] ]]) OVSDB_CHECK_EXECUTION([garbage collection], [gc_schema], [dnl Check that inserting a row without any references is a no-op. [[["gc", {"op": "insert", "table": "a", "row": {"a": 0}}]]], [[["gc", {"op": "select", "table": "a", "where": [], "columns": ["a"]}]]], dnl Check that inserting a chain of rows that reference each other dnl in turn is also a no-op. 
[[["gc", {"op": "insert", "table": "a", "row": {"a": 0, "a2a": ["named-uuid", "row1"]}, "uuid-name": "row0"}, {"op": "insert", "table": "a", "row": {"a": 1, "a2a": ["named-uuid", "row2"]}, "uuid-name": "row1"}, {"op": "insert", "table": "a", "row": {"a": 2, "a2a": ["named-uuid", "row3"]}, "uuid-name": "row2"}, {"op": "insert", "table": "a", "row": {"a": 3}, "uuid-name": "row3"}]]], [[["gc", {"op": "select", "table": "a", "where": [], "columns": ["a"]}]]], dnl Check that inserting a pair of rows that mutually reference each dnl other causes the rows to be retained. [[["gc", {"op": "insert", "table": "a", "row": {"a": 4, "a2a": ["named-uuid", "row5"]}, "uuid-name": "row4"}, {"op": "insert", "table": "a", "row": {"a": 5, "a2a": ["named-uuid", "row4"]}, "uuid-name": "row5"}]]], [[["gc", {"op": "select", "table": "a", "where": [], "columns": ["a"], "sort": ["a"]}]]], dnl Check that unreferencing one of the rows causes the other to be deleted. [[["gc", {"op": "update", "table": "a", "where": [["a", "==", 4]], "row": {"a2a": ["set", []]}}]]], [[["gc", {"op": "select", "table": "a", "where": [], "columns": ["a"]}]]], dnl Check that inserting a pair of rows that mutually weak reference each dnl other is a no-op. [[["gc", {"op": "insert", "table": "a", "row": {"a": 6, "wa2a": ["named-uuid", "row7"]}, "uuid-name": "row6"}, {"op": "insert", "table": "a", "row": {"a": 7, "wa2a": ["named-uuid", "row6"]}, "uuid-name": "row7"}]]], [[["gc", {"op": "select", "table": "a", "where": [], "columns": ["a"]}]]], dnl Check that a circular chain of rows is retained. 
[[["gc", {"op": "insert", "table": "a", "row": {"a": 8, "a2a": ["named-uuid", "row9"]}, "uuid-name": "row8"}, {"op": "insert", "table": "a", "row": {"a": 9, "a2a": ["named-uuid", "row10"]}, "uuid-name": "row9"}, {"op": "insert", "table": "a", "row": {"a": 10, "a2a": ["named-uuid", "row11"]}, "uuid-name": "row10"}, {"op": "insert", "table": "a", "row": {"a": 11, "a2a": ["named-uuid", "row8"]}, "uuid-name": "row11"}]]], [[["gc", {"op": "select", "table": "a", "where": [], "columns": ["a"], "sort": ["a"]}]]], dnl Check that breaking the chain causes all of the rows to be deleted. [[["gc", {"op": "update", "table": "a", "where": [["a", "==", 9]], "row": {"a2a": ["set", []]}}]]], [[["gc", {"op": "select", "table": "a", "where": [], "columns": ["a"]}]]], dnl Check that inserting a row only referenced by itself is a no-op. [[["gc", {"op": "insert", "table": "a", "row": {"a": 12, "a2a": ["named-uuid", "self"]}, "uuid-name": "self"}]]], [[["gc", {"op": "select", "table": "a", "where": [], "columns": ["a"]}]]]], [[[{"uuid":["uuid","<0>"]}] [{"rows":[]}] [{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]},{"uuid":["uuid","<4>"]}] [{"rows":[]}] [{"uuid":["uuid","<5>"]},{"uuid":["uuid","<6>"]}] [{"rows":[{"a":4},{"a":5}]}] [{"count":1}] [{"rows":[]}] [{"uuid":["uuid","<7>"]},{"uuid":["uuid","<8>"]}] [{"rows":[]}] [{"uuid":["uuid","<9>"]},{"uuid":["uuid","<10>"]},{"uuid":["uuid","<11>"]},{"uuid":["uuid","<12>"]}] [{"rows":[{"a":8},{"a":9},{"a":10},{"a":11}]}] [{"count":1}] [{"rows":[]}] [{"uuid":["uuid","<13>"]}] [{"rows":[]}] ]])]) EXECUTION_EXAMPLES openvswitch-2.0.1+git20140120/tests/ovsdb-idl.at000066400000000000000000000435631226605124000210400ustar00rootroot00000000000000AT_BANNER([OVSDB -- interface description language (IDL)]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR # OVSDB_CHECK_IDL_C(TITLE, [PRE-IDL-TXN], TRANSACTIONS, OUTPUT, [KEYWORDS], # [FILTER]) # # Creates a database with a schema derived from idltest.ovsidl, runs # each PRE-IDL-TXN (if any), 
starts an ovsdb-server on that database, # and runs "test-ovsdb idl" passing each of the TRANSACTIONS along. # # Checks that the overall output is OUTPUT. Before comparison, the # output is sorted (using "sort") and UUIDs in the output are replaced # by markers of the form <N> where N is a number. The first unique # UUID is replaced by <0>, the next by <1>, and so on. If a given # UUID appears more than once it is always replaced by the same # marker. If FILTER is supplied then the output is also filtered # through the specified program. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. m4_define([OVSDB_CHECK_IDL_C], [AT_SETUP([$1 - C]) AT_KEYWORDS([ovsdb server idl positive $5]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([ovsdb-tool create db $abs_srcdir/idltest.ovsschema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server '-vPATTERN:console:ovsdb-server|%c|%m' --detach --no-chdir --pidfile="`pwd`"/pid --remote=punix:socket --unixctl="`pwd`"/unixctl db], [0], [ignore], [ignore]) m4_if([$2], [], [], [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore], [kill `cat pid`])]) AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl unix:socket $3], [0], [stdout], [ignore], [kill `cat pid`]) AT_CHECK([sort stdout | ${PERL} $srcdir/uuidfilt.pl]m4_if([$6],,, [[| $6]]), [0], [$4], [], [kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) # same as OVSDB_CHECK_IDL but uses the Python IDL implementation. 
m4_define([OVSDB_CHECK_IDL_PY], [AT_SETUP([$1 - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_KEYWORDS([ovsdb server idl positive Python $5]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK([ovsdb-tool create db $abs_srcdir/idltest.ovsschema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server '-vPATTERN:console:ovsdb-server|%c|%m' --detach --no-chdir --pidfile="`pwd`"/pid --remote=punix:socket --unixctl="`pwd`"/unixctl db], [0], [ignore], [ignore]) m4_if([$2], [], [], [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore], [kill `cat pid`])]) AT_CHECK([$PYTHON $srcdir/test-ovsdb.py -t10 idl $srcdir/idltest.ovsschema unix:socket $3], [0], [stdout], [ignore], [kill `cat pid`]) AT_CHECK([sort stdout | ${PERL} $srcdir/uuidfilt.pl]m4_if([$6],,, [[| $6]]), [0], [$4], [], [kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) # same as OVSDB_CHECK_IDL but uses the Python IDL implementation with tcp m4_define([OVSDB_CHECK_IDL_TCP_PY], [AT_SETUP([$1 - Python tcp]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_KEYWORDS([ovsdb server idl positive Python with tcp socket $5]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR AT_CHECK([ovsdb-tool create db $abs_srcdir/idltest.ovsschema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file '-vPATTERN:console:ovsdb-server|%c|%m' --detach --no-chdir --pidfile="`pwd`"/pid --remote=punix:socket --remote=ptcp:0:127.0.0.1 --unixctl="`pwd`"/unixctl db], [0], [ignore], [ignore]) TCP_PORT=`parse_listening_port < ovsdb-server.log` m4_if([$2], [], [], [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT $2], [0], [ignore], [ignore], [kill `cat pid`])]) AT_CHECK([$PYTHON $srcdir/test-ovsdb.py -t10 idl $srcdir/idltest.ovsschema tcp:127.0.0.1:$TCP_PORT $3], [0], [stdout], [ignore], [kill `cat pid`]) AT_CHECK([sort stdout | ${PERL} $srcdir/uuidfilt.pl]m4_if([$6],,, [[| $6]]), [0], [$4], [], [kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) m4_define([OVSDB_CHECK_IDL], [OVSDB_CHECK_IDL_C($@) 
OVSDB_CHECK_IDL_PY($@) OVSDB_CHECK_IDL_TCP_PY($@)]) OVSDB_CHECK_IDL([simple idl, initially empty, no ops], [], [], [000: empty 001: done ]) OVSDB_CHECK_IDL([simple idl, initially empty, various ops], [], [['["idltest", {"op": "insert", "table": "simple", "row": {"i": 1, "r": 2.0, "b": true, "s": "mystring", "u": ["uuid", "84f5c8f5-ac76-4dbc-a24f-8860eb407fc1"], "ia": ["set", [1, 2, 3]], "ra": ["set", [-0.5]], "ba": ["set", [true]], "sa": ["set", ["abc", "def"]], "ua": ["set", [["uuid", "69443985-7806-45e2-b35f-574a04e720f9"], ["uuid", "aad11ef0-816a-4b01-93e6-03b8b4256b98"]]]}}, {"op": "insert", "table": "simple", "row": {}}]' \ '["idltest", {"op": "update", "table": "simple", "where": [], "row": {"b": true}}]' \ '["idltest", {"op": "update", "table": "simple", "where": [], "row": {"r": 123.5}}]' \ '["idltest", {"op": "insert", "table": "simple", "row": {"i": -1, "r": 125, "b": false, "s": "", "ia": ["set", [1]], "ra": ["set", [1.5]], "ba": ["set", [false]], "sa": ["set", []], "ua": ["set", []]}}]' \ '["idltest", {"op": "update", "table": "simple", "where": [["i", "<", 1]], "row": {"s": "newstring"}}]' \ '["idltest", {"op": "delete", "table": "simple", "where": [["i", "==", 0]]}]' \ 'reconnect']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}]} 002: i=0 r=0 b=false s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 002: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 003: {"error":null,"result":[{"count":2}]} 004: i=0 r=0 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: i=1 r=2 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 005: {"error":null,"result":[{"count":2}]} 006: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 006: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 007: {"error":null,"result":[{"uuid":["uuid","<6>"]}]} 008: i=-1 r=125 b=false s= 
u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> 008: i=0 r=123.5 b=true s= u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 008: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 009: {"error":null,"result":[{"count":2}]} 010: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> 010: i=0 r=123.5 b=true s=newstring u=<2> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 010: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 011: {"error":null,"result":[{"count":1}]} 012: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> 012: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 013: reconnect 014: i=-1 r=125 b=false s=newstring u=<2> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> 014: i=1 r=123.5 b=true s=mystring u=<3> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<4> <5>] uuid=<0> 015: done ]]) OVSDB_CHECK_IDL([simple idl, initially populated], [['["idltest", {"op": "insert", "table": "simple", "row": {"i": 1, "r": 2.0, "b": true, "s": "mystring", "u": ["uuid", "84f5c8f5-ac76-4dbc-a24f-8860eb407fc1"], "ia": ["set", [1, 2, 3]], "ra": ["set", [-0.5]], "ba": ["set", [true]], "sa": ["set", ["abc", "def"]], "ua": ["set", [["uuid", "69443985-7806-45e2-b35f-574a04e720f9"], ["uuid", "aad11ef0-816a-4b01-93e6-03b8b4256b98"]]]}}, {"op": "insert", "table": "simple", "row": {}}]']], [['["idltest", {"op": "update", "table": "simple", "where": [], "row": {"b": true}}]']], [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 000: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> 001: {"error":null,"result":[{"count":2}]} 002: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 002: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> 003: done ]]) 
OVSDB_CHECK_IDL([simple idl, writing via IDL], [['["idltest", {"op": "insert", "table": "simple", "row": {"i": 1, "r": 2.0, "b": true, "s": "mystring", "u": ["uuid", "84f5c8f5-ac76-4dbc-a24f-8860eb407fc1"], "ia": ["set", [1, 2, 3]], "ra": ["set", [-0.5]], "ba": ["set", [true]], "sa": ["set", ["abc", "def"]], "ua": ["set", [["uuid", "69443985-7806-45e2-b35f-574a04e720f9"], ["uuid", "aad11ef0-816a-4b01-93e6-03b8b4256b98"]]]}}, {"op": "insert", "table": "simple", "row": {}}]']], [['verify 0 b, verify 1 r, set 0 b 1, set 1 r 3.5' \ 'insert 2, verify 2 i, verify 1 b, delete 1']], [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 000: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> 001: commit, status=success 002: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 002: i=1 r=3.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<5> 003: commit, status=success 004: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: i=2 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<6> 005: done ]]) OVSDB_CHECK_IDL([simple idl, handling verification failure], [['["idltest", {"op": "insert", "table": "simple", "row": {"i": 1, "r": 2.0}}, {"op": "insert", "table": "simple", "row": {}}]']], [['set 0 b 1' \ '+["idltest", {"op": "update", "table": "simple", "where": [["i", "==", 1]], "row": {"r": 5.0}}]' \ '+verify 1 r, set 1 r 3' \ 'verify 1 r, set 1 r 3' \ ]], [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 000: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 001: commit, status=success 002: {"error":null,"result":[{"count":1}]} 003: commit, status=try again 004: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: i=1 r=5 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 005: commit, status=success 006: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 006: i=1 r=3 b=false s= 
u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 007: done ]]) OVSDB_CHECK_IDL([simple idl, increment operation], [['["idltest", {"op": "insert", "table": "simple", "row": {}}]']], [['set 0 r 2.0, increment 0']], [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 001: commit, status=success, increment=1 002: i=1 r=2 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 003: done ]]) OVSDB_CHECK_IDL([simple idl, aborting], [['["idltest", {"op": "insert", "table": "simple", "row": {}}]']], [['set 0 r 2.0, abort' \ '+set 0 b 1']], [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 001: commit, status=aborted 002: commit, status=success 003: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: done ]]) OVSDB_CHECK_IDL([simple idl, destroy without commit or abort], [['["idltest", {"op": "insert", "table": "simple", "row": {}}]']], [['set 0 r 2.0, destroy' \ '+set 0 b 1']], [[000: i=0 r=0 b=false s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 001: destroy 002: commit, status=success 003: i=0 r=0 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: done ]]) OVSDB_CHECK_IDL([self-linking idl, consistent ops], [], [['["idltest", {"op": "insert", "table": "link1", "row": {"i": 0, "k": ["named-uuid", "self"]}, "uuid-name": "self"}]' \ '["idltest", {"op": "insert", "table": "link1", "row": {"i": 1, "k": ["named-uuid", "row2"]}, "uuid-name": "row1"}, {"op": "insert", "table": "link1", "row": {"i": 2, "k": ["named-uuid", "row1"]}, "uuid-name": "row2"}]' \ '["idltest", {"op": "update", "table": "link1", "where": [["i", "==", 1]], "row": {"k": ["uuid", "#1#"]}}]' \ '["idltest", {"op": "update", "table": "link1", "where": [], "row": {"k": ["uuid", "#0#"]}}]']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]}]} 002: i=0 k=0 ka=[] l2= uuid=<0> 003: {"error":null,"result":[{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]}]} 004: i=0 k=0 ka=[] l2= uuid=<0> 004: i=1 k=2 ka=[] l2= uuid=<1> 004: i=2 k=1 ka=[] l2= 
uuid=<2> 005: {"error":null,"result":[{"count":1}]} 006: i=0 k=0 ka=[] l2= uuid=<0> 006: i=1 k=1 ka=[] l2= uuid=<1> 006: i=2 k=1 ka=[] l2= uuid=<2> 007: {"error":null,"result":[{"count":3}]} 008: i=0 k=0 ka=[] l2= uuid=<0> 008: i=1 k=0 ka=[] l2= uuid=<1> 008: i=2 k=0 ka=[] l2= uuid=<2> 009: done ]]) OVSDB_CHECK_IDL([self-linking idl, inconsistent ops], [], [['["idltest", {"op": "insert", "table": "link1", "row": {"i": 0, "k": ["uuid", "cf197cc5-c8c9-42f5-82d5-c71a9f2cb96b"]}}]' \ '+["idltest", {"op": "insert", "table": "link1", "uuid-name": "one", "row": {"i": 1, "k": ["named-uuid", "one"]}}, {"op": "insert", "table": "link1", "row": {"i": 2, "k": ["named-uuid", "one"]}}]' \ '["idltest", {"op": "update", "table": "link1", "where": [], "row": {"k": ["uuid", "c2fca39a-e69a-42a4-9c56-5eca85839ce9"]}}]' \ '+["idltest", {"op": "delete", "table": "link1", "where": [["_uuid", "==", ["uuid", "#1#"]]]}]' \ '+["idltest", {"op": "delete", "table": "link1", "where": [["_uuid", "==", ["uuid", "#2#"]]]}]' \ '["idltest", {"op": "delete", "table": "link1", "where": []}]' \ ]], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"details":"Table link1 column k row <0> references nonexistent row <1> in table link1.","error":"referential integrity violation"}]} 002: {"error":null,"result":[{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]}]} 003: i=1 k=1 ka=[] l2= uuid=<2> 003: i=2 k=1 ka=[] l2= uuid=<3> 004: {"error":null,"result":[{"count":2},{"details":"Table link1 column k row references nonexistent row <4> in table link1.","error":"referential integrity violation"}]} 005: {"error":null,"result":[{"count":1},{"details":"cannot delete link1 row <2> because of 1 remaining reference(s)","error":"referential integrity violation"}]} 006: {"error":null,"result":[{"count":1}]} 007: i=1 k=1 ka=[] l2= uuid=<2> 008: {"error":null,"result":[{"count":1}]} 009: empty 010: done ]], [], [[sed -e '/004:/s/row <[23]> references/row references/']]) OVSDB_CHECK_IDL([self-linking idl, 
sets], [], [['["idltest", {"op": "insert", "table": "link1", "row": {"i": 0, "k": ["named-uuid", "i0"], "ka": ["set", [["named-uuid", "i0"]]]}, "uuid-name": "i0"}, {"op": "insert", "table": "link1", "row": {"i": 1, "k": ["named-uuid", "i0"], "ka": ["set", [["named-uuid", "i1"]]]}, "uuid-name": "i1"}, {"op": "insert", "table": "link1", "row": {"i": 2, "k": ["named-uuid", "i0"], "ka": ["set", [["named-uuid", "i2"]]]}, "uuid-name": "i2"}, {"op": "insert", "table": "link1", "row": {"i": 3, "k": ["named-uuid", "i0"], "ka": ["set", [["named-uuid", "i3"]]]}, "uuid-name": "i3"}]' \ '["idltest", {"op": "update", "table": "link1", "where": [], "row": {"ka": ["set", [["uuid", "#0#"], ["uuid", "#1#"], ["uuid", "#2#"], ["uuid", "#3#"]]]}}]' \ '["idltest", {"op": "update", "table": "link1", "where": [["i", "==", 2]], "row": {"ka": ["set", [["uuid", "#0#"], ["uuid", "88702e78-845b-4a6e-ad08-cf68922ae84a"], ["uuid", "#2#"]]]}}]' \ '+["idltest", {"op": "delete", "table": "link1", "where": []}]']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]}]} 002: i=0 k=0 ka=[0] l2= uuid=<0> 002: i=1 k=0 ka=[1] l2= uuid=<1> 002: i=2 k=0 ka=[2] l2= uuid=<2> 002: i=3 k=0 ka=[3] l2= uuid=<3> 003: {"error":null,"result":[{"count":4}]} 004: i=0 k=0 ka=[0 1 2 3] l2= uuid=<0> 004: i=1 k=0 ka=[0 1 2 3] l2= uuid=<1> 004: i=2 k=0 ka=[0 1 2 3] l2= uuid=<2> 004: i=3 k=0 ka=[0 1 2 3] l2= uuid=<3> 005: {"error":null,"result":[{"count":1},{"details":"Table link1 column ka row <2> references nonexistent row <4> in table link1.","error":"referential integrity violation"}]} 006: {"error":null,"result":[{"count":4}]} 007: empty 008: done ]]) OVSDB_CHECK_IDL([external-linking idl, consistent ops], [], [['["idltest", {"op": "insert", "table": "link2", "row": {"i": 0}, "uuid-name": "row0"}, {"op": "insert", "table": "link1", "row": {"i": 1, "k": ["named-uuid", "row1"], "l2": ["set", [["named-uuid", "row0"]]]}, "uuid-name": 
"row1"}]']], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}]} 002: i=0 l1= uuid=<0> 002: i=1 k=1 ka=[] l2=0 uuid=<1> 003: done ]]) OVSDB_CHECK_IDL_PY([external-linking idl, insert ops], [], [['linktest']], [[000: empty 001: commit, status=success 002: i=1 k=1 ka=[1] l2= uuid=<0> 002: i=2 k=1 ka=[1 2] l2= uuid=<1> 003: done ]]) OVSDB_CHECK_IDL_PY([getattr idl, insert ops], [], [['getattrtest']], [[000: empty 001: commit, status=success 002: i=2 k=2 ka=[] l2= uuid=<0> 003: done ]]) openvswitch-2.0.1+git20140120/tests/ovsdb-log.at000066400000000000000000000173171226605124000210470ustar00rootroot00000000000000AT_BANNER([OVSDB -- logging]) AT_SETUP([create empty, reread]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([log]) AT_CHECK( [test-ovsdb log-io file create], [0], [file: open successful ], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read], [0], [file: open successful file: read: end of log ], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write one, reread]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[0]']], [0], [[file: open successful file: write:[0] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read read], [0], [[file: open successful file: read: [0] file: read: end of log ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([check that create fails if file exists]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[1]']], [0], [[file: open successful file: write:[1] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read], [0], [[file: open successful file: read: [1] ]], [ignore]) AT_CHECK( [test-ovsdb log-io file create read], [1], [], [test-ovsdb: I/O error: create: file failed (File exists) ]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write one, reread]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb 
log-io file create 'write:[0]' 'write:[1]' 'write:[2]']], [0], [[file: open successful file: write:[0] successful file: write:[1] successful file: write:[2] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read read read read], [0], [[file: open successful file: read: [0] file: read: [1] file: read: [2] file: read: end of log ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write one, reread, append]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[0]' 'write:[1]' 'write:[2]']], [0], [[file: open successful file: write:[0] successful file: write:[1] successful file: write:[2] successful ]], [ignore]) AT_CHECK( [[test-ovsdb log-io file read/write read read read 'write:["append"]']], [0], [[file: open successful file: read: [0] file: read: [1] file: read: [2] file: write:["append"] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read read read read read], [0], [[file: open successful file: read: [0] file: read: [1] file: read: [2] file: read: ["append"] file: read: end of log ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write, reread one, overwrite]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[0]' 'write:[1]' 'write:[2]']], [0], [[file: open successful file: write:[0] successful file: write:[1] successful file: write:[2] successful ]], [ignore]) AT_CHECK( [[test-ovsdb log-io file read/write read 'write:["more data"]']], [0], [[file: open successful file: read: [0] file: write:["more data"] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read read read], [0], [[file: open successful file: read: [0] file: read: ["more data"] file: read: end of log ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write, add corrupted data, read]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[0]' 'write:[1]' 
'write:[2]']], [0], [[file: open successful file: write:[0] successful file: write:[1] successful file: write:[2] successful ]], [ignore]) AT_CHECK([echo 'xxx' >> file]) AT_CHECK( [test-ovsdb log-io file read-only read read read read], [0], [[file: open successful file: read: [0] file: read: [1] file: read: [2] file: read failed: syntax error: file: parse error at offset 174 in header line "xxx" ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write, add corrupted data, read, overwrite]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[0]' 'write:[1]' 'write:[2]']], [0], [[file: open successful file: write:[0] successful file: write:[1] successful file: write:[2] successful ]], [ignore]) AT_CHECK([echo 'xxx' >> file]) AT_CHECK( [[test-ovsdb log-io file read/write read read read read 'write:[3]']], [0], [[file: open successful file: read: [0] file: read: [1] file: read: [2] file: read failed: syntax error: file: parse error at offset 174 in header line "xxx" file: write:[3] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read read read read read], [0], [[file: open successful file: read: [0] file: read: [1] file: read: [2] file: read: [3] file: read: end of log ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write, corrupt some data, read, overwrite]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[0]' 'write:[1]' 'write:[2]']], [0], [[file: open successful file: write:[0] successful file: write:[1] successful file: write:[2] successful ]], [ignore]) AT_CHECK([[sed 's/\[2]/[3]/' < file > file.tmp]]) AT_CHECK([mv file.tmp file]) AT_CHECK([[grep -c '\[3]' file]], [0], [1 ]) AT_CHECK( [[test-ovsdb log-io file read/write read read read 'write:["longer data"]']], [0], [[file: open successful file: read: [0] file: read: [1] file: read failed: syntax error: file: 4 bytes starting at offset 170 have SHA-1 hash 
5c031e5c0d3a9338cc127ebe40bb2748b6a67e78 but should have hash 98f55556e7ffd432381b56a19bd485b3e6446442 file: write:["longer data"] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read read read read], [0], [[file: open successful file: read: [0] file: read: [1] file: read: ["longer data"] file: read: end of log ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write, truncate file, read, overwrite]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[0]' 'write:[1]' 'write:[2]']], [0], [[file: open successful file: write:[0] successful file: write:[1] successful file: write:[2] successful ]], [ignore]) AT_CHECK([[sed 's/\[2]/2/' < file > file.tmp]]) AT_CHECK([mv file.tmp file]) AT_CHECK([[grep -c '^2$' file]], [0], [1 ]) AT_CHECK( [[test-ovsdb log-io file read/write read read read 'write:["longer data"]']], [0], [[file: open successful file: read: [0] file: read: [1] file: read failed: I/O error: file: error reading 4 bytes starting at offset 170 (End of file) file: write:["longer data"] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read read read read], [0], [[file: open successful file: read: [0] file: read: [1] file: read: ["longer data"] file: read: end of log ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP AT_SETUP([write bad JSON, read, overwrite]) AT_KEYWORDS([ovsdb log]) AT_CAPTURE_FILE([file]) AT_CHECK( [[test-ovsdb log-io file create 'write:[0]' 'write:[1]' 'write:[2]']], [0], [[file: open successful file: write:[0] successful file: write:[1] successful file: write:[2] successful ]], [ignore]) AT_CHECK([[printf '%s\n%s\n' 'OVSDB JSON 5 d910b02871075d3156ec8675dfc95b7d5d640aa6' 'null' >> file]]) AT_CHECK( [[test-ovsdb log-io file read/write read read read read 'write:["replacement data"]']], [0], [[file: open successful file: read: [0] file: read: [1] file: read: [2] file: read failed: syntax error: file: 5 bytes starting at offset 228 are 
not valid JSON (line 0, column 4, byte 4: syntax error at beginning of input) file: write:["replacement data"] successful ]], [ignore]) AT_CHECK( [test-ovsdb log-io file read-only read read read read read], [0], [[file: open successful file: read: [0] file: read: [1] file: read: [2] file: read: ["replacement data"] file: read: end of log ]], [ignore]) AT_CHECK([test -f .file.~lock~]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/ovsdb-macros.at000066400000000000000000000006641226605124000215470ustar00rootroot00000000000000dnl OVSDB_INIT([$1]) dnl dnl Creates an empty database named $1. m4_define([OVSDB_INIT], [OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_CHECK( [ovsdb-tool create $1 $abs_top_srcdir/vswitchd/vswitch.ovsschema], [0], [stdout], [ignore]) AT_CHECK( [[ovsdb-tool transact $1 \ '["Open_vSwitch", {"op": "insert", "table": "Open_vSwitch", "row": {}}]']], [0], [ignore], [ignore])]) openvswitch-2.0.1+git20140120/tests/ovsdb-monitor-sort.pl000077500000000000000000000017671226605124000227560ustar00rootroot00000000000000#! /usr/bin/perl use strict; use warnings; # Breaks lines read from into groups using blank lines as # group separators, then sorts lines within the groups for # reproducibility. sub compare_lines { my ($a, $b) = @_; my $u = '[0-9a-fA-F]'; my $uuid_re = "${u}{8}-${u}{4}-${u}{4}-${u}{4}-${u}{12}"; if ($a =~ /^$uuid_re/) { if ($b =~ /^$uuid_re/) { return substr($a, 36) cmp substr($b, 36); } else { return 1; } } elsif ($b =~ /^$uuid_re/) { return -1; } else { return $a cmp $b; } } sub output_group { my (@group) = @_; print "$_\n" foreach sort { compare_lines($a, $b) } @group; } my @group = (); while () { chomp; if ($_ eq '') { output_group(@group); @group = (); print "\n"; } else { if (/^,/ && @group) { $group[$#group] .= "\n" . 
$_; } else { push(@group, $_); } } } output_group(@group) if @group; openvswitch-2.0.1+git20140120/tests/ovsdb-monitor.at000066400000000000000000000250311226605124000217450ustar00rootroot00000000000000AT_BANNER([OVSDB -- ovsdb-server monitors]) # OVSDB_CHECK_MONITOR(TITLE, SCHEMA, [PRE-MONITOR-TXN], DB, TABLE, # TRANSACTIONS, OUTPUT, [COLUMNS], [KEYWORDS]) # # Creates a database with the given SCHEMA, starts an ovsdb-server on # that database, and runs each of the TRANSACTIONS (which should be a # quoted list of quoted strings) against it with ovsdb-client one at a # time. COLUMNS, if specified, is passed to ovsdb-client as the set # of columns and operations to select. # # Checks that the overall output is OUTPUT, but UUIDs in the output # are replaced by markers of the form where N is a number. The # first unique UUID is replaced by <0>, the next by <1>, and so on. # If a given UUID appears more than once it is always replaced by the # same marker. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. 
m4_define([OVSDB_CHECK_MONITOR], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb server monitor positive $9]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR $2 > schema AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) m4_foreach([txn], [$3], [AT_CHECK([ovsdb-tool transact db 'txn'], [0], [ignore], [ignore])]) AT_CAPTURE_FILE([ovsdb-server-log]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/server-pid --remote=punix:socket --unixctl="`pwd`"/unixctl --log-file="`pwd`"/ovsdb-server-log db >/dev/null 2>&1], [0], [], []) AT_CHECK([ovsdb-client -vjsonrpc --detach --no-chdir --pidfile="`pwd`"/client-pid -d json monitor --format=csv unix:socket $4 $5 $8 > output], [0], [ignore], [ignore], [kill `cat server-pid`]) m4_foreach([txn], [$6], [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [ignore], [ignore], [kill `cat server-pid client-pid`])]) AT_CHECK([ovsdb-client transact unix:socket '[["$4"]]'], [0], [ignore], [ignore], [kill `cat server-pid client-pid`]) AT_CHECK([ovs-appctl -t "`pwd`"/unixctl -e exit], [0], [ignore], [ignore]) OVS_WAIT_UNTIL([test ! -e server-pid && test ! 
-e client-pid]) AT_CHECK([${PERL} $srcdir/ovsdb-monitor-sort.pl < output | ${PERL} $srcdir/uuidfilt.pl], [0], [$7], [ignore]) AT_CLEANUP]) OVSDB_CHECK_MONITOR([monitor insert into empty table], [ordinal_schema], [], [ordinals], [ordinals], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}]]]], [[row,action,name,number,_version <0>,insert,"""zero""",0,"[""uuid"",""<1>""]" ]]) OVSDB_CHECK_MONITOR([monitor insert into populated table], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 10, "name": "ten"}}]]]], [ordinals], [ordinals], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}]]]], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" row,action,name,number,_version <2>,insert,"""zero""",0,"[""uuid"",""<3>""]" ]]) OVSDB_CHECK_MONITOR([monitor delete], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 10, "name": "ten"}}]]]], [ordinals], [ordinals], [[[["ordinals", {"op": "delete", "table": "ordinals", "where": [["number", "==", 10]]}]]]], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" row,action,name,number,_version <0>,delete,"""ten""",10,"[""uuid"",""<1>""]" ]]) OVSDB_CHECK_MONITOR([monitor row update], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 10, "name": "ten"}}]]]], [ordinals], [ordinals], [[[["ordinals", {"op": "update", "table": "ordinals", "where": [["number", "==", 10]], "row": {"name": "five plus five"}}]]]], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" row,action,name,number,_version <0>,old,"""ten""",,"[""uuid"",""<1>""]" ,new,"""five plus five""",10,"[""uuid"",""<2>""]" ]]) OVSDB_CHECK_MONITOR([monitor no-op row updates], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 10, "name": "ten"}}]]]], [ordinals], 
[ordinals], [[[["ordinals", {"op": "update", "table": "ordinals", "where": [["number", "==", 10]], "row": {"number": 10, "name": "ten"}}]]], [[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 9, "name": "nine"}}]]]], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" row,action,name,number,_version <2>,insert,"""nine""",9,"[""uuid"",""<3>""]" ]]) OVSDB_CHECK_MONITOR([monitor insert-and-update transaction], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 10, "name": "ten"}}]]]], [ordinals], [ordinals], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 9, "name": "nine"}, "uuid-name": "nine"}, {"op": "update", "table": "ordinals", "where": [["_uuid", "==", ["named-uuid", "nine"]]], "row": {"name": "three squared"}}]]]], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" row,action,name,number,_version <2>,insert,"""three squared""",9,"[""uuid"",""<3>""]" ]]) OVSDB_CHECK_MONITOR([monitor insert-update-and-delete transaction], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 10, "name": "ten"}}]]]], [ordinals], [ordinals], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 9, "name": "nine"}, "uuid-name": "nine"}, {"op": "update", "table": "ordinals", "where": [["_uuid", "==", ["named-uuid", "nine"]]], "row": {"name": "three squared"}}, {"op": "delete", "table": "ordinals", "where": [["_uuid", "==", ["named-uuid", "nine"]]]}, {"op": "insert", "table": "ordinals", "row": {"number": 7, "name": "seven"}}]]]], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" row,action,name,number,_version <2>,insert,"""seven""",7,"[""uuid"",""<3>""]" ]]) OVSDB_CHECK_MONITOR([monitor weak reference change], [weak_schema], [[[["weak", {"op": "insert", "table": "a", "row": {"a": 0, "a2a1": ["named-uuid", "a0"], "a2b": ["named-uuid", "b2"]}, "uuid-name": "a0"}, 
{"op": "insert", "table": "a", "row": {"a": 1, "a2a": ["named-uuid", "a0"], "a2a1": ["named-uuid", "a1"], "a2b": ["named-uuid", "b2"]}, "uuid-name": "a1"}, {"op": "insert", "table": "b", "row": {"b": 2}, "uuid-name": "b2"}]]]], [weak], [a], [[[["weak", {"op": "delete", "table": "a", "where": [["a", "==", 0]]}]]]], [[row,action,a,a2a,a2a1,a2b,_version <0>,initial,0,"[""set"",[]]","[""uuid"",""<0>""]","[""uuid"",""<1>""]","[""uuid"",""<2>""]" <3>,initial,1,"[""uuid"",""<0>""]","[""uuid"",""<3>""]","[""uuid"",""<1>""]","[""uuid"",""<4>""]" row,action,a,a2a,a2a1,a2b,_version <0>,delete,0,"[""set"",[]]","[""uuid"",""<0>""]","[""uuid"",""<1>""]","[""uuid"",""<2>""]" <3>,old,,"[""uuid"",""<0>""]",,, ,new,1,"[""set"",[]]","[""uuid"",""<3>""]","[""uuid"",""<1>""]","[""uuid"",""<5>""]" ]]) OVSDB_CHECK_MONITOR([monitor insert-update-and-delete transaction], [ordinal_schema], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 10, "name": "ten"}}]]]], [ordinals], [ordinals], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 9, "name": "nine"}, "uuid-name": "nine"}, {"op": "update", "table": "ordinals", "where": [["_uuid", "==", ["named-uuid", "nine"]]], "row": {"name": "three squared"}}, {"op": "delete", "table": "ordinals", "where": [["_uuid", "==", ["named-uuid", "nine"]]]}, {"op": "insert", "table": "ordinals", "row": {"number": 7, "name": "seven"}}]]]], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" row,action,name,number,_version <2>,insert,"""seven""",7,"[""uuid"",""<3>""]" ]]) AT_BANNER([ovsdb -- ovsdb-monitor monitor only some operations]) m4_define([OVSDB_MONITOR_INITIAL], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 10, "name": "ten"}}]]]]) m4_define([OVSDB_MONITOR_TXNS], [[[["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 5, "name": "five"}}]]], [[["ordinals", {"op": "update", "table": "ordinals", "where": [["name", "==", "five"]], "row": 
{"name": "FIVE"}}]]], [[["ordinals", {"op": "delete", "table": "ordinals", "where": []}]]]]) OVSDB_CHECK_MONITOR([monitor all operations], [ordinal_schema], [OVSDB_MONITOR_INITIAL], [ordinals], [ordinals], [OVSDB_MONITOR_TXNS], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" row,action,name,number,_version <2>,insert,"""five""",5,"[""uuid"",""<3>""]" row,action,name,number,_version <2>,old,"""five""",,"[""uuid"",""<3>""]" ,new,"""FIVE""",5,"[""uuid"",""<4>""]" row,action,name,number,_version <2>,delete,"""FIVE""",5,"[""uuid"",""<4>""]" <0>,delete,"""ten""",10,"[""uuid"",""<1>""]" ]]) dnl A monitor with "initial" only doesn't really make sense, dnl but it's still allowed and should work. OVSDB_CHECK_MONITOR([monitor initial only], [ordinal_schema], [OVSDB_MONITOR_INITIAL], [ordinals], [ordinals], [OVSDB_MONITOR_TXNS], [[row,action,name,number,_version <0>,initial,"""ten""",10,"[""uuid"",""<1>""]" ]], [!insert,!delete,!modify]) OVSDB_CHECK_MONITOR([monitor insert only], [ordinal_schema], [OVSDB_MONITOR_INITIAL], [ordinals], [ordinals], [OVSDB_MONITOR_TXNS], [[row,action,name,number,_version <0>,insert,"""five""",5,"[""uuid"",""<1>""]" ]], [!initial,!delete,!modify]) OVSDB_CHECK_MONITOR([monitor delete only], [ordinal_schema], [OVSDB_MONITOR_INITIAL], [ordinals], [ordinals], [OVSDB_MONITOR_TXNS], [[row,action,name,number,_version <0>,delete,"""FIVE""",5,"[""uuid"",""<1>""]" <2>,delete,"""ten""",10,"[""uuid"",""<3>""]" ]], [!initial,!insert,!modify]) OVSDB_CHECK_MONITOR([monitor modify only], [ordinal_schema], [OVSDB_MONITOR_INITIAL], [ordinals], [ordinals], [OVSDB_MONITOR_TXNS], [[row,action,name,number,_version <0>,old,"""five""",,"[""uuid"",""<1>""]" ,new,"""FIVE""",5,"[""uuid"",""<2>""]" ]], [!initial,!insert,!delete]) openvswitch-2.0.1+git20140120/tests/ovsdb-mutation.at000066400000000000000000000626341226605124000221300ustar00rootroot00000000000000AT_BANNER([OVSDB -- mutations]) OVSDB_CHECK_POSITIVE([null mutation], 
[[parse-mutations \ '{"columns": {"name": {"type": "string"}}}' \ '[]']], [[[]]]) OVSDB_CHECK_POSITIVE([mutations on scalars], [[parse-mutations \ '{"columns": {"i": {"type": "integer"}, "r": {"type": "real"}, "b": {"type": "boolean"}, "s": {"type": "string"}, "u": {"type": "uuid"}}}' \ '[["i", "+=", 0]]' \ '[["i", "-=", 1]]' \ '[["i", "*=", 2]]' \ '[["i", "/=", 3]]' \ '[["i", "%=", 4]]' \ '[["r", "+=", 0.5]]' \ '[["r", "-=", 1.5]]' \ '[["r", "*=", 2.5]]' \ '[["r", "/=", 3.5]]']], [[[["i","+=",0]] [["i","-=",1]] [["i","*=",2]] [["i","/=",3]] [["i","%=",4]] [["r","+=",0.5]] [["r","-=",1.5]] [["r","*=",2.5]] [["r","/=",3.5]]]], [mutation]) AT_SETUP([disallowed mutations on scalars]) AT_KEYWORDS([ovsdb negative mutation]) AT_CHECK([[test-ovsdb parse-mutations \ '{"columns": {"i": {"type": "integer"}, "r": {"type": "real"}, "b": {"type": "boolean"}, "s": {"type": "string"}, "u": {"type": "uuid"}}}' \ '[["i", "xxx", 1]]' \ '[["i", "insert", 1]]' \ '[["i", "delete", 2]]' \ '[["r", "%=", 0.5]]' \ '[["r", "insert", 1.5]]' \ '[["r", "delete", 2.5]]' \ '[["b", "+=", true]]' \ '[["b", "-=", false]]' \ '[["b", "*=", true]]' \ '[["b", "/=", false]]' \ '[["b", "%=", true]]' \ '[["b", "insert", false]]' \ '[["b", "delete", true]]' \ '[["s", "+=", "a"]]' \ '[["s", "-=", "b"]]' \ '[["s", "*=", "c"]]' \ '[["s", "/=", "d"]]' \ '[["s", "%=", "e"]]' \ '[["s", "insert", "f"]]' \ '[["s", "delete", "g"]]' \ '[["u", "+=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]' \ '[["u", "-=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]' \ '[["u", "*=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]' \ '[["u", "/=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]' \ '[["u", "insert", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]' \ '[["u", "delete", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]']], [1], [], [[test-ovsdb: unknown mutator: No mutator named xxx. 
test-ovsdb: syntax "["i","insert",1]": syntax error: Type mismatch: "insert" operator may not be applied to column i of type integer. test-ovsdb: syntax "["i","delete",2]": syntax error: Type mismatch: "delete" operator may not be applied to column i of type integer. test-ovsdb: syntax "["r","%=",0.5]": syntax error: Type mismatch: "%=" operator may not be applied to column r of type real. test-ovsdb: syntax "["r","insert",1.5]": syntax error: Type mismatch: "insert" operator may not be applied to column r of type real. test-ovsdb: syntax "["r","delete",2.5]": syntax error: Type mismatch: "delete" operator may not be applied to column r of type real. test-ovsdb: syntax "["b","+=",true]": syntax error: Type mismatch: "+=" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b","-=",false]": syntax error: Type mismatch: "-=" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b","*=",true]": syntax error: Type mismatch: "*=" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b","/=",false]": syntax error: Type mismatch: "/=" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b","%=",true]": syntax error: Type mismatch: "%=" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b","insert",false]": syntax error: Type mismatch: "insert" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["b","delete",true]": syntax error: Type mismatch: "delete" operator may not be applied to column b of type boolean. test-ovsdb: syntax "["s","+=","a"]": syntax error: Type mismatch: "+=" operator may not be applied to column s of type string. test-ovsdb: syntax "["s","-=","b"]": syntax error: Type mismatch: "-=" operator may not be applied to column s of type string. test-ovsdb: syntax "["s","*=","c"]": syntax error: Type mismatch: "*=" operator may not be applied to column s of type string. 
test-ovsdb: syntax "["s","/=","d"]": syntax error: Type mismatch: "/=" operator may not be applied to column s of type string. test-ovsdb: syntax "["s","%=","e"]": syntax error: Type mismatch: "%=" operator may not be applied to column s of type string. test-ovsdb: syntax "["s","insert","f"]": syntax error: Type mismatch: "insert" operator may not be applied to column s of type string. test-ovsdb: syntax "["s","delete","g"]": syntax error: Type mismatch: "delete" operator may not be applied to column s of type string. test-ovsdb: syntax "["u","+=",["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"]]": syntax error: Type mismatch: "+=" operator may not be applied to column u of type uuid. test-ovsdb: syntax "["u","-=",["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"]]": syntax error: Type mismatch: "-=" operator may not be applied to column u of type uuid. test-ovsdb: syntax "["u","*=",["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"]]": syntax error: Type mismatch: "*=" operator may not be applied to column u of type uuid. test-ovsdb: syntax "["u","/=",["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"]]": syntax error: Type mismatch: "/=" operator may not be applied to column u of type uuid. test-ovsdb: syntax "["u","insert",["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"]]": syntax error: Type mismatch: "insert" operator may not be applied to column u of type uuid. test-ovsdb: syntax "["u","delete",["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"]]": syntax error: Type mismatch: "delete" operator may not be applied to column u of type uuid. ]]) AT_CLEANUP AT_SETUP([disallowed mutations on immutable columns]) AT_KEYWORDS([ovsdb negative mutation]) AT_CHECK([[test-ovsdb parse-mutations \ '{"columns": {"i": {"type": "integer", "mutable": false}}}' \ '[["i", "+=", 1]]' ]], [1], [], [[test-ovsdb: syntax "["i","+=",1]": constraint violation: Cannot mutate immutable column i in table mytable. 
]]) AT_CLEANUP OVSDB_CHECK_POSITIVE([mutations on sets], [[parse-mutations \ '{"columns": {"i": {"type": {"key": "integer", "min": 0, "max": "unlimited"}}, "r": {"type": {"key": "real", "min": 0, "max": "unlimited"}}, "b": {"type": {"key": "boolean", "min": 0, "max": "unlimited"}}, "s": {"type": {"key": "string", "min": 0, "max": "unlimited"}}, "u": {"type": {"key": "uuid", "min": 0, "max": "unlimited"}}}}' \ '[["i", "+=", 1]]' \ '[["i", "-=", 2]]' \ '[["i", "*=", 3]]' \ '[["i", "/=", 4]]' \ '[["i", "%=", 5]]' \ '[["i", "insert", ["set", [1, 2]]]]' \ '[["i", "delete", ["set", [1, 2, 3]]]]' \ '[["r", "+=", 1]]' \ '[["r", "-=", 2]]' \ '[["r", "*=", 3]]' \ '[["r", "/=", 4]]' \ '[["r", "insert", ["set", [1, 2]]]]' \ '[["r", "delete", ["set", [1, 2, 3]]]]' \ '[["b", "insert", ["set", [true]]]]' \ '[["b", "delete", ["set", [false]]]]' \ '[["s", "insert", ["set", ["a"]]]]' \ '[["s", "delete", ["set", ["a", "b"]]]]' \ '[["u", "insert", ["set", [["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]]]]]' \ '[["u", "delete", ["set", [["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"], ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]]]]' \ ]], [[[["i","+=",1]] [["i","-=",2]] [["i","*=",3]] [["i","/=",4]] [["i","%=",5]] [["i","insert",["set",[1,2]]]] [["i","delete",["set",[1,2,3]]]] [["r","+=",1]] [["r","-=",2]] [["r","*=",3]] [["r","/=",4]] [["r","insert",["set",[1,2]]]] [["r","delete",["set",[1,2,3]]]] [["b","insert",true]] [["b","delete",false]] [["s","insert","a"]] [["s","delete",["set",["a","b"]]]] [["u","insert",["uuid","b10d28f7-af18-4a67-9e78-2a6394516c59"]]] [["u","delete",["set",[["uuid","9179ca6d-6d65-400a-b455-3ad92783a099"],["uuid","b10d28f7-af18-4a67-9e78-2a6394516c59"]]]]]]], [mutation]) OVSDB_CHECK_POSITIVE([executing null mutation], [[execute-mutations \ '{"columns": {"i": {"type": "integer"}}}' \ '[[]]' \ '[{"i": 0}, {"i": 1}, {"i": 2}']]], [mutation 0: row 0: no change row 1: no change row 2: no change ]) OVSDB_CHECK_POSITIVE([executing mutations on integers], 
[[execute-mutations \ '{"columns": {"i": {"type": "integer"}}}' \ '[[["i", "+=", 1]], [["i", "-=", 2]], [["i", "*=", 3]], [["i", "/=", 4]], [["i", "%=", 2]]]' \ '[{"i": 0}, {"i": 1}, {"i": 2}']]], [mutation 0: row 0: {"i":1} row 1: {"i":2} row 2: {"i":3} mutation 1: row 0: {"i":-2} row 1: {"i":-1} row 2: {"i":0} mutation 2: row 0: no change row 1: {"i":3} row 2: {"i":6} mutation 3: row 0: no change row 1: {"i":0} row 2: {"i":0} mutation 4: row 0: no change row 1: no change row 2: {"i":0} ], [mutation]) OVSDB_CHECK_POSITIVE([integer overflow detection], [[execute-mutations \ '{"columns": {"i": {"type": "integer"}}}' \ '[[["i", "+=", 9223372036854775807]], [["i", "+=", -9223372036854775808]], [["i", "-=", -9223372036854775808]], [["i", "-=", 9223372036854775807]], [["i", "*=", 3037000500]], [["i", "/=", -1]], [["i", "/=", 0]]]' \ '[{"i": 0}, {"i": 1}, {"i": -1}, {"i": 9223372036854775807}, {"i": -9223372036854775808}, {"i": 3037000500}, {"i": -3037000500}']]], [mutation 0: row 0: {"i":9223372036854775807} row 1: range error: Result of "+=" operation is out of range. row 2: {"i":9223372036854775806} row 3: range error: Result of "+=" operation is out of range. row 4: {"i":-1} row 5: range error: Result of "+=" operation is out of range. row 6: {"i":9223372033817775307} mutation 1: row 0: {"i":-9223372036854775808} row 1: {"i":-9223372036854775807} row 2: range error: Result of "+=" operation is out of range. row 3: {"i":-1} row 4: range error: Result of "+=" operation is out of range. row 5: {"i":-9223372033817775308} row 6: range error: Result of "+=" operation is out of range. mutation 2: row 0: range error: Result of "-=" operation is out of range. row 1: range error: Result of "-=" operation is out of range. row 2: {"i":9223372036854775807} row 3: range error: Result of "-=" operation is out of range. row 4: {"i":0} row 5: range error: Result of "-=" operation is out of range. 
row 6: {"i":9223372033817775308} mutation 3: row 0: {"i":-9223372036854775807} row 1: {"i":-9223372036854775806} row 2: {"i":-9223372036854775808} row 3: {"i":0} row 4: range error: Result of "-=" operation is out of range. row 5: {"i":-9223372033817775307} row 6: range error: Result of "-=" operation is out of range. mutation 4: row 0: no change row 1: {"i":3037000500} row 2: {"i":-3037000500} row 3: range error: Result of "*=" operation is out of range. row 4: range error: Result of "*=" operation is out of range. row 5: range error: Result of "*=" operation is out of range. row 6: range error: Result of "*=" operation is out of range. mutation 5: row 0: no change row 1: {"i":-1} row 2: {"i":1} row 3: {"i":-9223372036854775807} row 4: range error: Result of "/=" operation is out of range. row 5: {"i":-3037000500} row 6: {"i":3037000500} mutation 6: row 0: domain error: Division by zero. row 1: domain error: Division by zero. row 2: domain error: Division by zero. row 3: domain error: Division by zero. row 4: domain error: Division by zero. row 5: domain error: Division by zero. row 6: domain error: Division by zero. 
], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on integers with constraints], [[execute-mutations \ '{"columns": {"i": {"type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 2}}}}}' \ '[[["i", "+=", 1]], [["i", "-=", 2]], [["i", "*=", 3]], [["i", "/=", 4]], [["i", "%=", 2]]]' \ '[{"i": 0}, {"i": 1}, {"i": 2}']]], [mutation 0: row 0: {"i":1} row 1: {"i":2} row 2: constraint violation: 3 is not in the valid range 0 to 2 (inclusive) mutation 1: row 0: constraint violation: -2 is not in the valid range 0 to 2 (inclusive) row 1: constraint violation: -1 is not in the valid range 0 to 2 (inclusive) row 2: {"i":0} mutation 2: row 0: no change row 1: constraint violation: 3 is not in the valid range 0 to 2 (inclusive) row 2: constraint violation: 6 is not in the valid range 0 to 2 (inclusive) mutation 3: row 0: no change row 1: {"i":0} row 2: {"i":0} mutation 4: row 0: no change row 1: no change row 2: {"i":0} ], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on reals], [[execute-mutations \ '{"columns": {"r": {"type": "real"}}}' \ '[[["r", "+=", 0.5]], [["r", "-=", 1.5]], [["r", "*=", 2.5]], [["r", "/=", 4]]]' \ '[{"r": 0}, {"r": -2.5}, {"r": 1.25}']]], [mutation 0: row 0: {"r":0.5} row 1: {"r":-2} row 2: {"r":1.75} mutation 1: row 0: {"r":-1.5} row 1: {"r":-4} row 2: {"r":-0.25} mutation 2: row 0: no change row 1: {"r":-6.25} row 2: {"r":3.125} mutation 3: row 0: no change row 1: {"r":-0.625} row 2: {"r":0.3125} ], [mutation]) OVSDB_CHECK_POSITIVE([real overflow detection], [[execute-mutations \ '{"columns": {"r": {"type": "real"}}}' \ '[[["r", "+=", 1.7976931348623157e+308]], [["r", "-=", 1.7976931348623157e+308]], [["r", "*=", 2]], [["r", "/=", 4]], [["r", "/=", 0.5]], [["r", "/=", 0]]]' \ '[{"r": 0}, {"r": 1.7976931348623157e+308}, {"r": -1.7976931348623157e+308}']]], [mutation 0: row 0: {"r":1.79769313486232e+308} row 1: range error: Result of "+=" operation is out of range. 
row 2: {"r":0} mutation 1: row 0: {"r":-1.79769313486232e+308} row 1: {"r":0} row 2: range error: Result of "-=" operation is out of range. mutation 2: row 0: no change row 1: range error: Result of "*=" operation is out of range. row 2: range error: Result of "*=" operation is out of range. mutation 3: row 0: no change row 1: {"r":4.49423283715579e+307} row 2: {"r":-4.49423283715579e+307} mutation 4: row 0: no change row 1: range error: Result of "/=" operation is out of range. row 2: range error: Result of "/=" operation is out of range. mutation 5: row 0: domain error: Division by zero. row 1: domain error: Division by zero. row 2: domain error: Division by zero. ], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on reals with constraints], [[execute-mutations \ '{"columns": {"r": {"type": {"key": {"type": "real", "minReal": -2.5, "maxReal": 1.75}}}}}' \ '[[["r", "+=", 0.5]], [["r", "-=", 1.5]], [["r", "*=", 2.5]], [["r", "/=", 4]]]' \ '[{"r": 0}, {"r": -2.5}, {"r": 1.25}']]], [mutation 0: row 0: {"r":0.5} row 1: {"r":-2} row 2: {"r":1.75} mutation 1: row 0: {"r":-1.5} row 1: constraint violation: -4 is not in the valid range -2.5 to 1.75 (inclusive) row 2: {"r":-0.25} mutation 2: row 0: no change row 1: constraint violation: -6.25 is not in the valid range -2.5 to 1.75 (inclusive) row 2: constraint violation: 3.125 is not in the valid range -2.5 to 1.75 (inclusive) mutation 3: row 0: no change row 1: {"r":-0.625} row 2: {"r":0.3125} ], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on integer sets], [[execute-mutations \ '{"columns": {"i": {"type": {"key": {"type": "integer", "maxInteger": 5}, "min": 0, "max": "unlimited"}}}}' \ '[[["i", "+=", 1]], [["i", "-=", 2]], [["i", "*=", 3]], [["i", "/=", 4]], [["i", "%=", 2]], [["i", "insert", ["set", [1]]]], [["i", "insert", ["set", [2, 3]]]], [["i", "delete", ["set", [1]]]], [["i", "delete", ["set", [2, 3]]]]]' \ '[{"i": ["set", []]}, {"i": ["set", [0]]}, {"i": ["set", [0, 1]]}, {"i": ["set", [0, 1, 
2]]}']]], [[mutation 0: row 0: no change row 1: {"i":1} row 2: {"i":["set",[1,2]]} row 3: {"i":["set",[1,2,3]]} mutation 1: row 0: no change row 1: {"i":-2} row 2: {"i":["set",[-2,-1]]} row 3: {"i":["set",[-2,-1,0]]} mutation 2: row 0: no change row 1: no change row 2: {"i":["set",[0,3]]} row 3: constraint violation: 6 is greater than maximum allowed value 5 mutation 3: row 0: no change row 1: no change row 2: constraint violation: Result of "/=" operation contains duplicates. row 3: constraint violation: Result of "/=" operation contains duplicates. mutation 4: row 0: no change row 1: no change row 2: no change row 3: constraint violation: Result of "%=" operation contains duplicates. mutation 5: row 0: {"i":1} row 1: {"i":["set",[0,1]]} row 2: no change row 3: no change mutation 6: row 0: {"i":["set",[2,3]]} row 1: {"i":["set",[0,2,3]]} row 2: {"i":["set",[0,1,2,3]]} row 3: {"i":["set",[0,1,2,3]]} mutation 7: row 0: no change row 1: no change row 2: {"i":0} row 3: {"i":["set",[0,2]]} mutation 8: row 0: no change row 1: no change row 2: no change row 3: {"i":["set",[0,1]]} ]], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on integer sets with constraints], [[execute-mutations \ '{"columns": {"i": {"type": {"key": "integer", "min": 1, "max": 2}}}}' \ '[[["i", "insert", ["set", [1]]]], [["i", "insert", ["set", [2]]]], [["i", "delete", ["set", [1]]]], [["i", "delete", ["set", [2]]]], [["i", "delete", ["set", [0, 1]]]]]' \ '[{"i": ["set", [0]]}, {"i": ["set", [2]]}, {"i": ["set", [0, 1]]}']]], [[mutation 0: row 0: {"i":["set",[0,1]]} row 1: {"i":["set",[1,2]]} row 2: no change mutation 1: row 0: {"i":["set",[0,2]]} row 1: no change row 2: constraint violation: Attempted to store 3 elements in set of 1 to 2 integers. mutation 2: row 0: no change row 1: no change row 2: {"i":0} mutation 3: row 0: no change row 1: constraint violation: Attempted to store 0 elements in set of 1 to 2 integers. 
row 2: no change mutation 4: row 0: constraint violation: Attempted to store 0 elements in set of 1 to 2 integers. row 1: no change row 2: constraint violation: Attempted to store 0 elements in set of 1 to 2 integers. ]], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on real sets], [[execute-mutations \ '{"columns": {"r": {"type": {"key": {"type": "real", "maxReal": 6}, "min": 0, "max": "unlimited"}}}}' \ '[[["r", "+=", 0.5]], [["r", "-=", 1.5]], [["r", "*=", 2.5]], [["r", "/=", 4]], [["r", "*=", 0]], [["r", "insert", 1.5]], [["r", "insert", 3]], [["r", "delete", ["set", [1.5, 3.5]]]], [["r", "delete", ["set", [0.5, 1.5, 2.5]]]]]' \ '[{"r": ["set", []]}, {"r": 0.5}, {"r": ["set", [0.5, 1.5]]}, {"r": ["set", [0.5, 1.5, 2.5]]}']]], [[mutation 0: row 0: no change row 1: {"r":1} row 2: {"r":["set",[1,2]]} row 3: {"r":["set",[1,2,3]]} mutation 1: row 0: no change row 1: {"r":-1} row 2: {"r":["set",[-1,0]]} row 3: {"r":["set",[-1,0,1]]} mutation 2: row 0: no change row 1: {"r":1.25} row 2: {"r":["set",[1.25,3.75]]} row 3: constraint violation: 6.25 is greater than maximum allowed value 6 mutation 3: row 0: no change row 1: {"r":0.125} row 2: {"r":["set",[0.125,0.375]]} row 3: {"r":["set",[0.125,0.375,0.625]]} mutation 4: row 0: no change row 1: {"r":0} row 2: constraint violation: Result of "*=" operation contains duplicates. row 3: constraint violation: Result of "*=" operation contains duplicates. 
mutation 5: row 0: {"r":1.5} row 1: {"r":["set",[0.5,1.5]]} row 2: no change row 3: no change mutation 6: row 0: {"r":3} row 1: {"r":["set",[0.5,3]]} row 2: {"r":["set",[0.5,1.5,3]]} row 3: {"r":["set",[0.5,1.5,2.5,3]]} mutation 7: row 0: no change row 1: no change row 2: {"r":0.5} row 3: {"r":["set",[0.5,2.5]]} mutation 8: row 0: no change row 1: {"r":["set",[]]} row 2: {"r":["set",[]]} row 3: {"r":["set",[]]} ]], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on boolean sets], [[execute-mutations \ '{"columns": {"b": {"type": {"key": "boolean", "min": 0, "max": "unlimited"}}}}' \ '[[["b", "insert", ["set", [false]]]], [["b", "insert", ["set", [true]]]], [["b", "insert", ["set", [false, true]]]], [["b", "delete", ["set", [false]]]], [["b", "delete", ["set", [true]]]], [["b", "delete", ["set", [true, false]]]]]' \ '[{"b": ["set", []]}, {"b": ["set", [false]]}, {"b": ["set", [true]]}, {"b": ["set", [false, true]]}']]], [[mutation 0: row 0: {"b":false} row 1: no change row 2: {"b":["set",[false,true]]} row 3: no change mutation 1: row 0: {"b":true} row 1: {"b":["set",[false,true]]} row 2: no change row 3: no change mutation 2: row 0: {"b":["set",[false,true]]} row 1: {"b":["set",[false,true]]} row 2: {"b":["set",[false,true]]} row 3: no change mutation 3: row 0: no change row 1: {"b":["set",[]]} row 2: no change row 3: {"b":true} mutation 4: row 0: no change row 1: no change row 2: {"b":["set",[]]} row 3: {"b":false} mutation 5: row 0: no change row 1: {"b":["set",[]]} row 2: {"b":["set",[]]} row 3: {"b":["set",[]]} ]], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on string sets], [[execute-mutations \ '{"columns": {"s": {"type": {"key": "string", "min": 0, "max": "unlimited"}}}}' \ '[[["s", "insert", ["set", ["a"]]]], [["s", "insert", ["set", ["b"]]]], [["s", "insert", ["set", ["c", "d"]]]], [["s", "delete", ["set", ["a"]]]], [["s", "delete", ["set", ["b"]]]], [["s", "delete", ["set", ["c", "d"]]]]]' \ '[{"s": ["set", []]}, {"s": ["set", ["a"]]}, 
{"s": ["set", ["a", "b"]]}, {"s": ["set", ["a", "b", "c", "d"]]}']]], [[mutation 0: row 0: {"s":"a"} row 1: no change row 2: no change row 3: no change mutation 1: row 0: {"s":"b"} row 1: {"s":["set",["a","b"]]} row 2: no change row 3: no change mutation 2: row 0: {"s":["set",["c","d"]]} row 1: {"s":["set",["a","c","d"]]} row 2: {"s":["set",["a","b","c","d"]]} row 3: no change mutation 3: row 0: no change row 1: {"s":["set",[]]} row 2: {"s":"b"} row 3: {"s":["set",["b","c","d"]]} mutation 4: row 0: no change row 1: no change row 2: {"s":"a"} row 3: {"s":["set",["a","c","d"]]} mutation 5: row 0: no change row 1: no change row 2: no change row 3: {"s":["set",["a","b"]]} ]], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on uuid sets], [[execute-mutations \ '{"columns": {"u": {"type": {"key": "uuid", "min": 0, "max": "unlimited"}}}}' \ '[[["u", "insert", ["set", [["uuid", "ddd9e79d-7782-414c-8b22-1046c60b6ec2"]]]]], [["u", "insert", ["set", [["uuid", "a60fe7ff-317b-4568-9106-892b37445313"]]]]], [["u", "insert", ["set", [["uuid", "2607d30e-e652-4927-9fec-8bbf1b60c7e9"]]]]], [["u", "delete", ["set", [["uuid", "ddd9e79d-7782-414c-8b22-1046c60b6ec2"]]]]], [["u", "delete", ["set", [["uuid", "a60fe7ff-317b-4568-9106-892b37445313"]]]]], [["u", "delete", ["set", [["uuid", "2607d30e-e652-4927-9fec-8bbf1b60c7e9"]]]]]]' \ '[{"u": ["set", []]}, {"u": ["set", [["uuid", "ddd9e79d-7782-414c-8b22-1046c60b6ec2"]]]}, {"u": ["set", [["uuid", "a60fe7ff-317b-4568-9106-892b37445313"]]]}, {"u": ["set", [["uuid", "2607d30e-e652-4927-9fec-8bbf1b60c7e9"]]]}']]], [[mutation 0: row 0: {"u":["uuid","ddd9e79d-7782-414c-8b22-1046c60b6ec2"]} row 1: no change row 2: {"u":["set",[["uuid","a60fe7ff-317b-4568-9106-892b37445313"],["uuid","ddd9e79d-7782-414c-8b22-1046c60b6ec2"]]]} row 3: {"u":["set",[["uuid","2607d30e-e652-4927-9fec-8bbf1b60c7e9"],["uuid","ddd9e79d-7782-414c-8b22-1046c60b6ec2"]]]} mutation 1: row 0: {"u":["uuid","a60fe7ff-317b-4568-9106-892b37445313"]} row 1: 
{"u":["set",[["uuid","a60fe7ff-317b-4568-9106-892b37445313"],["uuid","ddd9e79d-7782-414c-8b22-1046c60b6ec2"]]]} row 2: no change row 3: {"u":["set",[["uuid","2607d30e-e652-4927-9fec-8bbf1b60c7e9"],["uuid","a60fe7ff-317b-4568-9106-892b37445313"]]]} mutation 2: row 0: {"u":["uuid","2607d30e-e652-4927-9fec-8bbf1b60c7e9"]} row 1: {"u":["set",[["uuid","2607d30e-e652-4927-9fec-8bbf1b60c7e9"],["uuid","ddd9e79d-7782-414c-8b22-1046c60b6ec2"]]]} row 2: {"u":["set",[["uuid","2607d30e-e652-4927-9fec-8bbf1b60c7e9"],["uuid","a60fe7ff-317b-4568-9106-892b37445313"]]]} row 3: no change mutation 3: row 0: no change row 1: {"u":["set",[]]} row 2: no change row 3: no change mutation 4: row 0: no change row 1: no change row 2: {"u":["set",[]]} row 3: no change mutation 5: row 0: no change row 1: no change row 2: no change row 3: {"u":["set",[]]} ]], [mutation]) OVSDB_CHECK_POSITIVE([executing mutations on integer maps], [[execute-mutations \ '{"columns": {"i": {"type": {"key": "integer", "value": "integer", "min": 0, "max": "unlimited"}}}}' \ '[[["i", "insert", ["map", [[1, 2]]]]], [["i", "insert", ["map", [[2, 4], [3, 5]]]]], [["i", "delete", ["map", [[1, 2]]]]], [["i", "delete", ["map", [[2, 3]]]]], [["i", "delete", ["set", [1]]]], [["i", "delete", ["set", [2, 3]]]]]' \ '[{"i": ["map", []]}, {"i": ["map", [[1, 2]]]}, {"i": ["map", [[1, 3], [2, 3]]]}, {"i": ["map", [[3, 5]]]}']]], [[mutation 0: row 0: {"i":["map",[[1,2]]]} row 1: no change row 2: no change row 3: {"i":["map",[[1,2],[3,5]]]} mutation 1: row 0: {"i":["map",[[2,4],[3,5]]]} row 1: {"i":["map",[[1,2],[2,4],[3,5]]]} row 2: {"i":["map",[[1,3],[2,3],[3,5]]]} row 3: {"i":["map",[[2,4],[3,5]]]} mutation 2: row 0: no change row 1: {"i":["map",[]]} row 2: no change row 3: no change mutation 3: row 0: no change row 1: no change row 2: {"i":["map",[[1,3]]]} row 3: no change mutation 4: row 0: no change row 1: {"i":["map",[]]} row 2: {"i":["map",[[2,3]]]} row 3: no change mutation 5: row 0: no change row 1: no change row 2: 
{"i":["map",[[1,3]]]} row 3: {"i":["map",[]]} ]], [mutation]) openvswitch-2.0.1+git20140120/tests/ovsdb-query.at000066400000000000000000000373401226605124000214310ustar00rootroot00000000000000AT_BANNER([OVSDB -- queries]) OVSDB_CHECK_POSITIVE([queries on scalars], [[query \ '{"columns": {"i": {"type": "integer"}, "r": {"type": "real"}, "b": {"type": "boolean"}, "s": {"type": "string"}, "u": {"type": "uuid"}}}' \ '[{"i": 0, "r": 0.5, "b": true, "s": "a", "u": ["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]}, {"i": 1, "r": 1.5, "b": false, "s": "b", "u": ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]}, {"i": 2, "r": 2.5, "b": true, "s": "c", "u": ["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]}, {"i": 3, "r": 3.5, "b": false, "s": "d", "u": ["uuid", "62315898-64e0-40b9-b26f-ff74225303e6"]}, {"i": 4, "r": 4.5, "b": true, "s": "e", "u": ["uuid", "4a5127e2-0256-4a72-a7dc-6246213967c7"]}]' \ '[[], [["i", "==", 0]], [["i", "!=", 1]], [["i", "<", 2]], [["i", "<=", 3]], [["i", ">", 2]], [["i", ">=", 4]], [["i", "includes", 3]], [["i", "excludes", 2]], [["r", "==", 0.5]], [["r", "!=", 1.5]], [["r", "<", 2.5]], [["r", "<=", 3.5]], [["r", ">", 4.5]], [["r", ">=", 5.5]], [["r", "includes", 1]], [["r", "excludes", 3]], [["b", "==", true]], [["b", "!=", true]], [["b", "includes", false]], [["b", "excludes", true]], [["s", "==", "a"]], [["s", "!=", "b"]], [["s", "includes", "c"]], [["s", "excludes", "d"]], [["u", "==", ["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]]], [["u", "!=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]], [["u", "includes",["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]]]]']], [dnl query 0: 11111 query 1: 1---- query 2: 1-111 query 3: 11--- query 4: 1111- query 5: ---11 query 6: ----1 query 7: ---1- query 8: 11-11 query 9: 1---- query 10: 1-111 query 11: 11--- query 12: 1111- query 13: ----- query 14: ----- query 15: ----- query 16: 11111 query 17: 1-1-1 query 18: -1-1- query 19: -1-1- query 20: -1-1- query 21: 1---- query 22: 1-111 query 23: 
--1-- query 24: 111-1 query 25: 1---- query 26: 1-111 query 27: --1--], [query]) OVSDB_CHECK_POSITIVE([queries on sets], [[query \ '{"columns": {"i": {"type": {"key": "integer", "min": 0, "max": "unlimited"}}}}' \ '[{"i": ["set", []]}, {"i": ["set", [0]]}, {"i": ["set", [1]]}, {"i": ["set", [0, 1]]}, {"i": ["set", [2]]}, {"i": ["set", [2, 0]]}, {"i": ["set", [2, 1]]}, {"i": ["set", [2, 1, 0]]}]' \ '[[], [["i", "==", ["set", []]]], [["i", "==", ["set", [0]]]], [["i", "==", ["set", [1]]]], [["i", "==", ["set", [0, 1]]]], [["i", "==", ["set", [2]]]], [["i", "==", ["set", [2, 0]]]], [["i", "==", ["set", [2, 1]]]], [["i", "==", ["set", [2, 1, 0]]]], [["i", "!=", ["set", []]]], [["i", "!=", ["set", [0]]]], [["i", "!=", ["set", [1]]]], [["i", "!=", ["set", [0, 1]]]], [["i", "!=", ["set", [2]]]], [["i", "!=", ["set", [2, 0]]]], [["i", "!=", ["set", [2, 1]]]], [["i", "!=", ["set", [2, 1, 0]]]], [["i", "includes", ["set", []]]], [["i", "includes", ["set", [0]]]], [["i", "includes", ["set", [1]]]], [["i", "includes", ["set", [0, 1]]]], [["i", "includes", ["set", [2]]]], [["i", "includes", ["set", [2, 0]]]], [["i", "includes", ["set", [2, 1]]]], [["i", "includes", ["set", [2, 1, 0]]]], [["i", "excludes", ["set", []]]], [["i", "excludes", ["set", [0]]]], [["i", "excludes", ["set", [1]]]], [["i", "excludes", ["set", [0, 1]]]], [["i", "excludes", ["set", [2]]]], [["i", "excludes", ["set", [2, 0]]]], [["i", "excludes", ["set", [2, 1]]]], [["i", "excludes", ["set", [2, 1, 0]]]]]']], [dnl query 0: 11111 111 query 1: 1---- --- query 2: -1--- --- query 3: --1-- --- query 4: ---1- --- query 5: ----1 --- query 6: ----- 1-- query 7: ----- -1- query 8: ----- --1 query 9: -1111 111 query 10: 1-111 111 query 11: 11-11 111 query 12: 111-1 111 query 13: 1111- 111 query 14: 11111 -11 query 15: 11111 1-1 query 16: 11111 11- query 17: 11111 111 query 18: -1-1- 1-1 query 19: --11- -11 query 20: ---1- --1 query 21: ----1 111 query 22: ----- 1-1 query 23: ----- -11 query 24: ----- --1 query 25: 
11111 111 query 26: 1-1-1 -1- query 27: 11--1 1-- query 28: 1---1 --- query 29: 1111- --- query 30: 1-1-- --- query 31: 11--- --- query 32: 1---- ---], [query]) # This is the same as the "set" test except that it adds values, # all of which always match. OVSDB_CHECK_POSITIVE([queries on maps (1)], [[query \ '{"columns": {"i": {"type": {"key": "integer", "value": "boolean", "min": 0, "max": "unlimited"}}}}' \ '[{"i": ["map", []]}, {"i": ["map", [[0, true]]]}, {"i": ["map", [[1, false]]]}, {"i": ["map", [[0, true], [1, false]]]}, {"i": ["map", [[2, true]]]}, {"i": ["map", [[2, true], [0, true]]]}, {"i": ["map", [[2, true], [1, false]]]}, {"i": ["map", [[2, true], [1, false], [0, true]]]}]' \ '[[], [["i", "==", ["map", []]]], [["i", "==", ["map", [[0, true]]]]], [["i", "==", ["map", [[1, false]]]]], [["i", "==", ["map", [[0, true], [1, false]]]]], [["i", "==", ["map", [[2, true]]]]], [["i", "==", ["map", [[2, true], [0, true]]]]], [["i", "==", ["map", [[2, true], [1, false]]]]], [["i", "==", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "!=", ["map", []]]], [["i", "!=", ["map", [[0, true]]]]], [["i", "!=", ["map", [[1, false]]]]], [["i", "!=", ["map", [[0, true], [1, false]]]]], [["i", "!=", ["map", [[2, true]]]]], [["i", "!=", ["map", [[2, true], [0, true]]]]], [["i", "!=", ["map", [[2, true], [1, false]]]]], [["i", "!=", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "includes", ["map", []]]], [["i", "includes", ["map", [[0, true]]]]], [["i", "includes", ["map", [[1, false]]]]], [["i", "includes", ["map", [[0, true], [1, false]]]]], [["i", "includes", ["map", [[2, true]]]]], [["i", "includes", ["map", [[2, true], [0, true]]]]], [["i", "includes", ["map", [[2, true], [1, false]]]]], [["i", "includes", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "excludes", ["map", []]]], [["i", "excludes", ["map", [[0, true]]]]], [["i", "excludes", ["map", [[1, false]]]]], [["i", "excludes", ["map", [[0, true], [1, false]]]]], [["i", "excludes", ["map", [[2, 
true]]]]], [["i", "excludes", ["map", [[2, true], [0, true]]]]], [["i", "excludes", ["map", [[2, true], [1, false]]]]], [["i", "excludes", ["map", [[2, true], [1, false], [0, true]]]]]]']], [dnl query 0: 11111 111 query 1: 1---- --- query 2: -1--- --- query 3: --1-- --- query 4: ---1- --- query 5: ----1 --- query 6: ----- 1-- query 7: ----- -1- query 8: ----- --1 query 9: -1111 111 query 10: 1-111 111 query 11: 11-11 111 query 12: 111-1 111 query 13: 1111- 111 query 14: 11111 -11 query 15: 11111 1-1 query 16: 11111 11- query 17: 11111 111 query 18: -1-1- 1-1 query 19: --11- -11 query 20: ---1- --1 query 21: ----1 111 query 22: ----- 1-1 query 23: ----- -11 query 24: ----- --1 query 25: 11111 111 query 26: 1-1-1 -1- query 27: 11--1 1-- query 28: 1---1 --- query 29: 1111- --- query 30: 1-1-- --- query 31: 11--- --- query 32: 1---- ---], [query]) # This is the same as the "set" test except that it adds values, # and those values don't always match. OVSDB_CHECK_POSITIVE([queries on maps (2)], [[query \ '{"columns": {"i": {"type": {"key": "integer", "value": "boolean", "min": 0, "max": "unlimited"}}}}' \ '[{"i": ["map", []]}, {"i": ["map", [[0, true]]]}, {"i": ["map", [[0, false]]]}, {"i": ["map", [[1, false]]]}, {"i": ["map", [[1, true]]]}, {"i": ["map", [[0, true], [1, false]]]}, {"i": ["map", [[0, true], [1, true]]]}, {"i": ["map", [[2, true]]]}, {"i": ["map", [[2, false]]]}, {"i": ["map", [[2, true], [0, true]]]}, {"i": ["map", [[2, false], [0, true]]]}, {"i": ["map", [[2, true], [1, false]]]}, {"i": ["map", [[2, true], [1, true]]]}, {"i": ["map", [[2, true], [1, false], [0, true]]]}, {"i": ["map", [[2, true], [1, false], [0, false]]]}]' \ '[[], [["i", "==", ["map", []]]], [["i", "==", ["map", [[0, true]]]]], [["i", "==", ["map", [[1, false]]]]], [["i", "==", ["map", [[0, true], [1, false]]]]], [["i", "==", ["map", [[2, true]]]]], [["i", "==", ["map", [[2, true], [0, true]]]]], [["i", "==", ["map", [[2, true], [1, false]]]]], [["i", "==", ["map", [[2, true], [1, 
false], [0, true]]]]], [["i", "!=", ["map", []]]], [["i", "!=", ["map", [[0, true]]]]], [["i", "!=", ["map", [[1, false]]]]], [["i", "!=", ["map", [[0, true], [1, false]]]]], [["i", "!=", ["map", [[2, true]]]]], [["i", "!=", ["map", [[2, true], [0, true]]]]], [["i", "!=", ["map", [[2, true], [1, false]]]]], [["i", "!=", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "includes", ["map", []]]], [["i", "includes", ["map", [[0, true]]]]], [["i", "includes", ["map", [[1, false]]]]], [["i", "includes", ["map", [[0, true], [1, false]]]]], [["i", "includes", ["map", [[2, true]]]]], [["i", "includes", ["map", [[2, true], [0, true]]]]], [["i", "includes", ["map", [[2, true], [1, false]]]]], [["i", "includes", ["map", [[2, true], [1, false], [0, true]]]]], [["i", "excludes", ["map", []]]], [["i", "excludes", ["map", [[0, true]]]]], [["i", "excludes", ["map", [[1, false]]]]], [["i", "excludes", ["map", [[0, true], [1, false]]]]], [["i", "excludes", ["map", [[2, true]]]]], [["i", "excludes", ["map", [[2, true], [0, true]]]]], [["i", "excludes", ["map", [[2, true], [1, false]]]]], [["i", "excludes", ["map", [[2, true], [1, false], [0, true]]]]]]']], [dnl query 0: 11111 11111 11111 query 1: 1---- ----- ----- query 2: -1--- ----- ----- query 3: ---1- ----- ----- query 4: ----- 1---- ----- query 5: ----- --1-- ----- query 6: ----- ----1 ----- query 7: ----- ----- -1--- query 8: ----- ----- ---1- query 9: -1111 11111 11111 query 10: 1-111 11111 11111 query 11: 111-1 11111 11111 query 12: 11111 -1111 11111 query 13: 11111 11-11 11111 query 14: 11111 1111- 11111 query 15: 11111 11111 1-111 query 16: 11111 11111 111-1 query 17: 11111 11111 11111 query 18: -1--- 11--1 1--1- query 19: ---1- 1---- -1-11 query 20: ----- 1---- ---1- query 21: ----- --1-1 -1111 query 22: ----- ----1 ---1- query 23: ----- ----- -1-11 query 24: ----- ----- ---1- query 25: 11111 11111 11111 query 26: 1-111 --11- -11-1 query 27: 111-1 -1111 1-1-- query 28: 1-1-1 --11- --1-- query 29: 11111 11-1- 1---- 
query 30: 1-111 ---1- ----- query 31: 111-1 -1-1- 1---- query 32: 1-1-1 ---1- -----], [query]) OVSDB_CHECK_POSITIVE([UUID-distinct queries on scalars], [[query-distinct \ '{"columns": {"i": {"type": "integer"}, "r": {"type": "real"}, "b": {"type": "boolean"}, "s": {"type": "string"}, "u": {"type": "uuid"}}}' \ '[{"i": 0, "r": 0.5, "b": true, "s": "a", "u": ["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]}, {"i": 1, "r": 1.5, "b": false, "s": "b", "u": ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]}, {"i": 2, "r": 2.5, "b": true, "s": "c", "u": ["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]}, {"i": 3, "r": 3.5, "b": false, "s": "d", "u": ["uuid", "62315898-64e0-40b9-b26f-ff74225303e6"]}, {"i": 4, "r": 4.5, "b": true, "s": "e", "u": ["uuid", "4a5127e2-0256-4a72-a7dc-6246213967c7"]}]' \ '[[], [["i", "==", 0]], [["i", "!=", 1]], [["i", "<", 2]], [["i", "<=", 3]], [["i", ">", 2]], [["i", ">=", 4]], [["i", "includes", 3]], [["i", "excludes", 2]], [["r", "==", 0.5]], [["r", "!=", 1.5]], [["r", "<", 2.5]], [["r", "<=", 3.5]], [["r", ">", 4.5]], [["r", ">=", 5.5]], [["r", "includes", 1]], [["r", "excludes", 3]], [["b", "==", true]], [["b", "!=", true]], [["b", "includes", false]], [["b", "excludes", true]], [["s", "==", "a"]], [["s", "!=", "b"]], [["s", "includes", "c"]], [["s", "excludes", "d"]], [["u", "==", ["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]]], [["u", "!=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]], [["u", "includes",["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]]]]' \ '["_uuid"]']], [dnl query 0: abcde query 1: a---- query 2: a-cde query 3: ab--- query 4: abcd- query 5: ---de query 6: ----e query 7: ---d- query 8: ab-de query 9: a---- query 10: a-cde query 11: ab--- query 12: abcd- query 13: ----- query 14: ----- query 15: ----- query 16: abcde query 17: a-c-e query 18: -b-d- query 19: -b-d- query 20: -b-d- query 21: a---- query 22: a-cde query 23: --c-- query 24: abc-e query 25: a---- query 26: a-cde query 27: --c--], [query]) 
OVSDB_CHECK_POSITIVE([Boolean-distinct queries on scalars], [[query-distinct \ '{"columns": {"i": {"type": "integer"}, "r": {"type": "real"}, "b": {"type": "boolean"}, "s": {"type": "string"}, "u": {"type": "uuid"}}}' \ '[{"i": 0, "r": 0.5, "b": true, "s": "a", "u": ["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]}, {"i": 1, "r": 1.5, "b": false, "s": "b", "u": ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]}, {"i": 2, "r": 2.5, "b": true, "s": "c", "u": ["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]}, {"i": 3, "r": 3.5, "b": false, "s": "d", "u": ["uuid", "62315898-64e0-40b9-b26f-ff74225303e6"]}, {"i": 4, "r": 4.5, "b": true, "s": "e", "u": ["uuid", "4a5127e2-0256-4a72-a7dc-6246213967c7"]}]' \ '[[], [["i", "==", 0]], [["i", "!=", 1]], [["i", "<", 2]], [["i", "<=", 3]], [["i", ">", 2]], [["i", ">=", 4]], [["i", "includes", 3]], [["i", "excludes", 2]], [["r", "==", 0.5]], [["r", "!=", 1.5]], [["r", "<", 2.5]], [["r", "<=", 3.5]], [["r", ">", 4.5]], [["r", ">=", 5.5]], [["r", "includes", 1]], [["r", "excludes", 3]], [["b", "==", true]], [["b", "!=", true]], [["b", "includes", false]], [["b", "excludes", true]], [["s", "==", "a"]], [["s", "!=", "b"]], [["s", "includes", "c"]], [["s", "excludes", "d"]], [["u", "==", ["uuid", "b10d28f7-af18-4a67-9e78-2a6394516c59"]]], [["u", "!=", ["uuid", "9179ca6d-6d65-400a-b455-3ad92783a099"]]], [["u", "includes",["uuid", "ad0fa355-8b84-4a36-a4b5-b2c1bfd91758"]]]]' \ '["b"]']], [dnl query 0: ababa query 1: a-a-a query 2: ababa query 3: ababa query 4: ababa query 5: ababa query 6: a-a-a query 7: -b-b- query 8: ababa query 9: a-a-a query 10: ababa query 11: ababa query 12: ababa query 13: ----- query 14: ----- query 15: ----- query 16: ababa query 17: a-a-a query 18: -b-b- query 19: -b-b- query 20: -b-b- query 21: a-a-a query 22: ababa query 23: a-a-a query 24: ababa query 25: a-a-a query 26: ababa query 27: a-a-a], [query]) OVSDB_CHECK_NEGATIVE([parse colunn set containing bad name], [[query-distinct \ '{"columns": {"i": {"type": 
"integer"}}}' \ '[{"i": 0}]' \ '[[]]' \ '["i", "bad"]']], [bad is not a valid column name]) openvswitch-2.0.1+git20140120/tests/ovsdb-row.at000066400000000000000000000231001226605124000210600ustar00rootroot00000000000000AT_BANNER([OVSDB -- rows]) m4_define([RESERVED_COLUMNS], [["_uuid":["uuid","00000000-0000-0000-0000-000000000000"],"_version":["uuid","00000000-0000-0000-0000-000000000000"]]]) OVSDB_CHECK_POSITIVE([row with one string column], [[parse-rows \ '{"columns": {"name": {"type": "string"}}}' \ '{"name": "value"}' \ '{"name": ""}' \ '{"name": "longer string with spaces"}' \ '{}']], [{RESERVED_COLUMNS,"name":"value"} name {RESERVED_COLUMNS,"name":""} name {RESERVED_COLUMNS,"name":"longer string with spaces"} name {RESERVED_COLUMNS,"name":""} ], []) OVSDB_CHECK_POSITIVE([row with one integer column], [[parse-rows \ '{"columns": {"count": {"type": "integer"}}}' \ '{"count": 1}' \ '{"count": -1}' \ '{"count": 2e10}' \ '{}']], [{RESERVED_COLUMNS,"count":1} count {RESERVED_COLUMNS,"count":-1} count {RESERVED_COLUMNS,"count":20000000000} count {RESERVED_COLUMNS,"count":0} ], []) OVSDB_CHECK_POSITIVE([row with one real column], [[parse-rows \ '{"columns": {"cost": {"type": "real"}}}' \ '{"cost": 1.0}' \ '{"cost": -2.0}' \ '{"cost": 123000}' \ '{}']], [{RESERVED_COLUMNS,"cost":1} cost {RESERVED_COLUMNS,"cost":-2} cost {RESERVED_COLUMNS,"cost":123000} cost {RESERVED_COLUMNS,"cost":0} ], []) OVSDB_CHECK_POSITIVE([row with one boolean column], [[parse-rows \ '{"columns": {"feasible": {"type": "boolean"}}}' \ '{"feasible": true}' \ '{"feasible": false}' \ '{}']], [{RESERVED_COLUMNS,"feasible":true} feasible {RESERVED_COLUMNS,"feasible":false} feasible {RESERVED_COLUMNS,"feasible":false} ], []) OVSDB_CHECK_POSITIVE([row with one uuid column], [[parse-rows \ '{"columns": {"ref": {"type": "uuid"}}}' \ '{"ref": ["uuid", "f707423d-bf5b-48b5-b6c0-797c900ba4b6"]}' \ '{"ref": ["uuid", "33583cc5-d2f4-43de-b1ca-8aac14071b51"]}' \ '{}']], 
[{RESERVED_COLUMNS,"ref":[["uuid","f707423d-bf5b-48b5-b6c0-797c900ba4b6"]]} ref {RESERVED_COLUMNS,"ref":[["uuid","33583cc5-d2f4-43de-b1ca-8aac14071b51"]]} ref {RESERVED_COLUMNS,"ref":[["uuid","00000000-0000-0000-0000-000000000000"]]} ], []) OVSDB_CHECK_POSITIVE([row with set of 1 to 2 elements], [[parse-rows \ '{"columns": {"myset": {"type": {"key": "integer", "min": 1, "max": 2}}}}' \ '{}']], [{RESERVED_COLUMNS,["myset":0]} ]) OVSDB_CHECK_POSITIVE([row with map of 1 to 2 elements], [[parse-rows \ '{"columns": {"mymap": {"type": {"key": "integer", "value": "uuid", "min": 1, "max": 2}}}}' \ '{}']], [{RESERVED_COLUMNS,["mymap":["map",[[0,["uuid","00000000-0000-0000-0000-000000000000"]]]]]} ], []) OVSDB_CHECK_POSITIVE([row with several columns], [[parse-rows \ '{"columns": {"vswitch": {"type": "uuid"}, "name": {"type": "string"}, "datapath_id": {"type": {"key": "string", "min": 0}}, "hwaddr": {"type": "string"}, "mirrors": {"type": {"key": "uuid", "min": 0, "max": "unlimited"}}, "netflows": {"type": {"key": "uuid", "min": 0, "max": "unlimited"}}, "controller": {"type": {"key": "uuid", "min": 0}}, "listeners": {"type": {"key": "uuid", "min": 0, "max": "unlimited"}}, "snoops": {"type": {"key": "uuid", "min": 0, "max": "unlimited"}}}}' \ '{"vswitch": ["uuid", "1a5c7280-0d4c-4e34-9ec7-c772339f7774"], "name": "br0", "datapath_id": "000ae4256bb0", "hwaddr": "00:0a:e4:25:6b:b0"}' \ '{}']], [{RESERVED_COLUMNS,["controller":["set",[]],"datapath_id":"000ae4256bb0","hwaddr":"00:0a:e4:25:6b:b0","listeners":["set",[]],"mirrors":["set",[]],"name":"br0","netflows":["set",[]],"snoops":["set",[]],"vswitch":["uuid","1a5c7280-0d4c-4e34-9ec7-c772339f7774"]]} datapath_id, hwaddr, name, vswitch {RESERVED_COLUMNS,["controller":["set",[]],"datapath_id":["set",[]],"hwaddr":"","listeners":["set",[]],"mirrors":["set",[]],"name":"","netflows":["set",[]],"snoops":["set",[]],"vswitch":["uuid","00000000-0000-0000-0000-000000000000"]]} ], []) OVSDB_CHECK_POSITIVE([row hashing (scalars)], 
[[compare-rows \ '{"columns": {"i": {"type": "integer"}, "r": {"type": "real"}, "b": {"type": "boolean"}, "s": {"type": "string"}, "u": {"type": "uuid"}}}' \ '["null", {}]' \ '["i1", {"i": 1}]' \ '["i2", {"i": 2}]' \ '["i4", {"i": 4}]' \ '["i8", {"i": 8}]' \ '["i16", {"i": 16}]' \ '["i32", {"i": 32}]' \ '["i64", {"i": 64}]' \ '["i128", {"i": 128}]' \ '["i256", {"i": 256}]' \ '["null2", {"r": -0}]' \ '["r123", {"r": 123}]' \ '["r0.0625", {"r": 0.0625}]' \ '["r0.125", {"r": 0.125}]' \ '["r0.25", {"r": 0.25}]' \ '["r0.5", {"r": 0.5}]' \ '["r1", {"r": 1}]' \ '["r2", {"r": 2}]' \ '["r4", {"r": 4}]' \ '["r8", {"r": 8}]' \ '["r16", {"r": 16}]' \ '["r32", {"r": 32}]' \ '["null3", {"b": false}]' \ '["b1", {"b": true}]' \ '["null4", {"s": ""}]' \ '["s0", {"s": "a"}]' \ '["s1", {"s": "b"}]' \ '["s2", {"s": "c"}]' \ '["s3", {"s": "d"}]' \ '["s4", {"s": "e"}]' \ '["s5", {"s": "f"}]' \ '["s6", {"s": "g"}]' \ '["s7", {"s": "h"}]' \ '["s8", {"s": "i"}]' \ '["s9", {"s": "j"}]' \ '["null5", {"u": ["uuid","00000000-0000-0000-0000-000000000000"]}]' \ '["u1", {"u": ["uuid","10000000-0000-0000-0000-000000000000"]}]' \ '["u2", {"u": ["uuid","01000000-0000-0000-0000-000000000000"]}]' \ '["u3", {"u": ["uuid","00100000-0000-0000-0000-000000000000"]}]' \ '["u4", {"u": ["uuid","00010000-0000-0000-0000-000000000000"]}]' \ '["u5", {"u": ["uuid","00001000-0000-0000-0000-000000000000"]}]' \ '["u6", {"u": ["uuid","00000100-0000-0000-0000-000000000000"]}]' \ '["u7", {"u": ["uuid","00000010-0000-0000-0000-000000000000"]}]' \ '["u8", {"u": ["uuid","00000001-0000-0000-0000-000000000000"]}]' \ '["null6", {"u": ["uuid","00000000-c6db-4d22-970f-b41fabd20c4b"]}]']], [[null == null2 null == null3 null == null4 null == null5 hash(null) == hash(null6) null2 == null3 null2 == null4 null2 == null5 hash(null2) == hash(null6) null3 == null4 null3 == null5 hash(null3) == hash(null6) null4 == null5 hash(null4) == hash(null6) hash(null5) == hash(null6)]]) OVSDB_CHECK_POSITIVE([row hashing (sets)], [[compare-rows \ 
'{"columns": {"i": {"type": {"key": "integer", "min": 0, "max": "unlimited"}}, "r": {"type": {"key": "real", "min": 0, "max": "unlimited"}}, "b": {"type": {"key": "boolean", "min": 0, "max": "unlimited"}}, "s": {"type": {"key": "string", "min": 0, "max": "unlimited"}}, "u": {"type": {"key": "uuid", "min": 0, "max": "unlimited"}}}}' \ '["null0", {"i": ["set", []]}]' \ '["i0", {"i": ["set", [0]]}]' \ '["i01", {"i": ["set", [0, 1]]}]' \ '["i012", {"i": ["set", [0, 1, 2]]}]' \ '["i021", {"i": ["set", [0, 2, 1]]}]' \ '["i201", {"i": ["set", [2, 0, 1]]}]' \ '["i102", {"i": ["set", [1, 0, 2]]}]' \ '["i120", {"i": ["set", [1, 2, 0]]}]' \ '["i210", {"i": ["set", [2, 1, 0]]}]' \ '["r0", {"r": ["set", [0]]}]' \ '["r01", {"r": ["set", [0, 1]]}]' \ '["r012", {"r": ["set", [0, 1, 2]]}]' \ '["r201", {"r": ["set", [2, 0, 1]]}]' \ '["null1", {"b": ["set", []]}]' \ '["b0", {"b": ["set", [false]]}]' \ '["b1", {"b": ["set", [true]]}]' \ '["b01", {"b": ["set", [false, true]]}]' \ '["b10", {"b": ["set", [true, false]]}]' \ '["null2", {"s": ["set", []]}]' \ '["sa", {"s": ["set", ["a"]]}]' \ '["sb", {"s": ["set", ["b"]]}]' \ '["sab", {"s": ["set", ["a", "b"]]}]' \ '["sba", {"s": ["set", ["b", "a"]]}]']], [[null0 == null1 null0 == null2 i012 == i021 i012 == i201 i012 == i102 i012 == i120 i012 == i210 i021 == i201 i021 == i102 i021 == i120 i021 == i210 i201 == i102 i201 == i120 i201 == i210 i102 == i120 i102 == i210 i120 == i210 r012 == r201 null1 == null2 b01 == b10 sab == sba]]) OVSDB_CHECK_POSITIVE([row hashing (maps)], [[compare-rows \ '{"columns": {"ii": {"type": {"key": "integer", "value": "integer", "min": 0, "max": "unlimited"}}, "rr": {"type": {"key": "real", "value": "real", "min": 0, "max": "unlimited"}}, "bb": {"type": {"key": "boolean", "value": "boolean", "min": 0, "max": "unlimited"}}, "ss": {"type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}}' \ '["null", {}]' \ '["ii0", {"ii": ["map", [[0, 0]]]}]' \ '["ii1", {"ii": ["map", [[0, 1]]]}]' \ '["ii00", 
{"ii": ["map", [[0, 0], [1, 0]]]}]' \ '["ii01", {"ii": ["map", [[0, 0], [1, 1]]]}]' \ '["ii10", {"ii": ["map", [[0, 1], [1, 0]]]}]' \ '["ii11", {"ii": ["map", [[0, 1], [1, 1]]]}]' \ '["rr0", {"rr": ["map", [[0, 0]]]}]' \ '["rr0", {"rr": ["map", [[0, 1]]]}]' \ '["rr00", {"rr": ["map", [[0, 0], [1, 0]]]}]' \ '["rr01", {"rr": ["map", [[0, 0], [1, 1]]]}]' \ '["rr10", {"rr": ["map", [[0, 1], [1, 0]]]}]' \ '["rr11", {"rr": ["map", [[0, 1], [1, 1]]]}]' \ '["bb0", {"bb": ["map", [[false, false]]]}]' \ '["bb1", {"bb": ["map", [[false, true]]]}]' \ '["bb00", {"bb": ["map", [[false, false], [true, false]]]}]' \ '["bb01", {"bb": ["map", [[false, false], [true, true]]]}]' \ '["bb10", {"bb": ["map", [[false, true], [true, false]]]}]' \ '["bb11", {"bb": ["map", [[false, true], [true, true]]]}]' \ '["ss0", {"ss": ["map", [["a", "a"]]]}]' \ '["ss1", {"ss": ["map", [["a", "b"]]]}]' \ '["ss00", {"ss": ["map", [["a", "a"], ["b", "a"]]]}]' \ '["ss01", {"ss": ["map", [["a", "a"], ["b", "b"]]]}]' \ '["ss10", {"ss": ["map", [["a", "b"], ["b", "a"]]]}]' \ '["ss11", {"ss": ["map", [["a", "b"], ["b", "b"]]]}]'; echo ]], [[]]) openvswitch-2.0.1+git20140120/tests/ovsdb-schema.at000066400000000000000000000067631226605124000215310ustar00rootroot00000000000000AT_BANNER([OVSDB -- schemas]) OVSDB_CHECK_POSITIVE_CPY([schema with valid refTables], [[parse-schema \ '{"name": "mydb", "version": "4.2.1", "tables": { "a": { "columns": { "map": { "type": { "key": { "type": "uuid", "refTable": "b"}, "value": { "type": "uuid", "refTable": "a"}}}}}, "b": { "columns": { "aRef": { "type": { "key": { "type": "uuid", "refTable": "a"}}}}}}}']], [[{"name":"mydb","tables":{"a":{"columns":{"map":{"type":{"key":{"refTable":"b","type":"uuid"},"value":{"refTable":"a","type":"uuid"}}}}},"b":{"columns":{"aRef":{"type":{"key":{"refTable":"a","type":"uuid"}}}}}},"version":"4.2.1"}]]) dnl Ephemeral strong references to root set tables are OK. 
dnl Ephemeral strong references to non-root set tables are forced to be dnl persistent. OVSDB_CHECK_POSITIVE_CPY([schema with ephemeral strong references], [[parse-schema \ '{"name": "mydb", "version": "4.2.1", "tables": { "a": { "columns": { "x": { "type": { "key": { "type": "uuid", "refTable": "b"}}, "ephemeral": true}, "y": { "type": { "key": { "type": "uuid", "refTable": "a"}}, "ephemeral": true}}}, "b": { "columns": { "aRef": { "type": { "key": { "type": "uuid", "refTable": "a"}}}}, "isRoot": true}}}']], [[{"name":"mydb","tables":{"a":{"columns":{"x":{"ephemeral":true,"type":{"key":{"refTable":"b","type":"uuid"}}},"y":{"type":{"key":{"refTable":"a","type":"uuid"}}}}},"b":{"columns":{"aRef":{"type":{"key":{"refTable":"a","type":"uuid"}}}},"isRoot":true}},"version":"4.2.1"}]]) dnl Schemas without version numbers are accepted for backward dnl compatibility, but this is a deprecated feature. OVSDB_CHECK_POSITIVE_CPY([schema without version number], [[parse-schema \ '{"name": "mydb", "tables": { "x": { "columns": { "y": { "type": "integer"}}}}}']], [{"name":"mydb","tables":{"x":{"columns":{"y":{"type":"integer"}}}}}]) OVSDB_CHECK_NEGATIVE_CPY([schema with invalid refTables], [[parse-schema \ '{"name": "mydb", "tables": { "a": { "columns": { "map": { "type": { "key": { "type": "uuid", "refTable": "c"}, "value": { "type": "uuid", "refTable": "a"}}}}}, "b": { "columns": { "aRef": { "type": { "key": { "type": "uuid", "refTable": "a"}}}}}}}']], [[syntax error: column map key refers to undefined table c]]) OVSDB_CHECK_NEGATIVE_CPY([schema with invalid version number], [[parse-schema \ '{"name": "mydb", "tables": { "x": { "columns": { "y": { "type": "integer"}}}}, "version": "xxx"}']], [[schema version "xxx" not in format x.y.z]]) openvswitch-2.0.1+git20140120/tests/ovsdb-server.at000066400000000000000000000754501226605124000215760ustar00rootroot00000000000000AT_BANNER([OVSDB -- ovsdb-server transactions (Unix sockets)]) m4_define([OVSDB_SERVER_SHUTDOWN], [cp pid savepid 
AT_CHECK([ovs-appctl -t "`pwd`"/unixctl -e exit], [0], [ignore], [ignore]) OVS_WAIT_WHILE([kill -0 `cat savepid`], [kill `cat savepid`])]) # OVSDB_CHECK_EXECUTION(TITLE, SCHEMA, TRANSACTIONS, OUTPUT, [KEYWORDS]) # # Creates a database with the given SCHEMA, starts an ovsdb-server on # that database, and runs each of the TRANSACTIONS (which should be a # quoted list of quoted strings) against it with ovsdb-client one at a # time. # # Checks that the overall output is OUTPUT, but UUIDs in the output # are replaced by markers of the form where N is a number. The # first unique UUID is replaced by <0>, the next by <1>, and so on. # If a given UUID appears more than once it is always replaced by the # same marker. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_KEYWORDS([ovsdb server positive unix $5]) $2 > schema AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/pid --remote=punix:socket --unixctl="`pwd`"/unixctl db], [0], [ignore], [ignore]) m4_foreach([txn], [$3], [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore], [test ! -e pid || kill `cat pid`]) cat stdout >> output ]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl output], [0], [$4], [ignore], [test ! -e pid || kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) EXECUTION_EXAMPLES AT_SETUP([truncating corrupted database log]) AT_KEYWORDS([ovsdb server positive unix]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR ordinal_schema > schema AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) dnl Do one transaction and save the output. 
AT_DATA([txnfile], [[ovsdb-client transact unix:socket \ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}]' ]]) AT_CHECK([ovsdb-server --remote=punix:socket --unixctl="`pwd`"/unixctl db --run="sh txnfile"], [0], [stdout], []) cat stdout >> output dnl Add some crap to the database log and run another transaction, which should dnl ignore the crap and truncate it out of the log. echo 'xxx' >> db AT_DATA([txnfile], [[ovsdb-client transact unix:socket \ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}]' ]]) AT_CHECK([ovsdb-server --remote=punix:socket --unixctl="`pwd`"/unixctl db --run="sh txnfile"], [0], [stdout], [stderr]) AT_CHECK([grep 'syntax error: db: parse error.* in header line "xxx"' stderr], [0], [ignore]) cat stdout >> output dnl Run a final transaction to verify that both transactions succeeeded. dnl The crap that we added should have been truncated by the previous run, dnl so ovsdb-server shouldn't log a warning this time. AT_DATA([txnfile], [[ovsdb-client transact unix:socket \ '["ordinals", {"op": "select", "table": "ordinals", "where": [], "sort": ["number"]}]' ]]) AT_CHECK([ovsdb-server --remote=punix:socket --unixctl="`pwd`"/unixctl db --run="sh txnfile"], [0], [stdout], []) cat stdout >> output AT_CHECK([${PERL} $srcdir/uuidfilt.pl output], [0], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] ]], [], [test ! -e pid || kill `cat pid`]) AT_CLEANUP AT_SETUP([truncating database log with bad transaction]) AT_KEYWORDS([ovsdb server positive unix]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR ordinal_schema > schema AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) dnl Do one transaction and save the output. 
AT_DATA([txnfile], [[ovsdb-client transact unix:socket \ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}]' ]]) AT_CHECK([ovsdb-server --remote=punix:socket --unixctl="`pwd`"/unixctl db --run="sh txnfile"], [0], [stdout], []) cat stdout >> output dnl Add some crap to the database log and run another transaction, which should dnl ignore the crap and truncate it out of the log. echo 'OVSDB JSON 15 ffbcdae4b0386265f9ea3280dd7c8f0b72a20e56 {"invalid":{}}' >> db AT_DATA([txnfile], [[ovsdb-client transact unix:socket \ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}]' ]]) AT_CHECK([ovsdb-server --remote=punix:socket --unixctl="`pwd`"/unixctl db --run="sh txnfile"], [0], [stdout], [stderr]) AT_CHECK([grep 'syntax "{"invalid":{}}": unknown table: No table named invalid.' stderr], [0], [ignore]) cat stdout >> output dnl Run a final transaction to verify that both transactions succeeeded. dnl The crap that we added should have been truncated by the previous run, dnl so ovsdb-server shouldn't log a warning this time. AT_DATA([txnfile], [[ovsdb-client transact unix:socket \ '["ordinals", {"op": "select", "table": "ordinals", "where": [], "sort": ["number"]}]' ]]) AT_CHECK([ovsdb-server --remote=punix:socket --unixctl="`pwd`"/unixctl db --run="sh txnfile"], [0], [stdout], []) cat stdout >> output AT_CHECK([${PERL} $srcdir/uuidfilt.pl output], [0], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] ]], [], [test ! 
-e pid || kill `cat pid`]) AT_CLEANUP AT_SETUP([ovsdb-client get-schema-version]) AT_KEYWORDS([ovsdb server positive]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR ordinal_schema > schema AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/pid --unixctl="`pwd`"/unixctl --remote=punix:socket db], [0], [ignore], [ignore]) AT_CHECK([ovsdb-client get-schema-version unix:socket ordinals], [0], [5.1.3 ]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP AT_SETUP([database multiplexing implementation]) AT_KEYWORDS([ovsdb server positive]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR ordinal_schema > schema1 constraint_schema > schema2 AT_CHECK([ovsdb-tool create db1 schema1], [0], [ignore], [ignore]) AT_CHECK([ovsdb-tool create db2 schema2], [0], [ignore], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/pid --unixctl="`pwd`"/unixctl --remote=punix:socket db1 db2], [0], [ignore], [ignore]) AT_CHECK( [[ovsdb-client list-dbs unix:socket]], [0], [constraints ordinals ], [ignore], [test ! -e pid || kill `cat pid`]) AT_CHECK( [[test-jsonrpc request unix:socket get_schema [\"nonexistent\"]]], [0], [[{"error":null,"id":0,"result":{"details":"get_schema request specifies unknown database nonexistent","error":"unknown database","syntax":"[\"nonexistent\"]"}} ]], [], [test ! -e pid || kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP AT_SETUP([ovsdb-server/add-db and remove-db]) AT_KEYWORDS([ovsdb server positive]) ON_EXIT([kill `cat ovsdb-server.pid`]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR ordinal_schema > schema1 constraint_schema > schema2 AT_CHECK([ovsdb-tool create db1 schema1], [0], [ignore], [ignore]) AT_CHECK([ovsdb-tool create db2 schema2], [0], [ignore], [ignore]) # Start ovsdb-server with just a single database - db1. 
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db1], [0]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], [ordinals ]) # Add the second database. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-db db2], [0]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], [constraints ordinals ]) # The databases are responsive. AT_CHECK([ovsdb-client list-tables unix:socket constraints], [0], [ignore], [ignore]) AT_CHECK([ovsdb-client list-tables unix:socket ordinals], [0], [ignore], [ignore]) # Add an already added database. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-db db2], 2, [], [stderr]) AT_CHECK([sed 's/(.*)/(...)/' stderr], [0], [I/O error: db2: failed to lock lockfile (...) ovs-appctl: ovsdb-server: server returned an error ]) # Add a non-existing database. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-db db3], 2, [], [stderr]) AT_CHECK([sed 's/(.*)/(...)/' stderr], [0], [I/O error: open: db3 failed (...) ovs-appctl: ovsdb-server: server returned an error ]) # Add a remote through a db path in db1. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-remote db:ordinals,ordinals,name], [0]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes], [0], [db:ordinals,ordinals,name punix:socket ]) # Removing db1 has no effect on its remote. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/remove-db ordinals], [0]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], [constraints ]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes], [0], [db:ordinals,ordinals,name punix:socket ]) AT_CHECK([ovsdb-client list-tables unix:socket ordinals], [1], [ignore], [ignore]) # Remove db2. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/remove-db constraints], [0]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], []) AT_CHECK([ovsdb-client list-tables unix:socket constraints], [1], [ignore], [ignore]) # Remove a non-existent database. 
AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/remove-db ordinals], [2], [], [Failed to find the database. ovs-appctl: ovsdb-server: server returned an error ]) # Add a removed database. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-db db2], [0]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], [constraints ]) AT_CHECK([ovsdb-client list-tables unix:socket constraints], [0], [ignore], [ignore]) AT_CLEANUP AT_SETUP([ovsdb-server/add-db with --monitor]) AT_KEYWORDS([ovsdb server positive]) # Start ovsdb-server, initially with one db. OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR ordinal_schema > schema AT_CHECK([ovsdb-tool create db1 schema], [0], [ignore], [ignore]) ON_EXIT([kill `cat *.pid`]) AT_CHECK([ovsdb-server -v -vvlog:off --monitor --detach --no-chdir --pidfile --log-file db1]) # Add the second database. constraint_schema > schema2 AT_CHECK([ovsdb-tool create db2 schema2], [0], [ignore], [ignore]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-db db2], [0]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], [constraints ordinals ]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. cp ovsdb-server.pid old.pid AT_CHECK([kill -SEGV `cat ovsdb-server.pid`]) OVS_WAIT_WHILE([kill -0 `cat old.pid`]) OVS_WAIT_UNTIL( [test -s ovsdb-server.pid && test `cat ovsdb-server.pid` != `cat old.pid`]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], [constraints ordinals ]) AT_CLEANUP AT_SETUP([ovsdb-server/add-db and remove-db with --monitor]) AT_KEYWORDS([ovsdb server positive]) # Start ovsdb-server, initially with one db. 
OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR ordinal_schema > schema AT_CHECK([ovsdb-tool create db1 schema], [0], [ignore], [ignore]) constraint_schema > schema2 AT_CHECK([ovsdb-tool create db2 schema2], [0], [ignore], [ignore]) ON_EXIT([kill `cat *.pid`]) AT_CHECK([ovsdb-server -v -vvlog:off --monitor --detach --no-chdir --pidfile --log-file db1 db2]) # Remove the second database. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/remove-db constraints]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], [ordinals ]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. cp ovsdb-server.pid old.pid AT_CHECK([kill -SEGV `cat ovsdb-server.pid`]) OVS_WAIT_WHILE([kill -0 `cat old.pid`]) OVS_WAIT_UNTIL( [test -s ovsdb-server.pid && test `cat ovsdb-server.pid` != `cat old.pid`]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-dbs], [0], [ordinals ]) AT_CLEANUP AT_SETUP([--remote=db: implementation]) AT_KEYWORDS([ovsdb server positive]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR AT_DATA([schema], [[{"name": "mydb", "tables": { "Root": { "columns": { "managers": { "type": { "key": "string", "min": 0, "max": "unlimited"}}, "manager_options": { "type": { "key": {"type": "uuid", "refTable": "Manager"}, "min": 0, "max": "unlimited"}}}}, "Manager": { "columns": { "target": { "type": "string"}, "is_connected": { "type": { "key": "boolean", "min": 0, "max": 1}}}}}} ]]) AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) AT_CHECK( [[ovsdb-tool transact db \ '["mydb", {"op": "insert", "table": "Root", "row": { "managers": "punix:socket1", "manager_options": ["set", [["named-uuid", "x"]]]}}, {"op": "insert", "table": "Manager", "uuid-name": "x", "row": {"target": "punix:socket2"}}]']], [0], [ignore], [ignore]) ON_EXIT([kill `cat ovsdb-server.pid`]) AT_CHECK([ovsdb-server --enable-dummy --detach --no-chdir --pidfile 
--remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore]) for i in 1 2 3 4 5 6; do ovs-appctl -t ovsdb-server time/warp 1000; done AT_CHECK( [[ovsdb-client transact unix:socket1 \ '["mydb", {"op": "select", "table": "Root", "where": [], "columns": ["managers"]}, {"op": "select", "table": "Manager", "where": [], "columns": ["target", "is_connected"]}]']], [0], [stdout], [ignore]) AT_CHECK( [${PERL} $srcdir/uuidfilt.pl stdout], [0], [[[{"rows":[{"managers":"punix:socket1"}]},{"rows":[{"is_connected":false,"target":"punix:socket2"}]}] ]], [ignore]) AT_CLEANUP AT_SETUP([ovsdb-server/add-remote and remove-remote]) AT_KEYWORDS([ovsdb server positive]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR ordinal_schema > schema AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) ON_EXIT([kill `cat *.pid`]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile db]) AT_CHECK([test ! -e socket1]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-remote punix:socket1]) OVS_WAIT_UNTIL([test -S socket1]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes], [0], [punix:socket1 ]) AT_CHECK([test ! -e socket2]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-remote punix:socket2]) OVS_WAIT_UNTIL([test -S socket2]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes], [0], [punix:socket1 punix:socket2 ]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-remote db:x,y,z], [2], [], ["db:x,y,z": no database named x ovs-appctl: ovsdb-server: server returned an error ]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/remove-remote punix:socket1]) OVS_WAIT_UNTIL([test ! -e socket1]) AT_CHECK([test -S socket2]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes], [0], [punix:socket2 ]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/remove-remote punix:socket2]) OVS_WAIT_UNTIL([test ! -e socket2]) AT_CHECK([test ! 
-e socket1]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes]) AT_CLEANUP AT_SETUP([ovsdb-server/add-remote with --monitor]) AT_KEYWORDS([ovsdb server positive]) # Start ovsdb-server, initially with no remotes. OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR ordinal_schema > schema AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) ON_EXIT([kill `cat *.pid`]) AT_CHECK([ovsdb-server -v -vvlog:off --monitor --detach --no-chdir --pidfile --log-file db]) # Add a remote. AT_CHECK([test ! -e socket1]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-remote punix:socket1]) OVS_WAIT_UNTIL([test -S socket1]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes], [0], [punix:socket1 ]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned and for it to # start listening on 'socket1'. cp ovsdb-server.pid old.pid rm socket1 AT_CHECK([kill -SEGV `cat ovsdb-server.pid`]) OVS_WAIT_WHILE([kill -0 `cat old.pid`]) OVS_WAIT_UNTIL( [test -s ovsdb-server.pid && test `cat ovsdb-server.pid` != `cat old.pid`]) OVS_WAIT_UNTIL([test -S socket1]) AT_CLEANUP AT_SETUP([ovsdb-server/add-remote and remove-remote with --monitor]) AT_KEYWORDS([ovsdb server positive]) # Start ovsdb-server, initially with no remotes. OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR ordinal_schema > schema AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) ON_EXIT([kill `cat *.pid`]) AT_CHECK([ovsdb-server -v -vvlog:off --monitor --detach --no-chdir --pidfile --log-file db]) # Add a remote. AT_CHECK([test ! -e socket1]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/add-remote punix:socket1]) OVS_WAIT_UNTIL([test -S socket1]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes], [0], [punix:socket1 ]) # Remove the remote. AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/remove-remote punix:socket1]) OVS_WAIT_UNTIL([test ! 
-e socket1]) AT_CHECK([ovs-appctl -t ovsdb-server ovsdb-server/list-remotes]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned and make sure that it # does not listen on 'socket1'. cp ovsdb-server.pid old.pid AT_CHECK([kill -SEGV `cat ovsdb-server.pid`]) OVS_WAIT_WHILE([kill -0 `cat old.pid`]) OVS_WAIT_UNTIL( [test -s ovsdb-server.pid && test `cat ovsdb-server.pid` != `cat old.pid`]) AT_CHECK([test ! -e socket1]) AT_CLEANUP AT_SETUP([SSL db: implementation]) AT_KEYWORDS([ovsdb server positive ssl $5]) AT_SKIP_IF([test "$HAVE_OPENSSL" = no]) PKIDIR=$abs_top_builddir/tests AT_SKIP_IF([expr "$PKIDIR" : ".*[ '\" \\]"]) AT_DATA([schema], [[{"name": "mydb", "tables": { "SSL": { "columns": { "private_key": {"type": "string"}, "certificate": {"type": "string"}, "ca_cert": {"type": "string"}}}}} ]]) AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK( [[ovsdb-tool transact db \ '["mydb", {"op": "insert", "table": "SSL", "row": {"private_key": "'"$PKIDIR/testpki-privkey2.pem"'", "certificate": "'"$PKIDIR/testpki-cert2.pem"'", "ca_cert": "'"$PKIDIR/testpki-cacert.pem"'"}}]']], [0], [ignore], [ignore]) OVS_LOGDIR=`pwd`; export OVS_LOGDIR AT_CHECK( [ovsdb-server --log-file --detach --no-chdir --pidfile="`pwd`"/pid \ --private-key=db:mydb,SSL,private_key \ --certificate=db:mydb,SSL,certificate \ --ca-cert=db:mydb,SSL,ca_cert \ --remote=pssl:0:127.0.0.1 --unixctl="`pwd`"/unixctl db], [0], [ignore], [ignore]) SSL_PORT=`parse_listening_port < ovsdb-server.log` AT_CHECK( [[ovsdb-client \ --private-key=$PKIDIR/testpki-privkey.pem \ --certificate=$PKIDIR/testpki-cert.pem \ --ca-cert=$PKIDIR/testpki-cacert.pem \ transact ssl:127.0.0.1:$SSL_PORT \ '["mydb", {"op": "select", "table": "SSL", "where": [], "columns": ["private_key"]}]']], [0], [stdout], [ignore], [test ! 
-e pid || kill `cat pid`]) cat stdout >> output AT_CHECK_UNQUOTED( [cat output], [0], [[[{"rows":[{"private_key":"$PKIDIR/testpki-privkey2.pem"}]}] ]], [ignore], [test ! -e pid || kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP AT_SETUP([compacting online]) AT_KEYWORDS([ovsdb server compact]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR ordinal_schema > schema dnl Make sure that "ovsdb-tool create" works with a dangling symlink for dnl the database and the lockfile, creating the target of each symlink rather dnl than replacing the symlinks with regular files. mkdir dir ln -s dir/db db ln -s dir/.db.~lock~ .db.~lock~ AT_SKIP_IF([test ! -h db || test ! -h .db.~lock~]) AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) dnl Start ovsdb-server. AT_CHECK([ovsdb-server --detach --no-chdir --pidfile="`pwd`"/pid --unixctl="`pwd`"/unixctl --remote=punix:socket --log-file="`pwd`"/ovsdb-server.log db], [0], [ignore], [ignore]) AT_CAPTURE_FILE([ovsdb-server.log]) dnl Do a bunch of random transactions that put crap in the database log. AT_CHECK( [[for pair in 'zero 0' 'one 1' 'two 2' 'three 3' 'four 4' 'five 5'; do set -- $pair ovsdb-client transact unix:socket ' ["ordinals", {"op": "insert", "table": "ordinals", "row": {"name": "'$1'", "number": '$2'}}, {"op": "comment", "comment": "add row for '"$pair"'"}]' ovsdb-client transact unix:socket ' ["ordinals", {"op": "delete", "table": "ordinals", "where": [["number", "==", '$2']]}, {"op": "comment", "comment": "delete row for '"$2"'"}]' ovsdb-client transact unix:socket ' ["ordinals", {"op": "insert", "table": "ordinals", "row": {"name": "'$1'", "number": '$2'}}, {"op": "comment", "comment": "add back row for '"$pair"'"}]' done]], [0], [stdout], [ignore], [test ! -e pid || kill `cat pid`]) dnl Check that all the crap is in fact in the database log. 
AT_CHECK([[${PERL} $srcdir/uuidfilt.pl db | grep -v ^OVSDB | sed 's/"_date":[0-9]*/"_date":0/' | test-json --multiple -]], [0], [[{"cksum":"12345678 9","name":"ordinals","tables":{"ordinals":{"columns":{"name":{"type":"string"},"number":{"type":"integer"}},"indexes":[["number"]]}},"version":"5.1.3"} {"_comment":"add row for zero 0","_date":0,"ordinals":{"<0>":{"name":"zero"}}} {"_comment":"delete row for 0","_date":0,"ordinals":{"<0>":null}} {"_comment":"add back row for zero 0","_date":0,"ordinals":{"<1>":{"name":"zero"}}} {"_comment":"add row for one 1","_date":0,"ordinals":{"<2>":{"name":"one","number":1}}} {"_comment":"delete row for 1","_date":0,"ordinals":{"<2>":null}} {"_comment":"add back row for one 1","_date":0,"ordinals":{"<3>":{"name":"one","number":1}}} {"_comment":"add row for two 2","_date":0,"ordinals":{"<4>":{"name":"two","number":2}}} {"_comment":"delete row for 2","_date":0,"ordinals":{"<4>":null}} {"_comment":"add back row for two 2","_date":0,"ordinals":{"<5>":{"name":"two","number":2}}} {"_comment":"add row for three 3","_date":0,"ordinals":{"<6>":{"name":"three","number":3}}} {"_comment":"delete row for 3","_date":0,"ordinals":{"<6>":null}} {"_comment":"add back row for three 3","_date":0,"ordinals":{"<7>":{"name":"three","number":3}}} {"_comment":"add row for four 4","_date":0,"ordinals":{"<8>":{"name":"four","number":4}}} {"_comment":"delete row for 4","_date":0,"ordinals":{"<8>":null}} {"_comment":"add back row for four 4","_date":0,"ordinals":{"<9>":{"name":"four","number":4}}} {"_comment":"add row for five 5","_date":0,"ordinals":{"<10>":{"name":"five","number":5}}} {"_comment":"delete row for 5","_date":0,"ordinals":{"<10>":null}} {"_comment":"add back row for five 5","_date":0,"ordinals":{"<11>":{"name":"five","number":5}}} ]], [], [test ! -e pid || kill `cat pid`]) dnl Dump out and check the actual database contents. 
AT_CHECK([[ovsdb-client dump unix:socket ordinals]], [0], [stdout], [ignore]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid name number ------------------------------------ ----- ------ <0> five 5 @&t@ <1> four 4 @&t@ <2> one 1 @&t@ <3> three 3 @&t@ <4> two 2 @&t@ <5> zero 0 @&t@ ], [], [test ! -e pid || kill `cat pid`]) dnl Now compact the database in-place. AT_CHECK([[ovs-appctl -t "`pwd`"/unixctl ovsdb-server/compact]], [0], [], [ignore], [test ! -e pid || kill `cat pid`]) dnl Make sure that "db" is still a symlink to dir/db instead of getting dnl replaced by a regular file, ditto for .db.~lock~. AT_CHECK([test -h db]) AT_CHECK([test -h .db.~lock~]) AT_CHECK([test -f dir/db]) AT_CHECK([test -f dir/.db.~lock~]) dnl We can't fully re-check the contents of the database log, because the dnl order of the records is not predictable, but there should only be 4 lines dnl in it now. AT_CAPTURE_FILE([db]) AT_CHECK([test `wc -l < db` -eq 4], [0], [], [], [test ! -e pid || kill `cat pid`]) dnl And check that the dumped data is the same too: AT_CHECK([ovsdb-client dump unix:socket ordinals], [0], [stdout], [ignore], [test ! -e pid || kill `cat pid`]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid name number ------------------------------------ ----- ------ <0> five 5 @&t@ <1> four 4 @&t@ <2> one 1 @&t@ <3> three 3 @&t@ <4> two 2 @&t@ <5> zero 0 @&t@ ], [], [test ! -e pid || kill `cat pid`]) dnl Now do some more transactions. AT_CHECK( [[ovsdb-client transact unix:socket ' ["ordinals", {"op": "delete", "table": "ordinals", "where": [["number", "<", 3]]}]']], [0], [[[{"count":3}] ]], [ignore], [test ! -e pid || kill `cat pid`]) dnl There should be 6 lines in the log now. AT_CHECK([test `wc -l < db` -eq 6], [0], [], [], [test ! -e pid || kill `cat pid`]) dnl Then check that the dumped data is correct. AT_CHECK([ovsdb-client dump unix:socket ordinals], [0], [stdout], [ignore], [test ! 
-e pid || kill `cat pid`]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid name number ------------------------------------ ----- ------ <0> five 5 @&t@ <1> four 4 @&t@ <2> three 3 @&t@ ], [], [test ! -e pid || kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP AT_BANNER([OVSDB -- ovsdb-server transactions (SSL sockets)]) # OVSDB_CHECK_EXECUTION(TITLE, SCHEMA, TRANSACTIONS, OUTPUT, [KEYWORDS]) # # Creates a database with the given SCHEMA, starts an ovsdb-server on # that database, and runs each of the TRANSACTIONS (which should be a # quoted list of quoted strings) against it with ovsdb-client one at a # time. # # Checks that the overall output is OUTPUT, but UUIDs in the output # are replaced by markers of the form where N is a number. The # first unique UUID is replaced by <0>, the next by <1>, and so on. # If a given UUID appears more than once it is always replaced by the # same marker. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb server positive ssl $5]) AT_SKIP_IF([test "$HAVE_OPENSSL" = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR $2 > schema PKIDIR=$abs_top_builddir/tests AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile="`pwd`"/pid --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:127.0.0.1 --unixctl="`pwd`"/unixctl db], [0], [ignore], [ignore]) SSL_PORT=`parse_listening_port < ovsdb-server.log` m4_foreach([txn], [$3], [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore], [test ! 
-e pid || kill `cat pid`]) cat stdout >> output ]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl output], [0], [$4], [ignore], [test ! -e pid || kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) EXECUTION_EXAMPLES AT_BANNER([OVSDB -- ovsdb-server transactions (TCP sockets)]) AT_SETUP([ovsdb-client get-schema-version - tcp socket]) AT_KEYWORDS([ovsdb server positive tcp]) ordinal_schema > schema AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) OVS_LOGDIR=`pwd`; export OVS_LOGDIR AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile="`pwd`"/pid --unixctl="`pwd`"/unixctl --remote=ptcp:0:127.0.0.1 db], [0], [ignore], [ignore]) TCP_PORT=`parse_listening_port < ovsdb-server.log` AT_CHECK([ovsdb-client get-schema-version tcp:127.0.0.1:$TCP_PORT ordinals], [0], [5.1.3 ]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) # OVSDB_CHECK_EXECUTION(TITLE, SCHEMA, TRANSACTIONS, OUTPUT, [KEYWORDS]) # # Creates a database with the given SCHEMA, starts an ovsdb-server on # that database, and runs each of the TRANSACTIONS (which should be a # quoted list of quoted strings) against it with ovsdb-client one at a # time. # # Checks that the overall output is OUTPUT, but UUIDs in the output # are replaced by markers of the form where N is a number. The # first unique UUID is replaced by <0>, the next by <1>, and so on. # If a given UUID appears more than once it is always replaced by the # same marker. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. 
m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb server positive tcp $5]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR $2 > schema PKIDIR=$abs_top_builddir/tests AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile="`pwd`"/pid --remote=ptcp:0:127.0.0.1 --unixctl="`pwd`"/unixctl db], [0], [ignore], [ignore]) TCP_PORT=`parse_listening_port < ovsdb-server.log` m4_foreach([txn], [$3], [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore], [test ! -e pid || kill `cat pid`]) cat stdout >> output ]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl output], [0], [$4], [ignore], [test ! -e pid || kill `cat pid`]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) EXECUTION_EXAMPLES AT_BANNER([OVSDB -- transactions on transient ovsdb-server]) # OVSDB_CHECK_EXECUTION(TITLE, SCHEMA, TRANSACTIONS, OUTPUT, [KEYWORDS]) # # Creates a database with the given SCHEMA and runs each of the # TRANSACTIONS (which should be a quoted list of quoted strings) # against it with ovsdb-client one at a time. Each ovsdb-client # is run against a separately started ovsdb-server that executes # only that single transaction. (The idea is that this should # help to ferret out any differences between what ovsdb-server has # in memory and what actually gets committed to disk.) # # Checks that the overall output is OUTPUT, but UUIDs in the output # are replaced by markers of the form where N is a number. The # first unique UUID is replaced by <0>, the next by <1>, and so on. # If a given UUID appears more than once it is always replaced by the # same marker. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. 
m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb server positive transient $5]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR $2 > schema AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) m4_foreach([txn], [$3], [AT_DATA([txnfile], [ovsdb-client transact unix:socket 'txn' ]) AT_CHECK([ovsdb-server --remote=punix:socket --unixctl="`pwd`"/unixctl db --run="sh txnfile"], [0], [stdout], [ignore]) cat stdout >> output ]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl output], [0], [$4], [ignore]) AT_CLEANUP]) EXECUTION_EXAMPLES openvswitch-2.0.1+git20140120/tests/ovsdb-table.at000066400000000000000000000064561226605124000213570ustar00rootroot00000000000000AT_BANNER([OVSDB -- tables]) OVSDB_CHECK_POSITIVE_CPY([non-root table with one column], [[parse-table mytable '{"columns": {"name": {"type": "string"}}}']], [[{"columns":{"name":{"type":"string"}}}]]) OVSDB_CHECK_POSITIVE_CPY([immutable table with one column], [[parse-table mytable \ '{"columns": {"name": {"type": "string"}}, "mutable": false}']], [[{"columns":{"name":{"type":"string"}},"mutable":false}]]) OVSDB_CHECK_POSITIVE_CPY([root table with one column], [[parse-table mytable \ '{"columns": {"name": {"type": "string"}}, "isRoot": true}']], [[{"columns":{"name":{"type":"string"}},"isRoot":true}]]) OVSDB_CHECK_POSITIVE_CPY([non-root table with default_is_root=true], [[parse-table mytable '{"columns": {"name": {"type": "string"}}}' true]], [[{"columns":{"name":{"type":"string"}},"isRoot":false}]]) OVSDB_CHECK_POSITIVE_CPY([root table with default_is_root=true], [[parse-table mytable \ '{"columns": {"name": {"type": "string"}}, "isRoot": true}' true]], [[{"columns":{"name":{"type":"string"}}}]]) OVSDB_CHECK_POSITIVE_CPY([table with maxRows of 2], [[parse-table mytable '{"columns": {"name": {"type": "string"}}, "maxRows": 2}']], [[{"columns":{"name":{"type":"string"}},"maxRows":2}]]) OVSDB_CHECK_POSITIVE_CPY([table with index], [[parse-table mytable '{"columns": {"a": {"type": "integer"}, "b": 
{"type": "string"}}, "indexes": [["b", "a"]]}']], [[{"columns":{"a":{"type":"integer"},"b":{"type":"string"}},"indexes":[["b","a"]]}]]) OVSDB_CHECK_NEGATIVE_CPY([table with syntax error in index], [[parse-table mytable '{"columns": {"a": {"type": "integer"}, "b": {"type": "string"}}, "indexes": [["b", "a"], [0]]}']], [[array of distinct column names expected]]) OVSDB_CHECK_NEGATIVE_CPY([table with empty index], [[parse-table mytable '{"columns": {"a": {"type": "integer"}, "b": {"type": "string"}}, "indexes": [[]]}']], [[index must have at least one column]]) OVSDB_CHECK_NEGATIVE_CPY([table with index of ephemeral column], [[parse-table mytable '{"columns": {"a": {"type": "integer", "ephemeral": true}, "b": {"type": "string"}}, "indexes": [["b", "a"]]}']], [[ephemeral columns (such as a) may not be indexed]]) OVSDB_CHECK_NEGATIVE_CPY([column names may not begin with _], [[parse-table mytable \ '{"columns": {"_column": {"type": "integer"}}}']], [[names beginning with "_" are reserved]], [table]) OVSDB_CHECK_NEGATIVE_CPY([table must have at least one column (1)], [[parse-table mytable '{}']], [[Parsing table schema for table mytable failed: Required 'columns' member is missing.]]) OVSDB_CHECK_NEGATIVE_CPY([table must have at least one column (2)], [[parse-table mytable '{"columns": {}}']], [[table must have at least one column]]) OVSDB_CHECK_NEGATIVE_CPY([table maxRows must be positive], [[parse-table mytable '{"columns": {"name": {"type": "string"}}, "maxRows": 0}']], [[syntax "{"columns":{"name":{"type":"string"}},"maxRows":0}": syntax error: maxRows must be at least 1]]) openvswitch-2.0.1+git20140120/tests/ovsdb-tool.at000066400000000000000000000276441226605124000212470ustar00rootroot00000000000000AT_BANNER([OVSDB -- ovsdb-tool]) # OVSDB_CHECK_EXECUTION(TITLE, SCHEMA, TRANSACTIONS, OUTPUT, [KEYWORDS]) # # Creates a database with the given SCHEMA and runs each of the # TRANSACTIONS (which should be a quoted list of quoted strings) # against it with ovsdb-tool one at 
a time. # # Checks that the overall output is OUTPUT, but UUIDs in the output # are replaced by markers of the form where N is a number. The # first unique UUID is replaced by <0>, the next by <1>, and so on. # If a given UUID appears more than once it is always replaced by the # same marker. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb file positive $5]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR $2 > schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) m4_foreach([txn], [$3], [AT_CHECK([ovsdb-tool transact db 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl output], [0], [$4]) AT_CLEANUP]) EXECUTION_EXAMPLES AT_SETUP([transaction comments]) AT_KEYWORDS([ovsdb file positive]) ordinal_schema > schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) AT_CHECK([[ovsdb-tool transact db ' ["ordinals", {"op": "insert", "table": "ordinals", "row": {"name": "five", "number": 5}}, {"op": "comment", "comment": "add row for 5"}]']], [0], [stdout], [ignore]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [[[{"uuid":["uuid","<0>"]},{}] ]]) AT_CHECK([grep "add row for 5" db], [0], [ignore]) AT_CLEANUP AT_SETUP([ovsdb-tool compact]) AT_KEYWORDS([ovsdb file positive]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR ordinal_schema > schema dnl Make sure that "ovsdb-tool create" works with a dangling symlink, dnl creating the target of the symlink rather than replacing the symlink dnl with a regular file, and that the lockfile gets created relative to dnl the symlink's target. mkdir dir : > dir/.db.~lock~ ln -s dir/db db AT_SKIP_IF([test ! -h db]) AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) AT_CHECK([test ! -e .db.~lock]) AT_CHECK([test -h db]) AT_CHECK([test -f dir/db]) dnl Do a bunch of random transactions that put crap in the database log. 
AT_CHECK( [[for pair in 'zero 0' 'one 1' 'two 2' 'three 3' 'four 4' 'five 5'; do set -- $pair ovsdb-tool transact db ' ["ordinals", {"op": "insert", "table": "ordinals", "row": {"name": "'$1'", "number": '$2'}}, {"op": "comment", "comment": "add row for '"$pair"'"}]' ovsdb-tool transact db ' ["ordinals", {"op": "delete", "table": "ordinals", "where": [["number", "==", '$2']]}, {"op": "comment", "comment": "delete row for '"$2"'"}]' ovsdb-tool transact db ' ["ordinals", {"op": "insert", "table": "ordinals", "row": {"name": "'$1'", "number": '$2'}}, {"op": "comment", "comment": "add back row for '"$pair"'"}]' done]], [0], [stdout], [ignore]) dnl Check that all the crap is in fact in the database log. AT_CHECK([[${PERL} $srcdir/uuidfilt.pl db | grep -v ^OVSDB | sed 's/"_date":[0-9]*/"_date":0/' | test-json --multiple -]], [0], [[{"cksum":"12345678 9","name":"ordinals","tables":{"ordinals":{"columns":{"name":{"type":"string"},"number":{"type":"integer"}},"indexes":[["number"]]}},"version":"5.1.3"} {"_comment":"add row for zero 0","_date":0,"ordinals":{"<0>":{"name":"zero"}}} {"_comment":"delete row for 0","_date":0,"ordinals":{"<0>":null}} {"_comment":"add back row for zero 0","_date":0,"ordinals":{"<1>":{"name":"zero"}}} {"_comment":"add row for one 1","_date":0,"ordinals":{"<2>":{"name":"one","number":1}}} {"_comment":"delete row for 1","_date":0,"ordinals":{"<2>":null}} {"_comment":"add back row for one 1","_date":0,"ordinals":{"<3>":{"name":"one","number":1}}} {"_comment":"add row for two 2","_date":0,"ordinals":{"<4>":{"name":"two","number":2}}} {"_comment":"delete row for 2","_date":0,"ordinals":{"<4>":null}} {"_comment":"add back row for two 2","_date":0,"ordinals":{"<5>":{"name":"two","number":2}}} {"_comment":"add row for three 3","_date":0,"ordinals":{"<6>":{"name":"three","number":3}}} {"_comment":"delete row for 3","_date":0,"ordinals":{"<6>":null}} {"_comment":"add back row for three 3","_date":0,"ordinals":{"<7>":{"name":"three","number":3}}} 
{"_comment":"add row for four 4","_date":0,"ordinals":{"<8>":{"name":"four","number":4}}} {"_comment":"delete row for 4","_date":0,"ordinals":{"<8>":null}} {"_comment":"add back row for four 4","_date":0,"ordinals":{"<9>":{"name":"four","number":4}}} {"_comment":"add row for five 5","_date":0,"ordinals":{"<10>":{"name":"five","number":5}}} {"_comment":"delete row for 5","_date":0,"ordinals":{"<10>":null}} {"_comment":"add back row for five 5","_date":0,"ordinals":{"<11>":{"name":"five","number":5}}} ]]) dnl Dump out and check the actual database contents. AT_CHECK([[ovsdb-server --unixctl="`pwd`"/unixctl --remote=punix:socket --run "ovsdb-client dump unix:socket ordinals" db]], [0], [stdout], [ignore]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid name number ------------------------------------ ----- ------ <0> five 5 @&t@ <1> four 4 @&t@ <2> one 1 @&t@ <3> three 3 @&t@ <4> two 2 @&t@ <5> zero 0 @&t@ ]) dnl Now compact the database in-place. touch .db.tmp.~lock~ AT_CHECK([[ovsdb-tool compact db]], [0], [], [ignore]) dnl Make sure that "db" is still a symlink to dir/db instead of getting dnl replaced by a regular file. AT_CHECK([test ! -e .db.~lock]) AT_CHECK([test -h db]) AT_CHECK([test -f dir/db]) dnl We can't fully re-check the contents of the database log, because the dnl order of the records is not predictable, but there should only be 4 lines dnl in it now. 
AT_CAPTURE_FILE([db]) AT_CHECK([test `wc -l < db` -eq 4]) dnl And check that the dumped data is the same too: AT_CHECK([[ovsdb-server --unixctl="`pwd`"/unixctl --remote=punix:socket --run "ovsdb-client dump unix:socket ordinals" db]], [0], [stdout], [ignore]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid name number ------------------------------------ ----- ------ <0> five 5 @&t@ <1> four 4 @&t@ <2> one 1 @&t@ <3> three 3 @&t@ <4> two 2 @&t@ <5> zero 0 @&t@ ]) AT_CLEANUP AT_SETUP([ovsdb-tool convert -- removing a column]) AT_KEYWORDS([ovsdb file positive]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR ordinal_schema > schema AT_DATA([new-schema], [[{"name": "ordinals", "tables": { "ordinals": { "columns": { "number": {"type": "integer"}}}}} ]]) touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) dnl Put some data in the database. AT_CHECK( [[for pair in 'zero 0' 'one 1' 'two 2' 'three 3' 'four 4' 'five 5'; do set -- $pair ovsdb-tool transact db ' ["ordinals", {"op": "insert", "table": "ordinals", "row": {"name": "'$1'", "number": '$2'}}, {"op": "comment", "comment": "add row for '"$pair"'"}]' done]], [0], [stdout], [ignore]) dnl Dump out and check the actual database contents. AT_CHECK([[ovsdb-server --unixctl="`pwd`"/unixctl --remote=punix:socket --run "ovsdb-client dump unix:socket ordinals" db]], [0], [stdout], [ignore]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid name number ------------------------------------ ----- ------ <0> five 5 @&t@ <1> four 4 @&t@ <2> one 1 @&t@ <3> three 3 @&t@ <4> two 2 @&t@ <5> zero 0 @&t@ ]) dnl Now convert the database in-place. touch .db.tmp.~lock~ AT_CHECK([[ovsdb-tool convert db new-schema]], [0], [], [ignore]) dnl We can't fully re-check the contents of the database log, because the dnl order of the records is not predictable, but there should only be 4 lines dnl in it now. 
AT_CAPTURE_FILE([db]) AT_CHECK([test `wc -l < db` -eq 4]) dnl And check that the dumped data is the same except for the removed column: AT_CHECK([[ovsdb-server --unixctl="`pwd`"/unixctl --remote=punix:socket --run "ovsdb-client dump unix:socket ordinals" db]], [0], [stdout], [ignore]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid number ------------------------------------ ------ <0> 0 @&t@ <1> 1 @&t@ <2> 2 @&t@ <3> 3 @&t@ <4> 4 @&t@ <5> 5 @&t@ ]) AT_CLEANUP AT_SETUP([ovsdb-tool convert -- adding a column]) AT_KEYWORDS([ovsdb file positive]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR AT_DATA([schema], [[{"name": "ordinals", "tables": { "ordinals": { "columns": { "number": {"type": "integer"}}}}} ]]) ordinal_schema > new-schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) dnl Put some data in the database. AT_CHECK( [[for number in 0 1 2 3 4 5; do ovsdb-tool transact db ' ["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": '$number'}}, {"op": "comment", "comment": "add row for '"$number"'"}]' done]], [0], [stdout], [ignore]) dnl Dump out and check the actual database contents. AT_CHECK([[ovsdb-server --unixctl="`pwd`"/unixctl --remote=punix:socket --run "ovsdb-client dump unix:socket ordinals" db]], [0], [stdout], [ignore]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid number ------------------------------------ ------ <0> 0 @&t@ <1> 1 @&t@ <2> 2 @&t@ <3> 3 @&t@ <4> 4 @&t@ <5> 5 @&t@ ]) dnl Now convert the database in-place. touch .db.tmp.~lock~ AT_CHECK([[ovsdb-tool convert db new-schema]], [0], [], [ignore]) dnl We can't fully re-check the contents of the database log, because the dnl order of the records is not predictable, but there should only be 4 lines dnl in it now. 
AT_CAPTURE_FILE([db]) AT_CHECK([test `wc -l < db` -eq 4]) dnl And check that the dumped data is the same except for the added column: AT_CHECK([[ovsdb-server --unixctl="`pwd`"/unixctl --remote=punix:socket --run "ovsdb-client dump unix:socket ordinals" db]], [0], [stdout], [ignore]) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [dnl ordinals table _uuid name number ------------------------------------ ---- ------ <0> "" 0 @&t@ <1> "" 1 @&t@ <2> "" 2 @&t@ <3> "" 3 @&t@ <4> "" 4 @&t@ <5> "" 5 @&t@ ]) AT_CLEANUP AT_SETUP([ovsdb-tool schema-version]) AT_KEYWORDS([ovsdb file positive]) ordinal_schema > schema AT_CHECK([ovsdb-tool schema-version schema], [0], [5.1.3 ]) AT_CLEANUP AT_SETUP([ovsdb-tool db-version]) AT_KEYWORDS([ovsdb file positive]) ordinal_schema > schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) AT_CHECK([ovsdb-tool db-version db], [0], [5.1.3 ]) AT_CLEANUP AT_SETUP([ovsdb-tool schema-cksum]) AT_KEYWORDS([ovsdb file positive]) ordinal_schema > schema AT_CHECK([ovsdb-tool schema-cksum schema], [0], [12345678 9 ]) AT_CLEANUP AT_SETUP([ovsdb-tool db-cksum]) AT_KEYWORDS([ovsdb file positive]) ordinal_schema > schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) AT_CHECK([ovsdb-tool db-cksum db], [0], [12345678 9 ]) AT_CLEANUP AT_SETUP([ovsdb-tool needs-conversion (no conversion needed)]) AT_KEYWORDS([ovsdb file positive]) ordinal_schema > schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) AT_CHECK([ovsdb-tool needs-conversion db schema], [0], [no ]) AT_CLEANUP AT_SETUP([ovsdb-tool needs-conversion (conversion needed)]) AT_KEYWORDS([ovsdb file positive]) ordinal_schema > schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) sed 's/5\.1\.3/5.1.4/' < schema > schema2 AT_CHECK([diff schema schema2], [1], [ignore]) AT_CHECK([ovsdb-tool needs-conversion db schema2], [0], [yes ]) AT_CLEANUP 
openvswitch-2.0.1+git20140120/tests/ovsdb-transaction.at000066400000000000000000000147051226605124000226110ustar00rootroot00000000000000AT_BANNER([OVSDB -- transactions]) OVSDB_CHECK_POSITIVE([empty table, empty transaction], [[transact \ '["print"]' \ '["commit"]' \ '["print"]' \ '["abort"]' \ '["print"]']], [dnl print: commit: print: abort: print:]) OVSDB_CHECK_POSITIVE([nonempty table, empty transaction], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["print"]' \ '["commit"]' \ '["print"]' \ '["abort"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: print: 1: i=2, j=3 2: i=2, j=3 commit: print: 1: i=2, j=3 2: i=2, j=3 abort: print: 1: i=2, j=3 2: i=2, j=3]) OVSDB_CHECK_POSITIVE([insert, commit], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["insert", "3", "1", "2"]' \ '["print"]' \ '["commit"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 insert 3 1 2: print: 1: i=2, j=3 2: i=2, j=3 3: i=1, j=2 commit: print: 1: i=2, j=3 2: i=2, j=3 3: i=1, j=2], [transaction]) OVSDB_CHECK_POSITIVE([insert, abort], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["insert", "3", "1", "2"]' \ '["print"]' \ '["abort"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 insert 3 1 2: print: 1: i=2, j=3 2: i=2, j=3 3: i=1, j=2 abort: print: 1: i=2, j=3 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([modify, commit], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["modify", "2", "5", "-1"]' \ '["modify", "1", "-1", "4"]' \ '["print"]' \ '["commit"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 modify 2 5 -1: modify 1 -1 4: print: 1: i=2, j=4 2: i=5, j=3 commit: print: 1: i=2, j=4 2: i=5, j=3], [transaction]) OVSDB_CHECK_POSITIVE([modify, abort], [[transact \ 
'["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["modify", "2", "5", "-1"]' \ '["modify", "1", "-1", "4"]' \ '["print"]' \ '["abort"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 modify 2 5 -1: modify 1 -1 4: print: 1: i=2, j=4 2: i=5, j=3 abort: print: 1: i=2, j=3 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([delete, commit], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["delete", "1"]' \ '["print"]' \ '["commit"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 delete 1: print: 2: i=2, j=3 commit: print: 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([delete, abort], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["delete", "1"]' \ '["print"]' \ '["abort"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 delete 1: print: 2: i=2, j=3 abort: print: 1: i=2, j=3 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([modify, delete, commit], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["modify", "1", "5", "6"]' \ '["delete", "1"]' \ '["print"]' \ '["commit"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 modify 1 5 6: delete 1: print: 2: i=2, j=3 commit: print: 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([modify, delete, abort], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["modify", "1", "5", "6"]' \ '["delete", "1"]' \ '["print"]' \ '["abort"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 modify 1 5 6: delete 1: print: 2: i=2, j=3 abort: print: 1: i=2, j=3 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([insert, delete, commit], [[transact \ '["insert", "1", "2", "3"]' \ 
'["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["insert", "3", "5", "6"]' \ '["delete", "1"]' \ '["delete", "3"]' \ '["print"]' \ '["commit"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 insert 3 5 6: delete 1: delete 3: print: 2: i=2, j=3 commit: print: 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([insert, delete, abort], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["insert", "3", "5", "6"]' \ '["delete", "1"]' \ '["delete", "3"]' \ '["print"]' \ '["abort"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 insert 3 5 6: delete 1: delete 3: print: 2: i=2, j=3 abort: print: 1: i=2, j=3 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([insert, modify, delete, commit], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["insert", "3", "5", "6"]' \ '["delete", "1"]' \ '["modify", "3", "7", "8"]' \ '["delete", "3"]' \ '["print"]' \ '["commit"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 insert 3 5 6: delete 1: modify 3 7 8: delete 3: print: 2: i=2, j=3 commit: print: 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([insert, modify, delete, abort], [[transact \ '["insert", "1", "2", "3"]' \ '["insert", "2", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["insert", "3", "5", "6"]' \ '["delete", "1"]' \ '["modify", "3", "7", "8"]' \ '["delete", "3"]' \ '["print"]' \ '["abort"]' \ '["print"]']], [dnl insert 1 2 3: insert 2 2 3: commit: print: 1: i=2, j=3 2: i=2, j=3 insert 3 5 6: delete 1: modify 3 7 8: delete 3: print: 2: i=2, j=3 abort: print: 1: i=2, j=3 2: i=2, j=3], [transaction]) OVSDB_CHECK_POSITIVE([deletes are aborted cleanly], [[transact \ '["insert", "1", "2", "3"]' \ '["commit"]' \ '["print"]' \ '["delete", "1"]' \ '["abort"]' \ '["print"]' \ '["delete", "1"]' \ '["abort"]' \ '["print"]']], [dnl insert 1 2 3: 
commit: print: 1: i=2, j=3 delete 1: abort: print: 1: i=2, j=3 delete 1: abort: print: 1: i=2, j=3], [transaction]) openvswitch-2.0.1+git20140120/tests/ovsdb-trigger.at000066400000000000000000000137771226605124000217370ustar00rootroot00000000000000AT_BANNER([OVSDB -- triggers]) # This is like OVSDB_CHECK_POSITIVE, except that UUIDs in the output # are replaced by markers of the form where N is a number. The # first unique UUID is replaced by <0>, the next by <1>, and so on. # If a given UUID appears more than once it is always replaced by the # same marker. m4_define([OVSDB_CHECK_TRIGGER], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb execute execution trigger positive $4]) AT_CHECK([test-ovsdb trigger $2], [0], [stdout], []) AT_CHECK([${PERL} $srcdir/uuidfilt.pl stdout], [0], [$3]) AT_CLEANUP]) OVSDB_CHECK_TRIGGER([trigger fires immediately], ["`ordinal_schema`" [\ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "wait", "timeout": 10, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}]}, {"op": "insert", "table": "ordinals", "row": {"number": 2, "name": "two"}}]']], [[t=0: trigger 0 (immediate): [{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{},{"uuid":["uuid","<2>"]}] ]]) OVSDB_CHECK_TRIGGER([trigger times out], ["`ordinal_schema`" [\ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}, {"op": "wait", "timeout": 10, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}, {"name": "two", "number": 2}]}]' \ '["advance", 10]']], [[t=0: new trigger 0 t=10: trigger 0 (delayed): [{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"details":"\"wait\" timed 
out after 10 ms","error":"timed out"}] ]]) OVSDB_CHECK_TRIGGER([trigger fires after delay], ["`ordinal_schema`" [\ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}]' \ '["advance", 5]' \ '["ordinals", {"op": "wait", "timeout": 10, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}, {"name": "two", "number": 2}]}]' \ '["advance", 5]' \ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 2, "name": "two"}}]']], [[t=0: trigger 0 (immediate): [{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}] t=5: new trigger 1 t=10: trigger 2 (immediate): [{"uuid":["uuid","<2>"]}] t=10: trigger 1 (delayed): [{}] ]]) OVSDB_CHECK_TRIGGER([delayed trigger modifies database], ["`ordinal_schema`" [\ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}]' \ '["advance", 5]' \ '["ordinals", {"op": "wait", "timeout": 10, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}, {"name": "two", "number": 2}]}, {"op": "delete", "table": "ordinals", "where": [["number", "<", 2]]}]' \ '["advance", 5]' \ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 2, "name": "two"}}]' \ '["advance", 5]' \ '["ordinals", {"op": "select", "table": "ordinals", "where": []}]']], [[t=0: trigger 0 (immediate): [{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}] t=5: new trigger 1 t=10: trigger 2 (immediate): [{"uuid":["uuid","<2>"]}] t=10: trigger 1 (delayed): [{},{"count":2}] t=15: trigger 3 (immediate): [{"rows":[{"_uuid":["uuid","<2>"],"_version":["uuid","<3>"],"name":"two","number":2}]}] ]]) OVSDB_CHECK_TRIGGER([one delayed trigger wakes up another], 
["`ordinal_schema`" [\ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "ordinals", "row": {"number": 1, "name": "one"}}]' \ '["advance", 5]' \ '["ordinals", {"op": "wait", "timeout": 10, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "two", "number": 2}]}, {"op": "delete", "table": "ordinals", "where": [["number", "==", 2]]}, {"op": "insert", "table": "ordinals", "row": {"number": 3, "name": "three"}}]' \ '["ordinals", {"op": "wait", "timeout": 10, "table": "ordinals", "where": [], "columns": ["name", "number"], "until": "==", "rows": [{"name": "zero", "number": 0}, {"name": "one", "number": 1}, {"name": "two", "number": 2}]}, {"op": "delete", "table": "ordinals", "where": [["number", "<", 2]]}]' \ '["advance", 5]' \ '["ordinals", {"op": "insert", "table": "ordinals", "row": {"number": 2, "name": "two"}}]' \ '["advance", 5]' \ '["ordinals", {"op": "select", "table": "ordinals", "where": []}]']], [[t=0: trigger 0 (immediate): [{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}] t=5: new trigger 1 t=5: new trigger 2 t=10: trigger 3 (immediate): [{"uuid":["uuid","<2>"]}] t=10: trigger 2 (delayed): [{},{"count":2}] t=15: trigger 1 (delayed): [{},{"count":1},{"uuid":["uuid","<3>"]}] t=15: trigger 4 (immediate): [{"rows":[{"_uuid":["uuid","<3>"],"_version":["uuid","<4>"],"name":"three","number":3}]}] ]]) openvswitch-2.0.1+git20140120/tests/ovsdb-types.at000066400000000000000000000172721226605124000214320ustar00rootroot00000000000000AT_BANNER([OVSDB -- atomic types]) OVSDB_CHECK_POSITIVE_CPY([integer], [[parse-atomic-type '["integer"]' ]], ["integer"]) OVSDB_CHECK_POSITIVE_CPY([real], [[parse-atomic-type '["real"]' ]], ["real"]) OVSDB_CHECK_POSITIVE_CPY([boolean], [[parse-atomic-type '["boolean"]' ]], ["boolean"]) OVSDB_CHECK_POSITIVE_CPY([string], [[parse-atomic-type '["string"]' ]], ["string"]) OVSDB_CHECK_POSITIVE_CPY([uuid], [[parse-atomic-type 
'["uuid"]' ]], ["uuid"]) OVSDB_CHECK_NEGATIVE_CPY([void is not a valid atomic-type], [[parse-atomic-type '["void"]' ]], ["void" is not an atomic-type]) AT_BANNER([OVSDB -- base types]) OVSDB_CHECK_POSITIVE_CPY([integer enum], [[parse-base-type '{"type": "integer", "enum": ["set", [-1, 4, 5]]}' ]], [[{"enum":["set",[-1,4,5]],"type":"integer"}]]) OVSDB_CHECK_POSITIVE_CPY([integer >= 5], [[parse-base-type '{"type": "integer", "minInteger": 5}' ]], [{"minInteger":5,"type":"integer"}]) OVSDB_CHECK_POSITIVE_CPY([integer <= 7], [[parse-base-type '{"type": "integer", "maxInteger": 7}' ]], [{"maxInteger":7,"type":"integer"}]) OVSDB_CHECK_POSITIVE_CPY([integer between -5 and 10], [[parse-base-type '{"type": "integer", "minInteger": -5, "maxInteger": 10}']], [{"maxInteger":10,"minInteger":-5,"type":"integer"}]) OVSDB_CHECK_NEGATIVE_CPY([integer max may not be less than min], [[parse-base-type '{"type": "integer", "minInteger": 5, "maxInteger": 3}']], [minInteger exceeds maxInteger]) OVSDB_CHECK_POSITIVE_CPY([real enum], [[parse-base-type '{"type": "real", "enum": ["set", [1.5, 0, 2.75]]}' ]], [[{"enum":["set",[0,1.5,2.75]],"type":"real"}]]) OVSDB_CHECK_POSITIVE_CPY([real >= -1.5], [[parse-base-type '{"type": "real", "minReal": -1.5}']], [{"minReal":-1.5,"type":"real"}]) OVSDB_CHECK_POSITIVE_CPY([real <= 1e5], [[parse-base-type '{"type": "real", "maxReal": 1e5}']], [{"maxReal":100000,"type":"real"}]) OVSDB_CHECK_POSITIVE_CPY([real between -2.5 and 3.75], [[parse-base-type '{"type": "real", "minReal": -2.5, "maxReal": 3.75}']], [{"maxReal":3.75,"minReal":-2.5,"type":"real"}]) OVSDB_CHECK_NEGATIVE_CPY([real max may not be less than min], [[parse-base-type '{"type": "real", "minReal": 555, "maxReal": 444}']], [minReal exceeds maxReal]) OVSDB_CHECK_POSITIVE_CPY([boolean], [[parse-base-type '[{"type": "boolean"}]' ]], ["boolean"]) OVSDB_CHECK_POSITIVE_CPY([boolean enum], [[parse-base-type '{"type": "boolean", "enum": true}' ]], [[{"enum":true,"type":"boolean"}]]) 
OVSDB_CHECK_POSITIVE_CPY([string enum], [[parse-base-type '{"type": "string", "enum": ["set", ["def", "abc"]]}']], [[{"enum":["set",["abc","def"]],"type":"string"}]]) OVSDB_CHECK_POSITIVE_CPY([string minLength], [[parse-base-type '{"type": "string", "minLength": 1}']], [{"minLength":1,"type":"string"}]) OVSDB_CHECK_POSITIVE_CPY([string maxLength], [[parse-base-type '{"type": "string", "maxLength": 5}']], [{"maxLength":5,"type":"string"}]) OVSDB_CHECK_POSITIVE_CPY([string minLength and maxLength], [[parse-base-type '{"type": "string", "minLength": 1, "maxLength": 5}']], [{"maxLength":5,"minLength":1,"type":"string"}]) OVSDB_CHECK_NEGATIVE_CPY([maxLength must not be less than minLength], [[parse-base-type '{"type": "string", "minLength": 5, "maxLength": 3}']], [minLength exceeds maxLength]) OVSDB_CHECK_NEGATIVE_CPY([maxLength must not be negative], [[parse-base-type '{"type": "string", "maxLength": -1}']], [maxLength out of valid range 0 to 4294967295]) OVSDB_CHECK_POSITIVE_CPY([uuid enum], [[parse-base-type '{"type": "uuid", "enum": ["uuid", "36bf19c0-ad9d-4232-bb85-b3d73dfe2123"]}' ]], [[{"enum":["uuid","36bf19c0-ad9d-4232-bb85-b3d73dfe2123"],"type":"uuid"}]]) OVSDB_CHECK_POSITIVE_CPY([uuid refTable], [[parse-base-type '{"type": "uuid", "refTable": "myTable"}' ]], [{"refTable":"myTable","type":"uuid"}]) OVSDB_CHECK_NEGATIVE_CPY([uuid refTable must be valid id], [[parse-base-type '{"type": "uuid", "refTable": "a-b-c"}' ]], [Type mismatch for member 'refTable']) OVSDB_CHECK_NEGATIVE_CPY([void is not a valid base-type], [[parse-base-type '["void"]' ]], ["void" is not an atomic-type]) OVSDB_CHECK_NEGATIVE_CPY(["type" member must be present], [[parse-base-type '{}']], [Parsing ovsdb type failed: Required 'type' member is missing.]) AT_BANNER([OVSDB -- simple types]) OVSDB_CHECK_POSITIVE_CPY([simple integer], [[parse-type '["integer"]' ]], ["integer"]) OVSDB_CHECK_POSITIVE_CPY([simple real], [[parse-type '["real"]' ]], ["real"]) OVSDB_CHECK_POSITIVE_CPY([simple boolean], 
[[parse-type '["boolean"]' ]], ["boolean"]) OVSDB_CHECK_POSITIVE_CPY([simple string], [[parse-type '["string"]' ]], ["string"]) OVSDB_CHECK_POSITIVE_CPY([simple uuid], [[parse-type '["uuid"]' ]], ["uuid"]) OVSDB_CHECK_POSITIVE_CPY([integer in object], [[parse-type '{"key": "integer"}' ]], ["integer"]) OVSDB_CHECK_POSITIVE_CPY([real in object with explicit min and max], [[parse-type '{"key": "real", "min": 1, "max": 1}' ]], ["real"]) OVSDB_CHECK_NEGATIVE_CPY([key type is required], [[parse-type '{}' ]], [Required 'key' member is missing.]) OVSDB_CHECK_NEGATIVE_CPY([void is not a valid type], [[parse-type '["void"]' ]], ["void" is not an atomic-type]) AT_BANNER([OVSDB -- set types]) OVSDB_CHECK_POSITIVE_CPY([optional boolean], [[parse-type '{"key": "boolean", "min": 0}' ]], [[{"key":"boolean","min":0}]], [set]) OVSDB_CHECK_POSITIVE_CPY([set of 1 to 3 uuids], [[parse-type '{"key": "uuid", "min": 1, "max": 3}' ]], [[{"key":"uuid","max":3}]]) OVSDB_CHECK_POSITIVE_CPY([set of 0 to 3 strings], [[parse-type '{"key": "string", "min": 0, "max": 3}' ]], [[{"key":"string","max":3,"min":0}]]) OVSDB_CHECK_POSITIVE_CPY([set of 0 or more integers], [[parse-type '{"key": "integer", "min": 0, "max": "unlimited"}']], [[{"key":"integer","max":"unlimited","min":0}]]) OVSDB_CHECK_POSITIVE_CPY([set of 1 or more reals], [[parse-type '{"key": "real", "min": 1, "max": "unlimited"}']], [[{"key":"real","max":"unlimited"}]]) OVSDB_CHECK_NEGATIVE_CPY([set max cannot be less than min], [[parse-type '{"key": "real", "min": 5, "max": 3}' ]], [ovsdb type fails constraint checks]) OVSDB_CHECK_NEGATIVE_CPY([set max cannot be negative], [[parse-type '{"key": "real", "max": -1}' ]], [bad min or max value]) OVSDB_CHECK_NEGATIVE_CPY([set min cannot be negative], [[parse-type '{"key": "real", "min": -1}' ]], [bad min or max value]) OVSDB_CHECK_NEGATIVE_CPY([set min cannot be greater than one], [[parse-type '{"key": "real", "min": 10, "max": "unlimited"}']], [ovsdb type fails constraint checks]) 
AT_BANNER([OVSDB -- map types]) OVSDB_CHECK_POSITIVE_CPY([map of 1 integer to boolean], [[parse-type '{"key": "integer", "value": "boolean"}' ]], [[{"key":"integer","value":"boolean"}]]) OVSDB_CHECK_POSITIVE_CPY([map of 1 boolean to integer, explicit min and max], [[parse-type '{"key": "boolean", "value": "integer", "min": 1, "max": 1}' ]], [[{"key":"boolean","value":"integer"}]]) OVSDB_CHECK_POSITIVE_CPY([map of 1 to 5 uuid to real], [[parse-type '{"key": "uuid", "value": "real", "min": 1, "max": 5}' ]], [[{"key":"uuid","max":5,"value":"real"}]]) OVSDB_CHECK_POSITIVE_CPY([map of 0 to 10 string to uuid], [[parse-type '{"key": "string", "value": "uuid", "min": 0, "max": 10}' ]], [[{"key":"string","max":10,"min":0,"value":"uuid"}]]) OVSDB_CHECK_POSITIVE_CPY([map of 1 to 20 real to string], [[parse-type '{"key": "real", "value": "string", "min": 1, "max": 20}' ]], [[{"key":"real","max":20,"value":"string"}]]) OVSDB_CHECK_POSITIVE_CPY([map of 0 or more string to real], [[parse-type '{"key": "string", "value": "real", "min": 0, "max": "unlimited"}' ]], [[{"key":"string","max":"unlimited","min":0,"value":"real"}]]) OVSDB_CHECK_NEGATIVE_CPY([map key type is required], [[parse-type '{"value": "integer"}' ]], [Required 'key' member is missing.]) openvswitch-2.0.1+git20140120/tests/ovsdb.at000066400000000000000000000070541226605124000202650ustar00rootroot00000000000000# OVSDB_CHECK_POSITIVE(TITLE, TEST-OVSDB-ARGS, OUTPUT, [KEYWORDS], [PREREQ]) # # Runs "test-ovsdb TEST-OVSDB-ARGS" and checks that it exits with # status 0 and prints OUTPUT on stdout. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. m4_define([OVSDB_CHECK_POSITIVE], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb positive $4]) AT_CHECK([test-ovsdb $2], [0], [$3 ], []) AT_CLEANUP]) # OVSDB_CHECK_POSITIVE_PY(TITLE, TEST-OVSDB-ARGS, OUTPUT, [KEYWORDS], [PREREQ], # [PY-CHECK]) # # Runs "test-ovsdb.py TEST-OVSDB-ARGS" and checks that it exits with # status 0 and prints OUTPUT on stdout. 
# # PY-CHECK is expanded before the check. It can check for features of the # Python implementation that are required for the test to pass. # # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. m4_define([OVSDB_CHECK_POSITIVE_PY], [AT_SETUP([$1]) AT_SKIP_IF([test $HAVE_PYTHON = no]) $6 AT_KEYWORDS([ovsdb positive Python $4]) AT_CHECK([$PYTHON $srcdir/test-ovsdb.py $2], [0], [$3 ], []) AT_CLEANUP]) # OVSDB_CHECK_POSITIVE_CPY(TITLE, TEST-OVSDB-ARGS, OUTPUT, [KEYWORDS], # [PREREQ], [PY-CHECK]) # # Runs identical C and Python tests, as specified. m4_define([OVSDB_CHECK_POSITIVE_CPY], [OVSDB_CHECK_POSITIVE([$1 - C], [$2], [$3], [$4], [$5]) OVSDB_CHECK_POSITIVE_PY([$1 - Python], [$2], [$3], [$4], [$5], [$6])]) # OVSDB_CHECK_NEGATIVE(TITLE, TEST-OVSDB-ARGS, OUTPUT, [KEYWORDS], [PREREQ]) # # Runs "test-ovsdb TEST-OVSDB-ARGS" and checks that it exits with # status 1 and that its output on stdout contains substring OUTPUT. # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. m4_define([OVSDB_CHECK_NEGATIVE], [AT_SETUP([$1]) AT_KEYWORDS([ovsdb negative $4]) AT_CHECK([test-ovsdb $2], [1], [], [stderr]) m4_assert(m4_len([$3])) AT_CHECK( [if grep -F -e "AS_ESCAPE([$3])" stderr then : else exit 99 fi], [0], [ignore], [ignore]) AT_CLEANUP]) # OVSDB_CHECK_NEGATIVE_PY(TITLE, TEST-OVSDB-ARGS, OUTPUT, [KEYWORDS], [PREREQ]) # # Runs "test-ovsdb TEST-OVSDB-ARGS" and checks that it exits with # status 1 and that its output on stdout contains substring OUTPUT. # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. m4_define([OVSDB_CHECK_NEGATIVE_PY], [AT_SETUP([$1]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_KEYWORDS([ovsdb negative $4]) AT_CHECK([$PYTHON $srcdir/test-ovsdb.py $2], [1], [], [stderr]) m4_assert(m4_len([$3])) AT_CHECK( [if grep -F -e "AS_ESCAPE([$3])" stderr then : else exit 99 fi], [0], [ignore], [ignore]) AT_CLEANUP]) # OVSDB_CHECK_NEGATIVE_CPY(TITLE, TEST-OVSDB-ARGS, OUTPUT, [KEYWORDS], # [PREREQ]) # # Runs identical C and Python tests, as specified. 
m4_define([OVSDB_CHECK_NEGATIVE_CPY], [OVSDB_CHECK_NEGATIVE([$1 - C], [$2], [$3], [$4], [$5]) OVSDB_CHECK_NEGATIVE_PY([$1 - Python], [$2], [$3], [$4], [$5])]) m4_include([tests/ovsdb-log.at]) m4_include([tests/ovsdb-types.at]) m4_include([tests/ovsdb-data.at]) m4_include([tests/ovsdb-column.at]) m4_include([tests/ovsdb-table.at]) m4_include([tests/ovsdb-row.at]) m4_include([tests/ovsdb-schema.at]) m4_include([tests/ovsdb-condition.at]) m4_include([tests/ovsdb-mutation.at]) m4_include([tests/ovsdb-query.at]) m4_include([tests/ovsdb-transaction.at]) m4_include([tests/ovsdb-execution.at]) m4_include([tests/ovsdb-trigger.at]) m4_include([tests/ovsdb-tool.at]) m4_include([tests/ovsdb-server.at]) m4_include([tests/ovsdb-monitor.at]) m4_include([tests/ovsdb-idl.at]) openvswitch-2.0.1+git20140120/tests/reconnect.at000066400000000000000000000537431226605124000211360ustar00rootroot00000000000000AT_BANNER([reconnect library]) m4_define([__RECONNECT_CHECK], [AT_SETUP([$1]) $2 AT_KEYWORDS([reconnect]) AT_DATA([input], [$3]) AT_CHECK([$4], [0], [$5]) AT_CLEANUP]) m4_define([RECONNECT_CHECK], [__RECONNECT_CHECK( [$1 - C], [], [$2], [test-reconnect < input], [$3]) __RECONNECT_CHECK( [$1 - Python], [AT_SKIP_IF([test $HAVE_PYTHON = no])], [$2], [$PYTHON $srcdir/test-reconnect.py < input], [$3])]) ###################################################################### RECONNECT_CHECK([nothing happens if not enabled], [run timeout ], [### t=1000 ### run timeout no timeout ]) ###################################################################### RECONNECT_CHECK([quick connect, idle disconnect], [enable # Connection succeeds. run connected # Send inactivity probe. timeout run # Idle timeout kills connection. timeout run disconnected ], [### t=1000 ### enable in BACKOFF for 0 ms (0 ms backoff) # Connection succeeds. 
run should connect connected in ACTIVE for 0 ms (0 ms backoff) created 1000, last activity 1000, last connected 1000 1 successful connections out of 1 attempts, seqno 1 connected last connected 0 ms ago, connected 0 ms total # Send inactivity probe. timeout advance 5000 ms ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) run should send probe in IDLE for 0 ms (0 ms backoff) # Idle timeout kills connection. timeout advance 5000 ms ### t=11000 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected disconnected at 11000 ms (0 ms ago) ]) ###################################################################### RECONNECT_CHECK([slow connect, idle disconnect], [enable # Start connecting. run connecting # Connect after 500 ms. advance 500 run connected # Send inactivity probe. timeout run # Idle timeout kills connection. timeout run disconnected ], [### t=1000 ### enable in BACKOFF for 0 ms (0 ms backoff) # Start connecting. run should connect connecting in CONNECTING for 0 ms (0 ms backoff) # Connect after 500 ms. advance 500 ### t=1500 ### in CONNECTING for 500 ms (0 ms backoff) run connected in ACTIVE for 0 ms (0 ms backoff) created 1000, last activity 1000, last connected 1500 1 successful connections out of 1 attempts, seqno 1 connected last connected 0 ms ago, connected 0 ms total # Send inactivity probe. timeout advance 5000 ms ### t=6500 ### in ACTIVE for 5000 ms (0 ms backoff) run should send probe in IDLE for 0 ms (0 ms backoff) # Idle timeout kills connection. 
timeout advance 5000 ms ### t=11500 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected disconnected at 11500 ms (0 ms ago) ]) ###################################################################### RECONNECT_CHECK([connect backs off], [enable # First connection attempt fails after 1000 ms. run connecting run timeout run connect-failed # Back off for 1000 ms. timeout run # Second connection attempt fails after 1000 ms. connecting timeout run connect-failed # Back off for 2000 ms. timeout run # Third connection attempt fails after 2000 ms. connecting timeout run connect-failed # Back off for 4000 ms. timeout run # Third connection attempt fails after 4000 ms. connecting timeout run connect-failed # Back off for 8000 ms. timeout run # Third connection attempt fails after 8000 ms. connecting timeout run connect-failed # Back off for 8000 ms. timeout run # Fourth connection attempt fails after 8000 ms. connecting timeout run connect-failed ], [### t=1000 ### enable in BACKOFF for 0 ms (0 ms backoff) # First connection attempt fails after 1000 ms. run should connect connecting in CONNECTING for 0 ms (0 ms backoff) run timeout advance 1000 ms ### t=2000 ### in CONNECTING for 1000 ms (0 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (1000 ms backoff) 0 successful connections out of 1 attempts, seqno 0 # Back off for 1000 ms. timeout advance 1000 ms ### t=3000 ### in BACKOFF for 1000 ms (1000 ms backoff) run should connect # Second connection attempt fails after 1000 ms. connecting in CONNECTING for 0 ms (1000 ms backoff) timeout advance 1000 ms ### t=4000 ### in CONNECTING for 1000 ms (1000 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (2000 ms backoff) 0 successful connections out of 2 attempts, seqno 0 # Back off for 2000 ms. 
timeout advance 2000 ms ### t=6000 ### in BACKOFF for 2000 ms (2000 ms backoff) run should connect # Third connection attempt fails after 2000 ms. connecting in CONNECTING for 0 ms (2000 ms backoff) timeout advance 2000 ms ### t=8000 ### in CONNECTING for 2000 ms (2000 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (4000 ms backoff) 0 successful connections out of 3 attempts, seqno 0 # Back off for 4000 ms. timeout advance 4000 ms ### t=12000 ### in BACKOFF for 4000 ms (4000 ms backoff) run should connect # Third connection attempt fails after 4000 ms. connecting in CONNECTING for 0 ms (4000 ms backoff) timeout advance 4000 ms ### t=16000 ### in CONNECTING for 4000 ms (4000 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (8000 ms backoff) 0 successful connections out of 4 attempts, seqno 0 # Back off for 8000 ms. timeout advance 8000 ms ### t=24000 ### in BACKOFF for 8000 ms (8000 ms backoff) run should connect # Third connection attempt fails after 8000 ms. connecting in CONNECTING for 0 ms (8000 ms backoff) timeout advance 8000 ms ### t=32000 ### in CONNECTING for 8000 ms (8000 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (8000 ms backoff) 0 successful connections out of 5 attempts, seqno 0 # Back off for 8000 ms. timeout advance 8000 ms ### t=40000 ### in BACKOFF for 8000 ms (8000 ms backoff) run should connect # Fourth connection attempt fails after 8000 ms. connecting in CONNECTING for 0 ms (8000 ms backoff) timeout advance 8000 ms ### t=48000 ### in CONNECTING for 8000 ms (8000 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (8000 ms backoff) 0 successful connections out of 6 attempts, seqno 0 ]) ###################################################################### RECONNECT_CHECK([connections with no data preserve backoff], [enable # First connect, then idle timeout kills connection. run connected timeout run timeout run disconnected # Back off for 1000 ms. 
timeout run # Second connect, then idle timeout kills connection. run connected timeout run timeout run disconnected # Back off for 2000 ms. timeout run # Third connect, then idle timeout kills connection. run connected timeout run timeout run disconnected # Back off for 4000 ms. timeout ], [### t=1000 ### enable in BACKOFF for 0 ms (0 ms backoff) # First connect, then idle timeout kills connection. run should connect connected in ACTIVE for 0 ms (0 ms backoff) created 1000, last activity 1000, last connected 1000 1 successful connections out of 1 attempts, seqno 1 connected last connected 0 ms ago, connected 0 ms total timeout advance 5000 ms ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) run should send probe in IDLE for 0 ms (0 ms backoff) timeout advance 5000 ms ### t=11000 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected disconnected at 11000 ms (0 ms ago) # Back off for 1000 ms. timeout advance 1000 ms ### t=12000 ### in BACKOFF for 1000 ms (1000 ms backoff) last connected 11000 ms ago, connected 10000 ms total run should connect # Second connect, then idle timeout kills connection. run should connect connected in ACTIVE for 0 ms (1000 ms backoff) created 1000, last activity 1000, last connected 12000 2 successful connections out of 2 attempts, seqno 3 connected last connected 0 ms ago, connected 10000 ms total timeout advance 5000 ms ### t=17000 ### in ACTIVE for 5000 ms (1000 ms backoff) run should send probe in IDLE for 0 ms (1000 ms backoff) timeout advance 5000 ms ### t=22000 ### in IDLE for 5000 ms (1000 ms backoff) run should disconnect disconnected in BACKOFF for 0 ms (2000 ms backoff) 2 successful connections out of 2 attempts, seqno 4 disconnected disconnected at 22000 ms (0 ms ago) # Back off for 2000 ms. 
timeout advance 2000 ms ### t=24000 ### in BACKOFF for 2000 ms (2000 ms backoff) last connected 12000 ms ago, connected 20000 ms total run should connect # Third connect, then idle timeout kills connection. run should connect connected in ACTIVE for 0 ms (2000 ms backoff) created 1000, last activity 1000, last connected 24000 3 successful connections out of 3 attempts, seqno 5 connected last connected 0 ms ago, connected 20000 ms total timeout advance 5000 ms ### t=29000 ### in ACTIVE for 5000 ms (2000 ms backoff) run should send probe in IDLE for 0 ms (2000 ms backoff) timeout advance 5000 ms ### t=34000 ### in IDLE for 5000 ms (2000 ms backoff) run should disconnect disconnected in BACKOFF for 0 ms (4000 ms backoff) 3 successful connections out of 3 attempts, seqno 6 disconnected disconnected at 34000 ms (0 ms ago) # Back off for 4000 ms. timeout advance 4000 ms ### t=38000 ### in BACKOFF for 4000 ms (4000 ms backoff) last connected 14000 ms ago, connected 30000 ms total ]) ###################################################################### RECONNECT_CHECK([brief connection preserves backoff], [enable # First connection attempt fails after 1000 ms. run connecting run timeout run connect-failed # Back off for 1000 ms. timeout run # Second connection attempt fails after 1000 ms. connecting timeout run connect-failed # Back off for 2000 ms. timeout run # Third connection attempt succeeds after 500 ms. connecting advance 500 run connected # Connection drops after another 250 ms. advance 250 disconnected run # Back off for 4000 ms. timeout run ], [### t=1000 ### enable in BACKOFF for 0 ms (0 ms backoff) # First connection attempt fails after 1000 ms. run should connect connecting in CONNECTING for 0 ms (0 ms backoff) run timeout advance 1000 ms ### t=2000 ### in CONNECTING for 1000 ms (0 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (1000 ms backoff) 0 successful connections out of 1 attempts, seqno 0 # Back off for 1000 ms. 
timeout advance 1000 ms ### t=3000 ### in BACKOFF for 1000 ms (1000 ms backoff) run should connect # Second connection attempt fails after 1000 ms. connecting in CONNECTING for 0 ms (1000 ms backoff) timeout advance 1000 ms ### t=4000 ### in CONNECTING for 1000 ms (1000 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (2000 ms backoff) 0 successful connections out of 2 attempts, seqno 0 # Back off for 2000 ms. timeout advance 2000 ms ### t=6000 ### in BACKOFF for 2000 ms (2000 ms backoff) run should connect # Third connection attempt succeeds after 500 ms. connecting in CONNECTING for 0 ms (2000 ms backoff) advance 500 ### t=6500 ### in CONNECTING for 500 ms (2000 ms backoff) run connected in ACTIVE for 0 ms (2000 ms backoff) created 1000, last activity 1000, last connected 6500 1 successful connections out of 3 attempts, seqno 1 connected last connected 0 ms ago, connected 0 ms total # Connection drops after another 250 ms. advance 250 ### t=6750 ### in ACTIVE for 250 ms (2000 ms backoff) disconnected in BACKOFF for 0 ms (4000 ms backoff) 1 successful connections out of 3 attempts, seqno 2 disconnected disconnected at 6750 ms (0 ms ago) run # Back off for 4000 ms. timeout advance 4000 ms ### t=10750 ### in BACKOFF for 4000 ms (4000 ms backoff) last connected 4250 ms ago, connected 250 ms total run should connect ]) ###################################################################### RECONNECT_CHECK([brief connection with data preserves backoff], [enable # First connection attempt fails after 1000 ms. run connecting run timeout run connect-failed # Back off for 1000 ms. timeout run # Second connection attempt fails after 1000 ms. connecting timeout run connect-failed # Back off for 2000 ms. timeout run # Third connection attempt succeeds after 500 ms. connecting advance 500 run connected # Connection receives 3 chunks of data spaced 250 ms apart. advance 250 run activity advance 250 run activity advance 250 run activity # Connection drops. 
disconnected run # Back off for 4000 ms. timeout run ], [### t=1000 ### enable in BACKOFF for 0 ms (0 ms backoff) # First connection attempt fails after 1000 ms. run should connect connecting in CONNECTING for 0 ms (0 ms backoff) run timeout advance 1000 ms ### t=2000 ### in CONNECTING for 1000 ms (0 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (1000 ms backoff) 0 successful connections out of 1 attempts, seqno 0 # Back off for 1000 ms. timeout advance 1000 ms ### t=3000 ### in BACKOFF for 1000 ms (1000 ms backoff) run should connect # Second connection attempt fails after 1000 ms. connecting in CONNECTING for 0 ms (1000 ms backoff) timeout advance 1000 ms ### t=4000 ### in CONNECTING for 1000 ms (1000 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (2000 ms backoff) 0 successful connections out of 2 attempts, seqno 0 # Back off for 2000 ms. timeout advance 2000 ms ### t=6000 ### in BACKOFF for 2000 ms (2000 ms backoff) run should connect # Third connection attempt succeeds after 500 ms. connecting in CONNECTING for 0 ms (2000 ms backoff) advance 500 ### t=6500 ### in CONNECTING for 500 ms (2000 ms backoff) run connected in ACTIVE for 0 ms (2000 ms backoff) created 1000, last activity 1000, last connected 6500 1 successful connections out of 3 attempts, seqno 1 connected last connected 0 ms ago, connected 0 ms total # Connection receives 3 chunks of data spaced 250 ms apart. advance 250 ### t=6750 ### in ACTIVE for 250 ms (2000 ms backoff) run activity created 1000, last activity 6750, last connected 6500 advance 250 ### t=7000 ### in ACTIVE for 500 ms (2000 ms backoff) run activity created 1000, last activity 7000, last connected 6500 advance 250 ### t=7250 ### in ACTIVE for 750 ms (2000 ms backoff) run activity created 1000, last activity 7250, last connected 6500 # Connection drops. 
disconnected in BACKOFF for 0 ms (4000 ms backoff) 1 successful connections out of 3 attempts, seqno 2 disconnected disconnected at 7250 ms (0 ms ago) run # Back off for 4000 ms. timeout advance 4000 ms ### t=11250 ### in BACKOFF for 4000 ms (4000 ms backoff) last connected 4750 ms ago, connected 750 ms total run should connect ]) ###################################################################### RECONNECT_CHECK([long connection resets backoff], [enable # First connection attempt fails after 1000 ms. run connecting run timeout run connect-failed # Back off for 1000 ms. timeout run # Second connection attempt fails after 1000 ms. connecting timeout run connect-failed # Back off for 2000 ms. timeout run # Third connection attempt succeeds after 500 ms. connecting advance 500 run connected # Connection receives 3 chunks of data spaced 2000 ms apart. advance 2000 run activity advance 2000 run activity advance 2000 run activity # Connection drops. disconnected run # Back off for 1000 ms. timeout run ], [### t=1000 ### enable in BACKOFF for 0 ms (0 ms backoff) # First connection attempt fails after 1000 ms. run should connect connecting in CONNECTING for 0 ms (0 ms backoff) run timeout advance 1000 ms ### t=2000 ### in CONNECTING for 1000 ms (0 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (1000 ms backoff) 0 successful connections out of 1 attempts, seqno 0 # Back off for 1000 ms. timeout advance 1000 ms ### t=3000 ### in BACKOFF for 1000 ms (1000 ms backoff) run should connect # Second connection attempt fails after 1000 ms. connecting in CONNECTING for 0 ms (1000 ms backoff) timeout advance 1000 ms ### t=4000 ### in CONNECTING for 1000 ms (1000 ms backoff) run should disconnect connect-failed in BACKOFF for 0 ms (2000 ms backoff) 0 successful connections out of 2 attempts, seqno 0 # Back off for 2000 ms. 
timeout advance 2000 ms ### t=6000 ### in BACKOFF for 2000 ms (2000 ms backoff) run should connect # Third connection attempt succeeds after 500 ms. connecting in CONNECTING for 0 ms (2000 ms backoff) advance 500 ### t=6500 ### in CONNECTING for 500 ms (2000 ms backoff) run connected in ACTIVE for 0 ms (2000 ms backoff) created 1000, last activity 1000, last connected 6500 1 successful connections out of 3 attempts, seqno 1 connected last connected 0 ms ago, connected 0 ms total # Connection receives 3 chunks of data spaced 2000 ms apart. advance 2000 ### t=8500 ### in ACTIVE for 2000 ms (2000 ms backoff) run activity created 1000, last activity 8500, last connected 6500 advance 2000 ### t=10500 ### in ACTIVE for 4000 ms (2000 ms backoff) run activity created 1000, last activity 10500, last connected 6500 advance 2000 ### t=12500 ### in ACTIVE for 6000 ms (2000 ms backoff) run activity created 1000, last activity 12500, last connected 6500 # Connection drops. disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 3 attempts, seqno 2 disconnected disconnected at 12500 ms (0 ms ago) run # Back off for 1000 ms. timeout advance 1000 ms ### t=13500 ### in BACKOFF for 1000 ms (1000 ms backoff) last connected 7000 ms ago, connected 6000 ms total run should connect ]) ###################################################################### RECONNECT_CHECK([connection attempt fails quickly], [enable # Connection fails quickly. run connect-failed ECONNREFUSED # Back off for 1000 ms. run timeout # Connection fails quickly again. run connect-failed ECONNREFUSED # Back off for 2000 ms. run timeout ], [### t=1000 ### enable in BACKOFF for 0 ms (0 ms backoff) # Connection fails quickly. run should connect connect-failed ECONNREFUSED in BACKOFF for 0 ms (1000 ms backoff) 0 successful connections out of 1 attempts, seqno 0 # Back off for 1000 ms. 
run timeout advance 1000 ms ### t=2000 ### in BACKOFF for 1000 ms (1000 ms backoff) # Connection fails quickly again. run should connect connect-failed ECONNREFUSED in BACKOFF for 0 ms (2000 ms backoff) 0 successful connections out of 2 attempts, seqno 0 # Back off for 2000 ms. run timeout advance 2000 ms ### t=4000 ### in BACKOFF for 2000 ms (2000 ms backoff) ]) ###################################################################### RECONNECT_CHECK([max-tries of 1 honored], [set-max-tries 1 enable # Connection succeeds. run connected # Send inactivity probe. timeout run # Idle timeout kills connection. timeout run disconnected ], [### t=1000 ### set-max-tries 1 1 tries left enable in BACKOFF for 0 ms (0 ms backoff) 0 tries left # Connection succeeds. run should connect connected in ACTIVE for 0 ms (0 ms backoff) created 1000, last activity 1000, last connected 1000 1 successful connections out of 1 attempts, seqno 1 connected last connected 0 ms ago, connected 0 ms total # Send inactivity probe. timeout advance 5000 ms ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) run should send probe in IDLE for 0 ms (0 ms backoff) # Idle timeout kills connection. timeout advance 5000 ms ### t=11000 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect disconnected in VOID for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected disconnected at 11000 ms (0 ms ago) ]) ###################################################################### RECONNECT_CHECK([max-tries of 0 honored], [set-max-tries 0 enable run timeout ], [### t=1000 ### set-max-tries 0 0 tries left enable run timeout no timeout ]) ###################################################################### RECONNECT_CHECK([passive mode], [passive enable # Start listening. timeout run listening # Listening never times out. timeout run # Listening failed (accept() returned funny error?). Back off and try again. listen-error 0 timeout run listening # Connection accepted. 
connected activity advance 1000 activity # Connection times out. timeout run timeout run disconnected # Start listening again. timeout run listening ], [### t=1000 ### passive enable in BACKOFF for 0 ms (0 ms backoff) # Start listening. timeout advance 0 ms run should connect listening in LISTENING for 0 ms (0 ms backoff) # Listening never times out. timeout no timeout run # Listening failed (accept() returned funny error?). Back off and try again. listen-error 0 in BACKOFF for 0 ms (1000 ms backoff) timeout advance 1000 ms ### t=2000 ### in BACKOFF for 1000 ms (1000 ms backoff) run should connect listening in LISTENING for 0 ms (1000 ms backoff) # Connection accepted. connected in ACTIVE for 0 ms (1000 ms backoff) created 1000, last activity 1000, last connected 2000 1 successful connections out of 1 attempts, seqno 1 connected last connected 0 ms ago, connected 0 ms total activity created 1000, last activity 2000, last connected 2000 advance 1000 ### t=3000 ### in ACTIVE for 1000 ms (1000 ms backoff) activity created 1000, last activity 3000, last connected 2000 # Connection times out. timeout advance 5000 ms ### t=8000 ### in ACTIVE for 6000 ms (1000 ms backoff) run should send probe in IDLE for 0 ms (1000 ms backoff) timeout advance 5000 ms ### t=13000 ### in IDLE for 5000 ms (1000 ms backoff) run should disconnect disconnected in BACKOFF for 0 ms (0 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected disconnected at 13000 ms (0 ms ago) # Start listening again. timeout advance 0 ms run should connect listening in LISTENING for 0 ms (0 ms backoff) ]) openvswitch-2.0.1+git20140120/tests/run-oftest000077500000000000000000000046701226605124000206570ustar00rootroot00000000000000#! /bin/sh set -e run () { echo "$@" "$@" || exit 1 } # Put built tools early in $PATH. builddir=`pwd` if test ! 
-e vswitchd/ovs-vswitchd; then echo >&2 'not in build directory, please change directory or run via \"make check-oftest' exit 1 fi PATH=$builddir/ovsdb:$builddir/vswitchd:$builddir/utilities:$PATH; export PATH # Find srcdir. case $srcdir in '') srcdir=$builddir ;; /*) ;; *) srcdir=`pwd`/$srcdir ;; esac if test ! -e "$srcdir"/WHY-OVS; then echo >&2 'source directory not found, please set $srcdir or run via \"make check-oftest' exit 1 fi # Make sure oftest is available. if test X"$OFT" = X; then OFT=oft fi if ($OFT --version) >/dev/null 2>&1; then : else echo >&2 'OFTest "oft" binary not found or cannot be run, please add to $PATH or set $OFT' exit 1 fi # Create sandbox. rm -rf sandbox mkdir sandbox cd sandbox sandbox=`pwd` # Set up environment for OVS programs to sandbox themselves. OVS_RUNDIR=$sandbox; export OVS_RUNDIR OVS_LOGDIR=$sandbox; export OVS_LOGDIR OVS_DBDIR=$sandbox; export OVS_DBDIR OVS_SYSCONFDIR=$sandbox; export OVS_SYSCONFDIR trap 'kill `cat *.pid`' 0 1 2 3 13 14 15 # Create database and start ovsdb-server. touch .conf.db.~lock~ rm -f conf.db run ovsdb-tool create conf.db "$srcdir"/vswitchd/vswitch.ovsschema run ovsdb-server --detach --no-chdir --pidfile -vconsole:off --log-file \ --remote=punix:"$sandbox"/db.sock # Start ovs-vswitchd. run ovs-vswitchd --detach --no-chdir --pidfile -vconsole:off --log-file \ --enable-dummy --disable-system -vvconn -vnetdev_dummy # Add a bridge and some ports for OFTest to use, # and configure ovs-vswitchd to connect to OFTest. run ovs-vsctl --no-wait \ -- add-br br0 \ -- set bridge br0 datapath-type=dummy fail-mode=secure for port in p1 p2 p3 p4; do run ovs-vsctl --no-wait \ -- add-port br0 $port \ -- set interface $port type=dummy \ options:pstream=punix:$OVS_RUNDIR/$port done run ovs-vsctl \ -- set-controller br0 tcp:127.0.0.1 \ -- set controller br0 connection-mode=out-of-band max-backoff=1000 # Run OFTest. run $OFT -P ovs-dummy $OFTFLAGS; status=$? 
cat < #include #include "aes128.h" #include "util.h" static void hex_to_uint8(const char *input, uint8_t *output, size_t n) { size_t i; if (strlen(input) != n * 2) { goto error; } for (i = 0; i < n; i++) { bool ok; output[i] = hexits_value(&input[i * 2], 2, &ok); if (!ok) { goto error; } } return; error: ovs_fatal(0, "\"%s\" is not exactly %zu hex digits", input, n * 2); } int main(int argc, char *argv[]) { struct aes128 aes; uint8_t plaintext[16]; uint8_t ciphertext[16]; uint8_t key[16]; size_t i; if (argc != 3) { ovs_fatal(0, "usage: %s KEY PLAINTEXT, where KEY and PLAINTEXT each " "consist of 32 hex digits", argv[0]); } hex_to_uint8(argv[1], key, 16); hex_to_uint8(argv[2], plaintext, 16); aes128_schedule(&aes, key); aes128_encrypt(&aes, plaintext, ciphertext); for (i = 0; i < 16; i++) { printf("%02x", ciphertext[i]); } putchar('\n'); return 0; } openvswitch-2.0.1+git20140120/tests/test-atomic.c000066400000000000000000000107051226605124000212140ustar00rootroot00000000000000/* * Copyright (c) 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "ovs-atomic.h" #include "util.h" #define TEST_ATOMIC_TYPE(ATOMIC_TYPE, BASE_TYPE) \ { \ ATOMIC_TYPE x = ATOMIC_VAR_INIT(1); \ BASE_TYPE value, orig; \ \ atomic_read(&x, &value); \ ovs_assert(value == 1); \ \ atomic_store(&x, 2); \ atomic_read(&x, &value); \ ovs_assert(value == 2); \ \ atomic_init(&x, 3); \ atomic_read(&x, &value); \ ovs_assert(value == 3); \ \ atomic_add(&x, 1, &orig); \ ovs_assert(orig == 3); \ atomic_read(&x, &value); \ ovs_assert(value == 4); \ \ atomic_sub(&x, 2, &orig); \ ovs_assert(orig == 4); \ atomic_read(&x, &value); \ ovs_assert(value == 2); \ \ atomic_or(&x, 6, &orig); \ ovs_assert(orig == 2); \ atomic_read(&x, &value); \ ovs_assert(value == 6); \ \ atomic_and(&x, 10, &orig); \ ovs_assert(orig == 6); \ atomic_read(&x, &value); \ ovs_assert(value == 2); \ \ atomic_xor(&x, 10, &orig); \ ovs_assert(orig == 2); \ atomic_read(&x, &value); \ ovs_assert(value == 8); \ } static void test_atomic_flag(void) { atomic_flag flag = ATOMIC_FLAG_INIT; ovs_assert(atomic_flag_test_and_set(&flag) == false); ovs_assert(atomic_flag_test_and_set(&flag) == true); atomic_flag_clear(&flag); ovs_assert(atomic_flag_test_and_set(&flag) == false); } int main(void) { TEST_ATOMIC_TYPE(atomic_char, char); TEST_ATOMIC_TYPE(atomic_uchar, unsigned char); TEST_ATOMIC_TYPE(atomic_schar, signed char); TEST_ATOMIC_TYPE(atomic_short, short); TEST_ATOMIC_TYPE(atomic_ushort, unsigned short); TEST_ATOMIC_TYPE(atomic_int, int); TEST_ATOMIC_TYPE(atomic_uint, unsigned int); TEST_ATOMIC_TYPE(atomic_long, long int); TEST_ATOMIC_TYPE(atomic_ulong, unsigned long int); TEST_ATOMIC_TYPE(atomic_llong, long long int); TEST_ATOMIC_TYPE(atomic_ullong, unsigned long long int); TEST_ATOMIC_TYPE(atomic_size_t, size_t); TEST_ATOMIC_TYPE(atomic_ptrdiff_t, ptrdiff_t); TEST_ATOMIC_TYPE(atomic_intmax_t, intmax_t); TEST_ATOMIC_TYPE(atomic_uintmax_t, uintmax_t); TEST_ATOMIC_TYPE(atomic_intptr_t, intptr_t); TEST_ATOMIC_TYPE(atomic_uintptr_t, uintptr_t); 
TEST_ATOMIC_TYPE(atomic_uint8_t, uint8_t); TEST_ATOMIC_TYPE(atomic_int8_t, int8_t); TEST_ATOMIC_TYPE(atomic_uint16_t, uint16_t); TEST_ATOMIC_TYPE(atomic_int16_t, int16_t); TEST_ATOMIC_TYPE(atomic_uint32_t, uint32_t); TEST_ATOMIC_TYPE(atomic_int32_t, int32_t); TEST_ATOMIC_TYPE(atomic_uint64_t, uint64_t); TEST_ATOMIC_TYPE(atomic_int64_t, int64_t); test_atomic_flag(); return 0; } openvswitch-2.0.1+git20140120/tests/test-bundle.c000066400000000000000000000165731226605124000212220ustar00rootroot00000000000000/* Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "bundle.h" #include #include #include "flow.h" #include "ofp-actions.h" #include "ofpbuf.h" #include "random.h" #include "util.h" #define N_FLOWS 50000 #define MAX_SLAVES 8 /* Maximum supported by this test framework. */ struct slave { ofp_port_t slave_id; bool enabled; size_t flow_count; }; struct slave_group { size_t n_slaves; struct slave slaves[MAX_SLAVES]; }; static struct slave * slave_lookup(struct slave_group *sg, ofp_port_t slave_id) { size_t i; for (i = 0; i < sg->n_slaves; i++) { if (sg->slaves[i].slave_id == slave_id) { return &sg->slaves[i]; } } return NULL; } static bool slave_enabled_cb(ofp_port_t slave_id, void *aux) { struct slave *slave; slave = slave_lookup(aux, slave_id); return slave ? 
slave->enabled : false; } static struct ofpact_bundle * parse_bundle_actions(char *actions) { struct ofpact_bundle *bundle; struct ofpbuf ofpacts; struct ofpact *action; char *error; ofpbuf_init(&ofpacts, 0); error = bundle_parse_load(actions, &ofpacts); if (error) { ovs_fatal(0, "%s", error); } action = ofpacts.data; bundle = ofpact_get_BUNDLE(xmemdup(action, action->len)); ofpbuf_uninit(&ofpacts); if (bundle->n_slaves > MAX_SLAVES) { ovs_fatal(0, "At most %u slaves are supported", MAX_SLAVES); } return bundle; } static const char * mask_str(uint8_t mask, size_t n_bits) { static char str[9]; size_t i; n_bits = MIN(n_bits, 8); for (i = 0; i < n_bits; i++) { str[i] = (1 << i) & mask ? '1' : '0'; } str[i] = '\0'; return str; } int main(int argc, char *argv[]) { bool ok = true; struct ofpact_bundle *bundle; struct flow *flows; size_t i, n_permute, old_n_enabled; struct slave_group sg; int old_active; set_program_name(argv[0]); random_init(); if (argc != 2) { ovs_fatal(0, "usage: %s bundle_action", program_name); } bundle = parse_bundle_actions(argv[1]); /* Generate 'slaves' array. */ sg.n_slaves = 0; for (i = 0; i < bundle->n_slaves; i++) { ofp_port_t slave_id = bundle->slaves[i]; if (slave_lookup(&sg, slave_id)) { ovs_fatal(0, "Redundant slaves are not supported. "); } sg.slaves[sg.n_slaves].slave_id = slave_id; sg.n_slaves++; } /* Generate flows. */ flows = xmalloc(N_FLOWS * sizeof *flows); for (i = 0; i < N_FLOWS; i++) { random_bytes(&flows[i], sizeof flows[i]); memset(flows[i].zeros, 0, sizeof flows[i].zeros); flows[i].mpls_depth = 0; flows[i].regs[0] = ofp_to_u16(OFPP_NONE); } /* Cycles through each possible liveness permutation for the given * n_slaves. The initial state is equivalent to all slaves down, so we * skip it by starting at i = 1. We do one extra iteration to cover * transitioning from the final state back to the initial state. 
*/ old_n_enabled = 0; old_active = -1; n_permute = 1 << sg.n_slaves; for (i = 1; i <= n_permute + 1; i++) { struct slave *slave; size_t j, n_enabled, changed; double disruption, perfect; uint8_t mask; int active; mask = i % n_permute; /* Gray coding ensures that in each iteration exactly one slave * changes its liveness. This makes the expected disruption a bit * easier to calculate, and is likely similar to how failures will be * experienced in the wild. */ mask = mask ^ (mask >> 1); /* Initialize slaves. */ n_enabled = 0; for (j = 0; j < sg.n_slaves; j++) { slave = &sg.slaves[j]; slave->flow_count = 0; slave->enabled = ((1 << j) & mask) != 0; if (slave->enabled) { n_enabled++; } } active = -1; for (j = 0; j < sg.n_slaves; j++) { if (sg.slaves[j].enabled) { active = j; break; } } changed = 0; for (j = 0; j < N_FLOWS; j++) { struct flow *flow = &flows[j]; ofp_port_t old_slave_id, ofp_port; struct flow_wildcards wc; old_slave_id = u16_to_ofp(flow->regs[0]); ofp_port = bundle_execute(bundle, flow, &wc, slave_enabled_cb, &sg); flow->regs[0] = ofp_to_u16(ofp_port); if (ofp_port != OFPP_NONE) { slave_lookup(&sg, ofp_port)->flow_count++; } if (old_slave_id != ofp_port) { changed++; } } if (bundle->algorithm == NX_BD_ALG_ACTIVE_BACKUP) { perfect = active == old_active ? 0.0 : 1.0; } else { if (old_n_enabled || n_enabled) { perfect = 1.0 / MAX(old_n_enabled, n_enabled); } else { /* This will happen when 'sg.n_slaves' is 0. */ perfect = 0; } } disruption = changed / (double)N_FLOWS; printf("%s: disruption=%.2f (perfect=%.2f)", mask_str(mask, sg.n_slaves), disruption, perfect); for (j = 0 ; j < sg.n_slaves; j++) { struct slave *slave = &sg.slaves[j]; double flow_percent; flow_percent = slave->flow_count / (double)N_FLOWS; printf( " %.2f", flow_percent); if (slave->enabled) { double perfect_fp; if (bundle->algorithm == NX_BD_ALG_ACTIVE_BACKUP) { perfect_fp = j == active ? 
1.0 : 0.0; } else { perfect_fp = 1.0 / n_enabled; } if (fabs(flow_percent - perfect_fp) >= .01) { fprintf(stderr, "%s: slave %d: flow_percentage=%.5f for" " differs from perfect=%.5f by more than .01\n", mask_str(mask, sg.n_slaves), slave->slave_id, flow_percent, perfect_fp); ok = false; } } else if (slave->flow_count) { fprintf(stderr, "%s: slave %d: disabled slave received" " flows.\n", mask_str(mask, sg.n_slaves), slave->slave_id); ok = false; } } printf("\n"); if (fabs(disruption - perfect) >= .01) { fprintf(stderr, "%s: disruption=%.5f differs from perfect=%.5f by" " more than .01\n", mask_str(mask, sg.n_slaves), disruption, perfect); ok = false; } old_active = active; old_n_enabled = n_enabled; } free(bundle); free(flows); return ok ? 0 : 1; } openvswitch-2.0.1+git20140120/tests/test-byte-order.c000066400000000000000000000031751226605124000220170ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "byte-order.h" #include #include int main(void) { #ifndef __CHECKER__ /* I picked some random numbers. 
*/ const uint16_t s = 0xc9bd; const uint32_t l = 0xffe56ae8; const uint64_t ll = UINT64_C(0xb6fe878a9117ecdb); assert(htons(ntohs(s)) == s); assert(ntohs(htons(s)) == s); assert(CONSTANT_HTONS(ntohs(s)) == s); assert(ntohs(CONSTANT_HTONS(s)) == s); assert(ntohs(CONSTANT_HTONS(l)) == (uint16_t) l); assert(ntohs(CONSTANT_HTONS(ll)) == (uint16_t) ll); assert(htonl(ntohl(l)) == l); assert(ntohl(htonl(l)) == l); assert(CONSTANT_HTONL(ntohl(l)) == l); assert(ntohl(CONSTANT_HTONL(l)) == l); assert(ntohl(CONSTANT_HTONL(ll)) == (uint32_t) ll); assert(htonll(ntohll(ll)) == ll); assert(ntohll(htonll(ll)) == ll); assert(CONSTANT_HTONLL(ntohll(ll)) == ll); assert(ntohll(CONSTANT_HTONLL(ll))); #else /* __CHECKER__ */ /* Making sparse happy with this code makes it unreadable, so don't bother. */ #endif return 0; } openvswitch-2.0.1+git20140120/tests/test-classifier.c000066400000000000000000001170431226605124000220670ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* "White box" tests for classifier. * * With very few exceptions, these tests obtain complete coverage of every * basic block and every branch in the classifier implementation, e.g. a clean * report from "gcov -b". (Covering the exceptions would require finding * collisions in the hash function used for flow data, etc.) * * This test should receive a clean report from "valgrind --leak-check=full": * it frees every heap block that it allocates. 
*/ #include #include "classifier.h" #include #include #include "byte-order.h" #include "command-line.h" #include "flow.h" #include "ofp-util.h" #include "packets.h" #include "random.h" #include "unaligned.h" #undef NDEBUG #include /* Fields in a rule. */ #define CLS_FIELDS \ /* struct flow all-caps */ \ /* member name name */ \ /* ----------- -------- */ \ CLS_FIELD(tunnel.tun_id, TUN_ID) \ CLS_FIELD(metadata, METADATA) \ CLS_FIELD(nw_src, NW_SRC) \ CLS_FIELD(nw_dst, NW_DST) \ CLS_FIELD(in_port, IN_PORT) \ CLS_FIELD(vlan_tci, VLAN_TCI) \ CLS_FIELD(dl_type, DL_TYPE) \ CLS_FIELD(tp_src, TP_SRC) \ CLS_FIELD(tp_dst, TP_DST) \ CLS_FIELD(dl_src, DL_SRC) \ CLS_FIELD(dl_dst, DL_DST) \ CLS_FIELD(nw_proto, NW_PROTO) \ CLS_FIELD(nw_tos, NW_DSCP) /* Field indexes. * * (These are also indexed into struct classifier's 'tables' array.) */ enum { #define CLS_FIELD(MEMBER, NAME) CLS_F_IDX_##NAME, CLS_FIELDS #undef CLS_FIELD CLS_N_FIELDS }; /* Field information. */ struct cls_field { int ofs; /* Offset in struct flow. */ int len; /* Length in bytes. */ const char *name; /* Name (for debugging). */ }; static const struct cls_field cls_fields[CLS_N_FIELDS] = { #define CLS_FIELD(MEMBER, NAME) \ { offsetof(struct flow, MEMBER), \ sizeof ((struct flow *)0)->MEMBER, \ #NAME }, CLS_FIELDS #undef CLS_FIELD }; struct test_rule { int aux; /* Auxiliary data. */ struct cls_rule cls_rule; /* Classifier rule data. */ }; static struct test_rule * test_rule_from_cls_rule(const struct cls_rule *rule) { return rule ? CONTAINER_OF(rule, struct test_rule, cls_rule) : NULL; } static void test_rule_destroy(struct test_rule *rule) { if (rule) { cls_rule_destroy(&rule->cls_rule); free(rule); } } static struct test_rule *make_rule(int wc_fields, unsigned int priority, int value_pat); static void free_rule(struct test_rule *); static struct test_rule *clone_rule(const struct test_rule *); /* Trivial (linear) classifier. 
*/ struct tcls { size_t n_rules; size_t allocated_rules; struct test_rule **rules; }; static void tcls_init(struct tcls *tcls) { tcls->n_rules = 0; tcls->allocated_rules = 0; tcls->rules = NULL; } static void tcls_destroy(struct tcls *tcls) { if (tcls) { size_t i; for (i = 0; i < tcls->n_rules; i++) { test_rule_destroy(tcls->rules[i]); } free(tcls->rules); } } static bool tcls_is_empty(const struct tcls *tcls) { return tcls->n_rules == 0; } static struct test_rule * tcls_insert(struct tcls *tcls, const struct test_rule *rule) { size_t i; for (i = 0; i < tcls->n_rules; i++) { const struct cls_rule *pos = &tcls->rules[i]->cls_rule; if (cls_rule_equal(pos, &rule->cls_rule)) { /* Exact match. */ free_rule(tcls->rules[i]); tcls->rules[i] = clone_rule(rule); return tcls->rules[i]; } else if (pos->priority < rule->cls_rule.priority) { break; } } if (tcls->n_rules >= tcls->allocated_rules) { tcls->rules = x2nrealloc(tcls->rules, &tcls->allocated_rules, sizeof *tcls->rules); } if (i != tcls->n_rules) { memmove(&tcls->rules[i + 1], &tcls->rules[i], sizeof *tcls->rules * (tcls->n_rules - i)); } tcls->rules[i] = clone_rule(rule); tcls->n_rules++; return tcls->rules[i]; } static void tcls_remove(struct tcls *cls, const struct test_rule *rule) { size_t i; for (i = 0; i < cls->n_rules; i++) { struct test_rule *pos = cls->rules[i]; if (pos == rule) { test_rule_destroy(pos); memmove(&cls->rules[i], &cls->rules[i + 1], sizeof *cls->rules * (cls->n_rules - i - 1)); cls->n_rules--; return; } } NOT_REACHED(); } static bool match(const struct cls_rule *wild_, const struct flow *fixed) { struct match wild; int f_idx; minimatch_expand(&wild_->match, &wild); for (f_idx = 0; f_idx < CLS_N_FIELDS; f_idx++) { bool eq; if (f_idx == CLS_F_IDX_NW_SRC) { eq = !((fixed->nw_src ^ wild.flow.nw_src) & wild.wc.masks.nw_src); } else if (f_idx == CLS_F_IDX_NW_DST) { eq = !((fixed->nw_dst ^ wild.flow.nw_dst) & wild.wc.masks.nw_dst); } else if (f_idx == CLS_F_IDX_TP_SRC) { eq = !((fixed->tp_src ^ 
wild.flow.tp_src) & wild.wc.masks.tp_src); } else if (f_idx == CLS_F_IDX_TP_DST) { eq = !((fixed->tp_dst ^ wild.flow.tp_dst) & wild.wc.masks.tp_dst); } else if (f_idx == CLS_F_IDX_DL_SRC) { eq = eth_addr_equal_except(fixed->dl_src, wild.flow.dl_src, wild.wc.masks.dl_src); } else if (f_idx == CLS_F_IDX_DL_DST) { eq = eth_addr_equal_except(fixed->dl_dst, wild.flow.dl_dst, wild.wc.masks.dl_dst); } else if (f_idx == CLS_F_IDX_VLAN_TCI) { eq = !((fixed->vlan_tci ^ wild.flow.vlan_tci) & wild.wc.masks.vlan_tci); } else if (f_idx == CLS_F_IDX_TUN_ID) { eq = !((fixed->tunnel.tun_id ^ wild.flow.tunnel.tun_id) & wild.wc.masks.tunnel.tun_id); } else if (f_idx == CLS_F_IDX_METADATA) { eq = !((fixed->metadata ^ wild.flow.metadata) & wild.wc.masks.metadata); } else if (f_idx == CLS_F_IDX_NW_DSCP) { eq = !((fixed->nw_tos ^ wild.flow.nw_tos) & (wild.wc.masks.nw_tos & IP_DSCP_MASK)); } else if (f_idx == CLS_F_IDX_NW_PROTO) { eq = !((fixed->nw_proto ^ wild.flow.nw_proto) & wild.wc.masks.nw_proto); } else if (f_idx == CLS_F_IDX_DL_TYPE) { eq = !((fixed->dl_type ^ wild.flow.dl_type) & wild.wc.masks.dl_type); } else if (f_idx == CLS_F_IDX_IN_PORT) { eq = !((fixed->in_port.ofp_port ^ wild.flow.in_port.ofp_port) & wild.wc.masks.in_port.ofp_port); } else { NOT_REACHED(); } if (!eq) { return false; } } return true; } static struct cls_rule * tcls_lookup(const struct tcls *cls, const struct flow *flow) { size_t i; for (i = 0; i < cls->n_rules; i++) { struct test_rule *pos = cls->rules[i]; if (match(&pos->cls_rule, flow)) { return &pos->cls_rule; } } return NULL; } static void tcls_delete_matches(struct tcls *cls, const struct cls_rule *target) { size_t i; for (i = 0; i < cls->n_rules; ) { struct test_rule *pos = cls->rules[i]; if (!minimask_has_extra(&pos->cls_rule.match.mask, &target->match.mask)) { struct flow flow; miniflow_expand(&pos->cls_rule.match.flow, &flow); if (match(target, &flow)) { tcls_remove(cls, pos); continue; } } i++; } } static ovs_be32 nw_src_values[] = { 
CONSTANT_HTONL(0xc0a80001), CONSTANT_HTONL(0xc0a04455) }; static ovs_be32 nw_dst_values[] = { CONSTANT_HTONL(0xc0a80002), CONSTANT_HTONL(0xc0a04455) }; static ovs_be64 tun_id_values[] = { 0, CONSTANT_HTONLL(UINT64_C(0xfedcba9876543210)) }; static ovs_be64 metadata_values[] = { 0, CONSTANT_HTONLL(UINT64_C(0xfedcba9876543210)) }; static ofp_port_t in_port_values[] = { OFP_PORT_C(1), OFPP_LOCAL }; static ovs_be16 vlan_tci_values[] = { CONSTANT_HTONS(101), CONSTANT_HTONS(0) }; static ovs_be16 dl_type_values[] = { CONSTANT_HTONS(ETH_TYPE_IP), CONSTANT_HTONS(ETH_TYPE_ARP) }; static ovs_be16 tp_src_values[] = { CONSTANT_HTONS(49362), CONSTANT_HTONS(80) }; static ovs_be16 tp_dst_values[] = { CONSTANT_HTONS(6667), CONSTANT_HTONS(22) }; static uint8_t dl_src_values[][6] = { { 0x00, 0x02, 0xe3, 0x0f, 0x80, 0xa4 }, { 0x5e, 0x33, 0x7f, 0x5f, 0x1e, 0x99 } }; static uint8_t dl_dst_values[][6] = { { 0x4a, 0x27, 0x71, 0xae, 0x64, 0xc1 }, { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } }; static uint8_t nw_proto_values[] = { IPPROTO_TCP, IPPROTO_ICMP }; static uint8_t nw_dscp_values[] = { 48, 0 }; static void *values[CLS_N_FIELDS][2]; static void init_values(void) { values[CLS_F_IDX_TUN_ID][0] = &tun_id_values[0]; values[CLS_F_IDX_TUN_ID][1] = &tun_id_values[1]; values[CLS_F_IDX_METADATA][0] = &metadata_values[0]; values[CLS_F_IDX_METADATA][1] = &metadata_values[1]; values[CLS_F_IDX_IN_PORT][0] = &in_port_values[0]; values[CLS_F_IDX_IN_PORT][1] = &in_port_values[1]; values[CLS_F_IDX_VLAN_TCI][0] = &vlan_tci_values[0]; values[CLS_F_IDX_VLAN_TCI][1] = &vlan_tci_values[1]; values[CLS_F_IDX_DL_SRC][0] = dl_src_values[0]; values[CLS_F_IDX_DL_SRC][1] = dl_src_values[1]; values[CLS_F_IDX_DL_DST][0] = dl_dst_values[0]; values[CLS_F_IDX_DL_DST][1] = dl_dst_values[1]; values[CLS_F_IDX_DL_TYPE][0] = &dl_type_values[0]; values[CLS_F_IDX_DL_TYPE][1] = &dl_type_values[1]; values[CLS_F_IDX_NW_SRC][0] = &nw_src_values[0]; values[CLS_F_IDX_NW_SRC][1] = &nw_src_values[1]; values[CLS_F_IDX_NW_DST][0] = 
&nw_dst_values[0]; values[CLS_F_IDX_NW_DST][1] = &nw_dst_values[1]; values[CLS_F_IDX_NW_PROTO][0] = &nw_proto_values[0]; values[CLS_F_IDX_NW_PROTO][1] = &nw_proto_values[1]; values[CLS_F_IDX_NW_DSCP][0] = &nw_dscp_values[0]; values[CLS_F_IDX_NW_DSCP][1] = &nw_dscp_values[1]; values[CLS_F_IDX_TP_SRC][0] = &tp_src_values[0]; values[CLS_F_IDX_TP_SRC][1] = &tp_src_values[1]; values[CLS_F_IDX_TP_DST][0] = &tp_dst_values[0]; values[CLS_F_IDX_TP_DST][1] = &tp_dst_values[1]; } #define N_NW_SRC_VALUES ARRAY_SIZE(nw_src_values) #define N_NW_DST_VALUES ARRAY_SIZE(nw_dst_values) #define N_TUN_ID_VALUES ARRAY_SIZE(tun_id_values) #define N_METADATA_VALUES ARRAY_SIZE(metadata_values) #define N_IN_PORT_VALUES ARRAY_SIZE(in_port_values) #define N_VLAN_TCI_VALUES ARRAY_SIZE(vlan_tci_values) #define N_DL_TYPE_VALUES ARRAY_SIZE(dl_type_values) #define N_TP_SRC_VALUES ARRAY_SIZE(tp_src_values) #define N_TP_DST_VALUES ARRAY_SIZE(tp_dst_values) #define N_DL_SRC_VALUES ARRAY_SIZE(dl_src_values) #define N_DL_DST_VALUES ARRAY_SIZE(dl_dst_values) #define N_NW_PROTO_VALUES ARRAY_SIZE(nw_proto_values) #define N_NW_DSCP_VALUES ARRAY_SIZE(nw_dscp_values) #define N_FLOW_VALUES (N_NW_SRC_VALUES * \ N_NW_DST_VALUES * \ N_TUN_ID_VALUES * \ N_IN_PORT_VALUES * \ N_VLAN_TCI_VALUES * \ N_DL_TYPE_VALUES * \ N_TP_SRC_VALUES * \ N_TP_DST_VALUES * \ N_DL_SRC_VALUES * \ N_DL_DST_VALUES * \ N_NW_PROTO_VALUES * \ N_NW_DSCP_VALUES) static unsigned int get_value(unsigned int *x, unsigned n_values) { unsigned int rem = *x % n_values; *x /= n_values; return rem; } static void compare_classifiers(struct classifier *cls, struct tcls *tcls) OVS_REQ_RDLOCK(cls->rwlock) { static const int confidence = 500; unsigned int i; assert(classifier_count(cls) == tcls->n_rules); for (i = 0; i < confidence; i++) { struct cls_rule *cr0, *cr1; struct flow flow; unsigned int x; x = random_range(N_FLOW_VALUES); memset(&flow, 0, sizeof flow); flow.nw_src = nw_src_values[get_value(&x, N_NW_SRC_VALUES)]; flow.nw_dst = 
nw_dst_values[get_value(&x, N_NW_DST_VALUES)]; flow.tunnel.tun_id = tun_id_values[get_value(&x, N_TUN_ID_VALUES)]; flow.metadata = metadata_values[get_value(&x, N_METADATA_VALUES)]; flow.in_port.ofp_port = in_port_values[get_value(&x, N_IN_PORT_VALUES)]; flow.vlan_tci = vlan_tci_values[get_value(&x, N_VLAN_TCI_VALUES)]; flow.dl_type = dl_type_values[get_value(&x, N_DL_TYPE_VALUES)]; flow.tp_src = tp_src_values[get_value(&x, N_TP_SRC_VALUES)]; flow.tp_dst = tp_dst_values[get_value(&x, N_TP_DST_VALUES)]; memcpy(flow.dl_src, dl_src_values[get_value(&x, N_DL_SRC_VALUES)], ETH_ADDR_LEN); memcpy(flow.dl_dst, dl_dst_values[get_value(&x, N_DL_DST_VALUES)], ETH_ADDR_LEN); flow.nw_proto = nw_proto_values[get_value(&x, N_NW_PROTO_VALUES)]; flow.nw_tos = nw_dscp_values[get_value(&x, N_NW_DSCP_VALUES)]; cr0 = classifier_lookup(cls, &flow, NULL); cr1 = tcls_lookup(tcls, &flow); assert((cr0 == NULL) == (cr1 == NULL)); if (cr0 != NULL) { const struct test_rule *tr0 = test_rule_from_cls_rule(cr0); const struct test_rule *tr1 = test_rule_from_cls_rule(cr1); assert(cls_rule_equal(cr0, cr1)); assert(tr0->aux == tr1->aux); } } } static void destroy_classifier(struct classifier *cls) { struct test_rule *rule, *next_rule; struct cls_cursor cursor; ovs_rwlock_wrlock(&cls->rwlock); cls_cursor_init(&cursor, cls, NULL); CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, cls_rule, &cursor) { classifier_remove(cls, &rule->cls_rule); free_rule(rule); } ovs_rwlock_unlock(&cls->rwlock); classifier_destroy(cls); } static void check_tables(const struct classifier *cls, int n_tables, int n_rules, int n_dups) OVS_REQ_RDLOCK(cls->rwlock) { const struct cls_table *table; struct test_rule *test_rule; struct cls_cursor cursor; int found_tables = 0; int found_rules = 0; int found_dups = 0; int found_rules2 = 0; HMAP_FOR_EACH (table, hmap_node, &cls->tables) { const struct cls_rule *head; unsigned int max_priority = 0; unsigned int max_count = 0; assert(!hmap_is_empty(&table->rules)); found_tables++; HMAP_FOR_EACH 
(head, hmap_node, &table->rules) { unsigned int prev_priority = UINT_MAX; const struct cls_rule *rule; if (head->priority > max_priority) { max_priority = head->priority; max_count = 1; } else if (head->priority == max_priority) { ++max_count; } found_rules++; LIST_FOR_EACH (rule, list, &head->list) { assert(rule->priority < prev_priority); assert(rule->priority <= table->max_priority); prev_priority = rule->priority; found_rules++; found_dups++; assert(classifier_find_rule_exactly(cls, rule) == rule); } } assert(table->max_priority == max_priority); assert(table->max_count == max_count); } assert(found_tables == hmap_count(&cls->tables)); assert(n_tables == -1 || n_tables == hmap_count(&cls->tables)); assert(n_rules == -1 || found_rules == n_rules); assert(n_dups == -1 || found_dups == n_dups); cls_cursor_init(&cursor, cls, NULL); CLS_CURSOR_FOR_EACH (test_rule, cls_rule, &cursor) { found_rules2++; } assert(found_rules == found_rules2); } static struct test_rule * make_rule(int wc_fields, unsigned int priority, int value_pat) { const struct cls_field *f; struct test_rule *rule; struct match match; match_init_catchall(&match); for (f = &cls_fields[0]; f < &cls_fields[CLS_N_FIELDS]; f++) { int f_idx = f - cls_fields; int value_idx = (value_pat & (1u << f_idx)) != 0; memcpy((char *) &match.flow + f->ofs, values[f_idx][value_idx], f->len); if (f_idx == CLS_F_IDX_NW_SRC) { match.wc.masks.nw_src = htonl(UINT32_MAX); } else if (f_idx == CLS_F_IDX_NW_DST) { match.wc.masks.nw_dst = htonl(UINT32_MAX); } else if (f_idx == CLS_F_IDX_TP_SRC) { match.wc.masks.tp_src = htons(UINT16_MAX); } else if (f_idx == CLS_F_IDX_TP_DST) { match.wc.masks.tp_dst = htons(UINT16_MAX); } else if (f_idx == CLS_F_IDX_DL_SRC) { memset(match.wc.masks.dl_src, 0xff, ETH_ADDR_LEN); } else if (f_idx == CLS_F_IDX_DL_DST) { memset(match.wc.masks.dl_dst, 0xff, ETH_ADDR_LEN); } else if (f_idx == CLS_F_IDX_VLAN_TCI) { match.wc.masks.vlan_tci = htons(UINT16_MAX); } else if (f_idx == CLS_F_IDX_TUN_ID) { 
match.wc.masks.tunnel.tun_id = htonll(UINT64_MAX); } else if (f_idx == CLS_F_IDX_METADATA) { match.wc.masks.metadata = htonll(UINT64_MAX); } else if (f_idx == CLS_F_IDX_NW_DSCP) { match.wc.masks.nw_tos |= IP_DSCP_MASK; } else if (f_idx == CLS_F_IDX_NW_PROTO) { match.wc.masks.nw_proto = UINT8_MAX; } else if (f_idx == CLS_F_IDX_DL_TYPE) { match.wc.masks.dl_type = htons(UINT16_MAX); } else if (f_idx == CLS_F_IDX_IN_PORT) { match.wc.masks.in_port.ofp_port = u16_to_ofp(UINT16_MAX); } else { NOT_REACHED(); } } rule = xzalloc(sizeof *rule); cls_rule_init(&rule->cls_rule, &match, wc_fields ? priority : UINT_MAX); return rule; } static struct test_rule * clone_rule(const struct test_rule *src) { struct test_rule *dst; dst = xmalloc(sizeof *dst); dst->aux = src->aux; cls_rule_clone(&dst->cls_rule, &src->cls_rule); return dst; } static void free_rule(struct test_rule *rule) { cls_rule_destroy(&rule->cls_rule); free(rule); } static void shuffle(unsigned int *p, size_t n) { for (; n > 1; n--, p++) { unsigned int *q = &p[random_range(n)]; unsigned int tmp = *p; *p = *q; *q = tmp; } } static void shuffle_u32s(uint32_t *p, size_t n) { for (; n > 1; n--, p++) { uint32_t *q = &p[random_range(n)]; uint32_t tmp = *p; *p = *q; *q = tmp; } } /* Classifier tests. */ /* Tests an empty classifier. */ static void test_empty(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct classifier cls; struct tcls tcls; classifier_init(&cls); ovs_rwlock_rdlock(&cls.rwlock); tcls_init(&tcls); assert(classifier_is_empty(&cls)); assert(tcls_is_empty(&tcls)); compare_classifiers(&cls, &tcls); ovs_rwlock_unlock(&cls.rwlock); classifier_destroy(&cls); tcls_destroy(&tcls); } /* Destroys a null classifier. */ static void test_destroy_null(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { classifier_destroy(NULL); } /* Tests classification with one rule at a time. */ static void test_single_rule(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned int wc_fields; /* Hilarious. 
*/ for (wc_fields = 0; wc_fields < (1u << CLS_N_FIELDS); wc_fields++) { struct classifier cls; struct test_rule *rule, *tcls_rule; struct tcls tcls; rule = make_rule(wc_fields, hash_bytes(&wc_fields, sizeof wc_fields, 0), 0); classifier_init(&cls); ovs_rwlock_wrlock(&cls.rwlock); tcls_init(&tcls); tcls_rule = tcls_insert(&tcls, rule); classifier_insert(&cls, &rule->cls_rule); check_tables(&cls, 1, 1, 0); compare_classifiers(&cls, &tcls); classifier_remove(&cls, &rule->cls_rule); tcls_remove(&tcls, tcls_rule); assert(classifier_is_empty(&cls)); assert(tcls_is_empty(&tcls)); compare_classifiers(&cls, &tcls); free_rule(rule); ovs_rwlock_unlock(&cls.rwlock); classifier_destroy(&cls); tcls_destroy(&tcls); } } /* Tests replacing one rule by another. */ static void test_rule_replacement(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned int wc_fields; for (wc_fields = 0; wc_fields < (1u << CLS_N_FIELDS); wc_fields++) { struct classifier cls; struct test_rule *rule1; struct test_rule *rule2; struct tcls tcls; rule1 = make_rule(wc_fields, OFP_DEFAULT_PRIORITY, UINT_MAX); rule2 = make_rule(wc_fields, OFP_DEFAULT_PRIORITY, UINT_MAX); rule2->aux += 5; rule2->aux += 5; classifier_init(&cls); ovs_rwlock_wrlock(&cls.rwlock); tcls_init(&tcls); tcls_insert(&tcls, rule1); classifier_insert(&cls, &rule1->cls_rule); check_tables(&cls, 1, 1, 0); compare_classifiers(&cls, &tcls); tcls_destroy(&tcls); tcls_init(&tcls); tcls_insert(&tcls, rule2); assert(test_rule_from_cls_rule( classifier_replace(&cls, &rule2->cls_rule)) == rule1); free_rule(rule1); check_tables(&cls, 1, 1, 0); compare_classifiers(&cls, &tcls); tcls_destroy(&tcls); ovs_rwlock_unlock(&cls.rwlock); destroy_classifier(&cls); } } static int factorial(int n_items) { int n, i; n = 1; for (i = 2; i <= n_items; i++) { n *= i; } return n; } static void swap(int *a, int *b) { int tmp = *a; *a = *b; *b = tmp; } static void reverse(int *a, int n) { int i; for (i = 0; i < n / 2; i++) { int j = n - (i + 1); swap(&a[i], &a[j]); } 
} static bool next_permutation(int *a, int n) { int k; for (k = n - 2; k >= 0; k--) { if (a[k] < a[k + 1]) { int l; for (l = n - 1; ; l--) { if (a[l] > a[k]) { swap(&a[k], &a[l]); reverse(a + (k + 1), n - (k + 1)); return true; } } } } return false; } /* Tests classification with rules that have the same matching criteria. */ static void test_many_rules_in_one_list (int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { enum { N_RULES = 3 }; int n_pris; for (n_pris = N_RULES; n_pris >= 1; n_pris--) { int ops[N_RULES * 2]; int pris[N_RULES]; int n_permutations; int i; pris[0] = 0; for (i = 1; i < N_RULES; i++) { pris[i] = pris[i - 1] + (n_pris > i); } for (i = 0; i < N_RULES * 2; i++) { ops[i] = i / 2; } n_permutations = 0; do { struct test_rule *rules[N_RULES]; struct test_rule *tcls_rules[N_RULES]; int pri_rules[N_RULES]; struct classifier cls; struct tcls tcls; n_permutations++; for (i = 0; i < N_RULES; i++) { rules[i] = make_rule(456, pris[i], 0); tcls_rules[i] = NULL; pri_rules[i] = -1; } classifier_init(&cls); ovs_rwlock_wrlock(&cls.rwlock); tcls_init(&tcls); for (i = 0; i < ARRAY_SIZE(ops); i++) { int j = ops[i]; int m, n; if (!tcls_rules[j]) { struct test_rule *displaced_rule; tcls_rules[j] = tcls_insert(&tcls, rules[j]); displaced_rule = test_rule_from_cls_rule( classifier_replace(&cls, &rules[j]->cls_rule)); if (pri_rules[pris[j]] >= 0) { int k = pri_rules[pris[j]]; assert(displaced_rule != NULL); assert(displaced_rule != rules[j]); assert(pris[j] == displaced_rule->cls_rule.priority); tcls_rules[k] = NULL; } else { assert(displaced_rule == NULL); } pri_rules[pris[j]] = j; } else { classifier_remove(&cls, &rules[j]->cls_rule); tcls_remove(&tcls, tcls_rules[j]); tcls_rules[j] = NULL; pri_rules[pris[j]] = -1; } n = 0; for (m = 0; m < N_RULES; m++) { n += tcls_rules[m] != NULL; } check_tables(&cls, n > 0, n, n - 1); compare_classifiers(&cls, &tcls); } ovs_rwlock_unlock(&cls.rwlock); classifier_destroy(&cls); tcls_destroy(&tcls); for (i = 0; i < N_RULES; i++) { 
free_rule(rules[i]); } } while (next_permutation(ops, ARRAY_SIZE(ops))); assert(n_permutations == (factorial(N_RULES * 2) >> N_RULES)); } } static int count_ones(unsigned long int x) { int n = 0; while (x) { x = zero_rightmost_1bit(x); n++; } return n; } static bool array_contains(int *array, int n, int value) { int i; for (i = 0; i < n; i++) { if (array[i] == value) { return true; } } return false; } /* Tests classification with two rules at a time that fall into the same * table but different lists. */ static void test_many_rules_in_one_table(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { int iteration; for (iteration = 0; iteration < 50; iteration++) { enum { N_RULES = 20 }; struct test_rule *rules[N_RULES]; struct test_rule *tcls_rules[N_RULES]; struct classifier cls; struct tcls tcls; int value_pats[N_RULES]; int value_mask; int wcf; int i; do { wcf = random_uint32() & ((1u << CLS_N_FIELDS) - 1); value_mask = ~wcf & ((1u << CLS_N_FIELDS) - 1); } while ((1 << count_ones(value_mask)) < N_RULES); classifier_init(&cls); ovs_rwlock_wrlock(&cls.rwlock); tcls_init(&tcls); for (i = 0; i < N_RULES; i++) { unsigned int priority = random_uint32(); do { value_pats[i] = random_uint32() & value_mask; } while (array_contains(value_pats, i, value_pats[i])); rules[i] = make_rule(wcf, priority, value_pats[i]); tcls_rules[i] = tcls_insert(&tcls, rules[i]); classifier_insert(&cls, &rules[i]->cls_rule); check_tables(&cls, 1, i + 1, 0); compare_classifiers(&cls, &tcls); } for (i = 0; i < N_RULES; i++) { tcls_remove(&tcls, tcls_rules[i]); classifier_remove(&cls, &rules[i]->cls_rule); free_rule(rules[i]); check_tables(&cls, i < N_RULES - 1, N_RULES - (i + 1), 0); compare_classifiers(&cls, &tcls); } ovs_rwlock_unlock(&cls.rwlock); classifier_destroy(&cls); tcls_destroy(&tcls); } } /* Tests classification with many rules at a time that fall into random lists * in 'n' tables. 
*/ static void test_many_rules_in_n_tables(int n_tables) { enum { MAX_RULES = 50 }; int wcfs[10]; int iteration; int i; assert(n_tables < 10); for (i = 0; i < n_tables; i++) { do { wcfs[i] = random_uint32() & ((1u << CLS_N_FIELDS) - 1); } while (array_contains(wcfs, i, wcfs[i])); } for (iteration = 0; iteration < 30; iteration++) { unsigned int priorities[MAX_RULES]; struct classifier cls; struct tcls tcls; random_set_seed(iteration + 1); for (i = 0; i < MAX_RULES; i++) { priorities[i] = i * 129; } shuffle(priorities, ARRAY_SIZE(priorities)); classifier_init(&cls); ovs_rwlock_wrlock(&cls.rwlock); tcls_init(&tcls); for (i = 0; i < MAX_RULES; i++) { struct test_rule *rule; unsigned int priority = priorities[i]; int wcf = wcfs[random_range(n_tables)]; int value_pat = random_uint32() & ((1u << CLS_N_FIELDS) - 1); rule = make_rule(wcf, priority, value_pat); tcls_insert(&tcls, rule); classifier_insert(&cls, &rule->cls_rule); check_tables(&cls, -1, i + 1, -1); compare_classifiers(&cls, &tcls); } while (!classifier_is_empty(&cls)) { struct test_rule *rule, *next_rule; struct test_rule *target; struct cls_cursor cursor; target = clone_rule(tcls.rules[random_range(tcls.n_rules)]); cls_cursor_init(&cursor, &cls, &target->cls_rule); CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, cls_rule, &cursor) { classifier_remove(&cls, &rule->cls_rule); free_rule(rule); } tcls_delete_matches(&tcls, &target->cls_rule); compare_classifiers(&cls, &tcls); check_tables(&cls, -1, -1, -1); free_rule(target); } ovs_rwlock_unlock(&cls.rwlock); destroy_classifier(&cls); tcls_destroy(&tcls); } } static void test_many_rules_in_two_tables(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { test_many_rules_in_n_tables(2); } static void test_many_rules_in_five_tables(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { test_many_rules_in_n_tables(5); } /* Miniflow tests. 
*/ static uint32_t random_value(void) { static const uint32_t values[] = { 0xffffffff, 0xaaaaaaaa, 0x55555555, 0x80000000, 0x00000001, 0xface0000, 0x00d00d1e, 0xdeadbeef }; return values[random_range(ARRAY_SIZE(values))]; } static bool choose(unsigned int n, unsigned int *idxp) { if (*idxp < n) { return true; } else { *idxp -= n; return false; } } static bool init_consecutive_values(int n_consecutive, struct flow *flow, unsigned int *idxp) { uint32_t *flow_u32 = (uint32_t *) flow; if (choose(FLOW_U32S - n_consecutive + 1, idxp)) { int i; for (i = 0; i < n_consecutive; i++) { flow_u32[*idxp + i] = random_value(); } return true; } else { return false; } } static bool next_random_flow(struct flow *flow, unsigned int idx) { uint32_t *flow_u32 = (uint32_t *) flow; int i; memset(flow, 0, sizeof *flow); /* Empty flow. */ if (choose(1, &idx)) { return true; } /* All flows with a small number of consecutive nonzero values. */ for (i = 1; i <= 4; i++) { if (init_consecutive_values(i, flow, &idx)) { return true; } } /* All flows with a large number of consecutive nonzero values. */ for (i = FLOW_U32S - 4; i <= FLOW_U32S; i++) { if (init_consecutive_values(i, flow, &idx)) { return true; } } /* All flows with exactly two nonconsecutive nonzero values. */ if (choose((FLOW_U32S - 1) * (FLOW_U32S - 2) / 2, &idx)) { int ofs1; for (ofs1 = 0; ofs1 < FLOW_U32S - 2; ofs1++) { int ofs2; for (ofs2 = ofs1 + 2; ofs2 < FLOW_U32S; ofs2++) { if (choose(1, &idx)) { flow_u32[ofs1] = random_value(); flow_u32[ofs2] = random_value(); return true; } } } NOT_REACHED(); } /* 16 randomly chosen flows with N >= 3 nonzero values. 
*/ if (choose(16 * (FLOW_U32S - 4), &idx)) { int n = idx / 16 + 3; int i; for (i = 0; i < n; i++) { flow_u32[i] = random_value(); } shuffle_u32s(flow_u32, FLOW_U32S); return true; } return false; } static void any_random_flow(struct flow *flow) { static unsigned int max; if (!max) { while (next_random_flow(flow, max)) { max++; } } next_random_flow(flow, random_range(max)); } static void toggle_masked_flow_bits(struct flow *flow, const struct flow_wildcards *mask) { const uint32_t *mask_u32 = (const uint32_t *) &mask->masks; uint32_t *flow_u32 = (uint32_t *) flow; int i; for (i = 0; i < FLOW_U32S; i++) { if (mask_u32[i] != 0) { uint32_t bit; do { bit = 1u << random_range(32); } while (!(bit & mask_u32[i])); flow_u32[i] ^= bit; } } } static void wildcard_extra_bits(struct flow_wildcards *mask) { uint32_t *mask_u32 = (uint32_t *) &mask->masks; int i; for (i = 0; i < FLOW_U32S; i++) { if (mask_u32[i] != 0) { uint32_t bit; do { bit = 1u << random_range(32); } while (!(bit & mask_u32[i])); mask_u32[i] &= ~bit; } } } static void test_miniflow(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct flow flow; unsigned int idx; random_set_seed(0xb3faca38); for (idx = 0; next_random_flow(&flow, idx); idx++) { const uint32_t *flow_u32 = (const uint32_t *) &flow; struct miniflow miniflow, miniflow2, miniflow3; struct flow flow2, flow3; struct flow_wildcards mask; struct minimask minimask; int i; /* Convert flow to miniflow. */ miniflow_init(&miniflow, &flow); /* Check that the flow equals its miniflow. */ assert(miniflow_get_vid(&miniflow) == vlan_tci_to_vid(flow.vlan_tci)); for (i = 0; i < FLOW_U32S; i++) { assert(miniflow_get(&miniflow, i) == flow_u32[i]); } /* Check that the miniflow equals itself. */ assert(miniflow_equal(&miniflow, &miniflow)); /* Convert miniflow back to flow and verify that it's the same. */ miniflow_expand(&miniflow, &flow2); assert(flow_equal(&flow, &flow2)); /* Check that copying a miniflow works properly. 
*/ miniflow_clone(&miniflow2, &miniflow); assert(miniflow_equal(&miniflow, &miniflow2)); assert(miniflow_hash(&miniflow, 0) == miniflow_hash(&miniflow2, 0)); miniflow_expand(&miniflow2, &flow3); assert(flow_equal(&flow, &flow3)); /* Check that masked matches work as expected for identical flows and * miniflows. */ do { next_random_flow(&mask.masks, 1); } while (flow_wildcards_is_catchall(&mask)); minimask_init(&minimask, &mask); assert(minimask_is_catchall(&minimask) == flow_wildcards_is_catchall(&mask)); assert(miniflow_equal_in_minimask(&miniflow, &miniflow2, &minimask)); assert(miniflow_equal_flow_in_minimask(&miniflow, &flow2, &minimask)); assert(miniflow_hash_in_minimask(&miniflow, &minimask, 0x12345678) == flow_hash_in_minimask(&flow, &minimask, 0x12345678)); /* Check that masked matches work as expected for differing flows and * miniflows. */ toggle_masked_flow_bits(&flow2, &mask); assert(!miniflow_equal_flow_in_minimask(&miniflow, &flow2, &minimask)); miniflow_init(&miniflow3, &flow2); assert(!miniflow_equal_in_minimask(&miniflow, &miniflow3, &minimask)); /* Clean up. 
*/ miniflow_destroy(&miniflow); miniflow_destroy(&miniflow2); miniflow_destroy(&miniflow3); minimask_destroy(&minimask); } } static void test_minimask_has_extra(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct flow_wildcards catchall; struct minimask minicatchall; struct flow flow; unsigned int idx; flow_wildcards_init_catchall(&catchall); minimask_init(&minicatchall, &catchall); assert(minimask_is_catchall(&minicatchall)); random_set_seed(0x2ec7905b); for (idx = 0; next_random_flow(&flow, idx); idx++) { struct flow_wildcards mask; struct minimask minimask; mask.masks = flow; minimask_init(&minimask, &mask); assert(!minimask_has_extra(&minimask, &minimask)); assert(minimask_has_extra(&minicatchall, &minimask) == !minimask_is_catchall(&minimask)); if (!minimask_is_catchall(&minimask)) { struct minimask minimask2; wildcard_extra_bits(&mask); minimask_init(&minimask2, &mask); assert(minimask_has_extra(&minimask2, &minimask)); assert(!minimask_has_extra(&minimask, &minimask2)); minimask_destroy(&minimask2); } minimask_destroy(&minimask); } minimask_destroy(&minicatchall); } static void test_minimask_combine(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct flow_wildcards catchall; struct minimask minicatchall; struct flow flow; unsigned int idx; flow_wildcards_init_catchall(&catchall); minimask_init(&minicatchall, &catchall); assert(minimask_is_catchall(&minicatchall)); random_set_seed(0x181bf0cd); for (idx = 0; next_random_flow(&flow, idx); idx++) { struct minimask minimask, minimask2, minicombined; struct flow_wildcards mask, mask2, combined, combined2; uint32_t storage[FLOW_U32S]; struct flow flow2; mask.masks = flow; minimask_init(&minimask, &mask); minimask_combine(&minicombined, &minimask, &minicatchall, storage); assert(minimask_is_catchall(&minicombined)); any_random_flow(&flow2); mask2.masks = flow2; minimask_init(&minimask2, &mask2); minimask_combine(&minicombined, &minimask, &minimask2, storage); flow_wildcards_and(&combined, &mask, &mask2); 
minimask_expand(&minicombined, &combined2); assert(flow_wildcards_equal(&combined, &combined2)); minimask_destroy(&minimask); minimask_destroy(&minimask2); } minimask_destroy(&minicatchall); } static const struct command commands[] = { /* Classifier tests. */ {"empty", 0, 0, test_empty}, {"destroy-null", 0, 0, test_destroy_null}, {"single-rule", 0, 0, test_single_rule}, {"rule-replacement", 0, 0, test_rule_replacement}, {"many-rules-in-one-list", 0, 0, test_many_rules_in_one_list}, {"many-rules-in-one-table", 0, 0, test_many_rules_in_one_table}, {"many-rules-in-two-tables", 0, 0, test_many_rules_in_two_tables}, {"many-rules-in-five-tables", 0, 0, test_many_rules_in_five_tables}, /* Miniflow and minimask tests. */ {"miniflow", 0, 0, test_miniflow}, {"minimask_has_extra", 0, 0, test_minimask_has_extra}, {"minimask_combine", 0, 0, test_minimask_combine}, {NULL, 0, 0, NULL}, }; int main(int argc, char *argv[]) { set_program_name(argv[0]); init_values(); run_command(argc - 1, argv + 1, commands); return 0; } openvswitch-2.0.1+git20140120/tests/test-csum.c000066400000000000000000000201101226605124000206760ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "csum.h" #include "crc32c.h" #include #include #include #include #include #include "random.h" #include "unaligned.h" #include "util.h" #undef NDEBUG #include struct test_case { char *data; size_t size; /* Test requires a multiple of 4. 
*/ uint16_t csum; }; #define TEST_CASE(DATA, CSUM) { DATA, (sizeof DATA) - 1, CSUM } static const struct test_case test_cases[] = { /* RFC 1071 section 3. */ TEST_CASE("\x00\x01\xf2\x03" "\xf4\xf5\xf6\xf7", 0xffff - 0xddf2 /* ~0xddf2 */), /* http://www.sbprojects.com/projects/tcpip/theory/theory14.htm */ TEST_CASE("\x45\x00\x00\x28" "\x1F\xFD\x40\x00" "\x80\x06\x00\x00" "\xC0\xA8\x3B\x0A" "\xC0\xA8\x3B\x32", 0xe345), /* http://mathforum.org/library/drmath/view/54379.html */ TEST_CASE("\x86\x5e\xac\x60" "\x71\x2a\x81\xb5", 0xda60), }; static void mark(char c) { putchar(c); fflush(stdout); } #if 0 /* This code is useful for generating new test cases for RFC 1624 section 4. */ static void generate_rfc1624_test_case(void) { int i; for (i = 0; i < 10000000; i++) { uint32_t data[8]; int j; for (j = 0; j < 8; j++) { data[j] = random_uint32(); } data[7] &= 0x0000ffff; data[7] |= 0x55550000; if (ntohs(~csum(data, sizeof data - 2)) == 0xcd7a) { ovs_hex_dump(stdout, data, sizeof data, 0, false); exit(0); } } } #endif /* Make sure we get the calculation in RFC 1624 section 4 correct. */ static void test_rfc1624(void) { /* "...an IP packet header in which a 16-bit field m = 0x5555..." */ uint8_t data[32] = { 0xfe, 0x8f, 0xc1, 0x14, 0x4b, 0x6f, 0x70, 0x2a, 0x80, 0x29, 0x78, 0xc0, 0x58, 0x81, 0x77, 0xaa, 0x66, 0x64, 0xfc, 0x96, 0x63, 0x97, 0x64, 0xee, 0x12, 0x53, 0x1d, 0xa9, 0x2d, 0xa9, 0x55, 0x55 }; /* "...the one's complement sum of all other header octets is 0xCD7A." */ assert(ntohs(csum(data, sizeof data - 2)) == 0xffff - 0xcd7a); /* "...the header checksum would be: HC = ~(0xCD7A + 0x5555) = ~0x22D0 = 0xDD2F" */ assert(ntohs(csum(data, sizeof data)) == 0xdd2f); /* "a 16-bit field m = 0x5555 changes to m' = 0x3285..." */ data[30] = 0x32; data[31] = 0x85; /* "The new checksum via recomputation is: HC' = ~(0xCD7A + 0x3285) = ~0xFFFF = 0x0000" */ assert(ntohs(csum(data, sizeof data)) == 0x0000); /* "Applying [Eqn. 
3] to the example above, we get the correct result: HC' = ~(C + (-m) + m') = ~(0x22D0 + ~0x5555 + 0x3285) = ~0xFFFF = 0x0000" */ assert(recalc_csum16(htons(0xdd2f), htons(0x5555), htons(0x3285)) == htons(0x0000)); mark('#'); } /* CRC32C checksum tests, based on Intel IPPs, Chapter 13, * ippsCRC32C_8u() example, found at the following location: * http://software.intel.com/sites/products/documentation/hpc/ipp/ipps/ */ static void test_crc32c(void) { int i; uint8_t data[48] = { 0x01, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x18, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* iSCSI Read PDU */ assert(ntohl(crc32c(data, 48)) == 0x563a96d9L); /* 32 bytes of all zeroes */ for (i = 0; i < 32; i++) data[i] = 0x00; assert(ntohl(crc32c(data, 32)) == 0xaa36918aL); /* 32 bytes of all ones */ for (i = 0; i < 32; i++) data[i] = 0xff; assert(ntohl(crc32c(data, 32)) == 0x43aba862L); /* 32 bytes of incrementing 00..1f */ for (i = 0; i < 32; i++) data[i] = i; assert(ntohl(crc32c(data, 32)) == 0x4e79dd46L); /* 32 bytes of decrementing 1f..00 */ for (i = 0; i < 32; i++) data[i] = 31 - i; assert(ntohl(crc32c(data, 32)) == 0x5cdb3f11L); mark('#'); } int main(void) { const struct test_case *tc; int i; for (tc = test_cases; tc < &test_cases[ARRAY_SIZE(test_cases)]; tc++) { const void *data = tc->data; const ovs_be16 *data16 = (OVS_FORCE const ovs_be16 *) data; const ovs_be32 *data32 = (OVS_FORCE const ovs_be32 *) data; uint32_t partial; /* Test csum(). */ assert(ntohs(csum(tc->data, tc->size)) == tc->csum); mark('.'); /* Test csum_add16(). */ partial = 0; for (i = 0; i < tc->size / 2; i++) { partial = csum_add16(partial, get_unaligned_be16(&data16[i])); } assert(ntohs(csum_finish(partial)) == tc->csum); mark('.'); /* Test csum_add32(). 
*/ partial = 0; for (i = 0; i < tc->size / 4; i++) { partial = csum_add32(partial, get_unaligned_be32(&data32[i])); } assert(ntohs(csum_finish(partial)) == tc->csum); mark('.'); /* Test alternating csum_add16() and csum_add32(). */ partial = 0; for (i = 0; i < tc->size / 4; i++) { if (i % 2) { partial = csum_add32(partial, get_unaligned_be32(&data32[i])); } else { ovs_be16 u0 = get_unaligned_be16(&data16[i * 2]); ovs_be16 u1 = get_unaligned_be16(&data16[i * 2 + 1]); partial = csum_add16(partial, u0); partial = csum_add16(partial, u1); } } assert(ntohs(csum_finish(partial)) == tc->csum); mark('.'); /* Test csum_continue(). */ partial = 0; for (i = 0; i < tc->size / 4; i++) { if (i) { partial = csum_continue(partial, &data32[i], 4); } else { partial = csum_continue(partial, &data16[i * 2], 2); partial = csum_continue(partial, &data16[i * 2 + 1], 2); } } assert(ntohs(csum_finish(partial)) == tc->csum); mark('#'); } test_rfc1624(); test_crc32c(); /* Test recalc_csum16(). */ for (i = 0; i < 32; i++) { ovs_be16 old_u16, new_u16; ovs_be16 old_csum; ovs_be16 data[16]; int j, index; for (j = 0; j < ARRAY_SIZE(data); j++) { data[j] = (OVS_FORCE ovs_be16) random_uint32(); } old_csum = csum(data, sizeof data); index = random_range(ARRAY_SIZE(data)); old_u16 = data[index]; new_u16 = data[index] = (OVS_FORCE ovs_be16) random_uint32(); assert(csum(data, sizeof data) == recalc_csum16(old_csum, old_u16, new_u16)); mark('.'); } mark('#'); /* Test recalc_csum32(). 
*/ for (i = 0; i < 32; i++) { ovs_be32 old_u32, new_u32; ovs_be16 old_csum; ovs_be32 data[16]; int j, index; for (j = 0; j < ARRAY_SIZE(data); j++) { data[j] = (OVS_FORCE ovs_be32) random_uint32(); } old_csum = csum(data, sizeof data); index = random_range(ARRAY_SIZE(data)); old_u32 = data[index]; new_u32 = data[index] = (OVS_FORCE ovs_be32) random_uint32(); assert(csum(data, sizeof data) == recalc_csum32(old_csum, old_u32, new_u32)); mark('.'); } mark('#'); putchar('\n'); return 0; } openvswitch-2.0.1+git20140120/tests/test-daemon.py000066400000000000000000000031641226605124000214120ustar00rootroot00000000000000# Copyright (c) 2010, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import argparse import logging import signal import sys import time import ovs.daemon import ovs.util def handler(signum, _): raise Exception("Signal handler called with %d" % signum) def main(): signal.signal(signal.SIGHUP, handler) parser = argparse.ArgumentParser( description="Open vSwitch daemonization test program for Python.") parser.add_argument("-b", "--bail", action="store_true", help="Exit with an error after daemonize_start().") ovs.daemon.add_args(parser) args = parser.parse_args() ovs.daemon.handle_args(args) ovs.daemon.daemonize_start() if args.bail: sys.stderr.write("%s: exiting after daemonize_start() as requested\n" % ovs.util.PROGRAM_NAME) sys.exit(1) ovs.daemon.daemonize_complete() while True: time.sleep(1) if __name__ == '__main__': try: main() except SystemExit: # Let system.exit() calls complete normally raise except: sys.exit(ovs.daemon.RESTART_EXIT_CODE) openvswitch-2.0.1+git20140120/tests/test-file_name.c000066400000000000000000000016601226605124000216570ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "util.h" #include int main(int argc, char *argv[]) { int i; for (i = 1; i < argc; i++) { char *dir, *base; dir = dir_name(argv[i]); puts(dir); free(dir); base = base_name(argv[i]); puts(base); free(base); } return 0; } openvswitch-2.0.1+git20140120/tests/test-flows.c000066400000000000000000000060361226605124000210740ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "flow.h" #include #include #include #include "classifier.h" #include "openflow/openflow.h" #include "timeval.h" #include "ofpbuf.h" #include "ofp-print.h" #include "ofp-util.h" #include "pcap-file.h" #include "util.h" #include "vlog.h" #undef NDEBUG #include int main(int argc OVS_UNUSED, char *argv[]) { struct ofp10_match expected_match; FILE *flows, *pcap; int retval; int n = 0, errors = 0; set_program_name(argv[0]); flows = stdin; pcap = fdopen(3, "rb"); if (!pcap) { ovs_fatal(errno, "failed to open fd 3 for reading"); } retval = pcap_read_header(pcap); if (retval) { ovs_fatal(retval > 0 ? 
retval : 0, "reading pcap header failed"); } while (fread(&expected_match, sizeof expected_match, 1, flows)) { struct ofpbuf *packet; struct ofp10_match extracted_match; struct match match; struct flow flow; union flow_in_port in_port_; n++; retval = pcap_read(pcap, &packet); if (retval == EOF) { ovs_fatal(0, "unexpected end of file reading pcap file"); } else if (retval) { ovs_fatal(retval, "error reading pcap file"); } in_port_.ofp_port = u16_to_ofp(1); flow_extract(packet, 0, 0, NULL, &in_port_, &flow); match_init_exact(&match, &flow); ofputil_match_to_ofp10_match(&match, &extracted_match); if (memcmp(&expected_match, &extracted_match, sizeof expected_match)) { char *exp_s = ofp10_match_to_string(&expected_match, 2); char *got_s = ofp10_match_to_string(&extracted_match, 2); errors++; printf("mismatch on packet #%d (1-based).\n", n); printf("Packet:\n"); ofp_print_packet(stdout, packet->data, packet->size); ovs_hex_dump(stdout, packet->data, packet->size, 0, true); match_print(&match); printf("Expected flow:\n%s\n", exp_s); printf("Actually extracted flow:\n%s\n", got_s); ovs_hex_dump(stdout, &expected_match, sizeof expected_match, 0, false); ovs_hex_dump(stdout, &extracted_match, sizeof extracted_match, 0, false); printf("\n"); free(exp_s); free(got_s); } ofpbuf_delete(packet); } printf("checked %d packets, %d errors\n", n, errors); return errors != 0; } openvswitch-2.0.1+git20140120/tests/test-hash.c000066400000000000000000000131421226605124000206610ustar00rootroot00000000000000/* * Copyright (c) 2009, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include "hash.h" #include "jhash.h" #undef NDEBUG #include static void set_bit(uint32_t array[3], int bit) { assert(bit >= 0 && bit <= 96); memset(array, 0, sizeof(uint32_t) * 3); if (bit < 96) { array[bit / 32] = UINT32_C(1) << (bit % 32); } } static uint32_t hash_words_cb(uint32_t input) { return hash_words(&input, 1, 0); } static uint32_t jhash_words_cb(uint32_t input) { return jhash_words(&input, 1, 0); } static uint32_t hash_int_cb(uint32_t input) { return hash_int(input, 0); } static void check_word_hash(uint32_t (*hash)(uint32_t), const char *name, int min_unique) { int i, j; for (i = 0; i <= 32; i++) { uint32_t in1 = i < 32 ? UINT32_C(1) << i : 0; for (j = i + 1; j <= 32; j++) { uint32_t in2 = j < 32 ? UINT32_C(1) << j : 0; uint32_t out1 = hash(in1); uint32_t out2 = hash(in2); const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; int ofs; for (ofs = 0; ofs < 32 - min_unique; ofs++) { uint32_t bits1 = (out1 >> ofs) & unique_mask; uint32_t bits2 = (out2 >> ofs) & unique_mask; if (bits1 == bits2) { printf("Partial collision for '%s':\n", name); printf("%s(%08"PRIx32") = %08"PRIx32"\n", name, in1, out1); printf("%s(%08"PRIx32") = %08"PRIx32"\n", name, in2, out2); printf("%d bits of output starting at bit %d " "are both 0x%"PRIx32"\n", min_unique, ofs, bits1); exit(1); } } } } } static void check_3word_hash(uint32_t (*hash)(const uint32_t[], size_t, uint32_t), const char *name) { int i, j; for (i = 0; i <= 96; i++) { for (j = i + 1; j <= 96; j++) { uint32_t in1[3], in2[3]; uint32_t out1, out2; const int min_unique = 12; const uint32_t unique_mask = (UINT32_C(1) << min_unique) - 1; set_bit(in1, i); set_bit(in2, j); out1 = hash(in1, 3, 0); out2 = hash(in2, 3, 0); if ((out1 & unique_mask) == (out2 & unique_mask)) { printf("%s has a partial collision:\n", name); printf("hash(1 << %d) == %08"PRIx32"\n", i, 
out1); printf("hash(1 << %d) == %08"PRIx32"\n", j, out2); printf("The low-order %d bits of output are both " "0x%"PRIx32"\n", min_unique, out1 & unique_mask); } } } } int main(void) { /* Check that all hashes computed with hash_words with one 1-bit (or no * 1-bits) set within a single 32-bit word have different values in all * 11-bit consecutive runs. * * Given a random distribution, the probability of at least one collision * in any set of 11 bits is approximately * * 1 - ((2**11 - 1)/2**11)**C(33,2) * == 1 - (2047/2048)**528 * =~ 0.22 * * There are 21 ways to pick 11 consecutive bits in a 32-bit word, so if we * assumed independence then the chance of having no collisions in any of * those 11-bit runs would be (1-0.22)**21 =~ .0044. Obviously * independence must be a bad assumption :-) */ check_word_hash(hash_words_cb, "hash_words", 11); check_word_hash(jhash_words_cb, "jhash_words", 11); /* Check that all hash functions of with one 1-bit (or no 1-bits) set * within three 32-bit words have different values in their lowest 12 * bits. * * Given a random distribution, the probability of at least one collision * in 12 bits is approximately * * 1 - ((2**12 - 1)/2**12)**C(97,2) * == 1 - (4095/4096)**4656 * =~ 0.68 * * so we are doing pretty well to not have any collisions in 12 bits. */ check_3word_hash(hash_words, "hash_words"); check_3word_hash(jhash_words, "jhash_words"); /* Check that all hashes computed with hash_int with one 1-bit (or no * 1-bits) set within a single 32-bit word have different values in all * 12-bit consecutive runs. * * Given a random distribution, the probability of at least one collision * in any set of 12 bits is approximately * * 1 - ((2**12 - 1)/2**12)**C(33,2) * == 1 - (4,095/4,096)**528 * =~ 0.12 * * There are 20 ways to pick 12 consecutive bits in a 32-bit word, so if we * assumed independence then the chance of having no collisions in any of * those 12-bit runs would be (1-0.12)**20 =~ 0.078. 
This refutes our * assumption of independence, which makes it seem like a good hash * function. */ check_word_hash(hash_int_cb, "hash_int", 12); return 0; } openvswitch-2.0.1+git20140120/tests/test-heap.c000066400000000000000000000277401226605124000206640ustar00rootroot00000000000000/* * Copyright (c) 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* A test for for functions and macros declared in heap.h. */ #include #include "heap.h" #include #include #include #include "command-line.h" #include "random.h" #include "util.h" #undef NDEBUG #include /* Sample heap element. */ struct element { uint32_t full_pri; struct heap_node heap_node; }; static struct element * element_from_heap_node(const struct heap_node *node) { return CONTAINER_OF(node, struct element, heap_node); } static int compare_uint32s(const void *a_, const void *b_) { const uint32_t *a = a_; const uint32_t *b = b_; return *a < *b ? -1 : *a > *b; } /* Verifies that 'heap' is internally consistent and contains all 'n' of the * 'priorities'. */ static void check_heap(const struct heap *heap, const uint32_t priorities[], size_t n) { uint32_t *priorities_copy; uint32_t *elements_copy; struct element *element; size_t i; assert(heap_count(heap) == n); assert(heap_is_empty(heap) == !n); if (n > 0) { assert(heap_max(heap) == heap->array[1]); } /* Check indexes. */ for (i = 1; i <= n; i++) { assert(heap->array[i]->idx == i); } /* Check that priority values are internally consistent. 
*/ for (i = 1; i <= n; i++) { element = element_from_heap_node(heap->array[i]); assert(element->heap_node.priority == (element->full_pri >> 16)); } /* Check the heap property. */ for (i = 1; i <= n; i++) { size_t parent = heap_parent__(i); size_t left = heap_left__(i); size_t right = heap_right__(i); if (parent >= 1) { assert(heap->array[parent]->priority >= heap->array[i]->priority); } if (left <= n) { assert(heap->array[left]->priority <= heap->array[i]->priority); } if (right <= n) { assert(heap->array[right]->priority <= heap->array[i]->priority); } } /* Check that HEAP_FOR_EACH iterates all the nodes in order. */ i = 0; HEAP_FOR_EACH (element, heap_node, heap) { assert(i < n); assert(&element->heap_node == heap->array[i + 1]); i++; } assert(i == n); priorities_copy = xmemdup(priorities, n * sizeof *priorities); elements_copy = xmalloc(n * sizeof *priorities); i = 0; HEAP_FOR_EACH (element, heap_node, heap) { elements_copy[i++] = element->heap_node.priority; } qsort(priorities_copy, n, sizeof *priorities_copy, compare_uint32s); qsort(elements_copy, n, sizeof *elements_copy, compare_uint32s); for (i = 0; i < n; i++) { assert((priorities_copy[i] >> 16) == elements_copy[i]); } free(priorities_copy); free(elements_copy); } static void shuffle(uint32_t *p, size_t n) { for (; n > 1; n--, p++) { uint32_t *q = &p[random_range(n)]; uint32_t tmp = *p; *p = *q; *q = tmp; } } /* Prints the values in 'heap', plus 'name' as a title. 
*/ static void OVS_UNUSED print_heap(const char *name, struct heap *heap) { struct element *e; printf("%s:", name); HEAP_FOR_EACH (e, heap_node, heap) { printf(" %"PRIu32":%"PRIu32, e->full_pri >> 16, e->full_pri & 0xffff); } printf("\n"); } static int factorial(int n_items) { int n, i; n = 1; for (i = 2; i <= n_items; i++) { n *= i; } return n; } static void swap(uint32_t *a, uint32_t *b) { uint32_t tmp = *a; *a = *b; *b = tmp; } static void reverse(uint32_t *a, int n) { int i; for (i = 0; i < n / 2; i++) { int j = n - (i + 1); swap(&a[i], &a[j]); } } static bool next_permutation(uint32_t *a, int n) { int k; for (k = n - 2; k >= 0; k--) { if ((a[k] >> 16) < (a[k + 1] >> 16)) { int l; for (l = n - 1; ; l--) { if ((a[l] >> 16) > (a[k] >> 16)) { swap(&a[k], &a[l]); reverse(a + (k + 1), n - (k + 1)); return true; } } } } return false; } static void test_insert_delete__(struct element *elements, const uint32_t *insert, const uint32_t *delete, size_t n) { struct heap heap; size_t i; heap_init(&heap); check_heap(&heap, NULL, 0); for (i = 0; i < n; i++) { uint32_t priority = insert[i]; elements[i].full_pri = priority; heap_insert(&heap, &elements[i].heap_node, priority >> 16); check_heap(&heap, insert, i + 1); } for (i = 0; i < n; i++) { struct element *element; HEAP_FOR_EACH (element, heap_node, &heap) { if (element->full_pri == delete[i]) { goto found; } } NOT_REACHED(); found: heap_remove(&heap, &element->heap_node); check_heap(&heap, delete + i + 1, n - (i + 1)); } heap_destroy(&heap); } static void test_insert_delete_raw__(struct element *elements, const uint32_t *insert, unsigned int insert_pattern, const uint32_t *delete, unsigned int delete_pattern, size_t n) { struct heap heap; size_t i; heap_init(&heap); check_heap(&heap, NULL, 0); for (i = 0; i < n; i++) { uint32_t priority = insert[i]; elements[i].full_pri = priority; heap_raw_insert(&heap, &elements[i].heap_node, priority >> 16); if (insert_pattern & (1u << i)) { heap_rebuild(&heap); check_heap(&heap, insert, 
i + 1); } } for (i = 0; i < n; i++) { struct element *element; HEAP_FOR_EACH (element, heap_node, &heap) { if (element->full_pri == delete[i]) { goto found; } } NOT_REACHED(); found: heap_raw_remove(&heap, &element->heap_node); if (delete_pattern & (1u << i)) { heap_rebuild(&heap); check_heap(&heap, delete + i + 1, n - (i + 1)); } } heap_destroy(&heap); } static void test_heap_insert_delete_same_order(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { enum { N_ELEMS = 7 }; uint32_t insert[N_ELEMS]; int n_permutations; size_t i; for (i = 0; i < N_ELEMS; i++) { insert[i] = i << 16; } n_permutations = 0; do { struct element elements[N_ELEMS]; n_permutations++; test_insert_delete__(elements, insert, insert, N_ELEMS); } while (next_permutation(insert, N_ELEMS)); assert(n_permutations == factorial(N_ELEMS)); } static void test_heap_insert_delete_reverse_order(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { enum { N_ELEMS = 7 }; uint32_t insert[N_ELEMS]; int n_permutations; size_t i; for (i = 0; i < N_ELEMS; i++) { insert[i] = i << 16; } n_permutations = 0; do { struct element elements[N_ELEMS]; uint32_t delete[N_ELEMS]; n_permutations++; for (i = 0; i < N_ELEMS; i++) { delete[N_ELEMS - i - 1] = insert[i]; } test_insert_delete__(elements, insert, delete, N_ELEMS); } while (next_permutation(insert, N_ELEMS)); assert(n_permutations == factorial(N_ELEMS)); } static void test_heap_insert_delete_every_order(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { enum { N_ELEMS = 5 }; uint32_t insert[N_ELEMS]; int outer_permutations; size_t i; for (i = 0; i < N_ELEMS; i++) { insert[i] = i << 16; } outer_permutations = 0; do { struct element elements[N_ELEMS]; uint32_t delete[N_ELEMS]; int inner_permutations; outer_permutations++; for (i = 0; i < N_ELEMS; i++) { delete[i] = i << 16; } inner_permutations = 0; do { inner_permutations++; test_insert_delete__(elements, insert, delete, N_ELEMS); } while (next_permutation(delete, N_ELEMS)); assert(inner_permutations == factorial(N_ELEMS)); } 
while (next_permutation(insert, N_ELEMS)); assert(outer_permutations == factorial(N_ELEMS)); } static void test_heap_insert_delete_same_order_with_dups(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { enum { N_ELEMS = 7 }; unsigned int pattern; size_t i; for (pattern = 0; pattern < (1u << N_ELEMS); pattern += 2) { int n_permutations, expected_permutations; uint32_t insert[N_ELEMS]; int j; j = 0; for (i = 0; i < N_ELEMS; i++) { if (i && !(pattern & (1u << i))) { j++; } insert[i] = (j << 16) | i; } expected_permutations = factorial(N_ELEMS); for (i = 0; i < N_ELEMS; ) { j = i + 1; if (pattern & (1u << i)) { for (; j < N_ELEMS; j++) { if (!(pattern & (1u << j))) { break; } } expected_permutations /= factorial(j - i + 1); } i = j; } n_permutations = 0; do { struct element elements[N_ELEMS]; n_permutations++; test_insert_delete__(elements, insert, insert, N_ELEMS); } while (next_permutation(insert, N_ELEMS)); assert(n_permutations == expected_permutations); } } static void test_heap_raw_insert(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { enum { N_ELEMS = 7 }; uint32_t insert[N_ELEMS]; int n_permutations; size_t i; for (i = 0; i < N_ELEMS; i++) { insert[i] = i << 16; } n_permutations = 0; do { struct element elements[N_ELEMS]; n_permutations++; test_insert_delete_raw__(elements, insert, 1u << (N_ELEMS - 1), insert, UINT_MAX, N_ELEMS); } while (next_permutation(insert, N_ELEMS)); assert(n_permutations == factorial(N_ELEMS)); } static void test_heap_raw_delete(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { enum { N_ELEMS = 16 }; uint32_t insert[N_ELEMS]; uint32_t delete[N_ELEMS]; size_t i; for (i = 0; i < N_ELEMS; i++) { insert[i] = i << 16; delete[i] = i << 16; } for (i = 0; i < 1000; i++) { struct element elements[N_ELEMS]; shuffle(insert, N_ELEMS); shuffle(delete, N_ELEMS); test_insert_delete_raw__(elements, insert, 0, delete, (1u << (N_ELEMS - 1)) | (1u << (N_ELEMS / 2)), N_ELEMS); } } static const struct command commands[] = { { "insert-delete-same-order", 0, 0, 
test_heap_insert_delete_same_order, }, { "insert-delete-reverse-order", 0, 0, test_heap_insert_delete_reverse_order, }, { "insert-delete-every-order", 0, 0, test_heap_insert_delete_every_order, }, { "insert-delete-same-order-with-dups", 0, 0, test_heap_insert_delete_same_order_with_dups, }, { "raw-insert", 0, 0, test_heap_raw_insert, }, { "raw-delete", 0, 0, test_heap_raw_delete, }, }; int main(int argc, char *argv[]) { set_program_name(argv[0]); run_command(argc - 1, argv + 1, commands); return 0; } openvswitch-2.0.1+git20140120/tests/test-hindex.c000066400000000000000000000173151226605124000212230ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* A non-exhaustive test for some of the functions and macros declared in * hindex.h. */ #include #include "hindex.h" #include #include "hash.h" #include "random.h" #include "util.h" #undef NDEBUG #include /* Sample hindex element. */ struct element { int value; struct hindex_node node; }; typedef size_t hash_func(int value); static int compare_ints(const void *a_, const void *b_) { const int *a = a_; const int *b = b_; return *a < *b ? -1 : *a > *b; } /* Verifies that 'hindex' contains exactly the 'n' values in 'values'. */ static void check_hindex(struct hindex *hindex, const int values[], size_t n, hash_func *hash) { int *sort_values, *hindex_values; struct element *e; size_t i; /* Check that all the values are there in iteration. 
*/ sort_values = xmalloc(sizeof *sort_values * n); hindex_values = xmalloc(sizeof *sort_values * n); i = 0; HINDEX_FOR_EACH (e, node, hindex) { assert(i < n); hindex_values[i++] = e->value; } assert(i == n); memcpy(sort_values, values, sizeof *sort_values * n); qsort(sort_values, n, sizeof *sort_values, compare_ints); qsort(hindex_values, n, sizeof *hindex_values, compare_ints); for (i = 0; i < n; i++) { assert(sort_values[i] == hindex_values[i]); } free(hindex_values); free(sort_values); /* Check that all the values are there in lookup. */ for (i = 0; i < n; i++) { size_t count = 0; HINDEX_FOR_EACH_WITH_HASH (e, node, hash(values[i]), hindex) { count += e->value == values[i]; } assert(count == 1); } /* Check counters. */ assert(hindex_is_empty(hindex) == !n); assert(hindex->n_unique <= n); } /* Puts the 'n' values in 'values' into 'elements', and then puts those * elements into 'hindex'. */ static void make_hindex(struct hindex *hindex, struct element elements[], int values[], size_t n, hash_func *hash) { size_t i; hindex_init(hindex); for (i = 0; i < n; i++) { elements[i].value = i; hindex_insert(hindex, &elements[i].node, hash(elements[i].value)); values[i] = i; } } static void shuffle(int *p, size_t n) { for (; n > 1; n--, p++) { int *q = &p[random_range(n)]; int tmp = *p; *p = *q; *q = tmp; } } /* Prints the 'n' values in 'values', plus 'name' as a title. */ static void OVS_UNUSED print_ints(const char *name, const int *values, size_t n) { size_t i; printf("%s:", name); for (i = 0; i < n; i++) { printf(" %d", values[i]); } printf("\n"); } /* Prints the values in 'hindex', plus 'name' as a title. 
*/ static void OVS_UNUSED print_hindex(const char *name, struct hindex *hindex) { struct element *e; printf("%s:", name); HINDEX_FOR_EACH (e, node, hindex) { printf(" %d(%zu)", e->value, e->node.hash & hindex->mask); } printf("\n"); } static size_t unique_hash(int value) { return value; } static size_t good_hash(int value) { return hash_int(value, 0x1234abcd); } static size_t constant_hash(int value OVS_UNUSED) { return 123; } static size_t mod4_hash(int value) { return value % 4; } static size_t mod3_hash(int value) { return value % 3; } static size_t mod2_hash(int value) { return value % 2; } static size_t multipart_hash(int value) { return (mod4_hash(value) << 16) | (constant_hash(value) & 0xFFFF); } /* Tests basic hindex insertion and deletion. */ static void test_hindex_insert_delete(hash_func *hash) { enum { N_ELEMS = 100 }; struct element elements[N_ELEMS]; int values[N_ELEMS]; struct hindex hindex; size_t i; hindex_init(&hindex); for (i = 0; i < N_ELEMS; i++) { elements[i].value = i; hindex_insert(&hindex, &elements[i].node, hash(i)); values[i] = i; check_hindex(&hindex, values, i + 1, hash); } shuffle(values, N_ELEMS); for (i = 0; i < N_ELEMS; i++) { hindex_remove(&hindex, &elements[values[i]].node); check_hindex(&hindex, values + (i + 1), N_ELEMS - (i + 1), hash); } hindex_destroy(&hindex); } /* Tests basic hindex_reserve() and hindex_shrink(). 
*/ static void test_hindex_reserve_shrink(hash_func *hash) { enum { N_ELEMS = 32 }; size_t i; for (i = 0; i < N_ELEMS; i++) { struct element elements[N_ELEMS]; int values[N_ELEMS]; struct hindex hindex; size_t j; hindex_init(&hindex); hindex_reserve(&hindex, i); for (j = 0; j < N_ELEMS; j++) { elements[j].value = j; hindex_insert(&hindex, &elements[j].node, hash(j)); values[j] = j; check_hindex(&hindex, values, j + 1, hash); } shuffle(values, N_ELEMS); for (j = 0; j < N_ELEMS; j++) { hindex_remove(&hindex, &elements[values[j]].node); hindex_shrink(&hindex); check_hindex(&hindex, values + (j + 1), N_ELEMS - (j + 1), hash); } hindex_destroy(&hindex); } } /* Tests that HINDEX_FOR_EACH_SAFE properly allows for deletion of the current * element of a hindex. */ static void test_hindex_for_each_safe(hash_func *hash) { enum { MAX_ELEMS = 10 }; size_t n; unsigned long int pattern; for (n = 0; n <= MAX_ELEMS; n++) { for (pattern = 0; pattern < 1ul << n; pattern++) { struct element elements[MAX_ELEMS]; int values[MAX_ELEMS]; struct hindex hindex; struct element *e, *next; size_t n_remaining; int i; make_hindex(&hindex, elements, values, n, hash); i = 0; n_remaining = n; HINDEX_FOR_EACH_SAFE (e, next, node, &hindex) { assert(i < n); if (pattern & (1ul << e->value)) { size_t j; hindex_remove(&hindex, &e->node); for (j = 0; ; j++) { assert(j < n_remaining); if (values[j] == e->value) { values[j] = values[--n_remaining]; break; } } } check_hindex(&hindex, values, n_remaining, hash); i++; } assert(i == n); for (i = 0; i < n; i++) { if (pattern & (1ul << i)) { n_remaining++; } } assert(n == n_remaining); hindex_destroy(&hindex); } } } static void run_test(void (*function)(hash_func *)) { hash_func *hash_funcs[] = { unique_hash, good_hash, constant_hash, mod4_hash, mod3_hash, mod2_hash, multipart_hash, }; size_t i; for (i = 0; i < ARRAY_SIZE(hash_funcs); i++) { function(hash_funcs[i]); printf("."); fflush(stdout); } } int main(void) { run_test(test_hindex_insert_delete); 
run_test(test_hindex_for_each_safe); run_test(test_hindex_reserve_shrink); printf("\n"); return 0; } openvswitch-2.0.1+git20140120/tests/test-hmap.c000066400000000000000000000161661226605124000206740ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* A non-exhaustive test for some of the functions and macros declared in * hmap.h. */ #include #include "hmap.h" #include #include "hash.h" #include "random.h" #include "util.h" #undef NDEBUG #include /* Sample hmap element. */ struct element { int value; struct hmap_node node; }; typedef size_t hash_func(int value); static int compare_ints(const void *a_, const void *b_) { const int *a = a_; const int *b = b_; return *a < *b ? -1 : *a > *b; } /* Verifies that 'hmap' contains exactly the 'n' values in 'values'. */ static void check_hmap(struct hmap *hmap, const int values[], size_t n, hash_func *hash) { int *sort_values, *hmap_values; struct element *e; size_t i; /* Check that all the values are there in iteration. 
*/ sort_values = xmalloc(sizeof *sort_values * n); hmap_values = xmalloc(sizeof *sort_values * n); i = 0; HMAP_FOR_EACH (e, node, hmap) { assert(i < n); hmap_values[i++] = e->value; } assert(i == n); memcpy(sort_values, values, sizeof *sort_values * n); qsort(sort_values, n, sizeof *sort_values, compare_ints); qsort(hmap_values, n, sizeof *hmap_values, compare_ints); for (i = 0; i < n; i++) { assert(sort_values[i] == hmap_values[i]); } free(hmap_values); free(sort_values); /* Check that all the values are there in lookup. */ for (i = 0; i < n; i++) { size_t count = 0; HMAP_FOR_EACH_WITH_HASH (e, node, hash(values[i]), hmap) { count += e->value == values[i]; } assert(count == 1); } /* Check counters. */ assert(hmap_is_empty(hmap) == !n); assert(hmap_count(hmap) == n); } /* Puts the 'n' values in 'values' into 'elements', and then puts those * elements into 'hmap'. */ static void make_hmap(struct hmap *hmap, struct element elements[], int values[], size_t n, hash_func *hash) { size_t i; hmap_init(hmap); for (i = 0; i < n; i++) { elements[i].value = i; hmap_insert(hmap, &elements[i].node, hash(elements[i].value)); values[i] = i; } } static void shuffle(int *p, size_t n) { for (; n > 1; n--, p++) { int *q = &p[random_range(n)]; int tmp = *p; *p = *q; *q = tmp; } } #if 0 /* Prints the values in 'hmap', plus 'name' as a title. */ static void print_hmap(const char *name, struct hmap *hmap) { struct element *e; printf("%s:", name); HMAP_FOR_EACH (e, node, hmap) { printf(" %d(%zu)", e->value, e->node.hash & hmap->mask); } printf("\n"); } /* Prints the 'n' values in 'values', plus 'name' as a title. 
*/
static void
print_ints(const char *name, const int *values, size_t n)
{
    size_t i;

    printf("%s:", name);
    for (i = 0; i < n; i++) {
        printf(" %d", values[i]);
    }
    printf("\n");
}
#endif

/* Hash function that returns 'value' itself. */
static size_t
identity_hash(int value)
{
    return value;
}

/* Hash function that passes 'value' through hash_int() with a fixed basis. */
static size_t
good_hash(int value)
{
    return hash_int(value, 0x1234abcd);
}

/* Hash function that ignores 'value' and returns the same hash for every
 * input, so that all elements collide. */
static size_t
constant_hash(int value OVS_UNUSED)
{
    return 123;
}

/* Tests basic hmap insertion and deletion. */
static void
test_hmap_insert_delete(hash_func *hash)
{
    enum { N_ELEMS = 100 };

    struct element elements[N_ELEMS];
    int values[N_ELEMS];
    struct hmap hmap;
    size_t i;

    /* Insert 0...N_ELEMS-1 one at a time, verifying the whole map after each
     * insertion. */
    hmap_init(&hmap);
    for (i = 0; i < N_ELEMS; i++) {
        elements[i].value = i;
        hmap_insert(&hmap, &elements[i].node, hash(i));
        values[i] = i;
        check_hmap(&hmap, values, i + 1, hash);
    }

    /* Remove in shuffled order.  After removing values[i], the expected
     * contents are exactly the tail values[i + 1...]. */
    shuffle(values, N_ELEMS);
    for (i = 0; i < N_ELEMS; i++) {
        hmap_remove(&hmap, &elements[values[i]].node);
        check_hmap(&hmap, values + (i + 1), N_ELEMS - (i + 1), hash);
    }
    hmap_destroy(&hmap);
}

/* Tests basic hmap_reserve() and hmap_shrink(). */
static void
test_hmap_reserve_shrink(hash_func *hash)
{
    enum { N_ELEMS = 32 };

    size_t i;

    /* 'i' is the capacity passed to hmap_reserve() before any insertion. */
    for (i = 0; i < N_ELEMS; i++) {
        struct element elements[N_ELEMS];
        int values[N_ELEMS];
        struct hmap hmap;
        size_t j;

        hmap_init(&hmap);
        hmap_reserve(&hmap, i);
        for (j = 0; j < N_ELEMS; j++) {
            elements[j].value = j;
            hmap_insert(&hmap, &elements[j].node, hash(j));
            values[j] = j;
            check_hmap(&hmap, values, j + 1, hash);
        }

        /* Remove in shuffled order, shrinking after every removal. */
        shuffle(values, N_ELEMS);
        for (j = 0; j < N_ELEMS; j++) {
            hmap_remove(&hmap, &elements[values[j]].node);
            hmap_shrink(&hmap);
            check_hmap(&hmap, values + (j + 1), N_ELEMS - (j + 1), hash);
        }
        hmap_destroy(&hmap);
    }
}

/* Tests that HMAP_FOR_EACH_SAFE properly allows for deletion of the current
 * element of a hmap.
*/ static void test_hmap_for_each_safe(hash_func *hash) { enum { MAX_ELEMS = 10 }; size_t n; unsigned long int pattern; for (n = 0; n <= MAX_ELEMS; n++) { for (pattern = 0; pattern < 1ul << n; pattern++) { struct element elements[MAX_ELEMS]; int values[MAX_ELEMS]; struct hmap hmap; struct element *e, *next; size_t n_remaining; int i; make_hmap(&hmap, elements, values, n, hash); i = 0; n_remaining = n; HMAP_FOR_EACH_SAFE (e, next, node, &hmap) { assert(i < n); if (pattern & (1ul << e->value)) { size_t j; hmap_remove(&hmap, &e->node); for (j = 0; ; j++) { assert(j < n_remaining); if (values[j] == e->value) { values[j] = values[--n_remaining]; break; } } } check_hmap(&hmap, values, n_remaining, hash); i++; } assert(i == n); for (i = 0; i < n; i++) { if (pattern & (1ul << i)) { n_remaining++; } } assert(n == n_remaining); hmap_destroy(&hmap); } } } static void run_test(void (*function)(hash_func *)) { hash_func *hash_funcs[] = { identity_hash, good_hash, constant_hash }; size_t i; for (i = 0; i < ARRAY_SIZE(hash_funcs); i++) { function(hash_funcs[i]); printf("."); fflush(stdout); } } int main(void) { run_test(test_hmap_insert_delete); run_test(test_hmap_for_each_safe); run_test(test_hmap_reserve_shrink); printf("\n"); return 0; } openvswitch-2.0.1+git20140120/tests/test-json.c000066400000000000000000000074071226605124000207160ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "json.h" #include #include #include #include #include "util.h" /* --pretty: If set, the JSON output is pretty-printed, instead of printed as * compactly as possible. */ static int pretty = 0; /* --multiple: If set, the input is a sequence of JSON objects or arrays, * instead of exactly one object or array. */ static int multiple = 0; static bool print_and_free_json(struct json *json) { bool ok; if (json->type == JSON_STRING) { printf("error: %s\n", json->u.string); ok = false; } else { char *s = json_to_string(json, JSSF_SORT | (pretty ? JSSF_PRETTY : 0)); ovs_assert(pretty || json_serialized_length(json) == strlen(s)); puts(s); free(s); ok = true; } json_destroy(json); return ok; } static bool refill(FILE *file, void *buffer, size_t buffer_size, size_t *n, size_t *used) { *used = 0; if (feof(file)) { *n = 0; return false; } else { *n = fread(buffer, 1, buffer_size, file); if (ferror(file)) { ovs_fatal(errno, "Error reading input file"); } return *n > 0; } } static bool parse_multiple(FILE *stream) { struct json_parser *parser; char buffer[BUFSIZ]; size_t n, used; bool ok; parser = NULL; n = used = 0; ok = true; while (used < n || refill(stream, buffer, sizeof buffer, &n, &used)) { if (!parser && isspace((unsigned char) buffer[used])) { /* Skip white space. 
*/ used++; } else { if (!parser) { parser = json_parser_create(0); } used += json_parser_feed(parser, &buffer[used], n - used); if (used < n) { if (!print_and_free_json(json_parser_finish(parser))) { ok = false; } parser = NULL; } } } if (parser) { if (!print_and_free_json(json_parser_finish(parser))) { ok = false; } } return ok; } int main(int argc, char *argv[]) { const char *input_file; FILE *stream; bool ok; set_program_name(argv[0]); for (;;) { static const struct option options[] = { {"pretty", no_argument, &pretty, 1}, {"multiple", no_argument, &multiple, 1}, }; int option_index = 0; int c = getopt_long (argc, argv, "", options, &option_index); if (c == -1) { break; } switch (c) { case 0: break; case '?': exit(1); default: abort(); } } if (argc - optind != 1) { ovs_fatal(0, "usage: %s [--pretty] [--multiple] INPUT.json", program_name); } input_file = argv[optind]; stream = !strcmp(input_file, "-") ? stdin : fopen(input_file, "r"); if (!stream) { ovs_fatal(errno, "Cannot open \"%s\"", input_file); } if (multiple) { ok = parse_multiple(stream); } else { ok = print_and_free_json(json_from_stream(stream)); } fclose(stream); return !ok; } openvswitch-2.0.1+git20140120/tests/test-json.py000066400000000000000000000050111226605124000211110ustar00rootroot00000000000000# Copyright (c) 2009, 2010 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import codecs
import getopt
import sys

import ovs.json


def print_json(json):
    """Print 'json' on stdout; return True on success, False on error.

    A string value is taken to be an error message and is printed as
    "error: ...".  Any other value is serialized with ovs.json.to_stream().
    """
    if type(json) in [str, unicode]:
        print "error: %s" % json
        return False
    else:
        ovs.json.to_stream(json, sys.stdout)
        sys.stdout.write("\n")
        return True


def parse_multiple(stream):
    """Parse and print each JSON value read from 'stream'.

    Values may be separated by white space.  Returns True only if every
    value was printed successfully by print_json().
    """
    buf = stream.read(4096)
    ok = True
    parser = None
    while len(buf):
        if parser is None and buf[0] in " \t\r\n":
            # Skip white space between values.
            buf = buf[1:]
        else:
            if parser is None:
                parser = ovs.json.Parser()
            n = parser.feed(buf)
            buf = buf[n:]
            if len(buf):
                # The parser did not consume everything it was given, so the
                # current value is complete.
                if not print_json(parser.finish()):
                    ok = False
                parser = None
        if len(buf) == 0:
            buf = stream.read(4096)
    # Input ended while a value was still being parsed.
    if parser and not print_json(parser.finish()):
        ok = False
    return ok


def main(argv):
    """Usage: test-json.py [--multiple] INPUT.json  ("-" reads stdin)."""
    argv0 = argv[0]

    # Make stdout and stderr UTF-8, even if they are redirected to a file.
    sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
    sys.stderr = codecs.getwriter("utf-8")(sys.stderr)

    try:
        options, args = getopt.gnu_getopt(argv[1:], '', ['multiple'])
    except getopt.GetoptError, geo:
        sys.stderr.write("%s: %s\n" % (argv0, geo.msg))
        sys.exit(1)

    multiple = False
    for key, value in options:
        if key == '--multiple':
            multiple = True
        else:
            sys.stderr.write("%s: unhandled option %s\n" % (argv0, key))
            sys.exit(1)

    if len(args) != 1:
        sys.stderr.write("usage: %s [--multiple] INPUT.json\n" % argv0)
        sys.exit(1)

    input_file = args[0]
    if input_file == "-":
        stream = sys.stdin
    else:
        stream = open(input_file, "r")

    if multiple:
        ok = parse_multiple(stream)
    else:
        ok = print_json(ovs.json.from_stream(stream))

    # Exit status 1 signals that at least one value failed to parse.
    if not ok:
        sys.exit(1)


if __name__ == '__main__':
    main(sys.argv)
openvswitch-2.0.1+git20140120/tests/test-jsonrpc.c000066400000000000000000000212231226605124000214130ustar00rootroot00000000000000
/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "jsonrpc.h" #include #include #include #include #include #include "command-line.h" #include "daemon.h" #include "json.h" #include "poll-loop.h" #include "stream-ssl.h" #include "stream.h" #include "timeval.h" #include "util.h" #include "vlog.h" static struct command all_commands[]; static void usage(void) NO_RETURN; static void parse_options(int argc, char *argv[]); int main(int argc, char *argv[]) { proctitle_init(argc, argv); set_program_name(argv[0]); parse_options(argc, argv); run_command(argc - optind, argv + optind, all_commands); return 0; } static void parse_options(int argc, char *argv[]) { enum { OPT_BOOTSTRAP_CA_CERT = UCHAR_MAX + 1, DAEMON_OPTION_ENUMS }; static const struct option long_options[] = { {"verbose", optional_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, DAEMON_LONG_OPTIONS, {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT}, STREAM_SSL_LONG_OPTIONS, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { int c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { case 'h': usage(); case 'v': vlog_set_verbosity(optarg); break; DAEMON_OPTION_HANDLERS STREAM_SSL_OPTION_HANDLERS case OPT_BOOTSTRAP_CA_CERT: stream_ssl_set_ca_cert_file(optarg, true); break; case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); } static void usage(void) { printf("%s: JSON-RPC test utility\n" "usage: %s [OPTIONS] COMMAND [ARG...]\n" " listen LOCAL listen for connections on LOCAL\n" " request REMOTE METHOD 
PARAMS send request, print reply\n" " notify REMOTE METHOD PARAMS send notification and exit\n", program_name, program_name); stream_usage("JSON-RPC", true, true, true); daemon_usage(); vlog_usage(); printf("\nOther options:\n" " -h, --help display this help message\n"); exit(EXIT_SUCCESS); } /* Command helper functions. */ static struct json * parse_json(const char *s) { struct json *json = json_from_string(s); if (json->type == JSON_STRING) { ovs_fatal(0, "\"%s\": %s", s, json->u.string); } return json; } static void print_and_free_json(struct json *json) { char *string = json_to_string(json, JSSF_SORT); json_destroy(json); puts(string); free(string); } /* Command implementations. */ static int handle_rpc(struct jsonrpc *rpc, struct jsonrpc_msg *msg, bool *done) { if (msg->type == JSONRPC_REQUEST) { struct jsonrpc_msg *reply = NULL; if (!strcmp(msg->method, "echo")) { reply = jsonrpc_create_reply(json_clone(msg->params), msg->id); } else { struct json *error = json_object_create(); json_object_put_string(error, "error", "unknown method"); reply = jsonrpc_create_error(error, msg->id); ovs_error(0, "unknown request %s", msg->method); } jsonrpc_send(rpc, reply); return 0; } else if (msg->type == JSONRPC_NOTIFY) { if (!strcmp(msg->method, "shutdown")) { *done = true; return 0; } else { ovs_error(0, "unknown notification %s", msg->method); return ENOTTY; } } else { ovs_error(0, "unsolicited JSON-RPC reply or error"); return EPROTO; } } static void do_listen(int argc OVS_UNUSED, char *argv[]) { struct pstream *pstream; struct jsonrpc **rpcs; size_t n_rpcs, allocated_rpcs; bool done; int error; error = jsonrpc_pstream_open(argv[1], &pstream, DSCP_DEFAULT); if (error) { ovs_fatal(error, "could not listen on \"%s\"", argv[1]); } daemonize(); rpcs = NULL; n_rpcs = allocated_rpcs = 0; done = false; for (;;) { struct stream *stream; size_t i; /* Accept new connections. 
*/ error = pstream_accept(pstream, &stream); if (!error) { if (n_rpcs >= allocated_rpcs) { rpcs = x2nrealloc(rpcs, &allocated_rpcs, sizeof *rpcs); } rpcs[n_rpcs++] = jsonrpc_open(stream); } else if (error != EAGAIN) { ovs_fatal(error, "pstream_accept failed"); } /* Service existing connections. */ for (i = 0; i < n_rpcs; ) { struct jsonrpc *rpc = rpcs[i]; struct jsonrpc_msg *msg; jsonrpc_run(rpc); if (!jsonrpc_get_backlog(rpc)) { error = jsonrpc_recv(rpc, &msg); if (!error) { error = handle_rpc(rpc, msg, &done); jsonrpc_msg_destroy(msg); } else if (error == EAGAIN) { error = 0; } } if (!error) { error = jsonrpc_get_status(rpc); } if (error) { jsonrpc_close(rpc); ovs_error(error, "connection closed"); memmove(&rpcs[i], &rpcs[i + 1], (n_rpcs - i - 1) * sizeof *rpcs); n_rpcs--; } else { i++; } } /* Wait for something to do. */ if (done && !n_rpcs) { break; } pstream_wait(pstream); for (i = 0; i < n_rpcs; i++) { struct jsonrpc *rpc = rpcs[i]; jsonrpc_wait(rpc); if (!jsonrpc_get_backlog(rpc)) { jsonrpc_recv_wait(rpc); } } poll_block(); } free(rpcs); pstream_close(pstream); } static void do_request(int argc OVS_UNUSED, char *argv[]) { struct jsonrpc_msg *msg; struct jsonrpc *rpc; struct json *params; struct stream *stream; const char *method; char *string; int error; method = argv[2]; params = parse_json(argv[3]); msg = jsonrpc_create_request(method, params, NULL); string = jsonrpc_msg_is_valid(msg); if (string) { ovs_fatal(0, "not a valid JSON-RPC request: %s", string); } error = stream_open_block(jsonrpc_stream_open(argv[1], &stream, DSCP_DEFAULT), &stream); if (error) { ovs_fatal(error, "could not open \"%s\"", argv[1]); } rpc = jsonrpc_open(stream); error = jsonrpc_send(rpc, msg); if (error) { ovs_fatal(error, "could not send request"); } error = jsonrpc_recv_block(rpc, &msg); if (error) { ovs_fatal(error, "error waiting for reply"); } print_and_free_json(jsonrpc_msg_to_json(msg)); jsonrpc_close(rpc); } static void do_notify(int argc OVS_UNUSED, char *argv[]) { 
struct jsonrpc_msg *msg; struct jsonrpc *rpc; struct json *params; struct stream *stream; const char *method; char *string; int error; method = argv[2]; params = parse_json(argv[3]); msg = jsonrpc_create_notify(method, params); string = jsonrpc_msg_is_valid(msg); if (string) { ovs_fatal(0, "not a JSON RPC-valid notification: %s", string); } error = stream_open_block(jsonrpc_stream_open(argv[1], &stream, DSCP_DEFAULT), &stream); if (error) { ovs_fatal(error, "could not open \"%s\"", argv[1]); } rpc = jsonrpc_open(stream); error = jsonrpc_send_block(rpc, msg); if (error) { ovs_fatal(error, "could not send notification"); } jsonrpc_close(rpc); } static void do_help(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { usage(); } static struct command all_commands[] = { { "listen", 1, 1, do_listen }, { "request", 3, 3, do_request }, { "notify", 3, 3, do_notify }, { "help", 0, INT_MAX, do_help }, { NULL, 0, 0, NULL }, }; openvswitch-2.0.1+git20140120/tests/test-jsonrpc.py000066400000000000000000000146501226605124000216270ustar00rootroot00000000000000# Copyright (c) 2009, 2010, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import argparse import errno import os import sys import ovs.daemon import ovs.json import ovs.jsonrpc import ovs.poller import ovs.stream def handle_rpc(rpc, msg): done = False reply = None if msg.type == ovs.jsonrpc.Message.T_REQUEST: if msg.method == "echo": reply = ovs.jsonrpc.Message.create_reply(msg.params, msg.id) else: reply = ovs.jsonrpc.Message.create_error( {"error": "unknown method"}, msg.id) sys.stderr.write("unknown request %s" % msg.method) elif msg.type == ovs.jsonrpc.Message.T_NOTIFY: if msg.method == "shutdown": done = True else: rpc.error(errno.ENOTTY) sys.stderr.write("unknown notification %s" % msg.method) else: rpc.error(errno.EPROTO) sys.stderr.write("unsolicited JSON-RPC reply or error\n") if reply: rpc.send(reply) return done def do_listen(name): error, pstream = ovs.stream.PassiveStream.open(name) if error: sys.stderr.write("could not listen on \"%s\": %s\n" % (name, os.strerror(error))) sys.exit(1) ovs.daemon.daemonize() rpcs = [] done = False while True: # Accept new connections. error, stream = pstream.accept() if stream: rpcs.append(ovs.jsonrpc.Connection(stream)) elif error != errno.EAGAIN: sys.stderr.write("PassiveStream.accept() failed\n") sys.exit(1) # Service existing connections. 
dead_rpcs = [] for rpc in rpcs: rpc.run() error = 0 if not rpc.get_backlog(): error, msg = rpc.recv() if not error: if handle_rpc(rpc, msg): done = True error = rpc.get_status() if error: rpc.close() dead_rpcs.append(rpc) rpcs = [rpc for rpc in rpcs if not rpc in dead_rpcs] if done and not rpcs: break poller = ovs.poller.Poller() pstream.wait(poller) for rpc in rpcs: rpc.wait(poller) if not rpc.get_backlog(): rpc.recv_wait(poller) poller.block() pstream.close() def do_request(name, method, params_string): params = ovs.json.from_string(params_string) msg = ovs.jsonrpc.Message.create_request(method, params) s = msg.is_valid() if s: sys.stderr.write("not a valid JSON-RPC request: %s\n" % s) sys.exit(1) error, stream = ovs.stream.Stream.open_block(ovs.stream.Stream.open(name)) if error: sys.stderr.write("could not open \"%s\": %s\n" % (name, os.strerror(error))) sys.exit(1) rpc = ovs.jsonrpc.Connection(stream) error = rpc.send(msg) if error: sys.stderr.write("could not send request: %s\n" % os.strerror(error)) sys.exit(1) error, msg = rpc.recv_block() if error: sys.stderr.write("error waiting for reply: %s\n" % os.strerror(error)) sys.exit(1) print ovs.json.to_string(msg.to_json()) rpc.close() def do_notify(name, method, params_string): params = ovs.json.from_string(params_string) msg = ovs.jsonrpc.Message.create_notify(method, params) s = msg.is_valid() if s: sys.stderr.write("not a valid JSON-RPC notification: %s\n" % s) sys.exit(1) error, stream = ovs.stream.Stream.open_block(ovs.stream.Stream.open(name)) if error: sys.stderr.write("could not open \"%s\": %s\n" % (name, os.strerror(error))) sys.exit(1) rpc = ovs.jsonrpc.Connection(stream) error = rpc.send_block(msg) if error: sys.stderr.write("could not send notification: %s\n" % os.strerror(error)) sys.exit(1) rpc.close() def main(argv): parser = argparse.ArgumentParser( description="JSON-RPC test utility for Python.", formatter_class=argparse.RawDescriptionHelpFormatter) commands = {"listen": (do_listen, 1), 
"request": (do_request, 3), "notify": (do_notify, 3), "help": (parser.print_help, (0,))} group_description = """\ listen LOCAL listen for connections on LOCAL request REMOTE METHOD PARAMS send request, print reply notify REMOTE METHOD PARAMS send notification and exit """ + ovs.stream.usage("JSON-RPC") group = parser.add_argument_group(title="Commands", description=group_description) group.add_argument('command', metavar="COMMAND", nargs=1, choices=commands, help="Command to use.") group.add_argument('command_args', metavar="ARG", nargs='*', help="Arguments to COMMAND.") ovs.daemon.add_args(parser) args = parser.parse_args() ovs.daemon.handle_args(args) command_name = args.command[0] args = args.command_args if not command_name in commands: sys.stderr.write("%s: unknown command \"%s\" " "(use --help for help)\n" % (argv[0], command_name)) sys.exit(1) func, n_args = commands[command_name] if type(n_args) == tuple: if len(args) < n_args[0]: sys.stderr.write("%s: \"%s\" requires at least %d arguments but " "only %d provided\n" % (argv[0], command_name, n_args, len(args))) sys.exit(1) elif type(n_args) == int: if len(args) != n_args: sys.stderr.write("%s: \"%s\" requires %d arguments but %d " "provided\n" % (argv[0], command_name, n_args, len(args))) sys.exit(1) else: assert False func(*args) if __name__ == '__main__': main(sys.argv) openvswitch-2.0.1+git20140120/tests/test-list.c000066400000000000000000000103721226605124000207130ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ /* A non-exhaustive test for some of the functions and macros declared in * list.h. */ #include #include "list.h" #include #undef NDEBUG #include /* Sample list element. */ struct element { int value; struct list node; }; /* Puts the 'n' values in 'values' into 'elements', and then puts those * elements in order into 'list'. */ static void make_list(struct list *list, struct element elements[], int values[], size_t n) { size_t i; list_init(list); for (i = 0; i < n; i++) { elements[i].value = i; list_push_back(list, &elements[i].node); values[i] = i; } } /* Verifies that 'list' contains exactly the 'n' values in 'values', in the * specified order. */ static void check_list(struct list *list, const int values[], size_t n) { struct element *e; size_t i; i = 0; LIST_FOR_EACH (e, node, list) { assert(i < n); assert(e->value == values[i]); i++; } assert(&e->node == list); assert(i == n); i = 0; LIST_FOR_EACH_REVERSE (e, node, list) { assert(i < n); assert(e->value == values[n - i - 1]); i++; } assert(&e->node == list); assert(i == n); assert(list_is_empty(list) == !n); assert(list_is_singleton(list) == (n == 1)); assert(list_is_short(list) == (n < 2)); assert(list_size(list) == n); } #if 0 /* Prints the values in 'list', plus 'name' as a title. */ static void print_list(const char *name, struct list *list) { struct element *e; printf("%s:", name); LIST_FOR_EACH (e, node, list) { printf(" %d", e->value); } printf("\n"); } #endif /* Tests basic list construction. */ static void test_list_construction(void) { enum { MAX_ELEMS = 100 }; size_t n; for (n = 0; n <= MAX_ELEMS; n++) { struct element elements[MAX_ELEMS]; int values[MAX_ELEMS]; struct list list; make_list(&list, elements, values, n); check_list(&list, values, n); } } /* Tests that LIST_FOR_EACH_SAFE properly allows for deletion of the current * element of a list. 
*/ static void test_list_for_each_safe(void) { enum { MAX_ELEMS = 10 }; size_t n; unsigned long int pattern; for (n = 0; n <= MAX_ELEMS; n++) { for (pattern = 0; pattern < 1ul << n; pattern++) { struct element elements[MAX_ELEMS]; int values[MAX_ELEMS]; struct list list; struct element *e, *next; size_t values_idx, n_remaining; int i; make_list(&list, elements, values, n); i = 0; values_idx = 0; n_remaining = n; LIST_FOR_EACH_SAFE (e, next, node, &list) { assert(i < n); if (pattern & (1ul << i)) { list_remove(&e->node); n_remaining--; memmove(&values[values_idx], &values[values_idx + 1], sizeof *values * (n_remaining - values_idx)); } else { values_idx++; } check_list(&list, values, n_remaining); i++; } assert(i == n); assert(&e->node == &list); for (i = 0; i < n; i++) { if (pattern & (1ul << i)) { n_remaining++; } } assert(n == n_remaining); } } } static void run_test(void (*function)(void)) { function(); printf("."); } int main(void) { run_test(test_list_construction); run_test(test_list_for_each_safe); printf("\n"); return 0; } openvswitch-2.0.1+git20140120/tests/test-lockfile.c000066400000000000000000000173471226605124000215410ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "lockfile.h" #include #include #include #include #include #include "process.h" #include "timeval.h" #include "util.h" #include "vlog.h" struct test { const char *name; void (*function)(void); }; static const struct test tests[]; #define CHECK(A, B) check(A, B, #A, #B, __FILE__, __LINE__) static void check(int a, int b, const char *a_string, const char *b_string, const char *file, int line) { if (a != b) { fprintf(stderr, "%s:%d: expected %s == %s but %d != %d\n", file, line, a_string, b_string, a, b); fflush(stderr); abort(); } } static void run_lock_and_unlock(void) { struct lockfile *lockfile; CHECK(lockfile_lock("file", &lockfile), 0); lockfile_unlock(lockfile); } static void run_lock_and_unlock_twice(void) { struct lockfile *lockfile; CHECK(lockfile_lock("file", &lockfile), 0); lockfile_unlock(lockfile); CHECK(lockfile_lock("file", &lockfile), 0); lockfile_unlock(lockfile); } static void run_lock_blocks_same_process(void) { struct lockfile *lockfile; CHECK(lockfile_lock("file", &lockfile), 0); CHECK(lockfile_lock("file", &lockfile), EDEADLK); lockfile_unlock(lockfile); } static void run_lock_blocks_same_process_twice(void) { struct lockfile *lockfile; CHECK(lockfile_lock("file", &lockfile), 0); CHECK(lockfile_lock("file", &lockfile), EDEADLK); CHECK(lockfile_lock("file", &lockfile), EDEADLK); lockfile_unlock(lockfile); } static enum { PARENT, CHILD } do_fork(void) { switch (fork()) { case 0: time_postfork(); lockfile_postfork(); return CHILD; default: return PARENT; case -1: /* Error. */ ovs_fatal(errno, "fork failed"); } } static void run_lock_blocks_other_process(void) { /* Making this static prevents a memory leak warning from valgrind for the * parent process, which cannot easily unlock (and free) 'lockfile' because * it can only do so after the child has exited, and it's the caller of * this function that does the wait() call. 
*/ static struct lockfile *lockfile; CHECK(lockfile_lock("file", &lockfile), 0); if (do_fork() == CHILD) { lockfile_unlock(lockfile); CHECK(lockfile_lock("file", &lockfile), EAGAIN); exit(11); } } static void run_lock_twice_blocks_other_process(void) { struct lockfile *lockfile, *dummy; CHECK(lockfile_lock("file", &lockfile), 0); CHECK(lockfile_lock("file", &dummy), EDEADLK); if (do_fork() == CHILD) { CHECK(lockfile_lock("file", &dummy), EAGAIN); exit(11); } } static void run_lock_and_unlock_allows_other_process(void) { struct lockfile *lockfile; CHECK(lockfile_lock("file", &lockfile), 0); lockfile_unlock(lockfile); if (do_fork() == CHILD) { CHECK(lockfile_lock("file", &lockfile), 0); exit(11); } } static void run_lock_multiple(void) { struct lockfile *a, *b, *c, *dummy; CHECK(lockfile_lock("a", &a), 0); CHECK(lockfile_lock("b", &b), 0); CHECK(lockfile_lock("c", &c), 0); lockfile_unlock(a); CHECK(lockfile_lock("a", &a), 0); CHECK(lockfile_lock("a", &dummy), EDEADLK); lockfile_unlock(a); lockfile_unlock(b); CHECK(lockfile_lock("a", &a), 0); lockfile_unlock(c); lockfile_unlock(a); } /* Checks that locking a dangling symlink works OK. (It used to hang.) */ static void run_lock_symlink(void) { struct lockfile *a, *b, *dummy; struct stat s; /* Create a symlink .a.~lock~ pointing to .b.~lock~. 
*/ CHECK(symlink(".b.~lock~", ".a.~lock~"), 0); CHECK(lstat(".a.~lock~", &s), 0); CHECK(S_ISLNK(s.st_mode) != 0, 1); CHECK(stat(".a.~lock~", &s), -1); CHECK(errno, ENOENT); CHECK(stat(".b.~lock~", &s), -1); CHECK(errno, ENOENT); CHECK(lockfile_lock("a", &a), 0); CHECK(lockfile_lock("a", &dummy), EDEADLK); CHECK(lockfile_lock("b", &dummy), EDEADLK); lockfile_unlock(a); CHECK(lockfile_lock("b", &b), 0); CHECK(lockfile_lock("b", &dummy), EDEADLK); CHECK(lockfile_lock("a", &dummy), EDEADLK); lockfile_unlock(b); CHECK(lstat(".a.~lock~", &s), 0); CHECK(S_ISLNK(s.st_mode) != 0, 1); CHECK(stat(".a.~lock~", &s), 0); CHECK(S_ISREG(s.st_mode) != 0, 1); CHECK(stat(".b.~lock~", &s), 0); CHECK(S_ISREG(s.st_mode) != 0, 1); } /* Checks that locking a file that is itself a symlink yields a lockfile in the * directory that the symlink points to, named for the target of the * symlink. * * (That is, if "a" is a symlink to "dir/b", then "a"'s lockfile is named * "dir/.b.~lock".) */ static void run_lock_symlink_to_dir(void) { struct lockfile *a, *dummy; struct stat s; /* Create a symlink "a" pointing to "dir/b". */ CHECK(mkdir("dir", 0700), 0); CHECK(symlink("dir/b", "a"), 0); CHECK(lstat("a", &s), 0); CHECK(S_ISLNK(s.st_mode) != 0, 1); /* Lock 'a'. 
*/ CHECK(lockfile_lock("a", &a), 0); CHECK(lstat("dir/.b.~lock~", &s), 0); CHECK(S_ISREG(s.st_mode) != 0, 1); CHECK(lstat(".a.~lock~", &s), -1); CHECK(errno, ENOENT); CHECK(lockfile_lock("dir/b", &dummy), EDEADLK); lockfile_unlock(a); } static void run_help(void) { size_t i; printf("usage: %s TESTNAME\n" "where TESTNAME is one of the following:\n", program_name); for (i = 0; tests[i].name; i++) { fprintf(stderr, "\t%s\n", tests[i].name); } } static const struct test tests[] = { #define TEST(NAME) { #NAME, run_##NAME } TEST(lock_and_unlock), TEST(lock_and_unlock_twice), TEST(lock_blocks_same_process), TEST(lock_blocks_same_process_twice), TEST(lock_blocks_other_process), TEST(lock_twice_blocks_other_process), TEST(lock_and_unlock_allows_other_process), TEST(lock_multiple), TEST(lock_symlink), TEST(lock_symlink_to_dir), TEST(help), { NULL, NULL } #undef TEST }; int main(int argc, char *argv[]) { size_t i; set_program_name(argv[0]); vlog_set_pattern(VLF_CONSOLE, "%c|%p|%m"); vlog_set_levels(NULL, VLF_SYSLOG, VLL_OFF); if (argc != 2) { ovs_fatal(0, "exactly one argument required; use \"%s help\" for help", program_name); return 1; } for (i = 0; tests[i].name; i++) { if (!strcmp(argv[1], tests[i].name)) { int n_children; int status; (tests[i].function)(); n_children = 0; while (wait(&status) > 0) { if (WIFEXITED(status) && WEXITSTATUS(status) == 11) { n_children++; } else { ovs_fatal(0, "child exited in unexpected way: %s", process_status_msg(status)); } } if (errno != ECHILD) { ovs_fatal(errno, "wait"); } printf("%s: success (%d child%s)\n", tests[i].name, n_children, n_children != 1 ? "ren" : ""); exit(0); } } ovs_fatal(0, "unknown test \"%s\"; use \"%s help\" for help", argv[1], program_name); } openvswitch-2.0.1+git20140120/tests/test-multipath.c000066400000000000000000000075471226605124000217610ustar00rootroot00000000000000/* * Copyright (c) 2010, 2012, 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "multipath.h" #include #include #include #include #include #include "flow.h" #include "ofp-actions.h" #include "random.h" #include "util.h" int main(int argc, char *argv[]) { enum { MP_MAX_LINKS = 63 }; struct ofpact_multipath mp; bool ok = true; char *error; int n; set_program_name(argv[0]); random_init(); if (argc != 2) { ovs_fatal(0, "usage: %s multipath_action", program_name); } error = multipath_parse(&mp, argv[1]); if (error) { ovs_fatal(0, "%s", error); } for (n = 1; n <= MP_MAX_LINKS; n++) { enum { N_FLOWS = 65536 }; double disruption, perfect, distribution; int histogram[MP_MAX_LINKS]; double sum_dev2, stddev; int changed; int i; changed = 0; memset(histogram, 0, sizeof histogram); for (i = 0; i < N_FLOWS; i++) { int old_link, new_link; struct flow_wildcards wc; struct flow flow; random_bytes(&flow, sizeof flow); memset(flow.zeros, 0, sizeof flow.zeros); flow.mpls_depth = 0; mp.max_link = n - 1; multipath_execute(&mp, &flow, &wc); old_link = flow.regs[0]; mp.max_link = n; multipath_execute(&mp, &flow, &wc); new_link = flow.regs[0]; assert(old_link >= 0 && old_link < n); assert(new_link >= 0 && new_link < n + 1); histogram[old_link]++; changed += old_link != new_link; } sum_dev2 = 0.0; for (i = 0; i < n; i++) { double mean = (double) N_FLOWS / n; double deviation = histogram[i] - mean; sum_dev2 += deviation * deviation; } stddev = sqrt(sum_dev2 / n); disruption = (double) changed / N_FLOWS; perfect = 1.0 
/ (n + 1); distribution = stddev / ((double) N_FLOWS / n); printf("%2d -> %2d: disruption=%.2f (perfect=%.2f); " "stddev/expected=%.4f\n", n, n + 1, disruption, perfect, distribution); switch (mp.algorithm) { case NX_MP_ALG_MODULO_N: if (disruption < (n < 2 ? .25 : .5)) { fprintf(stderr, "%d -> %d: disruption=%.2f < .5\n", n, n + 1, disruption); ok = false; } break; case NX_MP_ALG_HASH_THRESHOLD: if (disruption < .48 || disruption > .52) { fprintf(stderr, "%d -> %d: disruption=%.2f not approximately " ".5\n", n, n + 1, disruption); ok = false; } break; case NX_MP_ALG_ITER_HASH: if (!(n & (n - 1))) { break; } /* Fall through. */ case NX_MP_ALG_HRW: if (fabs(disruption - perfect) >= .01) { fprintf(stderr, "%d -> %d: disruption=%.5f differs from " "perfect=%.5f by more than .01\n", n, n + 1, disruption, perfect); ok = false; } break; default: NOT_REACHED(); } } return ok ? 0 : 1; } openvswitch-2.0.1+git20140120/tests/test-netflow.c000066400000000000000000000200641226605124000214150ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include #include #include #include #include #include "command-line.h" #include "daemon.h" #include "dynamic-string.h" #include "netflow.h" #include "ofpbuf.h" #include "packets.h" #include "poll-loop.h" #include "socket-util.h" #include "unixctl.h" #include "util.h" #include "vlog.h" static void usage(void) NO_RETURN; static void parse_options(int argc, char *argv[]); static unixctl_cb_func test_netflow_exit; static void print_netflow(struct ofpbuf *buf) { const struct netflow_v5_header *hdr; int i; hdr = ofpbuf_try_pull(buf, sizeof *hdr); if (!hdr) { printf("truncated NetFlow packet header\n"); return; } printf("header: v%"PRIu16", " "uptime %"PRIu32", " "now %"PRIu32".%09"PRIu32", " "seq %"PRIu32", " "engine %"PRIu8",%"PRIu8, ntohs(hdr->version), ntohl(hdr->sysuptime), ntohl(hdr->unix_secs), ntohl(hdr->unix_nsecs), ntohl(hdr->flow_seq), hdr->engine_type, hdr->engine_id); if (hdr->sampling_interval != htons(0)) { printf(", interval %"PRIu16, ntohs(hdr->sampling_interval)); } putchar('\n'); for (i = 0; i < ntohs(hdr->count); i++) { struct netflow_v5_record *rec; rec = ofpbuf_try_pull(buf, sizeof *rec); if (!rec) { printf("truncated NetFlow records\n"); return; } printf("seq %"PRIu32": "IP_FMT" > "IP_FMT, ntohl(hdr->flow_seq), IP_ARGS(rec->src_addr), IP_ARGS(rec->dst_addr)); printf(", if %"PRIu16" > %"PRIu16, ntohs(rec->input), ntohs(rec->output)); printf(", %"PRIu32" pkts, %"PRIu32" bytes", ntohl(rec->packet_count), ntohl(rec->byte_count)); switch (rec->ip_proto) { case IPPROTO_TCP: printf(", TCP %"PRIu16" > %"PRIu16, ntohs(rec->src_port), ntohs(rec->dst_port)); if (rec->tcp_flags) { struct ds s = DS_EMPTY_INITIALIZER; packet_format_tcp_flags(&s, rec->tcp_flags); printf(" %s", ds_cstr(&s)); ds_destroy(&s); } break; case IPPROTO_UDP: printf(", UDP %"PRIu16" > %"PRIu16, ntohs(rec->src_port), ntohs(rec->dst_port)); break; case IPPROTO_SCTP: printf(", SCTP %"PRIu16" > %"PRIu16, ntohs(rec->src_port), ntohs(rec->dst_port)); break; case IPPROTO_ICMP: 
printf(", ICMP %"PRIu16":%"PRIu16, ntohs(rec->dst_port) >> 8, ntohs(rec->dst_port) & 0xff); if (rec->src_port != htons(0)) { printf(", src_port=%"PRIu16, ntohs(rec->src_port)); } break; default: printf(", proto %"PRIu8, rec->ip_proto); break; } if (rec->ip_proto != IPPROTO_TCP && rec->tcp_flags != 0) { printf(", flags %"PRIx8, rec->tcp_flags); } if (rec->ip_proto != IPPROTO_TCP && rec->ip_proto != IPPROTO_UDP && rec->ip_proto != IPPROTO_SCTP && rec->ip_proto != IPPROTO_ICMP) { if (rec->src_port != htons(0)) { printf(", src_port %"PRIu16, ntohs(rec->src_port)); } if (rec->dst_port != htons(0)) { printf(", dst_port %"PRIu16, ntohs(rec->dst_port)); } } if (rec->ip_tos) { printf(", TOS %"PRIx8, rec->ip_tos); } printf(", time %"PRIu32"...%"PRIu32, ntohl(rec->init_time), ntohl(rec->used_time)); if (rec->nexthop != htonl(0)) { printf(", nexthop "IP_FMT, IP_ARGS(rec->nexthop)); } if (rec->src_as != htons(0) || rec->dst_as != htons(0)) { printf(", AS %"PRIu16" > %"PRIu16, ntohs(rec->src_as), ntohs(rec->dst_as)); } if (rec->src_mask != 0 || rec->dst_mask != 0) { printf(", mask %"PRIu8" > %"PRIu8, rec->src_mask, rec->dst_mask); } if (rec->pad1) { printf(", pad1 %"PRIu8, rec->pad1); } if (rec->pad[0] || rec->pad[1]) { printf(", pad %"PRIu8", %"PRIu8, rec->pad[0], rec->pad[1]); } putchar('\n'); } if (buf->size) { printf("%zu extra bytes after last record\n", buf->size); } } int main(int argc, char *argv[]) { struct unixctl_server *server; enum { MAX_RECV = 1500 }; const char *target; struct ofpbuf buf; bool exiting = false; int error; int sock; int n; proctitle_init(argc, argv); set_program_name(argv[0]); parse_options(argc, argv); if (argc - optind != 1) { ovs_fatal(0, "exactly one non-option argument required " "(use --help for help)"); } target = argv[optind]; sock = inet_open_passive(SOCK_DGRAM, target, 0, NULL, 0); if (sock < 0) { ovs_fatal(0, "%s: failed to open (%s)", argv[1], ovs_strerror(-sock)); } daemon_save_fd(STDOUT_FILENO); daemonize_start(); error = 
unixctl_server_create(NULL, &server); if (error) { ovs_fatal(error, "failed to create unixctl server"); } unixctl_command_register("exit", "", 0, 0, test_netflow_exit, &exiting); daemonize_complete(); ofpbuf_init(&buf, MAX_RECV); n = 0; for (;;) { int retval; unixctl_server_run(server); ofpbuf_clear(&buf); do { retval = read(sock, buf.data, buf.allocated); } while (retval < 0 && errno == EINTR); if (retval > 0) { ofpbuf_put_uninit(&buf, retval); if (n++ > 0) { putchar('\n'); } print_netflow(&buf); fflush(stdout); } if (exiting) { break; } poll_fd_wait(sock, POLLIN); unixctl_server_wait(server); poll_block(); } return 0; } static void parse_options(int argc, char *argv[]) { enum { DAEMON_OPTION_ENUMS, VLOG_OPTION_ENUMS }; static const struct option long_options[] = { {"help", no_argument, NULL, 'h'}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { int c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { case 'h': usage(); DAEMON_OPTION_HANDLERS VLOG_OPTION_HANDLERS case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); } static void usage(void) { printf("%s: netflow collector test utility\n" "usage: %s [OPTIONS] PORT[:IP]\n" "where PORT is the UDP port to listen on and IP is optionally\n" "the IP address to listen on.\n", program_name, program_name); daemon_usage(); vlog_usage(); printf("\nOther options:\n" " -h, --help display this help message\n"); exit(EXIT_SUCCESS); } static void test_netflow_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *exiting_) { bool *exiting = exiting_; *exiting = true; unixctl_command_reply(conn, NULL); } openvswitch-2.0.1+git20140120/tests/test-odp.c000066400000000000000000000102311226605124000205140ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include "dynamic-string.h" #include "flow.h" #include "odp-util.h" #include "ofpbuf.h" #include "util.h" #include "vlog.h" static int parse_keys(bool wc_keys) { int exit_code = 0; struct ds in; ds_init(&in); vlog_set_levels_from_string_assert("odp_util:console:dbg"); while (!ds_get_test_line(&in, stdin)) { enum odp_key_fitness fitness; struct ofpbuf odp_key; struct ofpbuf odp_mask; struct flow flow; struct ds out; int error; /* Convert string to OVS DP key. */ ofpbuf_init(&odp_key, 0); ofpbuf_init(&odp_mask, 0); error = odp_flow_from_string(ds_cstr(&in), NULL, &odp_key, &odp_mask); if (error) { printf("odp_flow_from_string: error\n"); goto next; } if (!wc_keys) { /* Convert odp_key to flow. */ fitness = odp_flow_key_to_flow(odp_key.data, odp_key.size, &flow); switch (fitness) { case ODP_FIT_PERFECT: break; case ODP_FIT_TOO_LITTLE: printf("ODP_FIT_TOO_LITTLE: "); break; case ODP_FIT_TOO_MUCH: printf("ODP_FIT_TOO_MUCH: "); break; case ODP_FIT_ERROR: printf("odp_flow_key_to_flow: error\n"); goto next; } /* Convert cls_rule back to odp_key. */ ofpbuf_uninit(&odp_key); ofpbuf_init(&odp_key, 0); odp_flow_key_from_flow(&odp_key, &flow, flow.in_port.odp_port); if (odp_key.size > ODPUTIL_FLOW_KEY_BYTES) { printf ("too long: %zu > %d\n", odp_key.size, ODPUTIL_FLOW_KEY_BYTES); exit_code = 1; } } /* Convert odp_key to string. 
*/ ds_init(&out); if (wc_keys) { odp_flow_format(odp_key.data, odp_key.size, odp_mask.data, odp_mask.size, &out, false); } else { odp_flow_key_format(odp_key.data, odp_key.size, &out); } puts(ds_cstr(&out)); ds_destroy(&out); next: ofpbuf_uninit(&odp_key); } ds_destroy(&in); return exit_code; } static int parse_actions(void) { struct ds in; ds_init(&in); vlog_set_levels_from_string_assert("odp_util:console:dbg"); while (!ds_get_test_line(&in, stdin)) { struct ofpbuf odp_actions; struct ds out; int error; /* Convert string to OVS DP actions. */ ofpbuf_init(&odp_actions, 0); error = odp_actions_from_string(ds_cstr(&in), NULL, &odp_actions); if (error) { printf("odp_actions_from_string: error\n"); goto next; } /* Convert odp_actions back to string. */ ds_init(&out); format_odp_actions(&out, odp_actions.data, odp_actions.size); puts(ds_cstr(&out)); ds_destroy(&out); next: ofpbuf_uninit(&odp_actions); } ds_destroy(&in); return 0; } int main(int argc, char *argv[]) { if (argc == 2 &&!strcmp(argv[1], "parse-keys")) { return parse_keys(false); } else if (argc == 2 &&!strcmp(argv[1], "parse-wc-keys")) { return parse_keys(true); } else if (argc == 2 && !strcmp(argv[1], "parse-actions")) { return parse_actions(); } else { ovs_fatal(0, "usage: %s parse-keys | parse-wc-keys | parse-actions", argv[0]); } } openvswitch-2.0.1+git20140120/tests/test-ovsdb.c000066400000000000000000001643511226605124000210640ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include "command-line.h" #include "dynamic-string.h" #include "json.h" #include "jsonrpc.h" #include "ovsdb-data.h" #include "ovsdb-error.h" #include "ovsdb-idl.h" #include "ovsdb-types.h" #include "ovsdb/column.h" #include "ovsdb/condition.h" #include "ovsdb/file.h" #include "ovsdb/log.h" #include "ovsdb/mutation.h" #include "ovsdb/ovsdb.h" #include "ovsdb/query.h" #include "ovsdb/row.h" #include "ovsdb/server.h" #include "ovsdb/table.h" #include "ovsdb/transaction.h" #include "ovsdb/trigger.h" #include "poll-loop.h" #include "stream.h" #include "svec.h" #include "tests/idltest.h" #include "timeval.h" #include "util.h" #include "vlog.h" static struct command all_commands[]; static void usage(void) NO_RETURN; static void parse_options(int argc, char *argv[]); int main(int argc, char *argv[]) { set_program_name(argv[0]); parse_options(argc, argv); run_command(argc - optind, argv + optind, all_commands); return 0; } static void parse_options(int argc, char *argv[]) { static const struct option long_options[] = { {"timeout", required_argument, NULL, 't'}, {"verbose", optional_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { unsigned long int timeout; int c; c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { case 't': timeout = strtoul(optarg, NULL, 10); if (timeout <= 0) { ovs_fatal(0, "value %s on -t or --timeout is not at least 1", optarg); } else { time_alarm(timeout); } break; case 'h': usage(); case 'v': vlog_set_verbosity(optarg); break; case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); } static void usage(void) { printf("%s: Open vSwitch database test utility\n" "usage: %s [OPTIONS] COMMAND [ARG...]\n\n" " log-io 
FILE FLAGS COMMAND...\n" " open FILE with FLAGS, run COMMANDs\n" " default-atoms\n" " test ovsdb_atom_default()\n" " default-data\n" " test ovsdb_datum_default()\n" " parse-atomic-type TYPE\n" " parse TYPE as OVSDB atomic type, and re-serialize\n" " parse-base-type TYPE\n" " parse TYPE as OVSDB base type, and re-serialize\n" " parse-type JSON\n" " parse JSON as OVSDB type, and re-serialize\n" " parse-atoms TYPE ATOM...\n" " parse JSON ATOMs as atoms of TYPE, and re-serialize\n" " parse-atom-strings TYPE ATOM...\n" " parse string ATOMs as atoms of given TYPE, and re-serialize\n" " sort-atoms TYPE ATOM...\n" " print JSON ATOMs in sorted order\n" " parse-data TYPE DATUM...\n" " parse JSON DATUMs as data of given TYPE, and re-serialize\n" " parse-data-strings TYPE DATUM...\n" " parse string DATUMs as data of given TYPE, and re-serialize\n" " parse-column NAME OBJECT\n" " parse column NAME with info OBJECT, and re-serialize\n" " parse-table NAME OBJECT [DEFAULT-IS-ROOT]\n" " parse table NAME with info OBJECT\n" " parse-row TABLE ROW..., and re-serialize\n" " parse each ROW of defined TABLE\n" " compare-row TABLE ROW...\n" " mutually compare all of the ROWs, print those that are equal\n" " parse-conditions TABLE CONDITION...\n" " parse each CONDITION on TABLE, and re-serialize\n" " evaluate-conditions TABLE [CONDITION,...] [ROW,...]\n" " test CONDITIONS on TABLE against each ROW, print results\n" " parse-mutations TABLE MUTATION...\n" " parse each MUTATION on TABLE, and re-serialize\n" " execute-mutations TABLE [MUTATION,...] [ROW,...]\n" " execute MUTATIONS on TABLE on each ROW, print results\n" " query TABLE [ROW,...] [CONDITION,...]\n" " add each ROW to TABLE, then query and print the rows that\n" " satisfy each CONDITION.\n" " query-distinct TABLE [ROW,...] [CONDITION,...] 
COLUMNS\n" " add each ROW to TABLE, then query and print the rows that\n" " satisfy each CONDITION and have distinct COLUMNS.\n" " parse-schema JSON\n" " parse JSON as an OVSDB schema, and re-serialize\n" " transact COMMAND\n" " execute each specified transactional COMMAND:\n" " commit\n" " abort\n" " insert UUID I J\n" " delete UUID\n" " modify UUID I J\n" " print\n" " execute SCHEMA TRANSACTION...\n" " executes each TRANSACTION on an initially empty database\n" " the specified SCHEMA\n" " trigger SCHEMA TRANSACTION...\n" " executes each TRANSACTION on an initially empty database\n" " the specified SCHEMA. A TRANSACTION of the form\n" " [\"advance\", NUMBER] advances NUMBER milliseconds in\n" " simulated time, for causing triggers to time out.\n" " idl SERVER [TRANSACTION...]\n" " connect to SERVER and dump the contents of the database\n" " as seen initially by the IDL implementation and after\n" " executing each TRANSACTION. (Each TRANSACTION must modify\n" " the database or this command will hang.)\n", program_name, program_name); vlog_usage(); printf("\nOther options:\n" " -t, --timeout=SECS give up after SECS seconds\n" " -h, --help display this help message\n"); exit(EXIT_SUCCESS); } /* Command helper functions. 
*/ static struct json * parse_json(const char *s) { struct json *json = json_from_string(s); if (json->type == JSON_STRING) { ovs_fatal(0, "\"%s\": %s", s, json->u.string); } return json; } static struct json * unbox_json(struct json *json) { if (json->type == JSON_ARRAY && json->u.array.n == 1) { struct json *inner = json->u.array.elems[0]; json->u.array.elems[0] = NULL; json_destroy(json); return inner; } else { return json; } } static size_t print_and_free_json(struct json *json) { char *string = json_to_string(json, JSSF_SORT); size_t length = strlen(string); json_destroy(json); puts(string); free(string); return length; } static void print_and_free_ovsdb_error(struct ovsdb_error *error) { char *string = ovsdb_error_to_string(error); ovsdb_error_destroy(error); puts(string); free(string); } static void check_ovsdb_error(struct ovsdb_error *error) { if (error) { char *s = ovsdb_error_to_string(error); ovsdb_error_destroy(error); ovs_fatal(0, "%s", s); } } static void die_if_error(char *error) { if (error) { ovs_fatal(0, "%s", error); } } /* Command implementations. 
*/ static void do_log_io(int argc, char *argv[]) { const char *name = argv[1]; char *mode_string = argv[2]; struct ovsdb_error *error; enum ovsdb_log_open_mode mode; struct ovsdb_log *log; int i; if (!strcmp(mode_string, "read-only")) { mode = OVSDB_LOG_READ_ONLY; } else if (!strcmp(mode_string, "read/write")) { mode = OVSDB_LOG_READ_WRITE; } else if (!strcmp(mode_string, "create")) { mode = OVSDB_LOG_CREATE; } else { ovs_fatal(0, "unknown log-io open mode \"%s\"", mode_string); } check_ovsdb_error(ovsdb_log_open(name, mode, -1, &log)); printf("%s: open successful\n", name); for (i = 3; i < argc; i++) { const char *command = argv[i]; if (!strcmp(command, "read")) { struct json *json; error = ovsdb_log_read(log, &json); if (!error) { printf("%s: read: ", name); if (json) { print_and_free_json(json); } else { printf("end of log\n"); } continue; } } else if (!strncmp(command, "write:", 6)) { struct json *json = parse_json(command + 6); error = ovsdb_log_write(log, json); json_destroy(json); } else if (!strcmp(command, "commit")) { error = ovsdb_log_commit(log); } else { ovs_fatal(0, "unknown log-io command \"%s\"", command); } if (error) { char *s = ovsdb_error_to_string(error); printf("%s: %s failed: %s\n", name, command, s); free(s); ovsdb_error_destroy(error); } else { printf("%s: %s successful\n", name, command); } } ovsdb_log_close(log); } static void do_default_atoms(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { int type; for (type = 0; type < OVSDB_N_TYPES; type++) { union ovsdb_atom atom; if (type == OVSDB_TYPE_VOID) { continue; } printf("%s: ", ovsdb_atomic_type_to_string(type)); ovsdb_atom_init_default(&atom, type); if (!ovsdb_atom_equals(&atom, ovsdb_atom_default(type), type)) { printf("wrong\n"); exit(1); } ovsdb_atom_destroy(&atom, type); printf("OK\n"); } } static void do_default_data(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned int n_min; int key, value; for (n_min = 0; n_min <= 1; n_min++) { for (key = 0; key < OVSDB_N_TYPES; key++) { if 
(key == OVSDB_TYPE_VOID) { continue; } for (value = 0; value < OVSDB_N_TYPES; value++) { struct ovsdb_datum datum; struct ovsdb_type type; ovsdb_base_type_init(&type.key, key); ovsdb_base_type_init(&type.value, value); type.n_min = n_min; type.n_max = 1; assert(ovsdb_type_is_valid(&type)); printf("key %s, value %s, n_min %u: ", ovsdb_atomic_type_to_string(key), ovsdb_atomic_type_to_string(value), n_min); ovsdb_datum_init_default(&datum, &type); if (!ovsdb_datum_equals(&datum, ovsdb_datum_default(&type), &type)) { printf("wrong\n"); exit(1); } ovsdb_datum_destroy(&datum, &type); ovsdb_type_destroy(&type); printf("OK\n"); } } } } static void do_parse_atomic_type(int argc OVS_UNUSED, char *argv[]) { enum ovsdb_atomic_type type; struct json *json; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_atomic_type_from_json(&type, json)); json_destroy(json); print_and_free_json(ovsdb_atomic_type_to_json(type)); } static void do_parse_base_type(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_base_type base; struct json *json; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_base_type_from_json(&base, json)); json_destroy(json); print_and_free_json(ovsdb_base_type_to_json(&base)); ovsdb_base_type_destroy(&base); } static void do_parse_type(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_type type; struct json *json; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_type_from_json(&type, json)); json_destroy(json); print_and_free_json(ovsdb_type_to_json(&type)); ovsdb_type_destroy(&type); } static void do_parse_atoms(int argc, char *argv[]) { struct ovsdb_base_type base; struct json *json; int i; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_base_type_from_json(&base, json)); json_destroy(json); for (i = 2; i < argc; i++) { struct ovsdb_error *error; union ovsdb_atom atom; json = unbox_json(parse_json(argv[i])); error = ovsdb_atom_from_json(&atom, &base, json, NULL); json_destroy(json); if (error) { 
print_and_free_ovsdb_error(error); } else { size_t length; length = print_and_free_json(ovsdb_atom_to_json(&atom, base.type)); ovs_assert(length == ovsdb_atom_json_length(&atom, base.type)); ovsdb_atom_destroy(&atom, base.type); } } ovsdb_base_type_destroy(&base); } static void do_parse_atom_strings(int argc, char *argv[]) { struct ovsdb_base_type base; struct json *json; int i; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_base_type_from_json(&base, json)); json_destroy(json); for (i = 2; i < argc; i++) { union ovsdb_atom atom; struct ds out; die_if_error(ovsdb_atom_from_string(&atom, &base, argv[i], NULL)); ds_init(&out); ovsdb_atom_to_string(&atom, base.type, &out); puts(ds_cstr(&out)); ds_destroy(&out); ovsdb_atom_destroy(&atom, base.type); } ovsdb_base_type_destroy(&base); } static void do_parse_data__(int argc, char *argv[], struct ovsdb_error * (*parse)(struct ovsdb_datum *datum, const struct ovsdb_type *type, const struct json *json, struct ovsdb_symbol_table *symtab)) { struct ovsdb_type type; struct json *json; int i; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_type_from_json(&type, json)); json_destroy(json); for (i = 2; i < argc; i++) { struct ovsdb_datum datum; size_t length; json = unbox_json(parse_json(argv[i])); check_ovsdb_error(parse(&datum, &type, json, NULL)); json_destroy(json); length = print_and_free_json(ovsdb_datum_to_json(&datum, &type)); ovs_assert(length == ovsdb_datum_json_length(&datum, &type)); ovsdb_datum_destroy(&datum, &type); } ovsdb_type_destroy(&type); } static void do_parse_data(int argc, char *argv[]) { do_parse_data__(argc, argv, ovsdb_datum_from_json); } static void do_parse_data_strings(int argc, char *argv[]) { struct ovsdb_type type; struct json *json; int i; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_type_from_json(&type, json)); json_destroy(json); for (i = 2; i < argc; i++) { struct ovsdb_datum datum; struct ds out; die_if_error(ovsdb_datum_from_string(&datum, 
&type, argv[i], NULL)); ds_init(&out); ovsdb_datum_to_string(&datum, &type, &out); puts(ds_cstr(&out)); ds_destroy(&out); ovsdb_datum_destroy(&datum, &type); } ovsdb_type_destroy(&type); } static enum ovsdb_atomic_type compare_atoms_atomic_type; static int compare_atoms(const void *a_, const void *b_) { const union ovsdb_atom *a = a_; const union ovsdb_atom *b = b_; return ovsdb_atom_compare_3way(a, b, compare_atoms_atomic_type); } static void do_sort_atoms(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_base_type base; union ovsdb_atom *atoms; struct json *json, **json_atoms; size_t n_atoms; int i; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_base_type_from_json(&base, json)); json_destroy(json); json = unbox_json(parse_json(argv[2])); if (json->type != JSON_ARRAY) { ovs_fatal(0, "second argument must be array"); } /* Convert JSON atoms to internal representation. */ n_atoms = json->u.array.n; atoms = xmalloc(n_atoms * sizeof *atoms); for (i = 0; i < n_atoms; i++) { check_ovsdb_error(ovsdb_atom_from_json(&atoms[i], &base, json->u.array.elems[i], NULL)); } json_destroy(json); /* Sort atoms. */ compare_atoms_atomic_type = base.type; qsort(atoms, n_atoms, sizeof *atoms, compare_atoms); /* Convert internal representation back to JSON. 
*/ json_atoms = xmalloc(n_atoms * sizeof *json_atoms); for (i = 0; i < n_atoms; i++) { json_atoms[i] = ovsdb_atom_to_json(&atoms[i], base.type); ovsdb_atom_destroy(&atoms[i], base.type); } print_and_free_json(json_array_create(json_atoms, n_atoms)); free(atoms); ovsdb_base_type_destroy(&base); } static void do_parse_column(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_column *column; struct json *json; json = parse_json(argv[2]); check_ovsdb_error(ovsdb_column_from_json(json, argv[1], &column)); json_destroy(json); print_and_free_json(ovsdb_column_to_json(column)); ovsdb_column_destroy(column); } static void do_parse_table(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_table_schema *ts; bool default_is_root; struct json *json; default_is_root = argc > 3 && !strcmp(argv[3], "true"); json = parse_json(argv[2]); check_ovsdb_error(ovsdb_table_schema_from_json(json, argv[1], &ts)); json_destroy(json); print_and_free_json(ovsdb_table_schema_to_json(ts, default_is_root)); ovsdb_table_schema_destroy(ts); } static void do_parse_rows(int argc, char *argv[]) { struct ovsdb_column_set all_columns; struct ovsdb_table_schema *ts; struct ovsdb_table *table; struct json *json; int i; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_table_schema_from_json(json, "mytable", &ts)); json_destroy(json); table = ovsdb_table_create(ts); ovsdb_column_set_init(&all_columns); ovsdb_column_set_add_all(&all_columns, table); for (i = 2; i < argc; i++) { struct ovsdb_column_set columns; struct ovsdb_row *row; ovsdb_column_set_init(&columns); row = ovsdb_row_create(table); json = unbox_json(parse_json(argv[i])); check_ovsdb_error(ovsdb_row_from_json(row, json, NULL, &columns)); json_destroy(json); print_and_free_json(ovsdb_row_to_json(row, &all_columns)); if (columns.n_columns) { struct svec names; size_t j; char *s; svec_init(&names); for (j = 0; j < columns.n_columns; j++) { svec_add(&names, columns.columns[j]->name); } svec_sort(&names); s = svec_join(&names, ", ", ""); 
puts(s); free(s); svec_destroy(&names); } else { printf("\n"); } ovsdb_column_set_destroy(&columns); ovsdb_row_destroy(row); } ovsdb_column_set_destroy(&all_columns); ovsdb_table_destroy(table); /* Also destroys 'ts'. */ } static void do_compare_rows(int argc, char *argv[]) { struct ovsdb_column_set all_columns; struct ovsdb_table_schema *ts; struct ovsdb_table *table; struct ovsdb_row **rows; struct json *json; char **names; int n_rows; int i, j; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_table_schema_from_json(json, "mytable", &ts)); json_destroy(json); table = ovsdb_table_create(ts); ovsdb_column_set_init(&all_columns); ovsdb_column_set_add_all(&all_columns, table); n_rows = argc - 2; rows = xmalloc(sizeof *rows * n_rows); names = xmalloc(sizeof *names * n_rows); for (i = 0; i < n_rows; i++) { rows[i] = ovsdb_row_create(table); json = parse_json(argv[i + 2]); if (json->type != JSON_ARRAY || json->u.array.n != 2 || json->u.array.elems[0]->type != JSON_STRING) { ovs_fatal(0, "\"%s\" does not have expected form " "[\"name\", {data}]", argv[i]); } names[i] = xstrdup(json->u.array.elems[0]->u.string); check_ovsdb_error(ovsdb_row_from_json(rows[i], json->u.array.elems[1], NULL, NULL)); json_destroy(json); } for (i = 0; i < n_rows; i++) { uint32_t i_hash = ovsdb_row_hash_columns(rows[i], &all_columns, 0); for (j = i + 1; j < n_rows; j++) { uint32_t j_hash = ovsdb_row_hash_columns(rows[j], &all_columns, 0); if (ovsdb_row_equal_columns(rows[i], rows[j], &all_columns)) { printf("%s == %s\n", names[i], names[j]); if (i_hash != j_hash) { printf("but hash(%s) != hash(%s)\n", names[i], names[j]); abort(); } } else if (i_hash == j_hash) { printf("hash(%s) == hash(%s)\n", names[i], names[j]); } } } for (i = 0; i < n_rows; i++) { ovsdb_row_destroy(rows[i]); free(names[i]); } free(rows); free(names); ovsdb_column_set_destroy(&all_columns); ovsdb_table_destroy(table); /* Also destroys 'ts'. 
*/ } static void do_parse_conditions(int argc, char *argv[]) { struct ovsdb_table_schema *ts; struct json *json; int exit_code = 0; int i; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_table_schema_from_json(json, "mytable", &ts)); json_destroy(json); for (i = 2; i < argc; i++) { struct ovsdb_condition cnd; struct ovsdb_error *error; json = parse_json(argv[i]); error = ovsdb_condition_from_json(ts, json, NULL, &cnd); if (!error) { print_and_free_json(ovsdb_condition_to_json(&cnd)); } else { char *s = ovsdb_error_to_string(error); ovs_error(0, "%s", s); free(s); ovsdb_error_destroy(error); exit_code = 1; } json_destroy(json); ovsdb_condition_destroy(&cnd); } ovsdb_table_schema_destroy(ts); exit(exit_code); } static void do_evaluate_conditions(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_table_schema *ts; struct ovsdb_table *table; struct ovsdb_condition *conditions; size_t n_conditions; struct ovsdb_row **rows; size_t n_rows; struct json *json; size_t i, j; /* Parse table schema, create table. */ json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_table_schema_from_json(json, "mytable", &ts)); json_destroy(json); table = ovsdb_table_create(ts); /* Parse conditions. */ json = parse_json(argv[2]); if (json->type != JSON_ARRAY) { ovs_fatal(0, "CONDITION argument is not JSON array"); } n_conditions = json->u.array.n; conditions = xmalloc(n_conditions * sizeof *conditions); for (i = 0; i < n_conditions; i++) { check_ovsdb_error(ovsdb_condition_from_json(ts, json->u.array.elems[i], NULL, &conditions[i])); } json_destroy(json); /* Parse rows. 
*/ json = parse_json(argv[3]); if (json->type != JSON_ARRAY) { ovs_fatal(0, "ROW argument is not JSON array"); } n_rows = json->u.array.n; rows = xmalloc(n_rows * sizeof *rows); for (i = 0; i < n_rows; i++) { rows[i] = ovsdb_row_create(table); check_ovsdb_error(ovsdb_row_from_json(rows[i], json->u.array.elems[i], NULL, NULL)); } json_destroy(json); for (i = 0; i < n_conditions; i++) { printf("condition %2zu:", i); for (j = 0; j < n_rows; j++) { bool result = ovsdb_condition_evaluate(rows[j], &conditions[i]); if (j % 5 == 0) { putchar(' '); } putchar(result ? 'T' : '-'); } printf("\n"); } for (i = 0; i < n_conditions; i++) { ovsdb_condition_destroy(&conditions[i]); } free(conditions); for (i = 0; i < n_rows; i++) { ovsdb_row_destroy(rows[i]); } free(rows); ovsdb_table_destroy(table); /* Also destroys 'ts'. */ } static void do_parse_mutations(int argc, char *argv[]) { struct ovsdb_table_schema *ts; struct json *json; int exit_code = 0; int i; json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_table_schema_from_json(json, "mytable", &ts)); json_destroy(json); for (i = 2; i < argc; i++) { struct ovsdb_mutation_set set; struct ovsdb_error *error; json = parse_json(argv[i]); error = ovsdb_mutation_set_from_json(ts, json, NULL, &set); if (!error) { print_and_free_json(ovsdb_mutation_set_to_json(&set)); } else { char *s = ovsdb_error_to_string(error); ovs_error(0, "%s", s); free(s); ovsdb_error_destroy(error); exit_code = 1; } json_destroy(json); ovsdb_mutation_set_destroy(&set); } ovsdb_table_schema_destroy(ts); exit(exit_code); } static void do_execute_mutations(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_table_schema *ts; struct ovsdb_table *table; struct ovsdb_mutation_set *sets; size_t n_sets; struct ovsdb_row **rows; size_t n_rows; struct json *json; size_t i, j; /* Parse table schema, create table. 
*/ json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_table_schema_from_json(json, "mytable", &ts)); json_destroy(json); table = ovsdb_table_create(ts); /* Parse mutations. */ json = parse_json(argv[2]); if (json->type != JSON_ARRAY) { ovs_fatal(0, "MUTATION argument is not JSON array"); } n_sets = json->u.array.n; sets = xmalloc(n_sets * sizeof *sets); for (i = 0; i < n_sets; i++) { check_ovsdb_error(ovsdb_mutation_set_from_json(ts, json->u.array.elems[i], NULL, &sets[i])); } json_destroy(json); /* Parse rows. */ json = parse_json(argv[3]); if (json->type != JSON_ARRAY) { ovs_fatal(0, "ROW argument is not JSON array"); } n_rows = json->u.array.n; rows = xmalloc(n_rows * sizeof *rows); for (i = 0; i < n_rows; i++) { rows[i] = ovsdb_row_create(table); check_ovsdb_error(ovsdb_row_from_json(rows[i], json->u.array.elems[i], NULL, NULL)); } json_destroy(json); for (i = 0; i < n_sets; i++) { printf("mutation %2zu:\n", i); for (j = 0; j < n_rows; j++) { struct ovsdb_error *error; struct ovsdb_row *row; row = ovsdb_row_clone(rows[j]); error = ovsdb_mutation_set_execute(row, &sets[i]); printf("row %zu: ", j); if (error) { print_and_free_ovsdb_error(error); } else { struct ovsdb_column_set columns; struct shash_node *node; ovsdb_column_set_init(&columns); SHASH_FOR_EACH (node, &ts->columns) { struct ovsdb_column *c = node->data; if (!ovsdb_datum_equals(&row->fields[c->index], &rows[j]->fields[c->index], &c->type)) { ovsdb_column_set_add(&columns, c); } } if (columns.n_columns) { print_and_free_json(ovsdb_row_to_json(row, &columns)); } else { printf("no change\n"); } ovsdb_column_set_destroy(&columns); } ovsdb_row_destroy(row); } printf("\n"); } for (i = 0; i < n_sets; i++) { ovsdb_mutation_set_destroy(&sets[i]); } free(sets); for (i = 0; i < n_rows; i++) { ovsdb_row_destroy(rows[i]); } free(rows); ovsdb_table_destroy(table); /* Also destroys 'ts'. */ } /* Inserts a row, without bothering to update metadata such as refcounts. 
*/ static void put_row(struct ovsdb_table *table, struct ovsdb_row *row) { const struct uuid *uuid = ovsdb_row_get_uuid(row); if (!ovsdb_table_get_row(table, uuid)) { hmap_insert(&table->rows, &row->hmap_node, uuid_hash(uuid)); } } struct do_query_cbdata { struct uuid *row_uuids; int *counts; size_t n_rows; }; static bool do_query_cb(const struct ovsdb_row *row, void *cbdata_) { struct do_query_cbdata *cbdata = cbdata_; size_t i; for (i = 0; i < cbdata->n_rows; i++) { if (uuid_equals(ovsdb_row_get_uuid(row), &cbdata->row_uuids[i])) { cbdata->counts[i]++; } } return true; } static void do_query(int argc OVS_UNUSED, char *argv[]) { struct do_query_cbdata cbdata; struct ovsdb_table_schema *ts; struct ovsdb_table *table; struct json *json; int exit_code = 0; size_t i; /* Parse table schema, create table. */ json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_table_schema_from_json(json, "mytable", &ts)); json_destroy(json); table = ovsdb_table_create(ts); /* Parse rows, add to table. */ json = parse_json(argv[2]); if (json->type != JSON_ARRAY) { ovs_fatal(0, "ROW argument is not JSON array"); } cbdata.n_rows = json->u.array.n; cbdata.row_uuids = xmalloc(cbdata.n_rows * sizeof *cbdata.row_uuids); cbdata.counts = xmalloc(cbdata.n_rows * sizeof *cbdata.counts); for (i = 0; i < cbdata.n_rows; i++) { struct ovsdb_row *row = ovsdb_row_create(table); uuid_generate(ovsdb_row_get_uuid_rw(row)); check_ovsdb_error(ovsdb_row_from_json(row, json->u.array.elems[i], NULL, NULL)); if (ovsdb_table_get_row(table, ovsdb_row_get_uuid(row))) { ovs_fatal(0, "duplicate UUID "UUID_FMT" in table", UUID_ARGS(ovsdb_row_get_uuid(row))); } cbdata.row_uuids[i] = *ovsdb_row_get_uuid(row); put_row(table, row); } json_destroy(json); /* Parse conditions and execute queries. 
*/ json = parse_json(argv[3]); if (json->type != JSON_ARRAY) { ovs_fatal(0, "CONDITION argument is not JSON array"); } for (i = 0; i < json->u.array.n; i++) { struct ovsdb_condition cnd; size_t j; check_ovsdb_error(ovsdb_condition_from_json(ts, json->u.array.elems[i], NULL, &cnd)); memset(cbdata.counts, 0, cbdata.n_rows * sizeof *cbdata.counts); ovsdb_query(table, &cnd, do_query_cb, &cbdata); printf("query %2zu:", i); for (j = 0; j < cbdata.n_rows; j++) { if (j % 5 == 0) { putchar(' '); } if (cbdata.counts[j]) { printf("%d", cbdata.counts[j]); if (cbdata.counts[j] > 1) { /* Dup! */ exit_code = 1; } } else { putchar('-'); } } putchar('\n'); ovsdb_condition_destroy(&cnd); } json_destroy(json); ovsdb_table_destroy(table); /* Also destroys 'ts'. */ exit(exit_code); } struct do_query_distinct_class { struct ovsdb_row *example; int count; }; struct do_query_distinct_row { struct uuid uuid; struct do_query_distinct_class *class; }; static void do_query_distinct(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_column_set columns; struct ovsdb_table_schema *ts; struct ovsdb_table *table; struct do_query_distinct_row *rows; size_t n_rows; struct do_query_distinct_class *classes; size_t n_classes; struct json *json; int exit_code = 0; size_t i; /* Parse table schema, create table. */ json = unbox_json(parse_json(argv[1])); check_ovsdb_error(ovsdb_table_schema_from_json(json, "mytable", &ts)); json_destroy(json); table = ovsdb_table_create(ts); /* Parse column set. */ json = parse_json(argv[4]); check_ovsdb_error(ovsdb_column_set_from_json(json, table->schema, &columns)); json_destroy(json); /* Parse rows, add to table. */ json = parse_json(argv[2]); if (json->type != JSON_ARRAY) { ovs_fatal(0, "ROW argument is not JSON array"); } n_rows = json->u.array.n; rows = xmalloc(n_rows * sizeof *rows); classes = xmalloc(n_rows * sizeof *classes); n_classes = 0; for (i = 0; i < n_rows; i++) { struct ovsdb_row *row; size_t j; /* Parse row. 
*/ row = ovsdb_row_create(table); uuid_generate(ovsdb_row_get_uuid_rw(row)); check_ovsdb_error(ovsdb_row_from_json(row, json->u.array.elems[i], NULL, NULL)); /* Initialize row and find equivalence class. */ rows[i].uuid = *ovsdb_row_get_uuid(row); rows[i].class = NULL; for (j = 0; j < n_classes; j++) { if (ovsdb_row_equal_columns(row, classes[j].example, &columns)) { rows[i].class = &classes[j]; break; } } if (!rows[i].class) { rows[i].class = &classes[n_classes]; classes[n_classes].example = ovsdb_row_clone(row); n_classes++; } /* Add row to table. */ if (ovsdb_table_get_row(table, ovsdb_row_get_uuid(row))) { ovs_fatal(0, "duplicate UUID "UUID_FMT" in table", UUID_ARGS(ovsdb_row_get_uuid(row))); } put_row(table, row); } json_destroy(json); /* Parse conditions and execute queries. */ json = parse_json(argv[3]); if (json->type != JSON_ARRAY) { ovs_fatal(0, "CONDITION argument is not JSON array"); } for (i = 0; i < json->u.array.n; i++) { struct ovsdb_row_set results; struct ovsdb_condition cnd; size_t j; check_ovsdb_error(ovsdb_condition_from_json(ts, json->u.array.elems[i], NULL, &cnd)); for (j = 0; j < n_classes; j++) { classes[j].count = 0; } ovsdb_row_set_init(&results); ovsdb_query_distinct(table, &cnd, &columns, &results); for (j = 0; j < results.n_rows; j++) { size_t k; for (k = 0; k < n_rows; k++) { if (uuid_equals(ovsdb_row_get_uuid(results.rows[j]), &rows[k].uuid)) { rows[k].class->count++; } } } ovsdb_row_set_destroy(&results); printf("query %2zu:", i); for (j = 0; j < n_rows; j++) { int count = rows[j].class->count; if (j % 5 == 0) { putchar(' '); } if (count > 1) { /* Dup! */ printf("%d", count); exit_code = 1; } else if (count == 1) { putchar("abcdefghijklmnopqrstuvwxyz"[rows[j].class - classes]); } else { putchar('-'); } } putchar('\n'); ovsdb_condition_destroy(&cnd); } json_destroy(json); ovsdb_table_destroy(table); /* Also destroys 'ts'. 
*/ exit(exit_code); } static void do_parse_schema(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_schema *schema; struct json *json; json = parse_json(argv[1]); check_ovsdb_error(ovsdb_schema_from_json(json, &schema)); json_destroy(json); print_and_free_json(ovsdb_schema_to_json(schema)); ovsdb_schema_destroy(schema); } static void do_execute(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_schema *schema; struct json *json; struct ovsdb *db; int i; /* Create database. */ json = parse_json(argv[1]); check_ovsdb_error(ovsdb_schema_from_json(json, &schema)); json_destroy(json); db = ovsdb_create(schema); for (i = 2; i < argc; i++) { struct json *params, *result; char *s; params = parse_json(argv[i]); result = ovsdb_execute(db, NULL, params, 0, NULL); s = json_to_string(result, JSSF_SORT); printf("%s\n", s); free(s); json_destroy(params); json_destroy(result); } ovsdb_destroy(db); } struct test_trigger { struct ovsdb_trigger trigger; int number; }; static void do_trigger_dump(struct test_trigger *t, long long int now, const char *title) { struct json *result; char *s; result = ovsdb_trigger_steal_result(&t->trigger); s = json_to_string(result, JSSF_SORT); printf("t=%lld: trigger %d (%s): %s\n", now, t->number, title, s); free(s); json_destroy(result); ovsdb_trigger_destroy(&t->trigger); free(t); } static void do_trigger(int argc OVS_UNUSED, char *argv[]) { struct ovsdb_schema *schema; struct ovsdb_session session; struct ovsdb_server server; struct json *json; struct ovsdb *db; long long int now; int number; int i; /* Create database. 
*/ json = parse_json(argv[1]); check_ovsdb_error(ovsdb_schema_from_json(json, &schema)); json_destroy(json); db = ovsdb_create(schema); ovsdb_server_init(&server); ovsdb_server_add_db(&server, db); ovsdb_session_init(&session, &server); now = 0; number = 0; for (i = 2; i < argc; i++) { struct json *params = parse_json(argv[i]); if (params->type == JSON_ARRAY && json_array(params)->n == 2 && json_array(params)->elems[0]->type == JSON_STRING && !strcmp(json_string(json_array(params)->elems[0]), "advance") && json_array(params)->elems[1]->type == JSON_INTEGER) { now += json_integer(json_array(params)->elems[1]); json_destroy(params); } else { struct test_trigger *t = xmalloc(sizeof *t); ovsdb_trigger_init(&session, db, &t->trigger, params, now); t->number = number++; if (ovsdb_trigger_is_complete(&t->trigger)) { do_trigger_dump(t, now, "immediate"); } else { printf("t=%lld: new trigger %d\n", now, t->number); } } ovsdb_trigger_run(db, now); while (!list_is_empty(&session.completions)) { do_trigger_dump(CONTAINER_OF(list_pop_front(&session.completions), struct test_trigger, trigger.node), now, "delayed"); } ovsdb_trigger_wait(db, now); poll_immediate_wake(); poll_block(); } ovsdb_server_destroy(&server); ovsdb_destroy(db); } static void do_help(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { usage(); } /* "transact" command. 
*/ static struct ovsdb *do_transact_db; static struct ovsdb_txn *do_transact_txn; static struct ovsdb_table *do_transact_table; static void do_transact_commit(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { ovsdb_error_destroy(ovsdb_txn_commit(do_transact_txn, false)); do_transact_txn = NULL; } static void do_transact_abort(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { ovsdb_txn_abort(do_transact_txn); do_transact_txn = NULL; } static void uuid_from_integer(int integer, struct uuid *uuid) { uuid_zero(uuid); uuid->parts[3] = integer; } static const struct ovsdb_row * do_transact_find_row(const char *uuid_string) { const struct ovsdb_row *row; struct uuid uuid; uuid_from_integer(atoi(uuid_string), &uuid); row = ovsdb_table_get_row(do_transact_table, &uuid); if (!row) { ovs_fatal(0, "table does not contain row with UUID "UUID_FMT, UUID_ARGS(&uuid)); } return row; } static void do_transact_set_integer(struct ovsdb_row *row, const char *column_name, int integer) { if (integer != -1) { const struct ovsdb_column *column; column = ovsdb_table_schema_get_column(do_transact_table->schema, column_name); row->fields[column->index].keys[0].integer = integer; } } static int do_transact_get_integer(const struct ovsdb_row *row, const char *column_name) { const struct ovsdb_column *column; column = ovsdb_table_schema_get_column(do_transact_table->schema, column_name); return row->fields[column->index].keys[0].integer; } static void do_transact_set_i_j(struct ovsdb_row *row, const char *i_string, const char *j_string) { do_transact_set_integer(row, "i", atoi(i_string)); do_transact_set_integer(row, "j", atoi(j_string)); } static void do_transact_insert(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ovsdb_row *row; struct uuid *uuid; row = ovsdb_row_create(do_transact_table); /* Set UUID. 
*/ uuid = ovsdb_row_get_uuid_rw(row); uuid_from_integer(atoi(argv[1]), uuid); if (ovsdb_table_get_row(do_transact_table, uuid)) { ovs_fatal(0, "table already contains row with UUID "UUID_FMT, UUID_ARGS(uuid)); } do_transact_set_i_j(row, argv[2], argv[3]); /* Insert row. */ ovsdb_txn_row_insert(do_transact_txn, row); } static void do_transact_delete(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { const struct ovsdb_row *row = do_transact_find_row(argv[1]); ovsdb_txn_row_delete(do_transact_txn, row); } static void do_transact_modify(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { const struct ovsdb_row *row_ro; struct ovsdb_row *row_rw; row_ro = do_transact_find_row(argv[1]); row_rw = ovsdb_txn_row_modify(do_transact_txn, row_ro); do_transact_set_i_j(row_rw, argv[2], argv[3]); } static int compare_rows_by_uuid(const void *a_, const void *b_) { struct ovsdb_row *const *ap = a_; struct ovsdb_row *const *bp = b_; return uuid_compare_3way(ovsdb_row_get_uuid(*ap), ovsdb_row_get_uuid(*bp)); } static void do_transact_print(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { const struct ovsdb_row **rows; const struct ovsdb_row *row; size_t n_rows; size_t i; n_rows = hmap_count(&do_transact_table->rows); rows = xmalloc(n_rows * sizeof *rows); i = 0; HMAP_FOR_EACH (row, hmap_node, &do_transact_table->rows) { rows[i++] = row; } assert(i == n_rows); qsort(rows, n_rows, sizeof *rows, compare_rows_by_uuid); for (i = 0; i < n_rows; i++) { printf("\n%"PRId32": i=%d, j=%d", ovsdb_row_get_uuid(rows[i])->parts[3], do_transact_get_integer(rows[i], "i"), do_transact_get_integer(rows[i], "j")); } free(rows); } static void do_transact(int argc, char *argv[]) { static const struct command do_transact_commands[] = { { "commit", 0, 0, do_transact_commit }, { "abort", 0, 0, do_transact_abort }, { "insert", 2, 3, do_transact_insert }, { "delete", 1, 1, do_transact_delete }, { "modify", 2, 3, do_transact_modify }, { "print", 0, 0, do_transact_print }, { NULL, 0, 0, NULL }, }; struct ovsdb_schema 
*schema; struct json *json; int i; /* Create table. */ json = parse_json("{\"name\": \"testdb\", " " \"tables\": " " {\"mytable\": " " {\"columns\": " " {\"i\": {\"type\": \"integer\"}, " " \"j\": {\"type\": \"integer\"}}}}}"); check_ovsdb_error(ovsdb_schema_from_json(json, &schema)); json_destroy(json); do_transact_db = ovsdb_create(schema); do_transact_table = ovsdb_get_table(do_transact_db, "mytable"); assert(do_transact_table != NULL); for (i = 1; i < argc; i++) { struct json *command; size_t n_args; char **args; int j; command = parse_json(argv[i]); if (command->type != JSON_ARRAY) { ovs_fatal(0, "transaction %d must be JSON array " "with at least 1 element", i); } n_args = command->u.array.n; args = xmalloc((n_args + 1) * sizeof *args); for (j = 0; j < n_args; j++) { struct json *s = command->u.array.elems[j]; if (s->type != JSON_STRING) { ovs_fatal(0, "transaction %d argument %d must be JSON string", i, j); } args[j] = xstrdup(json_string(s)); } args[n_args] = NULL; if (!do_transact_txn) { do_transact_txn = ovsdb_txn_create(do_transact_db); } for (j = 0; j < n_args; j++) { if (j) { putchar(' '); } fputs(args[j], stdout); } fputs(":", stdout); run_command(n_args, args, do_transact_commands); putchar('\n'); for (j = 0; j < n_args; j++) { free(args[j]); } free(args); json_destroy(command); } ovsdb_txn_abort(do_transact_txn); ovsdb_destroy(do_transact_db); /* Also destroys 'schema'. */ } static int compare_link1(const void *a_, const void *b_) { const struct idltest_link1 *const *ap = a_; const struct idltest_link1 *const *bp = b_; const struct idltest_link1 *a = *ap; const struct idltest_link1 *b = *bp; return a->i < b->i ? -1 : a->i > b->i; } static void print_idl(struct ovsdb_idl *idl, int step) { const struct idltest_simple *s; const struct idltest_link1 *l1; const struct idltest_link2 *l2; int n = 0; IDLTEST_SIMPLE_FOR_EACH (s, idl) { size_t i; printf("%03d: i=%"PRId64" r=%g b=%s s=%s u="UUID_FMT" ia=[", step, s->i, s->r, s->b ? 
"true" : "false", s->s, UUID_ARGS(&s->u)); for (i = 0; i < s->n_ia; i++) { printf("%s%"PRId64, i ? " " : "", s->ia[i]); } printf("] ra=["); for (i = 0; i < s->n_ra; i++) { printf("%s%g", i ? " " : "", s->ra[i]); } printf("] ba=["); for (i = 0; i < s->n_ba; i++) { printf("%s%s", i ? " " : "", s->ba[i] ? "true" : "false"); } printf("] sa=["); for (i = 0; i < s->n_sa; i++) { printf("%s%s", i ? " " : "", s->sa[i]); } printf("] ua=["); for (i = 0; i < s->n_ua; i++) { printf("%s"UUID_FMT, i ? " " : "", UUID_ARGS(&s->ua[i])); } printf("] uuid="UUID_FMT"\n", UUID_ARGS(&s->header_.uuid)); n++; } IDLTEST_LINK1_FOR_EACH (l1, idl) { struct idltest_link1 **links; size_t i; printf("%03d: i=%"PRId64" k=", step, l1->i); if (l1->k) { printf("%"PRId64, l1->k->i); } printf(" ka=["); links = xmemdup(l1->ka, l1->n_ka * sizeof *l1->ka); qsort(links, l1->n_ka, sizeof *links, compare_link1); for (i = 0; i < l1->n_ka; i++) { printf("%s%"PRId64, i ? " " : "", links[i]->i); } free(links); printf("] l2="); if (l1->l2) { printf("%"PRId64, l1->l2->i); } printf(" uuid="UUID_FMT"\n", UUID_ARGS(&l1->header_.uuid)); n++; } IDLTEST_LINK2_FOR_EACH (l2, idl) { printf("%03d: i=%"PRId64" l1=", step, l2->i); if (l2->l1) { printf("%"PRId64, l2->l1->i); } printf(" uuid="UUID_FMT"\n", UUID_ARGS(&l2->header_.uuid)); n++; } if (!n) { printf("%03d: empty\n", step); } } static void parse_uuids(const struct json *json, struct ovsdb_symbol_table *symtab, size_t *n) { struct uuid uuid; if (json->type == JSON_STRING && uuid_from_string(&uuid, json->u.string)) { char *name = xasprintf("#%zu#", *n); fprintf(stderr, "%s = "UUID_FMT"\n", name, UUID_ARGS(&uuid)); ovsdb_symbol_table_put(symtab, name, &uuid, false); free(name); *n += 1; } else if (json->type == JSON_ARRAY) { size_t i; for (i = 0; i < json->u.array.n; i++) { parse_uuids(json->u.array.elems[i], symtab, n); } } else if (json->type == JSON_OBJECT) { const struct shash_node *node; SHASH_FOR_EACH (node, json_object(json)) { parse_uuids(node->data, symtab, n); } 
} } static void substitute_uuids(struct json *json, const struct ovsdb_symbol_table *symtab) { if (json->type == JSON_STRING) { const struct ovsdb_symbol *symbol; symbol = ovsdb_symbol_table_get(symtab, json->u.string); if (symbol) { free(json->u.string); json->u.string = xasprintf(UUID_FMT, UUID_ARGS(&symbol->uuid)); } } else if (json->type == JSON_ARRAY) { size_t i; for (i = 0; i < json->u.array.n; i++) { substitute_uuids(json->u.array.elems[i], symtab); } } else if (json->type == JSON_OBJECT) { const struct shash_node *node; SHASH_FOR_EACH (node, json_object(json)) { substitute_uuids(node->data, symtab); } } } static const struct idltest_simple * idltest_find_simple(struct ovsdb_idl *idl, int i) { const struct idltest_simple *s; IDLTEST_SIMPLE_FOR_EACH (s, idl) { if (s->i == i) { return s; } } return NULL; } static void idl_set(struct ovsdb_idl *idl, char *commands, int step) { char *cmd, *save_ptr1 = NULL; struct ovsdb_idl_txn *txn; enum ovsdb_idl_txn_status status; bool increment = false; txn = ovsdb_idl_txn_create(idl); for (cmd = strtok_r(commands, ",", &save_ptr1); cmd; cmd = strtok_r(NULL, ",", &save_ptr1)) { char *save_ptr2 = NULL; char *name, *arg1, *arg2, *arg3; name = strtok_r(cmd, " ", &save_ptr2); arg1 = strtok_r(NULL, " ", &save_ptr2); arg2 = strtok_r(NULL, " ", &save_ptr2); arg3 = strtok_r(NULL, " ", &save_ptr2); if (!strcmp(name, "set")) { const struct idltest_simple *s; if (!arg3) { ovs_fatal(0, "\"set\" command requires 3 arguments"); } s = idltest_find_simple(idl, atoi(arg1)); if (!s) { ovs_fatal(0, "\"set\" command asks for nonexistent " "i=%d", atoi(arg1)); } if (!strcmp(arg2, "b")) { idltest_simple_set_b(s, atoi(arg3)); } else if (!strcmp(arg2, "s")) { idltest_simple_set_s(s, arg3); } else if (!strcmp(arg2, "u")) { struct uuid uuid; if (!uuid_from_string(&uuid, arg3)) { ovs_fatal(0, "\"%s\" is not a valid UUID", arg3); } idltest_simple_set_u(s, uuid); } else if (!strcmp(arg2, "r")) { idltest_simple_set_r(s, atof(arg3)); } else { ovs_fatal(0, 
"\"set\" command asks for unknown column %s", arg2); } } else if (!strcmp(name, "insert")) { struct idltest_simple *s; if (!arg1 || arg2) { ovs_fatal(0, "\"insert\" command requires 1 argument"); } s = idltest_simple_insert(txn); idltest_simple_set_i(s, atoi(arg1)); } else if (!strcmp(name, "delete")) { const struct idltest_simple *s; if (!arg1 || arg2) { ovs_fatal(0, "\"delete\" command requires 1 argument"); } s = idltest_find_simple(idl, atoi(arg1)); if (!s) { ovs_fatal(0, "\"delete\" command asks for nonexistent " "i=%d", atoi(arg1)); } idltest_simple_delete(s); } else if (!strcmp(name, "verify")) { const struct idltest_simple *s; if (!arg2 || arg3) { ovs_fatal(0, "\"verify\" command requires 2 arguments"); } s = idltest_find_simple(idl, atoi(arg1)); if (!s) { ovs_fatal(0, "\"verify\" command asks for nonexistent " "i=%d", atoi(arg1)); } if (!strcmp(arg2, "i")) { idltest_simple_verify_i(s); } else if (!strcmp(arg2, "b")) { idltest_simple_verify_b(s); } else if (!strcmp(arg2, "s")) { idltest_simple_verify_s(s); } else if (!strcmp(arg2, "u")) { idltest_simple_verify_s(s); } else if (!strcmp(arg2, "r")) { idltest_simple_verify_r(s); } else { ovs_fatal(0, "\"verify\" command asks for unknown column %s", arg2); } } else if (!strcmp(name, "increment")) { const struct idltest_simple *s; if (!arg1 || arg2) { ovs_fatal(0, "\"increment\" command requires 1 argument"); } s = idltest_find_simple(idl, atoi(arg1)); if (!s) { ovs_fatal(0, "\"set\" command asks for nonexistent " "i=%d", atoi(arg1)); } ovsdb_idl_txn_increment(txn, &s->header_, &idltest_simple_col_i); increment = true; } else if (!strcmp(name, "abort")) { ovsdb_idl_txn_abort(txn); break; } else if (!strcmp(name, "destroy")) { printf("%03d: destroy\n", step); ovsdb_idl_txn_destroy(txn); return; } else { ovs_fatal(0, "unknown command %s", name); } } status = ovsdb_idl_txn_commit_block(txn); printf("%03d: commit, status=%s", step, ovsdb_idl_txn_status_to_string(status)); if (increment) { printf(", 
increment=%"PRId64, ovsdb_idl_txn_get_increment_new_value(txn)); } putchar('\n'); ovsdb_idl_txn_destroy(txn); } static void do_idl(int argc, char *argv[]) { struct jsonrpc *rpc; struct ovsdb_idl *idl; unsigned int seqno = 0; struct ovsdb_symbol_table *symtab; size_t n_uuids = 0; int step = 0; int error; int i; idltest_init(); idl = ovsdb_idl_create(argv[1], &idltest_idl_class, true, true); if (argc > 2) { struct stream *stream; error = stream_open_block(jsonrpc_stream_open(argv[1], &stream, DSCP_DEFAULT), &stream); if (error) { ovs_fatal(error, "failed to connect to \"%s\"", argv[1]); } rpc = jsonrpc_open(stream); } else { rpc = NULL; } setvbuf(stdout, NULL, _IOLBF, 0); symtab = ovsdb_symbol_table_create(); for (i = 2; i < argc; i++) { char *arg = argv[i]; struct jsonrpc_msg *request, *reply; if (*arg == '+') { /* The previous transaction didn't change anything. */ arg++; } else { /* Wait for update. */ for (;;) { ovsdb_idl_run(idl); if (ovsdb_idl_get_seqno(idl) != seqno) { break; } jsonrpc_run(rpc); ovsdb_idl_wait(idl); jsonrpc_wait(rpc); poll_block(); } /* Print update. 
*/ print_idl(idl, step++); } seqno = ovsdb_idl_get_seqno(idl); if (!strcmp(arg, "reconnect")) { printf("%03d: reconnect\n", step++); ovsdb_idl_force_reconnect(idl); } else if (arg[0] != '[') { idl_set(idl, arg, step++); } else { struct json *json = parse_json(arg); substitute_uuids(json, symtab); request = jsonrpc_create_request("transact", json, NULL); error = jsonrpc_transact_block(rpc, request, &reply); if (error || reply->error) { ovs_fatal(error, "jsonrpc transaction failed"); } printf("%03d: ", step++); if (reply->result) { parse_uuids(reply->result, symtab, &n_uuids); } json_destroy(reply->id); reply->id = NULL; print_and_free_json(jsonrpc_msg_to_json(reply)); } } ovsdb_symbol_table_destroy(symtab); if (rpc) { jsonrpc_close(rpc); } for (;;) { ovsdb_idl_run(idl); if (ovsdb_idl_get_seqno(idl) != seqno) { break; } ovsdb_idl_wait(idl); poll_block(); } print_idl(idl, step++); ovsdb_idl_destroy(idl); printf("%03d: done\n", step); } static struct command all_commands[] = { { "log-io", 2, INT_MAX, do_log_io }, { "default-atoms", 0, 0, do_default_atoms }, { "default-data", 0, 0, do_default_data }, { "parse-atomic-type", 1, 1, do_parse_atomic_type }, { "parse-base-type", 1, 1, do_parse_base_type }, { "parse-type", 1, 1, do_parse_type }, { "parse-atoms", 2, INT_MAX, do_parse_atoms }, { "parse-atom-strings", 2, INT_MAX, do_parse_atom_strings }, { "parse-data", 2, INT_MAX, do_parse_data }, { "parse-data-strings", 2, INT_MAX, do_parse_data_strings }, { "sort-atoms", 2, 2, do_sort_atoms }, { "parse-column", 2, 2, do_parse_column }, { "parse-table", 2, 3, do_parse_table }, { "parse-rows", 2, INT_MAX, do_parse_rows }, { "compare-rows", 2, INT_MAX, do_compare_rows }, { "parse-conditions", 2, INT_MAX, do_parse_conditions }, { "evaluate-conditions", 3, 3, do_evaluate_conditions }, { "parse-mutations", 2, INT_MAX, do_parse_mutations }, { "execute-mutations", 3, 3, do_execute_mutations }, { "query", 3, 3, do_query }, { "query-distinct", 4, 4, do_query_distinct }, { "transact", 1, 
INT_MAX, do_transact }, { "parse-schema", 1, 1, do_parse_schema }, { "execute", 2, INT_MAX, do_execute }, { "trigger", 2, INT_MAX, do_trigger }, { "idl", 1, INT_MAX, do_idl }, { "help", 0, INT_MAX, do_help }, { NULL, 0, 0, NULL }, }; openvswitch-2.0.1+git20140120/tests/test-ovsdb.py000066400000000000000000000436461226605124000212750ustar00rootroot00000000000000# Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import getopt import re import os import signal import sys import uuid from ovs.db import error import ovs.db.idl import ovs.db.schema from ovs.db import data from ovs.db import types import ovs.ovsuuid import ovs.poller import ovs.util def unbox_json(json): if type(json) == list and len(json) == 1: return json[0] else: return json def do_default_atoms(): for type_ in types.ATOMIC_TYPES: if type_ == types.VoidType: continue sys.stdout.write("%s: " % type_.to_string()) atom = data.Atom.default(type_) if atom != data.Atom.default(type_): sys.stdout.write("wrong\n") sys.exit(1) sys.stdout.write("OK\n") def do_default_data(): any_errors = False for n_min in 0, 1: for key in types.ATOMIC_TYPES: if key == types.VoidType: continue for value in types.ATOMIC_TYPES: if value == types.VoidType: valueBase = None else: valueBase = types.BaseType(value) type_ = types.Type(types.BaseType(key), valueBase, n_min, 1) assert type_.is_valid() sys.stdout.write("key %s, value %s, n_min %d: " % (key.to_string(), value.to_string(), n_min)) datum = 
data.Datum.default(type_) if datum != data.Datum.default(type_): sys.stdout.write("wrong\n") any_errors = True else: sys.stdout.write("OK\n") if any_errors: sys.exit(1) def do_parse_atomic_type(type_string): type_json = unbox_json(ovs.json.from_string(type_string)) atomic_type = types.AtomicType.from_json(type_json) print ovs.json.to_string(atomic_type.to_json(), sort_keys=True) def do_parse_base_type(type_string): type_json = unbox_json(ovs.json.from_string(type_string)) base_type = types.BaseType.from_json(type_json) print ovs.json.to_string(base_type.to_json(), sort_keys=True) def do_parse_type(type_string): type_json = unbox_json(ovs.json.from_string(type_string)) type_ = types.Type.from_json(type_json) print ovs.json.to_string(type_.to_json(), sort_keys=True) def do_parse_atoms(type_string, *atom_strings): type_json = unbox_json(ovs.json.from_string(type_string)) base = types.BaseType.from_json(type_json) for atom_string in atom_strings: atom_json = unbox_json(ovs.json.from_string(atom_string)) try: atom = data.Atom.from_json(base, atom_json) print ovs.json.to_string(atom.to_json()) except error.Error, e: print unicode(e) def do_parse_data(type_string, *data_strings): type_json = unbox_json(ovs.json.from_string(type_string)) type_ = types.Type.from_json(type_json) for datum_string in data_strings: datum_json = unbox_json(ovs.json.from_string(datum_string)) datum = data.Datum.from_json(type_, datum_json) print ovs.json.to_string(datum.to_json()) def do_sort_atoms(type_string, atom_strings): type_json = unbox_json(ovs.json.from_string(type_string)) base = types.BaseType.from_json(type_json) atoms = [data.Atom.from_json(base, atom_json) for atom_json in unbox_json(ovs.json.from_string(atom_strings))] print ovs.json.to_string([data.Atom.to_json(atom) for atom in sorted(atoms)]) def do_parse_column(name, column_string): column_json = unbox_json(ovs.json.from_string(column_string)) column = ovs.db.schema.ColumnSchema.from_json(column_json, name) print 
ovs.json.to_string(column.to_json(), sort_keys=True) def do_parse_table(name, table_string, default_is_root_string='false'): default_is_root = default_is_root_string == 'true' table_json = unbox_json(ovs.json.from_string(table_string)) table = ovs.db.schema.TableSchema.from_json(table_json, name) print ovs.json.to_string(table.to_json(default_is_root), sort_keys=True) def do_parse_schema(schema_string): schema_json = unbox_json(ovs.json.from_string(schema_string)) schema = ovs.db.schema.DbSchema.from_json(schema_json) print ovs.json.to_string(schema.to_json(), sort_keys=True) def print_idl(idl, step): simple = idl.tables["simple"].rows l1 = idl.tables["link1"].rows l2 = idl.tables["link2"].rows n = 0 for row in simple.itervalues(): s = ("%03d: i=%s r=%s b=%s s=%s u=%s " "ia=%s ra=%s ba=%s sa=%s ua=%s uuid=%s" % (step, row.i, row.r, row.b, row.s, row.u, row.ia, row.ra, row.ba, row.sa, row.ua, row.uuid)) s = re.sub('""|,|u?\'', "", s) s = re.sub('UUID\(([^)]+)\)', r'\1', s) s = re.sub('False', 'false', s) s = re.sub('True', 'true', s) s = re.sub(r'(ba)=([^[][^ ]*) ', r'\1=[\2] ', s) print(s) n += 1 for row in l1.itervalues(): s = ["%03d: i=%s k=" % (step, row.i)] if row.k: s.append(str(row.k.i)) s.append(" ka=[") s.append(' '.join(sorted(str(ka.i) for ka in row.ka))) s.append("] l2=") if row.l2: s.append(str(row.l2[0].i)) s.append(" uuid=%s" % row.uuid) print(''.join(s)) n += 1 for row in l2.itervalues(): s = ["%03d: i=%s l1=" % (step, row.i)] if row.l1: s.append(str(row.l1[0].i)) s.append(" uuid=%s" % row.uuid) print(''.join(s)) n += 1 if not n: print("%03d: empty" % step) sys.stdout.flush() def substitute_uuids(json, symtab): if type(json) in [str, unicode]: symbol = symtab.get(json) if symbol: return str(symbol) elif type(json) == list: return [substitute_uuids(element, symtab) for element in json] elif type(json) == dict: d = {} for key, value in json.iteritems(): d[key] = substitute_uuids(value, symtab) return d return json def parse_uuids(json, symtab): if 
def idl_set(idl, commands, step):
    """Execute a comma-separated list of test commands as one IDL transaction.

    'commands' is split on ','; each piece is split on whitespace into a
    command name and its arguments.  Supported commands are "set", "insert",
    "delete", "verify", "increment", "abort", "destroy", "linktest" and
    "getattrtest".  A malformed or unknown command writes a diagnostic to
    stderr and exits with status 1.

    Unless "destroy" is seen (which aborts and returns immediately), the
    transaction is committed with commit_block() and a line of the form
    "NNN: commit, status=..." is written to stdout, where NNN is 'step'.
    """

    def fail(message):
        # All usage errors are fatal for this test utility.
        sys.stderr.write(message)
        sys.exit(1)

    def require_args(command_name, args, count):
        if len(args) != count:
            if count == 1:
                noun = "argument"
            else:
                noun = "arguments"
            fail('"%s" command requires %d %s\n'
                 % (command_name, count, noun))

    def find_row(command_name, key):
        # Look up the "simple" row whose 'i' column equals int(key).
        row = idltest_find_simple(idl, int(key))
        if not row:
            fail('"%s" command asks for nonexistent i=%d\n'
                 % (command_name, int(key)))
        return row

    txn = ovs.db.idl.Transaction(idl)
    increment = False
    for command in commands.split(','):
        words = command.split()
        name = words[0]
        args = words[1:]

        if name == "set":
            require_args("set", args, 3)
            s = find_row("set", args[0])

            if args[1] == "b":
                s.b = args[2] == "1"
            elif args[1] == "s":
                s.s = args[2]
            elif args[1] == "u":
                s.u = uuid.UUID(args[2])
            elif args[1] == "r":
                s.r = float(args[2])
            else:
                # args[1] is the column name; args[2] is the value.
                fail('"set" command asks for unknown column %s\n' % args[1])
        elif name == "insert":
            require_args("insert", args, 1)

            s = txn.insert(idl.tables["simple"])
            s.i = int(args[0])
        elif name == "delete":
            require_args("delete", args, 1)
            find_row("delete", args[0]).delete()
        elif name == "verify":
            require_args("verify", args, 2)
            s = find_row("verify", args[0])

            if args[1] in ("i", "b", "s", "u", "r"):
                s.verify(args[1])
            else:
                fail('"verify" command asks for unknown column '
                     '"%s"\n' % args[1])
        elif name == "increment":
            require_args("increment", args, 1)
            find_row("increment", args[0]).increment("i")
            increment = True
        elif name == "abort":
            txn.abort()
            break
        elif name == "destroy":
            print("%03d: destroy" % step)
            sys.stdout.flush()
            txn.abort()
            return
        elif name == "linktest":
            l1_0 = txn.insert(idl.tables["link1"])
            l1_0.i = 1
            l1_0.k = [l1_0]
            l1_0.ka = [l1_0]
            l1_1 = txn.insert(idl.tables["link1"])
            l1_1.i = 2
            l1_1.k = [l1_0]
            l1_1.ka = [l1_0, l1_1]
        elif name == 'getattrtest':
            # Exercises getattr() with a default on a freshly inserted row,
            # before and after the column has been assigned.
            l1 = txn.insert(idl.tables["link1"])
            i = getattr(l1, 'i', 1)
            assert i == 1
            l1.i = 2
            i = getattr(l1, 'i', 1)
            assert i == 2
            l1.k = [l1]
        else:
            fail("unknown command %s\n" % name)

    status = txn.commit_block()
    sys.stdout.write("%03d: commit, status=%s"
                     % (step, ovs.db.idl.Transaction.status_to_string(status)))
    if increment and status == ovs.db.idl.Transaction.SUCCESS:
        sys.stdout.write(", increment=%d" % txn.get_increment_new_value())
    sys.stdout.write("\n")
    sys.stdout.flush()
while idl.change_seqno == seqno and not idl.run(): rpc.run() poller = ovs.poller.Poller() idl.wait(poller) rpc.wait(poller) poller.block() print_idl(idl, step) step += 1 seqno = idl.change_seqno if command == "reconnect": print("%03d: reconnect" % step) sys.stdout.flush() step += 1 idl.force_reconnect() elif not command.startswith("["): idl_set(idl, command, step) step += 1 else: json = ovs.json.from_string(command) if type(json) in [str, unicode]: sys.stderr.write("\"%s\": %s\n" % (command, json)) sys.exit(1) json = substitute_uuids(json, symtab) request = ovs.jsonrpc.Message.create_request("transact", json) error, reply = rpc.transact_block(request) if error: sys.stderr.write("jsonrpc transaction failed: %s" % os.strerror(error)) sys.exit(1) elif reply.error is not None: sys.stderr.write("jsonrpc transaction failed: %s" % reply.error) sys.exit(1) sys.stdout.write("%03d: " % step) sys.stdout.flush() step += 1 if reply.result is not None: parse_uuids(reply.result, symtab) reply.id = None sys.stdout.write("%s\n" % ovs.json.to_string(reply.to_json())) sys.stdout.flush() if rpc: rpc.close() while idl.change_seqno == seqno and not idl.run(): poller = ovs.poller.Poller() idl.wait(poller) poller.block() print_idl(idl, step) step += 1 idl.close() print("%03d: done" % step) def usage(): print """\ %(program_name)s: test utility for Open vSwitch database Python bindings usage: %(program_name)s [OPTIONS] COMMAND ARG... The following commands are supported: default-atoms test ovsdb_atom_default() default-data test ovsdb_datum_default() parse-atomic-type TYPE parse TYPE as OVSDB atomic type, and re-serialize parse-base-type TYPE parse TYPE as OVSDB base type, and re-serialize parse-type JSON parse JSON as OVSDB type, and re-serialize parse-atoms TYPE ATOM... parse JSON ATOMs as atoms of TYPE, and re-serialize parse-atom-strings TYPE ATOM... parse string ATOMs as atoms of given TYPE, and re-serialize sort-atoms TYPE ATOM... 
print JSON ATOMs in sorted order parse-data TYPE DATUM... parse JSON DATUMs as data of given TYPE, and re-serialize parse-column NAME OBJECT parse column NAME with info OBJECT, and re-serialize parse-table NAME OBJECT [DEFAULT-IS-ROOT] parse table NAME with info OBJECT parse-schema JSON parse JSON as an OVSDB schema, and re-serialize idl SCHEMA SERVER [TRANSACTION...] connect to SERVER (which has the specified SCHEMA) and dump the contents of the database as seen initially by the IDL implementation and after executing each TRANSACTION. (Each TRANSACTION must modify the database or this command will hang.) The following options are also available: -t, --timeout=SECS give up after SECS seconds -h, --help display this help message\ """ % {'program_name': ovs.util.PROGRAM_NAME} sys.exit(0) def main(argv): try: options, args = getopt.gnu_getopt(argv[1:], 't:h', ['timeout', 'help']) except getopt.GetoptError, geo: sys.stderr.write("%s: %s\n" % (ovs.util.PROGRAM_NAME, geo.msg)) sys.exit(1) for key, value in options: if key in ['-h', '--help']: usage() elif key in ['-t', '--timeout']: try: timeout = int(value) if timeout < 1: raise TypeError except TypeError: raise error.Error("value %s on -t or --timeout is not at " "least 1" % value) signal.alarm(timeout) else: sys.exit(0) if not args: sys.stderr.write("%s: missing command argument " "(use --help for help)\n" % ovs.util.PROGRAM_NAME) sys.exit(1) commands = {"default-atoms": (do_default_atoms, 0), "default-data": (do_default_data, 0), "parse-atomic-type": (do_parse_atomic_type, 1), "parse-base-type": (do_parse_base_type, 1), "parse-type": (do_parse_type, 1), "parse-atoms": (do_parse_atoms, (2,)), "parse-data": (do_parse_data, (2,)), "sort-atoms": (do_sort_atoms, 2), "parse-column": (do_parse_column, 2), "parse-table": (do_parse_table, (2, 3)), "parse-schema": (do_parse_schema, 1), "idl": (do_idl, (2,))} command_name = args[0] args = args[1:] if not command_name in commands: sys.stderr.write("%s: unknown command \"%s\" " 
"(use --help for help)\n" % (ovs.util.PROGRAM_NAME, command_name)) sys.exit(1) func, n_args = commands[command_name] if type(n_args) == tuple: if len(args) < n_args[0]: sys.stderr.write("%s: \"%s\" requires at least %d arguments but " "only %d provided\n" % (ovs.util.PROGRAM_NAME, command_name, n_args, len(args))) sys.exit(1) elif type(n_args) == int: if len(args) != n_args: sys.stderr.write("%s: \"%s\" requires %d arguments but %d " "provided\n" % (ovs.util.PROGRAM_NAME, command_name, n_args, len(args))) sys.exit(1) else: assert False func(*args) if __name__ == '__main__': try: main(sys.argv) except error.Error, e: sys.stderr.write("%s\n" % e) sys.exit(1) openvswitch-2.0.1+git20140120/tests/test-packets.c000066400000000000000000000110561226605124000213720ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
/* Asserts that ip_is_cidr() accepts every mask in 'cidr_masks' and rejects
 * every mask in 'non_cidr_masks'.  Masks are written in host byte order and
 * converted with htonl() before the call. */
static void
test_ipv4_cidr(void)
{
    static const uint32_t cidr_masks[] = {
        0x00000000, 0x80000000, 0xf0000000, 0xffffffe0, 0xffffffff,
    };
    static const uint32_t non_cidr_masks[] = {
        0x00000001, 0x40000000, 0x0fffffff, 0xffffffd0,
    };
    size_t k;

    for (k = 0; k < sizeof cidr_masks / sizeof cidr_masks[0]; k++) {
        assert(ip_is_cidr(htonl(cidr_masks[k])));
    }
    for (k = 0; k < sizeof non_cidr_masks / sizeof non_cidr_masks[0]; k++) {
        assert(!ip_is_cidr(htonl(non_cidr_masks[k])));
    }
}
64); assert(ipv6_is_cidr(&dest)); dest = ipv6_create_mask(95); assert(ipv6_count_cidr_bits(&dest) == 95); assert(ipv6_is_cidr(&dest)); dest = ipv6_create_mask(96); assert(ipv6_count_cidr_bits(&dest) == 96); assert(ipv6_is_cidr(&dest)); dest = ipv6_create_mask(97); assert(ipv6_count_cidr_bits(&dest) == 97); assert(ipv6_is_cidr(&dest)); dest = ipv6_create_mask(127); assert(ipv6_count_cidr_bits(&dest) == 127); assert(ipv6_is_cidr(&dest)); src.s6_addr[8] = 0xf0; assert(ipv6_is_cidr(&src)); assert(ipv6_count_cidr_bits(&src) == 68); src.s6_addr[15] = 0x01; assert(!ipv6_is_cidr(&src)); src.s6_addr[15] = 0x00; assert(ipv6_is_cidr(&src)); src.s6_addr[8] = 0x0f; assert(!ipv6_is_cidr(&src)); } static void test_ipv6_masking(void) { struct in6_addr dest; struct in6_addr mask; mask = ipv6_create_mask(0); dest = ipv6_addr_bitand(&in6addr_exact, &mask); assert(ipv6_count_cidr_bits(&dest) == 0); mask = ipv6_create_mask(1); dest = ipv6_addr_bitand(&in6addr_exact, &mask); assert(ipv6_count_cidr_bits(&dest) == 1); mask = ipv6_create_mask(13); dest = ipv6_addr_bitand(&in6addr_exact, &mask); assert(ipv6_count_cidr_bits(&dest) == 13); mask = ipv6_create_mask(127); dest = ipv6_addr_bitand(&in6addr_exact, &mask); assert(ipv6_count_cidr_bits(&dest) == 127); mask = ipv6_create_mask(128); dest = ipv6_addr_bitand(&in6addr_exact, &mask); assert(ipv6_count_cidr_bits(&dest) == 128); } int main(void) { test_ipv4_cidr(); test_ipv6_static_masks(); test_ipv6_cidr(); test_ipv6_masking(); return 0; } openvswitch-2.0.1+git20140120/tests/test-random.c000066400000000000000000000035551226605124000212250ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
/* Draws N_ROUNDS values from random_uint32() after seeding with 1, then
 * prints the average, a per-bit histogram of 0s and 1s, and a per-nibble
 * histogram of the 16 possible values. */
int
main(void)
{
    enum { N_ROUNDS = 10000 };
    unsigned long long int sum = 0;
    int nibble_hist[8][16];
    int bit_hist[32];
    int round, bit, nib, val;

    random_set_seed(1);

    memset(bit_hist, 0, sizeof bit_hist);
    memset(nibble_hist, 0, sizeof nibble_hist);

    for (round = 0; round < N_ROUNDS; round++) {
        uint32_t sample = random_uint32();

        sum += sample;

        /* Count how often each of the 32 bits is set. */
        for (bit = 0; bit < 32; bit++) {
            bit_hist[bit] += (sample >> bit) & 1;
        }

        /* Count the value taken by each of the 8 nibbles. */
        for (nib = 0; nib < 8; nib++) {
            nibble_hist[nib][(sample >> (nib * 4)) & 15]++;
        }
    }

    printf("average=%08llx\n", sum / N_ROUNDS);

    printf("\nbit 0 1\n");
    for (bit = 0; bit < 32; bit++) {
        printf("%3d %5d %5d\n", bit, N_ROUNDS - bit_hist[bit], bit_hist[bit]);
    }
    printf("(expected values are %d)\n", N_ROUNDS / 2);

    printf("\nnibble 0 1 2 3 4 5 6 7 8 9 10 11 12 "
           "13 14 15\n");
    for (nib = 0; nib < 8; nib++) {
        printf("%6d", nib);
        for (val = 0; val < 16; val++) {
            printf(" %3d", nibble_hist[nib][val]);
        }
        printf("\n");
    }
    printf("(expected values are %d)\n", N_ROUNDS / 16);

    return 0;
}
/* Test driver for the reconnect state machine.
 *
 * Creates a reconnect object named "remote" at simulated time 1000, then
 * reads commands from stdin one line at a time.  Each line is echoed to
 * stdout; lines starting with '#' are otherwise ignored.  Other lines are
 * split into words and dispatched through 'commands'.  After each command
 * the changes in the reconnect statistics are printed with diff_stats().
 *
 * Returns 0 when stdin is exhausted. */
int
main(void)
{
    extern struct vlog_module VLM_reconnect;
    struct reconnect_stats prev;
    unsigned int old_max_tries;
    int old_time;
    char line[128];

    /* Turn off logging from the reconnect module. */
    vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_OFF);

    now = 1000;
    reconnect = reconnect_create(now);
    reconnect_set_name(reconnect, "remote");
    reconnect_get_stats(reconnect, now, &prev);
    printf("### t=%d ###\n", now);
    old_time = now;
    old_max_tries = reconnect_get_max_tries(reconnect);
    while (fgets(line, sizeof line, stdin)) {
        struct reconnect_stats cur;
        struct svec args;

        /* Echo the input so that the output is a complete transcript. */
        fputs(line, stdout);
        if (line[0] == '#') {
            continue;
        }

        svec_init(&args);
        svec_parse_words(&args, line);
        svec_terminate(&args);
        if (!svec_is_empty(&args)) {
            run_command(args.n, args.names, commands);
        }
        svec_destroy(&args);

        /* Print a banner if the command changed the simulated time. */
        if (old_time != now) {
            printf("\n### t=%d ###\n", now);
        }

        /* Report what changed since the previous command. */
        reconnect_get_stats(reconnect, now, &cur);
        diff_stats(&prev, &cur, now - old_time);
        prev = cur;
        if (reconnect_get_max_tries(reconnect) != old_max_tries) {
            old_max_tries = reconnect_get_max_tries(reconnect);
            printf(" %u tries left\n", old_max_tries);
        }

        old_time = now;
    }

    return 0;
}
"ECONNREFUSED")) { return ECONNREFUSED; } else if (!strcmp(s, "EOF")) { return EOF; } else { ovs_fatal(0, "unknown error '%s'", s); } } static void do_disconnected(int argc OVS_UNUSED, char *argv[]) { reconnect_disconnected(reconnect, now, error_from_string(argv[1])); } static void do_connecting(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { reconnect_connecting(reconnect, now); } static void do_connect_failed(int argc OVS_UNUSED, char *argv[]) { reconnect_connect_failed(reconnect, now, error_from_string(argv[1])); } static void do_connected(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { reconnect_connected(reconnect, now); } static void do_activity(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { reconnect_activity(reconnect, now); } static void do_run(int argc, char *argv[]) { enum reconnect_action action; if (argc > 1) { now += atoi(argv[1]); } action = reconnect_run(reconnect, now); switch (action) { default: if (action != 0) { NOT_REACHED(); } break; case RECONNECT_CONNECT: printf(" should connect\n"); break; case RECONNECT_DISCONNECT: printf(" should disconnect\n"); break; case RECONNECT_PROBE: printf(" should send probe\n"); break; } } static void do_advance(int argc OVS_UNUSED, char *argv[]) { now += atoi(argv[1]); } static void do_timeout(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { int timeout = reconnect_timeout(reconnect, now); if (timeout >= 0) { printf(" advance %d ms\n", timeout); now += timeout; } else { printf(" no timeout\n"); } } static void do_set_max_tries(int argc OVS_UNUSED, char *argv[]) { reconnect_set_max_tries(reconnect, atoi(argv[1])); } static void diff_stats(const struct reconnect_stats *old, const struct reconnect_stats *new, int delta) { if (old->state != new->state || old->state_elapsed != new->state_elapsed || old->backoff != new->backoff) { printf(" in %s for %u ms (%d ms backoff)\n", new->state, new->state_elapsed, new->backoff); } if (old->creation_time != new->creation_time || old->last_activity != new->last_activity || 
old->last_connected != new->last_connected) { printf(" created %lld, last activity %lld, last connected %lld\n", new->creation_time, new->last_activity, new->last_connected); } if (old->n_successful_connections != new->n_successful_connections || old->n_attempted_connections != new->n_attempted_connections || old->seqno != new->seqno) { printf(" %u successful connections out of %u attempts, seqno %u\n", new->n_successful_connections, new->n_attempted_connections, new->seqno); } if (old->is_connected != new->is_connected) { printf(" %sconnected\n", new->is_connected ? "" : "dis"); } if (old->last_connected != new->last_connected || (old->msec_since_connect != new->msec_since_connect - delta && !(old->msec_since_connect == UINT_MAX && new->msec_since_connect == UINT_MAX)) || (old->total_connected_duration != new->total_connected_duration - delta && !(old->total_connected_duration == 0 && new->total_connected_duration == 0))) { printf(" last connected %u ms ago, connected %u ms total\n", new->msec_since_connect, new->total_connected_duration); } if (old->last_disconnected != new->last_disconnected || (old->msec_since_disconnect != new->msec_since_disconnect - delta && !(old->msec_since_disconnect == UINT_MAX && new->msec_since_disconnect == UINT_MAX))) { printf(" disconnected at %llu ms (%u ms ago)\n", new->last_disconnected, new->msec_since_disconnect); } } static void do_set_passive(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { reconnect_set_passive(reconnect, true, now); } static void do_listening(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { reconnect_listening(reconnect, now); } static void do_listen_error(int argc OVS_UNUSED, char *argv[]) { reconnect_listen_error(reconnect, now, atoi(argv[1])); } static const struct command commands[] = { { "enable", 0, 0, do_enable }, { "disable", 0, 0, do_disable }, { "force-reconnect", 0, 0, do_force_reconnect }, { "disconnected", 0, 1, do_disconnected }, { "connecting", 0, 0, do_connecting }, { "connect-failed", 0, 1, 
do_connect_failed }, { "connected", 0, 0, do_connected }, { "activity", 0, 0, do_activity }, { "run", 0, 1, do_run }, { "advance", 1, 1, do_advance }, { "timeout", 0, 0, do_timeout }, { "set-max-tries", 1, 1, do_set_max_tries }, { "passive", 0, 0, do_set_passive }, { "listening", 0, 0, do_listening }, { "listen-error", 1, 1, do_listen_error }, { NULL, 0, 0, NULL }, }; openvswitch-2.0.1+git20140120/tests/test-reconnect.py000066400000000000000000000130621226605124000221250ustar00rootroot00000000000000# Copyright (c) 2009, 2010, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
def diff_stats(old, new, delta):
    """Print the statistics in 'new' that differ from those in 'old'.

    'old' and 'new' are statistics snapshots and 'delta' is the amount of
    simulated time between them.  Elapsed-time fields count as changed only
    if they moved by something other than 'delta'.
    """

    def differs(*fields):
        # True if any of the named attributes is different in 'new'.
        for field in fields:
            if getattr(old, field) != getattr(new, field):
                return True
        return False

    if differs("state", "state_elapsed", "backoff"):
        print(" in %s for %d ms (%d ms backoff)"
              % (new.state, new.state_elapsed, new.backoff))

    if differs("creation_time", "last_activity", "last_connected"):
        print(" created %d, last activity %d, last connected %d"
              % (new.creation_time, new.last_activity, new.last_connected))

    if differs("n_successful_connections", "n_attempted_connections",
               "seqno"):
        print(" %d successful connections out of %d attempts, seqno %d"
              % (new.n_successful_connections, new.n_attempted_connections,
                 new.seqno))

    if differs("is_connected"):
        prefix = "dis"
        if new.is_connected:
            prefix = ""
        print(" %sconnected" % prefix)

    # Connection timing: checked in order, stopping at the first reason.
    show_connect = differs("last_connected")
    if not show_connect and new.msec_since_connect != None:
        show_connect = (old.msec_since_connect
                        != new.msec_since_connect - delta)
    if not show_connect:
        show_connect = (
            old.total_connected_duration
            != new.total_connected_duration - delta
            and (old.total_connected_duration != 0
                 or new.total_connected_duration != 0))
    if show_connect:
        print(" last connected %d ms ago, connected %d ms total"
              % (new.msec_since_connect, new.total_connected_duration))

    # Disconnection timing, same scheme.
    show_disconnect = differs("last_disconnected")
    if not show_disconnect and new.msec_since_disconnect != None:
        show_disconnect = (old.msec_since_disconnect
                           != new.msec_since_disconnect - delta)
    if show_disconnect:
        print(" disconnected at %d ms (%d ms ago)"
              % (new.last_disconnected, new.msec_since_disconnect))
left" % old_max_tries old_time = now if __name__ == '__main__': main() openvswitch-2.0.1+git20140120/tests/test-sflow.c000066400000000000000000000416451226605124000211010ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * Copyright (c) 2013 InMon Corp. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include "command-line.h" #include "daemon.h" #include "dynamic-string.h" #include "netflow.h" #include "ofpbuf.h" #include "packets.h" #include "poll-loop.h" #include "socket-util.h" #include "unixctl.h" #include "util.h" #include "vlog.h" static void usage(void) NO_RETURN; static void parse_options(int argc, char *argv[]); static unixctl_cb_func test_sflow_exit; /* Datagram. */ #define SFLOW_VERSION_5 5 #define SFLOW_MIN_LEN 36 #define SFLOW_MAX_AGENTIP_STRLEN 64 /* Sample tag numbers. */ #define SFLOW_FLOW_SAMPLE 1 #define SFLOW_COUNTERS_SAMPLE 2 #define SFLOW_FLOW_SAMPLE_EXPANDED 3 #define SFLOW_COUNTERS_SAMPLE_EXPANDED 4 /* Structure element tag numbers. */ #define SFLOW_TAG_CTR_IFCOUNTERS 1 #define SFLOW_TAG_PKT_HEADER 1 #define SFLOW_TAG_PKT_SWITCH 1001 struct sflow_addr { enum { SFLOW_ADDRTYPE_undefined = 0, SFLOW_ADDRTYPE_IP4, SFLOW_ADDRTYPE_IP6 } type; union { ovs_be32 ip4; ovs_be32 ip6[4]; } a; }; struct sflow_xdr { /* Exceptions. */ jmp_buf env; int errline; /* Cursor. */ ovs_be32 *datap; uint32_t i; uint32_t quads; /* Agent. 
*/ struct sflow_addr agentAddr; char agentIPStr[SFLOW_MAX_AGENTIP_STRLEN]; uint32_t subAgentId; uint32_t uptime_mS; /* Datasource. */ uint32_t dsClass; uint32_t dsIndex; /* Sequence numbers. */ uint32_t dgramSeqNo; uint32_t fsSeqNo; uint32_t csSeqNo; /* Structure offsets. */ struct { uint32_t HEADER; uint32_t SWITCH; uint32_t IFCOUNTERS; } offset; /* Flow sample fields. */ uint32_t meanSkipCount; uint32_t samplePool; uint32_t dropEvents; uint32_t inputPortFormat; uint32_t inputPort; uint32_t outputPortFormat; uint32_t outputPort; }; #define SFLOWXDR_try(x) ((x->errline = setjmp(x->env)) == 0) #define SFLOWXDR_throw(x) longjmp(x->env, __LINE__) #define SFLOWXDR_assert(x, t) if (!(t)) SFLOWXDR_throw(x) static void sflowxdr_init(struct sflow_xdr *x, void *buf, size_t len) { x->datap = buf; x->quads = len >> 2; } static uint32_t sflowxdr_next(struct sflow_xdr *x) { return ntohl(x->datap[x->i++]); } static ovs_be32 sflowxdr_next_n(struct sflow_xdr *x) { return x->datap[x->i++]; } static bool sflowxdr_more(const struct sflow_xdr *x, uint32_t q) { return q + x->i <= x->quads; } static void sflowxdr_skip(struct sflow_xdr *x, uint32_t q) { x->i += q; } static uint32_t sflowxdr_mark(const struct sflow_xdr *x, uint32_t q) { return x->i + q; } static bool sflowxdr_mark_ok(const struct sflow_xdr *x, uint32_t m) { return m == x->i; } static void sflowxdr_mark_unique(struct sflow_xdr *x, uint32_t *pi) { if (*pi) { SFLOWXDR_throw(x); } *pi = x->i; } static void sflowxdr_setc(struct sflow_xdr *x, uint32_t j) { x->i = j; } static const char * sflowxdr_str(const struct sflow_xdr *x) { return (const char *) (x->datap + x->i); } static uint64_t sflowxdr_next_int64(struct sflow_xdr *x) { uint64_t scratch; scratch = sflowxdr_next(x); scratch <<= 32; scratch += sflowxdr_next(x); return scratch; } static void process_counter_sample(struct sflow_xdr *x) { if (x->offset.IFCOUNTERS) { sflowxdr_setc(x, x->offset.IFCOUNTERS); printf("IFCOUNTERS"); printf(" dgramSeqNo=%"PRIu32, x->dgramSeqNo); 
/* Returns the uppercase hexadecimal digit for 'hexit', which is used
 * directly as an index into a 16-character digit table. */
static char
bin_to_hex(int hexit)
{
    static const char digits[] = "0123456789ABCDEF";

    return digits[hexit];
}

/* Formats the 'len' bytes at 'a' into 'buf' as two uppercase hex digits per
 * byte, separated by '-', and null-terminates the result.  Stops before a
 * byte once more than 'bufLen' - 10 characters have been written.  Returns
 * the number of characters written, not counting the terminator. */
static int
print_hex(const char *a, int len, char *buf, int bufLen)
{
    const int limit = bufLen - 10;
    int used = 0;
    int pos = 0;

    while (pos < len && used <= limit) {
        unsigned char octet = a[pos];

        buf[used++] = bin_to_hex(octet >> 4);
        buf[used++] = bin_to_hex(octet & 0x0f);
        if (pos != len - 1) {
            buf[used++] = '-';
        }
        pos++;
    }
    buf[used] = '\0';

    return used;
}
printf(" out_vlan=%"PRIu32, sflowxdr_next(x)); printf(" out_priority=%"PRIu32, sflowxdr_next(x)); } sflowxdr_setc(x, x->offset.HEADER); printf(" meanSkip=%"PRIu32, x->meanSkipCount); printf(" samplePool=%"PRIu32, x->samplePool); printf(" dropEvents=%"PRIu32, x->dropEvents); printf(" in_ifindex=%"PRIu32, x->inputPort); printf(" in_format=%"PRIu32, x->inputPortFormat); printf(" out_ifindex=%"PRIu32, x->outputPort); printf(" out_format=%"PRIu32, x->outputPortFormat); printf(" hdr_prot=%"PRIu32, sflowxdr_next(x)); printf(" pkt_len=%"PRIu32, sflowxdr_next(x)); printf(" stripped=%"PRIu32, sflowxdr_next(x)); headerLen = sflowxdr_next(x); printf(" hdr_len=%"PRIu32, headerLen); print_hex(sflowxdr_str(x), headerLen, scratch, SFLOW_HEX_SCRATCH); printf(" hdr=%s", scratch); printf("\n"); } } static void process_datagram(struct sflow_xdr *x) { uint32_t samples, s; SFLOWXDR_assert(x, (sflowxdr_next(x) == SFLOW_VERSION_5)); /* Read the sFlow header. */ x->agentAddr.type = sflowxdr_next(x); switch (x->agentAddr.type) { case SFLOW_ADDRTYPE_IP4: x->agentAddr.a.ip4 = sflowxdr_next_n(x); break; case SFLOW_ADDRTYPE_IP6: x->agentAddr.a.ip6[0] = sflowxdr_next_n(x); x->agentAddr.a.ip6[1] = sflowxdr_next_n(x); x->agentAddr.a.ip6[2] = sflowxdr_next_n(x); x->agentAddr.a.ip6[3] = sflowxdr_next_n(x); break; case SFLOW_ADDRTYPE_undefined: default: SFLOWXDR_throw(x); break; } x->subAgentId = sflowxdr_next(x); x->dgramSeqNo = sflowxdr_next(x); x->uptime_mS = sflowxdr_next(x); /* Store the agent address as a string. */ if (x->agentAddr.type == SFLOW_ADDRTYPE_IP6) { snprintf(x->agentIPStr, SFLOW_MAX_AGENTIP_STRLEN, "%04x:%04x:%04x:%04x", x->agentAddr.a.ip6[0], x->agentAddr.a.ip6[1], x->agentAddr.a.ip6[2], x->agentAddr.a.ip6[3]); } else { snprintf(x->agentIPStr, SFLOW_MAX_AGENTIP_STRLEN, IP_FMT, IP_ARGS(x->agentAddr.a.ip4)); } /* Array of flow/counter samples. 
*/ samples = sflowxdr_next(x); for (s = 0; s < samples; s++) { uint32_t sType = sflowxdr_next(x); uint32_t sQuads = sflowxdr_next(x) >> 2; uint32_t sMark = sflowxdr_mark(x, sQuads); SFLOWXDR_assert(x, sflowxdr_more(x, sQuads)); switch (sType) { case SFLOW_COUNTERS_SAMPLE_EXPANDED: case SFLOW_COUNTERS_SAMPLE: { uint32_t csElements, e; uint32_t ceTag, ceQuads, ceMark, csEnd; x->csSeqNo = sflowxdr_next(x); if (sType == SFLOW_COUNTERS_SAMPLE_EXPANDED) { x->dsClass = sflowxdr_next(x); x->dsIndex = sflowxdr_next(x); } else { uint32_t dsCombined = sflowxdr_next(x); x->dsClass = dsCombined >> 24; x->dsIndex = dsCombined & 0x00FFFFFF; } csElements = sflowxdr_next(x); for (e = 0; e < csElements; e++) { SFLOWXDR_assert(x, sflowxdr_more(x,2)); ceTag = sflowxdr_next(x); ceQuads = sflowxdr_next(x) >> 2; ceMark = sflowxdr_mark(x, ceQuads); SFLOWXDR_assert(x, sflowxdr_more(x,ceQuads)); /* Only care about selected structures. Just record their * offsets here. We'll read the fields out later. */ switch (ceTag) { case SFLOW_TAG_CTR_IFCOUNTERS: sflowxdr_mark_unique(x, &x->offset.IFCOUNTERS); break; /* Add others here... */ } sflowxdr_skip(x, ceQuads); SFLOWXDR_assert(x, sflowxdr_mark_ok(x, ceMark)); } csEnd = sflowxdr_mark(x, 0); process_counter_sample(x); /* Make sure we pick up the decoding where we left off. */ sflowxdr_setc(x, csEnd); /* Clear the offsets for the next sample. 
*/ memset(&x->offset, 0, sizeof x->offset); } break; case SFLOW_FLOW_SAMPLE: case SFLOW_FLOW_SAMPLE_EXPANDED: { uint32_t fsElements, e; uint32_t feTag, feQuads, feMark, fsEnd; x->fsSeqNo = sflowxdr_next(x); if (sType == SFLOW_FLOW_SAMPLE_EXPANDED) { x->dsClass = sflowxdr_next(x); x->dsIndex = sflowxdr_next(x); } else { uint32_t dsCombined = sflowxdr_next(x); x->dsClass = dsCombined >> 24; x->dsIndex = dsCombined & 0x00FFFFFF; } x->meanSkipCount = sflowxdr_next(x); x->samplePool = sflowxdr_next(x); x->dropEvents = sflowxdr_next(x); if (sType == SFLOW_FLOW_SAMPLE_EXPANDED) { x->inputPortFormat = sflowxdr_next(x); x->inputPort = sflowxdr_next(x); x->outputPortFormat = sflowxdr_next(x); x->outputPort = sflowxdr_next(x); } else { uint32_t inp, outp; inp = sflowxdr_next(x); outp = sflowxdr_next(x); x->inputPortFormat = inp >> 30; x->inputPort = inp & 0x3fffffff; x->outputPortFormat = outp >> 30; x->outputPort = outp & 0x3fffffff; } fsElements = sflowxdr_next(x); for (e = 0; e < fsElements; e++) { SFLOWXDR_assert(x, sflowxdr_more(x,2)); feTag = sflowxdr_next(x); feQuads = sflowxdr_next(x) >> 2; feMark = sflowxdr_mark(x, feQuads); SFLOWXDR_assert(x, sflowxdr_more(x,feQuads)); /* Only care about selected structures. Just record their * offsets here. We'll read the fields out below. */ switch (feTag) { case SFLOW_TAG_PKT_HEADER: sflowxdr_mark_unique(x, &x->offset.HEADER); break; case SFLOW_TAG_PKT_SWITCH: sflowxdr_mark_unique(x, &x->offset.SWITCH); break; /* Add others here... */ } sflowxdr_skip(x, feQuads); SFLOWXDR_assert(x, sflowxdr_mark_ok(x, feMark)); } fsEnd = sflowxdr_mark(x, 0); process_flow_sample(x); /* Make sure we pick up the decoding where we left off. */ sflowxdr_setc(x, fsEnd); /* Clear the offsets for the next counter/flow sample. */ memset(&x->offset, 0, sizeof x->offset); } break; default: /* Skip other sample types. 
*/ sflowxdr_skip(x, sQuads); } SFLOWXDR_assert(x, sflowxdr_mark_ok(x, sMark)); } } static void print_sflow(struct ofpbuf *buf) { char *dgram_buf; int dgram_len = buf->size; struct sflow_xdr xdrDatagram; struct sflow_xdr *x = &xdrDatagram; memset(x, 0, sizeof *x); if (SFLOWXDR_try(x)) { SFLOWXDR_assert(x, (dgram_buf = ofpbuf_try_pull(buf, buf->size))); sflowxdr_init(x, dgram_buf, dgram_len); SFLOWXDR_assert(x, dgram_len >= SFLOW_MIN_LEN); process_datagram(x); } else { // CATCH printf("\n>>>>> ERROR in " __FILE__ " at line %u\n", x->errline); } } int main(int argc, char *argv[]) { struct unixctl_server *server; enum { MAX_RECV = 1500 }; const char *target; struct ofpbuf buf; bool exiting = false; int error; int sock; proctitle_init(argc, argv); set_program_name(argv[0]); parse_options(argc, argv); if (argc - optind != 1) { ovs_fatal(0, "exactly one non-option argument required " "(use --help for help)"); } target = argv[optind]; sock = inet_open_passive(SOCK_DGRAM, target, 0, NULL, 0); if (sock < 0) { ovs_fatal(0, "%s: failed to open (%s)", argv[1], ovs_strerror(-sock)); } daemon_save_fd(STDOUT_FILENO); daemonize_start(); error = unixctl_server_create(NULL, &server); if (error) { ovs_fatal(error, "failed to create unixctl server"); } unixctl_command_register("exit", "", 0, 0, test_sflow_exit, &exiting); daemonize_complete(); ofpbuf_init(&buf, MAX_RECV); for (;;) { int retval; unixctl_server_run(server); ofpbuf_clear(&buf); do { retval = read(sock, buf.data, buf.allocated); } while (retval < 0 && errno == EINTR); if (retval > 0) { ofpbuf_put_uninit(&buf, retval); print_sflow(&buf); fflush(stdout); } if (exiting) { break; } poll_fd_wait(sock, POLLIN); unixctl_server_wait(server); poll_block(); } return 0; } static void parse_options(int argc, char *argv[]) { enum { DAEMON_OPTION_ENUMS, VLOG_OPTION_ENUMS }; static const struct option long_options[] = { {"verbose", optional_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, 
{NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { int c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { case 'h': usage(); DAEMON_OPTION_HANDLERS VLOG_OPTION_HANDLERS case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); } static void usage(void) { printf("%s: sflow collector test utility\n" "usage: %s [OPTIONS] PORT[:IP]\n" "where PORT is the UDP port to listen on and IP is optionally\n" "the IP address to listen on.\n", program_name, program_name); daemon_usage(); vlog_usage(); printf("\nOther options:\n" " -h, --help display this help message\n"); exit(EXIT_SUCCESS); } static void test_sflow_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *exiting_) { bool *exiting = exiting_; *exiting = true; unixctl_command_reply(conn, NULL); } openvswitch-2.0.1+git20140120/tests/test-sha1.c000066400000000000000000000112351226605124000205730ustar00rootroot00000000000000/* * Copyright (c) 2009, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "sha1.h" #include #include #include #include #include "random.h" #include "util.h" #undef NDEBUG #include struct test_vector { char *data; size_t size; const uint8_t output[20]; }; static const struct test_vector vectors[] = { /* FIPS 180-1. 
*/ { "abc", 3, { 0xA9, 0x99, 0x3E, 0x36, 0x47, 0x06, 0x81, 0x6A, 0xBA, 0x3E, 0x25, 0x71, 0x78, 0x50, 0xC2, 0x6C, 0x9C, 0xD0, 0xD8, 0x9D } }, { "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56, { 0x84, 0x98, 0x3E, 0x44, 0x1C, 0x3B, 0xD2, 0x6E, 0xBA, 0xAE, 0x4A, 0xA1, 0xF9, 0x51, 0x29, 0xE5, 0xE5, 0x46, 0x70, 0xF1 }, }, /* RFC 3174. */ { "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567" "0123456701234567012345670123456701234567012345670123456701234567", 64 * 10, { 0xDE, 0xA3, 0x56, 0xA2, 0xCD, 0xDD, 0x90, 0xC7, 0xA7, 0xEC, 0xED, 0xC5, 0xEB, 0xB5, 0x63, 0x93, 0x4F, 0x46, 0x04, 0x52 }, }, /* http://www.febooti.com/products/filetweak/members/hash-and-crc/test-vectors/ */ { "", 0, { 0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d, 0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90, 0xaf, 0xd8, 0x07, 0x09 } }, { "Test vector from febooti.com", 28, { 0xa7, 0x63, 0x17, 0x95, 0xf6, 0xd5, 0x9c, 0xd6, 0xd1, 0x4e, 0xbd, 0x00, 0x58, 0xa6, 0x39, 0x4a, 0x4b, 0x93, 0xd8, 0x68 } }, /* http://en.wikipedia.org/wiki/SHA_hash_functions */ { "The quick brown fox jumps over the lazy dog", 43, { 0x2f, 0xd4, 0xe1, 0xc6, 0x7a, 0x2d, 0x28, 0xfc, 0xed, 0x84, 0x9e, 0xe1, 0xbb, 0x76, 0xe7, 0x39, 0x1b, 0x93, 0xeb, 0x12 }, }, { "The quick brown fox jumps over the lazy cog", 43, { 0xde, 0x9f, 0x2c, 0x7f, 0xd2, 0x5e, 0x1b, 0x3a, 0xfa, 0xd3, 0xe8, 0x5a, 0x0b, 0xd1, 0x7d, 0x9b, 0x10, 0x0d, 0xb4, 0xb3 }, }, /* 
http://www.hashcash.org/docs/sha1-hashcash.html */ { "0:030626:adam@cypherspace.org:6470e06d773e05a8", 46, { 0x00, 0x00, 0x00, 0x00, 0xc7, 0x0d, 0xb7, 0x38, 0x9f, 0x24, 0x1b, 0x8f, 0x44, 0x1f, 0xcf, 0x06, 0x8a, 0xea, 0xd3, 0xf0 }, }, }; static void test_one(const struct test_vector *vec) { uint8_t md[SHA1_DIGEST_SIZE]; int i; /* All at once. */ sha1_bytes(vec->data, vec->size, md); assert(!memcmp(md, vec->output, SHA1_DIGEST_SIZE)); /* In two pieces. */ for (i = 0; i < 20; i++) { int n0 = vec->size ? random_range(vec->size) : 0; int n1 = vec->size - n0; struct sha1_ctx sha1; sha1_init(&sha1); sha1_update(&sha1, vec->data, n0); sha1_update(&sha1, vec->data + n0, n1); sha1_final(&sha1, md); assert(!memcmp(md, vec->output, SHA1_DIGEST_SIZE)); } putchar('.'); fflush(stdout); } static void test_big_vector(void) { enum { SIZE = 1000000 }; struct test_vector vec = { NULL, SIZE, { 0x34, 0xAA, 0x97, 0x3C, 0xD4, 0xC4, 0xDA, 0xA4, 0xF6, 0x1E, 0xEB, 0x2B, 0xDB, 0xAD, 0x27, 0x31, 0x65, 0x34, 0x01, 0x6F } }; size_t i; vec.data = xmalloc(SIZE); for (i = 0; i < SIZE; i++) { vec.data[i] = 'a'; } test_one(&vec); free(vec.data); } int main(void) { int i; for (i = 0; i < ARRAY_SIZE(vectors); i++) { test_one(&vectors[i]); } test_big_vector(); putchar('\n'); return 0; } openvswitch-2.0.1+git20140120/tests/test-stp.c000066400000000000000000000446531226605124000205570ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "stp.h" #include #include #include #include #include #include #include "ofpbuf.h" #include "packets.h" #include "vlog.h" struct bpdu { int port_no; void *data; size_t size; }; struct bridge { struct test_case *tc; int id; bool reached; struct stp *stp; struct lan *ports[STP_MAX_PORTS]; int n_ports; #define RXQ_SIZE 16 struct bpdu rxq[RXQ_SIZE]; int rxq_head, rxq_tail; }; struct lan_conn { struct bridge *bridge; int port_no; }; struct lan { struct test_case *tc; const char *name; bool reached; struct lan_conn conns[16]; int n_conns; }; struct test_case { struct bridge *bridges[16]; int n_bridges; struct lan *lans[26]; int n_lans; }; static const char *file_name; static int line_number; static char line[128]; static char *pos, *token; static int n_warnings; static struct test_case * new_test_case(void) { struct test_case *tc = xmalloc(sizeof *tc); tc->n_bridges = 0; tc->n_lans = 0; return tc; } static void send_bpdu(struct ofpbuf *pkt, int port_no, void *b_) { struct bridge *b = b_; struct lan *lan; assert(port_no < b->n_ports); lan = b->ports[port_no]; if (lan) { const void *data = pkt->l3; size_t size = (char *) ofpbuf_tail(pkt) - (char *) data; int i; for (i = 0; i < lan->n_conns; i++) { struct lan_conn *conn = &lan->conns[i]; if (conn->bridge != b || conn->port_no != port_no) { struct bridge *dst = conn->bridge; struct bpdu *bpdu = &dst->rxq[dst->rxq_head++ % RXQ_SIZE]; assert(dst->rxq_head - dst->rxq_tail <= RXQ_SIZE); bpdu->data = xmemdup(data, size); bpdu->size = size; bpdu->port_no = conn->port_no; } } } ofpbuf_delete(pkt); } static struct bridge * new_bridge(struct test_case *tc, int id) { struct bridge *b = xmalloc(sizeof *b); char name[16]; b->tc = tc; b->id = id; snprintf(name, sizeof name, "stp%x", id); b->stp = stp_create(name, id, send_bpdu, b); assert(tc->n_bridges < ARRAY_SIZE(tc->bridges)); b->n_ports = 0; b->rxq_head = b->rxq_tail = 0; tc->bridges[tc->n_bridges++] = b; return b; } static struct lan * new_lan(struct test_case 
*tc, const char *name) { struct lan *lan = xmalloc(sizeof *lan); lan->tc = tc; lan->name = xstrdup(name); lan->n_conns = 0; assert(tc->n_lans < ARRAY_SIZE(tc->lans)); tc->lans[tc->n_lans++] = lan; return lan; } static void reconnect_port(struct bridge *b, int port_no, struct lan *new_lan) { struct lan *old_lan; int j; assert(port_no < b->n_ports); old_lan = b->ports[port_no]; if (old_lan == new_lan) { return; } /* Disconnect from old_lan. */ if (old_lan) { for (j = 0; j < old_lan->n_conns; j++) { struct lan_conn *c = &old_lan->conns[j]; if (c->bridge == b && c->port_no == port_no) { memmove(c, c + 1, sizeof *c * (old_lan->n_conns - j - 1)); old_lan->n_conns--; break; } } } /* Connect to new_lan. */ b->ports[port_no] = new_lan; if (new_lan) { int conn_no = new_lan->n_conns++; assert(conn_no < ARRAY_SIZE(new_lan->conns)); new_lan->conns[conn_no].bridge = b; new_lan->conns[conn_no].port_no = port_no; } } static void new_port(struct bridge *b, struct lan *lan, int path_cost) { int port_no = b->n_ports++; struct stp_port *p = stp_get_port(b->stp, port_no); assert(port_no < ARRAY_SIZE(b->ports)); b->ports[port_no] = NULL; stp_port_set_path_cost(p, path_cost); stp_port_enable(p); reconnect_port(b, port_no, lan); } static void dump(struct test_case *tc) { int i; for (i = 0; i < tc->n_bridges; i++) { struct bridge *b = tc->bridges[i]; struct stp *stp = b->stp; int j; printf("%s:", stp_get_name(stp)); if (stp_is_root_bridge(stp)) { printf(" root"); } printf("\n"); for (j = 0; j < b->n_ports; j++) { struct stp_port *p = stp_get_port(stp, j); enum stp_state state = stp_port_get_state(p); printf("\tport %d", j); if (b->ports[j]) { printf(" (lan %s)", b->ports[j]->name); } else { printf(" (disconnected)"); } printf(": %s", stp_state_name(state)); if (p == stp_get_root_port(stp)) { printf(" (root port, root_path_cost=%u)", stp_get_root_path_cost(stp)); } printf("\n"); } } } static void dump_lan_tree(struct test_case *, struct lan *, int level); static void dump_bridge_tree(struct 
test_case *tc, struct bridge *b, int level) { int i; if (b->reached) { return; } b->reached = true; for (i = 0; i < level; i++) { printf("\t"); } printf("%s\n", stp_get_name(b->stp)); for (i = 0; i < b->n_ports; i++) { struct lan *lan = b->ports[i]; struct stp_port *p = stp_get_port(b->stp, i); if (stp_port_get_state(p) == STP_FORWARDING && lan) { dump_lan_tree(tc, lan, level + 1); } } } static void dump_lan_tree(struct test_case *tc, struct lan *lan, int level) { int i; if (lan->reached) { return; } lan->reached = true; for (i = 0; i < level; i++) { printf("\t"); } printf("%s\n", lan->name); for (i = 0; i < lan->n_conns; i++) { struct bridge *b = lan->conns[i].bridge; dump_bridge_tree(tc, b, level + 1); } } static void tree(struct test_case *tc) { int i; for (i = 0; i < tc->n_bridges; i++) { struct bridge *b = tc->bridges[i]; b->reached = false; } for (i = 0; i < tc->n_lans; i++) { struct lan *lan = tc->lans[i]; lan->reached = false; } for (i = 0; i < tc->n_bridges; i++) { struct bridge *b = tc->bridges[i]; struct stp *stp = b->stp; if (stp_is_root_bridge(stp)) { dump_bridge_tree(tc, b, 0); } } } static void simulate(struct test_case *tc, int granularity) { int time; for (time = 0; time < 1000 * 180; time += granularity) { int round_trips; int i; for (i = 0; i < tc->n_bridges; i++) { stp_tick(tc->bridges[i]->stp, granularity); } for (round_trips = 0; round_trips < granularity; round_trips++) { bool any = false; for (i = 0; i < tc->n_bridges; i++) { struct bridge *b = tc->bridges[i]; for (; b->rxq_tail != b->rxq_head; b->rxq_tail++) { struct bpdu *bpdu = &b->rxq[b->rxq_tail % RXQ_SIZE]; stp_received_bpdu(stp_get_port(b->stp, bpdu->port_no), bpdu->data, bpdu->size); free(bpdu->data); any = true; } } if (!any) { break; } } } } static void err(const char *message, ...) PRINTF_FORMAT(1, 2) NO_RETURN; static void err(const char *message, ...) 
{ va_list args; fprintf(stderr, "%s:%d:%td: ", file_name, line_number, pos - line); va_start(args, message); vfprintf(stderr, message, args); va_end(args); putc('\n', stderr); exit(EXIT_FAILURE); } static void warn(const char *message, ...) PRINTF_FORMAT(1, 2); static void warn(const char *message, ...) { va_list args; fprintf(stderr, "%s:%d: ", file_name, line_number); va_start(args, message); vfprintf(stderr, message, args); va_end(args); putc('\n', stderr); n_warnings++; } static bool get_token(void) { char *start; while (isspace((unsigned char) *pos)) { pos++; } if (*pos == '\0') { free(token); token = NULL; return false; } start = pos; if (isalpha((unsigned char) *pos)) { while (isalpha((unsigned char) *++pos)) { continue; } } else if (isdigit((unsigned char) *pos)) { if (*pos == '0' && (pos[1] == 'x' || pos[1] == 'X')) { pos += 2; while (isxdigit((unsigned char) *pos)) { pos++; } } else { while (isdigit((unsigned char) *++pos)) { continue; } } } else { pos++; } free(token); token = xmemdup0(start, pos - start); return true; } static bool get_int(int *intp) { char *save_pos = pos; if (token && isdigit((unsigned char) *token)) { *intp = strtol(token, NULL, 0); get_token(); return true; } else { pos = save_pos; return false; } } static bool match(const char *want) { if (token && !strcmp(want, token)) { get_token(); return true; } else { return false; } } static int must_get_int(void) { int x; if (!get_int(&x)) { err("expected integer"); } return x; } static void must_match(const char *want) { if (!match(want)) { err("expected \"%s\"", want); } } int main(int argc, char *argv[]) { struct test_case *tc; FILE *input_file; int i; vlog_set_pattern(VLF_CONSOLE, "%c|%p|%m"); vlog_set_levels(NULL, VLF_SYSLOG, VLL_OFF); if (argc != 2) { ovs_fatal(0, "usage: test-stp INPUT.STP\n"); } file_name = argv[1]; input_file = fopen(file_name, "r"); if (!input_file) { ovs_fatal(errno, "error opening \"%s\"", file_name); } tc = new_test_case(); for (i = 0; i < 26; i++) { char 
name[2]; name[0] = 'a' + i; name[1] = '\0'; new_lan(tc, name); } for (line_number = 1; fgets(line, sizeof line, input_file); line_number++) { char *newline, *hash; newline = strchr(line, '\n'); if (newline) { *newline = '\0'; } hash = strchr(line, '#'); if (hash) { *hash = '\0'; } pos = line; if (!get_token()) { continue; } if (match("bridge")) { struct bridge *bridge; int bridge_no, port_no; bridge_no = must_get_int(); if (bridge_no < tc->n_bridges) { bridge = tc->bridges[bridge_no]; } else if (bridge_no == tc->n_bridges) { bridge = new_bridge(tc, must_get_int()); } else { err("bridges must be numbered consecutively from 0"); } if (match("^")) { stp_set_bridge_priority(bridge->stp, must_get_int()); } if (match("=")) { for (port_no = 0; port_no < STP_MAX_PORTS; port_no++) { struct stp_port *p = stp_get_port(bridge->stp, port_no); if (!token || match("X")) { stp_port_disable(p); } else if (match("_")) { /* Nothing to do. */ } else { struct lan *lan; int path_cost; if (!strcmp(token, "0")) { lan = NULL; } else if (strlen(token) == 1 && islower((unsigned char)*token)) { lan = tc->lans[*token - 'a']; } else { err("%s is not a valid LAN name " "(0 or a lowercase letter)", token); } get_token(); path_cost = match(":") ? 
must_get_int() : 10; if (port_no < bridge->n_ports) { stp_port_set_path_cost(p, path_cost); stp_port_enable(p); reconnect_port(bridge, port_no, lan); } else if (port_no == bridge->n_ports) { new_port(bridge, lan, path_cost); } else { err("ports must be numbered consecutively"); } if (match("^")) { stp_port_set_priority(p, must_get_int()); } } } } } else if (match("run")) { simulate(tc, must_get_int()); } else if (match("dump")) { dump(tc); } else if (match("tree")) { tree(tc); } else if (match("check")) { struct bridge *b; struct stp *stp; int bridge_no, port_no; bridge_no = must_get_int(); if (bridge_no >= tc->n_bridges) { err("no bridge numbered %d", bridge_no); } b = tc->bridges[bridge_no]; stp = b->stp; must_match("="); if (match("rootid")) { uint64_t rootid; must_match(":"); rootid = must_get_int(); if (match("^")) { rootid |= (uint64_t) must_get_int() << 48; } else { rootid |= UINT64_C(0x8000) << 48; } if (stp_get_designated_root(stp) != rootid) { warn("%s: root %"PRIx64", not %"PRIx64, stp_get_name(stp), stp_get_designated_root(stp), rootid); } } if (match("root")) { if (stp_get_root_path_cost(stp)) { warn("%s: root path cost of root is %u but should be 0", stp_get_name(stp), stp_get_root_path_cost(stp)); } if (!stp_is_root_bridge(stp)) { warn("%s: root is %"PRIx64", not %"PRIx64, stp_get_name(stp), stp_get_designated_root(stp), stp_get_bridge_id(stp)); } for (port_no = 0; port_no < b->n_ports; port_no++) { struct stp_port *p = stp_get_port(stp, port_no); enum stp_state state = stp_port_get_state(p); if (!(state & (STP_DISABLED | STP_FORWARDING))) { warn("%s: root port %d in state %s", stp_get_name(b->stp), port_no, stp_state_name(state)); } } } else { for (port_no = 0; port_no < STP_MAX_PORTS; port_no++) { struct stp_port *p = stp_get_port(stp, port_no); enum stp_state state; if (token == NULL || match("D")) { state = STP_DISABLED; } else if (match("B")) { state = STP_BLOCKING; } else if (match("Li")) { state = STP_LISTENING; } else if (match("Le")) { state 
= STP_LEARNING; } else if (match("F")) { state = STP_FORWARDING; } else if (match("_")) { continue; } else { err("unknown port state %s", token); } if (stp_port_get_state(p) != state) { warn("%s port %d: state is %s but should be %s", stp_get_name(stp), port_no, stp_state_name(stp_port_get_state(p)), stp_state_name(state)); } if (state == STP_FORWARDING) { struct stp_port *root_port = stp_get_root_port(stp); if (match(":")) { int root_path_cost = must_get_int(); if (p != root_port) { warn("%s: port %d is not the root port", stp_get_name(stp), port_no); if (!root_port) { warn("%s: (there is no root port)", stp_get_name(stp)); } else { warn("%s: (port %d is the root port)", stp_get_name(stp), stp_port_no(root_port)); } } else if (root_path_cost != stp_get_root_path_cost(stp)) { warn("%s: root path cost is %u, should be %d", stp_get_name(stp), stp_get_root_path_cost(stp), root_path_cost); } } else if (p == root_port) { warn("%s: port %d is the root port but " "not expected to be", stp_get_name(stp), port_no); } } } } if (n_warnings) { exit(EXIT_FAILURE); } } if (get_token()) { err("trailing garbage on line"); } } free(token); for (i = 0; i < tc->n_lans; i++) { struct lan *lan = tc->lans[i]; free(CONST_CAST(char *, lan->name)); free(lan); } for (i = 0; i < tc->n_bridges; i++) { struct bridge *bridge = tc->bridges[i]; stp_unref(bridge->stp); free(bridge); } free(tc); return 0; } openvswitch-2.0.1+git20140120/tests/test-strtok_r.c000066400000000000000000000023731226605124000216110ustar00rootroot00000000000000/* * Copyright (c) 2010 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include /* Some versions of glibc 2.7 have a bug in strtok_r, when optimization is * enabled, that can cause segfaults: * http://sources.redhat.com/bugzilla/show_bug.cgi?id=5614. * * Open vSwitch works around this problem by supplying a replacement string.h. * This test program verifies that the workaround is in place. * * The input ":::" consists only of delimiters, so a correct strtok_r returns * NULL from both calls below and the program prints "NULL NULL". */ int main(void) { char string[] = ":::"; char *save_ptr = (char *) 0xc0ffee; char *token1, *token2; token1 = strtok_r(string, ":", &save_ptr); token2 = strtok_r(NULL, ":", &save_ptr); printf ("%s %s\n", token1 ? token1 : "NULL", token2 ? token2 : "NULL"); return 0; } openvswitch-2.0.1+git20140120/tests/test-timeval.c000066400000000000000000000101431226605124000213750ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "timeval.h" #include #include #include #include #include #include #include "command-line.h" #include "daemon.h" #include "util.h" #undef NDEBUG #include static long long int gettimeofday_in_msec(void) { struct timeval tv; xgettimeofday(&tv); return timeval_to_msec(&tv); } static void do_test(void) { /* Wait until we are awakened by a signal (typically EINTR due to the * setitimer()). Then ensure that, if time has really advanced by * TIME_UPDATE_INTERVAL, then time_msec() reports that it advanced. */ long long int start_time_msec, start_time_wall; long long int start_gtod; start_time_msec = time_msec(); start_time_wall = time_wall_msec(); start_gtod = gettimeofday_in_msec(); for (;;) { /* Wait up to 1 second. Using select() to do the timeout avoids * interfering with the interval timer. */ struct timeval timeout; int retval; timeout.tv_sec = 1; timeout.tv_usec = 0; retval = select(0, NULL, NULL, NULL, &timeout); if (retval != -1) { ovs_fatal(0, "select returned %d", retval); } else if (errno != EINTR) { ovs_fatal(errno, "select reported unexpected error"); } if (gettimeofday_in_msec() - start_gtod >= TIME_UPDATE_INTERVAL) { /* gettimeofday() and time_msec() have different granularities in * their time sources. Depending on the rounding used this could * result in a slight difference, so we allow for 1 ms of slop. 
*/ assert(time_msec() - start_time_msec >= TIME_UPDATE_INTERVAL - 1); assert(time_wall_msec() - start_time_wall >= TIME_UPDATE_INTERVAL - 1); break; } } } static void usage(void) { ovs_fatal(0, "usage: %s TEST, where TEST is \"plain\" or \"daemon\"", program_name); } int main(int argc, char *argv[]) { proctitle_init(argc, argv); set_program_name(argv[0]); if (argc != 2) { usage(); } else if (!strcmp(argv[1], "plain")) { /* If we're not caching time there isn't much to test and SIGALRM won't * be around to pull us out of the select() call, so just skip out */ if (!CACHE_TIME) { exit (77); } do_test(); } else if (!strcmp(argv[1], "daemon")) { /* Test that time still advances even in a daemon. This is an * interesting test because fork() cancels the interval timer. */ char cwd[1024], *pidfile; FILE *success; if (!CACHE_TIME) { exit (77); } assert(getcwd(cwd, sizeof cwd) == cwd); unlink("test-timeval.success"); /* Daemonize, with a pidfile in the current directory. */ set_detach(); pidfile = xasprintf("%s/test-timeval.pid", cwd); set_pidfile(pidfile); free(pidfile); set_no_chdir(); daemonize(); /* Run the test. */ do_test(); /* Report success by writing out a file, since the ultimate invoker of * test-timeval can't wait on the daemonized process. */ success = fopen("test-timeval.success", "w"); if (!success) { ovs_fatal(errno, "test-timeval.success: create failed"); } fprintf(success, "success\n"); fclose(success); } else { usage(); } return 0; } openvswitch-2.0.1+git20140120/tests/test-type-props.c000066400000000000000000000044111226605124000220570ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "type-props.h" #include #include #include #include #define MUST_SUCCEED(EXPRESSION) \ if (!(EXPRESSION)) { \ fprintf(stderr, "%s:%d: %s failed\n", \ __FILE__, __LINE__, #EXPRESSION); \ exit(EXIT_FAILURE); \ } #define TEST_TYPE(type, minimum, maximum, is_signed) \ MUST_SUCCEED(TYPE_IS_INTEGER(type)); \ MUST_SUCCEED(TYPE_IS_SIGNED(type) == is_signed); \ MUST_SUCCEED(TYPE_MAXIMUM(type) == maximum); \ MUST_SUCCEED(TYPE_MINIMUM(type) == minimum); \ sprintf(max_s, "%"PRIuMAX, (uintmax_t) (maximum)); \ MUST_SUCCEED(strlen(max_s) <= INT_STRLEN(type)); \ sprintf(min_s, "%"PRIdMAX, (intmax_t) (minimum)); \ MUST_SUCCEED(strlen(min_s) <= INT_STRLEN(type)); int main (void) { char max_s[128]; char min_s[128]; TEST_TYPE(char, CHAR_MIN, CHAR_MAX, (CHAR_MIN < 0)); TEST_TYPE(signed char, SCHAR_MIN, SCHAR_MAX, 1); TEST_TYPE(short int, SHRT_MIN, SHRT_MAX, 1); TEST_TYPE(int, INT_MIN, INT_MAX, 1); TEST_TYPE(long int, LONG_MIN, LONG_MAX, 1); TEST_TYPE(long long int, LLONG_MIN, LLONG_MAX, 1); TEST_TYPE(unsigned char, 0, UCHAR_MAX, 0); TEST_TYPE(unsigned short int, 0, USHRT_MAX, 0); TEST_TYPE(unsigned int, 0, UINT_MAX, 0); TEST_TYPE(unsigned long int, 0, ULONG_MAX, 0); TEST_TYPE(unsigned long long int, 0, ULLONG_MAX, 0); MUST_SUCCEED(!(TYPE_IS_INTEGER(float))); MUST_SUCCEED(!(TYPE_IS_INTEGER(double))); MUST_SUCCEED(!(TYPE_IS_INTEGER(long double))); return 0; } openvswitch-2.0.1+git20140120/tests/test-unix-socket.c000066400000000000000000000032731226605124000222130ustar00rootroot00000000000000/* * Copyright (c) 2010, 2012 Nicira, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include "util.h" #include "socket-util.h" int main(int argc, char *argv[]) { const char *sockname1; const char *sockname2; int sock1, sock2; set_program_name(argv[0]); if (argc != 2 && argc != 3) { ovs_fatal(0, "usage: %s SOCKETNAME1 [SOCKETNAME2]", argv[0]); } sockname1 = argv[1]; sockname2 = argc > 2 ? argv[2] : sockname1; signal(SIGALRM, SIG_DFL); alarm(5); /* Create a listening socket under name 'sockname1'. */ sock1 = make_unix_socket(SOCK_STREAM, false, sockname1, NULL); if (sock1 < 0) { ovs_fatal(-sock1, "%s: bind failed", sockname1); } if (listen(sock1, 1)) { ovs_fatal(errno, "%s: listen failed", sockname1); } /* Connect to 'sockname2' (which should be the same file, perhaps under a * different name). */ sock2 = make_unix_socket(SOCK_STREAM, false, NULL, sockname2); if (sock2 < 0) { ovs_fatal(-sock2, "%s: connect failed", sockname2); } close(sock1); close(sock2); return 0; } openvswitch-2.0.1+git20140120/tests/test-unix-socket.py000066400000000000000000000034341226605124000224200ustar00rootroot00000000000000# # Copyright (c) 2010, 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import signal import socket import sys import ovs.socket_util def main(argv): if len(argv) not in (2, 3): sys.stderr.write("usage: %s SOCKETNAME1 [SOCKETNAME2]", argv[0]) sys.exit(1) sockname1 = argv[1] if len(argv) > 2: sockname2 = argv[2] else: sockname2 = sockname1 signal.signal(signal.SIGALRM, signal.SIG_DFL) signal.alarm(5) # Create a listening socket under name 'sockname1'. error, sock1 = ovs.socket_util.make_unix_socket(socket.SOCK_STREAM, False, sockname1, None) if error: sys.stderr.write("%s: bind failed (%s)" % (sockname1, os.strerror(error))) sock1.listen(1) # Connect to 'sockname2' (which should be the same file, perhaps under a # different name). error, sock2 = ovs.socket_util.make_unix_socket(socket.SOCK_STREAM, False, None, sockname2) if error: sys.stderr.write("%s: connect failed (%s)" % (sockname2, os.strerror(error))) if __name__ == '__main__': main(sys.argv) openvswitch-2.0.1+git20140120/tests/test-unixctl.py000066400000000000000000000054001226605124000216300ustar00rootroot00000000000000# Copyright (c) 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

"""Open vSwitch unixctl test program for Python.

Creates a unixctl server, registers the test commands "exit", "echo",
"log", "echo_error" and "block", and then services requests until the
"exit" command is received.
"""

import argparse
import sys

import ovs.daemon
# ovs.poller, ovs.util and ovs.vlog are referenced directly below, so they
# are imported explicitly instead of relying on other ovs modules having
# imported them as a side effect.
import ovs.poller
import ovs.unixctl
import ovs.unixctl.server
import ovs.util
import ovs.vlog

vlog = ovs.vlog.Vlog("test-unixctl")

# Set by unixctl_exit(); polled by the run loop in main().
exiting = False


def unixctl_exit(conn, unused_argv, aux):
    """Handle "exit": flag the run loop to stop and send an empty reply."""
    global exiting

    # 'aux' must be the value passed at registration time in main().
    assert aux == "aux_exit"

    exiting = True
    conn.reply(None)


def unixctl_echo(conn, argv, aux):
    """Handle "echo": reply with the string form of the argument list."""
    assert aux == "aux_echo"
    conn.reply(str(argv))


def unixctl_echo_error(conn, argv, aux):
    """Handle "echo_error": like "echo", but sent as an error reply."""
    assert aux == "aux_echo_error"
    conn.reply_error(str(argv))


def unixctl_log(conn, argv, unused_aux):
    """Handle "log": log the first argument at INFO level, reply empty."""
    vlog.info(str(argv[0]))
    conn.reply(None)


def unixctl_block(conn, unused_argv, unused_aux):
    """Handle "block": deliberately send no reply on 'conn'."""
    pass


def main():
    """Parse options, start the unixctl server and run until "exit"."""
    parser = argparse.ArgumentParser(
        description="Open vSwitch unixctl test program for Python")
    parser.add_argument("--unixctl", help="UNIXCTL socket location or 'none'.")

    ovs.daemon.add_args(parser)
    ovs.vlog.add_args(parser)
    args = parser.parse_args()
    ovs.daemon.handle_args(args)
    ovs.vlog.handle_args(args)

    ovs.daemon.daemonize_start()
    error, server = ovs.unixctl.server.UnixctlServer.create(args.unixctl)
    if error:
        ovs.util.ovs_fatal(error, "could not create unixctl server at %s"
                           % args.unixctl, vlog)

    # NOTE(review): the positional arguments appear to be (name, usage,
    # min_args, max_args, callback, aux) -- confirm against
    # ovs.unixctl.command_register.
    ovs.unixctl.command_register("exit", "", 0, 0, unixctl_exit, "aux_exit")
    ovs.unixctl.command_register("echo", "[arg ...]", 1, 2, unixctl_echo,
                                 "aux_echo")
    ovs.unixctl.command_register("log", "[arg ...]", 1, 2, unixctl_log, None)
    ovs.unixctl.command_register("echo_error", "[arg ...]", 1, 2,
                                 unixctl_echo_error, "aux_echo_error")
    ovs.unixctl.command_register("block", "", 0, 0, unixctl_block, None)
    ovs.daemon.daemonize_complete()

    vlog.info("Entering run loop.")
    poller = ovs.poller.Poller()
    while not exiting:
        server.run()
        server.wait(poller)
        if exiting:
            # server.run() may have just processed "exit"; request an
            # immediate wake so the loop condition is re-checked promptly.
            poller.immediate_wake()
        poller.block()
    server.close()


if __name__ == '__main__':
    try:
        main()
    except SystemExit:
        # Let sys.exit() calls complete normally.
        raise
    except:
        # Deliberately broad: any other failure is logged with a traceback
        # and reported through the daemon's restart exit code.
        vlog.exception("traceback")
        sys.exit(ovs.daemon.RESTART_EXIT_CODE)
openvswitch-2.0.1+git20140120/tests/test-util.c000066400000000000000000000276201226605124000207210ustar00rootroot00000000000000/* * Copyright (c) 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include "byte-order.h" #include "command-line.h" #include "random.h" #include "util.h" #include "vlog.h" #undef NDEBUG #include static void check_log_2_floor(uint32_t x, int n) { if (log_2_floor(x) != n) { fprintf(stderr, "log_2_floor(%"PRIu32") is %d but should be %d\n", x, log_2_floor(x), n); abort(); } } static void test_log_2_floor(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { int n; for (n = 0; n < 32; n++) { /* Check minimum x such that f(x) == n. */ check_log_2_floor(1 << n, n); /* Check maximum x such that f(x) == n. */ check_log_2_floor((1 << n) | ((1 << n) - 1), n); /* Check a random value in the middle. */ check_log_2_floor((random_uint32() & ((1 << n) - 1)) | (1 << n), n); } /* log_2_floor(0) is undefined, so don't check it. */ } static void check_ctz(uint32_t x, int n) { if (ctz(x) != n) { fprintf(stderr, "ctz(%"PRIu32") is %d but should be %d\n", x, ctz(x), n); abort(); } } static void test_ctz(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { int n; for (n = 0; n < 32; n++) { /* Check minimum x such that f(x) == n. */ check_ctz(1 << n, n); /* Check maximum x such that f(x) == n. */ check_ctz(UINT32_MAX << n, n); /* Check a random value in the middle. 
*/ check_ctz((random_uint32() | 1) << n, n); } /* Check ctz(0). */ check_ctz(0, 32); } /* Returns a random number in the range 'min'...'max' inclusive. */ static uint32_t random_in_range(uint32_t min, uint32_t max) { return min == max ? min : min + random_range(max - min + 1); } static void check_rup2(uint32_t x, int n) { uint32_t rup2 = ROUND_UP_POW2(x); if (rup2 != n) { fprintf(stderr, "ROUND_UP_POW2(%#"PRIx32") is %#"PRIx32" " "but should be %#"PRIx32"\n", x, rup2, n); abort(); } } static void test_round_up_pow2(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { int n; for (n = 0; n < 32; n++) { /* Min, max value for which ROUND_UP_POW2 should yield (1 << n). */ uint32_t min = ((1u << n) >> 1) + 1; uint32_t max = 1u << n; check_rup2(min, 1u << n); check_rup2(max, 1u << n); check_rup2(random_in_range(min, max), 1u << n); } check_rup2(0, 0); } static void check_rdp2(uint32_t x, int n) { uint32_t rdp2 = ROUND_DOWN_POW2(x); if (rdp2 != n) { fprintf(stderr, "ROUND_DOWN_POW2(%#"PRIx32") is %#"PRIx32" " "but should be %#"PRIx32"\n", x, rdp2, n); abort(); } } static void test_round_down_pow2(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { int n; for (n = 0; n < 32; n++) { /* Min, max value for which ROUND_DOWN_POW2 should yield (1 << n). 
*/ uint32_t min = 1u << n; uint32_t max = ((1u << n) << 1) - 1; check_rdp2(min, 1u << n); check_rdp2(max, 1u << n); check_rdp2(random_in_range(min, max), 1u << n); } check_rdp2(0, 0); } static void shuffle(unsigned int *p, size_t n) { for (; n > 1; n--, p++) { unsigned int *q = &p[random_range(n)]; unsigned int tmp = *p; *p = *q; *q = tmp; } } static void check_popcount(uint32_t x, int n) { if (popcount(x) != n) { fprintf(stderr, "popcount(%#"PRIx32") is %d but should be %d\n", x, popcount(x), n); abort(); } } static void test_popcount(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned int bits[32]; int i; for (i = 0; i < ARRAY_SIZE(bits); i++) { bits[i] = 1u << i; } check_popcount(0, 0); for (i = 0; i < 1000; i++) { uint32_t x = 0; int j; shuffle(bits, ARRAY_SIZE(bits)); for (j = 0; j < 32; j++) { x |= bits[j]; check_popcount(x, j + 1); } assert(x == UINT32_MAX); shuffle(bits, ARRAY_SIZE(bits)); for (j = 31; j >= 0; j--) { x &= ~bits[j]; check_popcount(x, j); } assert(x == 0); } } /* Returns the sum of the squares of the first 'n' positive integers. 
*/ static unsigned int sum_of_squares(int n) { return n * (n + 1) * (2 * n + 1) / 6; } static void test_bitwise_copy(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned int n_loops; int src_ofs; int dst_ofs; int n_bits; n_loops = 0; for (n_bits = 0; n_bits <= 64; n_bits++) { for (src_ofs = 0; src_ofs < 64 - n_bits; src_ofs++) { for (dst_ofs = 0; dst_ofs < 64 - n_bits; dst_ofs++) { ovs_be64 src = htonll(random_uint64()); ovs_be64 dst = htonll(random_uint64()); ovs_be64 orig_dst = dst; ovs_be64 expect; if (n_bits == 64) { expect = dst; } else { uint64_t mask = (UINT64_C(1) << n_bits) - 1; expect = orig_dst & ~htonll(mask << dst_ofs); expect |= htonll(((ntohll(src) >> src_ofs) & mask) << dst_ofs); } bitwise_copy(&src, sizeof src, src_ofs, &dst, sizeof dst, dst_ofs, n_bits); if (expect != dst) { fprintf(stderr,"copy_bits(0x%016"PRIx64",8,%d, " "0x%016"PRIx64",8,%d, %d) yielded 0x%016"PRIx64" " "instead of the expected 0x%016"PRIx64"\n", ntohll(src), src_ofs, ntohll(orig_dst), dst_ofs, n_bits, ntohll(dst), ntohll(expect)); abort(); } n_loops++; } } } if (n_loops != sum_of_squares(64)) { abort(); } } static void test_bitwise_zero(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned int n_loops; int dst_ofs; int n_bits; n_loops = 0; for (n_bits = 0; n_bits <= 64; n_bits++) { for (dst_ofs = 0; dst_ofs < 64 - n_bits; dst_ofs++) { ovs_be64 dst = htonll(random_uint64()); ovs_be64 orig_dst = dst; ovs_be64 expect; if (n_bits == 64) { expect = htonll(0); } else { uint64_t mask = (UINT64_C(1) << n_bits) - 1; expect = orig_dst & ~htonll(mask << dst_ofs); } bitwise_zero(&dst, sizeof dst, dst_ofs, n_bits); if (expect != dst) { fprintf(stderr,"bitwise_zero(0x%016"PRIx64",8,%d, %d) " "yielded 0x%016"PRIx64" " "instead of the expected 0x%016"PRIx64"\n", ntohll(orig_dst), dst_ofs, n_bits, ntohll(dst), ntohll(expect)); abort(); } n_loops++; } } if (n_loops != 64 * (64 + 1) / 2) { abort(); } } static void test_bitwise_one(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned 
int n_loops; int dst_ofs; int n_bits; n_loops = 0; for (n_bits = 0; n_bits <= 64; n_bits++) { for (dst_ofs = 0; dst_ofs < 64 - n_bits; dst_ofs++) { ovs_be64 dst = htonll(random_uint64()); ovs_be64 orig_dst = dst; ovs_be64 expect; if (n_bits == 64) { expect = htonll(UINT64_MAX); } else { uint64_t mask = (UINT64_C(1) << n_bits) - 1; expect = orig_dst | htonll(mask << dst_ofs); } bitwise_one(&dst, sizeof dst, dst_ofs, n_bits); if (expect != dst) { fprintf(stderr,"bitwise_one(0x%016"PRIx64",8,%d, %d) " "yielded 0x%016"PRIx64" " "instead of the expected 0x%016"PRIx64"\n", ntohll(orig_dst), dst_ofs, n_bits, ntohll(dst), ntohll(expect)); abort(); } n_loops++; } } if (n_loops != 64 * (64 + 1) / 2) { abort(); } } static void test_bitwise_is_all_zeros(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { int n_loops; for (n_loops = 0; n_loops < 100; n_loops++) { ovs_be64 x = htonll(0); int i; for (i = 0; i < 64; i++) { ovs_be64 bit; int ofs, n; /* Change a random 0-bit into a 1-bit. */ do { bit = htonll(UINT64_C(1) << (random_range(64))); } while (x & bit); x |= bit; for (ofs = 0; ofs < 64; ofs++) { for (n = 0; n <= 64 - ofs; n++) { bool expect; bool answer; expect = (n == 64 ? x == 0 : !(x & htonll(((UINT64_C(1) << n) - 1) << ofs))); answer = bitwise_is_all_zeros(&x, sizeof x, ofs, n); if (expect != answer) { fprintf(stderr, "bitwise_is_all_zeros(0x%016"PRIx64",8,%d,%d " "returned %s instead of %s\n", ntohll(x), ofs, n, answer ? "true" : "false", expect ? 
"true" : "false"); abort(); } } } } } } static void test_follow_symlinks(int argc, char *argv[]) { int i; for (i = 1; i < argc; i++) { char *target = follow_symlinks(argv[i]); puts(target); free(target); } } static void test_assert(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { ovs_assert(false); } static const struct command commands[] = { {"ctz", 0, 0, test_ctz}, {"round_up_pow2", 0, 0, test_round_up_pow2}, {"round_down_pow2", 0, 0, test_round_down_pow2}, {"popcount", 0, 0, test_popcount}, {"log_2_floor", 0, 0, test_log_2_floor}, {"bitwise_copy", 0, 0, test_bitwise_copy}, {"bitwise_zero", 0, 0, test_bitwise_zero}, {"bitwise_one", 0, 0, test_bitwise_one}, {"bitwise_is_all_zeros", 0, 0, test_bitwise_is_all_zeros}, {"follow-symlinks", 1, INT_MAX, test_follow_symlinks}, {"assert", 0, 0, test_assert}, {NULL, 0, 0, NULL}, }; static void parse_options(int argc, char *argv[]) { enum { VLOG_OPTION_ENUMS }; static const struct option long_options[] = { VLOG_LONG_OPTIONS, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { int c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { VLOG_OPTION_HANDLERS case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); } int main(int argc, char *argv[]) { set_program_name(argv[0]); parse_options(argc, argv); run_command(argc - optind, argv + optind, commands); return 0; } openvswitch-2.0.1+git20140120/tests/test-uuid.c000066400000000000000000000020351226605124000207030ustar00rootroot00000000000000/* * Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "uuid.h" #include int main(int argc, char *argv[]) { struct uuid uuid; if (argc == 1) { uuid_generate(&uuid); } else if (argc == 2) { if (!uuid_from_string(&uuid, argv[1])) { ovs_fatal(0, "\"%s\" is not a valid UUID", argv[1]); } } else { ovs_fatal(0, "usage: %s [UUID]", argv[0]); } printf(UUID_FMT"\n", UUID_ARGS(&uuid)); return 0; } openvswitch-2.0.1+git20140120/tests/test-vconn.c000066400000000000000000000314171226605124000210660ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "vconn.h" #include #include #include #include #include #include "command-line.h" #include "ofp-msgs.h" #include "ofp-util.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "poll-loop.h" #include "socket-util.h" #include "stream.h" #include "stream-ssl.h" #include "timeval.h" #include "util.h" #include "vlog.h" #undef NDEBUG #include struct fake_pvconn { const char *type; char *pvconn_name; char *vconn_name; struct pstream *pstream; }; static void check(int a, int b, const char *as, const char *file, int line) { if (a != b) { ovs_fatal(0, "%s:%d: %s is %d but should be %d", file, line, as, a, b); } } #define CHECK(A, B) check(A, B, #A, __FILE__, __LINE__) static void check_errno(int a, int b, const char *as, const char *file, int line) { if (a != b) { char *str_b = strdup(ovs_strerror(abs(b))); ovs_fatal(0, "%s:%d: %s is %d (%s) but should be %d (%s)", file, line, as, a, ovs_strerror(abs(a)), b, str_b); } } #define CHECK_ERRNO(A, B) check_errno(A, B, #A, __FILE__, __LINE__) static void fpv_create(const char *type, struct fake_pvconn *fpv) { #ifdef HAVE_OPENSSL if (!strcmp(type, "ssl")) { stream_ssl_set_private_key_file("testpki-privkey.pem"); stream_ssl_set_certificate_file("testpki-cert.pem"); stream_ssl_set_ca_cert_file("testpki-cacert.pem", false); } #endif fpv->type = type; if (!strcmp(type, "unix")) { static int unix_count = 0; char *bind_path; bind_path = xasprintf("fake-pvconn.%d", unix_count++); fpv->pvconn_name = xasprintf("punix:%s", bind_path); fpv->vconn_name = xasprintf("unix:%s", bind_path); CHECK_ERRNO(pstream_open(fpv->pvconn_name, &fpv->pstream, DSCP_DEFAULT), 0); free(bind_path); } else if (!strcmp(type, "tcp") || !strcmp(type, "ssl")) { char *s, *port, *save_ptr = NULL; char *open_name; open_name = xasprintf("p%s:0:127.0.0.1", type); CHECK_ERRNO(pstream_open(open_name, &fpv->pstream, DSCP_DEFAULT), 0); /* Extract bound port number from pstream name. 
*/ s = xstrdup(pstream_get_name(fpv->pstream)); strtok_r(s, ":", &save_ptr); port = strtok_r(NULL, ":", &save_ptr); /* Save info. */ fpv->pvconn_name = xstrdup(pstream_get_name(fpv->pstream)); fpv->vconn_name = xasprintf("%s:127.0.0.1:%s", type, port); free(open_name); free(s); } else { abort(); } } static struct stream * fpv_accept(struct fake_pvconn *fpv) { struct stream *stream; CHECK_ERRNO(pstream_accept_block(fpv->pstream, &stream), 0); return stream; } static void fpv_close(struct fake_pvconn *fpv) { pstream_close(fpv->pstream); fpv->pstream = NULL; } static void fpv_destroy(struct fake_pvconn *fpv) { fpv_close(fpv); free(fpv->pvconn_name); free(fpv->vconn_name); } /* Connects to a fake_pvconn with vconn_open(), then closes the listener and * verifies that vconn_connect() reports 'expected_error'. */ static void test_refuse_connection(int argc OVS_UNUSED, char *argv[]) { const char *type = argv[1]; struct fake_pvconn fpv; struct vconn *vconn; int error; fpv_create(type, &fpv); CHECK_ERRNO(vconn_open(fpv.vconn_name, 0, DSCP_DEFAULT, &vconn), 0); fpv_close(&fpv); vconn_run(vconn); error = vconn_connect_block(vconn); if (!strcmp(type, "tcp")) { if (error != ECONNRESET && error != EPIPE) { ovs_fatal(0, "unexpected vconn_connect() return value %d (%s)", error, ovs_strerror(error)); } } else if (!strcmp(type, "unix")) { CHECK_ERRNO(error, EPIPE); } else if (!strcmp(type, "ssl")) { if (error != EPROTO && error != ECONNRESET) { ovs_fatal(0, "unexpected vconn_connect() return value %d (%s)", error, ovs_strerror(error)); } } else { ovs_fatal(0, "invalid connection type %s", type); } vconn_close(vconn); fpv_destroy(&fpv); } /* Connects to a fake_pvconn with vconn_open(), accepts that connection and * closes it immediately, and verifies that vconn_connect() reports * 'expected_error'. 
*/ static void test_accept_then_close(int argc OVS_UNUSED, char *argv[]) { const char *type = argv[1]; struct fake_pvconn fpv; struct vconn *vconn; int error; fpv_create(type, &fpv); CHECK_ERRNO(vconn_open(fpv.vconn_name, 0, DSCP_DEFAULT, &vconn), 0); vconn_run(vconn); stream_close(fpv_accept(&fpv)); fpv_close(&fpv); error = vconn_connect_block(vconn); if (!strcmp(type, "tcp") || !strcmp(type, "unix")) { if (error != ECONNRESET && error != EPIPE) { ovs_fatal(0, "unexpected vconn_connect() return value %d (%s)", error, ovs_strerror(error)); } } else { CHECK_ERRNO(error, EPROTO); } vconn_close(vconn); fpv_destroy(&fpv); } /* Connects to a fake_pvconn with vconn_open(), accepts that connection and * reads the hello message from it, then closes the connection and verifies * that vconn_connect() reports 'expected_error'. */ static void test_read_hello(int argc OVS_UNUSED, char *argv[]) { const char *type = argv[1]; struct fake_pvconn fpv; struct vconn *vconn; struct stream *stream; int error; fpv_create(type, &fpv); CHECK_ERRNO(vconn_open(fpv.vconn_name, 0, DSCP_DEFAULT, &vconn), 0); vconn_run(vconn); stream = fpv_accept(&fpv); fpv_destroy(&fpv); for (;;) { struct ofp_header hello; int retval; retval = stream_recv(stream, &hello, sizeof hello); if (retval == sizeof hello) { enum ofpraw raw; CHECK(hello.version, OFP10_VERSION); CHECK(ofpraw_decode_partial(&raw, &hello, sizeof hello), 0); CHECK(raw, OFPRAW_OFPT_HELLO); CHECK(ntohs(hello.length), sizeof hello); break; } else { CHECK_ERRNO(retval, -EAGAIN); } vconn_run(vconn); CHECK_ERRNO(vconn_connect(vconn), EAGAIN); vconn_run_wait(vconn); vconn_connect_wait(vconn); stream_recv_wait(stream); poll_block(); } stream_close(stream); error = vconn_connect_block(vconn); if (error != ECONNRESET && error != EPIPE) { ovs_fatal(0, "unexpected vconn_connect() return value %d (%s)", error, ovs_strerror(error)); } vconn_close(vconn); } /* Connects to a fake_pvconn with vconn_open(), accepts that connection and * sends the 'out' bytes 
in 'out_size' to it (presumably an OFPT_HELLO * message), then verifies that vconn_connect() reports * 'expect_connect_error'. */ static void test_send_hello(const char *type, const void *out, size_t out_size, int expect_connect_error) { struct fake_pvconn fpv; struct vconn *vconn; bool read_hello, connected; struct ofpbuf *msg; struct stream *stream; size_t n_sent; fpv_create(type, &fpv); CHECK_ERRNO(vconn_open(fpv.vconn_name, 0, DSCP_DEFAULT, &vconn), 0); vconn_run(vconn); stream = fpv_accept(&fpv); fpv_destroy(&fpv); n_sent = 0; while (n_sent < out_size) { int retval; retval = stream_send(stream, (char *) out + n_sent, out_size - n_sent); if (retval > 0) { n_sent += retval; } else if (retval == -EAGAIN) { stream_run(stream); vconn_run(vconn); stream_recv_wait(stream); vconn_connect_wait(vconn); vconn_run_wait(vconn); poll_block(); } else { ovs_fatal(0, "stream_send returned unexpected value %d", retval); } } read_hello = connected = false; for (;;) { if (!read_hello) { struct ofp_header hello; int retval = stream_recv(stream, &hello, sizeof hello); if (retval == sizeof hello) { enum ofpraw raw; CHECK(hello.version, OFP10_VERSION); CHECK(ofpraw_decode_partial(&raw, &hello, sizeof hello), 0); CHECK(raw, OFPRAW_OFPT_HELLO); CHECK(ntohs(hello.length), sizeof hello); read_hello = true; } else { CHECK_ERRNO(retval, -EAGAIN); } } vconn_run(vconn); if (!connected) { int error = vconn_connect(vconn); if (error == expect_connect_error) { if (!error) { connected = true; } else { stream_close(stream); vconn_close(vconn); return; } } else { CHECK_ERRNO(error, EAGAIN); } } if (read_hello && connected) { break; } vconn_run_wait(vconn); if (!connected) { vconn_connect_wait(vconn); } if (!read_hello) { stream_recv_wait(stream); } poll_block(); } stream_close(stream); CHECK_ERRNO(vconn_recv_block(vconn, &msg), EOF); vconn_close(vconn); } /* Try connecting and sending a normal hello, which should succeed. 
*/ static void test_send_plain_hello(int argc OVS_UNUSED, char *argv[]) { const char *type = argv[1]; struct ofpbuf *hello; hello = ofpraw_alloc_xid(OFPRAW_OFPT_HELLO, OFP10_VERSION, htonl(0x12345678), 0); test_send_hello(type, hello->data, hello->size, 0); ofpbuf_delete(hello); } /* Try connecting and sending an extra-long hello, which should succeed (since * the specification says that implementations must accept and ignore extra * data). */ static void test_send_long_hello(int argc OVS_UNUSED, char *argv[]) { const char *type = argv[1]; struct ofpbuf *hello; enum { EXTRA_BYTES = 8 }; hello = ofpraw_alloc_xid(OFPRAW_OFPT_HELLO, OFP10_VERSION, htonl(0x12345678), EXTRA_BYTES); ofpbuf_put_zeros(hello, EXTRA_BYTES); ofpmsg_update_length(hello); test_send_hello(type, hello->data, hello->size, 0); ofpbuf_delete(hello); } /* Try connecting and sending an echo request instead of a hello, which should * fail with EPROTO. */ static void test_send_echo_hello(int argc OVS_UNUSED, char *argv[]) { const char *type = argv[1]; struct ofpbuf *echo; echo = ofpraw_alloc_xid(OFPRAW_OFPT_ECHO_REQUEST, OFP10_VERSION, htonl(0x12345678), 0); test_send_hello(type, echo->data, echo->size, EPROTO); ofpbuf_delete(echo); } /* Try connecting and sending a hello packet that has its length field as 0, * which should fail with EPROTO. */ static void test_send_short_hello(int argc OVS_UNUSED, char *argv[]) { const char *type = argv[1]; struct ofp_header hello; memset(&hello, 0, sizeof hello); test_send_hello(type, &hello, sizeof hello, EPROTO); } /* Try connecting and sending a hello packet that has a bad version, which * should fail with EPROTO. 
*/ static void test_send_invalid_version_hello(int argc OVS_UNUSED, char *argv[]) { const char *type = argv[1]; struct ofpbuf *hello; hello = ofpraw_alloc_xid(OFPRAW_OFPT_HELLO, OFP10_VERSION, htonl(0x12345678), 0); ((struct ofp_header *) hello->data)->version = 0; test_send_hello(type, hello->data, hello->size, EPROTO); ofpbuf_delete(hello); } static const struct command commands[] = { {"refuse-connection", 1, 1, test_refuse_connection}, {"accept-then-close", 1, 1, test_accept_then_close}, {"read-hello", 1, 1, test_read_hello}, {"send-plain-hello", 1, 1, test_send_plain_hello}, {"send-long-hello", 1, 1, test_send_long_hello}, {"send-echo-hello", 1, 1, test_send_echo_hello}, {"send-short-hello", 1, 1, test_send_short_hello}, {"send-invalid-version-hello", 1, 1, test_send_invalid_version_hello}, {NULL, 0, 0, NULL}, }; int main(int argc, char *argv[]) { set_program_name(argv[0]); vlog_set_levels(NULL, VLF_ANY_FACILITY, VLL_EMER); vlog_set_levels(NULL, VLF_CONSOLE, VLL_DBG); signal(SIGPIPE, SIG_IGN); time_alarm(10); run_command(argc - 1, argv + 1, commands); return 0; } openvswitch-2.0.1+git20140120/tests/test-vlog.py000066400000000000000000000027151226605124000211170ustar00rootroot00000000000000# Copyright (c) 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
import argparse import ovs.vlog def main(): modules = [ovs.vlog.Vlog("module_%d" % i) for i in xrange(3)] parser = argparse.ArgumentParser(description="Vlog Module Tester") ovs.vlog.add_args(parser) args = parser.parse_args() ovs.vlog.handle_args(args) for m in modules: m.emer("emergency") m.err("error") m.warn("warning") m.info("information") m.dbg("debug") try: fail = False # Silence pychecker warning. assert fail except AssertionError: m.emer("emergency exception", exc_info=True) m.err("error exception", exc_info=True) m.warn("warn exception", exc_info=True) m.info("information exception", exc_info=True) m.dbg("debug exception", exc_info=True) m.exception("exception") if __name__ == '__main__': main() openvswitch-2.0.1+git20140120/tests/testsuite.at000066400000000000000000000063511226605124000212000ustar00rootroot00000000000000AT_INIT AT_COPYRIGHT([Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at: http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.]) AT_TESTED([ovs-vswitchd]) AT_TESTED([ovs-vsctl]) AT_TESTED([perl]) m4_divert_push([PREPARE_TESTS]) [ ovs_wait () { # First try a quick sleep, so that the test completes very quickly # in the normal case. POSIX doesn't require fractional times to # work, so this might not work. sleep 0.1 ovs_wait_cond && exit 0 # Then wait up to 10 seconds. for d in 0 1 2 3 4 5 6 7 8 9; do sleep 1 ovs_wait_cond && exit 0 done exit 1 } # Prints the integers from $1 to $2, increasing by $3 (default 1) on stdout. 
seq () { while test $1 -le $2; do echo $1 set `expr $1 + ${3-1}` $2 $3 done } ] m4_divert_pop([PREPARE_TESTS]) m4_define([OVS_WAIT], [AT_CHECK( [ovs_wait_cond () { $1 } ovs_wait], [0], [ignore], [ignore], [$2])]) m4_define([OVS_WAIT_UNTIL], [OVS_WAIT([$1], [$2])]) m4_define([OVS_WAIT_WHILE], [OVS_WAIT([if $1; then return 1; else return 0; fi], [$2])]) dnl ON_EXIT([COMMANDS]) dnl dnl Adds the shell COMMANDS to a collection executed when the current test dnl completes, as a cleanup action. (The most common use is to kill a dnl daemon started by the test. This is important to prevent tests that dnl start daemons from hanging at exit.) m4_define([ON_EXIT], [trap '. ./cleanup' 0; cat >>cleanup <<'EOF' $1 EOF ]) m4_include([tests/ovsdb-macros.at]) m4_include([tests/ofproto-macros.at]) m4_include([tests/bfd.at]) m4_include([tests/cfm.at]) m4_include([tests/lacp.at]) m4_include([tests/library.at]) m4_include([tests/heap.at]) m4_include([tests/bundle.at]) m4_include([tests/classifier.at]) m4_include([tests/check-structs.at]) m4_include([tests/daemon.at]) m4_include([tests/daemon-py.at]) m4_include([tests/ofp-actions.at]) m4_include([tests/ofp-print.at]) m4_include([tests/ofp-util.at]) m4_include([tests/ofp-errors.at]) m4_include([tests/ovs-ofctl.at]) m4_include([tests/odp.at]) m4_include([tests/multipath.at]) m4_include([tests/learn.at]) m4_include([tests/vconn.at]) m4_include([tests/file_name.at]) m4_include([tests/aes128.at]) m4_include([tests/unixctl-py.at]) m4_include([tests/uuid.at]) m4_include([tests/json.at]) m4_include([tests/jsonrpc.at]) m4_include([tests/jsonrpc-py.at]) m4_include([tests/timeval.at]) m4_include([tests/tunnel.at]) m4_include([tests/lockfile.at]) m4_include([tests/reconnect.at]) m4_include([tests/ovs-vswitchd.at]) m4_include([tests/ofproto.at]) m4_include([tests/ofproto-dpif.at]) m4_include([tests/vlan-splinters.at]) m4_include([tests/ovsdb.at]) m4_include([tests/ovs-vsctl.at]) m4_include([tests/ovs-monitor-ipsec.at]) 
m4_include([tests/ovs-xapi-sync.at]) m4_include([tests/interface-reconfigure.at]) m4_include([tests/stp.at]) m4_include([tests/vlog.at]) openvswitch-2.0.1+git20140120/tests/timeval.at000066400000000000000000000013661226605124000206110ustar00rootroot00000000000000AT_BANNER([timeval unit tests]) AT_SETUP([check that time advances]) AT_KEYWORDS([timeval]) AT_CHECK([test-timeval plain], [0]) AT_CLEANUP AT_SETUP([check that time advances after daemonize()]) AT_KEYWORDS([timeval]) AT_CHECK([test-timeval daemon], [0]) AT_CHECK( [# First try a quick sleep, so that the test completes very quickly # in the normal case. POSIX doesn't require fractional times to # work, so this might not work. sleep 0.1; if test -e test-timeval.success; then echo success; exit 0; fi # Then wait up to 2 seconds. sleep 1; if test -e test-timeval.success; then echo success; exit 0; fi sleep 1; if test -e test-timeval.success; then echo success; exit 0; fi echo failure; exit 1], [0], [success ], []) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/tunnel.at000066400000000000000000000456051226605124000204610ustar00rootroot00000000000000AT_BANNER([tunnel]) AT_SETUP([tunnel - input]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ options:remote_ip=1.1.1.1 ofport_request=1\ -- add-port br0 p2 -- set Interface p2 type=gre \ options:local_ip=2.2.2.2 options:remote_ip=1.1.1.1 \ ofport_request=2 \ -- add-port br0 p3 -- set Interface p3 type=gre \ options:remote_ip=2.2.2.2 ofport_request=3]) AT_DATA([flows.txt], [dnl actions=IN_PORT ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: remote_ip=1.1.1.1) p2 2/1: (gre: local_ip=2.2.2.2, remote_ip=1.1.1.1) p3 3/1: (gre: remote_ip=2.2.2.2) ]) dnl remote_ip AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'tunnel(tun_id=0x0,src=1.1.1.1,dst=1.2.3.4,tos=0x0,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df))),1 ]) dnl local_ip, remote_ip AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x0,src=2.2.2.2,dst=1.1.1.1,tos=0x0,ttl=64,flags(df))),1 ]) dnl reconfigure, local_ip, remote_ip AT_CHECK([ovs-vsctl set Interface p2 type=gre options:local_ip=2.2.2.3 \ options:df_default=false options:ttl=1 options:csum=true \ -- set Interface p3 type=gre64]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: remote_ip=1.1.1.1) p2 2/1: (gre: csum=true, df_default=false, local_ip=2.2.2.3, remote_ip=1.1.1.1, ttl=1) p3 3/64: (gre64: remote_ip=2.2.2.2) ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df))),1 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.3,tos=0x0,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], 
[0], [Datapath actions: set(tunnel(tun_id=0x0,src=2.2.2.3,dst=1.1.1.1,tos=0x0,ttl=1,flags(csum))),1 ]) dnl nonexistent tunnel AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=5.5.5.5,dst=6.6.6.6,tos=0x0,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [2], [ignore], [dnl Invalid datapath flow ovs-appctl: ovs-vswitchd: server returned an error ]) OVS_VSWITCHD_STOP(["/receive tunnel port not found/d"]) AT_CLEANUP AT_SETUP([tunnel - ECN decapsulation]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ options:remote_ip=1.1.1.1 ofport_request=1 \ -- add-port br0 p2 -- set Interface p2 type=dummy \ ofport_request=2]) AT_DATA([flows.txt], [dnl actions=2 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: remote_ip=1.1.1.1) p2 2/2: (dummy) ]) dnl Tunnel CE and encapsulated packet CE AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=3,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: 2 ]) dnl Tunnel CE and encapsulated packet ECT(1) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=1,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0x3,ttl=64,frag=no)),2 ]) dnl Tunnel CE and encapsulated packet ECT(2) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=2,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0x3,ttl=64,frag=no)),2 ]) dnl Tunnel CE and encapsulated packet Non-ECT AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x3,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: drop ]) OVS_VSWITCHD_STOP(["/dropping tunnel packet marked ECN CE but is not ECN capable/d"]) AT_CLEANUP AT_SETUP([tunnel - output]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ options:remote_ip=1.1.1.1 options:local_ip=2.2.2.2 \ options:key=5 ofport_request=1\ -- add-port br0 p2 -- set Interface p2 type=dummy \ ofport_request=2 ofport_request=2]) AT_DATA([flows.txt], [dnl actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: key=5, local_ip=2.2.2.2, remote_ip=1.1.1.1) p2 2/2: (dummy) ]) dnl Basic AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=4,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x5,src=2.2.2.2,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1 ]) dnl ECN AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=1,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) 
AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x5,src=2.2.2.2,dst=1.1.1.1,tos=0x1,ttl=64,flags(df,key))),1 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([tunnel - ToS and TTL inheritance]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ options:remote_ip=1.1.1.1 options:tos=inherit \ options:ttl=inherit ofport_request=1 \ -- add-port br0 p2 -- set Interface p2 type=dummy \ ofport_request=2 ofport_request=2]) AT_DATA([flows.txt], [dnl actions=output:1 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: remote_ip=1.1.1.1, tos=inherit, ttl=inherit) p2 2/2: (dummy) ]) dnl Basic AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=4,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x4,ttl=128,flags(df))),1 ]) dnl ECN AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=5,ttl=128,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x5,ttl=128,flags(df))),1 ]) dnl non-IP AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0806),arp(sip=1.2.3.4,tip=5.6.7.8,op=1,sha=00:0f:10:11:12:13,tha=00:14:15:16:17:18)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x0,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df))),1 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([tunnel - set_tunnel]) OVS_VSWITCHD_START([dnl add-port br0 p1 -- set Interface p1 type=gre options:key=flow \ options:remote_ip=1.1.1.1 ofport_request=1 \ -- add-port br0 p2 
-- set Interface p2 type=gre options:key=flow \ options:remote_ip=2.2.2.2 ofport_request=2 \ -- add-port br0 p3 -- set Interface p3 type=gre options:key=flow \ options:remote_ip=3.3.3.3 ofport_request=3 \ -- add-port br0 p4 -- set Interface p4 type=gre options:key=flow \ options:remote_ip=4.4.4.4 ofport_request=4]) AT_DATA([flows.txt], [dnl actions=set_tunnel:1,output:1,set_tunnel:2,output:2,set_tunnel:3,output:3,set_tunnel:5,output:4 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: key=flow, remote_ip=1.1.1.1) p2 2/1: (gre: key=flow, remote_ip=2.2.2.2) p3 3/1: (gre: key=flow, remote_ip=3.3.3.3) p4 4/1: (gre: key=flow, remote_ip=4.4.4.4) ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl set(tunnel(tun_id=0x1,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x2,src=0.0.0.0,dst=2.2.2.2,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x3,src=0.0.0.0,dst=3.3.3.3,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x5,src=0.0.0.0,dst=4.4.4.4,tos=0x0,ttl=64,flags(df,key))),1 ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([tunnel - key]) OVS_VSWITCHD_START([dnl add-port br0 p1 -- set Interface p1 type=gre options:key=1 \ options:remote_ip=1.1.1.1 ofport_request=1 \ -- add-port br0 p2 -- set Interface p2 type=gre options:in_key=2 \ options:out_key=3 options:remote_ip=1.1.1.1 ofport_request=2 \ -- add-port br0 p3 -- set Interface p3 type=gre options:out_key=5 \ options:remote_ip=1.1.1.1 ofport_request=3]) AT_DATA([flows.txt], [dnl actions=IN_PORT,output:1,output:2,output:3 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: 
key=1, remote_ip=1.1.1.1) p2 2/1: (gre: in_key=2, out_key=3, remote_ip=1.1.1.1) p3 3/1: (gre: out_key=5, remote_ip=1.1.1.1) ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x1,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl set(tunnel(tun_id=0x1,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x3,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x5,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x2,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl set(tunnel(tun_id=0x3,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x1,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x5,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags()),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: dnl set(tunnel(tun_id=0x5,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x1,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,dnl set(tunnel(tun_id=0x3,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 
'tunnel(tun_id=0xf,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [2], [ignore], [dnl Invalid datapath flow ovs-appctl: ovs-vswitchd: server returned an error ]) OVS_VSWITCHD_STOP(["/receive tunnel port not found/d"]) AT_CLEANUP AT_SETUP([tunnel - key match]) OVS_VSWITCHD_START([dnl add-port br0 p1 -- set Interface p1 type=gre options:key=flow \ options:remote_ip=1.1.1.1 ofport_request=1 \ -- add-port br0 p2 -- set Interface p2 type=gre options:key=3 \ options:remote_ip=3.3.3.3 ofport_request=2 \ -- add-port br0 p3 -- set Interface p3 type=dummy ofport_request=3 \ -- add-port br0 p4 -- set Interface p4 type=dummy ofport_request=4 \ -- add-port br0 p5 -- set Interface p5 type=dummy ofport_request=5]) AT_DATA([flows.txt], [dnl tun_id=2,actions=output:3 tun_id=3,actions=output:4,set_tunnel:2,resubmit:99,set_tunnel:4,output:2,resubmit:99 tun_id=4,actions=output:5 ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (gre: key=flow, remote_ip=1.1.1.1) p2 2/1: (gre: key=3, remote_ip=3.3.3.3) p3 3/3: (dummy) p4 4/4: (dummy) p5 5/5: (dummy) ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x2,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 3 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x3,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath 
actions: 4,3,set(tunnel(tun_id=0x3,src=0.0.0.0,dst=3.3.3.3,tos=0x0,ttl=64,flags(df,key))),1,5 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x3,src=3.3.3.3,dst=2.2.2.2,tos=0x0,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl Datapath actions: 4,3,5 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'tunnel(tun_id=0x0,src=1.1.1.1,dst=2.2.2.2,tos=0x0,ttl=64,flags(key)),in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [dnl - Sends "packet-in" messages to the OpenFlow controller. ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([tunnel - VXLAN]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \ options:remote_ip=1.1.1.1 ofport_request=1]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (vxlan: remote_ip=1.1.1.1) ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([tunnel - LISP]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=lisp \ options:remote_ip=1.1.1.1 ofport_request=1]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (lisp: remote_ip=1.1.1.1) ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([tunnel - different VXLAN UDP port]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \ options:remote_ip=1.1.1.1 ofport_request=1 options:dst_port=4341]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (vxlan: dst_port=4341, remote_ip=1.1.1.1) ]) dnl change UDP port AT_CHECK([ovs-vsctl -- set Interface p1 options:dst_port=5000]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/2: (vxlan: dst_port=5000, remote_ip=1.1.1.1) ]) dnl change UDP port to 
default AT_CHECK([ovs-vsctl -- set Interface p1 options:dst_port=4789]) AT_CHECK([ovs-appctl dpif/show | tail -n +5], [0], [dnl br0 65534/100: (dummy) p1 1/1: (vxlan: remote_ip=1.1.1.1) ]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - set_field - tun_src/tun_dst/tun_id]) OVS_VSWITCHD_START([dnl add-port br0 p1 -- set Interface p1 type=gre options:key=flow \ options:remote_ip=1.1.1.1 ofport_request=1 \ -- add-port br0 p2 -- set Interface p2 type=gre options:key=flow \ options:remote_ip=flow ofport_request=2 \ -- add-port br0 p3 -- set Interface p3 type=gre options:key=flow \ options:remote_ip=flow options:local_ip=flow ofport_request=3 \ -- add-port br0 p4 -- set Interface p4 type=gre options:key=3 \ options:remote_ip=flow ofport_request=4 \ -- add-port br0 p5 -- set Interface p5 type=gre options:key=flow \ options:remote_ip=5.5.5.5 ofport_request=5]) ADD_OF_PORTS([br0], [90]) AT_DATA([flows.txt], [dnl in_port=90 actions=resubmit:1,resubmit:2,resubmit:3,resubmit:4,resubmit:5 in_port=1 actions=set_field:42->tun_id,output:1 in_port=2 actions=set_field:3.3.3.3->tun_dst,output:2 in_port=3 actions=set_field:1.1.1.1->tun_src,set_field:4.4.4.4->tun_dst,output:3 in_port=4 actions=set_field:2.2.2.2->tun_dst,output:4 in_port=5 actions=set_field:5->tun_id ]) AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(90),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -1 stdout], [0], [Datapath actions: set(tunnel(tun_id=0x2a,src=0.0.0.0,dst=1.1.1.1,tos=0x0,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x2a,src=0.0.0.0,dst=3.3.3.3,tos=0x0,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x2a,src=1.1.1.1,dst=4.4.4.4,tos=0x0,ttl=64,flags(df,key))),1,set(tunnel(tun_id=0x3,src=0.0.0.0,dst=2.2.2.2,tos=0x0,ttl=64,flags(df,key))),1 ]) OVS_VSWITCHD_STOP AT_CLEANUP 
openvswitch-2.0.1+git20140120/tests/unixctl-py.at000066400000000000000000000122401226605124000212550ustar00rootroot00000000000000AT_BANNER([unixctl]) m4_define([APPCTL], [ovs-appctl --timeout 20]) m4_define([PYAPPCTL], [$PYTHON $srcdir/appctl.py --timeout 20]) AT_SETUP([unixctl ovs-vswitchd exit - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_VSWITCHD_START AT_CHECK([PYAPPCTL -t ovs-vswitchd exit], [0], []) OVS_WAIT_WHILE([test -s ovs-vswitchd.pid]) AT_CHECK([PYAPPCTL -t ovsdb-server exit], [0], []) OVS_WAIT_WHILE([test -s ovsdb-server.pid]) AT_CLEANUP AT_SETUP([unixctl ovs-vswitchd help - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_VSWITCHD_START AT_CHECK([APPCTL help], [0], [stdout]) AT_CHECK([head -1 stdout], [0], [dnl The available commands are: ]) mv stdout expout AT_CHECK([PYAPPCTL help], [0], [expout]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([unixctl ovs-vswitchd arguments - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_VSWITCHD_START AT_CHECK([APPCTL bond/hash], [2], [], [stderr]) AT_CHECK([head -1 stderr], [0], [dnl "bond/hash" command requires at least 1 arguments ]) sed 's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL bond/hash], [2], [], [experr]) AT_CHECK([APPCTL bond/hash mac], [2], [], [stderr]) AT_CHECK([head -1 stderr], [0], [dnl invalid mac ]) sed 's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL bond/hash mac], [2], [], [experr]) AT_CHECK([APPCTL bond/hash mac vlan], [2], [], [stderr]) AT_CHECK([head -1 stderr], [0], [dnl invalid vlan ]) sed 's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL bond/hash mac vlan], [2], [], [experr]) AT_CHECK([APPCTL bond/hash mac vlan basis], [2], [], [stderr]) AT_CHECK([head -1 stderr], [0], [dnl invalid vlan ]) sed 's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL bond/hash vlan basis], [2], [], [experr]) AT_CHECK([APPCTL bond/hash mac vlan basis extra], [2], [], [stderr]) AT_CHECK([head -1 stderr], [0], [dnl "bond/hash" command takes at most 3 arguments ]) 
sed 's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL bond/hash mac vlan basis extra], [2], [], [experr]) OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([unixctl bad target - Python]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_DBDIR=`pwd`; export OVS_DBDIR AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CHECK([PYAPPCTL -t bogus doit], [1], [], [stderr]) AT_CHECK_UNQUOTED([tail -1 stderr], [0], [dnl appctl.py: cannot read pidfile "`pwd`/bogus.pid" (No such file or directory) ]) AT_CHECK([PYAPPCTL -t /bogus/path.pid doit], [1], [], [stderr]) AT_CHECK([tail -1 stderr], [0], [dnl appctl.py: cannot connect to "/bogus/path.pid" (No such file or directory) ]) AT_CLEANUP AT_SETUP([unixctl server - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_SYSCONFDIR=`pwd`; export OVS_SYSCONFDIR ON_EXIT([kill `cat test-unixctl.py.pid`]) AT_CAPTURE_FILE([`pwd`/test-unixctl.py.log]) AT_CHECK([$PYTHON $srcdir/test-unixctl.py --log-file --pidfile --detach]) AT_CHECK([APPCTL -t test-unixctl.py help], [0], [stdout]) AT_CHECK([cat stdout], [0], [dnl The available commands are: block echo [[arg ...]] echo_error [[arg ...]] exit help log [[arg ...]] version vlog/list vlog/reopen vlog/set spec ]) mv stdout expout AT_CHECK([PYAPPCTL -t test-unixctl.py help], [0], [expout]) AT_CHECK([ovs-vsctl --version | sed 's/ovs-vsctl/test-unixctl.py/' | head -1 > expout]) AT_CHECK([APPCTL -t test-unixctl.py version], [0], [expout]) AT_CHECK([PYAPPCTL -t test-unixctl.py version], [0], [expout]) AT_CHECK([APPCTL -t test-unixctl.py echo robot ninja], [0], [stdout]) AT_CHECK([cat stdout], [0], [dnl [[u'robot', u'ninja']] ]) mv stdout expout AT_CHECK([PYAPPCTL -t test-unixctl.py echo robot ninja], [0], [expout]) AT_CHECK([APPCTL -t test-unixctl.py echo_error robot ninja], [2], [], [stderr]) AT_CHECK([cat stderr], [0], [dnl [[u'robot', u'ninja']] ovs-appctl: test-unixctl.py: server returned an error ]) sed 
's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL -t test-unixctl.py echo_error robot ninja], [2], [], [experr]) AT_CHECK([APPCTL -t test-unixctl.py echo], [2], [], [stderr]) AT_CHECK([cat stderr], [0], [dnl "echo" command requires at least 1 arguments ovs-appctl: test-unixctl.py: server returned an error ]) sed 's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL -t test-unixctl.py echo], [2], [], [experr]) AT_CHECK([APPCTL -t test-unixctl.py echo robot ninja pirates], [2], [], [stderr]) AT_CHECK([cat stderr], [0], [dnl "echo" command takes at most 2 arguments ovs-appctl: test-unixctl.py: server returned an error ]) sed 's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL -t test-unixctl.py echo robot ninja pirates], [2], [], [experr]) AT_CHECK([APPCTL -t test-unixctl.py bogus], [2], [], [stderr]) AT_CHECK([cat stderr], [0], [dnl "bogus" is not a valid command ovs-appctl: test-unixctl.py: server returned an error ]) sed 's/ovs-appctl/appctl.py/' stderr > experr AT_CHECK([PYAPPCTL -t test-unixctl.py bogus], [2], [], [experr]) AT_CHECK([APPCTL -t test-unixctl.py exit]) AT_CLEANUP AT_SETUP([unixctl server errors - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CHECK($PYTHON $srcdir/test-unixctl.py --unixctl "`pwd`"/bogus/path, [1], [], [ignore]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/uuid.at000066400000000000000000000020041226605124000201040ustar00rootroot00000000000000AT_BANNER([UUID unit tests]) m4_define([UUID_REGEX], [[[0-9a-f]\{8\}-[0-9a-f]\{4\}-4[0-9a-f]\{3\}-[89ab][0-9a-f]\{3\}-[0-9a-f]\{12\}$]]) m4_define([CHECK_UUID], [if expr "$uuid" : 'UUID_REGEX' > /dev/null then : else echo "$uuid: not a random UUID" exit 1 fi]) AT_SETUP([UUID generation, parsing, serialization]) AT_KEYWORDS([UUID]) AT_CHECK([ uuids= for i in m4_for([count], [1], [100], [1], [count ]); do # Generate random UUID and check that it is in the expected format. uuid=`test-uuid` CHECK_UUID # Verify that $uuid does not duplicate any UUID generated so far. 
case $uuids in *$uuid*) echo "$uuid: generated duplicate UUID" exit 1 esac uuids="$uuids $uuid" # Verify that test-uuid parses and re-serializes this UUID correctly. serialized=`test-uuid $uuid` if test "$uuid" != "$serialized"; then echo "$uuid: test-uuid serialized this as $serialized" exit 1 fi done], [0]) AT_CLEANUP openvswitch-2.0.1+git20140120/tests/uuidfilt.pl000077500000000000000000000013661226605124000210070ustar00rootroot00000000000000#! /usr/bin/perl use strict; use warnings; our %uuids; our $n_uuids = 0; sub lookup_uuid { my ($uuid) = @_; if (!exists($uuids{$uuid})) { $uuids{$uuid} = $n_uuids++; } return "<$uuids{$uuid}>"; } sub sort_set { my ($s) = @_; my (@uuids) = sort { $a <=> $b } (grep(/\d+/, split(/(\d+)/, $s))); return '["set",[' . join(',', map('["uuid","<' . $_ . '>"]', @uuids)) . ']]'; } my $u = '[0-9a-fA-F]'; my $uuid_re = "${u}{8}-${u}{4}-${u}{4}-${u}{4}-${u}{12}"; while (<>) { s/($uuid_re)/lookup_uuid($1)/eg; # Sort sets like this: # [["uuid","<1>"],["uuid","<0>"]] # to look like this: # [["uuid","<0>"],["uuid","<1>"]] s/(\["set",\[(,?\["uuid","<\d+>"\])+\]\])/sort_set($1)/ge; print $_; } openvswitch-2.0.1+git20140120/tests/valgrind-wrapper.in000077500000000000000000000014671226605124000224430ustar00rootroot00000000000000#! /bin/sh wrap_program=`basename '@wrap_program@'` # Strip the first directory from $PATH that contains $wrap_program, # so that below we run the real $wrap_program, not ourselves. 
not_found=true new_path= first=true save_IFS=$IFS IFS=: for dir in $PATH; do IFS=$save_IFS if $not_found && test -x "$dir/$wrap_program"; then not_found=false else if $first; then first=false new_path=$dir else new_path=$new_path:$dir fi fi done IFS=$save_IFS if $not_found; then echo "$0: error: cannot find $wrap_program in \$PATH" >&2 exit 1 fi PATH=$new_path export PATH : ${VALGRIND:=valgrind -q --log-file=valgrind.%p --leak-check=full} exec $VALGRIND $wrap_program "$@" echo "$0: failed to execute $VALGRIND $wrap_program" "$@" >&2 exit 1 openvswitch-2.0.1+git20140120/tests/vconn.at000066400000000000000000000013141226605124000202640ustar00rootroot00000000000000m4_define([TEST_VCONN_CLASS], [AT_BANNER([vconn library -- $1 class]) m4_foreach( [testname], [[refuse-connection], [accept-then-close], [read-hello], [send-plain-hello], [send-long-hello], [send-echo-hello], [send-short-hello], [send-invalid-version-hello]], [AT_SETUP([$1 vconn - m4_bpatsubst(testname, [-], [ ])]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR m4_if([$1], [ssl], [ AT_SKIP_IF([test "$HAVE_OPENSSL" = no]) AT_CHECK([cp $abs_top_builddir/tests/testpki*.pem .])]) AT_CHECK([test-vconn testname $1], [0], [], [ignore]) AT_CLEANUP])]) TEST_VCONN_CLASS([unix]) TEST_VCONN_CLASS([tcp]) TEST_VCONN_CLASS([ssl]) openvswitch-2.0.1+git20140120/tests/vlan-splinters.at000066400000000000000000000027701226605124000221310ustar00rootroot00000000000000AT_BANNER([VLAN splinters]) AT_SETUP([VLAN splinters]) OVS_VSWITCHD_START([], [], [=override]) ADD_OF_PORTS([br0], 1, 2, 3, 4) AT_CHECK([ovs-vsctl \ -- set Bridge br0 fail-mode=standalone flood_vlans=0,9,11,15 \ -- set port br0 tag=0 \ -- set port p1 trunks=0,9,11,15 \ -- set interface p1 other-config:enable-vlan-splinters=true \ -- set port p2 tag=9 \ -- set port p3 tag=11 \ -- set port p4 tag=15]) ovs-appctl dpif/show | sed -n ' s/\./_/g s/^[[ ]]*\([[^ ]][[^ ]]*\) [[0-9]]*\/\([[0-9]]*\).*/\1=\2/p ' > port-numbers cat port-numbers . 
./port-numbers for args in '9 p2' '11 p3' '15 p4'; do set $args vlan=$1 eval access_port=\$$2 eval splinter_port=\$p1_$vlan # Check that when a packet is received on $splinter_port, it is # treated as if it had been received on p1 in the correct VLAN. AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($splinter_port)"], [0], [stdout]) AT_CHECK_UNQUOTED([sed -n '/^Flow/p; /^Datapath/p' stdout], [0], [dnl Flow: metadata=0,in_port=$p1,dl_vlan=$vlan,dl_vlan_pcp=0,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x05ff Datapath actions: $access_port ]) # Check that when an OpenFlow action sends a packet to p1 on # splintered VLAN $vlan, it is actually output to $splinter_port. AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "in_port($access_port)"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: $splinter_port ]) done OVS_VSWITCHD_STOP AT_CLEANUP openvswitch-2.0.1+git20140120/tests/vlog.at000066400000000000000000000154611226605124000201200ustar00rootroot00000000000000AT_BANNER([vlog]) AT_SETUP([vlog - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) AT_CAPTURE_FILE([log_file]) AT_CAPTURE_FILE([stderr_log]) AT_CHECK([$PYTHON $srcdir/test-vlog.py --log-file log_file \ -v dbg module_1:info module_2:warn syslog:off 2>stderr_log]) AT_CHECK([diff log_file stderr_log]) AT_CHECK([sed -e 's/.*-.*-.*T..:..:..\....Z|//' \ -e 's/File ".*", line [[0-9]][[0-9]]*,/File , line ,/' \ stderr_log], [0], [dnl 0|module_0|EMER|emergency 1|module_0|ERR|error 2|module_0|WARN|warning 3|module_0|INFO|information 4|module_0|DBG|debug 5|module_0|EMER|emergency exception Traceback (most recent call last): File , line , in main assert fail AssertionError 6|module_0|ERR|error exception Traceback (most recent call last): File , line , in main assert fail AssertionError 7|module_0|WARN|warn exception Traceback (most recent call last): File , line , in main assert fail AssertionError 8|module_0|INFO|information exception Traceback (most recent call last): File , line , 
in main assert fail AssertionError 9|module_0|DBG|debug exception Traceback (most recent call last): File , line , in main assert fail AssertionError 10|module_0|ERR|exception Traceback (most recent call last): File , line , in main assert fail AssertionError 11|module_1|EMER|emergency 12|module_1|ERR|error 13|module_1|WARN|warning 14|module_1|INFO|information 16|module_1|EMER|emergency exception Traceback (most recent call last): File , line , in main assert fail AssertionError 17|module_1|ERR|error exception Traceback (most recent call last): File , line , in main assert fail AssertionError 18|module_1|WARN|warn exception Traceback (most recent call last): File , line , in main assert fail AssertionError 19|module_1|INFO|information exception Traceback (most recent call last): File , line , in main assert fail AssertionError 21|module_1|ERR|exception Traceback (most recent call last): File , line , in main assert fail AssertionError 22|module_2|EMER|emergency 23|module_2|ERR|error 24|module_2|WARN|warning 27|module_2|EMER|emergency exception Traceback (most recent call last): File , line , in main assert fail AssertionError 28|module_2|ERR|error exception Traceback (most recent call last): File , line , in main assert fail AssertionError 29|module_2|WARN|warn exception Traceback (most recent call last): File , line , in main assert fail AssertionError 32|module_2|ERR|exception Traceback (most recent call last): File , line , in main assert fail AssertionError ]) AT_CLEANUP AT_SETUP([vlog - vlog/reopen - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_SYSCONFDIR=`pwd`; export OVS_SYSCONFDIR ON_EXIT([kill `cat test-unixctl.py.pid`]) AT_CAPTURE_FILE([log]) AT_CAPTURE_FILE([log.old]) AT_CHECK([$PYTHON $srcdir/test-unixctl.py --log-file=`pwd`/log --pidfile --detach]) AT_CHECK([APPCTL -t test-unixctl.py log message]) mv log log.old AT_CHECK([APPCTL -t 
test-unixctl.py log message2]) AT_CHECK([APPCTL -t test-unixctl.py vlog/reopen]) AT_CHECK([APPCTL -t test-unixctl.py log message3]) AT_CHECK([APPCTL -t test-unixctl.py exit]) AT_CHECK([sed 's/.*|//' log.old], [0], [dnl Entering run loop. message message2 ]) AT_CHECK([sed 's/.*|//' log], [0], [dnl message3 ]) AT_CLEANUP AT_SETUP([vlog - vlog/reopen without log file - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_SYSCONFDIR=`pwd`; export OVS_SYSCONFDIR ON_EXIT([kill `cat test-unixctl.py.pid`]) AT_CHECK([$PYTHON $srcdir/test-unixctl.py --pidfile --detach]) AT_CHECK([APPCTL -t test-unixctl.py vlog/reopen], [0], [Logging to file not configured ]) AT_CLEANUP dnl This checks that if vlog/reopen can't reopen the log file, dnl nothing particularly bad (e.g. Python throws an exception and dnl aborts the program) happens. AT_SETUP([vlog - vlog/reopen can't reopen log file - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) # Verify that /dev/full is a character device that fails writes. AT_SKIP_IF([test ! -c /dev/full]) AT_SKIP_IF([echo > /dev/full]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_SYSCONFDIR=`pwd`; export OVS_SYSCONFDIR ON_EXIT([kill `cat test-unixctl.py.pid`]) AT_CHECK([$PYTHON $srcdir/test-unixctl.py --log-file=`pwd`/log --pidfile --detach]) AT_CHECK([APPCTL -t test-unixctl.py log message]) mv log log.old ln -s /dev/full log AT_CHECK([APPCTL -t test-unixctl.py vlog/reopen]) AT_CHECK([APPCTL -t test-unixctl.py log message2]) rm log AT_CHECK([APPCTL -t test-unixctl.py vlog/reopen]) AT_CHECK([APPCTL -t test-unixctl.py log message3]) AT_CHECK([APPCTL -t test-unixctl.py exit]) AT_CHECK([sed 's/.*|//' log.old], [0], [dnl Entering run loop. 
message ]) AT_CHECK([sed 's/.*|//' log], [0], [dnl message3 ]) AT_CLEANUP AT_SETUP([vlog - vlog/set and vlog/list - Python]) AT_SKIP_IF([test $HAVE_PYTHON = no]) OVS_RUNDIR=`pwd`; export OVS_RUNDIR OVS_LOGDIR=`pwd`; export OVS_LOGDIR OVS_DBDIR=`pwd`; export OVS_DBDIR OVS_SYSCONFDIR=`pwd`; export OVS_SYSCONFDIR ON_EXIT([kill `cat test-unixctl.py.pid`]) AT_CAPTURE_FILE([log]) AT_CHECK([$PYTHON $srcdir/test-unixctl.py --log-file=`pwd`/log --pidfile --detach]) AT_CHECK([APPCTL -t test-unixctl.py vlog/list], [0], [dnl console syslog file ------- ------ ------ daemon info info info fatal-signal info info info jsonrpc info info info poller info info info reconnect info info info socket_util info info info stream info info info test-unixctl info info info unixctl_server info info info ]) AT_CHECK([APPCTL -t test-unixctl.py vlog/set daemon:syslog:err]) AT_CHECK([APPCTL -t test-unixctl.py vlog/set file:dbg]) AT_CHECK([APPCTL -t test-unixctl.py vlog/set nonexistent], [0], [no facility, level, or module "nonexistent" ]) AT_CHECK([APPCTL -t test-unixctl.py vlog/list], [0], [dnl console syslog file ------- ------ ------ daemon info err dbg fatal-signal info info dbg jsonrpc info info dbg poller info info dbg reconnect info info dbg socket_util info info dbg stream info info dbg test-unixctl info info dbg unixctl_server info info dbg ]) AT_CLEANUP openvswitch-2.0.1+git20140120/third-party/000077500000000000000000000000001226605124000177215ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/third-party/.gitignore000066400000000000000000000000271226605124000217100ustar00rootroot00000000000000/Makefile /Makefile.in openvswitch-2.0.1+git20140120/third-party/README000066400000000000000000000023171226605124000206040ustar00rootroot00000000000000This directory contains third-party software that may be useful for debugging. tcpdump ------- The "ofp-tcpdump.patch" patch adds the ability to parse OpenFlow messages to tcpdump. 
These instructions assume that tcpdump 4.3.0 is going to be used, but it should work with other versions that are not substantially different. To begin, download tcpdump and apply the patch: wget http://www.tcpdump.org/release/tcpdump-4.3.0.tar.gz tar xzf tcpdump-4.3.0.tar.gz ln -s tcpdump-4.3.0 tcpdump patch -p0 < ofp-tcpdump.patch Then build the new version of tcpdump: cd tcpdump ./configure make Clearly, tcpdump can only parse unencrypted packets, so you will need to connect the controller and datapath using plain TCP. To look at the traffic, tcpdump will be started in a manner similar to the following: sudo ./tcpdump -s0 -i eth0 port 6633 The "-s0" flag indicates that tcpdump should capture the entire packet. If the OpenFlow message is not received in its entirety, "[|openflow]" will be printed instead of the OpenFlow message contents. The verbosity of the output may be increased by adding additional "-v" flags. If "-vvv" is used, the raw OpenFlow data is also printed in hex and ASCII. openvswitch-2.0.1+git20140120/third-party/automake.mk000066400000000000000000000001051226605124000220540ustar00rootroot00000000000000EXTRA_DIST += \ third-party/README \ third-party/ofp-tcpdump.patch openvswitch-2.0.1+git20140120/third-party/ofp-tcpdump.patch000066400000000000000000000100011226605124000231700ustar00rootroot00000000000000--- tcpdump/interface.h 2007-06-13 18:03:20.000000000 -0700 +++ tcpdump/interface.h 2008-04-15 18:28:55.000000000 -0700 @@ -130,7 +130,8 @@ extern const char *dnaddr_string(u_short); -extern void error(const char *, ...) +#define error(fmt, args...) tcpdump_error(fmt, ## args) +extern void tcpdump_error(const char *, ...) __attribute__((noreturn, format (printf, 1, 2))); extern void warning(const char *, ...) 
__attribute__ ((format (printf, 1, 2))); @@ -163,6 +164,7 @@ extern void hex_print_with_offset(const char *, const u_char *, u_int, u_int); extern void hex_print(const char *, const u_char *, u_int); extern void telnet_print(const u_char *, u_int); +extern void openflow_print(const u_char *, u_int); extern int llc_print(const u_char *, u_int, u_int, const u_char *, const u_char *, u_short *); extern int snap_print(const u_char *, u_int, u_int, u_int); --- tcpdump/Makefile.in 2012-06-13 04:56:20.000000000 +1200 +++ tcpdump/Makefile.in 2012-08-29 21:36:37.000000000 +1200 @@ -43,7 +43,7 @@ CC = @CC@ PROG = tcpdump CCOPT = @V_CCOPT@ -INCLS = -I. @V_INCLS@ +INCLS = -I. @V_INCLS@ -I../../include DEFS = @DEFS@ @CPPFLAGS@ @V_DEFS@ # Standard CFLAGS @@ -51,10 +51,10 @@ FULL_CFLAGS = $(CCOPT) $(DEFS) $(INCLS) $(CFLAGS) # Standard LDFLAGS -LDFLAGS = @LDFLAGS@ +LDFLAGS = @LDFLAGS@ -L../../lib # Standard LIBS -LIBS = @LIBS@ +LIBS = @LIBS@ -lopenvswitch -lssl -lrt -lm INSTALL = @INSTALL@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -93,7 +93,8 @@ print-symantec.c print-syslog.c print-tcp.c print-telnet.c print-tftp.c \ print-timed.c print-tipc.c print-token.c print-udld.c print-udp.c \ print-usb.c print-vjc.c print-vqp.c print-vrrp.c print-vtp.c \ - print-wb.c print-zephyr.c signature.c setsignal.c tcpdump.c util.c + print-wb.c print-zephyr.c signature.c setsignal.c tcpdump.c util.c \ + print-openflow.c LIBNETDISSECT_SRC=print-isakmp.c LIBNETDISSECT_OBJ=$(LIBNETDISSECT_SRC:.c=.o) --- tcpdump/print-openflow.c 1969-12-31 16:00:00.000000000 -0800 +++ tcpdump/print-openflow.c 2009-05-11 15:38:41.000000000 -0700 @@ -0,0 +1,45 @@ +/* Copyright (C) 2007, 2008, 2009 Nicira, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. The names of the authors may not be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "interface.h" +#include "openflow/openflow.h" +#include "../../lib/ofp-print.h" + +void +openflow_print(const u_char *sp, u_int length) +{ + const struct ofp_header *ofp = (struct ofp_header *)sp; + + if (!TTEST2(*sp, ntohs(ofp->length))) + goto trunc; + + ofp_print(stdout, sp, length, vflag); + return; + +trunc: + printf("[|openflow]"); +} --- tcpdump/print-tcp.c 2006-09-19 12:07:57.000000000 -0700 +++ tcpdump/print-tcp.c 2009-05-11 15:38:25.000000000 -0700 @@ -56,6 +56,8 @@ #include "nameser.h" +#include "openflow/openflow.h" + #ifdef HAVE_LIBCRYPTO #include #include @@ -669,7 +672,9 @@ } else if (length > 0 && (sport == LDP_PORT || dport == LDP_PORT)) { ldp_print(bp, length); - } + } else if (sport == OFP_TCP_PORT || dport == OFP_TCP_PORT) { + openflow_print(bp, length); + } return; bad: openvswitch-2.0.1+git20140120/tutorial/000077500000000000000000000000001226605124000173155ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/tutorial/.gitignore000066400000000000000000000000111226605124000212750ustar00rootroot00000000000000sandbox/ openvswitch-2.0.1+git20140120/tutorial/Tutorial000066400000000000000000000774201226605124000210550ustar00rootroot00000000000000Open vSwitch Advanced Features Tutorial ======================================= Many tutorials cover the basics of OpenFlow. This is not such a tutorial. 
Rather, a knowledge of the basics of OpenFlow is a prerequisite. If you do not already understand how an OpenFlow flow table works, please go read a basic tutorial and then continue reading here afterward. It is also important to understand the basics of Open vSwitch before you begin. If you have never used ovs-vsctl or ovs-ofctl before, you should learn a little about them before proceeding. Most of the features covered in this tutorial are Open vSwitch extensions to OpenFlow. Also, most of the features in this tutorial are specific to the software Open vSwitch implementation. If you are using an Open vSwitch port to an ASIC-based hardware switch, this tutorial will not help you. This tutorial does not cover every aspect of the features that it mentions. You can find the details elsewhere in the Open vSwitch documentation, especially ovs-ofctl(8) and the comments in the include/openflow/nicira-ext.h header file. >>> In this tutorial, paragraphs set off like this designate notes with additional information that readers may wish to skip on a first read. Getting Started =============== This is a hands-on tutorial. To get the most out of it, you will need Open vSwitch binaries. You do not, on the other hand, need any physical networking hardware or even supervisor privilege on your system. Instead, we will use a script called "ovs-sandbox", which accompanies the tutorial, that constructs a software simulated network environment based on Open vSwitch. You can use "ovs-sandbox" three ways: * If you have already installed Open vSwitch on your system, then you should be able to just run "ovs-sandbox" from this directory without any options. * If you have not installed Open vSwitch (and you do not want to install it), then you can build Open vSwitch according to the instructions in INSTALL, without installing it. Then run "./ovs-sandbox -b DIRECTORY" from this directory, substituting the Open vSwitch build directory for DIRECTORY. 
* As a slight variant on the latter, you can run "make sandbox" from an Open vSwitch build directory. When you run ovs-sandbox, it does the following: 1. CAUTION: Deletes any subdirectory of the current directory named "sandbox" and any files in that directory. 2. Creates a new directory "sandbox" in the current directory. 3. Sets up special environment variables that ensure that Open vSwitch programs will look inside the "sandbox" directory instead of in the Open vSwitch installation directory. 4. If you are using a built but not installed Open vSwitch, installs the Open vSwitch manpages in a subdirectory of "sandbox" and adjusts the MANPATH environment variable to point to this directory. This means that you can use, for example, "man ovs-vsctl" to see a manpage for the ovs-vsctl program that you built. 5. Creates an empty Open vSwitch configuration database under "sandbox". 6. Starts ovsdb-server running under "sandbox". 7. Starts ovs-vswitchd running under "sandbox", passing special options that enable a special "dummy" mode for testing. 8. Starts a nested interactive shell inside "sandbox". At this point, you can run all the usual Open vSwitch utilities from the nested shell environment. You can, for example, use ovs-vsctl to create a bridge: ovs-vsctl add-br br0 From Open vSwitch's perspective, the bridge that you create this way is as real as any other. You can, for example, connect it to an OpenFlow controller or use "ovs-ofctl" to examine and modify it and its OpenFlow flow table. On the other hand, the bridge is not visible to the operating system's network stack, so "ifconfig" or "ip" cannot see it or affect it, which means that utilities like "ping" and "tcpdump" will not work either. (That has its good side, too: you can't screw up your computer's network stack by manipulating a sandboxed OVS.) When you're done using OVS from the sandbox, exit the nested shell (by entering the "exit" shell command or pressing Control+D). 
This will kill the daemons that ovs-sandbox started, but it leaves the "sandbox" directory and its contents in place. The sandbox directory contains log files for the Open vSwitch daemons. You can examine them while you're running in the sandboxed environment or after you exit. Motivation ========== The goal of this tutorial is to demonstrate the power of Open vSwitch flow tables. The tutorial works through the implementation of a MAC-learning switch with VLAN trunk and access ports. Outside of the Open vSwitch features that we will discuss, OpenFlow provides at least two ways to implement such a switch: 1. An OpenFlow controller to implement MAC learning in a "reactive" fashion. Whenever a new MAC appears on the switch, or a MAC moves from one switch port to another, the controller adjusts the OpenFlow flow table to match. 2. The "normal" action. OpenFlow defines this action to submit a packet to "the traditional non-OpenFlow pipeline of the switch". That is, if a flow uses this action, then the packets in the flow go through the switch in the same way that they would if OpenFlow was not configured on the switch. Each of these approaches has unfortunate pitfalls. The first approach, using an OpenFlow controller to implement MAC learning, has a significant cost in terms of network bandwidth and latency. It also makes the controller more difficult to scale to large numbers of switches, which is especially important in environments with thousands of hypervisors (each of which contains a virtual OpenFlow switch). MAC learning at an OpenFlow controller also behaves poorly if the OpenFlow controller fails, slows down, or becomes unavailable due to network problems. The second approach, using the "normal" action, has different problems. First, little about the "normal" action is standardized, so it behaves differently on switches from different vendors, and the available features and how those features are configured (usually not through OpenFlow) vary widely.
Second, "normal" does not work well with other OpenFlow actions. It is "all-or-nothing", with little potential to adjust its behavior slightly or to compose it with other features. Scenario ======== We will construct Open vSwitch flow tables for a VLAN-capable, MAC-learning switch that has four ports: * p1, a trunk port that carries all VLANs, on OpenFlow port 1. * p2, an access port for VLAN 20, on OpenFlow port 2. * p3 and p4, both access ports for VLAN 30, on OpenFlow ports 3 and 4, respectively. >>> The ports' names are not significant. You could call them eth1 through eth4, or any other names you like. >>> An OpenFlow switch always has a "local" port as well. This scenario won't use the local port. Our switch design will consist of five main flow tables, each of which implements one stage in the switch pipeline: Table 0: Admission control. Table 1: VLAN input processing. Table 2: Learn source MAC and VLAN for ingress port. Table 3: Look up learned port for destination MAC and VLAN. Table 4: Output processing. The section below describes how to set up the scenario, followed by a section for each OpenFlow table. You can cut and paste the "ovs-vsctl" and "ovs-ofctl" commands in each of the sections below into your "ovs-sandbox" shell. They are also available as shell scripts in this directory, named t-setup, t-stage0, t-stage1, ..., t-stage4. The "ovs-appctl" test commands are intended for cutting and pasting and are not supplied separately. Setup ===== To get started, start "ovs-sandbox". Inside the interactive shell that it starts, run this command: ovs-vsctl add-br br0 -- set Bridge br0 fail-mode=secure This command creates a new bridge "br0" and puts "br0" into so-called "fail-secure" mode. For our purpose, this just means that the OpenFlow flow table starts out empty. >>> If we did not do this, then the flow table would start out with a single flow that executes the "normal" action. 
We could use that feature to yield a switch that behaves the same as the switch we are currently building, but with the caveats described under "Motivation" above. The new bridge has only one port on it so far, the "local port" br0. We need to add p1, p2, p3, and p4. A shell "for" loop is one way to do it: for i in 1 2 3 4; do ovs-vsctl add-port br0 p$i -- set Interface p$i ofport_request=$i ovs-ofctl mod-port br0 p$i up done In addition to adding a port, the ovs-vsctl command above sets its "ofport_request" column to ensure that port p1 is assigned OpenFlow port 1, p2 is assigned OpenFlow port 2, and so on. >>> We could omit setting the ofport_request and let Open vSwitch choose port numbers for us, but setting it is convenient for the purposes of this tutorial because we can talk about OpenFlow port 1 and know that it corresponds to p1. The ovs-ofctl command above brings up the simulated interfaces, which are down initially, using an OpenFlow request. The effect is similar to "ifconfig up", but the sandbox's interfaces are not visible to the operating system and therefore "ifconfig" would not affect them. We have not configured anything related to VLANs or MAC learning. That's because we're going to implement those features in the flow table. To see what we've done so far to set up the scenario, you can run a command like "ovs-vsctl show" or "ovs-ofctl show br0". Implementing Table 0: Admission control ======================================= Table 0 is where packets enter the switch. We use this stage to discard packets that for one reason or another are invalid.
For example, packets with a multicast source address are not valid, so we can add a flow to drop them at ingress to the switch with: ovs-ofctl add-flow br0 \ "table=0, dl_src=01:00:00:00:00:00/01:00:00:00:00:00, actions=drop" A switch should also not forward IEEE 802.1D Spanning Tree Protocol (STP) packets, so we can also add a flow to drop those and other packets with reserved multicast protocols: ovs-ofctl add-flow br0 \ "table=0, dl_dst=01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0, actions=drop" We could add flows to drop other protocols, but these demonstrate the pattern. We need one more flow, with a priority lower than the default, so that flows that don't match either of the "drop" flows we added above go on to pipeline stage 1 in OpenFlow table 1: ovs-ofctl add-flow br0 "table=0, priority=0, actions=resubmit(,1)" (The "resubmit" action is an Open vSwitch extension to OpenFlow.) Testing Table 0 --------------- If we were using Open vSwitch to set up a physical or a virtual switch, then we would naturally test it by sending packets through it one way or another, perhaps with common network testing tools like "ping" and "tcpdump" or more specialized tools like Scapy. That's difficult with our simulated switch, since it's not visible to the operating system. But our simulated switch has a few specialized testing tools. The most powerful of these tools is "ofproto/trace". Given a switch and the specification of a flow, "ofproto/trace" shows, step-by-step, how such a flow would be treated as it goes through the switch. == EXAMPLE 1 == Try this command: ovs-appctl ofproto/trace br0 in_port=1,dl_dst=01:80:c2:00:00:05 The output should look something like this: Flow: metadata=0,in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=01:80:c2:00:00:05,dl_type=0x0000 Rule: table=0 cookie=0 dl_dst=01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 OpenFlow actions=drop Final flow: unchanged Datapath actions: drop The first block of lines describes an OpenFlow table lookup. 
The first line shows the fields used for the table lookup (which are mostly zeros because that's the default if we don't specify everything). The second line gives the OpenFlow flow that the fields matched (called a "rule" because that is the name used inside Open vSwitch for an OpenFlow flow). In this case, we see that this packet that has a reserved multicast destination address matches the rule that drops those packets. The third line gives the rule's OpenFlow actions. The second block of lines summarizes the results, which are not very interesting here. == EXAMPLE 2 == Try another command: ovs-appctl ofproto/trace br0 in_port=1,dl_dst=01:80:c2:00:00:10 The output should be: Flow: metadata=0,in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=01:80:c2:00:00:10,dl_type=0x0000 Rule: table=0 cookie=0 priority=0 OpenFlow actions=resubmit(,1) Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop No match Final flow: unchanged Datapath actions: drop This time the flow we handed to "ofproto/trace" doesn't match any of our "drop" rules, so it falls through to the low-priority "resubmit" rule, which we see in the rule and the actions selected in the first block. The "resubmit" causes a second lookup in OpenFlow table 1, described by the additional block of indented text in the output. We haven't yet added any flows to OpenFlow table 1, so no flow actually matches in the second lookup. Therefore, the packet is still actually dropped, which means that the externally observable results would be identical to our first example. Implementing Table 1: VLAN Input Processing =========================================== A packet that enters table 1 has already passed basic validation in table 0. The purpose of table 1 is to validate the packet's VLAN, based on the VLAN configuration of the switch port through which the packet entered the switch.
We will also use it to attach a VLAN header to packets that arrive on an access port, which allows later processing stages to rely on the packet's VLAN always being part of the VLAN header, reducing special cases. Let's start by adding a low-priority flow that drops all packets, before we add flows that pass through acceptable packets. You can think of this as a "default drop" rule: ovs-ofctl add-flow br0 "table=1, priority=0, actions=drop" Our trunk port p1, on OpenFlow port 1, is an easy case. p1 accepts any packet regardless of whether it has a VLAN header or what the VLAN was, so we can add a flow that resubmits everything on input port 1 to the next table: ovs-ofctl add-flow br0 \ "table=1, priority=99, in_port=1, actions=resubmit(,2)" On the access ports, we want to accept any packet that has no VLAN header, tag it with the access port's VLAN number, and then pass it along to the next stage: ovs-ofctl add-flows br0 - <<'EOF' table=1, priority=99, in_port=2, vlan_tci=0, actions=mod_vlan_vid:20, resubmit(,2) table=1, priority=99, in_port=3, vlan_tci=0, actions=mod_vlan_vid:30, resubmit(,2) table=1, priority=99, in_port=4, vlan_tci=0, actions=mod_vlan_vid:30, resubmit(,2) EOF We don't write any rules that match packets with 802.1Q that enter this stage on any of the access ports, so the "default drop" rule we added earlier causes them to be dropped, which is ordinarily what we want for access ports. >>> Another variation of access ports allows ingress of packets tagged with VLAN 0 (aka 802.1p priority tagged packets). To allow such packets, replace "vlan_tci=0" by "vlan_tci=0/0xfff" above. Testing Table 1 --------------- "ofproto/trace" allows us to test the ingress VLAN rules that we added above. 
== EXAMPLE 1: Packet on Trunk Port == Here's a test of a packet coming in on the trunk port: ovs-appctl ofproto/trace br0 in_port=1,vlan_tci=5 The output shows the lookup in table 0, the resubmit to table 1, and the resubmit to table 2 (which does nothing because we haven't put anything there yet): Flow: metadata=0,in_port=1,vlan_tci=0x0005,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x0000 Rule: table=0 cookie=0 priority=0 OpenFlow actions=resubmit(,1) Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop Rule: table=1 cookie=0 priority=99,in_port=1 OpenFlow actions=resubmit(,2) Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop No match Final flow: unchanged Datapath actions: drop == EXAMPLE 2: Valid Packet on Access Port == Here's a test of a valid packet (a packet without an 802.1Q header) coming in on access port p2: ovs-appctl ofproto/trace br0 in_port=2 The output is similar to that for the previous case, except that it additionally tags the packet with p2's VLAN 20 before it passes it along to table 2: Flow: metadata=0,in_port=2,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x0000 Rule: table=0 cookie=0 priority=0 OpenFlow actions=resubmit(,1) Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop Rule: table=1 cookie=0 priority=99,in_port=2,vlan_tci=0x0000 OpenFlow actions=mod_vlan_vid:20,resubmit(,2) Resubmitted flow: metadata=0,in_port=2,dl_vlan=20,dl_vlan_pcp=0,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x0000 Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop No match Final flow: unchanged Datapath actions: drop == EXAMPLE 3: Invalid Packet on Access Port == This tests an invalid 
packet (one that includes an 802.1Q header) coming in on access port p2: ovs-appctl ofproto/trace br0 in_port=2,vlan_tci=5 The output shows the packet matching the default drop rule: Flow: metadata=0,in_port=2,vlan_tci=0x0005,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x0000 Rule: table=0 cookie=0 priority=0 OpenFlow actions=resubmit(,1) Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop Rule: table=1 cookie=0 priority=0 OpenFlow actions=drop Final flow: unchanged Datapath actions: drop Implementing Table 2: MAC+VLAN Learning for Ingress Port ======================================================== This table allows the switch we're implementing to learn that the packet's source MAC is located on the packet's ingress port in the packet's VLAN. >>> This table is a good example of why table 1 added a VLAN tag to packets that entered the switch through an access port. We want to associate a MAC+VLAN with a port regardless of whether the VLAN in question was originally part of the packet or whether it was an assumed VLAN associated with an access port. It only takes a single flow to do this. The following command adds it: ovs-ofctl add-flow br0 \ "table=2 actions=learn(table=10, NXM_OF_VLAN_TCI[0..11], \ NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[], \ load:NXM_OF_IN_PORT[]->NXM_NX_REG0[0..15]), \ resubmit(,3)" The "learn" action (an Open vSwitch extension to OpenFlow) modifies a flow table based on the content of the flow currently being processed. Here's how you can interpret each part of the "learn" action above: table=10 Modify flow table 10. This will be the MAC learning table. NXM_OF_VLAN_TCI[0..11] Make the flow that we add to flow table 10 match the same VLAN ID that the packet we're currently processing contains. This effectively scopes the MAC learning entry to a single VLAN, which is the ordinary behavior for a VLAN-aware switch.
NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[] Make the flow that we add to flow table 10 match, as Ethernet destination, the Ethernet source address of the packet we're currently processing. load:NXM_OF_IN_PORT[]->NXM_NX_REG0[0..15] Whereas the preceding parts specify fields for the new flow to match, this specifies an action for the flow to take when it matches. The action is for the flow to load the ingress port number of the current packet into register 0 (a special field that is an Open vSwitch extension to OpenFlow). >>> A real use of "learn" for MAC learning would probably involve two additional elements. First, the "learn" action would specify a hard_timeout for the new flow, to enable a learned MAC to eventually expire if no new packets were seen from a given source within a reasonable interval. Second, one would usually want to limit resource consumption by using the Flow_Table table in the Open vSwitch configuration database to specify a maximum number of flows in table 10. This definitely calls for examples. Testing Table 2 --------------- == EXAMPLE 1 == Try the following test command: ovs-appctl ofproto/trace br0 in_port=1,vlan_tci=20,dl_src=50:00:00:00:00:01 -generate The output shows that "learn" was executed, but it isn't otherwise informative, so we won't include it here. The "-generate" keyword is new. Ordinarily, "ofproto/trace" has no side effects: "output" actions do not actually output packets, "learn" actions do not actually modify the flow table, and so on. With "-generate", though, "ofproto/trace" does execute "learn" actions. That's important now, because we want to see the effect of the "learn" action on table 10. 
You can see that by running: ovs-ofctl dump-flows br0 table=10 which (omitting the "duration" and "idle_age" fields, which will vary based on how soon you ran this command after the previous one, as well as some other uninteresting fields) prints something like: NXST_FLOW reply (xid=0x4): table=10, vlan_tci=0x0014/0x0fff,dl_dst=50:00:00:00:00:01 actions=load:0x1->NXM_NX_REG0[0..15] You can see that the packet coming in on VLAN 20 with source MAC 50:00:00:00:00:01 became a flow that matches VLAN 20 (written in hexadecimal) and destination MAC 50:00:00:00:00:01. The flow loads port number 1, the input port for the flow we tested, into register 0. == EXAMPLE 2 == Here's a second test command: ovs-appctl ofproto/trace br0 in_port=2,dl_src=50:00:00:00:00:01 -generate The flow that this command tests has the same source MAC and VLAN as example 1, although the VLAN comes from an access port VLAN rather than an 802.1Q header. If we again dump the flows for table 10 with: ovs-ofctl dump-flows br0 table=10 then we see that the flow we saw previously has changed to indicate that the learned port is port 2, as we would expect: NXST_FLOW reply (xid=0x4): table=10, vlan_tci=0x0014/0x0fff,dl_dst=50:00:00:00:00:01 actions=load:0x2->NXM_NX_REG0[0..15] Implementing Table 3: Look Up Destination Port ============================================== This table figures out what port we should send the packet to based on the destination MAC and VLAN. That is, if we've learned the location of the destination (from table 2 processing some previous packet with that destination as its source), then we want to send the packet there. We need only one flow to do the lookup: ovs-ofctl add-flow br0 \ "table=3 priority=50 actions=resubmit(,10), resubmit(,4)" The flow's first action resubmits to table 10, the table that the "learn" action modifies. As you saw previously, the learned flows in this table write the learned port into register 0. 
If the destination for our packet hasn't been learned, then there will be no matching flow, and so the "resubmit" turns into a no-op. Because registers are initialized to 0, we can use a register 0 value of 0 in our next pipeline stage as a signal to flood the packet. The second action resubmits to table 4, continuing to the next pipeline stage. We can add another flow to skip the learning table lookup for multicast and broadcast packets, since those should always be flooded: ovs-ofctl add-flow br0 \ "table=3 priority=99 dl_dst=01:00:00:00:00:00/01:00:00:00:00:00 \ actions=resubmit(,4)" >>> We don't strictly need to add this flow, because multicast addresses will never show up in our learning table. (In turn, that's because we put a flow into table 0 to drop packets that have a multicast source address.) Testing Table 3 --------------- == EXAMPLE == Here's a command that should cause OVS to learn that f0:00:00:00:00:01 is on p1 in VLAN 20: ovs-appctl ofproto/trace br0 in_port=1,dl_vlan=20,dl_src=f0:00:00:00:00:01,dl_dst=90:00:00:00:00:01 -generate Here's an excerpt from the output that shows (from the "no match" looking up the resubmit to table 10) that the flow's destination was unknown: Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop Rule: table=3 cookie=0 priority=50 OpenFlow actions=resubmit(,10),resubmit(,4) Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop No match You can verify that the packet's source was learned two ways. 
The most direct way is to dump the learning table with: ovs-ofctl dump-flows br0 table=10 which ought to show roughly the following, with extraneous details removed: table=10, vlan_tci=0x0014/0x0fff,dl_dst=f0:00:00:00:00:01 actions=load:0x1->NXM_NX_REG0[0..15] >>> If you tried the examples for the previous step, or if you did some of your own experiments, then you might see additional flows there. These additional flows are harmless. If they bother you, then you can remove them with "ovs-ofctl del-flows br0 table=10". The other way is to inject a packet to take advantage of the learning entry. For example, we can inject a packet on p2 whose destination is the MAC address that we just learned on p1: ovs-appctl ofproto/trace br0 in_port=2,dl_src=90:00:00:00:00:01,dl_dst=f0:00:00:00:00:01 -generate Here's an interesting excerpt from that command's output. This group of lines traces the "resubmit(,10)", showing that the packet matched the learned flow for the first MAC we used, loading the OpenFlow port number for the learned port p1 into register 0: Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop Rule: table=10 cookie=0 vlan_tci=0x0014/0x0fff,dl_dst=f0:00:00:00:00:01 OpenFlow actions=load:0x1->NXM_NX_REG0[0..15] If you read the commands above carefully, then you might have noticed that they simply have the Ethernet source and destination addresses exchanged. 
That means that if we now rerun the first ovs-appctl command above, e.g.: ovs-appctl ofproto/trace br0 in_port=1,dl_vlan=20,dl_src=f0:00:00:00:00:01,dl_dst=90:00:00:00:00:01 -generate then we see in the output that the destination has now been learned: Resubmitted flow: unchanged Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0 Resubmitted odp: drop Rule: table=10 cookie=0 vlan_tci=0x0014/0x0fff,dl_dst=90:00:00:00:00:01 OpenFlow actions=load:0x2->NXM_NX_REG0[0..15] Implementing Table 4: Output Processing ======================================= At entry to stage 4, we know that register 0 contains either the desired output port or is zero if the packet should be flooded. We also know that the packet's VLAN is in its 802.1Q header, even if the VLAN was implicit because the packet came in on an access port. The job of the final pipeline stage is to actually output packets. The job is trivial for output to our trunk port p1: ovs-ofctl add-flow br0 "table=4 reg0=1 actions=1" For output to the access ports, we just have to strip the VLAN header before outputting the packet: ovs-ofctl add-flows br0 - <<'EOF' table=4 reg0=2 actions=strip_vlan,2 table=4 reg0=3 actions=strip_vlan,3 table=4 reg0=4 actions=strip_vlan,4 EOF The only slightly tricky part is flooding multicast and broadcast packets and unicast packets with unlearned destinations. For those, we need to make sure that we only output the packets to the ports that carry our packet's VLAN, and that we include the 802.1Q header in the copy output to the trunk port but not in copies output to access ports: ovs-ofctl add-flows br0 - <<'EOF' table=4 reg0=0 priority=99 dl_vlan=20 actions=1,strip_vlan,2 table=4 reg0=0 priority=99 dl_vlan=30 actions=1,strip_vlan,3,4 table=4 reg0=0 priority=50 actions=1 EOF >>> Our rules rely on the standard OpenFlow behavior that an output action will not forward a packet back out the port it came in on. 
That is, if a packet comes in on p1, and we've learned that the packet's destination MAC is also on p1, so that we end up with "actions=1" as our actions, the switch will not forward the packet back out its input port. The multicast/broadcast/unknown destination cases above also rely on this behavior. Testing Table 4 --------------- == EXAMPLE 1: Broadcast, Multicast, and Unknown Destination == Try tracing a broadcast packet arriving on p1 in VLAN 30: ovs-appctl ofproto/trace br0 in_port=1,dl_dst=ff:ff:ff:ff:ff:ff,dl_vlan=30 The interesting part of the output is the final line, which shows that the switch would remove the 802.1Q header and then output the packet to p3 and p4, which are access ports for VLAN 30: Datapath actions: pop_vlan,3,4 Similarly, if we trace a broadcast packet arriving on p3: ovs-appctl ofproto/trace br0 in_port=3,dl_dst=ff:ff:ff:ff:ff:ff then we see that it is output to p1 with an 802.1Q tag and then to p4 without one: Datapath actions: push_vlan(vid=30,pcp=0),1,pop_vlan,4 >>> Open vSwitch could simplify the datapath actions here to just "4,push_vlan(vid=30,pcp=0),1" but it is not smart enough to do so. 
The following are also broadcasts, but the result is to drop the packets because the VLAN only belongs to the input port: ovs-appctl ofproto/trace br0 in_port=1,dl_dst=ff:ff:ff:ff:ff:ff ovs-appctl ofproto/trace br0 in_port=1,dl_dst=ff:ff:ff:ff:ff:ff,dl_vlan=55 Try some other broadcast cases on your own: ovs-appctl ofproto/trace br0 in_port=1,dl_dst=ff:ff:ff:ff:ff:ff,dl_vlan=20 ovs-appctl ofproto/trace br0 in_port=2,dl_dst=ff:ff:ff:ff:ff:ff ovs-appctl ofproto/trace br0 in_port=4,dl_dst=ff:ff:ff:ff:ff:ff You can see the same behavior with multicast packets and with unicast packets whose destination has not been learned, e.g.: ovs-appctl ofproto/trace br0 in_port=4,dl_dst=01:00:00:00:00:00 ovs-appctl ofproto/trace br0 in_port=1,dl_dst=90:12:34:56:78:90,dl_vlan=20 ovs-appctl ofproto/trace br0 in_port=1,dl_dst=90:12:34:56:78:90,dl_vlan=30 == EXAMPLE 2: MAC Learning == Let's follow the same pattern as we did for table 3. First learn a MAC on port p1 in VLAN 30: ovs-appctl ofproto/trace br0 in_port=1,dl_vlan=30,dl_src=10:00:00:00:00:01,dl_dst=20:00:00:00:00:01 -generate You can see from the last line of output that the packet's destination is unknown, so it gets flooded to both p3 and p4, the other ports in VLAN 30: Datapath actions: pop_vlan,3,4 Then reverse the MACs and learn the first flow's destination on port p4: ovs-appctl ofproto/trace br0 in_port=4,dl_src=20:00:00:00:00:01,dl_dst=10:00:00:00:00:01 -generate The last line of output shows that this packet's destination is known to be p1, as learned from our previous command: Datapath actions: push_vlan(vid=30,pcp=0),1 Now, if we rerun our first command: ovs-appctl ofproto/trace br0 in_port=1,dl_vlan=30,dl_src=10:00:00:00:00:01,dl_dst=20:00:00:00:00:01 -generate we can see that the result is no longer a flood but an output to the specified learned destination port p4: Datapath actions: pop_vlan,4 Contact ======= bugs@openvswitch.org http://openvswitch.org/ 
openvswitch-2.0.1+git20140120/tutorial/automake.mk000066400000000000000000000004011226605124000214470ustar00rootroot00000000000000EXTRA_DIST += \ tutorial/Tutorial \ tutorial/ovs-sandbox \ tutorial/t-setup \ tutorial/t-stage0 \ tutorial/t-stage1 \ tutorial/t-stage2 \ tutorial/t-stage3 \ tutorial/t-stage4 sandbox: all cd $(srcdir)/tutorial && ./ovs-sandbox -b $(abs_builddir) openvswitch-2.0.1+git20140120/tutorial/ovs-sandbox000077500000000000000000000142421226605124000215110ustar00rootroot00000000000000#! /bin/sh # # Copyright (c) 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. set -e run () { echo "$@" (cd "$sandbox" && "$@") || exit 1 } builddir= srcdir= schema= installed=false built=false for option; do # This option-parsing mechanism borrowed from a Autoconf-generated # configure script under the following license: # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # 2002, 2003, 2004, 2005, 2006, 2009, 2013 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # If the previous option needs an argument, assign it. 
if test -n "$prev"; then eval $prev=\$option prev= continue fi case $option in *=*) optarg=`expr "X$option" : '[^=]*=\(.*\)'` ;; *) optarg=yes ;; esac case $dashdash$option in --) dashdash=yes ;; -h|--help) cat <&2 exit 1 ;; *) echo "$option: non-option arguments not supported (use --help for help)" >&2 exit 1 ;; esac shift done if $installed && $built; then echo "sorry, conflicting options (use --help for help)" >&2 exit 1 elif $installed || $built; then : elif test -e vswitchd/ovs-vswitchd; then built=: builddir=. elif (ovs-vswitchd --version) >/dev/null 2>&1; then installed=: else echo "can't find an OVS build or install (use --help for help)" >&2 exit 1 fi if $built; then if test ! -e "$builddir"/vswitchd/ovs-vswitchd; then echo "$builddir does not appear to be an OVS build directory" >&2 exit 1 fi builddir=`cd $builddir && pwd` # Find srcdir. case $srcdir in '') srcdir=$builddir if test ! -e "$srcdir"/WHY-OVS; then srcdir=`cd $builddir/.. && pwd` fi ;; /*) ;; *) srcdir=`pwd`/$srcdir ;; esac schema=$srcdir/vswitchd/vswitch.ovsschema if test ! -e "$schema"; then echo >&2 'source directory not found, please use --srcdir' exit 1 fi # Put built tools early in $PATH. if test ! -e $builddir/vswitchd/ovs-vswitchd; then echo >&2 'build not found, please change set $builddir or change directory' exit 1 fi PATH=$builddir/ovsdb:$builddir/vswitchd:$builddir/utilities:$PATH export PATH else case $schema in '') for schema in \ /usr/local/share/openvswitch/vswitch.ovsschema \ /usr/share/openvswitch/vswitch.ovsschema \ none; do if test -r $schema; then break fi done ;; /*) ;; *) schema=`pwd`/$schema ;; esac if test ! -r "$schema"; then echo "can't find vswitch.ovsschema, please specify --schema" >&2 exit 1 fi fi # Create sandbox. rm -rf sandbox mkdir sandbox sandbox=`cd sandbox && pwd` # Set up environment for OVS programs to sandbox themselves. 
OVS_RUNDIR=$sandbox; export OVS_RUNDIR OVS_LOGDIR=$sandbox; export OVS_LOGDIR OVS_DBDIR=$sandbox; export OVS_DBDIR OVS_SYSCONFDIR=$sandbox; export OVS_SYSCONFDIR if $built; then # Easy access to OVS manpages. (cd "$builddir" && make install-man mandir="$sandbox"/man) MANPATH=$sandbox/man:; export MANPATH fi # Ensure cleanup. trap 'kill `cat "$sandbox"/*.pid`' 0 1 2 3 13 14 15 # Create database and start ovsdb-server. touch "$sandbox"/.conf.db.~lock~ run ovsdb-tool create conf.db "$schema" run ovsdb-server --detach --no-chdir --pidfile -vconsole:off --log-file \ --remote=punix:"$sandbox"/db.sock # Start ovs-vswitchd. run ovs-vswitchd --detach --no-chdir --pidfile -vconsole:off --log-file \ --enable-dummy=override -vvconn -vnetdev_dummy cat <NXM_NX_REG0[0..15]), \ resubmit(,3)" openvswitch-2.0.1+git20140120/tutorial/t-stage3000077500000000000000000000003431226605124000206720ustar00rootroot00000000000000#! /bin/sh -ve ovs-ofctl add-flow br0 \ "table=3 priority=50 actions=resubmit(,10), resubmit(,4)" ovs-ofctl add-flow br0 \ "table=3 priority=99 dl_dst=01:00:00:00:00:00/01:00:00:00:00:00 \ actions=resubmit(,4)" openvswitch-2.0.1+git20140120/tutorial/t-stage4000077500000000000000000000007001226605124000206700ustar00rootroot00000000000000#! 
/bin/sh -ve ovs-ofctl add-flow br0 "table=4 reg0=1 actions=1" ovs-ofctl add-flows br0 - <<'EOF' table=4 reg0=2 actions=strip_vlan,2 table=4 reg0=3 actions=strip_vlan,3 table=4 reg0=4 actions=strip_vlan,4 EOF ovs-ofctl add-flows br0 - <<'EOF' table=4 reg0=0 priority=99 dl_vlan=20 actions=1,strip_vlan,2 table=4 reg0=0 priority=99 dl_vlan=30 actions=1,strip_vlan,3,4 table=4 reg0=0 priority=50 actions=1 EOF openvswitch-2.0.1+git20140120/utilities/000077500000000000000000000000001226605124000174655ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/utilities/.gitignore000066400000000000000000000007641226605124000214640ustar00rootroot00000000000000/Makefile /Makefile.in /nlmon /ovs-appctl /ovs-appctl.8 /ovs-benchmark /ovs-benchmark.1 /ovs-cfg-mod /ovs-cfg-mod.8 /ovs-check-dead-ifs /ovs-controller /ovs-controller.8 /ovs-ctl /ovs-dpctl /ovs-dpctl.8 /ovs-l3ping /ovs-l3ping.8 /ovs-lib /ovs-ofctl /ovs-ofctl.8 /ovs-parse-backtrace /ovs-pcap /ovs-pcap.1 /ovs-pki /ovs-pki-cgi /ovs-pki.8 /ovs-test /ovs-test.8 /ovs-tcpundump /ovs-tcpundump.1 /ovs-vlan-bug-workaround /ovs-vlan-bug-workaround.8 /ovs-vlan-test /ovs-vlan-test.8 /ovs-vsctl /ovs-vsctl.8 openvswitch-2.0.1+git20140120/utilities/automake.mk000066400000000000000000000072651226605124000216360ustar00rootroot00000000000000bin_PROGRAMS += \ utilities/ovs-appctl \ utilities/ovs-controller \ utilities/ovs-dpctl \ utilities/ovs-ofctl \ utilities/ovs-vsctl bin_SCRIPTS += utilities/ovs-pki if HAVE_PYTHON bin_SCRIPTS += \ utilities/ovs-dpctl-top \ utilities/ovs-l3ping \ utilities/ovs-parse-backtrace \ utilities/ovs-pcap \ utilities/ovs-tcpundump \ utilities/ovs-test \ utilities/ovs-vlan-test endif scripts_SCRIPTS += \ utilities/ovs-check-dead-ifs \ utilities/ovs-ctl \ utilities/ovs-save scripts_DATA += utilities/ovs-lib EXTRA_DIST += \ utilities/ovs-check-dead-ifs.in \ utilities/ovs-ctl.in \ utilities/ovs-dev.py \ utilities/ovs-dpctl-top.in \ utilities/ovs-l3ping.in \ utilities/ovs-lib.in \ utilities/ovs-parse-backtrace.in \ 
utilities/ovs-pcap.in \ utilities/ovs-pki.in \ utilities/ovs-save \ utilities/ovs-tcpundump.in \ utilities/ovs-test.in \ utilities/ovs-vlan-test.in MAN_ROOTS += \ utilities/ovs-appctl.8.in \ utilities/ovs-benchmark.1.in \ utilities/ovs-controller.8.in \ utilities/ovs-ctl.8 \ utilities/ovs-dpctl.8.in \ utilities/ovs-dpctl-top.8.in \ utilities/ovs-l3ping.8.in \ utilities/ovs-ofctl.8.in \ utilities/ovs-parse-backtrace.8 \ utilities/ovs-pcap.1.in \ utilities/ovs-pki.8.in \ utilities/ovs-tcpundump.1.in \ utilities/ovs-vlan-bug-workaround.8.in \ utilities/ovs-test.8.in \ utilities/ovs-vlan-test.8.in \ utilities/ovs-vsctl.8.in MAN_FRAGMENTS += utilities/ovs-vlan-bugs.man DISTCLEANFILES += \ utilities/ovs-appctl.8 \ utilities/ovs-ctl \ utilities/ovs-benchmark.1 \ utilities/ovs-check-dead-ifs \ utilities/ovs-controller.8 \ utilities/ovs-dpctl.8 \ utilities/ovs-dpctl-top \ utilities/ovs-dpctl-top.8 \ utilities/ovs-l3ping \ utilities/ovs-l3ping.8 \ utilities/ovs-lib \ utilities/ovs-ofctl.8 \ utilities/ovs-parse-backtrace \ utilities/ovs-pcap \ utilities/ovs-pcap.1 \ utilities/ovs-pki \ utilities/ovs-pki.8 \ utilities/ovs-tcpundump \ utilities/ovs-tcpundump.1 \ utilities/ovs-test \ utilities/ovs-test.8 \ utilities/ovs-vlan-test \ utilities/ovs-vlan-test.8 \ utilities/ovs-vlan-bug-workaround.8 \ utilities/ovs-vsctl.8 man_MANS += \ utilities/ovs-appctl.8 \ utilities/ovs-benchmark.1 \ utilities/ovs-controller.8 \ utilities/ovs-dpctl.8 \ utilities/ovs-dpctl-top.8 \ utilities/ovs-l3ping.8 \ utilities/ovs-ofctl.8 \ utilities/ovs-parse-backtrace.8 \ utilities/ovs-pcap.1 \ utilities/ovs-pki.8 \ utilities/ovs-tcpundump.1 \ utilities/ovs-vlan-bug-workaround.8 \ utilities/ovs-test.8 \ utilities/ovs-vlan-test.8 \ utilities/ovs-vsctl.8 dist_man_MANS += utilities/ovs-ctl.8 utilities_ovs_appctl_SOURCES = utilities/ovs-appctl.c utilities_ovs_appctl_LDADD = lib/libopenvswitch.a $(SSL_LIBS) utilities_ovs_controller_SOURCES = utilities/ovs-controller.c utilities_ovs_controller_LDADD = 
lib/libopenvswitch.a $(SSL_LIBS) utilities_ovs_dpctl_SOURCES = utilities/ovs-dpctl.c utilities_ovs_dpctl_LDADD = lib/libopenvswitch.a $(SSL_LIBS) utilities_ovs_ofctl_SOURCES = utilities/ovs-ofctl.c utilities_ovs_ofctl_LDADD = \ ofproto/libofproto.a \ lib/libopenvswitch.a \ $(SSL_LIBS) utilities_ovs_vsctl_SOURCES = utilities/ovs-vsctl.c utilities_ovs_vsctl_LDADD = lib/libopenvswitch.a $(SSL_LIBS) if LINUX_DATAPATH sbin_PROGRAMS += utilities/ovs-vlan-bug-workaround utilities_ovs_vlan_bug_workaround_SOURCES = utilities/ovs-vlan-bug-workaround.c utilities_ovs_vlan_bug_workaround_LDADD = lib/libopenvswitch.a $(SSL_LIBS) noinst_PROGRAMS += utilities/nlmon utilities_nlmon_SOURCES = utilities/nlmon.c utilities_nlmon_LDADD = lib/libopenvswitch.a $(SSL_LIBS) endif bin_PROGRAMS += utilities/ovs-benchmark utilities_ovs_benchmark_SOURCES = utilities/ovs-benchmark.c utilities_ovs_benchmark_LDADD = lib/libopenvswitch.a $(SSL_LIBS) include utilities/bugtool/automake.mk openvswitch-2.0.1+git20140120/utilities/bugtool/000077500000000000000000000000001226605124000211405ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/utilities/bugtool/.gitignore000066400000000000000000000000341226605124000231250ustar00rootroot00000000000000/ovs-bugtool /ovs-bugtool.8 openvswitch-2.0.1+git20140120/utilities/bugtool/automake.mk000066400000000000000000000042711226605124000233030ustar00rootroot00000000000000if HAVE_PYTHON sbin_SCRIPTS += utilities/bugtool/ovs-bugtool CLEANFILES += utilities/bugtool/ovs-bugtool man_MANS += utilities/bugtool/ovs-bugtool.8 MAN_ROOTS += utilities/bugtool/ovs-bugtool.8.in DISTCLEANFILES += utilities/bugtool/ovs-bugtool.8 bugtool_plugins = \ utilities/bugtool/plugins/kernel-info/openvswitch.xml \ utilities/bugtool/plugins/network-status/openvswitch.xml \ utilities/bugtool/plugins/system-configuration.xml \ utilities/bugtool/plugins/system-logs/openvswitch.xml \ utilities/bugtool/plugins/system-configuration/openvswitch.xml bugtool_scripts = \ 
utilities/bugtool/ovs-bugtool-bfd-show \ utilities/bugtool/ovs-bugtool-cfm-show \ utilities/bugtool/ovs-bugtool-coverage-show \ utilities/bugtool/ovs-bugtool-lacp-show \ utilities/bugtool/ovs-bugtool-list-dbs \ utilities/bugtool/ovs-bugtool-memory-show \ utilities/bugtool/ovs-bugtool-tc-class-show \ utilities/bugtool/ovs-bugtool-vsctl-show \ utilities/bugtool/ovs-bugtool-ovsdb-dump \ utilities/bugtool/ovs-bugtool-daemons-ver \ utilities/bugtool/ovs-bugtool-ovs-ofctl-show \ utilities/bugtool/ovs-bugtool-ovs-ofctl-dump-flows \ utilities/bugtool/ovs-bugtool-ovs-appctl-dpif \ utilities/bugtool/ovs-bugtool-bond-show scripts_SCRIPTS += $(bugtool_scripts) bugtoolpluginsdir = $(pkgdatadir)/bugtool-plugins INSTALL_DATA_LOCAL += bugtool-install-data-local bugtool-install-data-local: for plugin in $(bugtool_plugins); do \ stem=`echo "$$plugin" | sed 's,utilities/bugtool/plugins/,,'`; \ dir=`expr "$$stem" : '\(.*\)/[^/]*$$'`; \ $(MKDIR_P) "$(DESTDIR)$(bugtoolpluginsdir)/$$dir"; \ $(INSTALL_DATA) "$(srcdir)/$$plugin" "$(DESTDIR)$(bugtoolpluginsdir)/$$stem"; \ done UNINSTALL_LOCAL += bugtool-uninstall-local bugtool-uninstall-local: for plugin in $(bugtool_plugins); do \ stem=`echo "$$plugin" | sed 's,utilities/bugtool/plugins/,,'`; \ rm -f "$(DESTDIR)$(bugtoolpluginsdir)/$$stem"; \ done for plugin in $(bugtool_plugins); do \ stem=`echo "$$plugin" | sed 's,utilities/bugtool/plugins/,,'`; \ dir=`expr "$$stem" : '\(.*\)/[^/]*$$'`; \ rmdir "$(DESTDIR)$(bugtoolpluginsdir)/$$dir"; \ done; exit 0 endif EXTRA_DIST += \ $(bugtool_plugins) \ $(bugtool_scripts) \ utilities/bugtool/ovs-bugtool.in openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-bfd-show000077500000000000000000000013331226605124000250550ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. 
# # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2013 Nicira, Inc. ovs-appctl bfd/show openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-bond-show000077500000000000000000000013341226605124000252450ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2012 Nicira, Inc. ovs-appctl bond/show openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-cfm-show000077500000000000000000000013331226605124000250670ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2011 Nicira, Inc. ovs-appctl cfm/show openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-coverage-show000077500000000000000000000013401226605124000261130ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2012 Nicira, Inc. ovs-appctl coverage/show openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-daemons-ver000077500000000000000000000015501226605124000255650ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2012 Nicira, Inc. for f in `cd /var/run/openvswitch/; ls *.pid 2>/dev/null` do if [ -n "${f%.pid}" ]; then ovs-appctl -t "${f%.pid}" version 2>&1 fi echo done openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-lacp-show000077500000000000000000000013341226605124000252420ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2011 Nicira, Inc. ovs-appctl lacp/show openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-list-dbs000077500000000000000000000013701226605124000250660ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2013 Nicira, Inc. ovs-appctl -t ovsdb-server ovsdb-server/list-dbs openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-memory-show000077500000000000000000000013361226605124000256350ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2012 Nicira, Inc. ovs-appctl memory/show openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-ovs-appctl-dpif000077500000000000000000000016571226605124000263650ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2013 Nicira, Inc. for bridge in `ovs-vsctl -- --real list-br` do echo "ovs-appctl dpif/show ${bridge}" ovs-appctl dpif/show "${bridge}" echo "ovs-appctl dpif/dump-flows ${bridge}" ovs-appctl dpif/dump-flows "$bridge" echo "" done openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-ovs-ofctl-dump-flows000077500000000000000000000015121226605124000273520ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2013 Nicira, Inc. for bridge in `ovs-vsctl list-br` do echo "ovs-ofctl dump-flows ${bridge}" ovs-ofctl dump-flows "$bridge" echo "" done openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-ovs-ofctl-show000077500000000000000000000014761226605124000262460ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2013 Nicira, Inc. for bridge in `ovs-vsctl list-br` do echo "ovs-ofctl show ${bridge}" ovs-ofctl show "$bridge" echo "" done openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-ovsdb-dump000077500000000000000000000014171226605124000254270ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2011 Nicira, Inc. ovsdb-client -f csv dump unix:/var/run/openvswitch/db.sock Open_vSwitch openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-tc-class-show000077500000000000000000000017001226605124000260310ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2011 Nicira, Inc. for iface in $(cd /sys/class/net && echo *); do if [ -d /sys/class/net/$iface ]; then echo Interface $iface: # indent tc output so it's clear which interface it pertains to /sbin/tc -s -d class show dev $iface | /bin/sed 's/^/ /' fi done openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool-vsctl-show000077500000000000000000000013341226605124000254560ustar00rootroot00000000000000#! /bin/sh # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General # Public License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 # USA # # Copyright (C) 2012, 2013 Nicira, Inc. ovs-vsctl show openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool.8.in000066400000000000000000000030151226605124000241150ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .TH ovs\-bugtool 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .\" This program's name: .ds PN ovs\-bugtool . .SH NAME ovs\-bugtool \- Open vSwitch bug reporting utility . .SH SYNOPSIS .B ovs\-bugtool . .SH DESCRIPTION Generates a debug bundle with useful information about Open vSwitch on this system and places it in \fB/var/log/ovs-bugtool\fR. . .SH OPTIONS . .IP "\fB\-\-all\fR" Use all available capabilities. . 
.IP "\fB\-\-capabilities\fR" List \fBovs\-bugtool\fR capabilities. . .IP "\fB\-\-debug\fR" Print verbose debugging output. . .IP "\fB\-\-entries=\fIlist\fR" Use the capabilities specified in a comma-separated list. . .IP "\fB\-\-log\-days=\fIdays\fR" Include the logs rotated in the previous \fIdays\fR days in the debug bundle. The number of log files included has a big impact on the eventual bundle size. The default value is 20 days. . .IP "\fB\-\-output=\fIfiletype\fR" Generate a debug bundle with the specified file type. Options include \fBtar\fR, \fBtar.gz\fR, \fBtar.bz2\fR, and \fBzip\fR. . .IP "\fB\-\-ovs\fR" Use only Open vSwitch relevant capabilities. . .IP "\fB\-\-silent\fR" Suppress output. . .IP "\fB\-\-unlimited\fR" Do not exclude files which are too large. . .IP "\fB\-\-yestoall\fR" Answer yes to all prompts. . .SH BUGS \fBovs\-bugtool\fR makes many assumptions about file locations and the availability of system utilities. It has been tested on Debian and Red Hat and derived distributions. On other distributions it is likely to be less useful. openvswitch-2.0.1+git20140120/utilities/bugtool/ovs-bugtool.in000077500000000000000000001224341226605124000237610ustar00rootroot00000000000000#! @PYTHON@ # This library is free software; you can redistribute it and/or # modify it under the terms of version 2.1 of the GNU Lesser General Public # License as published by the Free Software Foundation. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # Copyright (c) 2005, 2007 XenSource Ltd. # Copyright (c) 2010, 2011, 2012 Nicira, Inc. 
# # To add new entries to the bugtool, you need to: # # Create a new capability. These declare the new entry to the GUI, including # the expected size, time to collect, privacy implications, and whether the # capability should be selected by default. One capability may refer to # multiple files, assuming that they can be reasonably grouped together, and # have the same privacy implications. You need: # # A new CAP_ constant. # A cap() invocation to declare the capability. # # You then need to add calls to main() to collect the files. These will # typically be calls to the helpers file_output(), tree_output(), cmd_output(), # or func_output(). # import warnings warnings.filterwarnings(action="ignore", category=DeprecationWarning) import getopt import re import os import StringIO import sys import tarfile import time import commands import pprint from xml.dom.minidom import parse, getDOMImplementation import zipfile from subprocess import Popen, PIPE from select import select from signal import SIGTERM, SIGUSR1 import md5 import platform import fcntl import glob import urllib import socket import base64 OS_RELEASE = platform.release() # # Files & directories # APT_SOURCES_LIST = "/etc/apt/sources.list" APT_SOURCES_LIST_D = "/etc/apt/sources.list.d" BUG_DIR = "/var/log/ovs-bugtool" PLUGIN_DIR = "@pkgdatadir@/bugtool-plugins" GRUB_CONFIG = '/boot/grub/menu.lst' BOOT_KERNEL = '/boot/vmlinuz-' + OS_RELEASE BOOT_INITRD = '/boot/initrd-' + OS_RELEASE + '.img' PROC_PARTITIONS = '/proc/partitions' FSTAB = '/etc/fstab' PROC_MOUNTS = '/proc/mounts' ISCSI_CONF = '/etc/iscsi/iscsid.conf' ISCSI_INITIATOR = '/etc/iscsi/initiatorname.iscsi' PROC_CPUINFO = '/proc/cpuinfo' PROC_MEMINFO = '/proc/meminfo' PROC_IOPORTS = '/proc/ioports' PROC_INTERRUPTS = '/proc/interrupts' PROC_SCSI = '/proc/scsi/scsi' PROC_VERSION = '/proc/version' PROC_MODULES = '/proc/modules' PROC_DEVICES = '/proc/devices' PROC_FILESYSTEMS = '/proc/filesystems' PROC_CMDLINE = '/proc/cmdline' PROC_CONFIG = 
'/proc/config.gz' PROC_USB_DEV = '/proc/bus/usb/devices' PROC_NET_BONDING_DIR = '/proc/net/bonding' IFCFG_RE = re.compile(r'^.*/ifcfg-.*') ROUTE_RE = re.compile(r'^.*/route-.*') SYSCONFIG_HWCONF = '/etc/sysconfig/hwconf' SYSCONFIG_NETWORK = '/etc/sysconfig/network' SYSCONFIG_NETWORK_SCRIPTS = '/etc/sysconfig/network-scripts' PROC_NET_VLAN_DIR = '/proc/net/vlan' PROC_NET_SOFTNET_STAT = '/proc/net/softnet_stat' MODPROBE_CONF = '/etc/modprobe.conf' MODPROBE_DIR = '/etc/modprobe.d' RESOLV_CONF = '/etc/resolv.conf' MPP_CONF = '/etc/mpp.conf' MULTIPATH_CONF = '/etc/multipath.conf' NSSWITCH_CONF = '/etc/nsswitch.conf' NTP_CONF = '/etc/ntp.conf' IPTABLES_CONFIG = '/etc/sysconfig/iptables-config' HOSTS = '/etc/hosts' HOSTS_ALLOW = '/etc/hosts.allow' HOSTS_DENY = '/etc/hosts.deny' DHCP_LEASE_DIR = ['/var/lib/dhclient', '/var/lib/dhcp3'] OPENVSWITCH_LOG_DIR = '@LOGDIR@/' OPENVSWITCH_DEFAULT_SWITCH = '/etc/default/openvswitch-switch' # Debian OPENVSWITCH_SYSCONFIG_SWITCH = '/etc/sysconfig/openvswitch' # RHEL OPENVSWITCH_DEFAULT_CONTROLLER = '/etc/default/openvswitch-controller' OPENVSWITCH_CONF_DB = '@DBDIR@/conf.db' OPENVSWITCH_COMPACT_DB = '@DBDIR@/bugtool-compact-conf.db' OPENVSWITCH_VSWITCHD_PID = '@RUNDIR@/ovs-vswitchd.pid' VAR_LOG_DIR = '/var/log/' VAR_LOG_CORE_DIR = '/var/log/core' YUM_LOG = '/var/log/yum.log' YUM_REPOS_DIR = '/etc/yum.repos.d' # # External programs # os.environ['PATH'] = '/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:@pkgdatadir@/scripts' ARP = 'arp' CAT = 'cat' CHKCONFIG = 'chkconfig' DF = 'df' DMESG = 'dmesg' DMIDECODE = 'dmidecode' DMSETUP = 'dmsetup' DPKG_QUERY = 'dpkg-query' ETHTOOL = 'ethtool' FDISK = 'fdisk' FIND = 'find' IFCONFIG = 'ifconfig' IPTABLES = 'iptables' ISCSIADM = 'iscsiadm' LOSETUP = 'losetup' LS = 'ls' LSPCI = 'lspci' MD5SUM = 'md5sum' MODINFO = 'modinfo' MPPUTIL = 'mppUtil' MULTIPATHD = 'multipathd' NETSTAT = 'netstat' OVS_DPCTL = 'ovs-dpctl' OVS_OFCTL = 'ovs-ofctl' OVS_VSCTL = 'ovs-vsctl' PS = 'ps' ROUTE = 'route' 
RPM = 'rpm' SG_MAP = 'sg_map' SYSCTL = 'sysctl' TC = 'tc' UPTIME = 'uptime' ZCAT = 'zcat' # # PII -- Personally identifiable information. Of particular concern are # things that would identify customers, or their network topology. # Passwords are never to be included in any bug report, regardless of any PII # declaration. # # NO -- No PII will be in these entries. # YES -- PII will likely or certainly be in these entries. # MAYBE -- The user may wish to audit these entries for PII. # IF_CUSTOMIZED -- If the files are unmodified, then they will contain no PII, # but since we encourage customers to edit these files, PII may have been # introduced by the customer. This is used in particular for the networking # scripts in dom0. # PII_NO = 'no' PII_YES = 'yes' PII_MAYBE = 'maybe' PII_IF_CUSTOMIZED = 'if_customized' KEY = 0 PII = 1 MIN_SIZE = 2 MAX_SIZE = 3 MIN_TIME = 4 MAX_TIME = 5 MIME = 6 CHECKED = 7 HIDDEN = 8 MIME_DATA = 'application/data' MIME_TEXT = 'text/plain' INVENTORY_XML_ROOT = "system-status-inventory" INVENTORY_XML_SUMMARY = 'system-summary' INVENTORY_XML_ELEMENT = 'inventory-entry' CAP_XML_ROOT = "system-status-capabilities" CAP_XML_ELEMENT = 'capability' CAP_BOOT_LOADER = 'boot-loader' CAP_DISK_INFO = 'disk-info' CAP_HARDWARE_INFO = 'hardware-info' CAP_KERNEL_INFO = 'kernel-info' CAP_LOSETUP_A = 'loopback-devices' CAP_MULTIPATH = 'multipath' CAP_NETWORK_CONFIG = 'network-config' CAP_NETWORK_INFO = 'network-info' CAP_NETWORK_STATUS = 'network-status' CAP_OPENVSWITCH_LOGS = 'ovs-system-logs' CAP_PROCESS_LIST = 'process-list' CAP_SYSTEM_LOGS = 'system-logs' CAP_SYSTEM_SERVICES = 'system-services' CAP_YUM = 'yum' KB = 1024 MB = 1024 * 1024 caps = {} cap_sizes = {} unlimited_data = False dbg = False # Default value for the number of rotated logs. 
log_days = 20 def cap(key, pii=PII_MAYBE, min_size=-1, max_size=-1, min_time=-1, max_time=-1, mime=MIME_TEXT, checked=True, hidden=False): caps[key] = (key, pii, min_size, max_size, min_time, max_time, mime, checked, hidden) cap_sizes[key] = 0 cap(CAP_BOOT_LOADER, PII_NO, max_size=3*KB, max_time=5) cap(CAP_DISK_INFO, PII_MAYBE, max_size=50*KB, max_time=20) cap(CAP_HARDWARE_INFO, PII_MAYBE, max_size=2*MB, max_time=20) cap(CAP_KERNEL_INFO, PII_MAYBE, max_size=120*KB, max_time=5) cap(CAP_LOSETUP_A, PII_MAYBE, max_size=KB, max_time=5) cap(CAP_MULTIPATH, PII_MAYBE, max_size=20*KB, max_time=10) cap(CAP_NETWORK_CONFIG, PII_IF_CUSTOMIZED, min_size=0, max_size=5*MB) cap(CAP_NETWORK_INFO, PII_YES, max_size=50*MB, max_time=30) cap(CAP_NETWORK_STATUS, PII_YES, max_size=-1, max_time=30) cap(CAP_OPENVSWITCH_LOGS, PII_MAYBE, max_size=-1, max_time=5) cap(CAP_PROCESS_LIST, PII_YES, max_size=30*KB, max_time=20) cap(CAP_SYSTEM_LOGS, PII_MAYBE, max_size=200*MB, max_time=5) cap(CAP_SYSTEM_SERVICES, PII_NO, max_size=5*KB, max_time=20) cap(CAP_YUM, PII_IF_CUSTOMIZED, max_size=10*KB, max_time=30) ANSWER_YES_TO_ALL = False SILENT_MODE = False entries = None data = {} dev_null = open('/dev/null', 'r+') def output(x): global SILENT_MODE if not SILENT_MODE: print x def output_ts(x): output("[%s] %s" % (time.strftime("%x %X %Z"), x)) def cmd_output(cap, args, label=None, filter=None, binary=False): if cap in entries: if not label: if isinstance(args, list): a = [aa for aa in args] a[0] = os.path.basename(a[0]) label = ' '.join(a) else: label = args data[label] = {'cap': cap, 'cmd_args': args, 'filter': filter, 'binary': binary} def file_output(cap, path_list, newest_first=False): """ If newest_first is True, the list of files in path_list is sorted by file modification time in descending order, else its sorted in ascending order. 
""" if cap in entries: path_entries = [] for path in path_list: try: s = os.stat(path) except OSError, e: continue path_entries.append((path, s)) mtime = lambda(path, stat): stat.st_mtime path_entries.sort(key=mtime, reverse=newest_first) for p in path_entries: if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ cap_sizes[cap] < caps[cap][MAX_SIZE]: data[p] = {'cap': cap, 'filename': p[0]} cap_sizes[cap] += p[1].st_size else: output("Omitting %s, size constraint of %s exceeded" % (p[0], cap)) def tree_output(cap, path, pattern=None, negate=False, newest_first=False): """ Walks the directory tree rooted at path. Files in current dir are processed before files in sub-dirs. """ if cap in entries: if os.path.exists(path): for root, dirs, files in os.walk(path): fns = [fn for fn in [os.path.join(root, f) for f in files] if os.path.isfile(fn) and matches(fn, pattern, negate)] file_output(cap, fns, newest_first=newest_first) def func_output(cap, label, func): if cap in entries: t = str(func).split() data[label] = {'cap': cap, 'func': func} def log_output(cap, logs, newest_first=False): global log_days file_output(cap, logs) file_output(cap, ['%s.%d' % (f, n) for n in range(1, log_days+1) for f in logs], \ newest_first=newest_first) file_output(cap, ['%s.%d.gz' % (f, n) for n in range(1, log_days+1) for f in logs], \ newest_first=newest_first) def collect_data(): process_lists = {} for (k, v) in data.items(): cap = v['cap'] if v.has_key('cmd_args'): v['output'] = StringIOmtime() if not process_lists.has_key(cap): process_lists[cap] = [] process_lists[cap].append( ProcOutput(v['cmd_args'], caps[cap][MAX_TIME], v['output'], v['filter'], v['binary'])) elif v.has_key('filename') and v['filename'].startswith('/proc/'): # proc files must be read into memory try: f = open(v['filename'], 'r') s = f.read() f.close() if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ cap_sizes[cap] < caps[cap][MAX_SIZE]: v['output'] = StringIOmtime(s) cap_sizes[cap] += len(s) else: output("Omitting 
%s, size constraint of %s exceeded" % (v['filename'], cap)) except: pass elif v.has_key('func'): try: s = v['func'](cap) except Exception, e: s = str(e) if unlimited_data or caps[cap][MAX_SIZE] == -1 or \ cap_sizes[cap] < caps[cap][MAX_SIZE]: v['output'] = StringIOmtime(s) cap_sizes[cap] += len(s) else: output("Omitting %s, size constraint of %s exceeded" % (k, cap)) run_procs(process_lists.values()) def main(argv=None): global ANSWER_YES_TO_ALL, SILENT_MODE global entries, data, dbg, unlimited_data, log_days # Filter flags only_ovs_info = False collect_all_info = True # we need access to privileged files, exit if we are not running as root if os.getuid() != 0: print >>sys.stderr, "Error: ovs-bugtool must be run as root" return 1 output_file = None output_type = 'tar.gz' output_fd = -1 if argv is None: argv = sys.argv try: (options, params) = getopt.gnu_getopt( argv, 'sy', ['capabilities', 'silent', 'yestoall', 'entries=', 'output=', 'outfd=', 'outfile=', 'all', 'unlimited', 'debug', 'ovs', 'log-days=']) except getopt.GetoptError, opterr: print >>sys.stderr, opterr return 2 try: load_plugins(True) except: pass entries = [e for e in caps.keys() if caps[e][CHECKED]] for (k, v) in options: if k == '--capabilities': update_capabilities() print_capabilities() return 0 if k == '--output': if v in ['tar', 'tar.bz2', 'tar.gz', 'zip']: output_type = v else: print >>sys.stderr, "Invalid output format '%s'" % v return 2 # "-s" or "--silent" means suppress output (except for the final # output filename at the end) if k in ['-s', '--silent']: SILENT_MODE = True if k == '--entries' and v != '': entries = v.split(',') # If the user runs the script with "-y" or "--yestoall" we don't ask # all the really annoying questions. 
if k in ['-y', '--yestoall']: ANSWER_YES_TO_ALL = True if k == '--outfd': output_fd = int(v) try: old = fcntl.fcntl(output_fd, fcntl.F_GETFD) fcntl.fcntl(output_fd, fcntl.F_SETFD, old | fcntl.FD_CLOEXEC) except: print >>sys.stderr, "Invalid output file descriptor", output_fd return 2 if k == '--outfile': output_file = v elif k == '--all': entries = caps.keys() elif k == '--unlimited': unlimited_data = True elif k == '--debug': dbg = True ProcOutput.debug = True if k == '--ovs': only_ovs_info = True collect_all_info = False if k == '--log-days': log_days = int(v) if len(params) != 1: print >>sys.stderr, "Invalid additional arguments", str(params) return 2 if output_fd != -1 and output_type != 'tar': print >>sys.stderr, "Option '--outfd' only valid with '--output=tar'" return 2 if output_fd != -1 and output_file is not None: print >>sys.stderr, "Cannot set both '--outfd' and '--outfile'" return 2 if ANSWER_YES_TO_ALL: output("Warning: '--yestoall' argument provided, will not prompt for individual files.") output(''' This application will collate dmesg output, details of the hardware configuration of your machine, information about the build of openvswitch that you are using, plus, if you allow it, various logs. The collated information will be saved as a .%s for archiving or sending to a Technical Support Representative. The logs may contain private information, and if you are at all worried about that, you should exit now, or you should explicitly exclude those logs from the archive. 
''' % output_type) # assemble potential data file_output(CAP_BOOT_LOADER, [GRUB_CONFIG]) cmd_output(CAP_BOOT_LOADER, [LS, '-lR', '/boot']) cmd_output(CAP_BOOT_LOADER, [MD5SUM, BOOT_KERNEL, BOOT_INITRD], label='vmlinuz-initrd.md5sum') cmd_output(CAP_DISK_INFO, [FDISK, '-l']) file_output(CAP_DISK_INFO, [PROC_PARTITIONS, PROC_MOUNTS]) file_output(CAP_DISK_INFO, [FSTAB, ISCSI_CONF, ISCSI_INITIATOR]) cmd_output(CAP_DISK_INFO, [DF, '-alT']) cmd_output(CAP_DISK_INFO, [DF, '-alTi']) if len(pidof('iscsid')) != 0: cmd_output(CAP_DISK_INFO, [ISCSIADM, '-m', 'node']) cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/scsi_host']) cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/scsi_disk']) cmd_output(CAP_DISK_INFO, [LS, '-R', '/sys/class/fc_transport']) cmd_output(CAP_DISK_INFO, [SG_MAP, '-x']) func_output(CAP_DISK_INFO, 'scsi-hosts', dump_scsi_hosts) file_output(CAP_HARDWARE_INFO, [PROC_CPUINFO, PROC_MEMINFO, PROC_IOPORTS, PROC_INTERRUPTS]) cmd_output(CAP_HARDWARE_INFO, [DMIDECODE]) cmd_output(CAP_HARDWARE_INFO, [LSPCI, '-n']) cmd_output(CAP_HARDWARE_INFO, [LSPCI, '-vv']) file_output(CAP_HARDWARE_INFO, [PROC_USB_DEV, PROC_SCSI]) file_output(CAP_HARDWARE_INFO, [SYSCONFIG_HWCONF]) cmd_output(CAP_HARDWARE_INFO, [LS, '-lR', '/dev']) file_output(CAP_KERNEL_INFO, [PROC_VERSION, PROC_MODULES, PROC_DEVICES, PROC_FILESYSTEMS, PROC_CMDLINE]) cmd_output(CAP_KERNEL_INFO, [ZCAT, PROC_CONFIG], label='config') cmd_output(CAP_KERNEL_INFO, [SYSCTL, '-A']) file_output(CAP_KERNEL_INFO, [MODPROBE_CONF]) tree_output(CAP_KERNEL_INFO, MODPROBE_DIR) func_output(CAP_KERNEL_INFO, 'modinfo', module_info) cmd_output(CAP_LOSETUP_A, [LOSETUP, '-a']) file_output(CAP_MULTIPATH, [MULTIPATH_CONF, MPP_CONF]) cmd_output(CAP_MULTIPATH, [DMSETUP, 'table']) func_output(CAP_MULTIPATH, 'multipathd_topology', multipathd_topology) cmd_output(CAP_MULTIPATH, [MPPUTIL, '-a']) if CAP_MULTIPATH in entries and collect_all_info: dump_rdac_groups(CAP_MULTIPATH) tree_output(CAP_NETWORK_CONFIG, SYSCONFIG_NETWORK_SCRIPTS, 
IFCFG_RE) tree_output(CAP_NETWORK_CONFIG, SYSCONFIG_NETWORK_SCRIPTS, ROUTE_RE) file_output(CAP_NETWORK_CONFIG, [SYSCONFIG_NETWORK, RESOLV_CONF, NSSWITCH_CONF, HOSTS]) file_output(CAP_NETWORK_CONFIG, [NTP_CONF, IPTABLES_CONFIG, HOSTS_ALLOW, HOSTS_DENY]) file_output(CAP_NETWORK_CONFIG, [OPENVSWITCH_DEFAULT_SWITCH, OPENVSWITCH_SYSCONFIG_SWITCH, OPENVSWITCH_DEFAULT_CONTROLLER]) cmd_output(CAP_NETWORK_INFO, [IFCONFIG, '-a']) cmd_output(CAP_NETWORK_INFO, [ROUTE, '-n']) cmd_output(CAP_NETWORK_INFO, [ARP, '-n']) cmd_output(CAP_NETWORK_INFO, [NETSTAT, '-an']) for dir in DHCP_LEASE_DIR: tree_output(CAP_NETWORK_INFO, dir) for table in ['filter', 'nat', 'mangle', 'raw', 'security']: cmd_output(CAP_NETWORK_INFO, [IPTABLES, '-t', table, '-nL']) for p in os.listdir('/sys/class/net/'): try: f = open('/sys/class/net/%s/type' % p, 'r') t = f.readline() f.close() if os.path.islink('/sys/class/net/%s/device' % p) and int(t) == 1: # ARPHRD_ETHER cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-S', p]) if not p.startswith('vif') and not p.startswith('tap'): cmd_output(CAP_NETWORK_INFO, [ETHTOOL, p]) cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-k', p]) cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-i', p]) cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-c', p]) if int(t) == 1: cmd_output(CAP_NETWORK_INFO, [TC, '-s', '-d', 'class', 'show', 'dev', p]) except: pass tree_output(CAP_NETWORK_INFO, PROC_NET_BONDING_DIR) tree_output(CAP_NETWORK_INFO, PROC_NET_VLAN_DIR) cmd_output(CAP_NETWORK_INFO, [TC, '-s', 'qdisc']) file_output(CAP_NETWORK_INFO, [PROC_NET_SOFTNET_STAT]) collect_ovsdb() if os.path.exists(OPENVSWITCH_VSWITCHD_PID): cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'show', '-s']) for d in dp_list(): cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', d]) cmd_output(CAP_PROCESS_LIST, [PS, 'wwwaxf', '-eo', 'pid,tty,stat,time,nice,psr,pcpu,pmem,nwchan,wchan:25,args'], label='process-tree') func_output(CAP_PROCESS_LIST, 'fd_usage', fd_usage) system_logs = ([ VAR_LOG_DIR + x for x in ['crit.log', 'kern.log', 
'daemon.log', 'user.log', 'syslog', 'messages', 'secure', 'debug', 'dmesg', 'boot']]) ovs_logs = ([ OPENVSWITCH_LOG_DIR + x for x in ['ovs-vswitchd.log', 'ovsdb-server.log', 'ovs-xapi-sync.log', 'ovs-monitor-ipsec.log', 'ovs-ctl.log']]) log_output(CAP_SYSTEM_LOGS, system_logs) log_output(CAP_OPENVSWITCH_LOGS, ovs_logs) if not os.path.exists('/var/log/dmesg') and not os.path.exists('/var/log/boot'): cmd_output(CAP_SYSTEM_LOGS, [DMESG]) cmd_output(CAP_SYSTEM_SERVICES, [CHKCONFIG, '--list']) tree_output(CAP_SYSTEM_LOGS, VAR_LOG_CORE_DIR) file_output(CAP_YUM, [YUM_LOG]) tree_output(CAP_YUM, YUM_REPOS_DIR) cmd_output(CAP_YUM, [RPM, '-qa']) file_output(CAP_YUM, [APT_SOURCES_LIST]) tree_output(CAP_YUM, APT_SOURCES_LIST_D) cmd_output(CAP_YUM, [DPKG_QUERY, '-W', '-f=${Package} ${Version} ${Status}\n'], 'dpkg-packages') # Filter out ovs relevant information if --ovs option passed # else collect all information filters = set() if only_ovs_info: filters.add('ovs') ovs_info_caps = [CAP_NETWORK_STATUS, CAP_SYSTEM_LOGS, CAP_NETWORK_CONFIG] ovs_info_list = ['process-tree'] # We cannot use iteritems, since we modify 'data' as we pass through for (k, v) in data.items(): cap = v['cap'] if 'filename' in v: info = k[0] else: info = k if info not in ovs_info_list and cap not in ovs_info_caps: del data[k] if filters: filter = ",".join(filters) else: filter = None try: load_plugins(filter=filter) except: pass # permit the user to filter out data # We cannot use iteritems, since we modify 'data' as we pass through for (k, v) in sorted(data.items()): cap = v['cap'] if 'filename' in v: key = k[0] else: key = k if not ANSWER_YES_TO_ALL and not yes("Include '%s'? 
[Y/n]: " % key): del data[k] # collect selected data now output_ts('Running commands to collect data') collect_data() subdir = "bug-report-%s" % time.strftime("%Y%m%d%H%M%S") # include inventory data['inventory.xml'] = {'cap': None, 'output': StringIOmtime(make_inventory(data, subdir))} # create archive if output_fd == -1: if output_file is None: dirname = BUG_DIR else: dirname = os.path.dirname(output_file) if dirname and not os.path.exists(dirname): try: os.makedirs(dirname) except: pass if output_fd == -1: output_ts('Creating output file') if output_type.startswith('tar'): make_tar(subdir, output_type, output_fd, output_file) else: make_zip(subdir, output_file) if dbg: print >>sys.stderr, "Category sizes (max, actual):\n" for c in caps.keys(): print >>sys.stderr, " %s (%d, %d)" % (c, caps[c][MAX_SIZE], cap_sizes[c]) cleanup_ovsdb() return 0 def dump_scsi_hosts(cap): output = '' l = os.listdir('/sys/class/scsi_host') l.sort() for h in l: procname = '' try: f = open('/sys/class/scsi_host/%s/proc_name' % h) procname = f.readline().strip("\n") f.close() except: pass modelname = None try: f = open('/sys/class/scsi_host/%s/model_name' % h) modelname = f.readline().strip("\n") f.close() except: pass output += "%s:\n" %h output += " %s%s\n" % (procname, modelname and (" -> %s" % modelname) or '') return output def module_info(cap): output = StringIO.StringIO() modules = open(PROC_MODULES, 'r') procs = [] for line in modules: module = line.split()[0] procs.append(ProcOutput([MODINFO, module], caps[cap][MAX_TIME], output)) modules.close() run_procs([procs]) return output.getvalue() def multipathd_topology(cap): pipe = Popen([MULTIPATHD, '-k'], bufsize=1, stdin=PIPE, stdout=PIPE, stderr=dev_null) stdout, stderr = pipe.communicate('show topology') return stdout def dp_list(): output = StringIO.StringIO() procs = [ProcOutput([OVS_DPCTL, 'dump-dps'], caps[CAP_NETWORK_STATUS][MAX_TIME], output)] run_procs([procs]) if not procs[0].timed_out: return 
output.getvalue().splitlines() return [] def collect_ovsdb(): if not os.path.isfile(OPENVSWITCH_CONF_DB): return max_size = 10*MB try: if os.path.getsize(OPENVSWITCH_CONF_DB) > max_size: if os.path.isfile(OPENVSWITCH_COMPACT_DB): os.unlink(OPENVSWITCH_COMPACT_DB) output = StringIO.StringIO() max_time = 5 procs = [ProcOutput(['ovsdb-tool', 'compact', OPENVSWITCH_CONF_DB, OPENVSWITCH_COMPACT_DB], max_time, output)] run_procs([procs]) file_output(CAP_NETWORK_STATUS, [OPENVSWITCH_COMPACT_DB]) else: file_output(CAP_NETWORK_STATUS, [OPENVSWITCH_CONF_DB]) except OSError, e: return def cleanup_ovsdb(): try: if os.path.isfile(OPENVSWITCH_COMPACT_DB): os.unlink(OPENVSWITCH_COMPACT_DB) except: return def fd_usage(cap): output = '' fd_dict = {} for d in [p for p in os.listdir('/proc') if p.isdigit()]: try: fh = open('/proc/'+d+'/cmdline') name = fh.readline() num_fds = len(os.listdir(os.path.join('/proc/'+d+'/fd'))) if num_fds > 0: if not num_fds in fd_dict: fd_dict[num_fds] = [] fd_dict[num_fds].append(name.replace('\0', ' ').strip()) finally: fh.close() keys = fd_dict.keys() keys.sort(lambda a, b: int(b) - int(a)) for k in keys: output += "%s: %s\n" % (k, str(fd_dict[k])) return output def dump_rdac_groups(cap): output = StringIO.StringIO() procs = [ProcOutput([MPPUTIL, '-a'], caps[cap][MAX_TIME], output)] run_procs([procs]) if not procs[0].timed_out: proc_line = 0 for line in output.getvalue().splitlines(): if line.startswith('ID'): proc_line = 2 elif line.startswith('----'): proc_line -= 1 elif proc_line > 0: group, _ = line.split(None, 1) cmd_output(cap, [MPPUTIL, '-g', group]) def load_plugins(just_capabilities=False, filter=None): def getText(nodelist): rc = "" for node in nodelist: if node.nodeType == node.TEXT_NODE: rc += node.data return rc.encode() def getBoolAttr(el, attr, default=False): ret = default val = el.getAttribute(attr).lower() if val in ['true', 'false', 'yes', 'no']: ret = val in ['true', 'yes'] return ret for dir in [d for d in os.listdir(PLUGIN_DIR) 
if os.path.isdir(os.path.join(PLUGIN_DIR, d))]: if not caps.has_key(dir): if not os.path.exists("%s/%s.xml" % (PLUGIN_DIR, dir)): continue xmldoc = parse("%s/%s.xml" % (PLUGIN_DIR, dir)) assert xmldoc.documentElement.tagName == "capability" pii, min_size, max_size, min_time, max_time, mime = \ PII_MAYBE, -1,-1,-1,-1, MIME_TEXT if xmldoc.documentElement.getAttribute("pii") in [PII_NO, PII_YES, PII_MAYBE, PII_IF_CUSTOMIZED]: pii = xmldoc.documentElement.getAttribute("pii") if xmldoc.documentElement.getAttribute("min_size") != '': min_size = long(xmldoc.documentElement.getAttribute("min_size")) if xmldoc.documentElement.getAttribute("max_size") != '': max_size = long(xmldoc.documentElement.getAttribute("max_size")) if xmldoc.documentElement.getAttribute("min_time") != '': min_time = int(xmldoc.documentElement.getAttribute("min_time")) if xmldoc.documentElement.getAttribute("max_time") != '': max_time = int(xmldoc.documentElement.getAttribute("max_time")) if xmldoc.documentElement.getAttribute("mime") in [MIME_DATA, MIME_TEXT]: mime = xmldoc.documentElement.getAttribute("mime") checked = getBoolAttr(xmldoc.documentElement, 'checked', True) hidden = getBoolAttr(xmldoc.documentElement, 'hidden', False) cap(dir, pii, min_size, max_size, min_time, max_time, mime, checked, hidden) if just_capabilities: continue plugdir = os.path.join(PLUGIN_DIR, dir) for file in [f for f in os.listdir(plugdir) if f.endswith('.xml')]: xmldoc = parse(os.path.join(plugdir, file)) assert xmldoc.documentElement.tagName == "collect" for el in xmldoc.documentElement.getElementsByTagName("*"): filters_tmp = el.getAttribute("filters") if filters_tmp == '': filters = [] else: filters = filters_tmp.split(',') if not(filter is None or filter in filters): continue if el.tagName == "files": newest_first = getBoolAttr(el, 'newest_first') if el.getAttribute("type") == "logs": log_output(dir, getText(el.childNodes).split(), newest_first=newest_first) else: file_output(dir, getText(el.childNodes).split(), 
newest_first=newest_first) elif el.tagName == "directory": pattern = el.getAttribute("pattern") if pattern == '': pattern = None negate = getBoolAttr(el, 'negate') newest_first = getBoolAttr(el, 'newest_first') tree_output(dir, getText(el.childNodes), pattern and re.compile(pattern) or None, negate=negate, newest_first=newest_first) elif el.tagName == "command": label = el.getAttribute("label") if label == '': label = None binary = getBoolAttr(el, 'binary') cmd_output(dir, getText(el.childNodes), label, binary=binary) def make_tar(subdir, suffix, output_fd, output_file): global SILENT_MODE, data mode = 'w' if suffix == 'tar.bz2': mode = 'w:bz2' elif suffix == 'tar.gz': mode = 'w:gz' if output_fd == -1: if output_file is None: filename = "%s/%s.%s" % (BUG_DIR, subdir, suffix) else: filename = output_file old_umask = os.umask(0077) tf = tarfile.open(filename, mode) os.umask(old_umask) else: tf = tarfile.open(None, 'w', os.fdopen(output_fd, 'a')) try: for (k, v) in data.items(): try: tar_filename = os.path.join(subdir, construct_filename(k, v)) ti = tarfile.TarInfo(tar_filename) ti.uname = 'root' ti.gname = 'root' if v.has_key('output'): ti.mtime = v['output'].mtime ti.size = len(v['output'].getvalue()) v['output'].seek(0) tf.addfile(ti, v['output']) elif v.has_key('filename'): s = os.stat(v['filename']) ti.mtime = s.st_mtime ti.size = s.st_size tf.addfile(ti, file(v['filename'])) except: pass finally: tf.close() if output_fd == -1: output ('Writing tarball %s successful.' 
% filename) if SILENT_MODE: print filename def make_zip(subdir, output_file): global SILENT_MODE, data if output_file is None: filename = "%s/%s.zip" % (BUG_DIR, subdir) else: filename = output_file old_umask = os.umask(0077) zf = zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) os.umask(old_umask) try: for (k, v) in data.items(): try: dest = os.path.join(subdir, construct_filename(k, v)) if v.has_key('output'): zf.writestr(dest, v['output'].getvalue()) else: if os.stat(v['filename']).st_size < 50: compress_type = zipfile.ZIP_STORED else: compress_type = zipfile.ZIP_DEFLATED zf.write(v['filename'], dest, compress_type) except: pass finally: zf.close() output ('Writing archive %s successful.' % filename) if SILENT_MODE: print filename def make_inventory(inventory, subdir): document = getDOMImplementation().createDocument( None, INVENTORY_XML_ROOT, None) # create summary entry s = document.createElement(INVENTORY_XML_SUMMARY) user = os.getenv('SUDO_USER', os.getenv('USER')) if user: s.setAttribute('user', user) s.setAttribute('date', time.strftime('%c')) s.setAttribute('hostname', platform.node()) s.setAttribute('uname', ' '.join(platform.uname())) s.setAttribute('uptime', commands.getoutput(UPTIME)) document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s) map(lambda (k, v): inventory_entry(document, subdir, k, v), inventory.items()) return document.toprettyxml() def inventory_entry(document, subdir, k, v): try: el = document.createElement(INVENTORY_XML_ELEMENT) el.setAttribute('capability', v['cap']) el.setAttribute('filename', os.path.join(subdir, construct_filename(k, v))) el.setAttribute('md5sum', md5sum(v)) document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(el) except: pass def md5sum(d): m = md5.new() if d.has_key('filename'): f = open(d['filename']) data = f.read(1024) while len(data) > 0: m.update(data) data = f.read(1024) f.close() elif d.has_key('output'): m.update(d['output'].getvalue()) return m.hexdigest() def 
construct_filename(k, v): if v.has_key('filename'): if v['filename'][0] == '/': return v['filename'][1:] else: return v['filename'] s = k.replace(' ', '-') s = s.replace('--', '-') s = s.replace('/', '%') if s.find('.') == -1: s += '.out' return s def update_capabilities(): pass def update_cap_size(cap, size): update_cap(cap, MIN_SIZE, size) update_cap(cap, MAX_SIZE, size) update_cap(cap, CHECKED, size > 0) def update_cap(cap, k, v): global caps l = list(caps[cap]) l[k] = v caps[cap] = tuple(l) def size_of_dir(d, pattern=None, negate=False): if os.path.isdir(d): return size_of_all([os.path.join(d, fn) for fn in os.listdir(d)], pattern, negate) else: return 0 def size_of_all(files, pattern=None, negate=False): return sum([size_of(f, pattern, negate) for f in files]) def matches(f, pattern, negate): if negate: return not matches(f, pattern, False) else: return pattern is None or pattern.match(f) def size_of(f, pattern, negate): if os.path.isfile(f) and matches(f, pattern, negate): return os.stat(f)[6] else: return size_of_dir(f, pattern, negate) def print_capabilities(): document = getDOMImplementation().createDocument( "ns", CAP_XML_ROOT, None) map(lambda key: capability(document, key), [k for k in caps.keys() if not caps[k][HIDDEN]]) print document.toprettyxml() def capability(document, key): c = caps[key] el = document.createElement(CAP_XML_ELEMENT) el.setAttribute('key', c[KEY]) el.setAttribute('pii', c[PII]) el.setAttribute('min-size', str(c[MIN_SIZE])) el.setAttribute('max-size', str(c[MAX_SIZE])) el.setAttribute('min-time', str(c[MIN_TIME])) el.setAttribute('max-time', str(c[MAX_TIME])) el.setAttribute('content-type', c[MIME]) el.setAttribute('default-checked', c[CHECKED] and 'yes' or 'no') document.getElementsByTagName(CAP_XML_ROOT)[0].appendChild(el) def prettyDict(d): format = '%%-%ds: %%s' % max(map(len, [k for k, _ in d.items()])) return '\n'.join([format % i for i in d.items()]) + '\n' def yes(prompt): yn = raw_input(prompt) return len(yn) == 0 or 
yn.lower()[0] == 'y' partition_re = re.compile(r'(.*[0-9]+$)|(^xvd)') def disk_list(): disks = [] try: f = open('/proc/partitions') f.readline() f.readline() for line in f.readlines(): (major, minor, blocks, name) = line.split() if int(major) < 254 and not partition_re.match(name): disks.append(name) f.close() except: pass return disks class ProcOutput: debug = False def __init__(self, command, max_time, inst=None, filter=None, binary=False): self.command = command self.max_time = max_time self.inst = inst self.running = False self.status = None self.timed_out = False self.failed = False self.timeout = int(time.time()) + self.max_time self.filter = filter self.filter_state = {} if binary: self.bufsize = 1048576 # 1MB buffer else: self.bufsize = 1 # line buffered def __del__(self): self.terminate() def cmdAsStr(self): return isinstance(self.command, list) and ' '.join(self.command) or self.command def run(self): self.timed_out = False try: if ProcOutput.debug: output_ts("Starting '%s'" % self.cmdAsStr()) self.proc = Popen(self.command, bufsize=self.bufsize, stdin=dev_null, stdout=PIPE, stderr=dev_null, shell=isinstance(self.command, str)) old = fcntl.fcntl(self.proc.stdout.fileno(), fcntl.F_GETFD) fcntl.fcntl(self.proc.stdout.fileno(), fcntl.F_SETFD, old | fcntl.FD_CLOEXEC) self.running = True self.failed = False except: output_ts("'%s' failed" % self.cmdAsStr()) self.running = False self.failed = True def terminate(self): if self.running: try: self.proc.stdout.close() os.kill(self.proc.pid, SIGTERM) except: pass self.proc = None self.running = False self.status = SIGTERM def read_line(self): assert self.running if self.bufsize == 1: line = self.proc.stdout.readline() else: line = self.proc.stdout.read(self.bufsize) if line == '': # process exited self.proc.stdout.close() self.status = self.proc.wait() self.proc = None self.running = False else: if self.filter: line = self.filter(line, self.filter_state) if self.inst: self.inst.write(line) def run_procs(procs): 
while True: pipes = [] active_procs = [] for pp in procs: for p in pp: if p.running: active_procs.append(p) pipes.append(p.proc.stdout) break elif p.status == None and not p.failed and not p.timed_out: p.run() if p.running: active_procs.append(p) pipes.append(p.proc.stdout) break if len(pipes) == 0: # all finished break (i, o, x) = select(pipes, [], [], 1.0) now = int(time.time()) # handle process output for p in active_procs: if p.proc.stdout in i: p.read_line() # handle timeout if p.running and now > p.timeout: output_ts("'%s' timed out" % p.cmdAsStr()) if p.inst: p.inst.write("\n** timeout **\n") p.timed_out = True p.terminate() def pidof(name): pids = [] for d in [p for p in os.listdir('/proc') if p.isdigit()]: try: if os.path.basename(os.readlink('/proc/%s/exe' % d)) == name: pids.append(int(d)) except: pass return pids class StringIOmtime(StringIO.StringIO): def __init__(self, buf=''): StringIO.StringIO.__init__(self, buf) self.mtime = time.time() def write(self, s): StringIO.StringIO.write(self, s) self.mtime = time.time() if __name__ == "__main__": try: sys.exit(main()) except KeyboardInterrupt: print "\nInterrupted." 
sys.exit(3) openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/000077500000000000000000000000001226605124000226215ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/kernel-info/000077500000000000000000000000001226605124000250325ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/kernel-info/openvswitch.xml000066400000000000000000000013701226605124000301260ustar00rootroot00000000000000 /proc/slabinfo openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/network-status/000077500000000000000000000000001226605124000256335ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/network-status/openvswitch.xml000066400000000000000000000043071226605124000307320ustar00rootroot00000000000000 /usr/share/openvswitch/scripts/ovs-bugtool-tc-class-show /usr/share/openvswitch/scripts/ovs-bugtool-vsctl-show /usr/share/openvswitch/scripts/ovs-bugtool-ovsdb-dump /usr/share/openvswitch/scripts/ovs-bugtool-lacp-show /usr/share/openvswitch/scripts/ovs-bugtool-cfm-show /usr/share/openvswitch/scripts/ovs-bugtool-bfd-show /usr/share/openvswitch/scripts/ovs-bugtool-coverage-show /usr/share/openvswitch/scripts/ovs-bugtool-bond-show /usr/share/openvswitch/scripts/ovs-bugtool-memory-show /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-show /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-dump-flows /usr/share/openvswitch/scripts/ovs-bugtool-ovs-appctl-dpif /usr/share/openvswitch/scripts/ovs-bugtool-list-dbs openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/system-configuration.xml000066400000000000000000000016601226605124000275370ustar00rootroot00000000000000 
openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/system-configuration/000077500000000000000000000000001226605124000270125ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/system-configuration/openvswitch.xml000066400000000000000000000016041226605124000321060ustar00rootroot00000000000000 date --rfc-3339=seconds /usr/share/openvswitch/scripts/ovs-bugtool-daemons-ver openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/system-logs/000077500000000000000000000000001226605124000251075ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/utilities/bugtool/plugins/system-logs/openvswitch.xml000066400000000000000000000017051226605124000302050ustar00rootroot00000000000000 /etc/openvswitch /var/lib/openvswitch openvswitch-2.0.1+git20140120/utilities/nlmon.c000066400000000000000000000112531226605124000207560ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include #include #include #include #include #include #include #include #include "netlink.h" #include "netlink-socket.h" #include "ofpbuf.h" #include "poll-loop.h" #include "timeval.h" #include "util.h" #include "vlog.h" static const struct nl_policy rtnlgrp_link_policy[] = { [IFLA_IFNAME] = { .type = NL_A_STRING, .optional = false }, [IFLA_MASTER] = { .type = NL_A_U32, .optional = true }, }; int main(int argc OVS_UNUSED, char *argv[]) { uint64_t buf_stub[4096 / 64]; struct nl_sock *sock; struct ofpbuf buf; int error; set_program_name(argv[0]); vlog_set_levels(NULL, VLF_ANY_FACILITY, VLL_DBG); error = nl_sock_create(NETLINK_ROUTE, &sock); if (error) { ovs_fatal(error, "could not create rtnetlink socket"); } error = nl_sock_join_mcgroup(sock, RTNLGRP_LINK); if (error) { ovs_fatal(error, "could not join RTNLGRP_LINK multicast group"); } ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); for (;;) { error = nl_sock_recv(sock, &buf, false); if (error == EAGAIN) { /* Nothing to do. 
*/ } else if (error == ENOBUFS) { ovs_error(0, "network monitor socket overflowed"); } else if (error) { ovs_fatal(error, "error on network monitor socket"); } else { struct iff_flag { unsigned int flag; const char *name; }; static const struct iff_flag flags[] = { { IFF_UP, "UP", }, { IFF_BROADCAST, "BROADCAST", }, { IFF_DEBUG, "DEBUG", }, { IFF_LOOPBACK, "LOOPBACK", }, { IFF_POINTOPOINT, "POINTOPOINT", }, { IFF_NOTRAILERS, "NOTRAILERS", }, { IFF_RUNNING, "RUNNING", }, { IFF_NOARP, "NOARP", }, { IFF_PROMISC, "PROMISC", }, { IFF_ALLMULTI, "ALLMULTI", }, { IFF_MASTER, "MASTER", }, { IFF_SLAVE, "SLAVE", }, { IFF_MULTICAST, "MULTICAST", }, { IFF_PORTSEL, "PORTSEL", }, { IFF_AUTOMEDIA, "AUTOMEDIA", }, { IFF_DYNAMIC, "DYNAMIC", }, }; struct nlattr *attrs[ARRAY_SIZE(rtnlgrp_link_policy)]; struct nlmsghdr *nlh; struct ifinfomsg *iim; int i; nlh = ofpbuf_at(&buf, 0, NLMSG_HDRLEN); iim = ofpbuf_at(&buf, NLMSG_HDRLEN, sizeof *iim); if (!iim) { ovs_error(0, "received bad rtnl message (no ifinfomsg)"); continue; } if (!nl_policy_parse(&buf, NLMSG_HDRLEN + sizeof(struct ifinfomsg), rtnlgrp_link_policy, attrs, ARRAY_SIZE(rtnlgrp_link_policy))) { ovs_error(0, "received bad rtnl message (policy)"); continue; } printf("netdev %s changed (%s):\n", nl_attr_get_string(attrs[IFLA_IFNAME]), (nlh->nlmsg_type == RTM_NEWLINK ? "RTM_NEWLINK" : nlh->nlmsg_type == RTM_DELLINK ? "RTM_DELLINK" : nlh->nlmsg_type == RTM_GETLINK ? "RTM_GETLINK" : nlh->nlmsg_type == RTM_SETLINK ? 
"RTM_SETLINK" : "other")); printf("\tflags:"); for (i = 0; i < ARRAY_SIZE(flags); i++) { if (iim->ifi_flags & flags[i].flag) { printf(" %s", flags[i].name); } } printf("\n"); if (attrs[IFLA_MASTER]) { uint32_t idx = nl_attr_get_u32(attrs[IFLA_MASTER]); char ifname[IFNAMSIZ]; if (!if_indextoname(idx, ifname)) { strcpy(ifname, "unknown"); } printf("\tmaster=%"PRIu32" (%s)\n", idx, ifname); } } nl_sock_wait(sock, POLLIN); poll_block(); } } openvswitch-2.0.1+git20140120/utilities/ovs-appctl.8.in000066400000000000000000000206341226605124000222600ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .TH ovs\-appctl 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .ds PN ovs\-appctl . .SH NAME ovs\-appctl \- utility for configuring running Open vSwitch daemons . .SH SYNOPSIS \fBovs\-appctl\fR [\fB\-\-target=\fItarget\fR | \fB\-t\fR \fItarget\fR] \fIcommand \fR[\fIarg\fR...] .br \fBovs\-appctl\fR \-\-help .br \fBovs\-appctl\fR \-\-version .SH DESCRIPTION Open vSwitch daemons accept certain commands at runtime to control their behavior and query their settings. Every daemon accepts a common set of commands documented under \fBCOMMON COMMANDS\fR below. Some daemons support additional commands documented in their own manpages. \fBovs\-vswitchd\fR in particular accepts a number of additional commands documented in \fBovs\-vswitchd\fR(8). .PP The \fBovs\-appctl\fR program provides a simple way to invoke these commands. The command to be sent is specified on \fBovs\-appctl\fR's command line as non-option arguments. \fBovs\-appctl\fR sends the command and prints the daemon's response on standard output. .PP In normal use only a single option is accepted: .IP "\fB\-t \fItarget\fR" .IQ "\fB\-\-target=\fItarget\fR" Tells \fBovs\-appctl\fR which daemon to contact. .IP If \fItarget\fR begins with \fB/\fR it must name a Unix domain socket on which an Open vSwitch daemon is listening for control channel connections. 
By default, each daemon listens on a Unix domain socket named \fB@RUNDIR@/\fIprogram\fB.\fIpid\fB.ctl\fR, where \fIprogram\fR is the program's name and \fIpid\fR is its process ID. For example, if \fBovs\-vswitchd\fR has PID 123, it would listen on \fB@RUNDIR@/ovs\-vswitchd.123.ctl\fR. .IP Otherwise, \fBovs\-appctl\fR looks for a pidfile, that is, a file whose contents are the process ID of a running process as a decimal number, named \fB@RUNDIR@/\fItarget\fB.pid\fR. (The \fB\-\-pidfile\fR option makes an Open vSwitch daemon create a pidfile.) \fBovs\-appctl\fR reads the pidfile, then looks for a Unix socket named \fB@RUNDIR@/\fItarget\fB.\fIpid\fB.ctl\fR, where \fIpid\fR is replaced by the process ID read from the pidfile, and uses that file as if it had been specified directly as the target. .IP The default target is \fBovs\-vswitchd\fR. . .SH COMMON COMMANDS Every Open vSwitch daemon supports a common set of commands, which are documented in this section. . .SS GENERAL COMMANDS These commands display daemon-specific commands and the running version. Note that these commands are different from the \fB\-\-help\fR and \fB\-\-version\fR options that return information about the \fBovs\-appctl\fR utility itself. . .IP "\fBhelp\fR" Lists the commands supported by the target. . .IP "\fBversion\fR" Displays the version and compilation date of the target. . .SS LOGGING COMMANDS Open vSwitch has several log levels. The highest-severity log level is: . .IP "\fBoff\fR" No message is ever logged at this level, so setting a logging facility's log level to \fBoff\fR disables logging to that facility. . .PP The following log levels, in order of descending severity, are available: . .IP "\fBemer\fR" A major failure forced a process to abort. .IP "\fBerr\fR" A high-level operation or a subsystem failed. Attention is warranted. .IP "\fBwarn\fR" A low-level operation failed, but higher-level subsystems may be able to recover. 
.IP "\fBinfo\fR" Information that may be useful in retrospect when investigating a problem. .IP "\fBdbg\fR" Information useful only to someone with intricate knowledge of the system, or that would commonly cause too-voluminous log output. Log messages at this level are not logged by default. . .PP Every Open vSwitch daemon supports the following commands for examining and adjusting log levels. .IP "\fBvlog/list\fR" Lists the known logging modules and their current levels. . .IP "\fBvlog/set\fR [\fIspec\fR]" Sets logging levels. Without any \fIspec\fR, sets the log level for every module and facility to \fBdbg\fR. Otherwise, \fIspec\fR is a list of words separated by spaces or commas or colons, up to one from each category below: . .RS .IP \(bu A valid module name, as displayed by the \fBvlog/list\fR command on \fBovs\-appctl\fR(8), limits the log level change to the specified module. . .IP \(bu \fBsyslog\fR, \fBconsole\fR, or \fBfile\fR, to limit the log level change to only the system log, the console, or a file, respectively. . .IP \(bu \fBoff\fR, \fBemer\fR, \fBerr\fR, \fBwarn\fR, \fBinfo\fR, or \fBdbg\fR, to control the log level. Messages of the given severity or higher will be logged, and messages of lower severity will be filtered out. \fBoff\fR filters out all messages. .RE . .IP Case is not significant within \fIspec\fR. .IP Regardless of the log levels set for \fBfile\fR, logging to a file will not take place unless the target application was invoked with the \fB\-\-log\-file\fR option. .IP For compatibility with older versions of OVS, \fBany\fR is accepted as a word but has no effect. . .IP "\fBvlog/set PATTERN:\fIfacility\fB:\fIpattern\fR" Sets the log pattern for \fIfacility\fR to \fIpattern\fR. Each time a message is logged to \fIfacility\fR, \fIpattern\fR determines the message's formatting. Most characters in \fIpattern\fR are copied literally to the log, but special escapes beginning with \fB%\fR are expanded as follows: . 
.RS .IP \fB%A\fR The name of the application logging the message, e.g. \fBovs\-vswitchd\fR. . .IP \fB%c\fR The name of the module (as shown by \fBovs\-appctl vlog/list\fR) logging the message. . .IP \fB%d\fR The current date and time in ISO 8601 format (YYYY\-MM\-DD HH:MM:SS). . .IP \fB%d{\fIformat\fB}\fR The current date and time in the specified \fIformat\fR, which takes the same format as the \fItemplate\fR argument to \fBstrftime\fR(3). As an extension, any \fB#\fR characters in \fIformat\fR will be replaced by fractional seconds, e.g. use \fB%H:%M:%S.###\fR for the time to the nearest millisecond. Sub-second times are only approximate and currently decimal places after the third will always be reported as zero. . .IP \fB%D\fR The current UTC date and time in ISO 8601 format (YYYY\-MM\-DD HH:MM:SS). . .IP \fB%D{\fIformat\fB}\fR The current UTC date and time in the specified \fIformat\fR, which takes the same format as the \fItemplate\fR argument to \fBstrftime\fR(3). Supports the same extension for sub-second resolution as \fB%d{\fR...\fB}\fR. . .IP \fB%m\fR The message being logged. . .IP \fB%N\fR A serial number for this message within this run of the program, as a decimal number. The first message a program logs has serial number 1, the second one has serial number 2, and so on. . .IP \fB%n\fR A new-line. . .IP \fB%p\fR The level at which the message is logged, e.g. \fBDBG\fR. . .IP \fB%P\fR The program's process ID (pid), as a decimal number. . .IP \fB%r\fR The number of milliseconds elapsed from the start of the application to the time the message was logged. . .IP \fB%t\fR The subprogram name, that is, an identifying name for the process or thread that emitted the log message, such as \fBmonitor\fR for the process used for \fB\-\-monitor\fR or \fBmain\fR for the primary process or thread in a program. . .IP \fB%T\fR The subprogram name enclosed in parentheses, e.g. \fB(monitor)\fR, or the empty string for the primary process or thread in a program. . 
.IP \fB%%\fR A literal \fB%\fR. .RE . .IP A few options may appear between the \fB%\fR and the format specifier character, in this order: . .RS .IP \fB\-\fR Left justify the escape's expansion within its field width. Right justification is the default. . .IP \fB0\fR Pad the field to the field width with \fB0\fRs. Padding with spaces is the default. . .IP \fIwidth\fR A number specifies the minimum field width. If the escape expands to fewer characters than \fIwidth\fR then it is padded to fill the field width. (A field wider than \fIwidth\fR is not truncated to fit.) .RE . .IP The default pattern for console and file output is \fB%D{%Y-%m-%dT %H:%M:%SZ}|%05N|%c|%p|%m\fR; for syslog output, \fB%05N|%c|%p|%m\fR. . .IP Daemons written in Python (e.g. \fBovs\-xapi\-sync\fR, \fBovs\-monitor\-ipsec\fR) do not allow control over the log pattern. . .IP "\fBvlog/reopen\fR" Causes the daemon to close and reopen its log file. (This is useful after rotating log files, to cause a new log file to be used.) .IP This has no effect if the target application was not invoked with the \fB\-\-log\-file\fR option. . .SH OPTIONS . .so lib/common.man . .SH "SEE ALSO" . \fBovs\-appctl\fR can control all Open vSwitch daemons, including: .BR ovs\-vswitchd (8), and .BR ovsdb\-server (8). openvswitch-2.0.1+git20140120/utilities/ovs-appctl.c000066400000000000000000000132301226605124000217200ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include "command-line.h" #include "daemon.h" #include "dirs.h" #include "dynamic-string.h" #include "jsonrpc.h" #include "process.h" #include "timeval.h" #include "unixctl.h" #include "util.h" static void usage(void); static const char *parse_command_line(int argc, char *argv[]); static struct jsonrpc *connect_to_target(const char *target); int main(int argc, char *argv[]) { char *cmd_result, *cmd_error; struct jsonrpc *client; char *cmd, **cmd_argv; const char *target; int cmd_argc; int error; set_program_name(argv[0]); /* Parse command line and connect to target. */ target = parse_command_line(argc, argv); client = connect_to_target(target); /* Transact request and process reply. */ cmd = argv[optind++]; cmd_argc = argc - optind; cmd_argv = cmd_argc ? argv + optind : NULL; error = unixctl_client_transact(client, cmd, cmd_argc, cmd_argv, &cmd_result, &cmd_error); if (error) { ovs_fatal(error, "%s: transaction error", target); } if (cmd_error) { fputs(cmd_error, stderr); ovs_error(0, "%s: server returned an error", target); exit(2); } else if (cmd_result) { fputs(cmd_result, stdout); } else { NOT_REACHED(); } jsonrpc_close(client); free(cmd_result); free(cmd_error); return 0; } static void usage(void) { printf("\ %s, for querying and controlling Open vSwitch daemon\n\ usage: %s [TARGET] COMMAND [ARG...]\n\ Targets:\n\ -t, --target=TARGET pidfile or socket to contact\n\ Common commands:\n\ help List commands supported by the target\n\ version Print version of the target\n\ vlog/list List current logging levels\n\ vlog/set [SPEC]\n\ Set log levels as detailed in SPEC, which may include:\n\ A valid module name (all modules, by default)\n\ 'syslog', 'console', 'file' (all facilities, by default))\n\ 'off', 'emer', 'err', 'warn', 'info', or 'dbg' ('dbg', bydefault)\n\ vlog/reopen Make the program reopen its log 
file\n\ Other options:\n\ --timeout=SECS wait at most SECS seconds for a response\n\ -h, --help Print this helpful information\n\ -V, --version Display ovs-appctl version information\n", program_name, program_name); exit(EXIT_SUCCESS); } static const char * parse_command_line(int argc, char *argv[]) { static const struct option long_options[] = { {"target", required_argument, NULL, 't'}, {"execute", no_argument, NULL, 'e'}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, {"timeout", required_argument, NULL, 'T'}, {NULL, 0, NULL, 0}, }; const char *target; int e_options; target = NULL; e_options = 0; for (;;) { int option; option = getopt_long(argc, argv, "+t:hVe", long_options, NULL); if (option == -1) { break; } switch (option) { case 't': if (target) { ovs_fatal(0, "-t or --target may be specified only once"); } target = optarg; break; case 'e': /* We ignore -e for compatibility. Older versions specified the * command as the argument to -e. Since the current version takes * the command as non-option arguments and we say that -e has no * arguments, this just works in the common case. */ if (e_options++) { ovs_fatal(0, "-e or --execute may be speciifed only once"); } break; case 'h': usage(); break; case 'T': time_alarm(atoi(optarg)); break; case 'V': ovs_print_version(0, 0); exit(EXIT_SUCCESS); case '?': exit(EXIT_FAILURE); default: NOT_REACHED(); } } if (optind >= argc) { ovs_fatal(0, "at least one non-option argument is required " "(use --help for help)"); } return target ? 
target : "ovs-vswitchd"; } static struct jsonrpc * connect_to_target(const char *target) { struct jsonrpc *client; char *socket_name; int error; if (target[0] != '/') { char *pidfile_name; pid_t pid; pidfile_name = xasprintf("%s/%s.pid", ovs_rundir(), target); pid = read_pidfile(pidfile_name); if (pid < 0) { ovs_fatal(-pid, "cannot read pidfile \"%s\"", pidfile_name); } free(pidfile_name); socket_name = xasprintf("%s/%s.%ld.ctl", ovs_rundir(), target, (long int) pid); } else { socket_name = xstrdup(target); } error = unixctl_client_create(socket_name, &client); if (error) { ovs_fatal(error, "cannot connect to \"%s\"", socket_name); } free(socket_name); return client; } openvswitch-2.0.1+git20140120/utilities/ovs-benchmark.1.in000066400000000000000000000167661226605124000227330ustar00rootroot00000000000000.\" -*- nroff -*- .so lib/ovs.tmac .TH ovs\-benchmark 1 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" . .SH NAME ovs\-benchmark \- flow setup benchmark utility for Open vSwitch . .SH SYNOPSIS . .SY ovs\-benchmark\ latency \fB\-\-remote \fIip\fR[\fB:\fIports\fR] .OP \-\-sockets nsocks .OP \-\-batches nbatches .OP \-\-local \fR[\fIip\fR][\fB:\fIports\fR] .YS . .SY ovs\-benchmark\ rate \fB\-\-remote \fIip\fR[\fB:\fIports\fR] .OP \-\-max\-rate rate .OP \-\-timeout maxsecs .OP \-\-sockets nsocks .OP \-\-batches nbatches .OP \-\-local \fR[\fIip\fR][\fB:\fIports\fR] .YS . .SY ovs\-benchmark\ listen .OP \-\-local \fR[\fIip\fR]\fB:\fIports .YS . .SY ovs\-benchmark\ help .YS . .SH DESCRIPTION \fBovs\-benchmark\fR tests the performance of Open vSwitch flow setup by setting up a number of TCP connections and measuring the time required. It can also be used with the Linux bridge or without any bridging software, which allows one to measure the bandwidth and latency cost of bridging. .PP Each \fBovs\-benchmark\fR command is described separately below. . .SH "The ``latency'' command" . 
.PP This command initiates \fInsocks\fR TCP connections (by default, 100) as quickly as possible, waits for each one to complete with success or failure, and prints a bar chart of completion times on standard output, followed by a summary line. Each line in the bar chart lists a time to connection completion in milliseconds followed by a number of \fB.\fR or \fB!\fR symbols, one for each TCP connection that completed in that many milliseconds. A successful connection prints a \fB.\fR, and an unsuccessful connection (e.g. to a port on which no process is listening) prints a \fB!\fR. . .PP If \fInbatches\fR is given, the entire procedure is repeated the specified number of times. Only a single summary line is printed at the end. . .PP Results vary widely based on the number of sockets and whether the remote host is listening for connections on the specified ports. With a small number of sockets, all connection times typically remain within a handful of milliseconds. As the number of sockets increases, the distribution of connection times clusters around the sending TCP stack's SYN retransmission interval. (This pattern occurs with or without Open vSwitch on the network path.) . .SH "The ``rate'' command" . .PP This command initiates \fInsocks\fR TCP connections (by default, 100) as quickly as possible (limited by \fImaxrate\fR, if \fB\-\-max\-rate\fR is specified). Each time a connection completes with success or failure, it closes that connection and initiates a new one. It continues to do so either forever or, if \fB\-\-timeout\fR is specified, until \fImaxsecs\fR seconds have elapsed. During the test, it prints statistics about time elapsed, successful and unsuccessful connections, and the average number of completed (succeeded or failed) connections per second over the run. . .PP Without \fB\-\-max\-rate\fR, the \fBrate\fR command measures the maximum sustained flow setup rate for an Open vSwitch instance. 
This naturally tends to drive \fBovs\-vswitchd\fR CPU usage to 100% on the host receiving the traffic. . .PP When \fB\-\-max\-rate\fR is specified with a value below the maximum rate that an Open vSwitch instance can handle, then \fBrate\fR can also be used to measure the kernel and userspace CPU cost of flow setups at specific flow rates. . .PP Results tend to fluctuate greatly for the first few seconds of a run, then settle down. The displayed average is calculated over the entire run and so tends to converge asymptotically on the ``correct'' value. To converge more quickly, try running for 5 to 10 seconds, then killing and restarting the run. . .SH "The ``listen'' command" . .PP This command listens on one or more TCP ports for incoming connections. It accepts connections and immediately closes them. It can be paired with the \fBrate\fR or \fBlatency\fR commands for observing effects of successful vs. unsuccessful TCP connections. . .PP It is easier to reproduce and interpret \fBovs\-benchmark\fR results when there is no listener (see \fBNOTES\fR below). . .SH "The ``help'' command" . .PP Prints a usage message and exits successfully. . .SH OPTIONS . .IP "\fB\-r \fIip\fR[\fB:\fIports\fR]" .IQ "\fB\-\-remote \fIip\fR[\fB:\fIports\fR]" This option, required on \fBlatency\fR and \fBrate\fR commands, minimally specifies the remote host to connect to (as an IP address or DNS name) as \fIip\fR. . .IP A TCP port or range of ports (separated by \fB\-\fR) may also be specified. If a range is specified then each port in the range is used in round-robin order. The default port is 6630 if none is specified. . .IP "\fB\-l \fR[\fIip\fR][\fB:\fIports\fR]" .IQ "\fB\-\-local \fR[\fIip\fR][\fB:\fIports\fR]" On the \fBlatency\fR and \fBrate\fR commands, without this option, outgoing connections will not bind a specific TCP port. The local TCP stack will pick a local TCP port to bind. When this option is specified, the specified port or range of ports will be used in turn. 
(If a port range is specified on both \fB\-\-local\fR and \fB\-\-remote\fR, then each local port in its range will be used before the remote port is incremented to the next port in its range.) . .IP On the \fBlisten\fR command, this option specifies the local port or ports and IP addresses on which to listen. If it is omitted, port 6630 on any IP address is used. . .IP "\fB\-s \fInsocks\fR" .IQ "\fB\-\-sockets \fInsocks\fR" For \fBlatency\fR, sets the number of connections to initiate per batch. For \fBrate\fR, sets the number of outstanding connection attempts to maintain at any given time. The default is 100. . .IP "\fB\-b \fInbatches\fR" .IQ "\fB\-\-batches \fInbatches\fR" For \fBlatency\fR, sets the number of times to initiate and wait for all of the connections to complete. The default is 1. . .IP "\fB\-c \fImaxrate\fR" .IQ "\fB\-\-max\-rate \fImaxrate\fR" For \fBrate\fR, caps the maximum rate at which connections will be attempted to \fImaxrate\fR connections per second. By default there is no limit. . .IP "\fB\-T \fImaxsecs\fR" .IQ "\fB\-\-timeout \fImaxsecs\fR" For \fBrate\fR, stops the benchmark after \fImaxsecs\fR seconds have elapsed. By default, the benchmark continues until interrupted by a signal. . .SH NOTES .PP \fBovs\-benchmark\fR uses standard POSIX socket calls for network access, so it shares the strengths and limitations of TCP/IP and its implementations in the local and remote TCP/IP stacks. Particularly, TCP and its implementations limit the number of successfully completed and then closed TCP connections. This means that \fBovs\-benchmark\fR tests tend to slow down if run for long intervals or with large numbers of sockets or batches, if the remote system is listening on the port or ports being contacted. The problem does not occur when the remote system is not listening. \fBovs\-benchmark\fR results are therefore much more reliable and repeatable when the remote system is not listening on the port or ports being contacted. 
Even a single listening socket (e.g. range of ports 8000 to 9000 with one listener on port 8080) can cause anomalies in results. . .PP Be sure that the remote TCP/IP stack's firewall allows the benchmark's traffic to be processed. For Open vSwitch benchmarking purposes, you might want to disable the firewall with, e.g., \fBiptables \-F\fR. . .PP \fBovs\-benchmark\fR is single-threaded. A multithreaded process might be able to initiate connections more quickly. . .PP A TCP connection consists of two flows (one in each direction), so multiply the TCP connection statistics that \fBovs\-benchmark\fR reports by 2 to get flow statistics. openvswitch-2.0.1+git20140120/utilities/ovs-benchmark.c000066400000000000000000000414261226605124000223770ustar00rootroot00000000000000/* * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include #include #include #include #include #include #include #include #include #include "command-line.h" #include "poll-loop.h" #include "socket-util.h" #include "timeval.h" #include "util.h" #include "vlog.h" #define DEFAULT_PORT 6630 #define MAX_SOCKETS 65535 static int n_batches = 1; static int n_sockets = 100; static struct in_addr local_addr; static unsigned short int local_min_port, local_max_port; static struct in_addr remote_addr; static unsigned short int remote_min_port, remote_max_port; static double max_rate; static double timeout; static const struct command *get_all_commands(void); static void parse_options(int argc, char *argv[]); static void usage(void); static long long int time_in_msec(void) { struct timeval tv; if (gettimeofday(&tv, NULL) < 0) { ovs_fatal(errno, "gettimeofday"); } return tv.tv_sec * 1000LL + tv.tv_usec / 1000; } int main(int argc, char *argv[]) { set_program_name(argv[0]); vlog_set_levels(NULL, VLF_ANY_FACILITY, VLL_EMER); parse_options(argc, argv); run_command(argc - optind, argv + optind, get_all_commands()); return 0; } static void parse_target(const char *s_, struct in_addr *addr, unsigned short int *min, unsigned short int *max) { char *s = xstrdup(s_); char *colon; int error; colon = strchr(s, ':'); if (colon) { *colon = '\0'; } if (*s != '\0') { error = lookup_hostname(s, addr); if (error) { ovs_fatal(error, "failed to look up IP address for \"%s\"", s_); } } else { addr->s_addr = htonl(INADDR_ANY); } *min = *max = 0; if (colon && colon[1] != '\0') { const char *ports = colon + 1; if (sscanf(ports, "%hu-%hu", min, max) == 2) { if (*min > *max) { ovs_fatal(0, "%s: minimum is greater than maximum", s_); } } else if (sscanf(ports, "%hu", min) == 1) { *max = *min; } else { ovs_fatal(0, "%s: number or range expected", s_); } } free(s); } static void parse_options(int argc, char *argv[]) { static const struct option long_options[] = { {"local", required_argument, NULL, 'l'}, {"remote", required_argument, NULL, 
'r'}, {"batches", required_argument, NULL, 'b'}, {"sockets", required_argument, NULL, 's'}, {"max-rate", required_argument, NULL, 'c'}, {"timeout", required_argument, NULL, 'T'}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); local_addr.s_addr = htonl(INADDR_ANY); local_min_port = local_max_port = 0; remote_addr.s_addr = htonl(0); remote_min_port = remote_max_port = 0; for (;;) { int c; c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { case 'l': parse_target(optarg, &local_addr, &local_min_port, &local_max_port); break; case 'r': parse_target(optarg, &remote_addr, &remote_min_port, &remote_max_port); if (remote_addr.s_addr == htonl(INADDR_ANY)) { ovs_fatal(0, "remote IP address is required"); } break; case 'b': n_batches = atoi(optarg); if (n_batches < 0) { ovs_fatal(0, "--batches or -b argument must be at least 1"); } break; case 's': n_sockets = atoi(optarg); if (n_sockets < 1 || n_sockets > MAX_SOCKETS) { ovs_fatal(0, "--sockets or -s argument must be between 1 " "and %d (inclusive)", MAX_SOCKETS); } break; case 'c': max_rate = atof(optarg); if (max_rate <= 0.0) { ovs_fatal(0, "--max-rate or -c argument must be positive"); } break; case 'T': timeout = atoi(optarg); if (!timeout) { ovs_fatal(0, "-T or --timeout argument must be positive"); } break; case 'h': usage(); case 'V': ovs_print_version(0, 0); exit(EXIT_SUCCESS); case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); } static void usage(void) { printf("\ %s: Open vSwitch flow setup benchmark utility\n\ usage: %s [OPTIONS] COMMAND [ARG...]\n\ latency connect many times all at once\n\ rate measure sustained flow setup rate\n\ listen accept TCP connections\n\ help display this help message\n\ \n\ Command options:\n\ -l, --local [IP][:PORTS] use local IP and range of PORTS\n\ -r, --remote IP[:PORTS] connect to remote IP and 
PORTS\n\ -s, --sockets N number of sockets for \"rate\" or \"latency\"\n\ -b, --batches N number of connection batches for \"latency\"\n\ -c, --max-rate NPERSEC connection rate limit for \"rate\"\n\ -T, --timeout MAXSECS max number of seconds to run for \"rate\"\n\ \n\ Other options:\n\ -h, --help display this help message\n\ -V, --version display version information\n", program_name, program_name); exit(EXIT_SUCCESS); } static void cmd_listen(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct pollfd *fds; int n_fds; int port; int i; if (!local_min_port && !local_max_port) { local_min_port = local_max_port = DEFAULT_PORT; } fds = xmalloc((1 + local_max_port - local_min_port) * sizeof *fds); n_fds = 0; for (port = local_min_port; port <= local_max_port; port++) { struct sockaddr_in sin; unsigned int yes = 1; int error; int fd; /* Create socket, set SO_REUSEADDR. */ fd = socket(AF_INET, SOCK_STREAM, 0); if (fd < 0) { ovs_fatal(errno, "failed to create socket"); } error = set_nonblocking(fd); if (error) { ovs_fatal(error, "failed to set non-blocking mode"); } if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof yes) < 0) { ovs_fatal(errno, "setsockopt(SO_REUSEADDR) failed"); } /* Bind. */ sin.sin_family = AF_INET; sin.sin_addr = remote_addr; sin.sin_port = htons(port); if (bind(fd, (struct sockaddr *) &sin, sizeof sin) < 0) { ovs_fatal(errno, "bind failed"); } /* Listen. 
*/ if (listen(fd, 10000) < 0) { ovs_fatal(errno, "listen failed"); } fds[n_fds].fd = fd; fds[n_fds].events = POLLIN; n_fds++; } for (;;) { int retval; do { retval = poll(fds, n_fds, -1); } while (retval < 0 && errno == EINTR); if (retval < 0) { ovs_fatal(errno, "poll failed"); } for (i = 0; i < n_fds; i++) { if (fds[i].revents & POLLIN) { int newfd; do { newfd = accept(fds[i].fd, NULL, NULL); } while (newfd < 0 && errno == EINTR); if (newfd >= 0) { close(newfd); } else if (errno != EAGAIN) { ovs_fatal(errno, "accept failed"); } } } } } /* Increments '*value' within the range 'min...max' inclusive. Returns true * if '*value' wraps around to 'min', otherwise false. */ static bool increment(unsigned short int *value, unsigned short int min, unsigned short int max) { if (*value < max) { ++*value; return false; } else { *value = min; return true; } } static void next_ports(unsigned short int *local_port, unsigned short int *remote_port) { if (increment(local_port, local_min_port, local_max_port)) { increment(remote_port, remote_min_port, remote_max_port); } } static void bind_local_port(int fd, unsigned short int *local_port, unsigned short int *remote_port) { int error; if (!local_min_port && !local_max_port) { next_ports(local_port, remote_port); return; } do { struct sockaddr_in local; memset(&local, 0, sizeof local); local.sin_family = AF_INET; local.sin_addr = local_addr; local.sin_port = htons(*local_port); error = (bind(fd, (struct sockaddr *) &local, sizeof local) < 0 ? 
errno : 0); next_ports(local_port, remote_port); } while (error == EADDRINUSE || error == EINTR); if (error) { ovs_fatal(error, "bind failed"); } } static void cmd_rate(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned short int local_port; unsigned short int remote_port; unsigned int completed = 0; unsigned int failures = 0; long long int start, prev; struct pollfd *fds; int n_fds; if (!remote_addr.s_addr) { ovs_fatal(0, "remote address must be specified with -r or --remote"); } if (!remote_min_port && !remote_max_port) { remote_min_port = remote_max_port = DEFAULT_PORT; } local_port = local_min_port; remote_port = remote_min_port; fds = xmalloc(n_sockets * sizeof *fds); n_fds = 0; start = prev = time_in_msec(); for (;;) { long long int now; long long int may_open; int delay; int error; int j; if (max_rate > 0) { long long int cur_total = completed + n_fds; long long int max_total = (time_in_msec() - start) * (max_rate / 1000.0); if (max_total > cur_total) { may_open = MIN(n_sockets, max_total - cur_total); } else { may_open = 0; } delay = 1000.0 / max_rate; } else { may_open = n_sockets; delay = 1000; } while (may_open-- > 0 && n_fds < n_sockets) { struct sockaddr_in remote; int error; int fd; fd = socket(AF_INET, SOCK_STREAM, 0); if (fd < 0) { ovs_fatal(errno, "socket failed"); } error = set_nonblocking(fd); if (error) { ovs_fatal(error, "set_nonblocking failed"); } bind_local_port(fd, &local_port, &remote_port); memset(&remote, 0, sizeof remote); remote.sin_family = AF_INET; remote.sin_addr = remote_addr; remote.sin_port = htons(remote_port); if (connect(fd, (struct sockaddr *) &remote, sizeof remote) < 0) { if (errno == EINPROGRESS) { fds[n_fds].fd = fd; fds[n_fds].events = POLLOUT; fds[n_fds].revents = 0; n_fds++; } else if (errno != ECONNREFUSED) { ovs_fatal(errno, "connect"); } } else { /* Success, I guess. */ shutdown(fd, 2); close(fd); completed++; } } if (n_fds == n_sockets) { delay = 1000; } do { error = poll(fds, n_fds, delay) < 0 ? 
errno : 0; } while (error == EINTR); if (error) { ovs_fatal(errno, "poll"); } for (j = 0; j < n_fds; ) { if (fds[j].revents) { if (fds[j].revents & POLLERR) { failures++; } shutdown(fds[j].fd, 2); close(fds[j].fd); fds[j] = fds[--n_fds]; completed++; } else { j++; } } now = time_in_msec(); if (now >= prev + 1000) { long long int elapsed = now - start; printf("%.3f s elapsed, %u OK, %u failed, avg %.1f/s\n", elapsed / 1000.0, completed - failures, failures, completed / (elapsed / 1000.0)); fflush(stdout); prev = now; if (timeout && elapsed > timeout * 1000LL) { break; } } } } static void timer_end(long long int start, bool error, int *min, int *max, unsigned long long int *total) { int elapsed = time_in_msec() - start; static int last_elapsed = INT_MIN; char c = error ? '!' : '.'; if (last_elapsed != elapsed) { if (last_elapsed != INT_MIN) { putchar('\n'); } printf("%5d %c", elapsed, c); fflush(stdout); last_elapsed = elapsed; } else { putchar(c); fflush(stdout); } if (elapsed < *min) { *min = elapsed; } if (elapsed > *max) { *max = elapsed; } *total += elapsed; } static void cmd_latency(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { unsigned short int local_port; unsigned short int remote_port; int min = INT_MAX; int max = 0; unsigned long long int total = 0; int i; if (!remote_addr.s_addr) { ovs_fatal(0, "remote address must be specified with -r or --rate"); } if (!remote_min_port && !remote_max_port) { remote_min_port = remote_max_port = DEFAULT_PORT; } local_port = local_min_port; remote_port = remote_min_port; for (i = 0; i < n_batches; i++) { struct pollfd fds[MAX_SOCKETS]; long long int start; int n_fds; int j; start = time_in_msec(); n_fds = 0; for (j = 0; j < n_sockets; j++) { struct sockaddr_in remote; int error; int fd; fd = socket(AF_INET, SOCK_STREAM, 0); if (fd < 0) { ovs_fatal(errno, "socket failed"); } error = set_nonblocking(fd); if (error) { ovs_fatal(error, "set_nonblocking failed"); } bind_local_port(fd, &local_port, &remote_port); 
memset(&remote, 0, sizeof remote); remote.sin_family = AF_INET; remote.sin_addr = remote_addr; remote.sin_port = htons(remote_port); if (connect(fd, (struct sockaddr *) &remote, sizeof remote) < 0) { if (errno == EINPROGRESS) { fds[n_fds].fd = fd; fds[n_fds].events = POLLOUT; fds[n_fds].revents = 0; n_fds++; } else if (errno != ECONNREFUSED) { ovs_fatal(errno, "connect"); } } else { /* Success, I guess. */ close(fd); timer_end(start, 0, &min, &max, &total); } } while (n_fds > 0) { int error; do { error = poll(fds, n_fds, -1) < 0 ? errno : 0; } while (error == EINTR); if (error) { ovs_fatal(errno, "poll"); } for (j = 0; j < n_fds; ) { if (fds[j].revents) { timer_end(start, fds[j].revents & (POLLERR|POLLHUP) ? 1 : 0, &min, &max, &total); close(fds[j].fd); fds[j] = fds[--n_fds]; } else { j++; } } } putchar('\n'); } printf("min %d ms, max %d ms, avg %llu ms\n", min, max, total / (1ULL * n_sockets * n_batches)); } static void cmd_help(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { usage(); } static const struct command all_commands[] = { { "listen", 0, 0, cmd_listen }, { "rate", 0, 0, cmd_rate }, { "latency", 0, 0, cmd_latency }, { "help", 0, 0, cmd_help }, { NULL, 0, 0, NULL }, }; static const struct command *get_all_commands(void) { return all_commands; } openvswitch-2.0.1+git20140120/utilities/ovs-check-dead-ifs.in000077500000000000000000000056061226605124000233630ustar00rootroot00000000000000#! @PYTHON@ import os import re import stat import sys if "--help" in sys.argv: sys.stdout.write("""\ ovs-check-dead-ifs: Check for packet sockets for nonexistent network devices. One side effect of the "force-reload-kmod" command that reloads the Open vSwitch kernel module is that all the network devices that the Open vSwitch kernel module implemented get destroyed and then replaced by new instances with the same names. Unfortunately, programs that are listening for packets on the original network devices will not receive packets that arrive on the new instances. 
This causes some services, such as DHCP, to silently fail. This program looks for such problems and, if it finds any, prints information about programs that are in such a state. The system administrator should then take some action to fix the problem, such as restarting these programs. """) sys.exit(0) elif len(sys.argv) > 1: sys.stderr.write("ovs-check-dead-ifs: no arguments or options accepted " "(use --help for help)\n") sys.exit(1) # Get the set of all valid ifindexes. # # 0 is always valid for our purposes because it means "any interface". valid_ifindexes = set([]) for ifname in os.listdir("/sys/class/net"): fn = "/sys/class/net/%s/ifindex" % ifname try: valid_ifindexes.add(int(open(fn).readline())) except IOError: pass except ValueError: print "%s: unexpected format\n" % fn # Get inodes for all packet sockets whose ifindexes don't exist. invalid_inodes = set() f = open("/proc/net/packet") f.readline() # Skip header line. for line in f: fields = line.split() ifindex = int(fields[4]) if ifindex not in valid_ifindexes: invalid_inodes.add(int(fields[8])) f.close() if not invalid_inodes: sys.exit(0) # Now find the processes that are using those packet sockets. inode_re = re.compile(r'socket:\[([0-9]+)\]$') bad_pids = set() for pid in os.listdir("/proc"): try: pid = int(pid) except ValueError: continue try: fds = os.listdir("/proc/%d/fd" % pid) except OSError: continue for fd in fds: try: fd = int(fd) except ValueError: continue try: s = os.stat("/proc/%d/fd/%d" % (pid, fd)) except OSError: continue if not stat.S_ISSOCK(s.st_mode): continue try: linkname = os.readlink("/proc/%d/fd/%d" % (pid, fd)) except OSError: continue m = inode_re.match(linkname) if not m: continue inode = int(m.group(1)) if inode in invalid_inodes: bad_pids.add(pid) if bad_pids: print """ The following processes are listening for packets to arrive on network devices that no longer exist. 
You may want to restart them.""" os.execvp("ps", ["ps"] + ["%s" % pid for pid in bad_pids]) openvswitch-2.0.1+git20140120/utilities/ovs-controller.8.in000066400000000000000000000141141226605124000231540ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .TH ovs\-controller 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .ds PN ovs\-controller . .SH NAME ovs\-controller \- simple OpenFlow controller reference implementation . .SH SYNOPSIS .B ovs\-controller [\fIoptions\fR] \fImethod\fR \fB[\fImethod\fR]\&... . .SH DESCRIPTION \fBovs\-controller\fR manages any number of remote switches over OpenFlow protocol, causing them to function as L2 MAC-learning switches or hub. .PP \fBovs\-controller\fR controls one or more OpenFlow switches, specified as one or more of the following OpenFlow connection methods: . .RS .so lib/vconn-passive.man .so lib/vconn-active.man .RE . .SH OPTIONS .IP "\fB\-n\fR" .IQ "\fB\-\-noflow\fR" By default, \fBovs\-controller\fR sets up a flow in each OpenFlow switch whenever it receives a packet whose destination is known due through MAC learning. This option disables flow setup, so that every packet in the network passes through the controller. .IP This option is most useful for debugging. It reduces switching performance, so it should not be used in production. . .TP \fB\-\-max\-idle=\fIsecs\fR|\fBpermanent\fR Sets \fIsecs\fR as the number of seconds that a flow set up by the controller will remain in the switch's flow table without any matching packets being seen. If \fBpermanent\fR is specified, which is not recommended, flows will never expire. The default is 60 seconds. .IP This option has no effect when \fB\-n\fR (or \fB\-\-noflow\fR) is in use (because the controller does not set up flows in that case). . .IP "\fB\-H\fR" .IQ "\fB\-\-hub\fR" By default, the controller acts as an L2 MAC-learning switch. This option changes its behavior to that of a hub that floods packets on all but the incoming port. 
.IP If \fB\-H\fR (or \fB\-\-hub\fR) and \fB\-n\fR (or \fB\-\-noflow\fR) are used together, then the cumulative effect is that every packet passes through the controller and every packet is flooded. .IP This option is most useful for debugging. It reduces switching performance, so it should not be used in production. . .IP "\fB\-w\fR[\fIwildcard_mask\fR]" .IQ "\fB\-\-wildcards\fR[\fB=\fIwildcard_mask\fR]\fR" By default, \fBovs\-controller\fR sets up exact-match flows. This option allows it to set up wildcarded flows, which may reduce flow setup latency by causing less traffic to be sent up to the controller. .IP The optional \fIwildcard_mask\fR is an OpenFlow wildcard bitmask in hexadecimal that specifies the fields to wildcard. If no \fIwildcard_mask\fR is specified, the default value 0x2820F0 is used which specifies L2-only switching and wildcards L3 and L4 fields. Another interesting value is 0x2000EC, which specifies L3-only switching and wildcards L2 and L4 fields. .IP This option has no effect when \fB\-n\fR (or \fB\-\-noflow\fR) is in use (because the controller does not set up flows in that case). . .IP "\fB\-N\fR" .IQ "\fB\-\-normal\fR" By default, \fBovs\-controller\fR directs packets to a particular port or floods them. This option causes it to direct non-flooded packets to the OpenFlow \fBOFPP_NORMAL\fR port. This allows the switch itself to make decisions about packet destinations. Support for \fBOFPP_NORMAL\fR is optional in OpenFlow, so this option may not work well with some non-Open vSwitch switches. . .IP "\fB\-\-mute\fR" Prevents ovs\-controller from replying to any OpenFlow messages sent to it by switches. .IP This option is only for debugging the Open vSwitch implementation of ``fail open'' mode. It must not be used in production. . .IP "\fB\-q \fIid\fR" .IQ "\fB\-\-queue=\fIid\fR" By default, \fBovs\-controller\fR uses the default OpenFlow queue for sending packets and setting up flows. 
Use one of these options, supplying \fIid\fR as an OpenFlow queue ID as a decimal number, to instead use that specific queue. .IP This option is incompatible with \fB\-N\fR or \fB\-\-normal\fR and with \fB\-H\fR or \fB\-\-hub\fR. If more than one is specified then this option takes precedence. .IP This option may be useful for testing or debugging quality of service setups. . .IP "\fB\-Q \fIport-name\fB:\fIqueue-id\fR" .IQ "\fB\-\-port\-queue \fIport-name\fB:\fIqueue-id\fR" Configures packets received on the port named \fIport-name\fR (e.g. \fBeth0\fR) to be output on OpenFlow queue ID \fIqueue-id\fR (specified as a decimal number). For the specified port, this option overrides the default specified on \fB\-q\fR or \fB\-\-queue\fR. .IP This option may be specified any number of times with different \fIport-name\fR arguments. .IP This option is incompatible with \fB\-N\fR or \fB\-\-normal\fR and with \fB\-H\fR or \fB\-\-hub\fR. If more than one is specified then this option takes precedence. .IP This option may be useful for testing or debugging quality of service setups. . .IP "\fB\-\-with\-flows \fIfile\fR" When a switch connects, push the flow entries as described in \fIfile\fR. Each line in \fIfile\fR is a flow entry in the format described for the \fBadd\-flows\fR command in the \fBFlow Syntax\fR section of the \fBovs\-ofctl\fR(8) man page. .IP Use this option more than once to add flows from multiple files. . .SS "Public Key Infrastructure Options" .so lib/ssl.man .so lib/ssl-peer-ca-cert.man .ds DD .so lib/daemon.man .so lib/vlog.man .so lib/unixctl.man .so lib/common.man .so lib/ofp-version.man . .SH EXAMPLES .PP To bind locally to port 6633 (the default) and wait for incoming connections from OpenFlow switches: .IP \fB% ovs\-controller ptcp:\fR .SH "BUGS" .PP Configuring a Citrix XenServer to connect to a particular controller only points the remote OVSDB management connection to that controller. 
It does not also configure OpenFlow connections, because the manager is expected to do that over the management protocol. \fBovs\-controller\fR is not an Open vSwitch manager and does not know how to do that. .PP As a stopgap workaround, \fBovs\-vsctl\fR can wait for an OVSDB connection and set the controller, e.g.: .IP \fB% ovs\-vsctl \-t0 \-\-db=pssl: \-\-certificate=cert.pem \-\-ca\-cert=none \-\-private\-key=privkey.pem \-\-peer\-ca\-cert=cacert.pem set\-controller ssl:\fIip\fR .SH "SEE ALSO" . .BR ovs\-appctl (8), .BR ovs\-ofctl (8), .BR ovs\-dpctl (8) openvswitch-2.0.1+git20140120/utilities/ovs-controller.c000066400000000000000000000276551226605124000226400ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include #include #include #include #include #include #include #include "command-line.h" #include "compiler.h" #include "daemon.h" #include "learning-switch.h" #include "ofp-parse.h" #include "ofp-version-opt.h" #include "ofpbuf.h" #include "openflow/openflow.h" #include "poll-loop.h" #include "rconn.h" #include "simap.h" #include "stream-ssl.h" #include "timeval.h" #include "unixctl.h" #include "util.h" #include "vconn.h" #include "vlog.h" #include "socket-util.h" #include "ofp-util.h" VLOG_DEFINE_THIS_MODULE(controller); #define MAX_SWITCHES 16 #define MAX_LISTENERS 16 struct switch_ { struct lswitch *lswitch; }; /* -H, --hub: Learn the ports on which MAC addresses appear? */ static bool learn_macs = true; /* -n, --noflow: Set up flows? (If not, every packet is processed at the * controller.) */ static bool set_up_flows = true; /* -N, --normal: Use "NORMAL" action instead of explicit port? */ static bool action_normal = false; /* -w, --wildcard: 0 to disable wildcard flow entries, an OFPFW10_* bitmask to * enable specific wildcards, or UINT32_MAX to use the default wildcards. */ static uint32_t wildcards = 0; /* --max-idle: Maximum idle time, in seconds, before flows expire. */ static int max_idle = 60; /* --mute: If true, accept connections from switches but do not reply to any * of their messages (for debugging fail-open mode). */ static bool mute = false; /* -q, --queue: default OpenFlow queue, none if UINT32_MAX. */ static uint32_t default_queue = UINT32_MAX; /* -Q, --port-queue: map from port name to port number. */ static struct simap port_queues = SIMAP_INITIALIZER(&port_queues); /* --with-flows: Flows to send to switch. */ static struct ofputil_flow_mod *default_flows; static size_t n_default_flows; static enum ofputil_protocol usable_protocols; /* --unixctl: Name of unixctl socket, or null to use the default. 
*/ static char *unixctl_path = NULL; static void new_switch(struct switch_ *, struct vconn *); static void parse_options(int argc, char *argv[]); static void usage(void) NO_RETURN; int main(int argc, char *argv[]) { struct unixctl_server *unixctl; struct switch_ switches[MAX_SWITCHES]; struct pvconn *listeners[MAX_LISTENERS]; int n_switches, n_listeners; int retval; int i; proctitle_init(argc, argv); set_program_name(argv[0]); parse_options(argc, argv); signal(SIGPIPE, SIG_IGN); if (argc - optind < 1) { ovs_fatal(0, "at least one vconn argument required; " "use --help for usage"); } n_switches = n_listeners = 0; for (i = optind; i < argc; i++) { const char *name = argv[i]; struct vconn *vconn; retval = vconn_open(name, get_allowed_ofp_versions(), DSCP_DEFAULT, &vconn); if (!retval) { if (n_switches >= MAX_SWITCHES) { ovs_fatal(0, "max %d switch connections", n_switches); } new_switch(&switches[n_switches++], vconn); continue; } else if (retval == EAFNOSUPPORT) { struct pvconn *pvconn; retval = pvconn_open(name, get_allowed_ofp_versions(), DSCP_DEFAULT, &pvconn); if (!retval) { if (n_listeners >= MAX_LISTENERS) { ovs_fatal(0, "max %d passive connections", n_listeners); } listeners[n_listeners++] = pvconn; } } if (retval) { VLOG_ERR("%s: connect: %s", name, ovs_strerror(retval)); } } if (n_switches == 0 && n_listeners == 0) { ovs_fatal(0, "no active or passive switch connections"); } daemonize_start(); retval = unixctl_server_create(unixctl_path, &unixctl); if (retval) { exit(EXIT_FAILURE); } daemonize_complete(); while (n_switches > 0 || n_listeners > 0) { /* Accept connections on listening vconns. */ for (i = 0; i < n_listeners && n_switches < MAX_SWITCHES; ) { struct vconn *new_vconn; retval = pvconn_accept(listeners[i], &new_vconn); if (!retval || retval == EAGAIN) { if (!retval) { new_switch(&switches[n_switches++], new_vconn); } i++; } else { pvconn_close(listeners[i]); listeners[i] = listeners[--n_listeners]; } } /* Do some switching work. . 
*/ for (i = 0; i < n_switches; ) { struct switch_ *this = &switches[i]; lswitch_run(this->lswitch); if (lswitch_is_alive(this->lswitch)) { i++; } else { lswitch_destroy(this->lswitch); switches[i] = switches[--n_switches]; } } unixctl_server_run(unixctl); /* Wait for something to happen. */ if (n_switches < MAX_SWITCHES) { for (i = 0; i < n_listeners; i++) { pvconn_wait(listeners[i]); } } for (i = 0; i < n_switches; i++) { struct switch_ *sw = &switches[i]; lswitch_wait(sw->lswitch); } unixctl_server_wait(unixctl); poll_block(); } return 0; } static void new_switch(struct switch_ *sw, struct vconn *vconn) { struct lswitch_config cfg; struct rconn *rconn; rconn = rconn_create(60, 0, DSCP_DEFAULT, get_allowed_ofp_versions()); rconn_connect_unreliably(rconn, vconn, NULL); cfg.mode = (action_normal ? LSW_NORMAL : learn_macs ? LSW_LEARN : LSW_FLOOD); cfg.wildcards = wildcards; cfg.max_idle = set_up_flows ? max_idle : -1; cfg.default_flows = default_flows; cfg.n_default_flows = n_default_flows; cfg.usable_protocols = usable_protocols; cfg.default_queue = default_queue; cfg.port_queues = &port_queues; cfg.mute = mute; sw->lswitch = lswitch_create(rconn, &cfg); } static void add_port_queue(char *s) { char *save_ptr = NULL; char *port_name; char *queue_id; port_name = strtok_r(s, ":", &save_ptr); queue_id = strtok_r(NULL, "", &save_ptr); if (!queue_id) { ovs_fatal(0, "argument to -Q or --port-queue should take the form " "\":\""); } if (!simap_put(&port_queues, port_name, atoi(queue_id))) { ovs_fatal(0, " arguments for -Q or --port-queue must " "be unique"); } } static void parse_options(int argc, char *argv[]) { enum { OPT_MAX_IDLE = UCHAR_MAX + 1, OPT_PEER_CA_CERT, OPT_MUTE, OPT_WITH_FLOWS, OPT_UNIXCTL, VLOG_OPTION_ENUMS, DAEMON_OPTION_ENUMS, OFP_VERSION_OPTION_ENUMS }; static const struct option long_options[] = { {"hub", no_argument, NULL, 'H'}, {"noflow", no_argument, NULL, 'n'}, {"normal", no_argument, NULL, 'N'}, {"wildcards", optional_argument, NULL, 'w'}, 
{"max-idle", required_argument, NULL, OPT_MAX_IDLE}, {"mute", no_argument, NULL, OPT_MUTE}, {"queue", required_argument, NULL, 'q'}, {"port-queue", required_argument, NULL, 'Q'}, {"with-flows", required_argument, NULL, OPT_WITH_FLOWS}, {"unixctl", required_argument, NULL, OPT_UNIXCTL}, {"help", no_argument, NULL, 'h'}, DAEMON_LONG_OPTIONS, OFP_VERSION_LONG_OPTIONS, VLOG_LONG_OPTIONS, STREAM_SSL_LONG_OPTIONS, {"peer-ca-cert", required_argument, NULL, OPT_PEER_CA_CERT}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { int indexptr; char *error; int c; c = getopt_long(argc, argv, short_options, long_options, &indexptr); if (c == -1) { break; } switch (c) { case 'H': learn_macs = false; break; case 'n': set_up_flows = false; break; case OPT_MUTE: mute = true; break; case 'N': action_normal = true; break; case 'w': wildcards = optarg ? strtol(optarg, NULL, 16) : UINT32_MAX; break; case OPT_MAX_IDLE: if (!strcmp(optarg, "permanent")) { max_idle = OFP_FLOW_PERMANENT; } else { max_idle = atoi(optarg); if (max_idle < 1 || max_idle > 65535) { ovs_fatal(0, "--max-idle argument must be between 1 and " "65535 or the word 'permanent'"); } } break; case 'q': default_queue = atoi(optarg); break; case 'Q': add_port_queue(optarg); break; case OPT_WITH_FLOWS: error = parse_ofp_flow_mod_file(optarg, OFPFC_ADD, &default_flows, &n_default_flows, &usable_protocols); if (error) { ovs_fatal(0, "%s", error); } break; case OPT_UNIXCTL: unixctl_path = optarg; break; case 'h': usage(); VLOG_OPTION_HANDLERS OFP_VERSION_OPTION_HANDLERS DAEMON_OPTION_HANDLERS STREAM_SSL_OPTION_HANDLERS case OPT_PEER_CA_CERT: stream_ssl_set_peer_ca_cert_file(optarg); break; case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); if (!simap_is_empty(&port_queues) || default_queue != UINT32_MAX) { if (action_normal) { ovs_error(0, "queue IDs are incompatible with -N or --normal; " "not using OFPP_NORMAL"); action_normal = false; } if (!learn_macs) 
{ ovs_error(0, "queue IDs are incompatible with -H or --hub; " "not acting as hub"); learn_macs = true; } } } static void usage(void) { printf("%s: OpenFlow controller\n" "usage: %s [OPTIONS] METHOD\n" "where METHOD is any OpenFlow connection method.\n", program_name, program_name); vconn_usage(true, true, false); daemon_usage(); ofp_version_usage(); vlog_usage(); printf("\nOther options:\n" " -H, --hub act as hub instead of learning switch\n" " -n, --noflow pass traffic, but don't add flows\n" " --max-idle=SECS max idle time for new flows\n" " -N, --normal use OFPP_NORMAL action\n" " -w, --wildcards[=MASK] wildcard (specified) bits in flows\n" " -q, --queue=QUEUE-ID OpenFlow queue ID to use for output\n" " -Q PORT-NAME:QUEUE-ID use QUEUE-ID for frames from PORT-NAME\n" " --with-flows FILE use the flows from FILE\n" " --unixctl=SOCKET override default control socket name\n" " -h, --help display this help message\n" " -V, --version display version information\n"); exit(EXIT_SUCCESS); } openvswitch-2.0.1+git20140120/utilities/ovs-ctl.8000066400000000000000000000334701226605124000211540ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .de ST . PP . RS -0.15in . I "\\$1" . RE .. .TH ovs\-ctl 8 "June 2011" "Open vSwitch" "Open vSwitch Manual" .ds PN ovs\-ctl . .SH NAME ovs\-ctl \- OVS startup helper script . .SH SYNOPSIS \fBovs\-ctl\fR \fB\-\-system\-id=random\fR|\fIuuid\fR [\fIoptions\fR] \fBstart .br \fBovs\-ctl stop .br \fBovs\-ctl status .br \fBovs\-ctl version .br \fBovs\-ctl [\fIoptions\fR] \fBload\-kmod\fR .br \fBovs\-ctl \fB\-\-system\-id=random\fR|\fIuuid\fR [\fIoptions\fR] \fBforce\-reload\-kmod\fR .br \fBovs\-ctl \fR[\fB\-\-protocol=\fIprotocol\fR] [\fB\-\-sport=\fIsport\fR] [\fB\-\-dport=\fIdport\fR] \fBenable\-protocol\fR .br \fBovs\-ctl help \fR| \fB\-h \fR| \fB\-\-help .br \fBovs\-ctl \-\-version . .SH DESCRIPTION . .PP The \fBovs\-ctl\fR program starts, stops, and checks the status of Open vSwitch daemons. 
It is not meant to be invoked directly by system administrators but to be called internally by system startup scripts. . .PP Each of \fBovs\-ctl\fR's commands is described separately below. . .SH "The ``start'' command" . .PP The \fBstart\fR command starts Open vSwitch. It performs the following tasks: . .IP 1. Loads the Open vSwitch kernel module. If this fails, and the Linux bridge module is loaded but no bridges exist, it tries to unload the bridge module and tries loading the Open vSwitch kernel module again. (This is because the Open vSwitch kernel module cannot coexist with the Linux bridge module before 2.6.37.) . .PP The \fBstart\fR command skips the following steps if \fBovsdb\-server\fR is already running: .IP 2. If the Open vSwitch database file does not exist, it creates it. If the database does exist, but it has an obsolete version, it upgrades it to the latest schema. . .IP 3. Starts \fBovsdb-server\fR. . .IP 4. Initializes a few values inside the database. . .IP 5. If the \fB\-\-delete\-bridges\fR option was used, deletes all of the bridges from the database. . .PP The \fBstart\fR command skips the following step if \fBovs\-vswitchd\fR is already running: .IP 6. Starts \fBovs\-vswitchd\fR. . .SS "Options" .PP Several command-line options influence the \fBstart\fR command's behavior. Some form of the following option should ordinarily be specified: . .IP "\fB\-\-system\-id=\fIuuid\fR" .IQ "\fB\-\-system\-id=random\fR" This specifies a unique system identifier to store into \fBexternal-ids:system-id\fR in the database's \fBOpen_vSwitch\fR table. Remote managers that talk to the Open vSwitch database server over network protocols use this value to identify and distinguish Open vSwitch instances, so it should be unique (at least) within OVS instances that will connect to a single controller. .IP When \fBrandom\fR is specified, \fBovs\-ctl\fR will generate a random ID that persists from one run to another (stored in a file). 
When another string is specified \fBovs\-ctl\fR uses it literally. . .PP The following options should be specified if the defaults are not suitable: . .IP "\fB\-\-system\-type=\fItype\fR" .IQ "\fB\-\-system\-version=\fIversion\fR" Sets the value to store in the \fBsystem-type\fR and \fBsystem-version\fR columns, respectively, in the database's \fBOpen_vSwitch\fR table. Remote managers may use these values to determine the kind of system to which they are connected (primarily for display to human administrators). .IP When not specified, \fBovs\-ctl\fR uses values from the optional \fBsystem\-type.conf\fR and \fBsystem\-version.conf\fR files (see section \fBFILES\fR) or it uses the \fBlsb_release\fR program, if present, to provide reasonable defaults. . .PP The following options are also likely to be useful: . .IP "\fB\-\-external\-id=\(dq\fIname\fB=\fIvalue\fB\(dq" Sets \fBexternal-ids:\fIname\fR to \fIvalue\fR in the database's \fBOpen_vSwitch\fR table. Specifying this option multiple times adds multiple key-value pairs. . .IP "\fB\-\-delete\-bridges\fR" Ordinarily Open vSwitch bridges persist from one system boot to the next, as long as the database is preserved. Some environments instead expect to re-create all of the bridges and other configuration state on every boot. This option supports that, by deleting all Open vSwitch bridges after starting \fBovsdb\-server\fR but before starting \fBovs\-vswitchd\fR. . .PP The following options are less important: . .IP "\fB\-\-daemon-cwd=\fIdirectory\fR" Specifies the current working directory that the OVS daemons should run from. The default is \fB/\fR (the root directory) if this option is not specified. (This option is useful because most systems create core files in a process's current working directory and because a file system that is in use as a process's current working directory cannot be unmounted.) . .IP "\fB\-\-no\-force\-corefiles\fR" By default, \fBovs\-ctl\fR enables core dumps for the OVS daemons. 
This option disables that behavior. . .IP "\fB\-\-no\-mlockall\fR" By default \fBovs\-ctl\fR passes \fB\-\-mlockall\fR to \fBovs\-vswitchd\fR, requesting that it lock all of its virtual memory, preventing it from being paged to disk. This option suppresses that behavior. . .IP "\fB\-\-ovsdb\-server\-priority=\fIniceness\fR" .IQ "\fB\-\-ovs\-vswitchd\-priority=\fIniceness\fR" Sets the \fBnice\fR(1) level used for each daemon. All of them default to \fB\-10\fR. . .IP "\fB\-\-ovsdb\-server\-wrapper=\fIwrapper\fR" .IQ "\fB\-\-ovs\-vswitchd\-wrapper=\fIwrapper\fR" . Configures the specified daemon to run under \fIwrapper\fR, which is one of the following: . .RS .IP "\fBvalgrind\fR" Run the daemon under \fBvalgrind\fR(1), if it is installed, logging to \fIdaemon\fB.valgrind.log.\fIpid\fR in the log directory. . .IP "\fBstrace\fR" Run the daemon under \fBstrace\fR(1), if it is installed, logging to \fIdaemon\fB.strace.log.\fIpid\fR in the log directory. . .IP "\fBglibc\fR" Enable GNU C library features designed to find memory errors. .RE . .IP By default, no wrapper is used. . .IP Each of the wrappers can expose bugs in Open vSwitch that lead to incorrect operation, including crashes. The \fBvalgrind\fR and \fBstrace\fR wrappers greatly slow daemon operations so they should not be used in production. They also produce voluminous logs that can quickly fill small disk partitions. The \fBglibc\fR wrapper is less resource-intensive but still somewhat slows the daemons. . .PP The following options control file locations. They should only be used if the default locations cannot be used. See \fBFILES\fR, below, for more information. . .IP "\fB\-\-db\-file=\fIfile\fR" Overrides the file name for the OVS database. . .IP "\fB\-\-db\-sock=\fIsocket\fR" Overrides the file name for the Unix domain socket used to connect to \fBovsdb\-server\fR. . .IP "\fB\-\-db\-schema=\fIschema\fR" Overrides the file name for the OVS database schema. . 
.IP "\fB\-\-extra-dbs=\fIfile\fR" Adds \fIfile\fR as an extra database for \fBovsdb\-server\fR to serve out. Multiple space-separated file names may also be specified. \fIfile\fR should begin with \fB/\fR; if it does not, then it will be taken as relative to \fIdbdir\fR. . .SH "The ``stop'' command" . .PP The \fBstop\fR command does not unload the Open vSwitch kernel modules. . .PP This command does nothing and finishes successfully if the OVS daemons aren't running. . .SH "The ``restart'' command" . .PP The \fBrestart\fR command performs a \fBstop\fR followed by a \fBstart\fR command. The command can take the same options as the \fBstart\fR command. In addition, it saves and restores OpenFlow flows for each individual bridge. . .SH "The ``status'' command" . .PP The \fBstatus\fR command checks whether the OVS daemons \fBovs-vswitchd\fR and \fBovsdb\-server\fR are running and prints messages with that information. It exits with status 0 if the daemons are running, 1 otherwise. . .SH "The ``version'' command" . .PP The \fBversion\fR command runs \fBovsdb\-server \-\-version\fR and \fBovs\-vswitchd \-\-version\fR. . .SH "The ``force\-reload\-kmod'' command" . .PP The \fBforce\-reload\-kmod\fR command allows upgrading the Open vSwitch kernel module without rebooting. It performs the following tasks: . .IP 1. Gets a list of OVS ``internal'' interfaces, that is, network devices implemented by Open vSwitch. The most common examples of these are bridge ``local ports''. . .IP 2. Saves the OpenFlow flows of each bridge. . .IP 3. Stops the Open vSwitch daemons, as if by a call to \fBovs\-ctl stop\fR. . .IP 4. Saves the kernel configuration state of the OVS internal interfaces listed in step 1, including IP and IPv6 addresses and routing table entries. . .IP 5. Unloads the Open vSwitch kernel module (including the bridge compatibility module if it is loaded). . .IP 6. Starts OVS back up, as if by a call to \fBovs\-ctl start\fR. 
This reloads the kernel module, restarts the OVS daemons and finally restores the saved Openflow flows. . .IP 7. Restores the kernel configuration state that was saved in step 4. . .IP 8. Checks for daemons that may need to be restarted because they have packet sockets that are listening on old instances of Open vSwitch kernel interfaces and, if it finds any, prints a warning on stdout. DHCP is a common example: if the ISC DHCP client is running on an OVS internal interface, then it will have to be restarted after completing the above procedure. (It would be nice if \fBovs\-ctl\fR could restart daemons automatically, but the details are far too specific to a particular distribution and installation.) . .PP \fBforce\-reload\-kmod\fR internally stops and starts OVS, so it accepts all of the options accepted by the \fBstart\fR command. . .SH "The ``load\-kmod'' command" . .PP The \fBload\-kmod\fR command loads the openvswitch kernel modules if they are not already loaded. This operation also occurs as part of the \fBstart\fR command. The motivation for providing the \fBload\-kmod\fR command is to allow errors when loading modules to be handled separately from other errors that may occur when running the \fBstart\fR command. . .PP By default the \fBload\-kmod\fR command attempts to load the openvswitch kernel module. . .SH "The ``enable\-protocol'' command" . .PP The \fBenable\-protocol\fR command checks for rules related to a specified protocol in the system's \fBiptables\fR(8) configuration. If there are no rules specifically related to that protocol, then it inserts a rule to accept the specified protocol. . .PP More specifically: . .IP \(bu If \fBiptables\fR is not installed or not enabled, this command does nothing, assuming that lack of filtering means that the protocol is enabled. . 
.IP \(bu If the \fBINPUT\fR chain has a rule that matches the specified protocol, then this command does nothing, assuming that whatever rule is installed reflects the system administrator's decisions. . .IP \(bu Otherwise, this command installs a rule that accepts traffic of the specified protocol. . .PP This command normally completes successfully, even if it does nothing. Only the failure of an attempt to insert a rule normally causes it to return an exit code other than 0. . The following options control the protocol to be enabled: . .IP "\fB\-\-protocol=\fIprotocol\fR" The name of the IP protocol to be enabled, such as \fBgre\fR or \fBtcp\fR. The default is \fBgre\fR. . .IP "\fB\-\-sport=\fIsport\fR" .IQ "\fB\-\-dport=\fIdport\fR" TCP or UDP source or destination port to match. These are optional and allowed only with \fB\-\-protocol=tcp\fR or \fB\-\-protocol=udp\fR. . .SH "The ``help'' command" . Prints a usage message and exits successfully. . .SH "OPTIONS" .PP In addition to the options listed for each command above, this option controls the behavior of several of \fBovs\-ctl\fR's commands. . .SH "EXIT STATUS" . \fBovs\-ctl\fR exits with status 0 on success and nonzero on failure. The \fBstart\fR command is considered to succeed if OVS is already started; the \fBstop\fR command is considered to succeed if OVS is already stopped. . .SH "ENVIRONMENT" . The following environment variables affect \fBovs\-ctl\fR: . .IP "\fBPATH\fR" \fBovs\-ctl\fR does not hardcode the location of any of the programs that it runs. \fBovs\-ctl\fR will add the \fIsbindir\fR and \fIbindir\fR that were specified at \fBconfigure\fR time to \fBPATH\fR, if they are not already present. . 
.IP "\fBOVS_LOGDIR\fR" .IQ "\fBOVS_RUNDIR\fR" .IQ "\fBOVS_DBDIR\fR" .IQ "\fBOVS_SYSCONFDIR\fR" .IQ "\fBOVS_PKGDATADIR\fR" .IQ "\fBOVS_BINDIR\fR" .IQ "\fBOVS_SBINDIR\fR" Setting one of these variables in the environment overrides the respective \fBconfigure\fR option, both for \fBovs\-ctl\fR itself and for the other Open vSwitch programs that it runs. . .SH "FILES" . \fBovs\-ctl\fR uses the following files: . .IP "\fBovs\-lib\fR" Shell function library used internally by \fBovs\-ctl\fR. It must be installed in the same directory as \fBovs\-ctl\fR. . .IP "\fIlogdir\fB/\fIdaemon\fB.log\fR" Per-daemon logfiles. . .IP "\fIrundir\fB/\fIdaemon\fB.pid\fR" Per-daemon pidfiles to track whether a daemon is running and with what process ID. . .IP "\fIpkgdatadir\fB/vswitch.ovsschema\fR" The OVS database schema used to initialize the database (use \fB\-\-db\-schema\fR to override this location). . .IP "\fIdbdir\fB/conf.db\fR" The OVS database (use \fB\-\-db\-file\fR to override this location). . .IP "\fIrundir\fB/openvswitch/db.sock\fR" The Unix domain socket used for local communication with \fBovsdb\-server\fR (use \fB\-\-db\-sock\fR to override this location). . .IP "\fIsysconfdir\fB/openvswitch/system\-id.conf\fR" The persistent system UUID created and read by \fB\-\-system\-id=random\fR. . .IP "\fIsysconfdir\fB/openvswitch/system\-type.conf\fR" .IQ "\fIsysconfdir\fB/openvswitch/system\-version.conf\fR" The \fBsystem\-type\fR and \fBsystem\-version\fR values stored in the database's \fBOpen_vSwitch\fR table when not specified as a command-line option. . .SH "EXAMPLE" . .PP The files \fBdebian/openvswitch\-switch.init\fR and \fBxenserver/etc_init.d_openvswitch\fR in the Open vSwitch source distribution are good examples of how to use \fBovs\-ctl\fR. . .SH "SEE ALSO" . \fBREADME\fR, \fBINSTALL.Linux\fR, \fBovsdb\-server\fR(8), \fBovs\-vswitchd\fR(8). openvswitch-2.0.1+git20140120/utilities/ovs-ctl.in000077500000000000000000000541561226605124000214220ustar00rootroot00000000000000#! 
/bin/sh # Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. case $0 in */*) dir0=`echo "$0" | sed 's,/[^/]*$,,'` ;; *) dir0=./ ;; esac . "$dir0/ovs-lib" || exit 1 for dir in "$sbindir" "$bindir" /sbin /bin /usr/sbin /usr/bin; do case :$PATH: in *:$dir:*) ;; *) PATH=$PATH:$dir ;; esac done ## ----- ## ## start ## ## ----- ## insert_mod_if_required () { # If openvswitch is already loaded then we're done. test -e /sys/module/openvswitch -o -e /sys/module/openvswitch_mod && \ return 0 # Load openvswitch. If that's successful then we're done. action "Inserting openvswitch module" modprobe openvswitch && return 0 # If the bridge module is loaded, then that might be blocking # openvswitch. Try to unload it, if there are no bridges. test -e /sys/module/bridge || return 1 bridges=`echo /sys/class/net/*/bridge | sed 's,/sys/class/net/,,g;s,/bridge,,g'` if test "$bridges" != "*"; then log_warning_msg "not removing bridge module because bridges exist ($bridges)" return 1 fi action "removing bridge module" rmmod bridge || return 1 # Try loading openvswitch again. action "Inserting openvswitch module" modprobe openvswitch } ovs_vsctl () { ovs-vsctl --no-wait "$@" } ovsdb_tool () { ovsdb-tool -vconsole:off "$@" } create_db () { action "Creating empty database $DB_FILE" ovsdb_tool create "$DB_FILE" "$DB_SCHEMA" } upgrade_db () { schemaver=`ovsdb_tool schema-version "$DB_SCHEMA"` if test ! 
-e "$DB_FILE"; then log_warning_msg "$DB_FILE does not exist" install -d -m 755 -o root -g root `dirname $DB_FILE` create_db elif test X"`ovsdb_tool needs-conversion "$DB_FILE" "$DB_SCHEMA"`" != Xno; then # Back up the old version. version=`ovsdb_tool db-version "$DB_FILE"` cksum=`ovsdb_tool db-cksum "$DB_FILE" | awk '{print $1}'` backup=$DB_FILE.backup$version-$cksum action "Backing up database to $backup" cp "$DB_FILE" "$backup" || return 1 # Compact database. This is important if the old schema did not enable # garbage collection (i.e. if it did not have any tables with "isRoot": # true) but the new schema does. In that situation the old database # may contain a transaction that creates a record followed by a # transaction that creates the first use of the record. Replaying that # series of transactions against the new database schema (as "convert" # does) would cause the record to be dropped by the first transaction, # then the second transaction would cause a referential integrity # failure (for a strong reference). # # Errors might occur on an Open vSwitch downgrade if ovsdb-tool doesn't # understand some feature of the schema used in the OVSDB version that # we're downgrading from, so we don't give up on error. action "Compacting database" ovsdb_tool compact "$DB_FILE" # Upgrade or downgrade schema. if action "Converting database schema" ovsdb_tool convert "$DB_FILE" "$DB_SCHEMA"; then : else log_warning_msg "Schema conversion failed, using empty database instead" rm -f "$DB_FILE" create_db fi fi } set_system_ids () { set ovs_vsctl set Open_vSwitch . OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'` set "$@" ovs-version="$OVS_VERSION" case $SYSTEM_ID in random) id_file=$etcdir/system-id.conf uuid_file=$etcdir/install_uuid.conf if test -e "$id_file"; then SYSTEM_ID=`cat "$id_file"` elif test -e "$uuid_file"; then # Migrate from old file name. . 
"$uuid_file" SYSTEM_ID=$INSTALLATION_UUID echo "$SYSTEM_ID" > "$id_file" elif SYSTEM_ID=`uuidgen`; then echo "$SYSTEM_ID" > "$id_file" else log_failure_msg "missing uuidgen, could not generate system ID" fi ;; '') log_failure_msg "system ID not configured, please use --system-id" ;; *) ;; esac set "$@" external-ids:system-id="\"$SYSTEM_ID\"" if test X"$SYSTEM_TYPE" != X; then set "$@" system-type="\"$SYSTEM_TYPE\"" else log_failure_msg "no default system type, please use --system-type" fi if test X"$SYSTEM_VERSION" != X; then set "$@" system-version="\"$SYSTEM_VERSION\"" else log_failure_msg "no default system version, please use --system-version" fi action "Configuring Open vSwitch system IDs" "$@" $extra_ids } check_force_cores () { if test X"$FORCE_COREFILES" = Xyes; then ulimit -c 67108864 fi } start_ovsdb () { check_force_cores if daemon_is_running ovsdb-server; then log_success_msg "ovsdb-server is already running" else # Create initial database or upgrade database schema. upgrade_db || return 1 # Start ovsdb-server. set ovsdb-server "$DB_FILE" for db in $EXTRA_DBS; do case $db in /*) ;; *) db=$dbdir/$db ;; esac if test ! -f "$db"; then log_warning_msg "$db (from \$EXTRA_DBS) does not exist." elif ovsdb-tool db-version "$db" >/dev/null; then set "$@" "$db" else log_warning_msg "$db (from \$EXTRA_DBS) cannot be read as a database (see error message above)" fi done set "$@" -vconsole:emer -vsyslog:err -vfile:info set "$@" --remote=punix:"$DB_SOCK" set "$@" --private-key=db:Open_vSwitch,SSL,private_key set "$@" --certificate=db:Open_vSwitch,SSL,certificate set "$@" --bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert start_daemon "$OVSDB_SERVER_PRIORITY" "$OVSDB_SERVER_WRAPPER" "$@" \ || return 1 # Initialize database settings. ovs_vsctl -- init -- set Open_vSwitch . 
db-version="$schemaver" \ || return 1 set_system_ids || return 1 if test X"$DELETE_BRIDGES" = Xyes; then for bridge in `ovs_vsctl list-br`; do ovs_vsctl del-br $bridge done fi fi } add_managers () { # Now that ovs-vswitchd has started and completed its initial # configuration, tell ovsdb-server to conenct to the remote managers. We # used to do this at ovsdb-server startup time, but waiting for # ovs-vswitchd to finish configuring means that remote managers see less # churn in the database at startup or restart. (For example, managers # won't briefly see empty datapath-id or ofport columns for records that # exist at startup.) action "Enabling remote OVSDB managers" \ ovs-appctl -t ovsdb-server ovsdb-server/add-remote \ db:Open_vSwitch,Open_vSwitch,manager_options } start_forwarding () { check_force_cores insert_mod_if_required || return 1 if daemon_is_running ovs-vswitchd; then log_success_msg "ovs-vswitchd is already running" else # Increase the limit on the number of open file descriptors. # On Linux, ovs-vswitchd needs about three file descriptors # per bridge and one file descriptor per bridge port, so this # allows a very large number of bridges and ports. ulimit -n 7500 # Start ovs-vswitchd. set ovs-vswitchd unix:"$DB_SOCK" set "$@" -vconsole:emer -vsyslog:err -vfile:info if test X"$MLOCKALL" != Xno; then set "$@" --mlockall fi start_daemon "$OVS_VSWITCHD_PRIORITY" "$OVS_VSWITCHD_WRAPPER" "$@" fi } ## ---- ## ## stop ## ## ---- ## stop_ovsdb () { stop_daemon ovsdb-server } stop_forwarding () { stop_daemon ovs-vswitchd } ## ----------------- ## ## force-reload-kmod ## ## ----------------- ## internal_interfaces () { # Outputs a list of internal interfaces: # # - There is an internal interface for every bridge, whether it # has an Interface record or not and whether the Interface # record's 'type' is properly set or not. # # - There is an internal interface for each Interface record whose # 'type' is 'internal'. 
# # But ignore interfaces that don't really exist. for d in `(ovs_vsctl --bare \ -- --columns=name find Interface type=internal \ -- list-br) | sort -u` do if test -e "/sys/class/net/$d"; then printf "%s " "$d" fi done } ovs_save () { bridges=`ovs_vsctl -- --real list-br` if [ -n "${bridges}" ] && \ "$datadir/scripts/ovs-save" "$1" ${bridges} > "$2"; then chmod +x "$2" return 0 fi [ -z "${bridges}" ] && return 0 } save_ofports_if_required () { # Save ofports if we are upgrading from a pre-1.10 branch. case `ovs-appctl version | sed 1q` in "ovs-vswitchd (Open vSwitch) 1."[0-9].*) action "Saving ofport values" ovs_save save-ofports \ "${script_ofports}" ;; esac } save_interfaces () { "$datadir/scripts/ovs-save" save-interfaces ${ifaces} \ > "${script_interfaces}" } restore_ofports () { [ -x "${script_ofports}" ] && \ action "Restoring ofport values" "${script_ofports}" } flow_restore_wait () { ovs_vsctl set open_vswitch . other_config:flow-restore-wait="true" } flow_restore_complete () { ovs_vsctl --if-exists remove open_vswitch . other_config \ flow-restore-wait="true" } restore_flows () { [ -x "${script_flows}" ] && \ action "Restoring saved flows" "${script_flows}" } restore_interfaces () { [ ! -x "${script_interfaces}" ] && return 0 action "Restoring interface configuration" "${script_interfaces}" rc=$? 
if test $rc = 0; then level=debug else level=err fi log="logger -p daemon.$level -t ovs-save" $log "interface restore script exited with status $rc:" $log -f "$script_interfaces" } init_restore_scripts () { script_interfaces=`mktemp` script_flows=`mktemp` script_ofports=`mktemp` trap 'rm -f "${script_interfaces}" "${script_flows}" "${script_ofports}"' 0 } force_reload_kmod () { ifaces=`internal_interfaces` action "Detected internal interfaces: $ifaces" true init_restore_scripts action "Saving flows" ovs_save save-flows "${script_flows}" save_ofports_if_required # Restart the database first, since a large database may take a # while to load, and we want to minimize forwarding disruption. stop_ovsdb start_ovsdb # Restore of ofports should happen before vswitchd is restarted. restore_ofports stop_forwarding if action "Saving interface configuration" save_interfaces; then : else log_warning_msg "Failed to save configuration, not replacing kernel module" start_forwarding add_managers exit 1 fi chmod +x "$script_interfaces" for dp in `ovs-dpctl dump-dps`; do action "Removing datapath: $dp" ovs-dpctl del-dp "$dp" done # try both old and new names in case this is post upgrade if test -e /sys/module/openvswitch_mod; then action "Removing openvswitch module" rmmod openvswitch_mod elif test -e /sys/module/openvswitch; then action "Removing openvswitch module" rmmod openvswitch fi # Start vswitchd by asking it to wait till flow restore is finished. flow_restore_wait start_forwarding # Restore saved flows and inform vswitchd that we are done. restore_flows flow_restore_complete add_managers restore_interfaces "$datadir/scripts/ovs-check-dead-ifs" } ## ------- ## ## restart ## ## ------- ## save_interfaces_if_required () { # Save interfaces if we are upgrading from a pre-1.10 branch. 
case `ovs-appctl version | sed 1q` in "ovs-vswitchd (Open vSwitch) 1."[0-9].*) ifaces=`internal_interfaces` action "Detected internal interfaces: $ifaces" true if action "Saving interface configuration" save_interfaces; then chmod +x "$script_interfaces" fi ;; esac } restart () { if daemon_is_running ovsdb-server && daemon_is_running ovs-vswitchd; then init_restore_scripts save_interfaces_if_required action "Saving flows" ovs_save save-flows "${script_flows}" save_ofports_if_required fi # Restart the database first, since a large database may take a # while to load, and we want to minimize forwarding disruption. stop_ovsdb start_ovsdb # Restore of ofports, if required, should happen before vswitchd is # restarted. restore_ofports stop_forwarding # Start vswitchd by asking it to wait till flow restore is finished. flow_restore_wait start_forwarding # Restore saved flows and inform vswitchd that we are done. restore_flows flow_restore_complete add_managers # Restore the interfaces if required. Return true even if restore fails. restore_interfaces || true } ## --------------- ## ## enable-protocol ## ## --------------- ## enable_protocol () { # Translate the protocol name to a number, because "iptables -n -L" prints # some protocols by name (despite the -n) and therefore we need to look for # both forms. # # (iptables -S output is more uniform but old iptables doesn't have it.) 
protonum=`grep "^$PROTOCOL[ ]" /etc/protocols | awk '{print $2}'` if expr X"$protonum" : X'[0-9]\{1,\}$' > /dev/null; then :; else log_failure_msg "unknown protocol $PROTOCOL" return 1 fi name=$PROTOCOL match="(\$2 == \"$PROTOCOL\" || \$2 == $protonum)" insert="iptables -I INPUT -p $PROTOCOL" if test X"$DPORT" != X; then name="$name to port $DPORT" match="$match && /dpt:$DPORT/" insert="$insert --dport $DPORT" fi if test X"$SPORT" != X; then name="$name from port $SPORT" match="$match && /spt:$SPORT/" insert="$insert --sport $SPORT" fi insert="$insert -j ACCEPT" if (iptables -n -L INPUT) >/dev/null 2>&1; then if iptables -n -L INPUT | awk "$match { n++ } END { exit n == 0 }" then # There's already a rule for this protocol. Don't override it. log_success_msg "iptables already has a rule for $name, not explicitly enabling" else action "Enabling $name with iptables" $insert fi elif (iptables --version) >/dev/null 2>&1; then action "cannot list iptables rules, not adding a rule for $name" else action "iptables binary not installed, not adding a rule for $name" fi } ## ---- ## ## main ## ## ---- ## set_defaults () { SYSTEM_ID= DELETE_BRIDGES=no DAEMON_CWD=/ FORCE_COREFILES=yes MLOCKALL=yes OVSDB_SERVER_PRIORITY=-10 OVS_VSWITCHD_PRIORITY=-10 OVSDB_SERVER_WRAPPER= OVS_VSWITCHD_WRAPPER= DB_FILE=$dbdir/conf.db DB_SOCK=$rundir/db.sock DB_SCHEMA=$datadir/vswitch.ovsschema EXTRA_DBS= PROTOCOL=gre DPORT= SPORT= type_file=$etcdir/system-type.conf version_file=$etcdir/system-version.conf if test -e "$type_file" ; then SYSTEM_TYPE=`cat $type_file` SYSTEM_VERSION=`cat $version_file` elif (lsb_release --id) >/dev/null 2>&1; then SYSTEM_TYPE=`lsb_release --id -s` system_release=`lsb_release --release -s` system_codename=`lsb_release --codename -s` SYSTEM_VERSION="${system_release}-${system_codename}" else SYSTEM_TYPE=unknown SYSTEM_VERSION=unknown fi } usage () { set_defaults cat <&2 "$0: unknown option \"$arg\" (use --help for help)" return fi eval $var=\$value } daemons () { echo 
ovsdb-server ovs-vswitchd } set_defaults extra_ids= command= for arg do case $arg in -h | --help) usage ;; -V | --version) echo "$0 (Open vSwitch) $VERSION" exit 0 ;; --external-id=*) value=`expr X"$arg" : 'X[^=]*=\(.*\)'` case $value in *=*) extra_ids="$extra_ids external-ids:$value" ;; *) echo >&2 "$0: --external-id argument not in the form \"key=value\"" exit 1 ;; esac ;; --[a-z]*=*) option=`expr X"$arg" : 'X--\([^=]*\)'` value=`expr X"$arg" : 'X[^=]*=\(.*\)'` type=string set_option ;; --no-[a-z]*) option=`expr X"$arg" : 'X--no-\(.*\)'` value=no type=bool set_option ;; --[a-z]*) option=`expr X"$arg" : 'X--\(.*\)'` value=yes type=bool set_option ;; -*) echo >&2 "$0: unknown option \"$arg\" (use --help for help)" exit 1 ;; *) if test X"$command" = X; then command=$arg else echo >&2 "$0: exactly one non-option argument required (use --help for help)" exit 1 fi ;; esac done case $command in start) start_ovsdb start_forwarding add_managers ;; stop) stop_forwarding stop_ovsdb ;; restart) restart ;; status) rc=0 for daemon in `daemons`; do daemon_status $daemon || rc=$? done exit $rc ;; version) for daemon in `daemons`; do $daemon --version done ;; force-reload-kmod) force_reload_kmod ;; load-kmod) insert_mod_if_required ;; enable-protocol) enable_protocol ;; help) usage ;; '') echo >&2 "$0: missing command name (use --help for help)" exit 1 ;; *) echo >&2 "$0: unknown command \"$command\" (use --help for help)" exit 1 ;; esac openvswitch-2.0.1+git20140120/utilities/ovs-dev.py000077500000000000000000000220151226605124000214250ustar00rootroot00000000000000#!/usr/bin/python # Copyright (c) 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import optparse import os import shutil import subprocess import sys import tempfile import time ENV = os.environ HOME = ENV["HOME"] OVS_SRC = HOME + "/ovs" ROOT = HOME + "/root" PATH = "%(ovs)s/utilities:%(ovs)s/ovsdb:%(ovs)s/vswitchd" % {"ovs": OVS_SRC} ENV["CFLAGS"] = "-g -O0" ENV["PATH"] = PATH + ":" + ENV["PATH"] options = None parser = None commands = [] def _sh(*args, **kwargs): print "------> " + " ".join(args) shell = len(args) == 1 if kwargs.get("capture", False): proc = subprocess.Popen(args, stdout=subprocess.PIPE, shell=shell) return proc.stdout.readlines() elif kwargs.get("check", True): subprocess.check_call(args, shell=shell) else: subprocess.call(args, shell=shell) def uname(): return _sh("uname", "-r", capture=True)[0].strip() def conf(): tag() if options.clang: ENV["CC"] = "clang" configure = ["./configure", "--prefix=" + ROOT, "--localstatedir=" + ROOT, "--with-logdir=%s/log" % ROOT, "--with-rundir=%s/run" % ROOT, "--with-linux=/lib/modules/%s/build" % uname(), "--with-dbdir=" + ROOT] if options.werror: configure.append("--enable-Werror") if options.cache_time: configure.append("--enable-cache-time") if options.mandir: configure.append("--mandir=" + options.mandir) _sh("./boot.sh") _sh(*configure) commands.append(conf) def make(args=""): make = "make -s -j 8 " + args try: _sh("cgcc", "--version", capture=True) # XXX: For some reason the clang build doesn't place nicely with # sparse. At some point this needs to be figured out and this check # removed. 
if not options.clang: make += " C=1" except OSError: pass _sh(make) commands.append(make) def check(): make("check") commands.append(check) def tag(): ctags = ['ctags', '-R', '-f', '.tags'] try: _sh(*(ctags + ['--exclude="datapath/"'])) except: try: _sh(*ctags) # Some versions of ctags don't have --exclude except: pass try: _sh('cscope', '-R', '-b') except: pass commands.append(tag) def kill(): for proc in ["ovs-vswitchd", "ovsdb-server"]: if os.path.exists("%s/run/openvswitch/%s.pid" % (ROOT, proc)): _sh("ovs-appctl", "-t", proc, "exit", check=False) time.sleep(.1) _sh("sudo", "killall", "-q", "-2", proc, check=False) commands.append(kill) def reset(): kill() if os.path.exists(ROOT): shutil.rmtree(ROOT) for dp in _sh("ovs-dpctl dump-dps", capture=True): _sh("ovs-dpctl", "del-dp", dp.strip()) commands.append(reset) def run(): kill() for d in ["log", "run"]: d = "%s/%s" % (ROOT, d) shutil.rmtree(d, ignore_errors=True) os.makedirs(d) pki_dir = ROOT + "/pki" if not os.path.exists(pki_dir): os.mkdir(pki_dir) os.chdir(pki_dir) _sh("ovs-pki init") _sh("ovs-pki req+sign ovsclient") os.chdir(OVS_SRC) if not os.path.exists(ROOT + "/conf.db"): _sh("ovsdb-tool", "create", ROOT + "/conf.db", OVS_SRC + "/vswitchd/vswitch.ovsschema") opts = ["--pidfile", "--log-file", "--enable-dummy"] _sh(*(["ovsdb-server", "--remote=punix:%s/run/db.sock" % ROOT, "--remote=db:Open_vSwitch,Open_vSwitch,manager_options", "--private-key=db:Open_vSwitch,SSL,private_key", "--certificate=db:Open_vSwitch,SSL,certificate", "--bootstrap-ca-cert=db:Open_vSwitch,SSL,ca_cert", "--detach", "-vconsole:off"] + opts)) _sh("ovs-vsctl --no-wait --bootstrap set-ssl %s/ovsclient-privkey.pem" \ " %s/ovsclient-cert.pem %s/vswitchd.cacert" % (pki_dir, pki_dir, pki_dir)) version = _sh("ovs-vsctl --no-wait --version", capture=True) version = version[0].strip().split()[3] root_uuid = _sh("ovs-vsctl --no-wait --bare list Open_vSwitch", capture=True)[0].strip() _sh("ovs-vsctl --no-wait set Open_vSwitch %s ovs_version=%s" 
% (root_uuid, version)) cmd = [OVS_SRC + "/vswitchd/ovs-vswitchd"] if options.gdb: cmd = ["gdb", "--args"] + cmd elif options.valgrind: cmd = ["valgrind", "--track-origins=yes", "--suppressions=%s/tests/glibc.supp" % OVS_SRC, "--suppressions=%s/tests/openssl.supp" % OVS_SRC] + cmd else: cmd = ["sudo"] + cmd opts = opts + ["-vconsole:off", "--detach"] _sh(*(cmd + opts)) commands.append(run) def modinst(): if not os.path.exists("/lib/modules"): print "Missing modules directory. Is this a Linux system?" sys.exit(1) try: _sh("rmmod", "openvswitch") except subprocess.CalledProcessError, e: pass # Module isn't loaded try: _sh("rm /lib/modules/%s/extra/openvswitch.ko" % uname()) except subprocess.CalledProcessError, e: pass # Module isn't installed conf() make() make("modules_install") _sh("modprobe", "openvswitch") _sh("dmesg | grep openvswitch | tail -1") commands.append(modinst) def env(): print "export PATH=" + ENV["PATH"] commands.append(env) def doc(): parser.print_help() print \ """ This program is designed to help developers build and run Open vSwitch without necessarily needing to know the gory details. Given some basic requirements (described below), it can be used to build and run Open vSwitch, keeping runtime files in the user's home directory. Basic Configuration: # This section can be run as a script on ubuntu systems. # First install the basic requirements needed to build Open vSwitch. sudo apt-get install git build-essential libtool autoconf pkg-config \\ libssl-dev pkg-config gdb linux-headers-`uname -r` # Next clone the Open vSwitch source. git clone git://git.openvswitch.org/openvswitch %(ovs)s # Setup environment variables. `%(v)s env` # Build the switch. %(v)s conf make # Install the kernel module sudo insmod %(ovs)s/datapath/linux/openvswitch.ko # Run the switch. %(v)s run Commands: conf - Configure the ovs source. make - Build the source (must have been configured). check - Run the unit tests. tag - Run ctags and cscope over the source. 
kill - Kill all running instances of ovs. reset - Reset any runtime configuration in %(run)s. run - Run ovs. modinst - Build ovs and install the kernel module. env - Print the required path environment variable. doc - Print this message. """ % {"ovs": OVS_SRC, "v": sys.argv[0], "run": ROOT} sys.exit(0) commands.append(doc) def main(): global options global parser description = "Open vSwitch developer configuration. Try `%prog doc`." cmd_names = [c.__name__ for c in commands] parser = optparse.OptionParser(usage="usage: %prog" + " [options] [%s] ..." % "|".join(cmd_names), description=description) group = optparse.OptionGroup(parser, "conf") group.add_option("--disable-Werror", dest="werror", action="store_false", default=True, help="compile without the Werror flag") group.add_option("--cache-time", dest="cache_time", action="store_true", help="configure with cached timing") group.add_option("--mandir", dest="mandir", metavar="MANDIR", help="configure the man documentation install directory") parser.add_option_group(group) group = optparse.OptionGroup(parser, "run") group.add_option("-g", "--gdb", dest="gdb", action="store_true", help="run ovs-vswitchd under gdb") group.add_option("--valgrind", dest="valgrind", action="store_true", help="run ovs-vswitchd under valgrind") group.add_option("--clang", dest="clang", action="store_true", help="build ovs-vswitchd with clang") parser.add_option_group(group) options, args = parser.parse_args() for arg in args: if arg not in cmd_names: print "Unknown argument " + arg doc() try: os.chdir(OVS_SRC) except OSError: print "Missing %s." % OVS_SRC doc() for arg in args: for cmd in commands: if arg == cmd.__name__: cmd() if __name__ == '__main__': main() openvswitch-2.0.1+git20140120/utilities/ovs-dpctl-top.8.in000066400000000000000000000102421226605124000226750ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .TH ovs\-dpctl\-top "8" "@VERSION@" "Open vSwitch" "Open vSwitch Manual" . 
.SH NAME \fBovs\-dpctl\-top\fR \- Top like behavior for ovs\-dpctl dump\-flows . .SH SYNOPSIS \fBovs\-dpctl\-top\fR [\-h] [\-v] [\-f FLOWFILES] [\-V] [\-s] [\-\-host HOST] [\-a | \-\-accumulate] [\-\-accumulate\-decay ACCUMULATEDECAY] [\-d DELAY] . .SH DESCRIPTION .PP This program summarizes \fBovs\-dpctl\fR flow content by aggregating the number of packets, total bytes and occurrence of the following fields: .IP \- Datapath in_port .IP \- Ethernet type .IP \- Source and destination MAC addresses .IP \- IP protocol .IP \- Source and destination IPv4 addresses .IP \- Source and destination IPv6 addresses .IP \- UDP and TCP destination port .IP \- Tunnel source and destination addresses . .SS "Output shows five values:" .IP \- FIELDS: the flow fields, for example in_port(1). .IP \- COUNT: the number of lines in the dump\-flow output that contain the flow field. .IP \- PACKETS: the total number of packets containing the flow field. .IP \- BYTES: the total number of bytes containing the flow field. If units are not present then values are in bytes. .IP \- AVERAGE: the average packet size (BYTES/PACKETS). .PP .SS "Top Behavior" .PP While in top mode, the default behavior, the following single character commands are supported: .IP a \- toggles top in accumulate and live mode. Accumulate mode is described below. .IP s \- toggles which column is used to sort content in decreasing order. A DESC title is placed over the column. .IP _ \- a space indicating to collect dump\-flow content again .IP h \- halt output. Any character will restart sampling .IP f \- cycle through flow fields .IP q \- q for quit. .PP .SS "Accumulate Mode" .PP There are two supported modes: live and accumulate. The default is live. The parameter \fB\-\-accumulate\fR or the 'a' character in top mode enables the latter. In live mode, recent dump\-flow content is presented. Accumulate mode, by contrast, keeps track of the prior historical information until the flow is reset, rather than discarding it when the flow is purged. 
Reset flows are determined when the packet count for a flow has decreased from its previous sample. There is one caveat: to keep the system from eventually running out of memory, any flows that have not been refreshed within the accumulate\-decay period are purged. The goal here is to free memory of flows that are not active. Statistics are not decremented. Their purpose is to reflect the overall history of the flow fields. .PP .SS "Debugging Errors" .PP Parsing errors are counted and displayed in the status line at the beginning of the output. Use the \fB\-\-verbose\fR option with \fB\-\-script\fR to see what output was not parsed, like this: .PP $ ovs\-dpctl dump\-flows | ovs\-dpctl\-top \fB\-\-script\fR \fB\-\-verbose\fR .PP Error messages will identify content that failed to parse. .PP .SS "Access Remote Hosts" .PP The \fB\-\-host\fR must follow the format user@hostname. This script simply calls \&'ssh user@hostname' without checking for login credentials; therefore public keys should be installed on the system identified by hostname, such as: .PP $ ssh\-copy\-id user@hostname .PP Consult ssh\-copy\-id man pages for more details. .PP .SS "Expected usage" .PP $ ovs\-dpctl\-top .PP or to run as a script: .PP $ ovs\-dpctl dump\-flows > dump\-flows.log .PP $ ovs\-dpctl\-top \fB\-\-script\fR \fB\-\-flow\-file\fR dump\-flows.log .SS "OPTIONS" .TP \fB\-h\fR, \fB\-\-help\fR show this help message and exit. .TP \fB\-v\fR, \fB\-\-version\fR show program's version number and exit. .TP \fB\-f\fR FLOWFILES, \fB\-\-flow\-file\fR FLOWFILES file containing flows from ovs\-dpctl dump\-flow. .TP \fB\-V\fR, \fB\-\-verbose\fR enable debug level verbosity. .TP \fB\-s\fR, \fB\-\-script\fR Run from a script (no user interface). .TP \fB\-\-host\fR HOST Specify a user@host for retrieving flows; see Access Remote Hosts for more information. .TP \fB\-a\fR, \fB\-\-accumulate\fR Accumulate dump\-flow content. .TP \fB\-\-accumulate\-decay\fR ACCUMULATEDECAY Decay old accumulated flows. 
The default is 5 minutes. A value of 0 disables decay. .TP \fB\-d\fR DELAY, \fB\-\-delay\fR DELAY Delay in milliseconds to collect dump\-flow content (sample rate). openvswitch-2.0.1+git20140120/utilities/ovs-dpctl-top.in000077500000000000000000001650631226605124000225460ustar00rootroot00000000000000#! @PYTHON@ # # Copyright (c) 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # # The approximate_size code was copied from # http://getpython3.com/diveintopython3/your-first-python-program.html#divingin # which is licensed under # "Dive Into Python 3," Copyright 2011 Mark Pilgrim, # used under a Creative Commons Attribution-Share-Alike license: # http://creativecommons.org/licenses/by-sa/3.0/ # # """Top like behavior for ovs-dpctl dump-flows output. This program summarizes ovs-dpctl flow content by aggregating the number of packets, total bytes and occurrence of the following fields: - Datapath in_port - Ethernet type - Source and destination MAC addresses - IP protocol - Source and destination IPv4 addresses - Source and destination IPv6 addresses - UDP and TCP destination port - Tunnel source and destination addresses Output shows four values: - FIELDS: the flow fields for example in_port(1). - PACKETS: the total number of packets containing the flow field. - BYTES: the total number of bytes containing the flow field. If units are not present then values are in bytes. - AVERAGE: the average packets size (BYTES/PACKET). 
- COUNT: the number of lines in the dump-flow output contain the flow field. Top Behavior While in top mode, the default behavior, the following single character commands are supported: a - toggles top in accumulate and live mode. Accumulate mode is described below. s - toggles which column is used to sort content in decreasing order. A DESC title is placed over the column. _ - a space indicating to collect dump-flow content again h - halt output. Any character will restart sampling f - cycle through flow fields q - q for quit. Accumulate Mode There are two supported modes: live and accumulate. The default is live. The parameter --accumulate or the 'a' character in top mode enables the latter. In live mode, recent dump-flow content is presented. Where as accumulate mode keeps track of the prior historical information until the flow is reset not when the flow is purged. Reset flows are determined when the packet count for a flow has decreased from its previous sample. There is one caveat, eventually the system will run out of memory if, after the accumulate-decay period any flows that have not been refreshed are purged. The goal here is to free memory of flows that are not active. Statistics are not decremented. Their purpose is to reflect the overall history of the flow fields. Debugging Errors Parsing errors are counted and displayed in the status line at the beginning of the output. Use the --verbose option with --script to see what output was not parsed, like this: $ ovs-dpctl dump-flows | ovs-dpctl-top --script --verbose Error messages will identify content that failed to parse. Access Remote Hosts The --host must follow the format user@hostname. This script simply calls 'ssh user@Hostname' without checking for login credentials therefore public keys should be installed on the system identified by hostname, such as: $ ssh-copy-id user@hostname Consult ssh-copy-id man pages for more details. 
def _addr_to_network(ip_str, family, pack_format):
    """ Shared implementation of ipv4_to_network and ipv6_to_network.

    ip_str      -- "address" or "address/mask".  The mask is either an
                   address-style netmask (255.255.0.0, ffff::) or a decimal
                   prefix length (24, 64).
    family      -- socket.AF_INET or socket.AF_INET6.
    pack_format -- struct format describing the packed address as a
                   sequence of network-order 16 bit words.

    Returns the network address as a string.  When ip_str does not split
    into exactly one address and one mask it is returned unchanged.

    Raises ValueError for a prefix length wider than the address; an
    address or netmask that socket.inet_pton() rejects raises whatever
    inet_pton raises.
    """
    try:
        (ip, mask) = ip_str.split("/")
    except ValueError:
        # just an ip address no mask.
        return ip_str

    ip_t = struct.unpack(pack_format, socket.inet_pton(family, ip))

    if (mask.isdigit()):
        # Prefix length: expand it into the same 16 bit word layout that
        # an address-style netmask unpacks to.
        bits = 16 * len(ip_t)
        prefix = int(mask)
        if (prefix > bits):
            raise ValueError("prefix length out of range in %s" % ip_str)
        mask_n = ((1 << bits) - 1) ^ ((1 << (bits - prefix)) - 1)
        mask_t = [(mask_n >> shift) & 0xffff
                  for shift in range(bits - 16, -1, -16)]
    else:
        mask_t = struct.unpack(pack_format, socket.inet_pton(family, mask))

    network_n = [ii & jj for (ii, jj) in zip(ip_t, mask_t)]

    return socket.inet_ntop(family, struct.pack(pack_format, *network_n))


def ipv4_to_network(ip_str):
    """ Calculate the network given a ipv4/mask value.
    The mask may be a dotted netmask or a prefix length.
    If a mask is not present simply return ip_str.
    """
    return _addr_to_network(ip_str, socket.AF_INET, '!HH')


def ipv6_to_network(ip_str):
    """ Calculate the network given a ipv6/mask value.
    The mask may be an address-style netmask or a prefix length.
    If a mask is not present simply return ip_str.
    """
    return _addr_to_network(ip_str, socket.AF_INET6, '!HHHHHHHH')
def args_get():
    """ Read program parameters and configure logging.

    Parses sys.argv with argparse and sets the root logging level from
    the --verbose option.

    Returns the argparse namespace with these attributes:
      flowFiles       -- list of flow file names, or None to read stdin
      verbose         -- logging level (logging.CRITICAL or logging.DEBUG)
      top             -- False when --script was given, True otherwise
      host            -- value of --host, or None
      accumulate      -- True when --accumulate was given
      accumulateDecay -- decay period as a float (default 300.0)
      delay           -- sample delay in milliseconds (default 1000)
    """

    parser = argparse.ArgumentParser(
                          formatter_class=argparse.RawDescriptionHelpFormatter,
                          description=__doc__)
    parser.add_argument("-v", "--version", version="@VERSION@",
                        action="version", help="show version")
    ##
    # None is a special value indicating to read flows from stdin.
    # This handles the case
    #   ovs-dpctl dump-flows | ovs-dpctl-flows.py
    parser.add_argument("-f", "--flow-file", dest="flowFiles", default=None,
                        action="append",
                        help="file containing flows from ovs-dpctl dump-flow")
    parser.add_argument("-V", "--verbose", dest="verbose",
                        default=logging.CRITICAL,
                        action="store_const", const=logging.DEBUG,
                        help="enable debug level verbosity")
    parser.add_argument("-s", "--script", dest="top", action="store_false",
                        help="Run from a script (no user interface)")
    # Adjacent string literals are concatenated as-is, so each fragment
    # must carry its own separating space.
    parser.add_argument("--host", dest="host",
                        help="Specify a user@host for retrieving flows; see "
                             "Access Remote Hosts for more information")
    parser.add_argument("-a", "--accumulate", dest="accumulate",
                        action="store_true", default=False,
                        help="Accumulate dump-flow content")
    parser.add_argument("--accumulate-decay", dest="accumulateDecay",
                        default=5.0 * 60, type=float,
                        help="Decay old accumulated flows after this many "
                             "seconds. The default is 300 (5 minutes). "
                             "A value of 0 disables decay.")
    parser.add_argument("-d", "--delay", dest="delay", type=int,
                        default=1000,
                        help="Delay in milliseconds to collect dump-flow "
                             "content (sample rate).")

    args = parser.parse_args()

    logging.basicConfig(level=args.verbose)

    return args
# pylint: disable-msg=R0903
class SumData(object):
    """ Interface that all data going into SumDb must implement.

    Holds the flow field and its corresponding count, total packets,
    total bytes and calculates average.

    __repr__ is used as key into SumData singleton.
    __str__ is used as human readable output.
    """

    def __init__(self, field_type, field, packets, flow_bytes, key):
        """ Create the summary for one flow field seen on one line.

        field_type -- kind of field, for example "eth" or "in_port"
        field      -- text shown to the user for this field
        packets    -- packet count; converted with int()
        flow_bytes -- byte count; converted with int()
        key        -- string identifying which objects may be summed
        """
        # Count is the number of lines in the dump-flow log.
        self.field_type = field_type
        self.field = field
        self.count = 1
        self.packets = int(packets)
        self.bytes = int(flow_bytes)
        self.key = key

    def decrement(self, decr_packets, decr_bytes, decr_count):
        """ Decrement content to calculate delta from previous flow sample."""
        self.packets -= decr_packets
        self.bytes -= decr_bytes
        self.count -= decr_count

    def __iadd__(self, other):
        """ Add count, packets and bytes of other to this object.
        Raises ValueError when the keys differ. """

        if (self.key != other.key):
            raise ValueError("adding two unrelated types")

        self.count += other.count
        self.packets += other.packets
        self.bytes += other.bytes
        return self

    def __isub__(self, other):
        """ Subtract count, packets and bytes of other from this object.
        Raises ValueError when the keys differ. """

        if (self.key != other.key):
            raise ValueError("subtracting two unrelated types")

        self.count -= other.count
        self.packets -= other.packets
        self.bytes -= other.bytes
        return self

    @property
    def average(self):
        """ Average packet size (bytes / packets) as a float.
        Returns 0.0 when no packets have been counted. """
        if (self.packets == 0):
            return float(0.0)
        return float(self.bytes) / float(self.packets)

    def __str__(self):
        """ Used for debugging. """
        return "%s %s %s %s" % (self.field, self.count,
                                   self.packets, self.bytes)

    def __repr__(self):
        """ Used as key in the FlowDB table. """
        return self.key
def get_terminal_size():
    """ Return the terminal size as a (rows, columns) tuple.

    The window size is requested with the TIOCGWINSZ ioctl on stdin,
    stdout and stderr in that order; the first descriptor that reports a
    non-zero size wins.  When no descriptor answers (for example when all
    three are redirected) the size is assumed to be (25, 80).
    """
    # TIOCGWINSZ fills a struct winsize; only its first two shorts
    # (rows, columns) are needed, so a 4 byte buffer is enough.
    request = struct.pack('hh', 0, 0)

    for fd_io in [0, 1, 2]:
        try:
            result = struct.unpack('hh',
                                   fcntl.ioctl(fd_io, termios.TIOCGWINSZ,
                                               request))
        except IOError:
            # Not a terminal (or not open); try the next descriptor.
            continue

        if (result != (0, 0)):
            # Stop at the first usable answer so that a later descriptor
            # that is not a terminal cannot discard it.
            return result

    # Maybe we can't get the width. In that case assume (25, 80)
    return (25, 80)
def column_picker(order, obj):
    """ Return the value of obj for the column numbered order.

    1 selects count, 2 packets, 3 bytes and 4 average.  Any other value
    raises ValueError.
    """
    columns = ((1, "count"),
               (2, "packets"),
               (3, "bytes"),
               (4, "average"))

    for (number, attribute) in columns:
        if (order == number):
            return getattr(obj, attribute)

    raise ValueError("order outside of range %s" % order)
This is # dynamically calculated. The first column is special. We need a # way to indicate which field are presented. self._descs = [RowMeta("", title_rjust), RowMeta("", title_rjust), RowMeta("", title_rjust), RowMeta("", title_rjust), RowMeta("", title_rjust)] self._column_sort_select = 0 self.column_select_event() self._titles = [ RowMeta(Columns.FIELDS, title_center), RowMeta(Columns.COUNT, title_rjust), RowMeta(Columns.PACKETS, title_rjust), RowMeta(Columns.BYTES, title_rjust), RowMeta(Columns.AVERAGE, title_rjust) ] self._datas = [ RowMeta(None, fmt_field), RowMeta(None, fmt_count), RowMeta(None, fmt_packet), RowMeta(None, fmt_bytes), RowMeta(None, fmt_avg) ] ## # _field_types hold which fields are displayed in the field # column, with the keyword all implying all fields. ## self._field_types = ["all"] + [ii.field_type for ii in OUTPUT_FORMAT] ## # The default is to show all field types. ## self._field_type_select = -1 self.field_type_toggle() def _field_type_select_get(self): """ Return which field type to display. """ return self._field_types[self._field_type_select] def field_type_toggle(self): """ toggle which field types to show. """ self._field_type_select += 1 if (self._field_type_select >= len(self._field_types)): self._field_type_select = 0 value = Columns.FIELDS + " (%s)" % self._field_type_select_get() self._titles[0].label = value def column_select_event(self): """ Handles column select toggle. """ self._descs[self._column_sort_select].label = "" for _ in range(len(self._cols)): self._column_sort_select += 1 if (self._column_sort_select >= len(self._cols)): self._column_sort_select = 0 # Now look for the next sortable column if (self._cols[self._column_sort_select].sortable): break self._descs[self._column_sort_select].label = "DESC" def console_width_set(self, console_width): """ Adjust the output given the new console_width. 
""" self._console_width = console_width spaces = len(self._cols) - 1 ## # Calculating column width can be tedious but important. The # flow field value can be long. The goal here is to dedicate # fixed column space for packets, bytes, average and counts. Give the # remaining space to the flow column. When numbers get large # transition output to output generated by approximate_size which # limits output to ###.# XiB in other words 9 characters. ## # At this point, we know the maximum length values. We may # truncate the flow column to get everything to fit. self._cols[0].width = 0 values_max_length = sum([ii.width for ii in self._cols]) + spaces flow_max_length = console_width - values_max_length self._cols[0].width = flow_max_length def format(self, flow_db): """ shows flows based on --script parameter.""" rc = [] ## # Top output consists of # Title # Column title (2 rows) # data # statistics and status ## # Title ## rc.append("Flow Summary".center(self._console_width)) stats = " Total: %(flow_total)s errors: %(flow_errors)s " % \ flow_db.flow_stats_get() accumulate = flow_db.accumulate_get() if (accumulate): stats += "Accumulate: on " else: stats += "Accumulate: off " duration = datetime.datetime.now() - self._start_time stats += "Duration: %s " % str(duration) rc.append(stats.ljust(self._console_width)) ## # 2 rows for columns. ## # Indicate which column is in descending order. rc.append(" ".join([ii.fmt(ii.label, col.width) for (ii, col) in zip(self._descs, self._cols)])) rc.append(" ".join([ii.fmt(ii.label, col.width) for (ii, col) in zip(self._titles, self._cols)])) ## # Data. ## for dd in flow_db.field_values_in_order(self._field_type_select_get(), self._column_sort_select): rc.append(" ".join([ii.fmt(dd, col.width) for (ii, col) in zip(self._datas, self._cols)])) return rc def curses_screen_begin(): """ begin curses screen control. 
class FlowDB:
    """ Implements live vs accumulate mode.

    Flows are stored as key value pairs. The key consists of the content
    prior to stat fields. The value portion consists of stats in a dictionary
    form.

    TODO: add filtering here.
    """
    def __init__(self, accumulate):
        """ accumulate -- True to keep flows between samples, False to
        clear them every time begin() is called. """
        self._accumulate = accumulate
        self._error_count = 0
        # Values are (stats, last update time.)
        # The last update time is used for aging.
        self._flow_lock = threading.Lock()
        # This dictionary holds individual flows.
        self._flows = {}
        # This dictionary holds aggregate of flow fields.
        self._fields = {}

    def accumulate_get(self):
        """ Return the current accumulate state. """
        return self._accumulate

    def accumulate_toggle(self):
        """ toggle accumulate flow behavior. """
        self._accumulate = not self._accumulate

    def begin(self):
        """ Indicate the beginning of processing flow content.
        if accumulate is false clear current set of flows. """

        if (not self._accumulate):
            self._flow_lock.acquire()
            try:
                self._flows.clear()
            finally:
                self._flow_lock.release()
            self._fields.clear()

    def flow_line_add(self, line):
        """ Split a line from a ovs-dpctl dump-flow into key and stats.

        The order of the content in the flow should be:
        - flow content
        - stats for the flow
        - actions

        This method also assumes that the dump flow output does not
        change order of fields of the same flow.

        Raises ValueError when the line cannot be parsed; the error is
        logged and counted before it is re-raised.
        """

        line = line.rstrip("\n")
        (fields, stats, _) = flow_line_split(line)

        try:
            fields_dict = elements_to_dict(fields)

            if (len(fields_dict) == 0):
                raise ValueError("flow fields are missing %s" % line)

            stats_dict = elements_to_dict(stats)
            if (len(stats_dict) == 0):
                raise ValueError("statistics are missing %s." % line)

            ##
            # In accumulate mode, the Flow database can reach 10,000's of
            # persistent flows. The interaction of the script with this many
            # flows is too slow. Instead, delta are sent to the flow_db
            # database allow incremental changes to be done in O(m) time
            # where m is the current flow list, instead of iterating over
            # all flows in O(n) time where n is the entire history of flows.
            key = ",".join(fields)

            self._flow_lock.acquire()
            try:
                (stats_old_dict, _) = self._flows.get(key, (None, None))
            finally:
                self._flow_lock.release()

            self.flow_event(fields_dict, stats_old_dict, stats_dict)

        except ValueError:
            # sys.exc_info() is used instead of binding the exception in
            # the except clause so the same source is valid for every
            # Python version.
            logging.error(sys.exc_info()[1])
            self._error_count += 1
            raise

        self._flow_lock.acquire()
        try:
            self._flows[key] = (stats_dict, datetime.datetime.now())
        finally:
            self._flow_lock.release()

    def decay(self, decayTimeInSeconds):
        """ Purge flows not updated within decayTimeInSeconds.

        Each purged flow is removed from the flow table and its field
        aggregates are decremented through field_dec().
        """
        now = datetime.datetime.now()

        # Work on a snapshot: flows are deleted below, and the table must
        # not change size while it is being iterated.
        self._flow_lock.acquire()
        try:
            snapshot = list(self._flows.items())
        finally:
            self._flow_lock.release()

        for (key, value) in snapshot:
            (stats_dict, updateTime) = value
            delta = now - updateTime
            # timedelta.seconds alone drops whole days.
            elapsed = (delta.days * 86400) + delta.seconds

            if (elapsed > decayTimeInSeconds):
                self._flow_lock.acquire()
                try:
                    # Skip flows that were cleared or refreshed after the
                    # snapshot was taken.
                    if (self._flows.get(key) is value):
                        del self._flows[key]

                        fields_dict = elements_to_dict(flow_line_iter(key))
                        matches = flow_aggregate(fields_dict, stats_dict)
                        for match in matches:
                            self.field_dec(match)

                finally:
                    self._flow_lock.release()

    def flow_stats_get(self):
        """ Return statistics in a form of a dictionary.
        Keys are "flow_total" and "flow_errors". """
        rc = None
        self._flow_lock.acquire()
        try:
            rc = {"flow_total": len(self._flows),
                  "flow_errors": self._error_count}
        finally:
            self._flow_lock.release()
        return rc

    def field_types_get(self):
        """ Return the set of types stored in the singleton. """
        types = set((ii.field_type for ii in self._fields.values()))
        return types

    def field_add(self, data):
        """ Collect dump-flow data to sum number of times item appears. """
        current = self._fields.get(repr(data), None)
        if (current is None):
            # Store a copy so later sums do not modify the caller's object.
            current = copy.copy(data)
        else:
            current += data
        self._fields[repr(current)] = current

    def field_dec(self, data):
        """ Subtract dump-flow data from its aggregate.
        The aggregate is dropped once its count reaches zero.
        Raises ValueError when there is no aggregate for data. """
        current = self._fields.get(repr(data), None)
        if (current is None):
            raise ValueError("decrementing field missing %s" % repr(data))

        current -= data
        self._fields[repr(current)] = current
        if (current.count == 0):
            del self._fields[repr(current)]

    def field_values_in_order(self, field_type_select, column_order):
        """ Return a list of items in order maximum first.

        field_type_select -- "all" or the field type to keep
        column_order      -- column number handed to column_picker()
        """
        values = self._fields.values()
        if (field_type_select != "all"):
            # If a field type other than "all" then reduce the list.
            values = [ii for ii in values
                      if (ii.field_type == field_type_select)]
        values = [(column_picker(column_order, ii), ii) for ii in values]
        values.sort(key=operator.itemgetter(0))
        values.reverse()
        values = [ii[1] for ii in values]
        return values

    def flow_event(self, fields_dict, stats_old_dict, stats_new_dict):
        """ Receives new flow information.

        fields_dict    -- parsed flow fields
        stats_old_dict -- stats from the previous sample, None if new flow
        stats_new_dict -- stats from the current sample
        """

        # In order to avoid processing every flow at every sample
        # period, changes in flow packet count is used to determine the
        # delta in the flow statistics. This delta is used in the call
        # to self.decrement prior to self.field_add

        if (stats_old_dict is None):
            # This is a new flow
            matches = flow_aggregate(fields_dict, stats_new_dict)
            for match in matches:
                self.field_add(match)
        else:
            old_packets = int(stats_old_dict.get("packets", 0))
            new_packets = int(stats_new_dict.get("packets", 0))
            if (old_packets == new_packets):
                # ignore. same data.
                pass
            else:
                old_bytes = stats_old_dict.get("bytes", 0)
                # old_packets != new_packets
                # if old_packets > new_packets then we end up decrementing
                # packets and bytes.
                matches = flow_aggregate(fields_dict, stats_new_dict)
                for match in matches:
                    match.decrement(int(old_packets), int(old_bytes), 1)
                    self.field_add(match)
def flow_top_command(stdscr, render, flow_db):
    """ Read one key from the curses screen and act on it.

    'h' halts output until another key arrives; that key is then handled
    like any other.  's' selects the next sort column, 'a' toggles
    accumulate mode and 'f' cycles the displayed field type.  Every other
    key, including space, only triggers a new sample.

    Returns the key code that was handled.
    """
    key = stdscr.getch()

    if (key == ord('h')):
        # halt output: getch() returns -1 while nothing has been typed,
        # so keep polling until a real key shows up.
        while True:
            key = stdscr.getch()
            if (key != -1):
                break

    if (key == ord('s')):
        # toggle which column sorts data in descending order.
        render.column_select_event()
        return key

    if (key == ord('a')):
        flow_db.accumulate_toggle()
        return key

    if (key == ord('f')):
        render.field_type_toggle()
        return key

    # Space (resample) and every unrecognised key need no action.
    return key
def flows_script(args):
    """ Handle the --script option: read flows once and print a summary.

    Flows come from every file named in args.flowFiles, or from stdin when
    args.flowFiles is None.  The rendered summary is written to stdout,
    sized to the terminal width reported by get_terminal_size().
    """

    flow_db = FlowDB(args.accumulate)
    flow_db.begin()

    if (args.flowFiles is not None):
        for flowFile in args.flowFiles:
            logging.info("reading flows from %s", flowFile)
            ihdl = open(flowFile, "r")
            try:
                flow_db = flows_read(ihdl, flow_db)
            finally:
                ihdl.close()
    else:
        logging.info("reading flows from stdin")
        # Third argument 0 requests an unbuffered stream.
        ihdl = os.fdopen(sys.stdin.fileno(), 'r', 0)
        try:
            flow_db = flows_read(ihdl, flow_db)
        finally:
            ihdl.close()

    # Only the width is needed; the first element of the size is ignored.
    (_, console_width) = get_terminal_size()
    render = Render(console_width)

    for line in render.format(flow_db):
        print(line)
FlowDB returns all data as in the form of a hierarchical dictionary. Input will vary. In the case of accumulate mode, flows are not purged from the FlowDB manager. Instead at the very least, merely the latest statistics are kept. In the case, of live output the FlowDB is purged prior to sampling data. Aggregating results requires identify flow fields to aggregate out of the flow and summing stats. """ args = args_get() try: if (args.top): flows_top(args) else: flows_script(args) except KeyboardInterrupt: return 1 return 0 if __name__ == '__main__': sys.exit(main()) elif __name__ == 'ovs-dpctl-top': # pylint: disable-msg=R0915 ## # Test case beyond this point. # pylint: disable-msg=R0904 class TestsuiteFlowParse(unittest.TestCase): """ parse flow into hierarchy of dictionaries. """ def test_flow_parse(self): """ test_flow_parse. """ line = "in_port(4),eth(src=00:50:56:b4:4e:f8,"\ "dst=33:33:00:01:00:03),eth_type(0x86dd),"\ "ipv6(src=fe80::55bf:fe42:bc96:2812,dst=ff02::1:3,"\ "label=0,proto=17,tclass=0,hlimit=1,frag=no),"\ "udp(src=61252,dst=5355), packets:1, bytes:92, "\ "used:0.703s, actions:3,8,11,14,17,20,23,26,29,32,35,"\ "38,41,44,47,50,53,56,59,62,65" (fields, stats, _) = flow_line_split(line) flow_dict = elements_to_dict(fields + stats) self.assertEqual(flow_dict["eth"]["src"], "00:50:56:b4:4e:f8") self.assertEqual(flow_dict["eth"]["dst"], "33:33:00:01:00:03") self.assertEqual(flow_dict["ipv6"]["src"], "fe80::55bf:fe42:bc96:2812") self.assertEqual(flow_dict["ipv6"]["dst"], "ff02::1:3") self.assertEqual(flow_dict["packets"], "1") self.assertEqual(flow_dict["bytes"], "92") line = "in_port(4),eth(src=00:50:56:b4:4e:f8,"\ "dst=33:33:00:01:00:03),eth_type(0x86dd),"\ "ipv6(src=fe80::55bf:fe42:bc96:2812,dst=ff02::1:3,"\ "label=0,proto=17,tclass=0,hlimit=1,frag=no),"\ "udp(src=61252,dst=5355), packets:1, bytes:92, "\ "used:-0.703s, actions:3,8,11,14,17,20,23,26,29,32,35,"\ "38,41,44,47,50,53,56,59,62,65" (fields, stats, _) = flow_line_split(line) flow_dict = 
elements_to_dict(fields + stats) self.assertEqual(flow_dict["used"], "-0.703s") self.assertEqual(flow_dict["packets"], "1") self.assertEqual(flow_dict["bytes"], "92") def test_flow_sum(self): """ test_flow_sum. """ line = "in_port(4),eth(src=00:50:56:b4:4e:f8,"\ "dst=33:33:00:01:00:03),eth_type(0x86dd),"\ "ipv6(src=fe80::55bf:fe42:bc96:2812,dst=ff02::1:3,"\ "label=0,proto=17,tclass=0,hlimit=1,frag=no),"\ "udp(src=61252,dst=5355), packets:2, bytes:92, "\ "used:0.703s, actions:3,8,11,14,17,20,23,26,29,32,35,"\ "38,41,44,47,50,53,56,59,62,65" (fields, stats, _) = flow_line_split(line) stats_dict = elements_to_dict(stats) fields_dict = elements_to_dict(fields) ## # Test simple case of one line. flow_db = FlowDB(False) matches = flow_aggregate(fields_dict, stats_dict) for match in matches: flow_db.field_add(match) flow_types = flow_db.field_types_get() expected_flow_types = ["eth", "eth_type", "udp", "in_port", "ipv6"] self.assert_(len(flow_types) == len(expected_flow_types)) for flow_type in flow_types: self.assertTrue(flow_type in expected_flow_types) for flow_type in flow_types: sum_value = flow_db.field_values_in_order("all", 1) self.assert_(len(sum_value) == 5) self.assert_(sum_value[0].packets == 2) self.assert_(sum_value[0].count == 1) self.assert_(sum_value[0].bytes == 92) ## # Add line again just to see counts go up. matches = flow_aggregate(fields_dict, stats_dict) for match in matches: flow_db.field_add(match) flow_types = flow_db.field_types_get() self.assert_(len(flow_types) == len(expected_flow_types)) for flow_type in flow_types: self.assertTrue(flow_type in expected_flow_types) for flow_type in flow_types: sum_value = flow_db.field_values_in_order("all", 1) self.assert_(len(sum_value) == 5) self.assert_(sum_value[0].packets == 4) self.assert_(sum_value[0].count == 2) self.assert_(sum_value[0].bytes == 2 * 92) def test_assoc_list(self): """ test_assoc_list. 
""" line = "in_port(4),eth(src=00:50:56:b4:4e:f8,"\ "dst=33:33:00:01:00:03),eth_type(0x86dd),"\ "ipv6(src=fe80::55bf:fe42:bc96:2812,dst=ff02::1:3,"\ "label=0,proto=17,tclass=0,hlimit=1,frag=no),"\ "udp(src=61252,dst=5355), packets:2, bytes:92, "\ "used:0.703s, actions:3,8,11,14,17,20,23,26,29,32,35,"\ "38,41,44,47,50,53,56,59,62,65" valid_flows = [ 'eth_type(0x86dd)', 'udp(dst=5355)', 'in_port(4)', 'ipv6(src=fe80::55bf:fe42:bc96:2812,dst=ff02::1:3)', 'eth(src=00:50:56:b4:4e:f8,dst=33:33:00:01:00:03)' ] (fields, stats, _) = flow_line_split(line) stats_dict = elements_to_dict(stats) fields_dict = elements_to_dict(fields) ## # Test simple case of one line. flow_db = FlowDB(False) matches = flow_aggregate(fields_dict, stats_dict) for match in matches: flow_db.field_add(match) for sum_value in flow_db.field_values_in_order("all", 1): assoc_list = Columns.assoc_list(sum_value) for item in assoc_list: if (item[0] == "fields"): self.assertTrue(item[1] in valid_flows) elif (item[0] == "packets"): self.assertTrue(item[1] == 2) elif (item[0] == "count"): self.assertTrue(item[1] == 1) elif (item[0] == "average"): self.assertTrue(item[1] == 46.0) elif (item[0] == "bytes"): self.assertTrue(item[1] == 92) else: raise ValueError("unknown %s", item[0]) def test_human_format(self): """ test_assoc_list. """ self.assertEqual(approximate_size(0.0), "0.0 KiB") self.assertEqual(approximate_size(1024), "1.0 KiB") self.assertEqual(approximate_size(1024 * 1024), "1.0 MiB") self.assertEqual(approximate_size((1024 * 1024) + 100000), "1.1 MiB") value = (1024 * 1024 * 1024) + 100000000 self.assertEqual(approximate_size(value), "1.1 GiB") def test_flow_line_split(self): """ Splitting a flow line is not trivial. There is no clear delimiter. 
Comma is used liberally.""" expected_fields = ["in_port(4)", "eth(src=00:50:56:b4:4e:f8,dst=33:33:00:01:00:03)", "eth_type(0x86dd)", "ipv6(src=fe80::55bf:fe42:bc96:2812,dst=ff02::1:3," "label=0,proto=17,tclass=0,hlimit=1,frag=no)", "udp(src=61252,dst=5355)"] expected_stats = ["packets:2", "bytes:92", "used:0.703s"] expected_actions = "actions:3,8,11,14,17,20,23,26,29,32,35," \ "38,41,44,47,50,53,56,59,62,65" line = "in_port(4),eth(src=00:50:56:b4:4e:f8,"\ "dst=33:33:00:01:00:03),eth_type(0x86dd),"\ "ipv6(src=fe80::55bf:fe42:bc96:2812,dst=ff02::1:3,"\ "label=0,proto=17,tclass=0,hlimit=1,frag=no),"\ "udp(src=61252,dst=5355), packets:2, bytes:92, "\ "used:0.703s, actions:3,8,11,14,17,20,23,26,29,32,35,"\ "38,41,44,47,50,53,56,59,62,65" (fields, stats, actions) = flow_line_split(line) self.assertEqual(fields, expected_fields) self.assertEqual(stats, expected_stats) self.assertEqual(actions, expected_actions) def test_accumulate_decay(self): """ test_accumulate_decay: test accumulated decay. """ lines = ["in_port(1),eth(src=00:50:56:4f:dc:3b," "dst=ff:ff:ff:ff:ff:ff)," "eth_type(0x0806),arp(sip=10.24.105.107/255.255.255.255," "tip=10.24.104.230/255.255.255.255,op=1/0xff," "sha=00:50:56:4f:dc:3b/00:00:00:00:00:00," "tha=00:00:00:00:00:00/00:00:00:00:00:00), " "packets:1, bytes:120, used:0.004s, actions:1"] flow_db = FlowDB(True) flow_db.begin() flow_db.flow_line_add(lines[0]) # Make sure we decay time.sleep(4) self.assertEqual(flow_db.flow_stats_get()["flow_total"], 1) flow_db.decay(1) self.assertEqual(flow_db.flow_stats_get()["flow_total"], 0) flow_db.flow_line_add(lines[0]) self.assertEqual(flow_db.flow_stats_get()["flow_total"], 1) flow_db.decay(30) # Should not be deleted. 
self.assertEqual(flow_db.flow_stats_get()["flow_total"], 1) flow_db.flow_line_add(lines[0]) self.assertEqual(flow_db.flow_stats_get()["flow_total"], 1) timer = decay_timer_start(flow_db, 2) time.sleep(10) self.assertEqual(flow_db.flow_stats_get()["flow_total"], 0) timer.stop() def test_accumulate(self): """ test_accumulate test that FlowDB supports accumulate. """ lines = ["in_port(1),eth(src=00:50:56:4f:dc:3b," "dst=ff:ff:ff:ff:ff:ff)," "eth_type(0x0806),arp(sip=10.24.105.107/255.255.255.255," "tip=10.24.104.230/255.255.255.255,op=1/0xff," "sha=00:50:56:4f:dc:3b/00:00:00:00:00:00," "tha=00:00:00:00:00:00/00:00:00:00:00:00), " "packets:1, bytes:120, used:0.004s, actions:1", "in_port(2)," "eth(src=68:ef:bd:25:ef:c0,dst=33:33:00:00:00:66)," "eth_type(0x86dd),ipv6(src=fe80::6aef:bdff:fe25:efc0/::," "dst=ff02::66/::,label=0/0,proto=17/0xff,tclass=0xe0/0," "hlimit=255/0,frag=no/0),udp(src=2029,dst=2029), " "packets:2, bytes:5026, used:0.348s, actions:1", "in_port(1),eth(src=ee:ee:ee:ee:ee:ee," "dst=ff:ff:ff:ff:ff:ff)," "eth_type(0x0806),arp(sip=10.24.105.107/255.255.255.255," "tip=10.24.104.230/255.255.255.255,op=1/0xff," "sha=00:50:56:4f:dc:3b/00:00:00:00:00:00," "tha=00:00:00:00:00:00/00:00:00:00:00:00), packets:2, " "bytes:240, used:0.004s, actions:1"] lines = [ "in_port(1),eth_type(0x0806), packets:1, bytes:120, actions:1", "in_port(2),eth_type(0x0806), packets:2, bytes:126, actions:1", "in_port(1),eth_type(0x0806), packets:2, bytes:240, actions:1", "in_port(1),eth_type(0x0800), packets:1, bytes:120, actions:1", "in_port(1),eth_type(0x0800), packets:2, bytes:240, actions:1", "in_port(1),eth_type(0x0806), packets:1, bytes:120, actions:1", ] # Turn on accumulate. flow_db = FlowDB(True) flow_db.begin() flow_db.flow_line_add(lines[0]) # Test one flow exist. 
sum_values = flow_db.field_values_in_order("all", 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(1)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 1) self.assertEqual(in_ports[0].bytes, 120) self.assertEqual(in_ports[0].count, 1) # simulate another sample # Test two different flows exist. flow_db.begin() flow_db.flow_line_add(lines[1]) sum_values = flow_db.field_values_in_order("all", 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(1)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 1) self.assertEqual(in_ports[0].bytes, 120) self.assertEqual(in_ports[0].count, 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(2)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 2) self.assertEqual(in_ports[0].bytes, 126) self.assertEqual(in_ports[0].count, 1) # Test first flow increments packets. flow_db.begin() flow_db.flow_line_add(lines[2]) sum_values = flow_db.field_values_in_order("all", 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(1)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 2) self.assertEqual(in_ports[0].bytes, 240) self.assertEqual(in_ports[0].count, 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(2)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 2) self.assertEqual(in_ports[0].bytes, 126) self.assertEqual(in_ports[0].count, 1) # Test third flow but with the same in_port(1) as the first flow. 
flow_db.begin() flow_db.flow_line_add(lines[3]) sum_values = flow_db.field_values_in_order("all", 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(1)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 3) self.assertEqual(in_ports[0].bytes, 360) self.assertEqual(in_ports[0].count, 2) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(2)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 2) self.assertEqual(in_ports[0].bytes, 126) self.assertEqual(in_ports[0].count, 1) # Third flow has changes. flow_db.begin() flow_db.flow_line_add(lines[4]) sum_values = flow_db.field_values_in_order("all", 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(1)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 4) self.assertEqual(in_ports[0].bytes, 480) self.assertEqual(in_ports[0].count, 2) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(2)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 2) self.assertEqual(in_ports[0].bytes, 126) self.assertEqual(in_ports[0].count, 1) # First flow reset. flow_db.begin() flow_db.flow_line_add(lines[5]) sum_values = flow_db.field_values_in_order("all", 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(1)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 3) self.assertEqual(in_ports[0].bytes, 360) self.assertEqual(in_ports[0].count, 2) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(2)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 2) self.assertEqual(in_ports[0].bytes, 126) self.assertEqual(in_ports[0].count, 1) def test_parse_character_errors(self): """ test_parsing errors. The flow parses is purposely loose. Its not designed to validate input. Merely pull out what it can but there are situations that a parse error can be detected. 
""" lines = ["complete garbage", "in_port(2),eth(src=68:ef:bd:25:ef:c0," "dst=33:33:00:00:00:66)," "eth_type(0x86dd),ipv6(src=fe80::6aef:bdff:fe25:efc0/::," "dst=ff02::66/::,label=0/0,proto=17/0xff,tclass=0xe0/0," "hlimit=255/0,frag=no/0),udp(src=2029,dst=2029)," "packets:2,bytes:5026,actions:1"] flow_db = FlowDB(False) flow_db.begin() for line in lines: try: flow_db.flow_line_add(line) except ValueError: # We want an exception. That is how we know we have # correctly found a simple parsing error. We are not # looking to validate flow output just catch simple issues. continue self.assertTrue(False) def test_tunnel_parsing(self): """ test_tunnel_parsing test parse flows with tunnel. """ lines = [ "tunnel(tun_id=0x0,src=192.168.1.1,dst=192.168.1.10," "tos=0x0,ttl=64,flags(key)),in_port(1)," "eth(src=9e:40:f5:ef:ec:ee,dst=01:23:20:00:00:30)," "eth_type(0x8902), packets:6, bytes:534, used:0.128s, " "actions:userspace(pid=4294962691,slow_path(cfm))" ] flow_db = FlowDB(False) flow_db.begin() flow_db.flow_line_add(lines[0]) sum_values = flow_db.field_values_in_order("all", 1) in_ports = [ii for ii in sum_values if (repr(ii) == "in_port(1)")] self.assertEqual(len(in_ports), 1) self.assertEqual(in_ports[0].packets, 6) self.assertEqual(in_ports[0].bytes, 534) self.assertEqual(in_ports[0].count, 1) def test_flow_multiple_paren(self): """ test_flow_multiple_paren. """ line = "tunnel(tun_id=0x0,src=192.168.1.1,flags(key)),in_port(2)" valid = ["tunnel(tun_id=0x0,src=192.168.1.1,flags(key))", "in_port(2)"] rc = flow_line_iter(line) self.assertEqual(valid, rc) def test_to_network(self): """ test_to_network test ipv4_to_network and ipv6_to_network. 
""" ipv4s = [ ("192.168.0.1", "192.168.0.1"), ("192.168.0.1/255.255.255.255", "192.168.0.1"), ("192.168.0.1/255.255.255.0", "192.168.0.0"), ("192.168.0.1/255.255.0.0", "192.168.0.0"), ("192.168.0.1/255.0.0.0", "192.0.0.0"), ("192.168.0.1/0.0.0.0", "0.0.0.0"), ("10.24.106.230/255.255.255.255", "10.24.106.230"), ("10.24.106.230/255.255.255.0", "10.24.106.0"), ("10.24.106.0/255.255.255.0", "10.24.106.0"), ("10.24.106.0/255.255.252.0", "10.24.104.0") ] ipv6s = [ ("1::192:168:0:1", "1::192:168:0:1"), ("1::192:168:0:1/1::ffff:ffff:ffff:ffff", "1::192:168:0:1"), ("1::192:168:0:1/1::ffff:ffff:ffff:0", "1::192:168:0:0"), ("1::192:168:0:1/1::ffff:ffff:0:0", "1::192:168:0:0"), ("1::192:168:0:1/1::ffff:0:0:0", "1::192:0:0:0"), ("1::192:168:0:1/1::0:0:0:0", "1::"), ("1::192:168:0:1/::", "::") ] for (ipv4_test, ipv4_check) in ipv4s: self.assertEqual(ipv4_to_network(ipv4_test), ipv4_check) for (ipv6_test, ipv6_check) in ipv6s: self.assertEqual(ipv6_to_network(ipv6_test), ipv6_check) openvswitch-2.0.1+git20140120/utilities/ovs-dpctl.8.in000066400000000000000000000155631226605124000221100ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .TH ovs\-dpctl 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .ds PN ovs\-dpctl . .SH NAME ovs\-dpctl \- administer Open vSwitch datapaths . .SH SYNOPSIS .B ovs\-dpctl [\fIoptions\fR] \fIcommand \fR[\fIswitch\fR] [\fIargs\fR\&...] . .SH DESCRIPTION .PP The \fBovs\-dpctl\fR program can create, modify, and delete Open vSwitch datapaths. A single machine may host any number of datapaths. .PP A newly created datapath is associated with only one network device, a virtual network device sometimes called the datapath's ``local port''. A newly created datapath is not, however, associated with any of the host's other network devices. To intercept and process traffic on a given network device, use the \fBadd\-if\fR command to explicitly add that network device to the datapath. 
.PP If \fBovs\-vswitchd\fR(8) is in use, use \fBovs\-vsctl\fR(8) instead of \fBovs\-dpctl\fR. .PP Most \fBovs\-dpctl\fR commands that work with datapaths take an argument that specifies the name of the datapath. Datapath names take the form [\fItype\fB@\fR]\fIname\fR, where \fIname\fR is the network device associated with the datapath's local port. If \fItype\fR is given, it specifies the datapath provider of \fIname\fR, otherwise the default provider \fBsystem\fR is assumed. .PP The following commands manage datapaths. . .TP \fBadd\-dp \fIdp\fR [\fInetdev\fR[\fB,\fIoption\fR]...] Creates datapath \fIdp\fR, with a local port also named \fIdp\fR. This will fail if a network device \fIdp\fR already exists. .IP If \fInetdev\fRs are specified, \fBovs\-dpctl\fR adds them to the new datapath, just as if \fBadd\-if\fR was specified. . .TP \fBdel\-dp \fIdp\fR Deletes datapath \fIdp\fR. If \fIdp\fR is associated with any network devices, they are automatically removed. . .TP \fBadd\-if \fIdp netdev\fR[\fB,\fIoption\fR]... Adds each \fInetdev\fR to the set of network devices datapath \fIdp\fR monitors, where \fIdp\fR is the name of an existing datapath, and \fInetdev\fR is the name of one of the host's network devices, e.g. \fBeth0\fR. Once a network device has been added to a datapath, the datapath has complete ownership of the network device's traffic and the network device appears silent to the rest of the system. .IP A \fInetdev\fR may be followed by a comma-separated list of options. The following options are currently supported: . .RS .IP "\fBtype=\fItype\fR" Specifies the type of port to add. The default type is \fBsystem\fR. .IP "\fBport_no=\fIport\fR" Requests a specific port number within the datapath. If this option is not specified then one will be automatically assigned. .IP "\fIkey\fB=\fIvalue\fR" Adds an arbitrary key-value option to the port's configuration. .RE .IP \fBovs\-vswitchd.conf.db\fR(5) documents the available port types and options. . 
.IP "\fBset\-if \fIdp port\fR[\fB,\fIoption\fR]..." Reconfigures each \fIport\fR in \fIdp\fR as specified. An \fIoption\fR of the form \fIkey\fB=\fIvalue\fR adds the specified key-value option to the port or overrides an existing key's value. An \fIoption\fR of the form \fIkey\fB=\fR, that is, without a value, deletes the key-value pair named \fIkey\fR. The type and port number of a port cannot be changed, so \fBtype\fR and \fBport_no\fR are only allowed if they match the existing configuration. .TP \fBdel\-if \fIdp netdev\fR... Removes each \fInetdev\fR from the list of network devices datapath \fIdp\fR monitors. . .TP \fBdump\-dps\fR Prints the name of each configured datapath on a separate line. . .TP [\fB\-s\fR | \fB\-\-statistics\fR] \fBshow \fR[\fIdp\fR...] Prints a summary of configured datapaths, including their datapath numbers and a list of ports connected to each datapath. (The local port is identified as port 0.) If \fB\-s\fR or \fB\-\-statistics\fR is specified, then packet and byte counters are also printed for each port. .IP If one or more datapaths are specified, information on only those datapaths is displayed. Otherwise, \fBovs\-dpctl\fR displays information about all configured datapaths. .SS "DEBUGGING COMMANDS" The following commands are primarily useful for debugging Open vSwitch. The flow table entries (both matches and actions) that they work with are not OpenFlow flow entries. Instead, they are different and considerably simpler flows maintained by the Open vSwitch kernel module. Use \fBovs\-ofctl\fR(8), instead, to work with OpenFlow flow entries. . .PP The \fIdp\fR argument to each of these commands is optional when exactly one datapath exists, in which case that datapath is the default. When multiple datapaths exist, then a datapath name is required. . .IP "[\fB\-m \fR| \fB\-\-more\fR] \fBdump\-flows\fR [\fIdp\fR]" Prints to the console all flow entries in datapath \fIdp\fR's flow table.
Without \fB\-m\fR or \fB\-\-more\fR, output omits match fields that a flow wildcards entirely; with \fB\-m\fR or \fB\-\-more\fR, output includes all wildcarded fields. . .IP "\fBadd\-flow\fR [\fIdp\fR] \fIflow actions\fR" .IQ "[\fB\-\-clear\fR] [\fB\-\-may\-create\fR] [\fB\-s\fR | \fB\-\-statistics\fR] \fBmod\-flow\fR [\fIdp\fR] \fIflow actions\fR" Adds or modifies a flow in \fIdp\fR's flow table that, when a packet matching \fIflow\fR arrives, causes \fIactions\fR to be executed. .IP The \fBadd\-flow\fR command succeeds only if \fIflow\fR does not already exist in \fIdp\fR. Contrariwise, \fBmod\-flow\fR without \fB\-\-may\-create\fR only modifies the actions for an existing flow. With \fB\-\-may\-create\fR, \fBmod\-flow\fR will add a new flow or modify an existing one. .IP If \fB\-s\fR or \fB\-\-statistics\fR is specified, then \fBmod\-flow\fR prints the modified flow's statistics. A flow's statistics are the number of packets and bytes that have passed through the flow, the elapsed time since the flow last processed a packet (if ever), and (for TCP flows) the union of the TCP flags processed through the flow. .IP With \fB\-\-clear\fR, \fBmod\-flow\fR zeros out the flow's statistics. The statistics printed if \fB\-s\fR or \fB\-\-statistics\fR is also specified are those from just before clearing the statistics. . .IP "[\fB\-s\fR | \fB\-\-statistics\fR] \fBdel\-flow\fR [\fIdp\fR] \fIflow\fR" Deletes the flow from \fIdp\fR's flow table that matches \fIflow\fR. If \fB\-s\fR or \fB\-\-statistics\fR is specified, then \fBdel\-flow\fR prints the deleted flow's statistics. . .IP "\fBdel\-flows\fR [\fIdp\fR]" Deletes all flow entries from datapath \fIdp\fR's flow table. . .SH OPTIONS .IP "\fB\-s\fR" .IQ "\fB\-\-statistics\fR" Causes the \fBshow\fR command to print packet and byte counters for each port within the datapaths that it shows. . .IP "\fB\-m\fR" .IQ "\fB\-\-more\fR" Increases the verbosity of \fBdump\-flows\fR output. .
.IP "\fB\-t\fR" .IQ "\fB\-\-timeout=\fIsecs\fR" Limits \fBovs\-dpctl\fR runtime to approximately \fIsecs\fR seconds. If the timeout expires, \fBovs\-dpctl\fR will exit with a \fBSIGALRM\fR signal. . .so lib/vlog.man .so lib/common.man . .SH "SEE ALSO" . .BR ovs\-appctl (8), .BR ovs\-vswitchd (8) openvswitch-2.0.1+git20140120/utilities/ovs-dpctl.c000066400000000000000000000767201226605124000215600ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "command-line.h" #include "compiler.h" #include "dirs.h" #include "dpif.h" #include "dynamic-string.h" #include "flow.h" #include "netdev.h" #include "netlink.h" #include "odp-util.h" #include "ofpbuf.h" #include "packets.h" #include "shash.h" #include "simap.h" #include "smap.h" #include "sset.h" #include "timeval.h" #include "util.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(dpctl); /* -s, --statistics: Print port/flow statistics? */ static bool print_statistics; /* --clear: Reset existing statistics to zero when modifying a flow? */ static bool zero_statistics; /* --may-create: Allow mod-flows command to create a new flow? */ static bool may_create; /* -m, --more: Output verbosity. * * So far only undocumented commands honor this option, so we don't document * the option itself. 
*/
static int verbosity;

/* Returns the commands that main() hands to run_command(); defined later in
 * this file. */
static const struct command *get_all_commands(void);

static void usage(void) NO_RETURN;
static void parse_options(int argc, char *argv[]);

/* Program entry point: records the program name, parses the command-line
 * options, ignores SIGPIPE, and then hands the remaining (non-option)
 * arguments to run_command() along with the result of get_all_commands(). */
int
main(int argc, char *argv[])
{
    set_program_name(argv[0]);
    parse_options(argc, argv);
    signal(SIGPIPE, SIG_IGN);
    run_command(argc - optind, argv + optind, get_all_commands());
    return 0;
}

/* Parses the options in 'argv' with getopt_long().
 *
 * Sets the file-level variables print_statistics, zero_statistics, may_create
 * and verbosity, and calls time_alarm() for -t/--timeout.  Exits the process
 * for --help, --version and unrecognized options. */
static void
parse_options(int argc, char *argv[])
{
    enum {
        /* Long-only options are numbered above every single-character
         * option value. */
        OPT_CLEAR = UCHAR_MAX + 1,
        OPT_MAY_CREATE,
        VLOG_OPTION_ENUMS
    };
    static const struct option long_options[] = {
        {"statistics", no_argument, NULL, 's'},
        {"clear", no_argument, NULL, OPT_CLEAR},
        {"may-create", no_argument, NULL, OPT_MAY_CREATE},
        {"more", no_argument, NULL, 'm'},
        {"timeout", required_argument, NULL, 't'},
        {"help", no_argument, NULL, 'h'},
        {"version", no_argument, NULL, 'V'},
        VLOG_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    char *short_options = long_options_to_short_options(long_options);

    for (;;) {
        unsigned long int timeout;
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            break;
        }

        switch (c) {
        case 's':
            print_statistics = true;
            break;

        case OPT_CLEAR:
            zero_statistics = true;
            break;

        case OPT_MAY_CREATE:
            may_create = true;
            break;

        case 'm':
            /* Each occurrence of -m raises the verbosity by one. */
            verbosity++;
            break;

        case 't':
            timeout = strtoul(optarg, NULL, 10);
            /* 'timeout' is unsigned, so this test only rejects zero. */
            if (timeout <= 0) {
                ovs_fatal(0, "value %s on -t or --timeout is not at least 1",
                          optarg);
            } else {
                time_alarm(timeout);
            }
            break;

        case 'h':
            /* usage() is declared NO_RETURN, so there is no fall-through. */
            usage();

        case 'V':
            ovs_print_version(0, 0);
            exit(EXIT_SUCCESS);

        VLOG_OPTION_HANDLERS

        case '?':
            exit(EXIT_FAILURE);

        default:
            abort();
        }
    }
    free(short_options);
}

/* Prints the command summary, the vlog options (via vlog_usage()) and the
 * remaining options to stdout, then exits with EXIT_SUCCESS. */
static void
usage(void)
{
    printf("%s: Open vSwitch datapath management utility\n"
           "usage: %s [OPTIONS] COMMAND [ARG...]\n"
           " add-dp DP [IFACE...] add new datapath DP (with IFACEs)\n"
           " del-dp DP delete local datapath DP\n"
           " add-if DP IFACE... add each IFACE as a port on DP\n"
           " set-if DP IFACE... reconfigure each IFACE within DP\n"
           " del-if DP IFACE... delete each IFACE from DP\n"
           " dump-dps display names of all datapaths\n"
           " show show basic info on all datapaths\n"
           " show DP... show basic info on each DP\n"
           " dump-flows DP display flows in DP\n"
           " add-flow DP FLOW ACTIONS add FLOW with ACTIONS to DP\n"
           " mod-flow DP FLOW ACTIONS change FLOW actions to ACTIONS in DP\n"
           " del-flow DP FLOW delete FLOW from DP\n"
           " del-flows DP delete all flows from DP\n"
           "Each IFACE on add-dp, add-if, and set-if may be followed by\n"
           "comma-separated options. See ovs-dpctl(8) for syntax, or the\n"
           "Interface table in ovs-vswitchd.conf.db(5) for an options list.\n",
           program_name, program_name);
    vlog_usage();
    printf("\nOptions for show and mod-flow:\n"
           " -s, --statistics print statistics for port or flow\n"
           "\nOptions for dump-flows:\n"
           " -m, --more increase verbosity of output\n"
           "\nOptions for mod-flow:\n"
           " --may-create create flow if it doesn't exist\n"
           " --clear reset existing stats to zero\n"
           "\nOther options:\n"
           " -t, --timeout=SECS give up after SECS seconds\n"
           " -h, --help display this help message\n"
           " -V, --version display version information\n");
    exit(EXIT_SUCCESS);
}

static void run(int retval, const char *message, ...)
    PRINTF_FORMAT(2, 3);

/* If 'retval' is nonzero, passes it with the printf-style 'message' and its
 * arguments to ovs_fatal_valist().  Does nothing when 'retval' is zero. */
static void
run(int retval, const char *message, ...)
{
    if (retval) {
        va_list args;

        va_start(args, message);
        /* No va_end() follows -- presumably ovs_fatal_valist() does not
         * return; confirm against its declaration. */
        ovs_fatal_valist(retval, message, args);
    }
}

static void dpctl_add_if(int argc, char *argv[]);

/* Opens 'netdev_name' as a "system" device, turns on its NETDEV_UP flag and
 * closes it again.  Returns 0 on success, otherwise the error from
 * netdev_open() or netdev_turn_flags_on(). */
static int
if_up(const char *netdev_name)
{
    struct netdev *netdev;
    int retval;

    retval = netdev_open(netdev_name, "system", &netdev);
    if (!retval) {
        retval = netdev_turn_flags_on(netdev, NETDEV_UP, NULL);
        netdev_close(netdev);
    }
    return retval;
}

/* Retrieve the name of the datapath if exactly one exists.  The caller
 * is responsible for freeing the returned string.  If there is not one
 * datapath, aborts with an error message.
*/ static char * get_one_dp(void) { struct sset types; const char *type; char *dp_name = NULL; size_t count = 0; sset_init(&types); dp_enumerate_types(&types); SSET_FOR_EACH (type, &types) { struct sset names; sset_init(&names); if (!dp_enumerate_names(type, &names)) { count += sset_count(&names); if (!dp_name && count == 1) { dp_name = xasprintf("%s@%s", type, SSET_FIRST(&names)); } } sset_destroy(&names); } sset_destroy(&types); if (!count) { ovs_fatal(0, "no datapaths exist"); } else if (count > 1) { ovs_fatal(0, "multiple datapaths, specify one"); } return dp_name; } static int parsed_dpif_open(const char *arg_, bool create, struct dpif **dpifp) { int result; char *name, *type; dp_parse_name(arg_, &name, &type); if (create) { result = dpif_create(name, type, dpifp); } else { result = dpif_open(name, type, dpifp); } free(name); free(type); return result; } static void dpctl_add_dp(int argc OVS_UNUSED, char *argv[]) { struct dpif *dpif; run(parsed_dpif_open(argv[1], true, &dpif), "add_dp"); dpif_close(dpif); if (argc > 2) { dpctl_add_if(argc, argv); } } static void dpctl_del_dp(int argc OVS_UNUSED, char *argv[]) { struct dpif *dpif; run(parsed_dpif_open(argv[1], false, &dpif), "opening datapath"); run(dpif_delete(dpif), "del_dp"); dpif_close(dpif); } static void dpctl_add_if(int argc OVS_UNUSED, char *argv[]) { bool failure = false; struct dpif *dpif; int i; run(parsed_dpif_open(argv[1], false, &dpif), "opening datapath"); for (i = 2; i < argc; i++) { const char *name, *type; char *save_ptr = NULL; struct netdev *netdev = NULL; struct smap args; odp_port_t port_no = ODPP_NONE; char *option; int error; name = strtok_r(argv[i], ",", &save_ptr); type = "system"; if (!name) { ovs_error(0, "%s is not a valid network device name", argv[i]); failure = true; continue; } smap_init(&args); while ((option = strtok_r(NULL, ",", &save_ptr)) != NULL) { char *save_ptr_2 = NULL; char *key, *value; key = strtok_r(option, "=", &save_ptr_2); value = strtok_r(NULL, "", &save_ptr_2); 
if (!value) { value = ""; } if (!strcmp(key, "type")) { type = value; } else if (!strcmp(key, "port_no")) { port_no = u32_to_odp(atoi(value)); } else if (!smap_add_once(&args, key, value)) { ovs_error(0, "duplicate \"%s\" option", key); } } error = netdev_open(name, type, &netdev); if (error) { ovs_error(error, "%s: failed to open network device", name); goto next; } error = netdev_set_config(netdev, &args); if (error) { ovs_error(error, "%s: failed to configure network device", name); goto next; } error = dpif_port_add(dpif, netdev, &port_no); if (error) { ovs_error(error, "adding %s to %s failed", name, argv[1]); goto next; } error = if_up(name); next: netdev_close(netdev); if (error) { failure = true; } } dpif_close(dpif); if (failure) { exit(EXIT_FAILURE); } } static void dpctl_set_if(int argc, char *argv[]) { bool failure = false; struct dpif *dpif; int i; run(parsed_dpif_open(argv[1], false, &dpif), "opening datapath"); for (i = 2; i < argc; i++) { struct netdev *netdev = NULL; struct dpif_port dpif_port; char *save_ptr = NULL; char *type = NULL; const char *name; struct smap args; odp_port_t port_no; char *option; int error; name = strtok_r(argv[i], ",", &save_ptr); if (!name) { ovs_error(0, "%s is not a valid network device name", argv[i]); failure = true; continue; } /* Get the port's type from the datapath. */ error = dpif_port_query_by_name(dpif, name, &dpif_port); if (error) { ovs_error(error, "%s: failed to query port in %s", name, argv[1]); goto next; } type = xstrdup(dpif_port.type); port_no = dpif_port.port_no; dpif_port_destroy(&dpif_port); /* Retrieve its existing configuration. */ error = netdev_open(name, type, &netdev); if (error) { ovs_error(error, "%s: failed to open network device", name); goto next; } smap_init(&args); error = netdev_get_config(netdev, &args); if (error) { ovs_error(error, "%s: failed to fetch configuration", name); goto next; } /* Parse changes to configuration. 
*/ while ((option = strtok_r(NULL, ",", &save_ptr)) != NULL) { char *save_ptr_2 = NULL; char *key, *value; key = strtok_r(option, "=", &save_ptr_2); value = strtok_r(NULL, "", &save_ptr_2); if (!value) { value = ""; } if (!strcmp(key, "type")) { if (strcmp(value, type)) { ovs_error(0, "%s: can't change type from %s to %s", name, type, value); failure = true; } } else if (!strcmp(key, "port_no")) { if (port_no != u32_to_odp(atoi(value))) { ovs_error(0, "%s: can't change port number from " "%"PRIu32" to %d", name, port_no, atoi(value)); failure = true; } } else if (value[0] == '\0') { smap_remove(&args, key); } else { smap_replace(&args, key, value); } } /* Update configuration. */ error = netdev_set_config(netdev, &args); smap_destroy(&args); if (error) { ovs_error(error, "%s: failed to configure network device", name); goto next; } next: free(type); netdev_close(netdev); if (error) { failure = true; } } dpif_close(dpif); if (failure) { exit(EXIT_FAILURE); } } static bool get_port_number(struct dpif *dpif, const char *name, odp_port_t *port) { struct dpif_port dpif_port; if (!dpif_port_query_by_name(dpif, name, &dpif_port)) { *port = dpif_port.port_no; dpif_port_destroy(&dpif_port); return true; } else { ovs_error(0, "no port named %s", name); return false; } } static void dpctl_del_if(int argc OVS_UNUSED, char *argv[]) { bool failure = false; struct dpif *dpif; int i; run(parsed_dpif_open(argv[1], false, &dpif), "opening datapath"); for (i = 2; i < argc; i++) { const char *name = argv[i]; odp_port_t port; int error; if (!name[strspn(name, "0123456789")]) { port = u32_to_odp(atoi(name)); } else if (!get_port_number(dpif, name, &port)) { failure = true; continue; } error = dpif_port_del(dpif, port); if (error) { ovs_error(error, "deleting port %s from %s failed", name, argv[1]); failure = true; } } dpif_close(dpif); if (failure) { exit(EXIT_FAILURE); } } static void print_stat(const char *leader, uint64_t value) { fputs(leader, stdout); if (value != UINT64_MAX) { 
printf("%"PRIu64, value); } else { putchar('?'); } } static void print_human_size(uint64_t value) { if (value == UINT64_MAX) { /* Nothing to do. */ } else if (value >= 1024ULL * 1024 * 1024 * 1024) { printf(" (%.1f TiB)", value / (1024.0 * 1024 * 1024 * 1024)); } else if (value >= 1024ULL * 1024 * 1024) { printf(" (%.1f GiB)", value / (1024.0 * 1024 * 1024)); } else if (value >= 1024ULL * 1024) { printf(" (%.1f MiB)", value / (1024.0 * 1024)); } else if (value >= 1024) { printf(" (%.1f KiB)", value / 1024.0); } } static void show_dpif(struct dpif *dpif) { struct dpif_port_dump dump; struct dpif_port dpif_port; struct dpif_dp_stats stats; struct netdev *netdev; printf("%s:\n", dpif_name(dpif)); if (!dpif_get_dp_stats(dpif, &stats)) { printf("\tlookups: hit:%"PRIu64" missed:%"PRIu64" lost:%"PRIu64"\n" "\tflows: %"PRIu64"\n", stats.n_hit, stats.n_missed, stats.n_lost, stats.n_flows); } DPIF_PORT_FOR_EACH (&dpif_port, &dump, dpif) { printf("\tport %u: %s", dpif_port.port_no, dpif_port.name); if (strcmp(dpif_port.type, "system")) { int error; printf (" (%s", dpif_port.type); error = netdev_open(dpif_port.name, dpif_port.type, &netdev); if (!error) { struct smap config; smap_init(&config); error = netdev_get_config(netdev, &config); if (!error) { const struct smap_node **nodes; size_t i; nodes = smap_sort(&config); for (i = 0; i < smap_count(&config); i++) { const struct smap_node *node = nodes[i]; printf("%c %s=%s", i ? 
',' : ':', node->key, node->value); } free(nodes); } else { printf(", could not retrieve configuration (%s)", ovs_strerror(error)); } smap_destroy(&config); netdev_close(netdev); } else { printf(": open failed (%s)", ovs_strerror(error)); } putchar(')'); } putchar('\n'); if (print_statistics) { struct netdev_stats s; int error; error = netdev_open(dpif_port.name, dpif_port.type, &netdev); if (error) { printf(", open failed (%s)", ovs_strerror(error)); continue; } error = netdev_get_stats(netdev, &s); if (error) { printf(", could not retrieve stats (%s)", ovs_strerror(error)); continue; } netdev_close(netdev); print_stat("\t\tRX packets:", s.rx_packets); print_stat(" errors:", s.rx_errors); print_stat(" dropped:", s.rx_dropped); print_stat(" overruns:", s.rx_over_errors); print_stat(" frame:", s.rx_frame_errors); printf("\n"); print_stat("\t\tTX packets:", s.tx_packets); print_stat(" errors:", s.tx_errors); print_stat(" dropped:", s.tx_dropped); print_stat(" aborted:", s.tx_aborted_errors); print_stat(" carrier:", s.tx_carrier_errors); printf("\n"); print_stat("\t\tcollisions:", s.collisions); printf("\n"); print_stat("\t\tRX bytes:", s.rx_bytes); print_human_size(s.rx_bytes); print_stat(" TX bytes:", s.tx_bytes); print_human_size(s.tx_bytes); printf("\n"); } } dpif_close(dpif); } static void dpctl_show(int argc, char *argv[]) { bool failure = false; if (argc > 1) { int i; for (i = 1; i < argc; i++) { const char *name = argv[i]; struct dpif *dpif; int error; error = parsed_dpif_open(name, false, &dpif); if (!error) { show_dpif(dpif); } else { ovs_error(error, "opening datapath %s failed", name); failure = true; } } } else { struct sset types; const char *type; sset_init(&types); dp_enumerate_types(&types); SSET_FOR_EACH (type, &types) { struct sset names; const char *name; sset_init(&names); if (dp_enumerate_names(type, &names)) { failure = true; continue; } SSET_FOR_EACH (name, &names) { struct dpif *dpif; int error; error = dpif_open(name, type, &dpif); if 
(!error) {
                    show_dpif(dpif);
                } else {
                    ovs_error(error, "opening datapath %s failed", name);
                    failure = true;
                }
            }
            sset_destroy(&names);
        }
        sset_destroy(&types);
    }
    if (failure) {
        exit(EXIT_FAILURE);
    }
}

/* "dump-dps": prints, one per line, the name of every datapath of every
 * enumerated type that can be opened.
 *
 * A datapath that fails to open is skipped without any message.  If
 * enumerating the names of any type fails, the names that were obtained are
 * still printed and the process exits with EXIT_FAILURE afterwards. */
static void
dpctl_dump_dps(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
    struct sset dpif_names, dpif_types;
    const char *type;
    int error = 0;

    sset_init(&dpif_names);
    sset_init(&dpif_types);
    dp_enumerate_types(&dpif_types);

    SSET_FOR_EACH (type, &dpif_types) {
        const char *name;
        int retval;

        /* The same 'dpif_names' set is handed to every call.
         * NOTE(review): this relies on dp_enumerate_names() resetting the
         * set on entry -- confirm against its definition. */
        retval = dp_enumerate_names(type, &dpif_names);
        if (retval) {
            error = retval;
        }

        SSET_FOR_EACH (name, &dpif_names) {
            struct dpif *dpif;
            if (!dpif_open(name, type, &dpif)) {
                printf("%s\n", dpif_name(dpif));
                dpif_close(dpif);
            }
        }
    }

    sset_destroy(&dpif_names);
    sset_destroy(&dpif_types);
    if (error) {
        exit(EXIT_FAILURE);
    }
}

/* "dump-flows [DP]": prints every flow in a datapath, one per line, as
 * "<key/mask>, <statistics>, actions:<actions>".
 *
 * The datapath is argv[1] when one is given (argc == 2); otherwise
 * get_one_dp() chooses it. */
static void
dpctl_dump_flows(int argc, char *argv[])
{
    const struct dpif_flow_stats *stats;
    const struct nlattr *actions;
    struct dpif_flow_dump dump;
    const struct nlattr *key;
    const struct nlattr *mask;
    size_t actions_len;
    struct dpif *dpif;
    size_t key_len;
    size_t mask_len;
    struct ds ds;
    char *name;

    name = (argc == 2) ?
xstrdup(argv[1]) : get_one_dp(); run(parsed_dpif_open(name, false, &dpif), "opening datapath"); free(name); ds_init(&ds); dpif_flow_dump_start(&dump, dpif); while (dpif_flow_dump_next(&dump, &key, &key_len, &mask, &mask_len, &actions, &actions_len, &stats)) { ds_clear(&ds); odp_flow_format(key, key_len, mask, mask_len, &ds, verbosity); ds_put_cstr(&ds, ", "); dpif_flow_stats_format(stats, &ds); ds_put_cstr(&ds, ", actions:"); format_odp_actions(&ds, actions, actions_len); printf("%s\n", ds_cstr(&ds)); } dpif_flow_dump_done(&dump); ds_destroy(&ds); dpif_close(dpif); } static void dpctl_put_flow(int argc, char *argv[], enum dpif_flow_put_flags flags) { const char *key_s = argv[argc - 2]; const char *actions_s = argv[argc - 1]; struct dpif_flow_stats stats; struct ofpbuf actions; struct ofpbuf key; struct ofpbuf mask; struct dpif *dpif; struct ds s; char *dp_name; ds_init(&s); ofpbuf_init(&key, 0); ofpbuf_init(&mask, 0); run(odp_flow_from_string(key_s, NULL, &key, &mask), "parsing flow key"); ofpbuf_init(&actions, 0); run(odp_actions_from_string(actions_s, NULL, &actions), "parsing actions"); dp_name = argc == 4 ? xstrdup(argv[1]) : get_one_dp(); run(parsed_dpif_open(dp_name, false, &dpif), "opening datapath"); free(dp_name); run(dpif_flow_put(dpif, flags, key.data, key.size, mask.size == 0 ? NULL : mask.data, mask.size, actions.data, actions.size, print_statistics ? 
&stats : NULL), "updating flow table"); ofpbuf_uninit(&key); ofpbuf_uninit(&mask); ofpbuf_uninit(&actions); if (print_statistics) { struct ds s; ds_init(&s); dpif_flow_stats_format(&stats, &s); puts(ds_cstr(&s)); ds_destroy(&s); } } static void dpctl_add_flow(int argc, char *argv[]) { dpctl_put_flow(argc, argv, DPIF_FP_CREATE); } static void dpctl_mod_flow(int argc OVS_UNUSED, char *argv[]) { enum dpif_flow_put_flags flags; flags = DPIF_FP_MODIFY; if (may_create) { flags |= DPIF_FP_CREATE; } if (zero_statistics) { flags |= DPIF_FP_ZERO_STATS; } dpctl_put_flow(argc, argv, flags); } static void dpctl_del_flow(int argc, char *argv[]) { const char *key_s = argv[argc - 1]; struct dpif_flow_stats stats; struct ofpbuf key; struct ofpbuf mask; /* To be ignored. */ struct dpif *dpif; char *dp_name; ofpbuf_init(&key, 0); ofpbuf_init(&mask, 0); run(odp_flow_from_string(key_s, NULL, &key, &mask), "parsing flow key"); dp_name = argc == 2 ? xstrdup(argv[1]) : get_one_dp(); run(parsed_dpif_open(dp_name, false, &dpif), "opening datapath"); free(dp_name); run(dpif_flow_del(dpif, key.data, key.size, print_statistics ? &stats : NULL), "deleting flow"); ofpbuf_uninit(&key); ofpbuf_uninit(&mask); if (print_statistics) { struct ds s; ds_init(&s); dpif_flow_stats_format(&stats, &s); puts(ds_cstr(&s)); ds_destroy(&s); } } static void dpctl_del_flows(int argc, char *argv[]) { struct dpif *dpif; char *name; name = (argc == 2) ? xstrdup(argv[1]) : get_one_dp(); run(parsed_dpif_open(name, false, &dpif), "opening datapath"); free(name); run(dpif_flow_flush(dpif), "deleting all flows"); dpif_close(dpif); } static void dpctl_help(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { usage(); } /* Undocumented commands for unit testing. 
*/

/* "parse-actions ACTIONS...": parses each argument as a datapath action
 * string and prints it back in formatted form, one per line.  A parse
 * failure is fatal via run(). */
static void
dpctl_parse_actions(int argc, char *argv[])
{
    int i;

    for (i = 1; i < argc; i++) {
        struct ofpbuf actions;
        struct ds s;

        ofpbuf_init(&actions, 0);
        run(odp_actions_from_string(argv[i], NULL, &actions),
            "odp_actions_from_string");

        ds_init(&s);
        format_odp_actions(&s, actions.data, actions.size);
        puts(ds_cstr(&s));
        ds_destroy(&s);

        ofpbuf_uninit(&actions);
    }
}

/* The actions collected for one distinct flow. */
struct actions_for_flow {
    struct hmap_node hmap_node; /* Node in the per-flow hmap, by flow hash. */
    struct flow flow;           /* The flow these actions apply to. */
    struct ofpbuf actions;      /* Accumulated actions for 'flow'. */
};

/* Returns the entry for 'flow' in 'actions_per_flow'.  If there is none, a
 * new entry holding a copy of 'flow' and an empty action buffer is
 * allocated, inserted and returned. */
static struct actions_for_flow *
get_actions_for_flow(struct hmap *actions_per_flow, const struct flow *flow)
{
    uint32_t hash = flow_hash(flow, 0);
    struct actions_for_flow *af;

    HMAP_FOR_EACH_WITH_HASH (af, hmap_node, hash, actions_per_flow) {
        if (flow_equal(&af->flow, flow)) {
            return af;
        }
    }

    af = xmalloc(sizeof *af);
    af->flow = *flow;
    ofpbuf_init(&af->actions, 0);
    hmap_insert(actions_per_flow, &af->hmap_node, hash);
    return af;
}

/* qsort() comparator for an array of 'struct actions_for_flow *': orders
 * the entries by flow_compare_3way() on their flows. */
static int
compare_actions_for_flow(const void *a_, const void *b_)
{
    struct actions_for_flow *const *a = a_;
    struct actions_for_flow *const *b = b_;

    return flow_compare_3way(&(*a)->flow, &(*b)->flow);
}

/* qsort() comparator for netlink attributes carrying a u32 payload: orders
 * them by ascending payload value. */
static int
compare_output_actions(const void *a_, const void *b_)
{
    const struct nlattr *a = a_;
    const struct nlattr *b = b_;
    uint32_t a_port = nl_attr_get_u32(a);
    uint32_t b_port = nl_attr_get_u32(b);

    return a_port < b_port ?
-1 : a_port > b_port;
}

/* Sorts the attributes in the half-open range ['first', 'end') with
 * compare_output_actions().
 *
 * The range is treated as an array of fixed-size elements of NL_A_U32_SIZE
 * bytes each; its length must be an exact multiple of that size, which is
 * asserted. */
static void
sort_output_actions__(struct nlattr *first, struct nlattr *end)
{
    size_t bytes = (uint8_t *) end - (uint8_t *) first;
    size_t n = bytes / NL_A_U32_SIZE;

    ovs_assert(bytes % NL_A_U32_SIZE == 0);
    qsort(first, n, NL_A_U32_SIZE, compare_output_actions);
}

/* Sorts, in place, every maximal run of consecutive OVS_ACTION_ATTR_OUTPUT
 * attributes within the 'length' bytes of 'actions'.  Attributes of any
 * other type keep their position and delimit the runs. */
static void
sort_output_actions(struct nlattr *actions, size_t length)
{
    struct nlattr *first_output = NULL;
    struct nlattr *a;
    int left;

    NL_ATTR_FOR_EACH (a, left, actions, length) {
        if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT) {
            if (!first_output) {
                first_output = a;
            }
        } else {
            /* A non-output attribute ends the current run, if any. */
            if (first_output) {
                sort_output_actions__(first_output, a);
                first_output = NULL;
            }
        }
    }
    /* A run that extends to the end of the buffer is still pending. */
    if (first_output) {
        uint8_t *end = (uint8_t *) actions + length;
        sort_output_actions__(first_output,
                              ALIGNED_CAST(struct nlattr *, end));
    }
}

/* usage: "ovs-dpctl normalize-actions FLOW ACTIONS" where FLOW and ACTIONS
 * have the syntax used by "ovs-dpctl dump-flows".
 *
 * This command prints ACTIONS in a format that shows what happens for each
 * VLAN, independent of the order of the ACTIONS.  For example, there is more
 * than one way to output a packet on VLANs 9 and 11, but this command will
 * print the same output for any form.
 *
 * The idea here generalizes beyond VLANs (e.g. to setting other fields) but
 * so far the implementation only covers VLANs.
 *
 * Arguments after ACTIONS, if any, must each have the form NAME=NUMBER.
 * They are collected into a name-to-number map that is handed to the flow
 * and action parsers. */
static void
dpctl_normalize_actions(int argc, char *argv[])
{
    struct simap port_names;
    struct ofpbuf keybuf;
    struct flow flow;
    struct ofpbuf odp_actions;
    struct hmap actions_per_flow;
    struct actions_for_flow **afs;
    struct actions_for_flow *af;
    struct nlattr *a;
    size_t n_afs;
    struct ds s;
    int left;
    int i;

    ds_init(&s);

    simap_init(&port_names);
    for (i = 3; i < argc; i++) {
        char name[16];
        int number;
        int n = -1;

        /* 'n' stays -1 unless sscanf() gets as far as the trailing %n,
         * i.e. unless both NAME and NUMBER were converted. */
        if (sscanf(argv[i], "%15[^=]=%d%n", name, &number, &n) > 0 && n > 0) {
            uintptr_t n = number;
            simap_put(&port_names, name, n);
        } else {
            ovs_fatal(0, "%s: expected NAME=NUMBER", argv[i]);
        }
    }

    /* Parse flow key. */
    ofpbuf_init(&keybuf, 0);
    run(odp_flow_from_string(argv[1], &port_names, &keybuf, NULL),
        "odp_flow_key_from_string");

    ds_clear(&s);
    odp_flow_format(keybuf.data, keybuf.size, NULL, 0, &s, verbosity);
    printf("input flow: %s\n", ds_cstr(&s));

    run(odp_flow_key_to_flow(keybuf.data, keybuf.size, &flow),
        "odp_flow_key_to_flow");
    ofpbuf_uninit(&keybuf);

    /* Parse actions. */
    ofpbuf_init(&odp_actions, 0);
    run(odp_actions_from_string(argv[2], &port_names, &odp_actions),
        "odp_actions_from_string");

    if (verbosity) {
        ds_clear(&s);
        format_odp_actions(&s, odp_actions.data, odp_actions.size);
        printf("input actions: %s\n", ds_cstr(&s));
    }

    /* Walk the actions in order.  VLAN pop and push actions are not copied;
     * they only update the VLAN TCI of the working 'flow'.  Every other
     * action is appended to the bucket for the flow as it stands at that
     * point. */
    hmap_init(&actions_per_flow);
    NL_ATTR_FOR_EACH (a, left, odp_actions.data, odp_actions.size) {
        const struct ovs_action_push_vlan *push;
        switch(nl_attr_type(a)) {
        case OVS_ACTION_ATTR_POP_VLAN:
            flow.vlan_tci = htons(0);
            continue;

        case OVS_ACTION_ATTR_PUSH_VLAN:
            push = nl_attr_get_unspec(a, sizeof *push);
            flow.vlan_tci = push->vlan_tci;
            continue;
        }

        af = get_actions_for_flow(&actions_per_flow, &flow);
        nl_msg_put_unspec(&af->actions, nl_attr_type(a),
                          nl_attr_get(a), nl_attr_get_size(a));
    }

    /* Flatten the buckets into an array and sort it by flow, so that the
     * output order does not depend on hash order. */
    n_afs = hmap_count(&actions_per_flow);
    afs = xmalloc(n_afs * sizeof *afs);
    i = 0;
    HMAP_FOR_EACH (af, hmap_node, &actions_per_flow) {
        afs[i++] = af;
    }
    ovs_assert(i == n_afs);

    qsort(afs, n_afs, sizeof *afs, compare_actions_for_flow);

    for (i = 0; i < n_afs; i++) {
        const struct actions_for_flow *af = afs[i];

        sort_output_actions(af->actions.data, af->actions.size);

        if (af->flow.vlan_tci != htons(0)) {
            printf("vlan(vid=%"PRIu16",pcp=%d): ",
                   vlan_tci_to_vid(af->flow.vlan_tci),
                   vlan_tci_to_pcp(af->flow.vlan_tci));
        } else {
            printf("no vlan: ");
        }

        if (af->flow.mpls_depth) {
            printf("mpls(label=%"PRIu32",tc=%d,ttl=%d): ",
                   mpls_lse_to_label(af->flow.mpls_lse),
                   mpls_lse_to_tc(af->flow.mpls_lse),
                   mpls_lse_to_ttl(af->flow.mpls_lse));
        } else {
            printf("no mpls: ");
        }

        ds_clear(&s);
        format_odp_actions(&s, af->actions.data, af->actions.size);
        puts(ds_cstr(&s));
    }
    /* Only 's' is released here; the buckets, 'afs', 'port_names' and
     * 'odp_actions' are not freed in this function. */
    ds_destroy(&s);
}
static const struct command all_commands[] = { { "add-dp", 1, INT_MAX, dpctl_add_dp }, { "del-dp", 1, 1, dpctl_del_dp }, { "add-if", 2, INT_MAX, dpctl_add_if }, { "del-if", 2, INT_MAX, dpctl_del_if }, { "set-if", 2, INT_MAX, dpctl_set_if }, { "dump-dps", 0, 0, dpctl_dump_dps }, { "show", 0, INT_MAX, dpctl_show }, { "dump-flows", 0, 1, dpctl_dump_flows }, { "add-flow", 2, 3, dpctl_add_flow }, { "mod-flow", 2, 3, dpctl_mod_flow }, { "del-flow", 1, 2, dpctl_del_flow }, { "del-flows", 0, 1, dpctl_del_flows }, { "help", 0, INT_MAX, dpctl_help }, /* Undocumented commands for testing. */ { "parse-actions", 1, INT_MAX, dpctl_parse_actions }, { "normalize-actions", 2, INT_MAX, dpctl_normalize_actions }, { NULL, 0, 0, NULL }, }; static const struct command *get_all_commands(void) { return all_commands; } openvswitch-2.0.1+git20140120/utilities/ovs-l3ping.8.in000066400000000000000000000110001226605124000221540ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .TH ovs\-l3ping 1 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" . .SH NAME \fBovs\-l3ping\fR \- check network deployment for L3 tunneling problems . .SH SYNOPSIS \fBovs\-l3ping\fR \fB\-s\fR \fITunnelRemoteIP,InnerIP[/mask]\fR \fB\-t\fR \fItunnelmode\fR .br \fBovs\-l3ping\fR \fB\-s\fR \fITunnelRemoteIP,InnerIP[/mask][:ControlPort]\fR \fB\-t\fR \fItunnelmode\fR .PP \fBovs\-l3ping\fR \fB\-c\fR \fITunnelRemoteIP,InnerIP[/mask],RemoteInnerIP\fR \fB\-t\fR \fItunnelmode\fR .br \fBovs\-l3ping\fR \fB\-c\fR \fITunnelRemoteIP,InnerIP[/mask][:ControlPort\ [:DataPort]],RemoteInnerIP[:ControlPort[:DataPort]]\fR [\fB\-b\fR \fItargetbandwidth\fR] [\fB\-i\fR \fItestinterval\fR] \fB\-t\fR \fItunnelmode\fR .so lib/common-syn.man . .SH DESCRIPTION The \fBovs\-l3ping\fR program may be used to check for problems that could be caused by invalid routing policy, misconfigured firewall in the tunnel path or a bad NIC driver. On one of the nodes, run \fBovs\-l3ping\fR in server mode and on the other node run it in client mode. 
The client and server will establish an L3 tunnel, over which the client
will give further testing instructions. The \fBovs\-l3ping\fR client
will perform UDP and TCP tests. This tool differs from \fBovs\-test\fR
in that it encapsulates the XML/RPC control connection over the tunnel,
so there is no need to open special holes in the firewall.
.PP
UDP tests can report packet loss and achieved bandwidth for various
datagram sizes. By default the target bandwidth for UDP tests is 1Mbit/s.
.PP
TCP tests report only achieved bandwidth, because the kernel TCP stack
takes care of flow control and packet loss.
.
.SS "Client Mode"
An \fBovs\-l3ping\fR client will create an L3 tunnel and connect over it
to the \fBovs\-l3ping\fR server to schedule the tests.
\fITunnelRemoteIP\fR is the peer's IP address, where the tunnel will be
terminated. \fIInnerIP\fR is the address that will be temporarily
assigned during testing. All test traffic originating from this IP
address to the \fIRemoteInnerIP\fR will be tunneled. It is possible to
override the default \fIControlPort\fR and \fIDataPort\fR, if there is
any other application that already listens on those two ports.
.
.SS "Server Mode"
To conduct tests, an \fBovs\-l3ping\fR server must be running. It is
required that both client and server \fIInnerIP\fR addresses are in the
same subnet. It is possible to specify \fIInnerIP\fR with a netmask in
CIDR format.
.
.SH OPTIONS
One of \fB\-s\fR or \fB\-c\fR is required. The \fB\-t\fR option is
also required.
.
.IP "\fB\-s \fITunnelRemoteIP,InnerIP[/mask][:ControlPort]\fR"
.IQ "\fB\-\-server\fR \fITunnelRemoteIP,InnerIP[/mask][:ControlPort]\fR"
Run in server mode and create an L3 tunnel with the client that will be
accepting the tunnel at the \fITunnelRemoteIP\fR address. The socket on
\fIInnerIP[:ControlPort]\fR will be used to receive further instructions
from the client.
.
.IP "\fB\-c \fITunnelRemoteIP,InnerIP[/mask][:ControlPort\ [:DataPort]],RemoteInnerIP[:ControlPort[:DataPort]]\fR" .IQ "\fB\-\-client \fITunnelRemoteIP,InnerIP[/mask][:ControlPort\ [:DataPort]],RemoteInnerIP[:ControlPort[:DataPort]]\fR" Run in client mode and create L3 tunnel with the server on \fITunnelRemoteIP\fR. The client will use \fIInnerIP\fR to generate test traffic with the server's \fIRemoteInnerIP\fR. . .IP "\fB\-b \fItargetbandwidth\fR" .IQ "\fB\-\-bandwidth\fR \fItargetbandwidth\fR" Target bandwidth for UDP tests. The \fItargetbandwidth\fR must be given in bits per second. It is possible to use postfix M or K to alter the target bandwidth magnitude. . .IP "\fB\-i \fItestinterval\fR" .IQ "\fB\-\-interval\fR \fItestinterval\fR" How long each test should run. By default 5 seconds. . .IP "\fB\-t \fItunnelmode\fR" .IQ "\fB\-\-tunnel\-mode\fR \fItunnelmode\fR" Specify the tunnel type. This option must match on server and client. . .so lib/common.man . .SH EXAMPLES .PP On host 192.168.122.220 start \fBovs\-l3ping\fR in server mode. This command will create a temporary GRE tunnel with the host 192.168.122.236 and assign 10.1.1.1/28 as the inner IP address, where client will have to connect: .IP .B ovs\-l3ping -s 192.168.122.236,10.1.1.1/28 -t gre . .PP On host 192.168.122.236 start \fBovs\-l3ping\fR in client mode. This command will use 10.1.1.2/28 as the local inner IP address and will connect over the L3 tunnel to the server's inner IP address at 10.1.1.1. .IP .B ovs\-l3ping -c 192.168.122.220,10.1.1.2/28,10.1.1.1 -t gre . .SH SEE ALSO . .BR ovs\-vswitchd (8), .BR ovs\-ofctl (8), .BR ovs\-vsctl (8), .BR ovs\-vlan\-test (8), .BR ovs\-test (8), .BR ethtool (8), .BR uname (1) openvswitch-2.0.1+git20140120/utilities/ovs-l3ping.in000066400000000000000000000054101226605124000220160ustar00rootroot00000000000000#! @PYTHON@ # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ ovs L3 ping utility allows to do tests between two remote hosts without opening holes in the firewall for the XML RPC control connection. This is achieved by tunneling the control connection inside the tunnel itself. """ import socket import xmlrpclib import ovstest.args as args import ovstest.tests as tests import ovstest.util as util def get_packet_sizes(me, he, remote_ip): """ This function retrieves MTUs from both hosts and returns a list of packet sizes, that are more likely to uncover possible configuration issues. """ mtu_node1 = 1500 mtu_node2 = 1500 server1 = util.rpc_client(me[0], me[1]) server2 = util.rpc_client(he[0], he[1]) iface1 = server2.get_interface(remote_ip) iface2 = server1.get_interface_from_routing_decision(remote_ip) if iface1: mtu_node1 = server2.get_interface_mtu(iface1) if iface2: mtu_node2 = server1.get_interface_mtu(iface2) return util.get_datagram_sizes(mtu_node1, mtu_node2) if __name__ == '__main__': local_server = None try: args = args.l3_initialize_args() tunnel_mode = args.tunnelMode if args.server is not None: # Start in server mode local_server = tests.configure_l3(args.server, tunnel_mode) local_server.wait() elif args.client is not None: # Run in client mode bandwidth = args.targetBandwidth interval = args.testInterval me = (util.ip_from_cidr(args.client[1][0]), args.client[1][1], args.client[1][0], args.client[1][2]) he = (args.client[2][0], args.client[2][1], args.client[2][0], args.client[2][2]) local_server = tests. 
configure_l3(args.client, tunnel_mode) ps = get_packet_sizes(me, he, args.client[0]) tests.do_direct_tests(me, he, bandwidth, interval, ps) except KeyboardInterrupt: print "Terminating" except xmlrpclib.Fault: print "Couldn't contact peer" except socket.error: print "Couldn't contact peer" except xmlrpclib.ProtocolError: print "XMLRPC control channel was abruptly terminated" finally: if local_server is not None: local_server.terminate() openvswitch-2.0.1+git20140120/utilities/ovs-lib.in000066400000000000000000000162361226605124000214000ustar00rootroot00000000000000# This is a shell function library sourced by some Open vSwitch scripts. # It is not intended to be invoked on its own. # Copyright (C) 2009, 2010, 2011, 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ## ----------------- ## ## configure options ## ## ----------------- ## # All of these should be substituted by the Makefile at build time. 
logdir=${OVS_LOGDIR-'@LOGDIR@'}                   # /var/log/openvswitch
rundir=${OVS_RUNDIR-'@RUNDIR@'}                   # /var/run/openvswitch
sysconfdir=${OVS_SYSCONFDIR-'@sysconfdir@'}       # /etc
etcdir=$sysconfdir/openvswitch                    # /etc/openvswitch
datadir=${OVS_PKGDATADIR-'@pkgdatadir@'}          # /usr/share/openvswitch
bindir=${OVS_BINDIR-'@bindir@'}                   # /usr/bin
sbindir=${OVS_SBINDIR-'@sbindir@'}                # /usr/sbin

# /etc/openvswitch or /var/lib/openvswitch
#
# $OVS_DBDIR wins if set; otherwise the database directory follows
# $OVS_SYSCONFDIR if that is set, and the build-time default if not.
if test X"$OVS_DBDIR" != X; then
    dbdir=$OVS_DBDIR
elif test X"$OVS_SYSCONFDIR" != X; then
    dbdir=$OVS_SYSCONFDIR/openvswitch
else
    dbdir='@DBDIR@'
fi

# Appends the arguments, as one line, to ovs-ctl.log in $logdir.
ovs_ctl_log () {
    echo "$@" >> "${logdir}/ovs-ctl.log"
}

# Runs the installed ovs-ctl script with the given arguments.  Except in the
# strace case, the script's output is also appended to ovs-ctl.log.
ovs_ctl () {
    case "$@" in
        *"=strace"*)
            # In case of running the daemon with strace, piping the o/p causes
            # the script to block (strace probably does not close the inherited
            # pipe). So, do not log the o/p to ovs-ctl.log.
            "${datadir}/scripts/ovs-ctl" "$@"
        ;;
        "status")
            # In case of the command 'status', we should return the exit status
            # of ovs-ctl. It is also useful to document the o/p in ovs-ctl.log.
            display=`"${datadir}/scripts/ovs-ctl" "$@" 2>&1`
            rc=$?
            echo "${display}" | tee -a "${logdir}/ovs-ctl.log"
            return ${rc}
        ;;
        *)
            # Log a UTC timestamp and the arguments, then the output.
            echo "`date -u`:$@" >> "${logdir}/ovs-ctl.log"
            "${datadir}/scripts/ovs-ctl" "$@" 2>&1 | tee -a "${logdir}/ovs-ctl.log"
        ;;
    esac
}

VERSION='@VERSION@'
DAEMON_CWD=/

LC_ALL=C; export LC_ALL

## ------------- ##
## LSB functions ##
## ------------- ##

# Use the system's own implementations if it has any.
if test -e /etc/init.d/functions; then
    . /etc/init.d/functions
elif test -e /etc/rc.d/init.d/functions; then
    . /etc/rc.d/init.d/functions
elif test -e /lib/lsb/init-functions; then
    . /lib/lsb/init-functions
fi

# Implement missing functions (e.g. OpenSUSE lacks 'action').
#
# Each fallback below is defined only if no command of that name already
# exists after sourcing the system's function library.
if type log_success_msg >/dev/null 2>&1; then :; else
    log_success_msg () {
        printf '%s.\n' "$*"
    }
fi
if type log_failure_msg >/dev/null 2>&1; then :; else
    log_failure_msg () {
        printf '%s ... failed!\n' "$*"
    }
fi
if type log_warning_msg >/dev/null 2>&1; then :; else
    log_warning_msg () {
        printf '%s ... (warning).\n' "$*"
    }
fi
if type action >/dev/null 2>&1; then :; else
    # action STRING COMMAND [ARG...]: runs COMMAND, reports STRING as a
    # success or a failure according to its exit status, and returns that
    # status.
    action () {
       STRING=$1
       shift
       "$@"
       rc=$?
       if test $rc = 0; then
            log_success_msg "$STRING"
       else
            log_failure_msg "$STRING"
       fi
       return $rc
    }
fi

## ------- ##
## Daemons ##
## ------- ##

# pid_exists PID: succeeds if /proc/PID is a directory.
pid_exists () {
    # This is better than "kill -0" because it doesn't require permission to
    # send a signal (so daemon_status in particular works as non-root).
    test -d /proc/"$1"
}

start_daemon () {
    priority=$1
    wrapper=$2
    shift; shift
    daemon=$1
    strace=""

    # drop core files in a sensible place
    test -d "$DAEMON_CWD" || install -d -m 755 -o root -g root "$DAEMON_CWD"
    set "$@" --no-chdir
    cd "$DAEMON_CWD"

    # log file
    test -d "$logdir" || install -d -m 755 -o root -g root "$logdir"
    set "$@" --log-file="$logdir/$daemon.log"

    # pidfile and monitoring
    test -d "$rundir" || install -d -m 755 -o root -g root "$rundir"
    set "$@" --pidfile="$rundir/$daemon.pid"
    set "$@" --detach --monitor

    # wrapper
    case $wrapper in
        valgrind)
            if (valgrind --version) > /dev/null 2>&1; then
                set valgrind -q --leak-check=full --time-stamp=yes \
                    --log-file="$logdir/$daemon.valgrind.log.%p" "$@"
            else
                log_failure_msg "valgrind not installed, running $daemon without it"
            fi
            ;;
        strace)
            if (strace -V) > /dev/null 2>&1; then
                strace="strace -tt -T -s 256 -ff"
                if (strace -DV) > /dev/null 2>&1; then
                    # Has the -D option.
                    set $strace -D -o "$logdir/$daemon.strace.log" "$@"
                    strace=""
                fi
            else
                log_failure_msg "strace not installed, running $daemon without it"
            fi
            ;;
        glibc)
            set env MALLOC_CHECK_=2 MALLOC_PERTURB_=165 "$@"
            ;;
        '')
            ;;
        *)
            log_failure_msg "unknown wrapper $wrapper, running $daemon without it"
            ;;
    esac

    # priority
    if test X"$priority" != X; then
        set nice -n "$priority" "$@"
    fi

    action "Starting $daemon" "$@"

    if test X"$strace" != X; then
        # Strace doesn't have the -D option so we attach after the fact.
setsid $strace -o "$logdir/$daemon.strace.log" \ -p `cat $rundir/$daemon.pid` > /dev/null 2>&1 & fi } stop_daemon () { if test -e "$rundir/$1.pid"; then if pid=`cat "$rundir/$1.pid"`; then for action in TERM .1 .25 .65 1 1 1 1 KILL 1 1 1 2 10 15 30 FAIL; do if pid_exists "$pid" >/dev/null 2>&1; then :; else return 0 fi case $action in TERM) action "Killing $1 ($pid)" kill $pid ;; KILL) action "Killing $1 ($pid) with SIGKILL" kill -9 $pid ;; FAIL) log_failure_msg "Killing $1 ($pid) failed" return 1 ;; *) sleep $action ;; esac done fi fi log_success_msg "$1 is not running" } daemon_status () { pidfile=$rundir/$1.pid if test -e "$pidfile"; then if pid=`cat "$pidfile"`; then if pid_exists "$pid"; then echo "$1 is running with pid $pid" return 0 else echo "Pidfile for $1 ($pidfile) is stale" fi else echo "Pidfile for $1 ($pidfile) exists but cannot be read" fi else echo "$1 is not running" fi return 1 } daemon_is_running () { pidfile=$rundir/$1.pid test -e "$pidfile" && pid=`cat "$pidfile"` && pid_exists "$pid" } >/dev/null 2>&1 openvswitch-2.0.1+git20140120/utilities/ovs-ofctl.8.in000066400000000000000000002030671226605124000221070ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .TH ovs\-ofctl 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .ds PN ovs\-ofctl . .SH NAME ovs\-ofctl \- administer OpenFlow switches . .SH SYNOPSIS .B ovs\-ofctl [\fIoptions\fR] \fIcommand \fR[\fIswitch\fR] [\fIargs\fR\&...] . .SH DESCRIPTION The .B ovs\-ofctl program is a command line tool for monitoring and administering OpenFlow switches. It can also show the current state of an OpenFlow switch, including features, configuration, and table entries. It should work with any OpenFlow switch, not just Open vSwitch. . .SS "OpenFlow Switch Management Commands" .PP These commands allow \fBovs\-ofctl\fR to monitor and administer an OpenFlow switch. It is able to show the current state of a switch, including features, configuration, and table entries. 
.PP
Most of these commands take an argument that specifies the method for
connecting to an OpenFlow switch. The following connection methods
are supported:
.
.RS
.so lib/vconn-active.man
.
.IP "\fIfile\fR"
This is short for \fBunix:\fIfile\fR, as long as \fIfile\fR does not
contain a colon.
.
.IP \fIbridge\fR
This is short for \fBunix:@RUNDIR@/\fIbridge\fB.mgmt\fR, as long as
\fIbridge\fR does not contain a colon.
.
.IP [\fItype\fB@\fR]\fIdp\fR
Attempts to look up the bridge associated with \fIdp\fR and open as
above. If \fItype\fR is given, it specifies the datapath provider of
\fIdp\fR, otherwise the default provider \fBsystem\fR is assumed.
.RE
.
.TP
\fBshow \fIswitch\fR
Prints to the console information on \fIswitch\fR, including
information on its flow tables and ports.
.
.TP
\fBdump\-tables \fIswitch\fR
Prints to the console statistics for each of the flow tables used by
\fIswitch\fR.
.
.TP
\fBdump\-ports \fIswitch\fR [\fInetdev\fR]
Prints to the console statistics for network devices associated with
\fIswitch\fR. If \fInetdev\fR is specified, only the statistics
associated with that device will be printed. \fInetdev\fR can be an
OpenFlow assigned port number or device name, e.g. \fBeth0\fR.
.
.TP
\fBdump\-ports\-desc \fIswitch\fR
Prints to the console detailed information about network devices
associated with \fIswitch\fR (version 1.7 or later). This is a subset
of the information provided by the \fBshow\fR command.
.
.IP "\fBmod\-port \fIswitch\fR \fIport\fR \fIaction\fR"
Modify characteristics of port \fIport\fR in \fIswitch\fR. \fIport\fR
may be an OpenFlow port number or name or the keyword \fBLOCAL\fR (the
preferred way to refer to the OpenFlow local port). The \fIaction\fR
may be any one of the following:
.
.RS
.IQ \fBup\fR
.IQ \fBdown\fR
Enable or disable the interface. This is equivalent to \fBifconfig
up\fR or \fBifconfig down\fR on a Unix system.
.
.IP \fBstp\fR .IQ \fBno\-stp\fR Enable or disable 802.1D spanning tree protocol (STP) on the interface. OpenFlow implementations that don't support STP will refuse to enable it. . .IP \fBreceive\fR .IQ \fBno\-receive\fR .IQ \fBreceive\-stp\fR .IQ \fBno\-receive\-stp\fR Enable or disable OpenFlow processing of packets received on this interface. When packet processing is disabled, packets will be dropped instead of being processed through the OpenFlow table. The \fBreceive\fR or \fBno\-receive\fR setting applies to all packets except 802.1D spanning tree packets, which are separately controlled by \fBreceive\-stp\fR or \fBno\-receive\-stp\fR. . .IP \fBforward\fR .IQ \fBno\-forward\fR Allow or disallow forwarding of traffic to this interface. By default, forwarding is enabled. . .IP \fBflood\fR .IQ \fBno\-flood\fR Controls whether an OpenFlow \fBflood\fR action will send traffic out this interface. By default, flooding is enabled. Disabling flooding is primarily useful to prevent loops when a spanning tree protocol is not in use. . .IP \fBpacket\-in\fR .IQ \fBno\-packet\-in\fR Controls whether packets received on this interface that do not match a flow table entry generate a ``packet in'' message to the OpenFlow controller. By default, ``packet in'' messages are enabled. .RE .IP The \fBshow\fR command displays (among other information) the configuration that \fBmod\-port\fR changes. . .IP "\fBget\-frags \fIswitch\fR" Prints \fIswitch\fR's fragment handling mode. See \fBset\-frags\fR, below, for a description of each fragment handling mode. .IP The \fBshow\fR command also prints the fragment handling mode among its other output. . .IP "\fBset\-frags \fIswitch frag_mode\fR" Configures \fIswitch\fR's treatment of IPv4 and IPv6 fragments. The choices for \fIfrag_mode\fR are: .RS .IP "\fBnormal\fR" Fragments pass through the flow table like non-fragmented packets. 
The TCP ports, UDP ports, and ICMP type and code fields are always set to 0, even for fragments where that information would otherwise be available (fragments with offset 0). This is the default fragment handling mode for an OpenFlow switch. .IP "\fBdrop\fR" Fragments are dropped without passing through the flow table. .IP "\fBreassemble\fR" The switch reassembles fragments into full IP packets before passing them through the flow table. Open vSwitch does not implement this fragment handling mode. .IP "\fBnx\-match\fR" Fragments pass through the flow table like non-fragmented packets. The TCP ports, UDP ports, and ICMP type and code fields are available for matching for fragments with offset 0, and set to 0 in fragments with nonzero offset. This mode is a Nicira extension. .RE .IP See the description of \fBip_frag\fR, below, for a way to match on whether a packet is a fragment and on its fragment offset. . .TP \fBdump\-flows \fIswitch \fR[\fIflows\fR] Prints to the console all flow entries in \fIswitch\fR's tables that match \fIflows\fR. If \fIflows\fR is omitted, all flows in the switch are retrieved. See \fBFlow Syntax\fR, below, for the syntax of \fIflows\fR. The output format is described in \fBTable Entry Output\fR. . .IP By default, \fBovs\-ofctl\fR prints flow entries in the same order that the switch sends them, which is unlikely to be intuitive or consistent. See the description of \fB\-\-sort\fR and \fB\-\-rsort\fR, under \fBOPTIONS\fR below, to influence the display order. . .TP \fBdump\-aggregate \fIswitch \fR[\fIflows\fR] Prints to the console aggregate statistics for flows in \fIswitch\fR's tables that match \fIflows\fR. If \fIflows\fR is omitted, the statistics are aggregated across all flows in the switch's flow tables. See \fBFlow Syntax\fR, below, for the syntax of \fIflows\fR. The output format is described in \fBTable Entry Output\fR. . 
.IP "\fBqueue\-stats \fIswitch \fR[\fIport \fR[\fIqueue\fR]]" Prints to the console statistics for the specified \fIqueue\fR on \fIport\fR within \fIswitch\fR. \fIport\fR can be an OpenFlow port number or name, the keyword \fBLOCAL\fR (the preferred way to refer to the OpenFlow local port), or the keyword \fBALL\fR. Either of \fIport\fR or \fIqueue\fR or both may be omitted (or equivalently the keyword \fBALL\fR). If both are omitted, statistics are printed for all queues on all ports. If only \fIqueue\fR is omitted, then statistics are printed for all queues on \fIport\fR; if only \fIport\fR is omitted, then statistics are printed for \fIqueue\fR on every port where it exists. . .SS "OpenFlow Switch Flow Table Commands" . These commands manage the flow table in an OpenFlow switch. In each case, \fIflow\fR specifies a flow entry in the format described in \fBFlow Syntax\fR, below, and \fIfile\fR is a text file that contains zero or more flows in the same syntax, one per line. . .IP "\fBadd\-flow \fIswitch flow\fR" .IQ "\fBadd\-flow \fIswitch \fB\- < \fIfile\fR" .IQ "\fBadd\-flows \fIswitch file\fR" Add each flow entry to \fIswitch\fR's tables. . .IP "[\fB\-\-strict\fR] \fBmod\-flows \fIswitch flow\fR" .IQ "[\fB\-\-strict\fR] \fBmod\-flows \fIswitch \fB\- < \fIfile\fR" Modify the actions in entries from \fIswitch\fR's tables that match the specified flows. With \fB\-\-strict\fR, wildcards are not treated as active for matching purposes. . .IP "\fBdel\-flows \fIswitch\fR" .IQ "[\fB\-\-strict\fR] \fBdel\-flows \fIswitch \fR[\fIflow\fR]" .IQ "[\fB\-\-strict\fR] \fBdel\-flows \fIswitch \fB\- < \fIfile\fR" Deletes entries from \fIswitch\fR's flow table. With only a \fIswitch\fR argument, deletes all flows. Otherwise, deletes flow entries that match the specified flows. With \fB\-\-strict\fR, wildcards are not treated as active for matching purposes. . 
.IP "[\fB\-\-readd\fR] \fBreplace\-flows \fIswitch file\fR" Reads flow entries from \fIfile\fR (or \fBstdin\fR if \fIfile\fR is \fB\-\fR) and queries the flow table from \fIswitch\fR. Then it fixes up any differences, adding flows from \fIfile\fR that are missing on \fIswitch\fR, deleting flows from \fIswitch\fR that are not in \fIfile\fR, and updating flows in \fIswitch\fR whose actions, cookie, or timeouts differ in \fIfile\fR. . .IP With \fB\-\-readd\fR, \fBovs\-ofctl\fR adds all the flows from \fIfile\fR, even those that exist with the same actions, cookie, and timeout in \fIswitch\fR. This resets all the flow packet and byte counters to 0, which can be useful for debugging. . .IP "\fBdiff\-flows \fIsource1 source2\fR" Reads flow entries from \fIsource1\fR and \fIsource2\fR and prints the differences. A flow that is in \fIsource1\fR but not in \fIsource2\fR is printed preceded by a \fB\-\fR, and a flow that is in \fIsource2\fR but not in \fIsource1\fR is printed preceded by a \fB+\fR. If a flow exists in both \fIsource1\fR and \fIsource2\fR with different actions, cookie, or timeouts, then both versions are printed preceded by \fB\-\fR and \fB+\fR, respectively. .IP \fIsource1\fR and \fIsource2\fR may each name a file or a switch. If a name begins with \fB/\fR or \fB.\fR, then it is considered to be a file name. A name that contains \fB:\fR is considered to be a switch. Otherwise, it is a file if a file by that name exists, a switch if not. .IP For this command, an exit status of 0 means that no differences were found, 1 means that an error occurred, and 2 means that some differences were found. . .IP "\fBpacket\-out \fIswitch in_port actions packet\fR..." Connects to \fIswitch\fR and instructs it to execute the OpenFlow \fIactions\fR on each \fIpacket\fR. For the purpose of executing the actions, the packets are considered to have arrived on \fIin_port\fR, which may be an OpenFlow port number or name (e.g.
\fBeth0\fR), the keyword \fBLOCAL\fR (the preferred way to refer to the OpenFlow ``local'' port), or the keyword \fBNONE\fR to indicate that the packet was generated by the switch itself. . .SS "OpenFlow Switch Monitoring Commands" . .IP "\fBsnoop \fIswitch\fR" Connects to \fIswitch\fR and prints to the console all OpenFlow messages received. Unlike other \fBovs\-ofctl\fR commands, if \fIswitch\fR is the name of a bridge, then the \fBsnoop\fR command connects to a Unix domain socket named \fB@RUNDIR@/\fIbridge\fB.snoop\fR. \fBovs\-vswitchd\fR listens on such a socket for each bridge and sends to it all of the OpenFlow messages sent to or received from its configured OpenFlow controller. Thus, this command can be used to view OpenFlow protocol activity between a switch and its controller. .IP When a switch has more than one controller configured, only the traffic to and from a single controller is output. If none of the controllers is configured as a master or a slave (using a Nicira extension to OpenFlow), then a controller is chosen arbitrarily among them. If there is a master controller, it is chosen; otherwise, if there are any controllers that are not masters or slaves, one is chosen arbitrarily; otherwise, a slave controller is chosen arbitrarily. This choice is made once at connection time and does not change as controllers reconfigure their roles. .IP If a switch has no controller configured, or if the configured controller is disconnected, no traffic is sent, so monitoring will not show any traffic. . .IP "\fBmonitor \fIswitch\fR [\fImiss-len\fR] [\fBinvalid_ttl\fR] [\fBwatch:\fR[\fIspec\fR...]]" Connects to \fIswitch\fR and prints to the console all OpenFlow messages received. Usually, \fIswitch\fR should specify the name of a bridge in the \fBovs\-vswitchd\fR database. 
.IP If \fImiss-len\fR is provided, \fBovs\-ofctl\fR sends an OpenFlow ``set configuration'' message at connection setup time that requests \fImiss-len\fR bytes of each packet that misses the flow table. Open vSwitch does not send these and other asynchronous messages to an \fBovs\-ofctl monitor\fR client connection unless a nonzero value is specified on this argument. (Thus, if \fImiss\-len\fR is not specified, very little traffic will ordinarily be printed.) .IP If \fBinvalid_ttl\fR is passed, \fBovs\-ofctl\fR sends an OpenFlow ``set configuration'' message at connection setup time that requests \fBINVALID_TTL_TO_CONTROLLER\fR, so that \fBovs\-ofctl monitor\fR can receive ``packet-in'' messages when TTL reaches zero on \fBdec_ttl\fR action. .IP \fBwatch:\fR[\fB\fIspec\fR...] causes \fBovs\-ofctl\fR to send a ``monitor request'' Nicira extension message to the switch at connection setup time. This message causes the switch to send information about flow table changes as they occur. The following comma-separated \fIspec\fR syntax is available: .RS .IP "\fB!initial\fR" Do not report the switch's initial flow table contents. .IP "\fB!add\fR" Do not report newly added flows. .IP "\fB!delete\fR" Do not report deleted flows. .IP "\fB!modify\fR" Do not report modifications to existing flows. .IP "\fB!own\fR" Abbreviate changes made to the flow table by \fBovs\-ofctl\fR's own connection to the switch. (These could only occur using the \fBofctl/send\fR command described below under \fBRUNTIME MANAGEMENT COMMANDS\fR.) .IP "\fB!actions\fR" Do not report actions as part of flow updates. .IP "\fBtable=\fInumber\fR" Limits the monitoring to the table with the given \fInumber\fR between 0 and 254. By default, all tables are monitored. .IP "\fBout_port=\fIport\fR" If set, only flows that output to \fIport\fR are monitored. The \fIport\fR may be an OpenFlow port number or keyword (e.g. \fBLOCAL\fR). 
.IP "\fIfield\fB=\fIvalue\fR" Monitors only flows that have \fIfield\fR specified as the given \fIvalue\fR. Any syntax valid for matching on \fBdump\-flows\fR may be used. .RE .IP This command may be useful for debugging switch or controller implementations. With \fBwatch:\fR, it is particularly useful for observing how a controller updates flow tables. . .SS "OpenFlow Switch and Controller Commands" . The following commands, like those in the previous section, may be applied to OpenFlow switches, using any of the connection methods described in that section. Unlike those commands, these may also be applied to OpenFlow controllers. . .TP \fBprobe \fItarget\fR Sends a single OpenFlow echo-request message to \fItarget\fR and waits for the response. With the \fB\-t\fR or \fB\-\-timeout\fR option, this command can test whether an OpenFlow switch or controller is up and running. . .TP \fBping \fItarget \fR[\fIn\fR] Sends a series of 10 echo request packets to \fItarget\fR and times each reply. The echo request packets consist of an OpenFlow header plus \fIn\fR bytes (default: 64) of randomly generated payload. This measures the latency of individual requests. . .TP \fBbenchmark \fItarget n count\fR Sends \fIcount\fR echo request packets that each consist of an OpenFlow header plus \fIn\fR bytes of payload and waits for each response. Reports the total time required. This is a measure of the maximum bandwidth to \fItarget\fR for round-trips of \fIn\fR-byte messages. . .SS "Other Commands" . .IP "\fBofp\-parse\fR \fIfile\fR" Reads \fIfile\fR (or \fBstdin\fR if \fIfile\fR is \fB\-\fR) as a series of OpenFlow messages in the binary format used on an OpenFlow connection, and prints them to the console. This can be useful for printing OpenFlow messages captured from a TCP stream. . .SS "Flow Syntax" .PP Some \fBovs\-ofctl\fR commands accept an argument that describes a flow or flows. 
Such flow descriptions comprise a series of \fIfield\fB=\fIvalue\fR assignments, separated by commas or white space. (Embedding spaces into a flow description normally requires quoting to prevent the shell from breaking the description into multiple arguments.) .PP Flow descriptions should be in \fBnormal form\fR. This means that a flow may only specify a value for an L3 field if it also specifies a particular L2 protocol, and that a flow may only specify an L4 field if it also specifies particular L2 and L3 protocol types. For example, if the L2 protocol type \fBdl_type\fR is wildcarded, then L3 fields \fBnw_src\fR, \fBnw_dst\fR, and \fBnw_proto\fR must also be wildcarded. Similarly, if \fBdl_type\fR or \fBnw_proto\fR (the L3 protocol type) is wildcarded, so must be \fBtp_dst\fR and \fBtp_src\fR, which are L4 fields. \fBovs\-ofctl\fR will warn about flows not in normal form. .PP The following field assignments describe how a flow matches a packet. If any of these assignments is omitted from the flow syntax, the field is treated as a wildcard; thus, if all of them are omitted, the resulting flow matches all packets. The string \fB*\fR may be specified to explicitly mark any of these fields as a wildcard. (\fB*\fR should be quoted to protect it from shell expansion.) . .IP \fBin_port=\fIport\fR Matches OpenFlow port \fIport\fR, which may be an OpenFlow port number or keyword (e.g. \fBLOCAL\fR). Switch ports are numbered as displayed by \fBovs\-ofctl show\fR. .IP (The \fBresubmit\fR action can search OpenFlow flow tables with arbitrary \fBin_port\fR values, so flows that match port numbers that do not exist from an OpenFlow perspective can still potentially be matched.) . .IP \fBdl_vlan=\fIvlan\fR Matches IEEE 802.1q Virtual LAN tag \fIvlan\fR. Specify \fB0xffff\fR as \fIvlan\fR to match packets that are not tagged with a Virtual LAN; otherwise, specify a number between 0 and 4095, inclusive, as the 12-bit VLAN ID to match. .
.IP \fBdl_vlan_pcp=\fIpriority\fR Matches IEEE 802.1q Priority Code Point (PCP) \fIpriority\fR, which is specified as a value between 0 and 7, inclusive. A higher value indicates a higher frame priority level. . .IP \fBdl_src=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR .IQ \fBdl_dst=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR Matches an Ethernet source (or destination) address specified as 6 pairs of hexadecimal digits delimited by colons (e.g. \fB00:0A:E4:25:6B:B0\fR). . .IP \fBdl_src=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB/\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR .IQ \fBdl_dst=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB/\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR Matches an Ethernet source (or destination) address specified as 6 pairs of hexadecimal digits delimited by colons (e.g. \fB00:0A:E4:25:6B:B0\fR), with a wildcard mask following the slash. Open vSwitch 1.8 and later support arbitrary masks for source and/or destination. Earlier versions only support masking the destination with the following masks: .RS .IP \fB01:00:00:00:00:00\fR Match only the multicast bit. Thus, \fBdl_dst=01:00:00:00:00:00/01:00:00:00:00:00\fR matches all multicast (including broadcast) Ethernet packets, and \fBdl_dst=00:00:00:00:00:00/01:00:00:00:00:00\fR matches all unicast Ethernet packets. .IP \fBfe:ff:ff:ff:ff:ff\fR Match all bits except the multicast bit. This is probably not useful. .IP \fBff:ff:ff:ff:ff:ff\fR Exact match (equivalent to omitting the mask). .IP \fB00:00:00:00:00:00\fR Wildcard all bits (equivalent to \fBdl_dst=*\fR.) .RE . .IP \fBdl_type=\fIethertype\fR Matches Ethernet protocol type \fIethertype\fR, which is specified as an integer between 0 and 65535, inclusive, either in decimal or as a hexadecimal number prefixed by \fB0x\fR (e.g. \fB0x0806\fR to match ARP packets). .
.IP \fBnw_src=\fIip\fR[\fB/\fInetmask\fR] .IQ \fBnw_dst=\fIip\fR[\fB/\fInetmask\fR] When \fBdl_type\fR is 0x0800 (possibly via shorthand, e.g. \fBip\fR or \fBtcp\fR), matches IPv4 source (or destination) address \fIip\fR, which may be specified as an IP address or host name (e.g. \fB192.168.1.1\fR or \fBwww.example.com\fR). The optional \fInetmask\fR allows restricting a match to an IPv4 address prefix. The netmask may be specified as a dotted quad (e.g. \fB192.168.1.0/255.255.255.0\fR) or as a CIDR block (e.g. \fB192.168.1.0/24\fR). Open vSwitch 1.8 and later support arbitrary dotted quad masks; earlier versions support only CIDR masks, that is, the dotted quads that are equivalent to some CIDR block. .IP When \fBdl_type=0x0806\fR or \fBarp\fR is specified, matches the \fBar_spa\fR or \fBar_tpa\fR field, respectively, in ARP packets for IPv4 and Ethernet. .IP When \fBdl_type=0x8035\fR or \fBrarp\fR is specified, matches the \fBar_spa\fR or \fBar_tpa\fR field, respectively, in RARP packets for IPv4 and Ethernet. .IP When \fBdl_type\fR is wildcarded or set to a value other than 0x0800, 0x0806, or 0x8035, the values of \fBnw_src\fR and \fBnw_dst\fR are ignored (see \fBFlow Syntax\fR above). . .IP \fBnw_proto=\fIproto\fR When \fBip\fR or \fBdl_type=0x0800\fR is specified, matches IP protocol type \fIproto\fR, which is specified as a decimal number between 0 and 255, inclusive (e.g. 1 to match ICMP packets or 6 to match TCP packets). .IP When \fBipv6\fR or \fBdl_type=0x86dd\fR is specified, matches IPv6 header type \fIproto\fR, which is specified as a decimal number between 0 and 255, inclusive (e.g. 58 to match ICMPv6 packets or 6 to match TCP). The header type is the terminal header as described in the \fBDESIGN\fR document. .IP When \fBarp\fR or \fBdl_type=0x0806\fR is specified, matches the lower 8 bits of the ARP opcode. ARP opcodes greater than 255 are treated as 0. 
.IP When \fBrarp\fR or \fBdl_type=0x8035\fR is specified, matches the lower 8 bits of the ARP opcode. ARP opcodes greater than 255 are treated as 0. .IP When \fBdl_type\fR is wildcarded or set to a value other than 0x0800, 0x0806, 0x8035 or 0x86dd, the value of \fBnw_proto\fR is ignored (see \fBFlow Syntax\fR above). . .IP \fBnw_tos=\fItos\fR Matches IP ToS/DSCP or IPv6 traffic class field \fItos\fR, which is specified as a decimal number between 0 and 255, inclusive. Note that the two lower reserved bits are ignored for matching purposes. .IP When \fBdl_type\fR is wildcarded or set to a value other than 0x0800 or 0x86dd, the value of \fBnw_tos\fR is ignored (see \fBFlow Syntax\fR above). . .IP \fBnw_ecn=\fIecn\fR Matches \fIecn\fR bits in IP ToS or IPv6 traffic class fields, which is specified as a decimal number between 0 and 3, inclusive. .IP When \fBdl_type\fR is wildcarded or set to a value other than 0x0800 or 0x86dd, the value of \fBnw_ecn\fR is ignored (see \fBFlow Syntax\fR above). . .IP \fBnw_ttl=\fIttl\fR Matches IP TTL or IPv6 hop limit value \fIttl\fR, which is specified as a decimal number between 0 and 255, inclusive. .IP When \fBdl_type\fR is wildcarded or set to a value other than 0x0800 or 0x86dd, the value of \fBnw_ttl\fR is ignored (see \fBFlow Syntax\fR above). .IP . .IP \fBtp_src=\fIport\fR .IQ \fBtp_dst=\fIport\fR When \fBdl_type\fR and \fBnw_proto\fR specify TCP or UDP or SCTP, \fBtp_src\fR and \fBtp_dst\fR match the UDP or TCP or SCTP source or destination port \fIport\fR, respectively, which is specified as a decimal number between 0 and 65535, inclusive (e.g. 80 to match packets originating from a HTTP server). .IP When \fBdl_type\fR and \fBnw_proto\fR take other values, the values of these settings are ignored (see \fBFlow Syntax\fR above). . .IP \fBtp_src=\fIport\fB/\fImask\fR .IQ \fBtp_dst=\fIport\fB/\fImask\fR Bitwise match on TCP (or UDP or SCTP) source or destination port, respectively. 
The \fIport\fR and \fImask\fR are 16-bit numbers written in decimal or in hexadecimal prefixed by \fB0x\fR. Each 1-bit in \fImask\fR requires that the corresponding bit in \fIport\fR must match. Each 0-bit in \fImask\fR causes the corresponding bit to be ignored. .IP Bitwise matches on transport ports are rarely useful in isolation, but a group of them can be used to reduce the number of flows required to match on a range of transport ports. For example, suppose that the goal is to match TCP source ports 1000 to 1999, inclusive. One way is to insert 1000 flows, each of which matches on a single source port. Another way is to look at the binary representations of 1000 and 1999, as follows: .br .B "01111101000" .br .B "11111001111" .br and then to transform those into a series of bitwise matches that accomplish the same results: .br .B "01111101xxx" .br .B "0111111xxxx" .br .B "10xxxxxxxxx" .br .B "110xxxxxxxx" .br .B "1110xxxxxxx" .br .B "11110xxxxxx" .br .B "1111100xxxx" .br which become the following when written in the syntax required by \fBovs\-ofctl\fR: .br .B "tcp,tp_src=0x03e8/0xfff8" .br .B "tcp,tp_src=0x03f0/0xfff0" .br .B "tcp,tp_src=0x0400/0xfe00" .br .B "tcp,tp_src=0x0600/0xff00" .br .B "tcp,tp_src=0x0700/0xff80" .br .B "tcp,tp_src=0x0780/0xffc0" .br .B "tcp,tp_src=0x07c0/0xfff0" .IP Only Open vSwitch 1.6 and later supports bitwise matching on transport ports. .IP Like the exact-match forms of \fBtp_src\fR and \fBtp_dst\fR described above, the bitwise match forms apply only when \fBdl_type\fR and \fBnw_proto\fR specify TCP or UDP or SCTP. . .IP \fBicmp_type=\fItype\fR .IQ \fBicmp_code=\fIcode\fR When \fBdl_type\fR and \fBnw_proto\fR specify ICMP or ICMPv6, \fItype\fR matches the ICMP type and \fIcode\fR matches the ICMP code. Each is specified as a decimal number between 0 and 255, inclusive. .IP When \fBdl_type\fR and \fBnw_proto\fR take other values, the values of these settings are ignored (see \fBFlow Syntax\fR above). . 
.IP \fBtable=\fInumber\fR If specified, limits the flow manipulation and flow dump commands to only apply to the table with the given \fInumber\fR between 0 and 254. . Behavior varies if \fBtable\fR is not specified (equivalent to specifying 255 as \fInumber\fR). For flow table modification commands without \fB\-\-strict\fR, the switch will choose the table for these commands to operate on. For flow table modification commands with \fB\-\-strict\fR, the command will operate on any single matching flow in any table; it will do nothing if there are matches in more than one table. The \fBdump-flows\fR and \fBdump-aggregate\fR commands will gather statistics about flows from all tables. .IP When this field is specified in \fBadd-flow\fR, \fBadd-flows\fR, \fBmod-flows\fR and \fBdel-flows\fR commands, it activates a Nicira extension to OpenFlow, which as of this writing is only known to be implemented by Open vSwitch. . .IP \fBmetadata=\fIvalue\fR[\fB/\fImask\fR] Matches \fIvalue\fR either exactly or with optional \fImask\fR in the metadata field. \fIvalue\fR and \fImask\fR are 64-bit integers, by default in decimal (use a \fB0x\fR prefix to specify hexadecimal). Arbitrary \fImask\fR values are allowed: a 1-bit in \fImask\fR indicates that the corresponding bit in \fIvalue\fR must match exactly, and a 0-bit wildcards that bit. Matching on metadata was added in Open vSwitch 1.8. . .PP The following shorthand notations are also available: . .IP \fBip\fR Same as \fBdl_type=0x0800\fR. . .IP \fBicmp\fR Same as \fBdl_type=0x0800,nw_proto=1\fR. . .IP \fBtcp\fR Same as \fBdl_type=0x0800,nw_proto=6\fR. . .IP \fBudp\fR Same as \fBdl_type=0x0800,nw_proto=17\fR. . .IP \fBsctp\fR Same as \fBdl_type=0x0800,nw_proto=132\fR. . .IP \fBarp\fR Same as \fBdl_type=0x0806\fR. . .IP \fBrarp\fR Same as \fBdl_type=0x8035\fR. . .PP The following field assignments require support for the NXM (Nicira Extended Match) extension to OpenFlow. 
When one of these is specified, \fBovs\-ofctl\fR will automatically attempt to negotiate use of this extension. If the switch does not support NXM, then \fBovs\-ofctl\fR will report a fatal error. . .IP \fBvlan_tci=\fItci\fR[\fB/\fImask\fR] Matches modified VLAN TCI \fItci\fR. If \fImask\fR is omitted, \fItci\fR is the exact VLAN TCI to match; if \fImask\fR is specified, then a 1-bit in \fImask\fR indicates that the corresponding bit in \fItci\fR must match exactly, and a 0-bit wildcards that bit. Both \fItci\fR and \fImask\fR are 16-bit values that are decimal by default; use a \fB0x\fR prefix to specify them in hexadecimal. . .IP The value that \fBvlan_tci\fR matches against is 0 for a packet that has no 802.1Q header. Otherwise, it is the TCI value from the 802.1Q header with the CFI bit (with value \fB0x1000\fR) forced to 1. .IP Examples: .RS .IP \fBvlan_tci=0\fR Match only packets without an 802.1Q header. .IP \fBvlan_tci=0xf123\fR Match packets tagged with priority 7 in VLAN 0x123. .IP \fBvlan_tci=0x1123/0x1fff\fR Match packets tagged with VLAN 0x123 (and any priority). .IP \fBvlan_tci=0x5000/0xf000\fR Match packets tagged with priority 2 (in any VLAN). .IP \fBvlan_tci=0/0xfff\fR Match packets with no 802.1Q header or tagged with VLAN 0 (and any priority). .IP \fBvlan_tci=0x5000/0xe000\fR Match packets with no 802.1Q header or tagged with priority 2 (in any VLAN). .IP \fBvlan_tci=0/0xefff\fR Match packets with no 802.1Q header or tagged with VLAN 0 and priority 0. .RE .IP Some of these matching possibilities can also be achieved with \fBdl_vlan\fR and \fBdl_vlan_pcp\fR. . .IP \fBip_frag=\fIfrag_type\fR When \fBdl_type\fR specifies IP or IPv6, \fIfrag_type\fR specifies what kind of IP fragments or non-fragments to match. The following values of \fIfrag_type\fR are supported: .RS .IP "\fBno\fR" Matches only non-fragmented packets. .IP "\fByes\fR" Matches all fragments. .IP "\fBfirst\fR" Matches only fragments with offset 0. 
.IP "\fBlater\fR" Matches only fragments with nonzero offset. .IP "\fBnot_later\fR" Matches non-fragmented packets and fragments with zero offset. .RE .IP The \fBip_frag\fR match type is likely to be most useful in \fBnx\-match\fR mode. See the description of the \fBset\-frags\fR command, above, for more details. . .IP \fBarp_sha=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR .IQ \fBarp_tha=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR When \fBdl_type\fR specifies either ARP or RARP, \fBarp_sha\fR and \fBarp_tha\fR match the source and target hardware address, respectively. An address is specified as 6 pairs of hexadecimal digits delimited by colons. . .IP \fBipv6_src=\fIipv6\fR[\fB/\fInetmask\fR] .IQ \fBipv6_dst=\fIipv6\fR[\fB/\fInetmask\fR] When \fBdl_type\fR is 0x86dd (possibly via shorthand, e.g., \fBipv6\fR or \fBtcp6\fR), matches IPv6 source (or destination) address \fIipv6\fR, which may be specified as defined in RFC 2373. The preferred format is \fIx\fB:\fIx\fB:\fIx\fB:\fIx\fB:\fIx\fB:\fIx\fB:\fIx\fB:\fIx\fR, where \fIx\fR are the hexadecimal values of the eight 16-bit pieces of the address. A single instance of \fB::\fR may be used to indicate multiple groups of 16-bits of zeros. The optional \fInetmask\fR allows restricting a match to an IPv6 address prefix. A netmask is specified as an IPv6 address (e.g. \fB2001:db8:3c4d:1::/ffff:ffff:ffff:ffff::\fR) or a CIDR block (e.g. \fB2001:db8:3c4d:1::/64\fR). Open vSwitch 1.8 and later support arbitrary masks; earlier versions support only CIDR masks, that is, CIDR block and IPv6 addresses that are equivalent to CIDR blocks. . .IP \fBipv6_label=\fIlabel\fR When \fBdl_type\fR is 0x86dd (possibly via shorthand, e.g., \fBipv6\fR or \fBtcp6\fR), matches IPv6 flow label \fIlabel\fR. . .IP \fBnd_target=\fIipv6\fR[\fB/\fInetmask\fR] When \fBdl_type\fR, \fBnw_proto\fR, and \fBicmp_type\fR specify IPv6 Neighbor Discovery (ICMPv6 type 135 or 136), matches the target address \fIipv6\fR. 
\fIipv6\fR is in the same format described earlier for the \fBipv6_src\fR and \fBipv6_dst\fR fields. . .IP \fBnd_sll=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR When \fBdl_type\fR, \fBnw_proto\fR, and \fBicmp_type\fR specify IPv6 Neighbor Solicitation (ICMPv6 type 135), matches the source link\-layer address option. An address is specified as 6 pairs of hexadecimal digits delimited by colons. . .IP \fBnd_tll=\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fB:\fIxx\fR When \fBdl_type\fR, \fBnw_proto\fR, and \fBicmp_type\fR specify IPv6 Neighbor Advertisement (ICMPv6 type 136), matches the target link\-layer address option. An address is specified as 6 pairs of hexadecimal digits delimited by colons. . .IP \fBtun_id=\fItunnel-id\fR[\fB/\fImask\fR] Matches tunnel identifier \fItunnel-id\fR. Only packets that arrive over a tunnel that carries a key (e.g. GRE with the RFC 2890 key extension and a nonzero key value) will have a nonzero tunnel ID. If \fImask\fR is omitted, \fItunnel-id\fR is the exact tunnel ID to match; if \fImask\fR is specified, then a 1-bit in \fImask\fR indicates that the corresponding bit in \fItunnel-id\fR must match exactly, and a 0-bit wildcards that bit. . .IP \fBtun_src=\fIip\fR[\fB/\fInetmask\fR] .IQ \fBtun_dst=\fIip\fR[\fB/\fInetmask\fR] Matches tunnel IPv4 source (or destination) address \fIip\fR. Only packets that arrive over a tunnel will have nonzero tunnel addresses. The address may be specified as an IP address or host name (e.g. \fB192.168.1.1\fR or \fBwww.example.com\fR). The optional \fInetmask\fR allows restricting a match to a masked IPv4 address. The netmask may be specified as a dotted quad (e.g. \fB192.168.1.0/255.255.255.0\fR) or as a CIDR block (e.g. \fB192.168.1.0/24\fR). . .IP "\fBreg\fIidx\fB=\fIvalue\fR[\fB/\fImask\fR]" Matches \fIvalue\fR either exactly or with optional \fImask\fR in register number \fIidx\fR. The valid range of \fIidx\fR depends on the switch. 
\fIvalue\fR and \fImask\fR are 32-bit integers, by default in decimal (use a \fB0x\fR prefix to specify hexadecimal). Arbitrary \fImask\fR values are allowed: a 1-bit in \fImask\fR indicates that the corresponding bit in \fIvalue\fR must match exactly, and a 0-bit wildcards that bit. .IP When a packet enters an OpenFlow switch, all of the registers are set to 0. Only explicit Nicira extension actions change register values. . .IP \fBpkt_mark=\fIvalue\fR[\fB/\fImask\fR] Matches packet metadata mark \fIvalue\fR either exactly or with optional \fImask\fR. The mark is associated data that may be passed into other system components in order to facilitate interaction between subsystems. On Linux this corresponds to the skb mark but the exact implementation is platform-dependent. . .PP Defining IPv6 flows (those with \fBdl_type\fR equal to 0x86dd) requires support for NXM. The following shorthand notations are available for IPv6-related flows: . .IP \fBipv6\fR Same as \fBdl_type=0x86dd\fR. . .IP \fBtcp6\fR Same as \fBdl_type=0x86dd,nw_proto=6\fR. . .IP \fBudp6\fR Same as \fBdl_type=0x86dd,nw_proto=17\fR. . .IP \fBsctp6\fR Same as \fBdl_type=0x86dd,nw_proto=132\fR. . .IP \fBicmp6\fR Same as \fBdl_type=0x86dd,nw_proto=58\fR. . .PP Finally, field assignments to \fBduration\fR, \fBn_packets\fR, or \fBn_bytes\fR are ignored to allow output from the \fBdump\-flows\fR command to be used as input for other commands that parse flows. . .PP The \fBadd\-flow\fR, \fBadd\-flows\fR, and \fBmod\-flows\fR commands require an additional field, which must be the final field specified: . .IP \fBactions=\fR[\fItarget\fR][\fB,\fItarget\fR...]\fR Specifies a comma-separated list of actions to take on a packet when the flow entry matches. If no \fItarget\fR is specified, then packets matching the flow are dropped. The \fItarget\fR may be an OpenFlow port number designating the physical port on which to output the packet, or one of the following keywords: . 
.RS .IP \fBoutput:\fIport\fR Outputs the packet to \fIport\fR, which must be an OpenFlow port number or keyword (e.g. \fBLOCAL\fR). . .IP \fBoutput:\fIsrc\fB[\fIstart\fB..\fIend\fB] Outputs the packet to the OpenFlow port number read from \fIsrc\fR, which must be an NXM field as described above. For example, \fBoutput:NXM_NX_REG0[16..31]\fR outputs to the OpenFlow port number written in the upper half of register 0. This form of \fBoutput\fR uses an OpenFlow extension that is not supported by standard OpenFlow switches. . .IP \fBenqueue:\fIport\fB:\fIqueue\fR Enqueues the packet on the specified \fIqueue\fR within port \fIport\fR, which must be an OpenFlow port number or keyword (e.g. \fBLOCAL\fR). The number of supported queues depends on the switch; some OpenFlow implementations do not support queuing at all. . .IP \fBnormal\fR Subjects the packet to the device's normal L2/L3 processing. (This action is not implemented by all OpenFlow switches.) . .IP \fBflood\fR Outputs the packet on all switch physical ports other than the port on which it was received and any ports on which flooding is disabled (typically, these would be ports disabled by the IEEE 802.1D spanning tree protocol). . .IP \fBall\fR Outputs the packet on all switch physical ports other than the port on which it was received. . .IP \fBcontroller(\fIkey\fB=\fIvalue\fR...\fB) Sends the packet to the OpenFlow controller as a ``packet in'' message. The supported key-value pairs are: .RS .IP "\fBmax_len=\fInbytes\fR" Limit to \fInbytes\fR the number of bytes of the packet to send to the controller. By default the entire packet is sent. .IP "\fBreason=\fIreason\fR" Specify \fIreason\fR as the reason for sending the message in the ``packet in'' message. The supported reasons are \fBaction\fR (the default), \fBno_match\fR, and \fBinvalid_ttl\fR.
.IP "\fBid=\fIcontroller-id\fR" Specify \fIcontroller-id\fR, a 16-bit integer, as the connection ID of the OpenFlow controller or controllers to which the ``packet in'' message should be sent. The default is zero. Zero is also the default connection ID for each controller connection, and a given controller connection will only have a nonzero connection ID if its controller uses the \fBNXT_SET_CONTROLLER_ID\fR Nicira extension to OpenFlow. .RE Any \fIreason\fR other than \fBaction\fR and any nonzero \fIcontroller-id\fR uses a Nicira vendor extension that, as of this writing, is only known to be implemented by Open vSwitch (version 1.6 or later). . .IP \fBcontroller\fR .IQ \fBcontroller\fR[\fB:\fInbytes\fR] Shorthand for \fBcontroller()\fR or \fBcontroller(max_len=\fInbytes\fB)\fR, respectively. . .IP \fBlocal\fR Outputs the packet on the ``local port,'' which corresponds to the network device that has the same name as the bridge. . .IP \fBin_port\fR Outputs the packet on the port from which it was received. . .IP \fBdrop\fR Discards the packet, so no further processing or forwarding takes place. If a drop action is used, no other actions may be specified. . .IP \fBmod_vlan_vid\fR:\fIvlan_vid\fR Modifies the VLAN id on a packet. The VLAN tag is added or modified as necessary to match the value specified. If the VLAN tag is added, a priority of zero is used (see the \fBmod_vlan_pcp\fR action to set this). . .IP \fBmod_vlan_pcp\fR:\fIvlan_pcp\fR Modifies the VLAN priority on a packet. The VLAN tag is added or modified as necessary to match the value specified. Valid values are between 0 (lowest) and 7 (highest). If the VLAN tag is added, a vid of zero is used (see the \fBmod_vlan_vid\fR action to set this). . .IP \fBstrip_vlan\fR Strips the VLAN tag from a packet if it is present. . .IP \fBpush_vlan\fR:\fIethertype\fR Push a new VLAN tag onto the packet. Ethertype is used as the Ethertype for the tag. Only ethertype 0x8100 should be used.
(0x88a8 which the spec allows isn't supported at the moment.) A priority of zero and the tag of zero are used for the new tag. . .IP \fBpush_mpls\fR:\fIethertype\fR If the packet does not already contain any MPLS labels, changes the packet's Ethertype to \fIethertype\fR, which must be either the MPLS unicast Ethertype \fB0x8847\fR or the MPLS multicast Ethertype \fB0x8848\fR, and then pushes an initial label stack entry. The label stack entry's default label is 2 if the packet contains IPv6 and 0 otherwise, its default traffic control value is the low 3 bits of the packet's DSCP value (0 if the packet is not IP), and its TTL is copied from the IP TTL (64 if the packet is not IP). .IP If the packet does already contain an MPLS label, pushes a new outermost label as a copy of the existing outermost label. .IP There are some limitations in the implementation. \fBpush_mpls\fR followed by another \fBpush_mpls\fR will result in the first \fBpush_mpls\fR being discarded. . .IP \fBpop_mpls\fR:\fIethertype\fR Strips the outermost MPLS label stack entry. Currently the implementation restricts \fIethertype\fR to a non-MPLS Ethertype and thus \fBpop_mpls\fR should only be applied to packets with an MPLS label stack depth of one. . .IP There are some limitations in the implementation. \fBpop_mpls\fR followed by another \fBpush_mpls\fR without an intermediate \fBpush_mpls\fR will result in the first \fBpush_mpls\fR being discarded. . .IP \fBmod_dl_src\fB:\fImac\fR Sets the source Ethernet address to \fImac\fR. . .IP \fBmod_dl_dst\fB:\fImac\fR Sets the destination Ethernet address to \fImac\fR. . .IP \fBmod_nw_src\fB:\fIip\fR Sets the IPv4 source address to \fIip\fR. . .IP \fBmod_nw_dst\fB:\fIip\fR Sets the IPv4 destination address to \fIip\fR. . .IP \fBmod_tp_src\fB:\fIport\fR Sets the TCP or UDP or SCTP source port to \fIport\fR. . .IP \fBmod_tp_dst\fB:\fIport\fR Sets the TCP or UDP or SCTP destination port to \fIport\fR. . 
.IP \fBmod_nw_tos\fB:\fItos\fR Sets the IPv4 ToS/DSCP field to \fItos\fR, which must be a multiple of 4 between 0 and 255. This action does not modify the two least significant bits of the ToS field (the ECN bits). .RE .IP The following actions are Nicira vendor extensions that, as of this writing, are only known to be implemented by Open vSwitch: . .RS . .IP \fBresubmit\fB:\fIport\fR .IQ \fBresubmit\fB(\fR[\fIport\fR]\fB,\fR[\fItable\fR]\fB) Re-searches this OpenFlow flow table (or the table whose number is specified by \fItable\fR) with the \fBin_port\fR field replaced by \fIport\fR (if \fIport\fR is specified) and executes the actions found, if any, in addition to any other actions in this flow entry. .IP Recursive \fBresubmit\fR actions are obeyed up to an implementation-defined maximum depth. Open vSwitch 1.0.1 and earlier did not support recursion; Open vSwitch before 1.2.90 did not support \fItable\fR. . .IP \fBset_tunnel\fB:\fIid\fR .IQ \fBset_tunnel64\fB:\fIid\fR If outputting to a port that encapsulates the packet in a tunnel and supports an identifier (such as GRE), sets the identifier to \fIid\fR. If the \fBset_tunnel\fR form is used and \fIid\fR fits in 32 bits, then this uses an action extension that is supported by Open vSwitch 1.0 and later. Otherwise, if \fIid\fR is a 64-bit value, it requires Open vSwitch 1.1 or later. . .IP \fBset_queue\fB:\fIqueue\fR Sets the queue that should be used to \fIqueue\fR when packets are output. The number of supported queues depends on the switch; some OpenFlow implementations do not support queuing at all. . .IP \fBpop_queue\fR Restores the queue to the value it was before any \fBset_queue\fR actions were applied. . .IP \fBdec_ttl\fR .IQ \fBdec_ttl\fB[\fR(\fIid1,id2\fI)\fR]\fR Decrement TTL of IPv4 packet or hop limit of IPv6 packet. If the TTL or hop limit is initially zero, no decrement occurs. 
Instead, a ``packet-in'' message with reason code \fBOFPR_INVALID_TTL\fR is sent to each connected controller that has enabled receiving them, if any. Processing the current set of actions then stops. However, if the current set of actions was reached through ``resubmit'' then remaining actions in outer levels resume processing. This action also optionally supports the ability to specify a list of valid controller ids. Each of the controllers in the list will receive the ``packet_in'' message only if they have registered to receive the invalid ttl packets. If controller ids are not specified, the ``packet_in'' message will be sent only to the controllers having controller id zero which have registered for the invalid ttl packets. . .IP \fBset_mpls_ttl\fR:\fIttl\fR Set the TTL of the outer MPLS label stack entry of a packet. \fIttl\fR should be in the range 0 to 255 inclusive. . .IP \fBdec_mpls_ttl\fR Decrement TTL of the outer MPLS label stack entry of a packet. If the TTL is initially zero, no decrement occurs. Instead, a ``packet-in'' message with reason code \fBOFPR_INVALID_TTL\fR is sent to each connected controller with controller id zero that has enabled receiving them. Processing the current set of actions then stops. However, if the current set of actions was reached through ``resubmit'' then remaining actions in outer levels resume processing. . .IP \fBnote:\fR[\fIhh\fR]... Does nothing at all. Any number of bytes represented as hex digits \fIhh\fR may be included. Pairs of hex digits may be separated by periods for readability. The \fBnote\fR action's format doesn't include an exact length for its payload, so the provided bytes will be padded on the right by enough bytes with value 0 to make the total number 6 more than a multiple of 8. . .IP "\fBmove:\fIsrc\fB[\fIstart\fB..\fIend\fB]\->\fIdst\fB[\fIstart\fB..\fIend\fB]\fR" Copies the named bits from field \fIsrc\fR to field \fIdst\fR.
\fIsrc\fR and \fIdst\fR must be NXM field names as defined in \fBnicira\-ext.h\fR, e.g. \fBNXM_OF_UDP_SRC\fR or \fBNXM_NX_REG0\fR. Each \fIstart\fR and \fIend\fR pair, which are inclusive, must specify the same number of bits and must fit within its respective field. Shorthands for \fB[\fIstart\fB..\fIend\fB]\fR exist: use \fB[\fIbit\fB]\fR to specify a single bit or \fB[]\fR to specify an entire field. .IP Examples: \fBmove:NXM_NX_REG0[0..5]\->NXM_NX_REG1[26..31]\fR copies the six bits numbered 0 through 5, inclusive, in register 0 into bits 26 through 31, inclusive; \fBmove:NXM_NX_REG0[0..15]\->NXM_OF_VLAN_TCI[]\fR copies the least significant 16 bits of register 0 into the VLAN TCI field. . .IP "\fBload:\fIvalue\fB\->\fIdst\fB[\fIstart\fB..\fIend\fB]" Writes \fIvalue\fR to bits \fIstart\fR through \fIend\fR, inclusive, in field \fIdst\fR. .IP Example: \fBload:55\->NXM_NX_REG2[0..5]\fR loads value 55 (bit pattern \fB110111\fR) into bits 0 through 5, inclusive, in register 2. . .IP "\fBpush:\fIsrc\fB[\fIstart\fB..\fIend\fB]" Pushes \fIstart\fR to \fIend\fR bits inclusive, in fields on top of the stack. .IP Example: \fBpush:NXM_NX_REG2[0..5]\fR push the value stored in register 2 bits 0 through 5, inclusive, on to the internal stack. . .IP "\fBpop:\fIdst\fB[\fIstart\fB..\fIend\fB]" Pops from the top of the stack, retrieves the \fIstart\fR to \fIend\fR bits inclusive, from the value popped and store them into the corresponding bits in \fIdst\fR. . .IP Example: \fBpop:NXM_NX_REG2[0..5]\fR pops the value from top of the stack. Set register 2 bits 0 through 5, inclusive, based on bits 0 through 5 from the value just popped. . .IP "\fBset_field:\fIvalue\fB\->\fIdst" Writes the literal \fIvalue\fR into the field \fIdst\fR, which should be specified as a name used for matching. (This is similar to \fBload\fR but more closely matches the set-field action defined in Open Flow 1.2 and above.) . .IP Example: \fBset_field:00:11:22:33:44:55->eth_src\fR. . 
.IP "\fBmultipath(\fIfields\fB, \fIbasis\fB, \fIalgorithm\fB, \fIn_links\fB, \fIarg\fB, \fIdst\fB[\fIstart\fB..\fIend\fB])\fR" Hashes \fIfields\fR using \fIbasis\fR as a universal hash parameter, then applies the multipath link selection \fIalgorithm\fR (with parameter \fIarg\fR) to choose one of \fIn_links\fR output links numbered 0 through \fIn_links\fR minus 1, and stores the link into \fIdst\fB[\fIstart\fB..\fIend\fB]\fR, which must be an NXM field as described above. .IP Currently, \fIfields\fR must be either \fBeth_src\fR or \fBsymmetric_l4\fR and \fIalgorithm\fR must be one of \fBmodulo_n\fR, \fBhash_threshold\fR, \fBhrw\fR, and \fBiter_hash\fR. Only the \fBiter_hash\fR algorithm uses \fIarg\fR. .IP Refer to \fBnicira\-ext.h\fR for more details. . .IP "\fBbundle(\fIfields\fB, \fIbasis\fB, \fIalgorithm\fB, \fIslave_type\fB, slaves:[\fIs1\fB, \fIs2\fB, ...])\fR" Hashes \fIfields\fR using \fIbasis\fR as a universal hash parameter, then applies the bundle link selection \fIalgorithm\fR to choose one of the listed slaves represented as \fIslave_type\fR. Currently the only supported \fIslave_type\fR is \fBofport\fR. Thus, each \fIs1\fR through \fIsN\fR should be an OpenFlow port number. Outputs to the selected slave. .IP Currently, \fIfields\fR must be either \fBeth_src\fR or \fBsymmetric_l4\fR and \fIalgorithm\fR must be one of \fBhrw\fR and \fBactive_backup\fR. .IP Example: \fBbundle(eth_src,0,hrw,ofport,slaves:4,8)\fR uses an Ethernet source hash with basis 0, to select between OpenFlow ports 4 and 8 using the Highest Random Weight algorithm. .IP Refer to \fBnicira\-ext.h\fR for more details. . .IP "\fBbundle_load(\fIfields\fB, \fIbasis\fB, \fIalgorithm\fB, \fIslave_type\fB, \fIdst\fB[\fIstart\fB..\fIend\fB], slaves:[\fIs1\fB, \fIs2\fB, ...])\fR" Has the same behavior as the \fBbundle\fR action, with one exception.
Instead of outputting to the selected slave, it writes its selection to \fIdst\fB[\fIstart\fB..\fIend\fB]\fR, which must be an NXM field as described above. .IP Example: \fBbundle_load(eth_src, 0, hrw, ofport, NXM_NX_REG0[], slaves:4, 8)\fR uses an Ethernet source hash with basis 0, to select between OpenFlow ports 4 and 8 using the Highest Random Weight algorithm, and writes the selection to \fBNXM_NX_REG0[]\fR. .IP Refer to \fBnicira\-ext.h\fR for more details. . .IP "\fBlearn(\fIargument\fR[\fB,\fIargument\fR]...\fB)\fR" This action adds or modifies a flow in an OpenFlow table, similar to \fBovs\-ofctl \-\-strict mod\-flows\fR. The arguments specify the flow's match fields, actions, and other properties, as follows. At least one match criterion and one action argument should ordinarily be specified. .RS .IP \fBidle_timeout=\fIseconds\fR .IQ \fBhard_timeout=\fIseconds\fR .IQ \fBpriority=\fIvalue\fR These key-value pairs have the same meaning as in the usual \fBovs\-ofctl\fR flow syntax. . .IP \fBfin_idle_timeout=\fIseconds\fR .IQ \fBfin_hard_timeout=\fIseconds\fR Adds a \fBfin_timeout\fR action with the specified arguments to the new flow. This feature was added in Open vSwitch 1.5.90. . .IP \fBtable=\fInumber\fR The table in which the new flow should be inserted. Specify a decimal number between 0 and 254. The default, if \fBtable\fR is unspecified, is table 1. . .IP \fIfield\fB=\fIvalue\fR .IQ \fIfield\fB[\fIstart\fB..\fIend\fB]=\fIsrc\fB[\fIstart\fB..\fIend\fB]\fR .IQ \fIfield\fB[\fIstart\fB..\fIend\fB]\fR Adds a match criterion to the new flow. .IP The first form specifies that \fIfield\fR must match the literal \fIvalue\fR, e.g. \fBdl_type=0x0800\fR. All of the fields and values for \fBovs\-ofctl\fR flow syntax are available with their usual meanings. .IP The second form specifies that \fIfield\fB[\fIstart\fB..\fIend\fB]\fR in the new flow must match \fIsrc\fB[\fIstart\fB..\fIend\fB]\fR taken from the flow currently being processed. 
.IP The third form is a shorthand for the second form. It specifies that \fIfield\fB[\fIstart\fB..\fIend\fB]\fR in the new flow must match \fIfield\fB[\fIstart\fB..\fIend\fB]\fR taken from the flow currently being processed. . .IP \fBload:\fIvalue\fB\->\fIdst\fB[\fIstart\fB..\fIend\fB] .IQ \fBload:\fIsrc\fB[\fIstart\fB..\fIend\fB]\->\fIdst\fB[\fIstart\fB..\fIend\fB] . Adds a \fBload\fR action to the new flow. .IP The first form loads the literal \fIvalue\fR into bits \fIstart\fR through \fIend\fR, inclusive, in field \fIdst\fR. Its syntax is the same as the \fBload\fR action described earlier in this section. .IP The second form loads \fIsrc\fB[\fIstart\fB..\fIend\fB]\fR, a value from the flow currently being processed, into bits \fIstart\fR through \fIend\fR, inclusive, in field \fIdst\fR. . .IP \fBoutput:\fIfield\fB[\fIstart\fB..\fIend\fB]\fR Add an \fBoutput\fR action to the new flow's actions, that outputs to the OpenFlow port taken from \fIfield\fB[\fIstart\fB..\fIend\fB]\fR, which must be an NXM field as described above. .RE .IP For best performance, segregate learned flows into a table (using \fBtable=\fInumber\fR) that is not used for any other flows except possibly for a lowest-priority ``catch-all'' flow, that is, a flow with no match criteria. (This is why the default \fBtable\fR is 1, to keep the learned flows separate from the primary flow table 0.) .RE . .RS .IP \fBapply_actions(\fR[\fIaction\fR][\fB,\fIaction\fR...]\fB) Applies the specific action(s) immediately. The syntax of the actions is the same as for the \fBactions=\fR field. . .IP \fBclear_actions\fR Clears all the actions in the action set immediately. . .IP \fBwrite_metadata\fB:\fIvalue\fR[/\fImask\fR] Updates the metadata field for the flow. If \fImask\fR is omitted, the metadata field is set exactly to \fIvalue\fR; if \fImask\fR is specified, then a 1-bit in \fImask\fR indicates that the corresponding bit in the metadata field will be replaced with the corresponding bit from \fIvalue\fR.
Both \fIvalue\fR and \fImask\fR are 64-bit values that are decimal by default; use a \fB0x\fR prefix to specify them in hexadecimal. . .IP \fBgoto_table\fR:\fItable\fR Indicates the next table in the process pipeline. . .IP "\fBfin_timeout(\fIargument\fR[\fB,\fIargument\fR]\fB)" This action changes the idle timeout or hard timeout, or both, of this OpenFlow rule when the rule matches a TCP packet with the FIN or RST flag. When such a packet is observed, the action reduces the rule's timeouts to those specified on the action. If the rule's existing timeout is already shorter than the one that the action specifies, then that timeout is unaffected. .IP \fIargument\fR takes the following forms: .RS .IP "\fBidle_timeout=\fIseconds\fR" Causes the flow to expire after the given number of seconds of inactivity. . .IP "\fBhard_timeout=\fIseconds\fR" Causes the flow to expire after the given number of seconds, regardless of activity. (\fIseconds\fR specifies time since the flow's creation, not since the receipt of the FIN or RST.) .RE .IP This action was added in Open vSwitch 1.5.90. . .IP "\fBsample(\fIargument\fR[\fB,\fIargument\fR]...\fB)\fR" Samples packets and sends one sample for every sampled packet. .IP \fIargument\fR takes the following forms: .RS .IP "\fBprobability=\fIpackets\fR" The number of sampled packets out of 65535. Must be greater than or equal to 1. .IP "\fBcollector_set_id=\fIid\fR" The unsigned 32-bit integer identifier of the set of sample collectors to send sampled packets to. Defaults to 0. .IP "\fBobs_domain_id=\fIid\fR" When sending samples to IPFIX collectors, the unsigned 32-bit integer Observation Domain ID sent in every IPFIX flow record. Defaults to 0. .IP "\fBobs_point_id=\fIid\fR" When sending samples to IPFIX collectors, the unsigned 32-bit integer Observation Point ID sent in every IPFIX flow record. Defaults to 0. .RE .IP Refer to \fBovs\-vswitchd.conf.db\fR(8) for more details on configuring sample collector sets.
.IP This action was added in Open vSwitch 1.10.90. . .IP "\fBexit\fR" This action causes Open vSwitch to immediately halt execution of further actions. Those actions which have already been executed are unaffected. Any further actions, including those which may be in other tables, or different levels of the \fBresubmit\fR call stack, are ignored. .RE . .PP An opaque identifier called a cookie can be used as a handle to identify a set of flows: . .IP \fBcookie=\fIvalue\fR . A cookie can be associated with a flow using the \fBadd\-flow\fR, \fBadd\-flows\fR, and \fBmod\-flows\fR commands. \fIvalue\fR can be any 64-bit number and need not be unique among flows. If this field is omitted, a default cookie value of 0 is used. . .IP \fBcookie=\fIvalue\fR\fB/\fImask\fR . When using NXM, the cookie can be used as a handle for querying, modifying, and deleting flows. \fIvalue\fR and \fImask\fR may be supplied for the \fBdel\-flows\fR, \fBmod\-flows\fR, \fBdump\-flows\fR, and \fBdump\-aggregate\fR commands to limit matching cookies. A 1-bit in \fImask\fR indicates that the corresponding bit in \fIcookie\fR must match exactly, and a 0-bit wildcards that bit. A mask of \-1 may be used to exactly match a cookie. .IP The \fBmod\-flows\fR command can update the cookies of flows that match a cookie by specifying the \fIcookie\fR field twice (once with a mask for matching and once without to indicate the new value): .RS .IP "\fBovs\-ofctl mod\-flows br0 cookie=1,actions=normal\fR" Change all flows' cookies to 1 and change their actions to \fBnormal\fR. .IP "\fBovs\-ofctl mod\-flows br0 cookie=1/\-1,cookie=2,actions=normal\fR" Update cookies with a value of 1 to 2 and change their actions to \fBnormal\fR. .RE .IP The ability to match on cookies was added in Open vSwitch 1.5.0. . .PP The following additional field sets the priority for flows added by the \fBadd\-flow\fR and \fBadd\-flows\fR commands. 
For \fBmod\-flows\fR and \fBdel\-flows\fR when \fB\-\-strict\fR is specified, priority must match along with the rest of the flow specification. For \fBmod-flows\fR without \fB\-\-strict\fR, priority is only significant if the command creates a new flow, that is, non-strict \fBmod\-flows\fR does not match on priority and will not change the priority of existing flows. Other commands do not allow priority to be specified. . .IP \fBpriority=\fIvalue\fR The priority at which a wildcarded entry will match in comparison to others. \fIvalue\fR is a number between 0 and 65535, inclusive. A higher \fIvalue\fR will match before a lower one. An exact-match entry will always have priority over an entry containing wildcards, so it has an implicit priority value of 65535. When adding a flow, if the field is not specified, the flow's priority will default to 32768. .IP OpenFlow leaves behavior undefined when two or more flows with the same priority can match a single packet. Some users expect ``sensible'' behavior, such as more specific flows taking precedence over less specific flows, but OpenFlow does not specify this and Open vSwitch does not implement it. Users should therefore take care to use priorities to ensure the behavior that they expect. . .PP The \fBadd\-flow\fR, \fBadd\-flows\fR, and \fBmod\-flows\fR commands support the following additional options. These options affect only new flows. Thus, for \fBadd\-flow\fR and \fBadd\-flows\fR, these options are always significant, but for \fBmod\-flows\fR they are significant only if the command creates a new flow, that is, their values do not update or affect existing flows. . .IP "\fBidle_timeout=\fIseconds\fR" Causes the flow to expire after the given number of seconds of inactivity. A value of 0 (the default) prevents a flow from expiring due to inactivity. . .IP \fBhard_timeout=\fIseconds\fR Causes the flow to expire after the given number of seconds, regardless of activity. 
A value of 0 (the default) gives the flow no hard expiration deadline. . .IP "\fBsend_flow_rem\fR" Marks the flow with a flag that causes the switch to generate a ``flow removed'' message and send it to interested controllers when the flow later expires or is removed. . .IP "\fBcheck_overlap\fR" Forces the switch to check that the flow match does not overlap that of any different flow with the same priority in the same table. (This check is expensive so it is best to avoid it.) . .PP The \fBdump\-flows\fR, \fBdump\-aggregate\fR, \fBdel\-flow\fR and \fBdel\-flows\fR commands support one additional optional field: . .TP \fBout_port=\fIport\fR If set, a matching flow must include an output action to \fIport\fR, which must be an OpenFlow port number or name (e.g. \fBlocal\fR). . .SS "Table Entry Output" . The \fBdump\-tables\fR and \fBdump\-aggregate\fR commands print information about the entries in a datapath's tables. Each line of output is a flow entry as described in \fBFlow Syntax\fR, above, plus some additional fields: . .IP \fBduration=\fIsecs\fR The time, in seconds, that the entry has been in the table. \fIsecs\fR includes as much precision as the switch provides, possibly to nanosecond resolution. . .IP \fBn_packets\fR The number of packets that have matched the entry. . .IP \fBn_bytes\fR The total number of bytes from packets that have matched the entry. . .PP The following additional fields are included only if the switch is Open vSwitch 1.6 or later and the NXM flow format is used to dump the flow (see the description of the \fB\-\-flow-format\fR option below). The values of these additional fields are approximations only and in particular \fBidle_age\fR will sometimes become nonzero even for busy flows. . .IP \fBhard_age=\fIsecs\fR The integer number of seconds since the flow was added or modified. \fBhard_age\fR is displayed only if it differs from the integer part of \fBduration\fR.
(This is separate from \fBduration\fR because \fBmod\-flows\fR restarts the \fBhard_timeout\fR timer without zeroing \fBduration\fR.) . .IP \fBidle_age=\fIsecs\fR The integer number of seconds that have passed without any packets passing through the flow. . .SH OPTIONS .TP \fB\-\-strict\fR Uses strict matching when running flow modification commands. . .so lib/ofp-version.man . .IP "\fB\-F \fIformat\fR[\fB,\fIformat\fR...]" .IQ "\fB\-\-flow\-format=\fIformat\fR[\fB,\fIformat\fR...]" \fBovs\-ofctl\fR supports the following individual flow formats, any number of which may be listed as \fIformat\fR: .RS .IP "\fBOpenFlow10\-table_id\fR" This is the standard OpenFlow 1.0 flow format. All OpenFlow switches and all versions of Open vSwitch support this flow format. . .IP "\fBOpenFlow10+table_id\fR" This is the standard OpenFlow 1.0 flow format plus a Nicira extension that allows \fBovs\-ofctl\fR to specify the flow table in which a particular flow should be placed. Open vSwitch 1.2 and later supports this flow format. . .IP "\fBNXM\-table_id\fR (Nicira Extended Match)" This Nicira extension to OpenFlow is flexible and extensible. It supports all of the Nicira flow extensions, such as \fBtun_id\fR and registers. Open vSwitch 1.1 and later supports this flow format. . .IP "\fBNXM+table_id\fR (Nicira Extended Match)" This combines Nicira Extended match with the ability to place a flow in a specific table. Open vSwitch 1.2 and later supports this flow format. . .IP "\fBOXM-OpenFlow12\fR" .IQ "\fBOXM-OpenFlow13\fR" These are the standard OXM (OpenFlow Extensible Match) flow format in OpenFlow 1.2 and 1.3, respectively. .RE . .IP \fBovs\-ofctl\fR also supports the following abbreviations for collections of flow formats: .RS .IP "\fBany\fR" Any supported flow format. .IP "\fBOpenFlow10\fR" \fBOpenFlow10\-table_id\fR or \fBOpenFlow10+table_id\fR. .IP "\fBNXM\fR" \fBNXM\-table_id\fR or \fBNXM+table_id\fR. .IP "\fBOXM\fR" \fBOXM-OpenFlow12\fR or \fBOXM-OpenFlow13\fR. .RE . 
.IP For commands that modify the flow table, \fBovs\-ofctl\fR by default negotiates the most widely supported flow format that supports the flows being added. For commands that query the flow table, \fBovs\-ofctl\fR by default uses the most advanced format supported by the switch. .IP This option, where \fIformat\fR is a comma-separated list of one or more of the formats listed above, limits \fBovs\-ofctl\fR's choice of flow format. If a command cannot work as requested using one of the specified flow formats, \fBovs\-ofctl\fR will report a fatal error. . .IP "\fB\-P \fIformat\fR" .IQ "\fB\-\-packet\-in\-format=\fIformat\fR" \fBovs\-ofctl\fR supports the following packet_in formats, in order of increasing capability: .RS .IP "\fBopenflow10\fR" This is the standard OpenFlow 1.0 packet in format. It should be supported by all OpenFlow switches. . .IP "\fBnxm\fR (Nicira Extended Match)" This packet_in format includes flow metadata encoded using the NXM format. . .RE .IP Usually, \fBovs\-ofctl\fR prefers the \fBnxm\fR packet_in format, but will allow the switch to choose its default if \fBnxm\fR is unsupported. When \fIformat\fR is one of the formats listed in the above table, \fBovs\-ofctl\fR will insist on the selected format. If the switch does not support the requested format, \fBovs\-ofctl\fR will report a fatal error. This option only affects the \fBmonitor\fR command. . .IP "\fB\-\-timestamp\fR" Print a timestamp before each received packet. This option only affects the \fBmonitor\fR and \fBsnoop\fR commands. . .IP "\fB\-m\fR" .IQ "\fB\-\-more\fR" Increases the verbosity of OpenFlow messages printed and logged by \fBovs\-ofctl\fR commands. Specify this option more than once to increase verbosity further. . 
.IP \fB\-\-sort\fR[\fB=\fIfield\fR] .IQ \fB\-\-rsort\fR[\fB=\fIfield\fR] Display output sorted by flow \fIfield\fR in ascending (\fB\-\-sort\fR) or descending (\fB\-\-rsort\fR) order, where \fIfield\fR is any of the fields that are allowed for matching or \fBpriority\fR to sort by priority. When \fIfield\fR is omitted, the output is sorted by priority. Specify these options multiple times to sort by multiple fields. .IP Any given flow will not necessarily specify a value for a given field. This requires special treatment: .RS .IP \(bu A flow that does not specify any part of a field that is used for sorting is sorted after all the flows that do specify the field. For example, \fB\-\-sort=tcp_src\fR will sort all the flows that specify a TCP source port in ascending order, followed by the flows that do not specify a TCP source port at all. .IP \(bu A flow that only specifies some bits in a field is sorted as if the wildcarded bits were zero. For example, \fB\-\-sort=nw_src\fR would sort a flow that specifies \fBnw_src=192.168.0.0/24\fR the same as \fBnw_src=192.168.0.0\fR. .RE .IP These options currently affect only \fBdump\-flows\fR output. . .ds DD \ \fBovs\-ofctl\fR detaches only when executing the \fBmonitor\fR or \ \fBsnoop\fR commands. .so lib/daemon.man .SS "Public Key Infrastructure Options" .so lib/ssl.man .so lib/vlog.man .so lib/common.man . .SH "RUNTIME MANAGEMENT COMMANDS" \fBovs\-appctl\fR(8) can send commands to a running \fBovs\-ofctl\fR process. The supported commands are listed below. . .IP "\fBexit\fR" Causes \fBovs\-ofctl\fR to gracefully terminate. This command applies only when executing the \fBmonitor\fR or \fBsnoop\fR commands. . .IP "\fBofctl/set\-output\-file \fIfile\fR" Causes all subsequent output to go to \fIfile\fR instead of stderr. This command applies only when executing the \fBmonitor\fR or \fBsnoop\fR commands. . .IP "\fBofctl/send \fIofmsg\fR..."
Sends each \fIofmsg\fR, specified as a sequence of hex digits that express an OpenFlow message, on the OpenFlow connection. This command is useful only when executing the \fBmonitor\fR command. . .IP "\fBofctl/barrier\fR" Sends an OpenFlow barrier request on the OpenFlow connection and waits for a reply. This command is useful only for the \fBmonitor\fR command. . .SH EXAMPLES . The following examples assume that \fBovs\-vswitchd\fR has a bridge named \fBbr0\fR configured. . .TP \fBovs\-ofctl dump\-tables br0\fR Prints out the switch's table stats. (This is more interesting after some traffic has passed through.) . .TP \fBovs\-ofctl dump\-flows br0\fR Prints the flow entries in the switch. . .SH "SEE ALSO" . .BR ovs\-appctl (8), .BR ovs\-controller (8), .BR ovs\-vswitchd (8) .BR ovs\-vswitchd.conf.db (8) openvswitch-2.0.1+git20140120/utilities/ovs-ofctl.c000066400000000000000000002673571226605124000215710ustar00rootroot00000000000000/* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "byte-order.h" #include "classifier.h" #include "command-line.h" #include "daemon.h" #include "compiler.h" #include "dirs.h" #include "dynamic-string.h" #include "nx-match.h" #include "odp-util.h" #include "ofp-actions.h" #include "ofp-errors.h" #include "ofp-msgs.h" #include "ofp-parse.h" #include "ofp-print.h" #include "ofp-util.h" #include "ofp-version-opt.h" #include "ofpbuf.h" #include "ofproto/ofproto.h" #include "openflow/nicira-ext.h" #include "openflow/openflow.h" #include "packets.h" #include "poll-loop.h" #include "random.h" #include "stream-ssl.h" #include "socket-util.h" #include "timeval.h" #include "unixctl.h" #include "util.h" #include "vconn.h" #include "vlog.h" #include "meta-flow.h" #include "sort.h" VLOG_DEFINE_THIS_MODULE(ofctl); /* --strict: Use strict matching for flow mod commands? Additionally governs * use of nx_pull_match() instead of nx_pull_match_loose() in parse-nx-match. */ static bool strict; /* --readd: If true, on replace-flows, re-add even flows that have not changed * (to reset flow counters). */ static bool readd; /* -F, --flow-format: Allowed protocols. By default, any protocol is * allowed. */ static enum ofputil_protocol allowed_protocols = OFPUTIL_P_ANY; /* -P, --packet-in-format: Packet IN format to use in monitor and snoop * commands. Either one of NXPIF_* to force a particular packet_in format, or * -1 to let ovs-ofctl choose the default. */ static int preferred_packet_in_format = -1; /* -m, --more: Additional verbosity for ofp-print functions. */ static int verbosity; /* --timestamp: Print a timestamp before each received packet on "monitor" and * "snoop" command? */ static bool timestamp; /* --sort, --rsort: Sort order. */ enum sort_order { SORT_ASC, SORT_DESC }; struct sort_criterion { const struct mf_field *field; /* NULL means to sort by priority. 
*/ enum sort_order order; };

/* Growable array of sort criteria, appended to by add_sort_criterion().
 * 'n_criteria' is the number of entries in use; 'allocated_criteria' is the
 * capacity value that x2nrealloc() updates when the array is grown. */
static struct sort_criterion *criteria;
static size_t n_criteria, allocated_criteria;

static const struct command *get_all_commands(void);

static void usage(void) NO_RETURN;
static void parse_options(int argc, char *argv[]);

static bool recv_flow_stats_reply(struct vconn *, ovs_be32 send_xid,
                                  struct ofpbuf **replyp,
                                  struct ofputil_flow_stats *,
                                  struct ofpbuf *ofpacts);

/* Program entry point.
 *
 * Records the program name from argv[0], parses the command-line options,
 * sets SIGPIPE to be ignored, and then passes the arguments that remain after
 * option parsing (starting at 'optind') to run_command() together with the
 * command table returned by get_all_commands().
 *
 * Returns 0 once run_command() returns. */
int
main(int argc, char *argv[])
{
    set_program_name(argv[0]);
    parse_options(argc, argv);
    signal(SIGPIPE, SIG_IGN);
    run_command(argc - optind, argv + optind, get_all_commands());
    return 0;
}

/* Appends one sort criterion with the given 'order' to 'criteria'.
 *
 * 'field' names the flow field to sort on.  If 'field' is NULL or is
 * "priority" (compared case-insensitively), the criterion's field is set to
 * NULL, which means to sort by priority.  Otherwise 'field' is looked up with
 * mf_from_name(); an unknown name is reported through ovs_fatal(). */
static void
add_sort_criterion(enum sort_order order, const char *field)
{
    struct sort_criterion *sc;

    /* Grow the array when every allocated slot is already in use. */
    if (n_criteria >= allocated_criteria) {
        criteria = x2nrealloc(criteria, &allocated_criteria, sizeof *criteria);
    }

    sc = &criteria[n_criteria++];
    if (!field || !strcasecmp(field, "priority")) {
        sc->field = NULL;
    } else {
        sc->field = mf_from_name(field);
        if (!sc->field) {
            ovs_fatal(0, "%s: unknown field name", field);
        }
    }
    sc->order = order;
}

/* Parses the command-line options in 'argv' with getopt_long() and stores the
 * results in the file-level option variables: 'strict', 'readd',
 * 'allowed_protocols', 'preferred_packet_in_format', 'verbosity', 'timestamp'
 * and the sort criteria.
 *
 * Invalid option values are reported through ovs_fatal().  After the options
 * are consumed, 'allowed_protocols' is narrowed to the protocols that the
 * allowed OpenFlow versions support, and the allowed versions are in turn
 * masked to those that the remaining protocols can use. */
static void
parse_options(int argc, char *argv[])
{
    /* Codes for long-only options start above UCHAR_MAX, so they are distinct
     * from every single-character option code used in the table below. */
    enum {
        OPT_STRICT = UCHAR_MAX + 1,
        OPT_READD,
        OPT_TIMESTAMP,
        OPT_SORT,
        OPT_RSORT,
        DAEMON_OPTION_ENUMS,
        OFP_VERSION_OPTION_ENUMS,
        VLOG_OPTION_ENUMS
    };
    static const struct option long_options[] = {
        {"timeout", required_argument, NULL, 't'},
        {"strict", no_argument, NULL, OPT_STRICT},
        {"readd", no_argument, NULL, OPT_READD},
        {"flow-format", required_argument, NULL, 'F'},
        {"packet-in-format", required_argument, NULL, 'P'},
        {"more", no_argument, NULL, 'm'},
        {"timestamp", no_argument, NULL, OPT_TIMESTAMP},
        {"sort", optional_argument, NULL, OPT_SORT},
        {"rsort", optional_argument, NULL, OPT_RSORT},
        {"help", no_argument, NULL, 'h'},
        DAEMON_LONG_OPTIONS,
        OFP_VERSION_LONG_OPTIONS,
        VLOG_LONG_OPTIONS,
        STREAM_SSL_LONG_OPTIONS,
        {NULL, 0, NULL, 0},
    };
    /* Short-option string derived from the table above; freed once the
     * option loop has finished. */
    char *short_options = long_options_to_short_options(long_options);
    uint32_t versions;
    enum ofputil_protocol version_protocols;

    for (;;) {
        unsigned long int timeout;
        int c;

        c = getopt_long(argc, argv, short_options, long_options, NULL);
        if (c == -1) {
            /* No more options. */
            break;
        }

        switch (c) {
        case 't':
            /* 'timeout' is unsigned, so "<= 0" is true only for zero. */
            timeout = strtoul(optarg, NULL, 10);
            if (timeout <= 0) {
                ovs_fatal(0, "value %s on -t or --timeout is not at least 1",
                          optarg);
            } else {
                time_alarm(timeout);
            }
            break;

        case 'F':
            /* A zero result means no listed flow format was recognized. */
            allowed_protocols = ofputil_protocols_from_string(optarg);
            if (!allowed_protocols) {
                ovs_fatal(0, "%s: invalid flow format(s)", optarg);
            }
            break;

        case 'P':
            /* A negative result means the format name was not recognized. */
            preferred_packet_in_format =
                ofputil_packet_in_format_from_string(optarg);
            if (preferred_packet_in_format < 0) {
                ovs_fatal(0, "unknown packet-in format `%s'", optarg);
            }
            break;

        case 'm':
            /* Each occurrence raises the verbosity by one. */
            verbosity++;
            break;

        case 'h':
            /* No "break": usage() is declared NO_RETURN above. */
            usage();

        case OPT_STRICT:
            strict = true;
            break;

        case OPT_READD:
            readd = true;
            break;

        case OPT_TIMESTAMP:
            timestamp = true;
            break;

        case OPT_SORT:
            /* 'optarg' is NULL when no field is given (the argument is
             * optional); add_sort_criterion() then sorts by priority. */
            add_sort_criterion(SORT_ASC, optarg);
            break;

        case OPT_RSORT:
            add_sort_criterion(SORT_DESC, optarg);
            break;

        DAEMON_OPTION_HANDLERS
        OFP_VERSION_OPTION_HANDLERS
        VLOG_OPTION_HANDLERS
        STREAM_SSL_OPTION_HANDLERS

        case '?':
            /* getopt_long() returns '?' for an option it cannot accept. */
            exit(EXIT_FAILURE);

        default:
            abort();
        }
    }

    if (n_criteria) {
        /* Always do a final sort pass based on priority. */
        add_sort_criterion(SORT_DESC, "priority");
    }

    free(short_options);

    /* Reconcile the allowed flow formats with the allowed OpenFlow versions:
     * at least one protocol must be common to both sets. */
    versions = get_allowed_ofp_versions();
    version_protocols = ofputil_protocols_from_version_bitmap(versions);
    if (!(allowed_protocols & version_protocols)) {
        char *protocols = ofputil_protocols_to_string(allowed_protocols);
        struct ds version_s = DS_EMPTY_INITIALIZER;

        ofputil_format_version_bitmap_names(&version_s, versions);
        /* NOTE(review): the format string below is reproduced exactly as it
         * appears in this copy of the file, including the line break that
         * falls inside the literal after "(%s)."; confirm against the
         * upstream source whether that break should be whitespace within the
         * string. */
        ovs_fatal(0, "None of the enabled OpenFlow versions (%s) supports "
                  "any of the enabled flow formats (%s).
(Use -O to enable "
                  "additional OpenFlow versions or -F to enable additional "
                  "flow formats.)", ds_cstr(&version_s), protocols);
    }
    /* Keep only the protocols usable with the allowed versions, then restrict
     * the allowed versions to those that the remaining protocols map to. */
    allowed_protocols &= version_protocols;
    mask_allowed_ofp_versions(ofputil_protocols_to_version_bitmap(
                                  allowed_protocols));
}

static void
usage(void)
{
    printf("%s: OpenFlow switch management utility\n"
           "usage: %s [OPTIONS] COMMAND [ARG...]\n"
           "\nFor OpenFlow switches:\n"
           " show SWITCH show OpenFlow information\n"
           " dump-desc SWITCH print switch description\n"
           " dump-tables SWITCH print table stats\n"
           " mod-port SWITCH IFACE ACT modify port behavior\n"
           " get-frags SWITCH print fragment handling behavior\n"
           " set-frags SWITCH FRAG_MODE set fragment handling behavior\n"
           " dump-ports SWITCH [PORT] print port statistics\n"
           " dump-ports-desc SWITCH print port descriptions\n"
           " dump-flows SWITCH print all flow entries\n"
           " dump-flows SWITCH FLOW print matching FLOWs\n"
           " dump-aggregate SWITCH print aggregate flow statistics\n"
           " dump-aggregate SWITCH FLOW print aggregate stats for FLOWs\n"
           " queue-stats SWITCH [PORT [QUEUE]] dump queue stats\n"
           " add-flow SWITCH FLOW add flow described by FLOW\n"
           " add-flows SWITCH FILE add flows from FILE\n"
           " mod-flows SWITCH FLOW modify actions of matching FLOWs\n"
           " del-flows SWITCH [FLOW] delete matching FLOWs\n"
           " replace-flows SWITCH FILE replace flows with those in FILE\n"
           " diff-flows SOURCE1 SOURCE2 compare flows from two sources\n"
           " packet-out SWITCH IN_PORT ACTIONS PACKET...\n"
           " execute ACTIONS on PACKET\n"
           " monitor SWITCH [MISSLEN] [invalid_ttl] [watch:[...]]\n"
           " print packets received from SWITCH\n"
           " snoop SWITCH snoop on SWITCH and its controller\n"
           "\nFor OpenFlow switches and controllers:\n"
           " probe TARGET probe whether TARGET is up\n"
           " ping TARGET [N] latency of N-byte echos\n"
           " benchmark TARGET N COUNT bandwidth of COUNT N-byte echos\n"
           "SWITCH or TARGET is an active OpenFlow connection method.\n"
           "\nOther commands:\n"
           " ofp-parse FILE print messages read from FILE\n",
           program_name,
program_name); vconn_usage(true, false, false); daemon_usage(); ofp_version_usage(); vlog_usage(); printf("\nOther options:\n" " --strict use strict match for flow commands\n" " --readd replace flows that haven't changed\n" " -F, --flow-format=FORMAT force particular flow format\n" " -P, --packet-in-format=FRMT force particular packet in format\n" " -m, --more be more verbose printing OpenFlow\n" " --timestamp (monitor, snoop) print timestamps\n" " -t, --timeout=SECS give up after SECS seconds\n" " --sort[=field] sort in ascending order\n" " --rsort[=field] sort in descending order\n" " -h, --help display this help message\n" " -V, --version display version information\n"); exit(EXIT_SUCCESS); } static void ofctl_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *exiting_) { bool *exiting = exiting_; *exiting = true; unixctl_command_reply(conn, NULL); } static void run(int retval, const char *message, ...) PRINTF_FORMAT(2, 3); static void run(int retval, const char *message, ...) { if (retval) { va_list args; va_start(args, message); ovs_fatal_valist(retval, message, args); } } /* Generic commands. */ static int open_vconn_socket(const char *name, struct vconn **vconnp) { char *vconn_name = xasprintf("unix:%s", name); int error; error = vconn_open(vconn_name, get_allowed_ofp_versions(), DSCP_DEFAULT, vconnp); if (error && error != ENOENT) { ovs_fatal(0, "%s: failed to open socket (%s)", name, ovs_strerror(error)); } free(vconn_name); return error; } enum open_target { MGMT, SNOOP }; static enum ofputil_protocol open_vconn__(const char *name, enum open_target target, struct vconn **vconnp) { const char *suffix = target == MGMT ? 
"mgmt" : "snoop"; char *datapath_name, *datapath_type, *socket_name; enum ofputil_protocol protocol; char *bridge_path; int ofp_version; int error; bridge_path = xasprintf("%s/%s.%s", ovs_rundir(), name, suffix); ofproto_parse_name(name, &datapath_name, &datapath_type); socket_name = xasprintf("%s/%s.%s", ovs_rundir(), datapath_name, suffix); free(datapath_name); free(datapath_type); if (strchr(name, ':')) { run(vconn_open(name, get_allowed_ofp_versions(), DSCP_DEFAULT, vconnp), "connecting to %s", name); } else if (!open_vconn_socket(name, vconnp)) { /* Fall Through. */ } else if (!open_vconn_socket(bridge_path, vconnp)) { /* Fall Through. */ } else if (!open_vconn_socket(socket_name, vconnp)) { /* Fall Through. */ } else { ovs_fatal(0, "%s is not a bridge or a socket", name); } if (target == SNOOP) { vconn_set_recv_any_version(*vconnp); } free(bridge_path); free(socket_name); VLOG_DBG("connecting to %s", vconn_get_name(*vconnp)); error = vconn_connect_block(*vconnp); if (error) { ovs_fatal(0, "%s: failed to connect to socket (%s)", name, ovs_strerror(error)); } ofp_version = vconn_get_version(*vconnp); protocol = ofputil_protocol_from_ofp_version(ofp_version); if (!protocol) { ovs_fatal(0, "%s: unsupported OpenFlow version 0x%02x", name, ofp_version); } return protocol; } static enum ofputil_protocol open_vconn(const char *name, struct vconn **vconnp) { return open_vconn__(name, MGMT, vconnp); } static void send_openflow_buffer(struct vconn *vconn, struct ofpbuf *buffer) { ofpmsg_update_length(buffer); run(vconn_send_block(vconn, buffer), "failed to send packet to switch"); } static void dump_transaction(struct vconn *vconn, struct ofpbuf *request) { struct ofpbuf *reply; ofpmsg_update_length(request); run(vconn_transact(vconn, request, &reply), "talking to %s", vconn_get_name(vconn)); ofp_print(stdout, reply->data, reply->size, verbosity + 1); ofpbuf_delete(reply); } static void dump_trivial_transaction(const char *vconn_name, enum ofpraw raw) { struct ofpbuf 
*request; struct vconn *vconn; open_vconn(vconn_name, &vconn); request = ofpraw_alloc(raw, vconn_get_version(vconn), 0); dump_transaction(vconn, request); vconn_close(vconn); } static void dump_stats_transaction(struct vconn *vconn, struct ofpbuf *request) { const struct ofp_header *request_oh = request->data; ovs_be32 send_xid = request_oh->xid; enum ofpraw request_raw; enum ofpraw reply_raw; bool done = false; ofpraw_decode_partial(&request_raw, request->data, request->size); reply_raw = ofpraw_stats_request_to_reply(request_raw, request_oh->version); send_openflow_buffer(vconn, request); while (!done) { ovs_be32 recv_xid; struct ofpbuf *reply; run(vconn_recv_block(vconn, &reply), "OpenFlow packet receive failed"); recv_xid = ((struct ofp_header *) reply->data)->xid; if (send_xid == recv_xid) { enum ofpraw raw; ofp_print(stdout, reply->data, reply->size, verbosity + 1); ofpraw_decode(&raw, reply->data); if (ofptype_from_ofpraw(raw) == OFPTYPE_ERROR) { done = true; } else if (raw == reply_raw) { done = !ofpmp_more(reply->data); } else { ovs_fatal(0, "received bad reply: %s", ofp_to_string(reply->data, reply->size, verbosity + 1)); } } else { VLOG_DBG("received reply with xid %08"PRIx32" " "!= expected %08"PRIx32, recv_xid, send_xid); } ofpbuf_delete(reply); } } static void dump_trivial_stats_transaction(const char *vconn_name, enum ofpraw raw) { struct ofpbuf *request; struct vconn *vconn; open_vconn(vconn_name, &vconn); request = ofpraw_alloc(raw, vconn_get_version(vconn), 0); dump_stats_transaction(vconn, request); vconn_close(vconn); } /* Sends all of the 'requests', which should be requests that only have replies * if an error occurs, and waits for them to succeed or fail. If an error does * occur, prints it and exits with an error. * * Destroys all of the 'requests'. 
*/ static void transact_multiple_noreply(struct vconn *vconn, struct list *requests) { struct ofpbuf *request, *reply; LIST_FOR_EACH (request, list_node, requests) { ofpmsg_update_length(request); } run(vconn_transact_multiple_noreply(vconn, requests, &reply), "talking to %s", vconn_get_name(vconn)); if (reply) { ofp_print(stderr, reply->data, reply->size, verbosity + 2); exit(1); } ofpbuf_delete(reply); } /* Sends 'request', which should be a request that only has a reply if an error * occurs, and waits for it to succeed or fail. If an error does occur, prints * it and exits with an error. * * Destroys 'request'. */ static void transact_noreply(struct vconn *vconn, struct ofpbuf *request) { struct list requests; list_init(&requests); list_push_back(&requests, &request->list_node); transact_multiple_noreply(vconn, &requests); } static void fetch_switch_config(struct vconn *vconn, struct ofp_switch_config *config_) { struct ofp_switch_config *config; struct ofpbuf *request; struct ofpbuf *reply; enum ofptype type; request = ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST, vconn_get_version(vconn), 0); run(vconn_transact(vconn, request, &reply), "talking to %s", vconn_get_name(vconn)); if (ofptype_pull(&type, reply) || type != OFPTYPE_GET_CONFIG_REPLY) { ovs_fatal(0, "%s: bad reply to config request", vconn_get_name(vconn)); } config = ofpbuf_pull(reply, sizeof *config); *config_ = *config; ofpbuf_delete(reply); } static void set_switch_config(struct vconn *vconn, const struct ofp_switch_config *config) { struct ofpbuf *request; request = ofpraw_alloc(OFPRAW_OFPT_SET_CONFIG, vconn_get_version(vconn), 0); ofpbuf_put(request, config, sizeof *config); transact_noreply(vconn, request); } static void ofctl_show(int argc OVS_UNUSED, char *argv[]) { const char *vconn_name = argv[1]; struct vconn *vconn; struct ofpbuf *request; struct ofpbuf *reply; bool trunc; open_vconn(vconn_name, &vconn); request = ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST, vconn_get_version(vconn), 0); 
run(vconn_transact(vconn, request, &reply), "talking to %s", vconn_name); trunc = ofputil_switch_features_ports_trunc(reply); ofp_print(stdout, reply->data, reply->size, verbosity + 1); ofpbuf_delete(reply); if (trunc) { /* The Features Reply may not contain all the ports, so send a * Port Description stats request, which doesn't have size * constraints. */ dump_trivial_stats_transaction(vconn_name, OFPRAW_OFPST_PORT_DESC_REQUEST); } dump_trivial_transaction(vconn_name, OFPRAW_OFPT_GET_CONFIG_REQUEST); vconn_close(vconn); } static void ofctl_dump_desc(int argc OVS_UNUSED, char *argv[]) { dump_trivial_stats_transaction(argv[1], OFPRAW_OFPST_DESC_REQUEST); } static void ofctl_dump_tables(int argc OVS_UNUSED, char *argv[]) { dump_trivial_stats_transaction(argv[1], OFPRAW_OFPST_TABLE_REQUEST); } static bool fetch_port_by_features(const char *vconn_name, const char *port_name, ofp_port_t port_no, struct ofputil_phy_port *pp, bool *trunc) { struct ofputil_switch_features features; const struct ofp_header *oh; struct ofpbuf *request, *reply; struct vconn *vconn; enum ofperr error; enum ofptype type; struct ofpbuf b; bool found = false; /* Fetch the switch's ofp_switch_features. */ open_vconn(vconn_name, &vconn); request = ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST, vconn_get_version(vconn), 0); run(vconn_transact(vconn, request, &reply), "talking to %s", vconn_name); vconn_close(vconn); oh = reply->data; if (ofptype_decode(&type, reply->data) || type != OFPTYPE_FEATURES_REPLY) { ovs_fatal(0, "%s: received bad features reply", vconn_name); } *trunc = false; if (ofputil_switch_features_ports_trunc(reply)) { *trunc = true; goto exit; } error = ofputil_decode_switch_features(oh, &features, &b); if (error) { ovs_fatal(0, "%s: failed to decode features reply (%s)", vconn_name, ofperr_to_string(error)); } while (!ofputil_pull_phy_port(oh->version, &b, pp)) { if (port_no != OFPP_NONE ? 
port_no == pp->port_no : !strcmp(pp->name, port_name)) { found = true; goto exit; } } exit: ofpbuf_delete(reply); return found; } static bool fetch_port_by_stats(const char *vconn_name, const char *port_name, ofp_port_t port_no, struct ofputil_phy_port *pp) { struct ofpbuf *request; struct vconn *vconn; ovs_be32 send_xid; bool done = false; bool found = false; request = ofpraw_alloc(OFPRAW_OFPST_PORT_DESC_REQUEST, OFP10_VERSION, 0); send_xid = ((struct ofp_header *) request->data)->xid; open_vconn(vconn_name, &vconn); send_openflow_buffer(vconn, request); while (!done) { ovs_be32 recv_xid; struct ofpbuf *reply; run(vconn_recv_block(vconn, &reply), "OpenFlow packet receive failed"); recv_xid = ((struct ofp_header *) reply->data)->xid; if (send_xid == recv_xid) { struct ofp_header *oh = reply->data; enum ofptype type; struct ofpbuf b; uint16_t flags; ofpbuf_use_const(&b, oh, ntohs(oh->length)); if (ofptype_pull(&type, &b) || type != OFPTYPE_PORT_DESC_STATS_REPLY) { ovs_fatal(0, "received bad reply: %s", ofp_to_string(reply->data, reply->size, verbosity + 1)); } flags = ofpmp_flags(oh); done = !(flags & OFPSF_REPLY_MORE); if (found) { /* We've already found the port, but we need to drain * the queue of any other replies for this request. */ continue; } while (!ofputil_pull_phy_port(oh->version, &b, pp)) { if (port_no != OFPP_NONE ? port_no == pp->port_no : !strcmp(pp->name, port_name)) { found = true; break; } } } else { VLOG_DBG("received reply with xid %08"PRIx32" " "!= expected %08"PRIx32, recv_xid, send_xid); } ofpbuf_delete(reply); } vconn_close(vconn); return found; } static bool str_to_ofp(const char *s, ofp_port_t *ofp_port) { bool ret; uint32_t port_; ret = str_to_uint(s, 10, &port_); *ofp_port = u16_to_ofp(port_); return ret; } /* Opens a connection to 'vconn_name', fetches the port structure for * 'port_name' (which may be a port name or number), and copies it into * '*pp'. 
*/ static void fetch_ofputil_phy_port(const char *vconn_name, const char *port_name, struct ofputil_phy_port *pp) { ofp_port_t port_no; bool found; bool trunc; /* Try to interpret the argument as a port number. */ if (!str_to_ofp(port_name, &port_no)) { port_no = OFPP_NONE; } /* Try to find the port based on the Features Reply. If it looks * like the results may be truncated, then use the Port Description * stats message introduced in OVS 1.7. */ found = fetch_port_by_features(vconn_name, port_name, port_no, pp, &trunc); if (trunc) { found = fetch_port_by_stats(vconn_name, port_name, port_no, pp); } if (!found) { ovs_fatal(0, "%s: couldn't find port `%s'", vconn_name, port_name); } } /* Returns the port number corresponding to 'port_name' (which may be a port * name or number) within the switch 'vconn_name'. */ static ofp_port_t str_to_port_no(const char *vconn_name, const char *port_name) { ofp_port_t port_no; if (ofputil_port_from_string(port_name, &port_no)) { return port_no; } else { struct ofputil_phy_port pp; fetch_ofputil_phy_port(vconn_name, port_name, &pp); return pp.port_no; } } static bool try_set_protocol(struct vconn *vconn, enum ofputil_protocol want, enum ofputil_protocol *cur) { for (;;) { struct ofpbuf *request, *reply; enum ofputil_protocol next; request = ofputil_encode_set_protocol(*cur, want, &next); if (!request) { return *cur == want; } run(vconn_transact_noreply(vconn, request, &reply), "talking to %s", vconn_get_name(vconn)); if (reply) { char *s = ofp_to_string(reply->data, reply->size, 2); VLOG_DBG("%s: failed to set protocol, switch replied: %s", vconn_get_name(vconn), s); free(s); ofpbuf_delete(reply); return false; } *cur = next; } } static enum ofputil_protocol set_protocol_for_flow_dump(struct vconn *vconn, enum ofputil_protocol cur_protocol, enum ofputil_protocol usable_protocols) { char *usable_s; int i; for (i = 0; i < ofputil_n_flow_dump_protocols; i++) { enum ofputil_protocol f = ofputil_flow_dump_protocols[i]; if (f & 
usable_protocols & allowed_protocols && try_set_protocol(vconn, f, &cur_protocol)) { return f; } } usable_s = ofputil_protocols_to_string(usable_protocols); if (usable_protocols & allowed_protocols) { ovs_fatal(0, "switch does not support any of the usable flow " "formats (%s)", usable_s); } else { char *allowed_s = ofputil_protocols_to_string(allowed_protocols); ovs_fatal(0, "none of the usable flow formats (%s) is among the " "allowed flow formats (%s)", usable_s, allowed_s); } } static struct vconn * prepare_dump_flows(int argc, char *argv[], bool aggregate, struct ofpbuf **requestp) { enum ofputil_protocol usable_protocols, protocol; struct ofputil_flow_stats_request fsr; struct vconn *vconn; char *error; error = parse_ofp_flow_stats_request_str(&fsr, aggregate, argc > 2 ? argv[2] : "", &usable_protocols); if (error) { ovs_fatal(0, "%s", error); } protocol = open_vconn(argv[1], &vconn); protocol = set_protocol_for_flow_dump(vconn, protocol, usable_protocols); *requestp = ofputil_encode_flow_stats_request(&fsr, protocol); return vconn; } static void ofctl_dump_flows__(int argc, char *argv[], bool aggregate) { struct ofpbuf *request; struct vconn *vconn; vconn = prepare_dump_flows(argc, argv, aggregate, &request); dump_stats_transaction(vconn, request); vconn_close(vconn); } static int compare_flows(const void *afs_, const void *bfs_) { const struct ofputil_flow_stats *afs = afs_; const struct ofputil_flow_stats *bfs = bfs_; const struct match *a = &afs->match; const struct match *b = &bfs->match; const struct sort_criterion *sc; for (sc = criteria; sc < &criteria[n_criteria]; sc++) { const struct mf_field *f = sc->field; int ret; if (!f) { unsigned int a_pri = afs->priority; unsigned int b_pri = bfs->priority; ret = a_pri < b_pri ? 
-1 : a_pri > b_pri; } else { bool ina, inb; ina = mf_are_prereqs_ok(f, &a->flow) && !mf_is_all_wild(f, &a->wc); inb = mf_are_prereqs_ok(f, &b->flow) && !mf_is_all_wild(f, &b->wc); if (ina != inb) { /* Skip the test for sc->order, so that missing fields always * sort to the end whether we're sorting in ascending or * descending order. */ return ina ? -1 : 1; } else { union mf_value aval, bval; mf_get_value(f, &a->flow, &aval); mf_get_value(f, &b->flow, &bval); ret = memcmp(&aval, &bval, f->n_bytes); } } if (ret) { return sc->order == SORT_ASC ? ret : -ret; } } return 0; } static void ofctl_dump_flows(int argc, char *argv[]) { if (!n_criteria) { return ofctl_dump_flows__(argc, argv, false); } else { struct ofputil_flow_stats *fses; size_t n_fses, allocated_fses; struct ofpbuf *request; struct ofpbuf ofpacts; struct ofpbuf *reply; struct vconn *vconn; ovs_be32 send_xid; struct ds s; size_t i; vconn = prepare_dump_flows(argc, argv, false, &request); send_xid = ((struct ofp_header *) request->data)->xid; send_openflow_buffer(vconn, request); fses = NULL; n_fses = allocated_fses = 0; reply = NULL; ofpbuf_init(&ofpacts, 0); for (;;) { struct ofputil_flow_stats *fs; if (n_fses >= allocated_fses) { fses = x2nrealloc(fses, &allocated_fses, sizeof *fses); } fs = &fses[n_fses]; if (!recv_flow_stats_reply(vconn, send_xid, &reply, fs, &ofpacts)) { break; } fs->ofpacts = xmemdup(fs->ofpacts, fs->ofpacts_len); n_fses++; } ofpbuf_uninit(&ofpacts); qsort(fses, n_fses, sizeof *fses, compare_flows); ds_init(&s); for (i = 0; i < n_fses; i++) { ds_clear(&s); ofp_print_flow_stats(&s, &fses[i]); puts(ds_cstr(&s)); } ds_destroy(&s); for (i = 0; i < n_fses; i++) { free(fses[i].ofpacts); } free(fses); vconn_close(vconn); } } static void ofctl_dump_aggregate(int argc, char *argv[]) { return ofctl_dump_flows__(argc, argv, true); } static void ofctl_queue_stats(int argc, char *argv[]) { struct ofpbuf *request; struct vconn *vconn; struct ofputil_queue_stats_request oqs; open_vconn(argv[1], 
&vconn); if (argc > 2 && argv[2][0] && strcasecmp(argv[2], "all")) { oqs.port_no = str_to_port_no(argv[1], argv[2]); } else { oqs.port_no = OFPP_ANY; } if (argc > 3 && argv[3][0] && strcasecmp(argv[3], "all")) { oqs.queue_id = atoi(argv[3]); } else { oqs.queue_id = OFPQ_ALL; } request = ofputil_encode_queue_stats_request(vconn_get_version(vconn), &oqs); dump_stats_transaction(vconn, request); vconn_close(vconn); } static enum ofputil_protocol open_vconn_for_flow_mod(const char *remote, struct vconn **vconnp, enum ofputil_protocol usable_protocols) { enum ofputil_protocol cur_protocol; char *usable_s; int i; if (!(usable_protocols & allowed_protocols)) { char *allowed_s = ofputil_protocols_to_string(allowed_protocols); usable_s = ofputil_protocols_to_string(usable_protocols); ovs_fatal(0, "none of the usable flow formats (%s) is among the " "allowed flow formats (%s)", usable_s, allowed_s); } /* If the initial flow format is allowed and usable, keep it. */ cur_protocol = open_vconn(remote, vconnp); if (usable_protocols & allowed_protocols & cur_protocol) { return cur_protocol; } /* Otherwise try each flow format in turn. 
*/ for (i = 0; i < sizeof(enum ofputil_protocol) * CHAR_BIT; i++) { enum ofputil_protocol f = 1 << i; if (f != cur_protocol && f & usable_protocols & allowed_protocols && try_set_protocol(*vconnp, f, &cur_protocol)) { return f; } } usable_s = ofputil_protocols_to_string(usable_protocols); ovs_fatal(0, "switch does not support any of the usable flow " "formats (%s)", usable_s); } static void ofctl_flow_mod__(const char *remote, struct ofputil_flow_mod *fms, size_t n_fms, enum ofputil_protocol usable_protocols) { enum ofputil_protocol protocol; struct vconn *vconn; size_t i; protocol = open_vconn_for_flow_mod(remote, &vconn, usable_protocols); for (i = 0; i < n_fms; i++) { struct ofputil_flow_mod *fm = &fms[i]; transact_noreply(vconn, ofputil_encode_flow_mod(fm, protocol)); free(fm->ofpacts); } vconn_close(vconn); } static void ofctl_flow_mod_file(int argc OVS_UNUSED, char *argv[], uint16_t command) { enum ofputil_protocol usable_protocols; struct ofputil_flow_mod *fms = NULL; size_t n_fms = 0; char *error; error = parse_ofp_flow_mod_file(argv[2], command, &fms, &n_fms, &usable_protocols); if (error) { ovs_fatal(0, "%s", error); } ofctl_flow_mod__(argv[1], fms, n_fms, usable_protocols); free(fms); } static void ofctl_flow_mod(int argc, char *argv[], uint16_t command) { enum ofputil_protocol usable_protocols; if (argc > 2 && !strcmp(argv[2], "-")) { ofctl_flow_mod_file(argc, argv, command); } else { struct ofputil_flow_mod fm; char *error; error = parse_ofp_flow_mod_str(&fm, argc > 2 ? argv[2] : "", command, &usable_protocols); if (error) { ovs_fatal(0, "%s", error); } ofctl_flow_mod__(argv[1], &fm, 1, usable_protocols); } } static void ofctl_add_flow(int argc, char *argv[]) { ofctl_flow_mod(argc, argv, OFPFC_ADD); } static void ofctl_add_flows(int argc, char *argv[]) { ofctl_flow_mod_file(argc, argv, OFPFC_ADD); } static void ofctl_mod_flows(int argc, char *argv[]) { ofctl_flow_mod(argc, argv, strict ? 
OFPFC_MODIFY_STRICT : OFPFC_MODIFY); } static void ofctl_del_flows(int argc, char *argv[]) { ofctl_flow_mod(argc, argv, strict ? OFPFC_DELETE_STRICT : OFPFC_DELETE); } static void set_packet_in_format(struct vconn *vconn, enum nx_packet_in_format packet_in_format) { struct ofpbuf *spif; spif = ofputil_make_set_packet_in_format(vconn_get_version(vconn), packet_in_format); transact_noreply(vconn, spif); VLOG_DBG("%s: using user-specified packet in format %s", vconn_get_name(vconn), ofputil_packet_in_format_to_string(packet_in_format)); } static int monitor_set_invalid_ttl_to_controller(struct vconn *vconn) { struct ofp_switch_config config; enum ofp_config_flags flags; fetch_switch_config(vconn, &config); flags = ntohs(config.flags); if (!(flags & OFPC_INVALID_TTL_TO_CONTROLLER)) { /* Set the invalid ttl config. */ flags |= OFPC_INVALID_TTL_TO_CONTROLLER; config.flags = htons(flags); set_switch_config(vconn, &config); /* Then retrieve the configuration to see if it really took. OpenFlow * doesn't define error reporting for bad modes, so this is all we can * do. */ fetch_switch_config(vconn, &config); flags = ntohs(config.flags); if (!(flags & OFPC_INVALID_TTL_TO_CONTROLLER)) { ovs_fatal(0, "setting invalid_ttl_to_controller failed (this " "switch probably doesn't support mode)"); return -EOPNOTSUPP; } } return 0; } /* Converts hex digits in 'hex' to an OpenFlow message in '*msgp'. The * caller must free '*msgp'. On success, returns NULL. On failure, returns * an error message and stores NULL in '*msgp'. 
*/ static const char * openflow_from_hex(const char *hex, struct ofpbuf **msgp) { struct ofp_header *oh; struct ofpbuf *msg; msg = ofpbuf_new(strlen(hex) / 2); *msgp = NULL; if (ofpbuf_put_hex(msg, hex, NULL)[0] != '\0') { ofpbuf_delete(msg); return "Trailing garbage in hex data"; } if (msg->size < sizeof(struct ofp_header)) { ofpbuf_delete(msg); return "Message too short for OpenFlow"; } oh = msg->data; if (msg->size != ntohs(oh->length)) { ofpbuf_delete(msg); return "Message size does not match length in OpenFlow header"; } *msgp = msg; return NULL; } static void ofctl_send(struct unixctl_conn *conn, int argc, const char *argv[], void *vconn_) { struct vconn *vconn = vconn_; struct ds reply; bool ok; int i; ok = true; ds_init(&reply); for (i = 1; i < argc; i++) { const char *error_msg; struct ofpbuf *msg; int error; error_msg = openflow_from_hex(argv[i], &msg); if (error_msg) { ds_put_format(&reply, "%s\n", error_msg); ok = false; continue; } fprintf(stderr, "send: "); ofp_print(stderr, msg->data, msg->size, verbosity); error = vconn_send_block(vconn, msg); if (error) { ofpbuf_delete(msg); ds_put_format(&reply, "%s\n", ovs_strerror(error)); ok = false; } else { ds_put_cstr(&reply, "sent\n"); } } if (ok) { unixctl_command_reply(conn, ds_cstr(&reply)); } else { unixctl_command_reply_error(conn, ds_cstr(&reply)); } ds_destroy(&reply); } struct barrier_aux { struct vconn *vconn; /* OpenFlow connection for sending barrier. */ struct unixctl_conn *conn; /* Connection waiting for barrier response. 
/* unixctl "ofctl/set-output-file FILE" handler: redirects this process's
 * stderr to the file named by argv[1], creating or truncating it.
 *
 * Replies with an error if the file cannot be opened or if stderr cannot be
 * redirected to it; otherwise replies with success. */
static void
ofctl_set_output_file(struct unixctl_conn *conn, int argc OVS_UNUSED,
                      const char *argv[], void *aux OVS_UNUSED)
{
    int error = 0;
    int fd;

    fd = open(argv[1], O_CREAT | O_TRUNC | O_WRONLY, 0666);
    if (fd < 0) {
        unixctl_command_reply_error(conn, ovs_strerror(errno));
        return;
    }

    /* Push out anything buffered for the old destination first. */
    fflush(stderr);
    if (dup2(fd, STDERR_FILENO) < 0) {
        /* Save errno now: close() below may overwrite it. */
        error = errno;
    }
    close(fd);

    if (error) {
        unixctl_command_reply_error(conn, ovs_strerror(error));
    } else {
        unixctl_command_reply(conn, NULL);
    }
}
/* Prints to stderr all of the messages received on 'vconn', until the
 * unixctl "exit" command is received.
 *
 * Iff 'reply_to_echo_requests' is true, sends a reply to any echo request
 * received on 'vconn'.
 *
 * While running, a unixctl server accepts the commands "exit", "ofctl/send",
 * "ofctl/barrier", "ofctl/set-output-file", "ofctl/block" and
 * "ofctl/unblock".  Closes 'vconn' before returning. */
static void
monitor_vconn(struct vconn *vconn, bool reply_to_echo_requests)
{
    /* 'conn' is set by ofctl_barrier() while a barrier reply is awaited. */
    struct barrier_aux barrier_aux = { vconn, NULL };
    struct unixctl_server *server;
    bool exiting = false;       /* Set to true by ofctl_exit(). */
    bool blocked = false;       /* Toggled by ofctl_block()/ofctl_unblock(). */
    int error;

    daemon_save_fd(STDERR_FILENO);
    daemonize_start();
    error = unixctl_server_create(NULL, &server);
    if (error) {
        ovs_fatal(error, "failed to create unixctl server");
    }
    unixctl_command_register("exit", "", 0, 0, ofctl_exit, &exiting);
    unixctl_command_register("ofctl/send", "OFMSG...", 1, INT_MAX,
                             ofctl_send, vconn);
    unixctl_command_register("ofctl/barrier", "", 0, 0,
                             ofctl_barrier, &barrier_aux);
    unixctl_command_register("ofctl/set-output-file", "FILE", 1, 1,
                             ofctl_set_output_file, NULL);

    unixctl_command_register("ofctl/block", "", 0, 0, ofctl_block, &blocked);
    unixctl_command_register("ofctl/unblock", "", 0, 0, ofctl_unblock,
                             &blocked);

    daemonize_complete();

    for (;;) {
        struct ofpbuf *b;
        int retval;

        /* Run pending unixctl commands; they may change 'exiting',
         * 'blocked' or 'barrier_aux.conn'. */
        unixctl_server_run(server);

        /* Drain every message currently available, unless blocked. */
        while (!blocked) {
            enum ofptype type;

            retval = vconn_recv(vconn, &b);
            if (retval == EAGAIN) {
                /* Nothing more to read right now. */
                break;
            }
            /* Any other nonzero result is fatal. */
            run(retval, "vconn_recv");

            if (timestamp) {
                char *s = xastrftime_msec("%Y-%m-%d %H:%M:%S.###: ",
                                          time_wall_msec(), true);
                fputs(s, stderr);
                free(s);
            }

            /* NOTE(review): the decode result is ignored, so 'type' is
             * whatever ofptype_decode() stores on failure -- confirm that it
             * always stores a value. */
            ofptype_decode(&type, b->data);
            ofp_print(stderr, b->data, b->size, verbosity + 2);

            switch ((int) type) {
            case OFPTYPE_BARRIER_REPLY:
                /* Complete a pending "ofctl/barrier" command, if any. */
                if (barrier_aux.conn) {
                    unixctl_command_reply(barrier_aux.conn, NULL);
                    barrier_aux.conn = NULL;
                }
                break;

            case OFPTYPE_ECHO_REQUEST:
                if (reply_to_echo_requests) {
                    struct ofpbuf *reply;

                    reply = make_echo_reply(b->data);
                    retval = vconn_send_block(vconn, reply);
                    if (retval) {
                        ovs_fatal(retval, "failed to send echo reply");
                    }
                }
                break;
            }
            ofpbuf_delete(b);
        }

        if (exiting) {
            break;
        }

        /* Register wakeup conditions, then sleep until one occurs.  Receive
         * readiness is not waited on while blocked. */
        vconn_run(vconn);
        vconn_run_wait(vconn);
        if (!blocked) {
            vconn_recv_wait(vconn);
        }
        unixctl_server_wait(server);
        poll_block();
    }
    vconn_close(vconn);
    unixctl_server_destroy(server);
}
usable_protocols; open_vconn(argv[1], &vconn); for (i = 2; i < argc; i++) { const char *arg = argv[i]; if (isdigit((unsigned char) *arg)) { struct ofp_switch_config config; fetch_switch_config(vconn, &config); config.miss_send_len = htons(atoi(arg)); set_switch_config(vconn, &config); } else if (!strcmp(arg, "invalid_ttl")) { monitor_set_invalid_ttl_to_controller(vconn); } else if (!strncmp(arg, "watch:", 6)) { struct ofputil_flow_monitor_request fmr; struct ofpbuf *msg; char *error; error = parse_flow_monitor_request(&fmr, arg + 6, &usable_protocols); if (error) { ovs_fatal(0, "%s", error); } msg = ofpbuf_new(0); ofputil_append_flow_monitor_request(&fmr, msg); dump_stats_transaction(vconn, msg); } else { ovs_fatal(0, "%s: unsupported \"monitor\" argument", arg); } } if (preferred_packet_in_format >= 0) { set_packet_in_format(vconn, preferred_packet_in_format); } else { enum ofp_version version = vconn_get_version(vconn); switch (version) { case OFP10_VERSION: { struct ofpbuf *spif, *reply; spif = ofputil_make_set_packet_in_format(vconn_get_version(vconn), NXPIF_NXM); run(vconn_transact_noreply(vconn, spif, &reply), "talking to %s", vconn_get_name(vconn)); if (reply) { char *s = ofp_to_string(reply->data, reply->size, 2); VLOG_DBG("%s: failed to set packet in format to nxm, controller" " replied: %s. Falling back to the switch default.", vconn_get_name(vconn), s); free(s); ofpbuf_delete(reply); } break; } case OFP11_VERSION: case OFP12_VERSION: case OFP13_VERSION: break; default: NOT_REACHED(); } } monitor_vconn(vconn, true); } static void ofctl_snoop(int argc OVS_UNUSED, char *argv[]) { struct vconn *vconn; open_vconn__(argv[1], SNOOP, &vconn); monitor_vconn(vconn, false); } static void ofctl_dump_ports(int argc, char *argv[]) { struct ofpbuf *request; struct vconn *vconn; ofp_port_t port; open_vconn(argv[1], &vconn); port = argc > 2 ? 
str_to_port_no(argv[1], argv[2]) : OFPP_ANY; request = ofputil_encode_dump_ports_request(vconn_get_version(vconn), port); dump_stats_transaction(vconn, request); vconn_close(vconn); } static void ofctl_dump_ports_desc(int argc OVS_UNUSED, char *argv[]) { dump_trivial_stats_transaction(argv[1], OFPRAW_OFPST_PORT_DESC_REQUEST); } static void ofctl_probe(int argc OVS_UNUSED, char *argv[]) { struct ofpbuf *request; struct vconn *vconn; struct ofpbuf *reply; open_vconn(argv[1], &vconn); request = make_echo_request(vconn_get_version(vconn)); run(vconn_transact(vconn, request, &reply), "talking to %s", argv[1]); if (reply->size != sizeof(struct ofp_header)) { ovs_fatal(0, "reply does not match request"); } ofpbuf_delete(reply); vconn_close(vconn); } static void ofctl_packet_out(int argc, char *argv[]) { enum ofputil_protocol protocol; struct ofputil_packet_out po; struct ofpbuf ofpacts; struct vconn *vconn; char *error; int i; enum ofputil_protocol usable_protocols; /* TODO: Use in proto selection */ ofpbuf_init(&ofpacts, 64); error = parse_ofpacts(argv[3], &ofpacts, &usable_protocols); if (error) { ovs_fatal(0, "%s", error); } po.buffer_id = UINT32_MAX; po.in_port = str_to_port_no(argv[1], argv[2]); po.ofpacts = ofpacts.data; po.ofpacts_len = ofpacts.size; protocol = open_vconn(argv[1], &vconn); for (i = 4; i < argc; i++) { struct ofpbuf *packet, *opo; const char *error_msg; error_msg = eth_from_hex(argv[i], &packet); if (error_msg) { ovs_fatal(0, "%s", error_msg); } po.packet = packet->data; po.packet_len = packet->size; opo = ofputil_encode_packet_out(&po, protocol); transact_noreply(vconn, opo); ofpbuf_delete(packet); } vconn_close(vconn); ofpbuf_uninit(&ofpacts); } static void ofctl_mod_port(int argc OVS_UNUSED, char *argv[]) { struct ofp_config_flag { const char *name; /* The flag's name. */ enum ofputil_port_config bit; /* Bit to turn on or off. */ bool on; /* Value to set the bit to. 
*/ }; static const struct ofp_config_flag flags[] = { { "up", OFPUTIL_PC_PORT_DOWN, false }, { "down", OFPUTIL_PC_PORT_DOWN, true }, { "stp", OFPUTIL_PC_NO_STP, false }, { "receive", OFPUTIL_PC_NO_RECV, false }, { "receive-stp", OFPUTIL_PC_NO_RECV_STP, false }, { "flood", OFPUTIL_PC_NO_FLOOD, false }, { "forward", OFPUTIL_PC_NO_FWD, false }, { "packet-in", OFPUTIL_PC_NO_PACKET_IN, false }, }; const struct ofp_config_flag *flag; enum ofputil_protocol protocol; struct ofputil_port_mod pm; struct ofputil_phy_port pp; struct vconn *vconn; const char *command; bool not; fetch_ofputil_phy_port(argv[1], argv[2], &pp); pm.port_no = pp.port_no; memcpy(pm.hw_addr, pp.hw_addr, ETH_ADDR_LEN); pm.config = 0; pm.mask = 0; pm.advertise = 0; if (!strncasecmp(argv[3], "no-", 3)) { command = argv[3] + 3; not = true; } else if (!strncasecmp(argv[3], "no", 2)) { command = argv[3] + 2; not = true; } else { command = argv[3]; not = false; } for (flag = flags; flag < &flags[ARRAY_SIZE(flags)]; flag++) { if (!strcasecmp(command, flag->name)) { pm.mask = flag->bit; pm.config = flag->on ^ not ? 
flag->bit : 0; goto found; } } ovs_fatal(0, "unknown mod-port command '%s'", argv[3]); found: protocol = open_vconn(argv[1], &vconn); transact_noreply(vconn, ofputil_encode_port_mod(&pm, protocol)); vconn_close(vconn); } static void ofctl_get_frags(int argc OVS_UNUSED, char *argv[]) { struct ofp_switch_config config; struct vconn *vconn; open_vconn(argv[1], &vconn); fetch_switch_config(vconn, &config); puts(ofputil_frag_handling_to_string(ntohs(config.flags))); vconn_close(vconn); } static void ofctl_set_frags(int argc OVS_UNUSED, char *argv[]) { struct ofp_switch_config config; enum ofp_config_flags mode; struct vconn *vconn; ovs_be16 flags; if (!ofputil_frag_handling_from_string(argv[2], &mode)) { ovs_fatal(0, "%s: unknown fragment handling mode", argv[2]); } open_vconn(argv[1], &vconn); fetch_switch_config(vconn, &config); flags = htons(mode) | (config.flags & htons(~OFPC_FRAG_MASK)); if (flags != config.flags) { /* Set the configuration. */ config.flags = flags; set_switch_config(vconn, &config); /* Then retrieve the configuration to see if it really took. OpenFlow * doesn't define error reporting for bad modes, so this is all we can * do. */ fetch_switch_config(vconn, &config); if (flags != config.flags) { ovs_fatal(0, "%s: setting fragment handling mode failed (this " "switch probably doesn't support mode \"%s\")", argv[1], ofputil_frag_handling_to_string(mode)); } } vconn_close(vconn); } static void ofctl_ofp_parse(int argc OVS_UNUSED, char *argv[]) { const char *filename = argv[1]; struct ofpbuf b; FILE *file; file = !strcmp(filename, "-") ? 
stdin : fopen(filename, "r"); if (file == NULL) { ovs_fatal(errno, "%s: open", filename); } ofpbuf_init(&b, 65536); for (;;) { struct ofp_header *oh; size_t length, tail_len; void *tail; size_t n; ofpbuf_clear(&b); oh = ofpbuf_put_uninit(&b, sizeof *oh); n = fread(oh, 1, sizeof *oh, file); if (n == 0) { break; } else if (n < sizeof *oh) { ovs_fatal(0, "%s: unexpected end of file mid-message", filename); } length = ntohs(oh->length); if (length < sizeof *oh) { ovs_fatal(0, "%s: %zu-byte message is too short for OpenFlow", filename, length); } tail_len = length - sizeof *oh; tail = ofpbuf_put_uninit(&b, tail_len); n = fread(tail, 1, tail_len, file); if (n < tail_len) { ovs_fatal(0, "%s: unexpected end of file mid-message", filename); } ofp_print(stdout, b.data, b.size, verbosity + 2); } ofpbuf_uninit(&b); if (file != stdin) { fclose(file); } } static void ofctl_ping(int argc, char *argv[]) { size_t max_payload = 65535 - sizeof(struct ofp_header); unsigned int payload; struct vconn *vconn; int i; payload = argc > 2 ? atoi(argv[2]) : 64; if (payload > max_payload) { ovs_fatal(0, "payload must be between 0 and %zu bytes", max_payload); } open_vconn(argv[1], &vconn); for (i = 0; i < 10; i++) { struct timeval start, end; struct ofpbuf *request, *reply; const struct ofp_header *rpy_hdr; enum ofptype type; request = ofpraw_alloc(OFPRAW_OFPT_ECHO_REQUEST, vconn_get_version(vconn), payload); random_bytes(ofpbuf_put_uninit(request, payload), payload); xgettimeofday(&start); run(vconn_transact(vconn, ofpbuf_clone(request), &reply), "transact"); xgettimeofday(&end); rpy_hdr = reply->data; if (ofptype_pull(&type, reply) || type != OFPTYPE_ECHO_REPLY || reply->size != payload || memcmp(request->l3, reply->l3, payload)) { printf("Reply does not match request. 
Request:\n"); ofp_print(stdout, request, request->size, verbosity + 2); printf("Reply:\n"); ofp_print(stdout, reply, reply->size, verbosity + 2); } printf("%zu bytes from %s: xid=%08"PRIx32" time=%.1f ms\n", reply->size, argv[1], ntohl(rpy_hdr->xid), (1000*(double)(end.tv_sec - start.tv_sec)) + (.001*(end.tv_usec - start.tv_usec))); ofpbuf_delete(request); ofpbuf_delete(reply); } vconn_close(vconn); } static void ofctl_benchmark(int argc OVS_UNUSED, char *argv[]) { size_t max_payload = 65535 - sizeof(struct ofp_header); struct timeval start, end; unsigned int payload_size, message_size; struct vconn *vconn; double duration; int count; int i; payload_size = atoi(argv[2]); if (payload_size > max_payload) { ovs_fatal(0, "payload must be between 0 and %zu bytes", max_payload); } message_size = sizeof(struct ofp_header) + payload_size; count = atoi(argv[3]); printf("Sending %d packets * %u bytes (with header) = %u bytes total\n", count, message_size, count * message_size); open_vconn(argv[1], &vconn); xgettimeofday(&start); for (i = 0; i < count; i++) { struct ofpbuf *request, *reply; request = ofpraw_alloc(OFPRAW_OFPT_ECHO_REQUEST, vconn_get_version(vconn), payload_size); ofpbuf_put_zeros(request, payload_size); run(vconn_transact(vconn, request, &reply), "transact"); ofpbuf_delete(reply); } xgettimeofday(&end); vconn_close(vconn); duration = ((1000*(double)(end.tv_sec - start.tv_sec)) + (.001*(end.tv_usec - start.tv_usec))); printf("Finished in %.1f ms (%.0f packets/s) (%.0f bytes/s)\n", duration, count / (duration / 1000.0), count * message_size / (duration / 1000.0)); } static void ofctl_help(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { usage(); } /* replace-flows and diff-flows commands. */ /* A flow table entry, possibly with two different versions. */ struct fte { struct cls_rule rule; /* Within a "struct classifier". */ struct fte_version *versions[2]; }; /* One version of a Flow Table Entry. 
*/ struct fte_version { ovs_be64 cookie; uint16_t idle_timeout; uint16_t hard_timeout; uint16_t flags; struct ofpact *ofpacts; size_t ofpacts_len; }; /* Frees 'version' and the data that it owns. */ static void fte_version_free(struct fte_version *version) { if (version) { free(version->ofpacts); free(version); } } /* Returns true if 'a' and 'b' are the same, false if they differ. * * Ignores differences in 'flags' because there's no way to retrieve flags from * an OpenFlow switch. We have to assume that they are the same. */ static bool fte_version_equals(const struct fte_version *a, const struct fte_version *b) { return (a->cookie == b->cookie && a->idle_timeout == b->idle_timeout && a->hard_timeout == b->hard_timeout && ofpacts_equal(a->ofpacts, a->ofpacts_len, b->ofpacts, b->ofpacts_len)); } /* Clears 's', then if 's' has a version 'index', formats 'fte' and version * 'index' into 's', followed by a new-line. */ static void fte_version_format(const struct fte *fte, int index, struct ds *s) { const struct fte_version *version = fte->versions[index]; ds_clear(s); if (!version) { return; } cls_rule_format(&fte->rule, s); if (version->cookie != htonll(0)) { ds_put_format(s, " cookie=0x%"PRIx64, ntohll(version->cookie)); } if (version->idle_timeout != OFP_FLOW_PERMANENT) { ds_put_format(s, " idle_timeout=%"PRIu16, version->idle_timeout); } if (version->hard_timeout != OFP_FLOW_PERMANENT) { ds_put_format(s, " hard_timeout=%"PRIu16, version->hard_timeout); } ds_put_char(s, ' '); ofpacts_format(version->ofpacts, version->ofpacts_len, s); ds_put_char(s, '\n'); } static struct fte * fte_from_cls_rule(const struct cls_rule *cls_rule) { return cls_rule ? CONTAINER_OF(cls_rule, struct fte, rule) : NULL; } /* Frees 'fte' and its versions. */ static void fte_free(struct fte *fte) { if (fte) { fte_version_free(fte->versions[0]); fte_version_free(fte->versions[1]); cls_rule_destroy(&fte->rule); free(fte); } } /* Frees all of the FTEs within 'cls'. 
*/ static void fte_free_all(struct classifier *cls) { struct cls_cursor cursor; struct fte *fte, *next; ovs_rwlock_wrlock(&cls->rwlock); cls_cursor_init(&cursor, cls, NULL); CLS_CURSOR_FOR_EACH_SAFE (fte, next, rule, &cursor) { classifier_remove(cls, &fte->rule); fte_free(fte); } ovs_rwlock_unlock(&cls->rwlock); classifier_destroy(cls); } /* Searches 'cls' for an FTE matching 'rule', inserting a new one if * necessary. Sets 'version' as the version of that rule with the given * 'index', replacing any existing version, if any. * * Takes ownership of 'version'. */ static void fte_insert(struct classifier *cls, const struct match *match, unsigned int priority, struct fte_version *version, int index) { struct fte *old, *fte; fte = xzalloc(sizeof *fte); cls_rule_init(&fte->rule, match, priority); fte->versions[index] = version; ovs_rwlock_wrlock(&cls->rwlock); old = fte_from_cls_rule(classifier_replace(cls, &fte->rule)); ovs_rwlock_unlock(&cls->rwlock); if (old) { fte_version_free(old->versions[index]); fte->versions[!index] = old->versions[!index]; cls_rule_destroy(&old->rule); free(old); } } /* Reads the flows in 'filename' as flow table entries in 'cls' for the version * with the specified 'index'. Returns the flow formats able to represent the * flows that were read. */ static enum ofputil_protocol read_flows_from_file(const char *filename, struct classifier *cls, int index) { enum ofputil_protocol usable_protocols; int line_number; struct ds s; FILE *file; file = !strcmp(filename, "-") ? 
stdin : fopen(filename, "r"); if (file == NULL) { ovs_fatal(errno, "%s: open", filename); } ds_init(&s); usable_protocols = OFPUTIL_P_ANY; line_number = 0; while (!ds_get_preprocessed_line(&s, file, &line_number)) { struct fte_version *version; struct ofputil_flow_mod fm; char *error; enum ofputil_protocol usable; error = parse_ofp_str(&fm, OFPFC_ADD, ds_cstr(&s), &usable); if (error) { ovs_fatal(0, "%s:%d: %s", filename, line_number, error); } usable_protocols &= usable; version = xmalloc(sizeof *version); version->cookie = fm.new_cookie; version->idle_timeout = fm.idle_timeout; version->hard_timeout = fm.hard_timeout; version->flags = fm.flags & (OFPUTIL_FF_SEND_FLOW_REM | OFPUTIL_FF_EMERG); version->ofpacts = fm.ofpacts; version->ofpacts_len = fm.ofpacts_len; fte_insert(cls, &fm.match, fm.priority, version, index); } ds_destroy(&s); if (file != stdin) { fclose(file); } return usable_protocols; } static bool recv_flow_stats_reply(struct vconn *vconn, ovs_be32 send_xid, struct ofpbuf **replyp, struct ofputil_flow_stats *fs, struct ofpbuf *ofpacts) { struct ofpbuf *reply = *replyp; for (;;) { int retval; bool more; /* Get a flow stats reply message, if we don't already have one. */ if (!reply) { enum ofptype type; enum ofperr error; do { run(vconn_recv_block(vconn, &reply), "OpenFlow packet receive failed"); } while (((struct ofp_header *) reply->data)->xid != send_xid); error = ofptype_decode(&type, reply->data); if (error || type != OFPTYPE_FLOW_STATS_REPLY) { ovs_fatal(0, "received bad reply: %s", ofp_to_string(reply->data, reply->size, verbosity + 1)); } } /* Pull an individual flow stats reply out of the message. 
*/ retval = ofputil_decode_flow_stats_reply(fs, reply, false, ofpacts); switch (retval) { case 0: *replyp = reply; return true; case EOF: more = ofpmp_more(reply->l2); ofpbuf_delete(reply); reply = NULL; if (!more) { *replyp = NULL; return false; } break; default: ovs_fatal(0, "parse error in reply (%s)", ofperr_to_string(retval)); } } } /* Reads the OpenFlow flow table from 'vconn', which has currently active flow * format 'protocol', and adds them as flow table entries in 'cls' for the * version with the specified 'index'. */ static void read_flows_from_switch(struct vconn *vconn, enum ofputil_protocol protocol, struct classifier *cls, int index) { struct ofputil_flow_stats_request fsr; struct ofputil_flow_stats fs; struct ofpbuf *request; struct ofpbuf ofpacts; struct ofpbuf *reply; ovs_be32 send_xid; fsr.aggregate = false; match_init_catchall(&fsr.match); fsr.out_port = OFPP_ANY; fsr.table_id = 0xff; fsr.cookie = fsr.cookie_mask = htonll(0); request = ofputil_encode_flow_stats_request(&fsr, protocol); send_xid = ((struct ofp_header *) request->data)->xid; send_openflow_buffer(vconn, request); reply = NULL; ofpbuf_init(&ofpacts, 0); while (recv_flow_stats_reply(vconn, send_xid, &reply, &fs, &ofpacts)) { struct fte_version *version; version = xmalloc(sizeof *version); version->cookie = fs.cookie; version->idle_timeout = fs.idle_timeout; version->hard_timeout = fs.hard_timeout; version->flags = 0; version->ofpacts_len = fs.ofpacts_len; version->ofpacts = xmemdup(fs.ofpacts, fs.ofpacts_len); fte_insert(cls, &fs.match, fs.priority, version, index); } ofpbuf_uninit(&ofpacts); } static void fte_make_flow_mod(const struct fte *fte, int index, uint16_t command, enum ofputil_protocol protocol, struct list *packets) { const struct fte_version *version = fte->versions[index]; struct ofputil_flow_mod fm; struct ofpbuf *ofm; minimatch_expand(&fte->rule.match, &fm.match); fm.priority = fte->rule.priority; fm.cookie = htonll(0); fm.cookie_mask = htonll(0); fm.new_cookie = 
version->cookie; fm.modify_cookie = true; fm.table_id = 0xff; fm.command = command; fm.idle_timeout = version->idle_timeout; fm.hard_timeout = version->hard_timeout; fm.buffer_id = UINT32_MAX; fm.out_port = OFPP_ANY; fm.flags = version->flags; if (command == OFPFC_ADD || command == OFPFC_MODIFY || command == OFPFC_MODIFY_STRICT) { fm.ofpacts = version->ofpacts; fm.ofpacts_len = version->ofpacts_len; } else { fm.ofpacts = NULL; fm.ofpacts_len = 0; } ofm = ofputil_encode_flow_mod(&fm, protocol); list_push_back(packets, &ofm->list_node); } static void ofctl_replace_flows(int argc OVS_UNUSED, char *argv[]) { enum { FILE_IDX = 0, SWITCH_IDX = 1 }; enum ofputil_protocol usable_protocols, protocol; struct cls_cursor cursor; struct classifier cls; struct list requests; struct vconn *vconn; struct fte *fte; classifier_init(&cls); usable_protocols = read_flows_from_file(argv[2], &cls, FILE_IDX); protocol = open_vconn(argv[1], &vconn); protocol = set_protocol_for_flow_dump(vconn, protocol, usable_protocols); read_flows_from_switch(vconn, protocol, &cls, SWITCH_IDX); list_init(&requests); /* Delete flows that exist on the switch but not in the file. */ ovs_rwlock_rdlock(&cls.rwlock); cls_cursor_init(&cursor, &cls, NULL); CLS_CURSOR_FOR_EACH (fte, rule, &cursor) { struct fte_version *file_ver = fte->versions[FILE_IDX]; struct fte_version *sw_ver = fte->versions[SWITCH_IDX]; if (sw_ver && !file_ver) { fte_make_flow_mod(fte, SWITCH_IDX, OFPFC_DELETE_STRICT, protocol, &requests); } } /* Add flows that exist in the file but not on the switch. * Update flows that exist in both places but differ. 
*/ cls_cursor_init(&cursor, &cls, NULL); CLS_CURSOR_FOR_EACH (fte, rule, &cursor) { struct fte_version *file_ver = fte->versions[FILE_IDX]; struct fte_version *sw_ver = fte->versions[SWITCH_IDX]; if (file_ver && (readd || !sw_ver || !fte_version_equals(sw_ver, file_ver))) { fte_make_flow_mod(fte, FILE_IDX, OFPFC_ADD, protocol, &requests); } } ovs_rwlock_unlock(&cls.rwlock); transact_multiple_noreply(vconn, &requests); vconn_close(vconn); fte_free_all(&cls); } static void read_flows_from_source(const char *source, struct classifier *cls, int index) { struct stat s; if (source[0] == '/' || source[0] == '.' || (!strchr(source, ':') && !stat(source, &s))) { read_flows_from_file(source, cls, index); } else { enum ofputil_protocol protocol; struct vconn *vconn; protocol = open_vconn(source, &vconn); protocol = set_protocol_for_flow_dump(vconn, protocol, OFPUTIL_P_ANY); read_flows_from_switch(vconn, protocol, cls, index); vconn_close(vconn); } } static void ofctl_diff_flows(int argc OVS_UNUSED, char *argv[]) { bool differences = false; struct cls_cursor cursor; struct classifier cls; struct ds a_s, b_s; struct fte *fte; classifier_init(&cls); read_flows_from_source(argv[1], &cls, 0); read_flows_from_source(argv[2], &cls, 1); ds_init(&a_s); ds_init(&b_s); ovs_rwlock_rdlock(&cls.rwlock); cls_cursor_init(&cursor, &cls, NULL); CLS_CURSOR_FOR_EACH (fte, rule, &cursor) { struct fte_version *a = fte->versions[0]; struct fte_version *b = fte->versions[1]; if (!a || !b || !fte_version_equals(a, b)) { fte_version_format(fte, 0, &a_s); fte_version_format(fte, 1, &b_s); if (strcmp(ds_cstr(&a_s), ds_cstr(&b_s))) { if (a_s.length) { printf("-%s", ds_cstr(&a_s)); } if (b_s.length) { printf("+%s", ds_cstr(&b_s)); } differences = true; } } } ovs_rwlock_unlock(&cls.rwlock); ds_destroy(&a_s); ds_destroy(&b_s); fte_free_all(&cls); if (differences) { exit(2); } } /* Undocumented commands for unit testing. 
*/

/* Prints the names of 'usable_protocols', picks the lowest-numbered protocol
 * bit that is both usable and allowed, and then encodes and prints each of
 * the 'n_fms' flow_mods in 'fms' with that protocol.  Frees the actions of
 * every flow_mod, but not 'fms' itself.  Exits if no protocol is usable. */
static void
ofctl_parse_flows__(struct ofputil_flow_mod *fms, size_t n_fms,
                    enum ofputil_protocol usable_protocols)
{
    const size_t n_bits = sizeof(enum ofputil_protocol) * CHAR_BIT;
    enum ofputil_protocol protocol = 0;
    char *names;
    size_t bit;
    size_t idx;

    names = ofputil_protocols_to_string(usable_protocols);
    printf("usable protocols: %s\n", names);
    free(names);

    if ((usable_protocols & allowed_protocols) == 0) {
        ovs_fatal(0, "no usable protocol");
    }

    /* Scan the bits from least significant upward and stop at the first one
     * that is set in both masks. */
    bit = 0;
    while (bit < n_bits) {
        protocol = 1 << bit;
        if ((protocol & usable_protocols & allowed_protocols) != 0) {
            break;
        }
        bit++;
    }
    ovs_assert(is_pow2(protocol));

    printf("chosen protocol: %s\n", ofputil_protocol_to_string(protocol));

    idx = 0;
    while (idx < n_fms) {
        struct ofputil_flow_mod *mod = &fms[idx];
        struct ofpbuf *encoded = ofputil_encode_flow_mod(mod, protocol);

        ofp_print(stdout, encoded->data, encoded->size, verbosity);
        ofpbuf_delete(encoded);

        free(mod->ofpacts);
        idx++;
    }
}

/* "parse-flow FLOW": treats the argument as a flow specification (as add-flow
 * does), and prints the result back on stdout. */
static void
ofctl_parse_flow(int argc OVS_UNUSED, char *argv[])
{
    struct ofputil_flow_mod flow_mod;
    enum ofputil_protocol protocols;
    char *err;

    err = parse_ofp_flow_mod_str(&flow_mod, argv[1], OFPFC_ADD, &protocols);
    if (err != NULL) {
        ovs_fatal(0, "%s", err);
    }
    ofctl_parse_flows__(&flow_mod, 1, protocols);
}

/* "parse-flows FILENAME": reads the named file as a sequence of flows (like
 * add-flows) and prints each of the flows back to stdout.
*/ static void ofctl_parse_flows(int argc OVS_UNUSED, char *argv[]) { enum ofputil_protocol usable_protocols; struct ofputil_flow_mod *fms = NULL; size_t n_fms = 0; char *error; error = parse_ofp_flow_mod_file(argv[1], OFPFC_ADD, &fms, &n_fms, &usable_protocols); if (error) { ovs_fatal(0, "%s", error); } ofctl_parse_flows__(fms, n_fms, usable_protocols); free(fms); } static void ofctl_parse_nxm__(bool oxm) { struct ds in; ds_init(&in); while (!ds_get_test_line(&in, stdin)) { struct ofpbuf nx_match; struct match match; ovs_be64 cookie, cookie_mask; enum ofperr error; int match_len; /* Convert string to nx_match. */ ofpbuf_init(&nx_match, 0); if (oxm) { match_len = oxm_match_from_string(ds_cstr(&in), &nx_match); } else { match_len = nx_match_from_string(ds_cstr(&in), &nx_match); } /* Convert nx_match to match. */ if (strict) { if (oxm) { error = oxm_pull_match(&nx_match, &match); } else { error = nx_pull_match(&nx_match, match_len, &match, &cookie, &cookie_mask); } } else { if (oxm) { error = oxm_pull_match_loose(&nx_match, &match); } else { error = nx_pull_match_loose(&nx_match, match_len, &match, &cookie, &cookie_mask); } } if (!error) { char *out; /* Convert match back to nx_match. */ ofpbuf_uninit(&nx_match); ofpbuf_init(&nx_match, 0); if (oxm) { match_len = oxm_put_match(&nx_match, &match); out = oxm_match_to_string(&nx_match, match_len); } else { match_len = nx_put_match(&nx_match, &match, cookie, cookie_mask); out = nx_match_to_string(nx_match.data, match_len); } puts(out); free(out); } else { printf("nx_pull_match() returned error %s\n", ofperr_get_name(error)); } ofpbuf_uninit(&nx_match); } ds_destroy(&in); } /* "parse-nxm": reads a series of NXM nx_match specifications as strings from * stdin, does some internal fussing with them, and then prints them back as * strings on stdout. 
*/ static void ofctl_parse_nxm(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { return ofctl_parse_nxm__(false); } /* "parse-oxm": reads a series of OXM nx_match specifications as strings from * stdin, does some internal fussing with them, and then prints them back as * strings on stdout. */ static void ofctl_parse_oxm(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { return ofctl_parse_nxm__(true); } static void print_differences(const char *prefix, const void *a_, size_t a_len, const void *b_, size_t b_len) { const uint8_t *a = a_; const uint8_t *b = b_; size_t i; for (i = 0; i < MIN(a_len, b_len); i++) { if (a[i] != b[i]) { printf("%s%2zu: %02"PRIx8" -> %02"PRIx8"\n", prefix, i, a[i], b[i]); } } for (i = a_len; i < b_len; i++) { printf("%s%2zu: (none) -> %02"PRIx8"\n", prefix, i, b[i]); } for (i = b_len; i < a_len; i++) { printf("%s%2zu: %02"PRIx8" -> (none)\n", prefix, i, a[i]); } } /* "parse-ofp10-actions": reads a series of OpenFlow 1.0 action specifications * as hex bytes from stdin, converts them to ofpacts, prints them as strings * on stdout, and then converts them back to hex bytes and prints any * differences from the input. */ static void ofctl_parse_ofp10_actions(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ds in; ds_init(&in); while (!ds_get_preprocessed_line(&in, stdin, NULL)) { struct ofpbuf of10_out; struct ofpbuf of10_in; struct ofpbuf ofpacts; enum ofperr error; size_t size; struct ds s; /* Parse hex bytes. */ ofpbuf_init(&of10_in, 0); if (ofpbuf_put_hex(&of10_in, ds_cstr(&in), NULL)[0] != '\0') { ovs_fatal(0, "Trailing garbage in hex data"); } /* Convert to ofpacts. */ ofpbuf_init(&ofpacts, 0); size = of10_in.size; error = ofpacts_pull_openflow10(&of10_in, of10_in.size, &ofpacts); if (error) { printf("bad OF1.1 actions: %s\n\n", ofperr_get_name(error)); ofpbuf_uninit(&ofpacts); ofpbuf_uninit(&of10_in); continue; } ofpbuf_push_uninit(&of10_in, size); /* Print cls_rule. 
*/ ds_init(&s); ofpacts_format(ofpacts.data, ofpacts.size, &s); puts(ds_cstr(&s)); ds_destroy(&s); /* Convert back to ofp10 actions and print differences from input. */ ofpbuf_init(&of10_out, 0); ofpacts_put_openflow10(ofpacts.data, ofpacts.size, &of10_out); print_differences("", of10_in.data, of10_in.size, of10_out.data, of10_out.size); putchar('\n'); ofpbuf_uninit(&ofpacts); ofpbuf_uninit(&of10_in); ofpbuf_uninit(&of10_out); } ds_destroy(&in); } /* "parse-ofp10-match": reads a series of ofp10_match specifications as hex * bytes from stdin, converts them to cls_rules, prints them as strings on * stdout, and then converts them back to hex bytes and prints any differences * from the input. * * The input hex bytes may contain "x"s to represent "don't-cares", bytes whose * values are ignored in the input and will be set to zero when OVS converts * them back to hex bytes. ovs-ofctl actually sets "x"s to random bits when * it does the conversion to hex, to ensure that in fact they are ignored. */ static void ofctl_parse_ofp10_match(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ds expout; struct ds in; ds_init(&in); ds_init(&expout); while (!ds_get_preprocessed_line(&in, stdin, NULL)) { struct ofpbuf match_in, match_expout; struct ofp10_match match_out; struct ofp10_match match_normal; struct match match; char *p; /* Parse hex bytes to use for expected output. */ ds_clear(&expout); ds_put_cstr(&expout, ds_cstr(&in)); for (p = ds_cstr(&expout); *p; p++) { if (*p == 'x') { *p = '0'; } } ofpbuf_init(&match_expout, 0); if (ofpbuf_put_hex(&match_expout, ds_cstr(&expout), NULL)[0] != '\0') { ovs_fatal(0, "Trailing garbage in hex data"); } if (match_expout.size != sizeof(struct ofp10_match)) { ovs_fatal(0, "Input is %zu bytes, expected %zu", match_expout.size, sizeof(struct ofp10_match)); } /* Parse hex bytes for input. 
*/ for (p = ds_cstr(&in); *p; p++) { if (*p == 'x') { *p = "0123456789abcdef"[random_uint32() & 0xf]; } } ofpbuf_init(&match_in, 0); if (ofpbuf_put_hex(&match_in, ds_cstr(&in), NULL)[0] != '\0') { ovs_fatal(0, "Trailing garbage in hex data"); } if (match_in.size != sizeof(struct ofp10_match)) { ovs_fatal(0, "Input is %zu bytes, expected %zu", match_in.size, sizeof(struct ofp10_match)); } /* Convert to cls_rule and print. */ ofputil_match_from_ofp10_match(match_in.data, &match); match_print(&match); /* Convert back to ofp10_match and print differences from input. */ ofputil_match_to_ofp10_match(&match, &match_out); print_differences("", match_expout.data, match_expout.size, &match_out, sizeof match_out); /* Normalize, then convert and compare again. */ ofputil_normalize_match(&match); ofputil_match_to_ofp10_match(&match, &match_normal); print_differences("normal: ", &match_out, sizeof match_out, &match_normal, sizeof match_normal); putchar('\n'); ofpbuf_uninit(&match_in); ofpbuf_uninit(&match_expout); } ds_destroy(&in); ds_destroy(&expout); } /* "parse-ofp11-match": reads a series of ofp11_match specifications as hex * bytes from stdin, converts them to "struct match"es, prints them as strings * on stdout, and then converts them back to hex bytes and prints any * differences from the input. */ static void ofctl_parse_ofp11_match(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ds in; ds_init(&in); while (!ds_get_preprocessed_line(&in, stdin, NULL)) { struct ofpbuf match_in; struct ofp11_match match_out; struct match match; enum ofperr error; /* Parse hex bytes. */ ofpbuf_init(&match_in, 0); if (ofpbuf_put_hex(&match_in, ds_cstr(&in), NULL)[0] != '\0') { ovs_fatal(0, "Trailing garbage in hex data"); } if (match_in.size != sizeof(struct ofp11_match)) { ovs_fatal(0, "Input is %zu bytes, expected %zu", match_in.size, sizeof(struct ofp11_match)); } /* Convert to match. 
*/ error = ofputil_match_from_ofp11_match(match_in.data, &match); if (error) { printf("bad ofp11_match: %s\n\n", ofperr_get_name(error)); ofpbuf_uninit(&match_in); continue; } /* Print match. */ match_print(&match); /* Convert back to ofp11_match and print differences from input. */ ofputil_match_to_ofp11_match(&match, &match_out); print_differences("", match_in.data, match_in.size, &match_out, sizeof match_out); putchar('\n'); ofpbuf_uninit(&match_in); } ds_destroy(&in); } /* "parse-ofp11-actions": reads a series of OpenFlow 1.1 action specifications * as hex bytes from stdin, converts them to ofpacts, prints them as strings * on stdout, and then converts them back to hex bytes and prints any * differences from the input. */ static void ofctl_parse_ofp11_actions(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ds in; ds_init(&in); while (!ds_get_preprocessed_line(&in, stdin, NULL)) { struct ofpbuf of11_out; struct ofpbuf of11_in; struct ofpbuf ofpacts; enum ofperr error; size_t size; struct ds s; /* Parse hex bytes. */ ofpbuf_init(&of11_in, 0); if (ofpbuf_put_hex(&of11_in, ds_cstr(&in), NULL)[0] != '\0') { ovs_fatal(0, "Trailing garbage in hex data"); } /* Convert to ofpacts. */ ofpbuf_init(&ofpacts, 0); size = of11_in.size; error = ofpacts_pull_openflow11_actions(&of11_in, of11_in.size, &ofpacts); if (error) { printf("bad OF1.1 actions: %s\n\n", ofperr_get_name(error)); ofpbuf_uninit(&ofpacts); ofpbuf_uninit(&of11_in); continue; } ofpbuf_push_uninit(&of11_in, size); /* Print cls_rule. */ ds_init(&s); ofpacts_format(ofpacts.data, ofpacts.size, &s); puts(ds_cstr(&s)); ds_destroy(&s); /* Convert back to ofp11 actions and print differences from input. 
*/ ofpbuf_init(&of11_out, 0); ofpacts_put_openflow11_actions(ofpacts.data, ofpacts.size, &of11_out); print_differences("", of11_in.data, of11_in.size, of11_out.data, of11_out.size); putchar('\n'); ofpbuf_uninit(&ofpacts); ofpbuf_uninit(&of11_in); ofpbuf_uninit(&of11_out); } ds_destroy(&in); } /* "parse-ofp11-instructions": reads a series of OpenFlow 1.1 instruction * specifications as hex bytes from stdin, converts them to ofpacts, prints * them as strings on stdout, and then converts them back to hex bytes and * prints any differences from the input. */ static void ofctl_parse_ofp11_instructions(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { struct ds in; ds_init(&in); while (!ds_get_preprocessed_line(&in, stdin, NULL)) { struct ofpbuf of11_out; struct ofpbuf of11_in; struct ofpbuf ofpacts; enum ofperr error; size_t size; struct ds s; const char *table_id; char *instructions; /* Parse table_id separated with the follow-up instructions by ",", if * any. */ instructions = ds_cstr(&in); table_id = NULL; if (strstr(instructions, ",")) { table_id = strsep(&instructions, ","); } /* Parse hex bytes. */ ofpbuf_init(&of11_in, 0); if (ofpbuf_put_hex(&of11_in, instructions, NULL)[0] != '\0') { ovs_fatal(0, "Trailing garbage in hex data"); } /* Convert to ofpacts. */ ofpbuf_init(&ofpacts, 0); size = of11_in.size; error = ofpacts_pull_openflow11_instructions(&of11_in, of11_in.size, &ofpacts); if (!error) { /* Verify actions. */ struct flow flow; memset(&flow, 0, sizeof flow); error = ofpacts_check(ofpacts.data, ofpacts.size, &flow, OFPP_MAX, table_id ? atoi(table_id) : 0); } if (error) { printf("bad OF1.1 instructions: %s\n\n", ofperr_get_name(error)); ofpbuf_uninit(&ofpacts); ofpbuf_uninit(&of11_in); continue; } ofpbuf_push_uninit(&of11_in, size); /* Print cls_rule. */ ds_init(&s); ofpacts_format(ofpacts.data, ofpacts.size, &s); puts(ds_cstr(&s)); ds_destroy(&s); /* Convert back to ofp11 instructions and print differences from * input. 
*/ ofpbuf_init(&of11_out, 0); ofpacts_put_openflow11_instructions(ofpacts.data, ofpacts.size, &of11_out); print_differences("", of11_in.data, of11_in.size, of11_out.data, of11_out.size); putchar('\n'); ofpbuf_uninit(&ofpacts); ofpbuf_uninit(&of11_in); ofpbuf_uninit(&of11_out); } ds_destroy(&in); } /* "check-vlan VLAN_TCI VLAN_TCI_MASK": converts the specified vlan_tci and * mask values to and from various formats and prints the results. */ static void ofctl_check_vlan(int argc OVS_UNUSED, char *argv[]) { struct match match; char *string_s; struct ofputil_flow_mod fm; struct ofpbuf nxm; struct match nxm_match; int nxm_match_len; char *nxm_s; struct ofp10_match of10_raw; struct match of10_match; struct ofp11_match of11_raw; struct match of11_match; enum ofperr error; char *error_s; enum ofputil_protocol usable_protocols; /* Unused for now. */ match_init_catchall(&match); match.flow.vlan_tci = htons(strtoul(argv[1], NULL, 16)); match.wc.masks.vlan_tci = htons(strtoul(argv[2], NULL, 16)); /* Convert to and from string. */ string_s = match_to_string(&match, OFP_DEFAULT_PRIORITY); printf("%s -> ", string_s); fflush(stdout); error_s = parse_ofp_str(&fm, -1, string_s, &usable_protocols); if (error_s) { ovs_fatal(0, "%s", error_s); } printf("%04"PRIx16"/%04"PRIx16"\n", ntohs(fm.match.flow.vlan_tci), ntohs(fm.match.wc.masks.vlan_tci)); free(string_s); /* Convert to and from NXM. */ ofpbuf_init(&nxm, 0); nxm_match_len = nx_put_match(&nxm, &match, htonll(0), htonll(0)); nxm_s = nx_match_to_string(nxm.data, nxm_match_len); error = nx_pull_match(&nxm, nxm_match_len, &nxm_match, NULL, NULL); printf("NXM: %s -> ", nxm_s); if (error) { printf("%s\n", ofperr_to_string(error)); } else { printf("%04"PRIx16"/%04"PRIx16"\n", ntohs(nxm_match.flow.vlan_tci), ntohs(nxm_match.wc.masks.vlan_tci)); } free(nxm_s); ofpbuf_uninit(&nxm); /* Convert to and from OXM. 
*/ ofpbuf_init(&nxm, 0); nxm_match_len = oxm_put_match(&nxm, &match); nxm_s = oxm_match_to_string(&nxm, nxm_match_len); error = oxm_pull_match(&nxm, &nxm_match); printf("OXM: %s -> ", nxm_s); if (error) { printf("%s\n", ofperr_to_string(error)); } else { uint16_t vid = ntohs(nxm_match.flow.vlan_tci) & (VLAN_VID_MASK | VLAN_CFI); uint16_t mask = ntohs(nxm_match.wc.masks.vlan_tci) & (VLAN_VID_MASK | VLAN_CFI); printf("%04"PRIx16"/%04"PRIx16",", vid, mask); if (vid && vlan_tci_to_pcp(nxm_match.wc.masks.vlan_tci)) { printf("%02"PRIx8"\n", vlan_tci_to_pcp(nxm_match.flow.vlan_tci)); } else { printf("--\n"); } } free(nxm_s); ofpbuf_uninit(&nxm); /* Convert to and from OpenFlow 1.0. */ ofputil_match_to_ofp10_match(&match, &of10_raw); ofputil_match_from_ofp10_match(&of10_raw, &of10_match); printf("OF1.0: %04"PRIx16"/%d,%02"PRIx8"/%d -> %04"PRIx16"/%04"PRIx16"\n", ntohs(of10_raw.dl_vlan), (of10_raw.wildcards & htonl(OFPFW10_DL_VLAN)) != 0, of10_raw.dl_vlan_pcp, (of10_raw.wildcards & htonl(OFPFW10_DL_VLAN_PCP)) != 0, ntohs(of10_match.flow.vlan_tci), ntohs(of10_match.wc.masks.vlan_tci)); /* Convert to and from OpenFlow 1.1. */ ofputil_match_to_ofp11_match(&match, &of11_raw); ofputil_match_from_ofp11_match(&of11_raw, &of11_match); printf("OF1.1: %04"PRIx16"/%d,%02"PRIx8"/%d -> %04"PRIx16"/%04"PRIx16"\n", ntohs(of11_raw.dl_vlan), (of11_raw.wildcards & htonl(OFPFW11_DL_VLAN)) != 0, of11_raw.dl_vlan_pcp, (of11_raw.wildcards & htonl(OFPFW11_DL_VLAN_PCP)) != 0, ntohs(of11_match.flow.vlan_tci), ntohs(of11_match.wc.masks.vlan_tci)); } /* "print-error ENUM": Prints the type and code of ENUM for every OpenFlow * version. 
*/ static void ofctl_print_error(int argc OVS_UNUSED, char *argv[]) { enum ofperr error; int version; error = ofperr_from_name(argv[1]); if (!error) { ovs_fatal(0, "unknown error \"%s\"", argv[1]); } for (version = 0; version <= UINT8_MAX; version++) { const char *name = ofperr_domain_get_name(version); if (name) { int vendor = ofperr_get_vendor(error, version); int type = ofperr_get_type(error, version); int code = ofperr_get_code(error, version); if (vendor != -1 || type != -1 || code != -1) { printf("%s: vendor %#x, type %d, code %d\n", name, vendor, type, code); } } } } /* "encode-error-reply ENUM REQUEST": Encodes an error reply to REQUEST for the * error named ENUM and prints the error reply in hex. */ static void ofctl_encode_error_reply(int argc OVS_UNUSED, char *argv[]) { const struct ofp_header *oh; struct ofpbuf request, *reply; enum ofperr error; error = ofperr_from_name(argv[1]); if (!error) { ovs_fatal(0, "unknown error \"%s\"", argv[1]); } ofpbuf_init(&request, 0); if (ofpbuf_put_hex(&request, argv[2], NULL)[0] != '\0') { ovs_fatal(0, "Trailing garbage in hex data"); } if (request.size < sizeof(struct ofp_header)) { ovs_fatal(0, "Request too short"); } oh = request.data; if (request.size != ntohs(oh->length)) { ovs_fatal(0, "Request size inconsistent"); } reply = ofperr_encode_reply(error, request.data); ofpbuf_uninit(&request); ovs_hex_dump(stdout, reply->data, reply->size, 0, false); ofpbuf_delete(reply); } /* "ofp-print HEXSTRING [VERBOSITY]": Converts the hex digits in HEXSTRING into * binary data, interpreting them as an OpenFlow message, and prints the * OpenFlow message on stdout, at VERBOSITY (level 2 by default). */ static void ofctl_ofp_print(int argc, char *argv[]) { struct ofpbuf packet; ofpbuf_init(&packet, strlen(argv[1]) / 2); if (ofpbuf_put_hex(&packet, argv[1], NULL)[0] != '\0') { ovs_fatal(0, "trailing garbage following hex bytes"); } ofp_print(stdout, packet.data, packet.size, argc > 2 ? 
atoi(argv[2]) : 2); ofpbuf_uninit(&packet); } /* "encode-hello BITMAP...": Encodes each BITMAP as an OpenFlow hello message * and dumps each message in hex. */ static void ofctl_encode_hello(int argc OVS_UNUSED, char *argv[]) { uint32_t bitmap = strtol(argv[1], NULL, 0); struct ofpbuf *hello; hello = ofputil_encode_hello(bitmap); ovs_hex_dump(stdout, hello->data, hello->size, 0, false); ofp_print(stdout, hello->data, hello->size, verbosity); ofpbuf_delete(hello); } static const struct command all_commands[] = { { "show", 1, 1, ofctl_show }, { "monitor", 1, 3, ofctl_monitor }, { "snoop", 1, 1, ofctl_snoop }, { "dump-desc", 1, 1, ofctl_dump_desc }, { "dump-tables", 1, 1, ofctl_dump_tables }, { "dump-flows", 1, 2, ofctl_dump_flows }, { "dump-aggregate", 1, 2, ofctl_dump_aggregate }, { "queue-stats", 1, 3, ofctl_queue_stats }, { "add-flow", 2, 2, ofctl_add_flow }, { "add-flows", 2, 2, ofctl_add_flows }, { "mod-flows", 2, 2, ofctl_mod_flows }, { "del-flows", 1, 2, ofctl_del_flows }, { "replace-flows", 2, 2, ofctl_replace_flows }, { "diff-flows", 2, 2, ofctl_diff_flows }, { "packet-out", 4, INT_MAX, ofctl_packet_out }, { "dump-ports", 1, 2, ofctl_dump_ports }, { "dump-ports-desc", 1, 1, ofctl_dump_ports_desc }, { "mod-port", 3, 3, ofctl_mod_port }, { "get-frags", 1, 1, ofctl_get_frags }, { "set-frags", 2, 2, ofctl_set_frags }, { "ofp-parse", 1, 1, ofctl_ofp_parse }, { "probe", 1, 1, ofctl_probe }, { "ping", 1, 2, ofctl_ping }, { "benchmark", 3, 3, ofctl_benchmark }, { "help", 0, INT_MAX, ofctl_help }, /* Undocumented commands for testing. 
*/ { "parse-flow", 1, 1, ofctl_parse_flow }, { "parse-flows", 1, 1, ofctl_parse_flows }, { "parse-nx-match", 0, 0, ofctl_parse_nxm }, { "parse-nxm", 0, 0, ofctl_parse_nxm }, { "parse-oxm", 0, 0, ofctl_parse_oxm }, { "parse-ofp10-actions", 0, 0, ofctl_parse_ofp10_actions }, { "parse-ofp10-match", 0, 0, ofctl_parse_ofp10_match }, { "parse-ofp11-match", 0, 0, ofctl_parse_ofp11_match }, { "parse-ofp11-actions", 0, 0, ofctl_parse_ofp11_actions }, { "parse-ofp11-instructions", 0, 0, ofctl_parse_ofp11_instructions }, { "check-vlan", 2, 2, ofctl_check_vlan }, { "print-error", 1, 1, ofctl_print_error }, { "encode-error-reply", 2, 2, ofctl_encode_error_reply }, { "ofp-print", 1, 2, ofctl_ofp_print }, { "encode-hello", 1, 1, ofctl_encode_hello }, { NULL, 0, 0, NULL }, }; static const struct command *get_all_commands(void) { return all_commands; } openvswitch-2.0.1+git20140120/utilities/ovs-parse-backtrace.8000066400000000000000000000017351226605124000234200ustar00rootroot00000000000000.TH ovs\-parse\-backtrace 8 "October 2012" "Open vSwitch" "Open vSwitch Manual" . .SH NAME ovs\-parse\-backtrace \- parses ovs-appctl backtrace output . .SH SYNOPSIS \fBovs\-appctl backtrace\fR | \fBovs\-parse\-backtrace\fR [\fIbinary\fR] .P \fBovs\-parse\-backtrace\fR [\fIbinary\fR] < \fIbacktrace\fR . .SH DESCRIPTION In some configurations, many Open vSwitch daemons can produce a series of backtraces using the \fBovs\-appctl backtrace\fR command. Users can analyze these backtraces to figure out what the given Open vSwitch daemon may be spending most of its time doing. \fBovs\-parse\-backtrace\fR makes this output easier to interpret. .PP The \fBovs\-appctl backtrace\fR output must be supplied on standard input. The binary that produced the output should be supplied as the sole non-option argument. For best results, the binary should have debug symbols. . .SH OPTIONS .TP \fB\-\-help\fR Prints a usage message and exits. .P \fB\-\-version\fR Prints the version and exits. 
openvswitch-2.0.1+git20140120/utilities/ovs-parse-backtrace.in000077500000000000000000000054431226605124000236620ustar00rootroot00000000000000#! @PYTHON@ # # Copyright (c) 2012 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import optparse import os import re import subprocess import sys addr2line_cache = {} # None if addr2line is missing or broken. def addr2line(binary, addr): global addr2line_cache if addr2line_cache is None: return "" if addr in addr2line_cache: return addr2line_cache[addr] cmd = ["addr2line", "-f", "-s", "-e", binary, addr] try: proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) lines = proc.stdout.readlines() failed = proc.returncode except OSError: failed = True if failed: addr2line_cache = None return "" lines = [l.strip() for l in lines] return " ".join(lines) def main(): parser = optparse.OptionParser(version='@VERSION@', usage="usage: %prog [binary]", description="""\ Parses the output of ovs-appctl backtrace producing a more human readable result. 
Expected usage is for ovs-appctl backtrace to be piped in.""") options, args = parser.parse_args() if len(args) > 1: parser.print_help() sys.exit(1) if len(args) == 1: binary = args[0] else: binary = "@sbindir@/ovs-vswitchd" debug = "/usr/lib/debug%s.debug" % binary if os.path.exists(debug): binary = debug print "Binary: %s\n" % binary stdin = sys.stdin.read() traces = [] for trace in stdin.strip().split("\n\n"): lines = trace.splitlines() match = re.search(r'Count (\d+)', lines[0]) if match: count = int(match.group(1)) else: count = 0 traces.append((lines[1:], count)) traces = sorted(traces, key=(lambda x: x[1]), reverse=True) for lines, count in traces: longest = max(len(l) for l in lines) print "Backtrace Count: %d" % count for line in lines: match = re.search(r'\[(0x.*)]', line) if match: print "%s %s" % (line.ljust(longest), addr2line(binary, match.group(1))) else: print line print if __name__ == "__main__": main() openvswitch-2.0.1+git20140120/utilities/ovs-pcap.1.in000066400000000000000000000011521226605124000217030ustar00rootroot00000000000000.TH ovs\-pcap 1 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" . .SH NAME ovs\-pcap \- print packets from a pcap file as hex . .SH SYNOPSIS \fBovs\-pcap\fR \fIfile\fR .so lib/common-syn.man . .SH DESCRIPTION The \fBovs\-pcap\fR program reads the pcap \fIfile\fR named on the command line and prints each packet's contents as a sequence of hex digits on a line of its own. This format is suitable for use with the \fBofproto/trace\fR command supported by \fBovs\-vswitchd\fR(8). . .SH "OPTIONS" .so lib/common.man . .SH "SEE ALSO" . .BR ovs\-vswitchd (8), .BR ovs\-tcpundump (1), .BR tcpdump (8), .BR wireshark (8). openvswitch-2.0.1+git20140120/utilities/ovs-pcap.in000077500000000000000000000063321226605124000215540ustar00rootroot00000000000000#! @PYTHON@ # # Copyright (c) 2010 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import binascii import getopt import struct import sys class PcapException(Exception): pass class PcapReader(object): def __init__(self, file_name): self.file = open(file_name, "rb") header = self.file.read(24) if len(header) != 24: raise PcapException("end of file reading pcap header") magic, version, thiszone, sigfigs, snaplen, network = \ struct.unpack(">6I", header) if magic == 0xa1b2c3d4: self.header_format = ">4I" elif magic == 0xd4c3b2a1: self.header_format = "<4I" else: raise PcapException("bad magic %u reading pcap file " "(expected 0xa1b2c3d4 or 0xd4c3b2a1)" % magic) def read(self): header = self.file.read(16) if len(header) == 0: return None elif len(header) != 16: raise PcapException("end of file within pcap record header") ts_sec, ts_usec, incl_len, orig_len = struct.unpack(self.header_format, header) packet = self.file.read(incl_len) if len(packet) != incl_len: raise PcapException("end of file reading pcap packet data") return packet argv0 = sys.argv[0] def usage(): print """\ %(argv0)s: print pcap file packet data as hex usage: %(argv0)s FILE where FILE is a PCAP file. 
The following options are also available: -h, --help display this help message -V, --version display version information\ """ % {'argv0': argv0} sys.exit(0) if __name__ == "__main__": try: try: options, args = getopt.gnu_getopt(sys.argv[1:], 'hV', ['help', 'version']) except getopt.GetoptException, geo: sys.stderr.write("%s: %s\n" % (argv0, geo.msg)) sys.exit(1) for key, value in options: if key in ['-h', '--help']: usage() elif key in ['-V', '--version']: print "ovs-pcap (Open vSwitch) @VERSION@" else: sys.exit(0) if len(args) != 1: sys.stderr.write("%s: exactly 1 non-option argument required " "(use --help for help)\n" % argv0) sys.exit(1) reader = PcapReader(args[0]) while True: packet = reader.read() if packet is None: break print binascii.hexlify(packet) except PcapException, e: sys.stderr.write("%s: %s\n" % (argv0, e)) sys.exit(1) # Local variables: # mode: python # End: openvswitch-2.0.1+git20140120/utilities/ovs-pki.8.in000066400000000000000000000207741226605124000215650ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .TH ovs\-pki 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .SH NAME ovs\-pki \- OpenFlow public key infrastructure management utility .SH SYNOPSIS Each command takes the form: .sp \fBovs\-pki\fR [\fIoptions\fR] \fIcommand\fR [\fIargs\fR] .sp The implemented commands and their arguments are: .br \fBovs\-pki\fR \fBinit\fR .br \fBovs\-pki\fR \fBreq\fR \fIname\fR .br \fBovs\-pki\fR \fBsign\fR \fIname\fR [\fItype\fR] .br \fBovs\-pki\fR \fBreq+sign\fR \fIname\fR [\fItype\fR] .br \fBovs\-pki\fR \fBverify\fR \fIname\fR [\fItype\fR] .br \fBovs\-pki\fR \fBfingerprint\fR \fIfile\fR .br \fBovs\-pki\fR \fBself\-sign\fR \fIname\fR .sp Each \fItype\fR above is a certificate type, either \fBswitch\fR (default) or \fBcontroller\fR. 
.sp The available options are: .br [\fB\-k\fR \fItype\fR | \fB\-\^\-key=\fItype\fR] .br [\fB\-B\fR \fInbits\fR | \fB\-\^\-bits=\fInbits\fR] .br [\fB\-D\fR \fIfile\fR | \fB\-\^\-dsaparam=\fIfile\fR] .br [\fB\-b\fR | \fB\-\^\-batch\fR] .br [\fB\-f\fR | \fB\-\^\-force\fR] .br [\fB\-d\fR \fIdir\fR | \fB\-\^\-dir=\fR\fIdir\fR] .br [\fB\-l\fR \fIfile\fR | \fB\-\^\-log=\fIfile\fR] .br [\fB\-h\fR | \fB\-\^\-help\fR] .sp Some options do not apply to every command. .SH DESCRIPTION The \fBovs\-pki\fR program sets up and manages a public key infrastructure for use with OpenFlow. It is intended to be a simple interface for organizations that do not have an established public key infrastructure. Other PKI tools can substitute for or supplement the use of \fBovs\-pki\fR. \fBovs\-pki\fR uses \fBopenssl\fR(1) for certificate management and key generation. .SH "OFFLINE COMMANDS" The following \fBovs\-pki\fR commands support manual PKI administration: .TP \fBinit\fR Initializes a new PKI (by default in directory \fB@PKIDIR@\fR) and populates it with a pair of certificate authorities for controllers and switches. This command should ideally be run on a high\-security machine separate from any OpenFlow controller or switch, called the CA machine. The files \fBpki/controllerca/cacert.pem\fR and \fBpki/switchca/cacert.pem\fR that it produces will need to be copied over to the OpenFlow switches and controllers, respectively. Their contents may safely be made public. By default, \fBovs\-pki\fR generates 2048\-bit RSA keys. The \fB\-B\fR or \fB\-\^\-bits\fR option (see below) may be used to override the key length. The \fB\-k dsa\fR or \fB\-\^\-key=dsa\fR option may be used to use DSA in place of RSA. If DSA is selected, the \fBdsaparam.pem\fR file generated in the new PKI hierarchy must be copied to any machine on which the \fBreq\fR command (see below) will be executed. Its contents may safely be made public. Other files generated by \fBinit\fR may remain on the CA machine. 
The files \fBpki/controllerca/private/cakey.pem\fR and \fBpki/switchca/private/cakey.pem\fR have particularly sensitive contents that should not be exposed. .TP \fBreq\fR \fIname\fR Generates a new private key named \fIname\fR\fB\-privkey.pem\fR and corresponding certificate request named \fIname\fR\fB\-req.pem\fR. The private key can be intended for use by a switch or a controller. This command should ideally be run on the switch or controller that will use the private key to identify itself. The file \fIname\fR\fB\-req.pem\fR must be copied to the CA machine for signing with the \fBsign\fR command (below). This command will output a fingerprint to stdout as its final step. Write down the fingerprint and take it to the CA machine before continuing with the \fBsign\fR step. When RSA keys are in use (as is the default), \fBreq\fR, unlike the rest of \fBovs\-pki\fR's commands, does not need access to a PKI hierarchy created by \fBovs\-pki init\fR. The \fB\-B\fR or \fB\-\^\-bits\fR option (see below) may be used to specify the number of bits in the generated RSA key. When DSA keys are used (as specified with \fB\-\^\-key=dsa\fR), \fBreq\fR needs access to the \fBdsaparam.pem\fR file created as part of the PKI hierarchy (but not to other files in that tree). By default, \fBovs\-pki\fR looks for this file in \fB@PKIDIR@/dsaparam.pem\fR, but the \fB\-D\fR or \fB\-\^\-dsaparam\fR option (see below) may be used to specify an alternate location. \fIname\fR\fB\-privkey.pem\fR has sensitive contents that should not be exposed. \fIname\fR\fB\-req.pem\fR may be safely made public. .TP \fBsign\fR \fIname\fR [\fItype\fR] Signs the certificate request named \fIname\fR\fB\-req.pem\fR that was produced in the previous step, producing a certificate named \fIname\fR\fB\-cert.pem\fR. \fItype\fR, either \fBswitch\fR (default) or \fBcontroller\fR, indicates the use for which the key is being certified. This command must be run on the CA machine. 
The command will output a fingerprint to stdout and request that you verify that it is the same fingerprint output by the \fBreq\fR command. This ensures that the request being signed is the same one produced by \fBreq\fR. (The \fB\-b\fR or \fB\-\^\-batch\fR option suppresses the verification step.) The file \fIname\fR\fB\-cert.pem\fR will need to be copied back to the switch or controller for which it is intended. Its contents may safely be made public. .TP \fBreq+sign\fR \fIname\fR [\fItype\fR] Combines the \fBreq\fR and \fBsign\fR commands into a single step, outputting all the files produced by each. The \fIname\fR\fB\-privkey.pem\fR and \fIname\fR\fB\-cert.pem\fR files must be copied securely to the switch or controller. \fIname\fR\fB\-privkey.pem\fR has sensitive contents and must not be exposed in transit. Afterward, it should be deleted from the CA machine. This combined method is, theoretically, less secure than the individual steps performed separately on two different machines, because there is additional potential for exposure of the private key. However, it is also more convenient. .TP \fBverify\fR \fIname\fR [\fItype\fR] Verifies that \fIname\fR\fB\-cert.pem\fR is a valid certificate for the given \fItype\fR of use, either \fBswitch\fR (default) or \fBcontroller\fR. If the certificate is valid for this use, it prints the message ``\fIname\fR\fB\-cert.pem\fR: OK''; otherwise, it prints an error message. .TP \fBfingerprint\fR \fIfile\fR Prints the fingerprint for \fIfile\fR. If \fIfile\fR is a certificate, then this is the SHA\-1 digest of the DER encoded version of the certificate; otherwise, it is the SHA\-1 digest of the entire file. .TP \fBself\-sign\fR \fIname\fR Signs the certificate request named \fIname\fB\-req.pem\fR using the private key \fIname\fB\-privkey.pem\fR, producing a self-signed certificate named \fIname\fB\-cert.pem\fR. The input files should have been produced with \fBovs\-pki req\fR. 
Some controllers accept such self-signed certificates. .SH OPTIONS .IP "\fB\-k\fR \fItype\fR" .IQ "\fB\-\^\-key=\fItype\fR" For the \fBinit\fR command, sets the public key algorithm to use for the new PKI hierarchy. For the \fBreq\fR and \fBreq+sign\fR commands, sets the public key algorithm to use for the key to be generated, which must match the value specified on \fBinit\fR. With other commands, the value has no effect. The \fItype\fR may be \fBrsa\fR (the default) or \fBdsa\fR. .IP "\fB\-B\fR \fInbits\fR" .IQ "\fB\-\^\-bits=\fInbits\fR" Sets the number of bits in the key to be generated. When RSA keys are in use, this option affects only the \fBinit\fR, \fBreq\fR, and \fBreq+sign\fR commands, and the same value should be given each time. With DSA keys are in use, this option affects only the \fBinit\fR command. The value must be at least 1024. The default is 2048. .IP "\fB\-D\fR \fIfile\fR" .IQ "\fB\-\^\-dsaparam=\fIfile\fR" Specifies an alternate location for the \fBdsaparam.pem\fR file required by the \fBreq\fR and \fBreq+sign\fR commands. This option affects only these commands, and only when DSA keys are used. The default is \fBdsaparam.pem\fR under the PKI hierarchy. .IP "\fB\-b\fR" .IQ "\fB\-\^\-batch\fR" Suppresses the interactive verification of fingerprints that the \fBsign\fR command by default requires. .IP "\fB\-d\fR \fIdir\fR" .IQ "\fB\-\^\-dir=\fR\fIdir\fR" Specifies the location of the PKI hierarchy to be used or created by the command (default: \fB@PKIDIR@\fR). All commands, except \fBreq\fR, need access to a PKI hierarchy. .IP "\fB\-f\fR" .IQ "\fB\-\^\-force\fR" By default, \fBovs\-pki\fR will not overwrite existing files or directories. This option overrides this behavior. .IP "\fB\-l\fR \fIfile\fR" .IQ "\fB\-\^\-log=\fIfile\fR" Sets the log file to \fIfile\fR. Default: \fB@LOGDIR@/ovs\-pki.log\fR. .IP "\fB\-h\fR" .IQ "\fB\-\^\-help\fR" Prints a help usage message and exits. .SH "SEE ALSO" .BR ovs\-controller (8). 
openvswitch-2.0.1+git20140120/utilities/ovs-pki.in000077500000000000000000000337651226605124000214260ustar00rootroot00000000000000#! /bin/sh # Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. set -e pkidir='@PKIDIR@' command= prev= force=no batch=no log='@LOGDIR@/ovs-pki.log' keytype=rsa bits=2048 # OS-specific compatibility routines case $(uname -s) in FreeBSD|NetBSD) file_mod_epoch() { stat -r "$1" | awk '{print $10}' } file_mod_date() { stat -f '%Sm' "$1" } sha1sum() { sha1 "$@" } ;; *) file_mod_epoch() { date -r "$1" +%s } file_mod_date() { date -r "$1" } ;; esac for option; do # This option-parsing mechanism borrowed from a Autoconf-generated # configure script under the following license: # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # 2002, 2003, 2004, 2005, 2006, 2009 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # If the previous option needs an argument, assign it. 
if test -n "$prev"; then eval $prev=\$option prev= continue fi case $option in *=*) optarg=`expr "X$option" : '[^=]*=\(.*\)'` ;; *) optarg=yes ;; esac case $dashdash$option in --) dashdash=yes ;; -h|--help) cat <&2 exit 1 ;; *) if test -z "$command"; then command=$option elif test -z "${arg1+set}"; then arg1=$option elif test -z "${arg2+set}"; then arg2=$option else echo "$option: only two arguments may be specified" >&2 exit 1 fi ;; esac shift done if test -n "$prev"; then option=--`echo $prev | sed 's/_/-/g'` { echo "$as_me: error: missing argument to $option" >&2 { (exit 1); exit 1; }; } fi if test -z "$command"; then echo "$0: missing command name; use --help for help" >&2 exit 1 fi if test "$keytype" != rsa && test "$keytype" != dsa; then echo "$0: argument to -k or --key must be rsa or dsa" >&2 exit 1 fi if test "$bits" -lt 1024; then echo "$0: argument to -B or --bits must be at least 1024" >&2 exit 1 fi if test -z "$dsaparam"; then dsaparam=$pkidir/dsaparam.pem fi case $log in /*) ;; *) log=`pwd`/$log ;; esac logdir=$(dirname "$log") if test ! -d "$logdir"; then mkdir -p -m755 "$logdir" 2>/dev/null || true if test ! -d "$logdir"; then echo "$0: log directory $logdir does not exist and cannot be created" >&2 exit 1 fi fi if test "$command" = "init"; then if test -e "$pkidir" && test "$force" != "yes"; then echo "$0: $pkidir already exists and --force not specified" >&2 exit 1 fi if test ! -d "$pkidir"; then mkdir -p "$pkidir" fi cd "$pkidir" exec 3>>$log if test $keytype = dsa && test ! -e dsaparam.pem; then echo "Generating DSA parameters, please wait..." >&2 openssl dsaparam -out dsaparam.pem $bits 1>&3 2>&3 fi # Get the current date to add some uniqueness to this certificate curr_date=`date +"%Y %b %d %T"` # Create the CAs. for ca in controllerca switchca; do echo "Creating $ca..." 
>&2 oldpwd=`pwd` mkdir -p $ca cd $ca mkdir -p certs crl newcerts mkdir -p -m 0700 private touch index.txt test -e crlnumber || echo 01 > crlnumber test -e serial || echo 01 > serial # Put DSA parameters in directory. if test $keytype = dsa && test ! -e dsaparam.pem; then cp ../dsaparam.pem . fi # Write CA configuration file. if test ! -e ca.cnf; then sed "s/@ca@/$ca/g;s/@curr_date@/$curr_date/g" > ca.cnf <<'EOF' [ req ] prompt = no distinguished_name = req_distinguished_name [ req_distinguished_name ] C = US ST = CA L = Palo Alto O = Open vSwitch OU = @ca@ CN = OVS @ca@ CA Certificate (@curr_date@) [ ca ] default_ca = the_ca [ the_ca ] dir = . # top dir database = $dir/index.txt # index file. new_certs_dir = $dir/newcerts # new certs dir certificate = $dir/cacert.pem # The CA cert serial = $dir/serial # serial no file private_key = $dir/private/cakey.pem# CA private key RANDFILE = $dir/private/.rand # random number file default_days = 3650 # how long to certify for default_crl_days= 30 # how long before next CRL default_md = md5 # md to use policy = policy # default policy email_in_dn = no # Don't add the email into cert DN name_opt = ca_default # Subject name display option cert_opt = ca_default # Certificate display option copy_extensions = none # Don't copy extensions from request unique_subject = no # Allow certs with duplicate subjects # For the CA policy [ policy ] countryName = optional stateOrProvinceName = optional organizationName = match organizationalUnitName = optional commonName = supplied emailAddress = optional EOF fi # Create certificate authority. 
if test $keytype = dsa; then newkey=dsa:dsaparam.pem else newkey=rsa:$bits fi openssl req -config ca.cnf -nodes \ -newkey $newkey -keyout private/cakey.pem -out careq.pem \ 1>&3 2>&3 openssl ca -config ca.cnf -create_serial -out cacert.pem \ -days 3650 -batch -keyfile private/cakey.pem -selfsign \ -infiles careq.pem 1>&3 2>&3 chmod 0700 private/cakey.pem cd "$oldpwd" done exit 0 fi one_arg() { if test -z "$arg1" || test -n "$arg2"; then echo "$0: $command must have exactly one argument; use --help for help" >&2 exit 1 fi } one_or_two_args() { if test -z "$arg1"; then echo "$0: $command must have one or two arguments; use --help for help" >&2 exit 1 fi } must_not_exist() { if test -e "$1" && test "$force" != "yes"; then echo "$0: $1 already exists and --force not supplied" >&2 exit 1 fi } make_tmpdir() { TMP=/tmp/ovs-pki.tmp$$ rm -rf $TMP trap "rm -rf $TMP" 0 mkdir -m 0700 $TMP } fingerprint() { file=$1 name=${1-$2} date=$(file_mod_date "$file") if grep -e '-BEGIN CERTIFICATE-' "$file" > /dev/null; then fingerprint=$(openssl x509 -noout -in "$file" -fingerprint | sed 's/SHA1 Fingerprint=//' | tr -d ':') else fingerprint=$(sha1sum "$file" | awk '{print $1}') fi printf "$name\\t$date\\n" case $file in $fingerprint*) printf "\\t(correct fingerprint in filename)\\n" ;; *) printf "\\tfingerprint $fingerprint\\n" ;; esac } verify_fingerprint() { fingerprint "$@" if test $batch != yes; then echo "Does fingerprint match? 
(yes/no)" read answer if test "$answer" != yes; then echo "Match failure, aborting" >&2 exit 1 fi fi } check_type() { if test x = x"$1"; then type=switch elif test "$1" = switch || test "$1" = controller; then type=$1 else echo "$0: type argument must be 'switch' or 'controller'" >&2 exit 1 fi } parse_age() { number=$(echo $1 | sed 's/^\([0-9]\+\)\([[:alpha:]]\+\)/\1/') unit=$(echo $1 | sed 's/^\([0-9]\+\)\([[:alpha:]]\+\)/\2/') case $unit in s) factor=1 ;; min) factor=60 ;; h) factor=3600 ;; day) factor=86400 ;; *) echo "$1: age not in the form Ns, Nmin, Nh, Nday (e.g. 1day)" >&2 exit 1 ;; esac echo $(($number * $factor)) } must_exist() { if test ! -e "$1"; then echo "$0: $1 does not exist" >&2 exit 1 fi } pkidir_must_exist() { if test ! -e "$pkidir"; then echo "$0: $pkidir does not exist (need to run 'init' or use '--dir'?)" >&2 exit 1 elif test ! -d "$pkidir"; then echo "$0: $pkidir is not a directory" >&2 exit 1 fi } make_request() { must_not_exist "$arg1-privkey.pem" must_not_exist "$arg1-req.pem" make_tmpdir # Use uuidgen or date to create unique subject DNs. unique=`(uuidgen) 2>/dev/null` || unique=`date +"%Y %b %d %T"` cat > "$TMP/req.cnf" <&3 2>&3 \ || exit $? else must_exist "$dsaparam" (umask 077 && openssl gendsa -out "$1-privkey.pem" "$dsaparam") \ 1>&3 2>&3 || exit $? 
fi openssl req -config "$TMP/req.cnf" -new -text \ -key "$1-privkey.pem" -out "$1-req.pem" 1>&3 2>&3 } sign_request() { must_exist "$1" must_not_exist "$2" pkidir_must_exist (cd "$pkidir/${type}ca" && openssl ca -config ca.cnf -batch -in /dev/stdin) \ < "$1" > "$2.tmp$$" 2>&3 mv "$2.tmp$$" "$2" } glob() { files=$(echo $1) if test "$files" != "$1"; then echo "$files" fi } exec 3>>$log || true if test "$command" = req; then one_arg make_request "$arg1" fingerprint "$arg1-req.pem" elif test "$command" = sign; then one_or_two_args check_type "$arg2" verify_fingerprint "$arg1-req.pem" sign_request "$arg1-req.pem" "$arg2-cert.pem" elif test "$command" = req+sign; then one_or_two_args check_type "$arg2" pkidir_must_exist make_request "$arg1" sign_request "$arg1-req.pem" "$arg1-cert.pem" fingerprint "$arg1-req.pem" elif test "$command" = verify; then one_or_two_args must_exist "$arg1-cert.pem" check_type "$arg2" pkidir_must_exist openssl verify -CAfile "$pkidir/${type}ca/cacert.pem" "$arg1-cert.pem" elif test "$command" = fingerprint; then one_arg fingerprint "$arg1" elif test "$command" = self-sign; then one_arg must_exist "$arg1-req.pem" must_exist "$arg1-privkey.pem" must_not_exist "$arg1-cert.pem" # Create both the private key and certificate with restricted permissions. (umask 077 && \ openssl x509 -in "$arg1-req.pem" -out "$arg1-cert.pem.tmp" \ -signkey "$arg1-privkey.pem" -req -days 3650 -text) 2>&3 || exit $? # Reset the permissions on the certificate to the user's default. cat "$arg1-cert.pem.tmp" > "$arg1-cert.pem" rm -f "$arg1-cert.pem.tmp" else echo "$0: $command command unknown; use --help for help" >&2 exit 1 fi openvswitch-2.0.1+git20140120/utilities/ovs-save000077500000000000000000000147151226605124000211660ustar00rootroot00000000000000#! /bin/sh # Copyright (c) 2011, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. usage() { UTIL=$(basename $0) cat < /dev/null 2>&1; then :; else echo "$0: ip not found in $PATH" >&2 exit 1 fi if test "$#" = 0; then exit 0 fi devs="$@" for dev in $devs; do state=`ip link show dev $dev` || continue echo "# $dev" # Link state (Ethernet addresses, up/down, ...) linkcmd= case $state in *"state UP"* | *[,\<]"UP"[,\>]* ) linkcmd="$linkcmd up" ;; *"state DOWN"*) linkcmd="$linkcmd down" ;; esac if expr "$state" : '.*\bdynamic\b' > /dev/null; then linkcmd="$linkcmd dynamic" fi if qlen=`expr "$state" : '.*qlen \([0-9]+\)'`; then linkcmd="$linkcmd txqueuelen $qlen" fi if hwaddr=`expr "$state" : '.*link/ether \([^ ]*\)'`; then linkcmd="$linkcmd address $hwaddr" fi if brd=`expr "$state" : '.*brd \([^ ]*\)'`; then linkcmd="$linkcmd broadcast $brd" fi if mtu=`expr "$state" : '.*mtu \([0-9]+\)'`; then linkcmd="$linkcmd mtu $mtu" fi if test -n "$linkcmd"; then echo ip link set dev $dev down # Required to change hwaddr. echo ip link set dev $dev $linkcmd fi # IP addresses (including IPv6). echo "ip addr flush dev $dev 2>/dev/null" # Suppresses "Nothing to flush". ip addr show dev $dev | while read addr; do set -- $addr # Check and trim family. family=$1 shift case $family in inet | inet6) ;; *) continue ;; esac # Trim device off the end--"ip" insists on having "dev" precede it. addrcmd= while test $# != 0; do case $1 in dynamic) # Omit kernel-maintained route. continue 2 ;; scope) if test "$2" = link; then # Omit route derived from IP address, e.g. # 172.16.0.0/16 derived from 172.16.12.34. 
# Prints on stdout a shell script that re-adds the current OpenFlow flows
# of each bridge named in "$@" using "ovs-ofctl add-flows".
#
# The NXST_FLOW reply header and the idle_age/hard_age fields are removed
# from the "ovs-ofctl dump-flows" output before it is embedded.
# Exits with status 1 if ovs-ofctl cannot be run.
save_flows () {
    if (ovs-ofctl --version) > /dev/null 2>&1; then :; else
        echo "$0: ovs-ofctl not found in $PATH" >&2
        exit 1
    fi

    for bridge in "$@"; do
        echo "ovs-ofctl add-flows ${bridge} - << EOF"
        ovs-ofctl dump-flows "${bridge}" | sed -e '/NXST_FLOW/d' \
            -e 's/\(idle\|hard\)_age=[^,]*,//g'
        echo "EOF"
    done
}

# Runs ovs-vsctl with --no-wait followed by the given arguments.
ovs_vsctl () {
    ovs-vsctl --no-wait "$@"
}

# Prints on stdout "ovs-vsctl" commands that request, for every interface
# of each bridge named in "$@", the OpenFlow port number it has right now.
#
# Interfaces are batched 50 per emitted command.
# Exits with status 1 if ovs-vsctl cannot be run.
save_ofports () {
    if (ovs-vsctl --version) > /dev/null 2>&1; then :; else
        echo "$0: ovs-vsctl not found in $PATH" >&2
        exit 1
    fi

    for bridge in "$@"; do
        count=0
        # Start every bridge with an empty batch; otherwise a bridge without
        # interfaces would print the previous bridge's command a second time.
        cmd=""

        for iface in `ovs_vsctl list-ifaces "${bridge}"`; do
            ofport=`ovs_vsctl get interface ${iface} ofport`
            [ "${count}" -eq 0 ] && cmd="ovs-vsctl --no-wait"
            cmd="${cmd} -- --if-exists set interface ${iface} ofport_request=${ofport}"

            # Run set interface command on 50 ports at a time.
            count=`expr ${count} + 1`
            if [ "${count}" -eq 50 ]; then
                echo "${cmd}"
                cmd=""
                count=0
            fi
        done

        # Flush the last, partially filled batch (if there is one).
        if [ -n "${cmd}" ]; then
            echo "${cmd}"
        fi
    done
}

# Command dispatch: the first argument selects the action and the remaining
# arguments are handed to it.  Every branch exits, so at most one command is
# processed; with no arguments the script exits successfully.
while [ $# -ne 0 ]
do
    case $1 in
        "save-flows")
            shift
            save_flows "$@"
            exit 0
            ;;
        "save-interfaces")
            shift
            save_interfaces "$@"
            exit 0
            ;;
        "save-ofports")
            shift
            save_ofports "$@"
            exit 0
            ;;
        -h | --help)
            usage
            exit 0
            ;;
        *)
            echo >&2 "$0: unknown command \"$1\" (use --help for help)"
            exit 1
            ;;
    esac
done

exit 0
argv0 = sys.argv[0]

# One hex-dump line of "tcpdump -xx" output: leading whitespace, a
# hexadecimal byte offset, then space-separated groups of hex digits.  A
# group is normally two bytes (four digits); the last group of an
# odd-length packet holds a single byte (two digits).
HEX_LINE_RE = re.compile(
    r'^\s+0x([0-9a-fA-F]+): ((?: (?:[0-9a-fA-F]{2}){1,2})+)')


def usage():
    """Print the help text on stdout and exit with status 0."""
    print("""\
%(argv0)s: print "tcpdump -xx" output as hex
usage: %(argv0)s < FILE
where FILE is output from "tcpdump -xx".

The following options are also available:
  -h, --help                  display this help message
  -V, --version               display version information\
""" % {'argv0': argv0})
    sys.exit(0)


def hex_packets(lines):
    """Yield each packet found in "tcpdump -xx" output as one hex string.

    'lines' is any iterable of text lines.  Consecutive hex-dump lines are
    concatenated into a single packet; a packet ends at the first line that
    is not a hex-dump line or whose offset is 0 (the start of the next
    packet).  Lines without hex data produce no output.
    """
    packet = ''
    for line in lines:
        m = HEX_LINE_RE.match(line)
        # The offset is hexadecimal: parsing it in base 10 raised ValueError
        # on offsets such as "00a0", i.e. on any packet above 160 bytes.
        if m is None or int(m.group(1), 16) == 0:
            if packet != '':
                yield packet
            packet = ''
        if m:
            packet += re.sub(r'\s', '', m.group(2))
    if packet != '':
        yield packet


if __name__ == "__main__":
    try:
        options, args = getopt.gnu_getopt(sys.argv[1:], 'hV',
                                          ['help', 'version'])
    except getopt.GetoptError as geo:
        sys.stderr.write("%s: %s\n" % (argv0, geo.msg))
        sys.exit(1)

    for key, value in options:
        if key in ['-h', '--help']:
            usage()
        elif key in ['-V', '--version']:
            print("ovs-tcpundump (Open vSwitch) @VERSION@")
            # Do not go on to wait for packet data on stdin.
            sys.exit(0)
        else:
            sys.exit(0)

    if len(args) != 0:
        sys.stderr.write("%s: non-option argument not supported "
                         "(use --help for help)\n" % argv0)
        sys.exit(1)

    # Read with readline() so that each packet is printed as soon as the
    # line that terminates it has arrived, as before.
    for packet in hex_packets(iter(sys.stdin.readline, "")):
        print(packet)

# Local variables:
# mode: python
# End:
.SH SYNOPSIS \fBovs\-test\fR \fB\-s\fR \fIport\fR .PP \fBovs\-test\fR \fB\-c\fR \fIserver1\fR \fIserver2\fR [\fB\-b\fR \fItargetbandwidth\fR] [\fB\-i\fR \fItestinterval\fR] [\fB\-d\fR] [\fB\-l\fR \fIvlantag\fR] [\fB\-t\fR \fItunnelmodes\fR] .so lib/common-syn.man . .SH DESCRIPTION The \fBovs\-test\fR program may be used to check for problems sending 802.1Q or GRE traffic that Open vSwitch may uncover. These problems, for example, can occur when Open vSwitch is used to send 802.1Q traffic through physical interfaces running certain drivers of certain Linux kernel versions. To run a test, configure IP addresses on \fIserver1\fR and \fIserver2\fR for the interfaces you intend to test. These interfaces could also be already configured OVS bridges that have a physical interface attached to them. Then, on one of the nodes, run \fBovs\-test\fR in server mode and on the other node run it in client mode. The client will connect to the \fBovs\-test\fR server and schedule tests between both of them. The \fBovs\-test\fR client will perform UDP and TCP tests. .PP UDP tests can report packet loss and achieved bandwidth for various datagram sizes. By default the target bandwidth for UDP tests is 1Mbit/s. .PP TCP tests report only achieved bandwidth, because the kernel TCP stack takes care of flow control and packet loss. TCP tests are essential to detect potential TSO related issues. .PP To determine whether Open vSwitch is encountering any problems, the user must compare packet loss and achieved bandwidth in a setup where traffic is being directly sent and in one where it is not. If in the 802.1Q or L3 tunneled tests both \fBovs\-test\fR processes are unable to communicate or the achieved bandwidth is much lower compared to the direct setup, then, most likely, Open vSwitch has encountered a pre-existing kernel or driver bug. .PP Some examples of the types of problems that may be encountered are: .so utilities/ovs-vlan-bugs.man .
.SS "Client Mode" An \fBovs\-test\fR client will connect to two \fBovs\-test\fR servers and will ask them to exchange test traffic. It is also possible to spawn an \fBovs\-test\fR server automatically from the client. . .SS "Server Mode" To conduct tests, two \fBovs\-test\fR servers must be running on two different hosts where the client can connect. The actual test traffic is exchanged only between both \fBovs\-test\fR servers. It is recommended that both servers have their IP addresses in the same subnet, otherwise one would have to make sure that routing is set up correctly. . .SH OPTIONS . .IP "\fB\-s \fIport\fR" .IQ "\fB\-\-server\fR \fIport\fR" Run in server mode and wait for the client to establish XML RPC Control Connection on this TCP \fIport\fR. It is recommended to have \fBethtool\fR(8) installed on the server so that it could retrieve information about the NIC driver. . .IP "\fB\-c \fIserver1\fR \fIserver2\fR" .IQ "\fB\-\-client \fIserver1\fR \fIserver2\fR" Run in client mode and schedule tests between \fIserver1\fR and \fIserver2\fR, where each \fIserver\fR must be given in the following format - \fIOuterIP[:OuterPort],InnerIP[/Mask][:InnerPort]\fR. The \fIOuterIP\fR must be already assigned to the physical interface which is going to be tested. This is the IP address where client will try to establish XML RPC connection. If \fIOuterIP\fR is 127.0.0.1 then client will automatically spawn a local instance of \fBovs\-test\fR server. \fIOuterPort\fR is TCP port where server is listening for incoming XML/RPC control connections to schedule tests (by default it is 15531). The \fBovs\-test\fR will automatically assign \fIInnerIP[/Mask]\fR to the interfaces that will be created on the fly for testing purposes. It is important that \fIInnerIP[/Mask]\fR does not interfere with already existing IP addresses on both \fBovs\-test\fR servers and client. 
\fIInnerPort\fR is port which will be used by server to listen for test traffic that will be encapsulated (by default it is 15532). . .IP "\fB\-b \fItargetbandwidth\fR" .IQ "\fB\-\-bandwidth\fR \fItargetbandwidth\fR" Target bandwidth for UDP tests. The \fItargetbandwidth\fR must be given in bits per second. It is possible to use postfix M or K to alter the target bandwidth magnitude. . .IP "\fB\-i \fItestinterval\fR" .IQ "\fB\-\-interval\fR \fItestinterval\fR" How long each test should run. By default 5 seconds. . .so lib/common.man . .SH "Test Modes" The following test modes are supported by \fBovs\-test\fR. It is possible to combine multiple of them in a single \fBovs\-test\fR invocation. . .IP "\fB\-d \fR" .IQ "\fB\-\-direct\fR" Perform direct tests between both \fIOuterIP\fR addresses. These tests could be used as a reference to compare 802.1Q or L3 tunneling test results. . .IP "\fB\-l \fIvlantag\fR" .IQ "\fB\-\-vlan\-tag\fR \fIvlantag\fR" Perform 802.1Q tests between both servers. These tests will create a temporary OVS bridge, if necessary, and attach a VLAN tagged port to it for testing purposes. . .IP "\fB\-t \fItunnelmodes\fR" .IQ "\fB\-\-tunnel\-modes\fR \fItunnelmodes\fR" Perform L3 tunneling tests. The given argument is a comma separated string that specifies all the L3 tunnel modes that should be tested (e.g. gre). The L3 tunnels are terminated on interface that has the \fIOuterIP\fR address assigned. . .SH EXAMPLES .PP On host 1.2.3.4 start \fBovs\-test\fR in server mode: .IP .B ovs\-test \-s 15531 . .PP On host 1.2.3.5 start \fBovs\-test\fR in client mode and do direct, VLAN and GRE tests between both nodes: .IP .B ovs\-test \-c 127.0.0.1,1.1.1.1/30 1.2.3.4,1.1.1.2/30 -d -l 123 -t gre . .SH SEE ALSO . .BR ovs\-vswitchd (8), .BR ovs\-ofctl (8), .BR ovs\-vsctl (8), .BR ovs\-vlan\-test (8), .BR ethtool (8), .BR uname (1) openvswitch-2.0.1+git20140120/utilities/ovs-test.in000066400000000000000000000112041226605124000215770ustar00rootroot00000000000000#! 
def collect_information(node):
    """Print information about a host that will take part in testing.

    'node' is indexed as node[0] (the address the ovs-test server is
    reached on) and node[1] (its control port).  The host is queried
    through util.rpc_client() for the interface that carries node[0], the
    driver and MTU of the underlying physical interface and the kernel
    version.

    Returns the MTU reported for the physical interface, or 1500 when no
    such interface was found.
    """
    print("Node %s:%u " % (node[0], node[1]))
    server = util.rpc_client(node[0], node[1])
    interface_name = server.get_interface(node[0])
    phys_iface = None
    uname = server.uname()
    mtu = 1500

    if not interface_name:
        # The two literals are joined by the parser, so the separating
        # space has to be part of the first one.
        print("Could not find interface that has %s IP address. "
              "Make sure that you specified correct Outer IP." % (node[0]))
    else:
        if server.is_ovs_bridge(interface_name):
            phys_iface = server.get_iface_from_bridge(interface_name)
        else:
            phys_iface = interface_name

    if phys_iface:
        driver = server.get_driver(phys_iface)
        mtu = server.get_interface_mtu(phys_iface)

        print("Will be using %s (%s) with MTU %u" % (phys_iface, node[0],
                                                     mtu))
        if not driver:
            print("Unable to get driver information from ethtool.")
        else:
            print("On this host %s has %s." % (phys_iface, driver))

    if not uname:
        print("Unable to retrieve kernel information. Is this Linux?")
    else:
        print("Running kernel %s." % uname)
    print("\n")

    return mtu


if __name__ == '__main__':
    # Set only when this process spawns its own ovs-test server; that
    # server is terminated again in the "finally" clause below.
    local_server = None
    try:
        ovs_args = args.ovs_initialize_args()

        if ovs_args.port is not None:  # Start in pure server mode
            rpcserver.start_rpc_server(ovs_args.port)

        elif ovs_args.servers is not None:  # Run in client mode
            node1 = ovs_args.servers[0]
            node2 = ovs_args.servers[1]

            # Verify whether client will need to spawn a local instance of
            # ovs-test server by looking at the first OuterIP. if it is a
            # 127.0.0.1 then spawn local ovs-test server.
            if node1[0] == "127.0.0.1":
                local_server = util.start_local_server(node1[1])
                # We must determine the IP address that local ovs-test server
                # will use:
                me = util.rpc_client(node1[0], node1[1])
                my_ip = me.get_my_address_from(node2[0], node2[1])
                node1 = (my_ip, node1[1], node1[2], node1[3])

            mtu_node2 = collect_information(node2)
            mtu_node1 = collect_information(node1)

            bandwidth = ovs_args.targetBandwidth
            interval = ovs_args.testInterval
            ps = util.get_datagram_sizes(mtu_node1, mtu_node2)

            direct = ovs_args.direct
            vlan_tag = ovs_args.vlanTag
            tunnel_modes = ovs_args.tunnelModes

            if direct is not None:
                print("Performing direct tests")
                tests.do_direct_tests(node2, node1, bandwidth, interval, ps)

            if vlan_tag is not None:
                print("Performing VLAN tests")
                tests.do_vlan_tests(node2, node1, bandwidth, interval, ps,
                                    vlan_tag)

            for tmode in tunnel_modes:
                print("Performing %s tests" % tmode)
                tests.do_l3_tests(node2, node1, bandwidth, interval, ps,
                                  tmode)

    except KeyboardInterrupt:
        pass
    except xmlrpclib.Fault:
        print("Couldn't establish XMLRPC control channel")
    except socket.error:
        print("Couldn't establish XMLRPC control channel")
    except xmlrpclib.ProtocolError:
        print("XMLRPC control channel was abruptly terminated")
    except twisted.internet.error.CannotListenError:
        print("Couldn't start XMLRPC server on port %u" % ovs_args.port)
    finally:
        if local_server is not None:
            local_server.terminate()
openvswitch-2.0.1+git20140120/utilities/ovs-vlan-bug-workaround.8.in000066400000000000000000000055271226605124000247050ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .TH ovs\-vlan\-bug\-workaround 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .ds PN ovs\-vlan\-bug\-workaround . .SH NAME ovs\-vlan\-bug\-workaround \- utility for configuring Linux VLAN driver bug workaround . .SH SYNOPSIS \fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBon\fR .br \fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBoff\fR .br \fBovs\-vlan\-bug\-workaround \-\-help .br \fBovs\-vlan\-bug\-workaround \-\-version .SH DESCRIPTION . .PP Some Linux network drivers support a feature called ``VLAN acceleration''. VLAN acceleration is associated with a data structure called a \fBvlan_group\fR that is, abstractly, a dictionary that maps from a VLAN ID (in the range 0 to 4095) to a VLAN device, that is, a Linux network device associated with a particular VLAN, e.g. \fBeth0.9\fR for VLAN 9 on \fBeth0\fR. .PP Some drivers that support VLAN acceleration have bugs that fall roughly into the categories listed below. \fBovs\-vlan\-test\fR(8) can test for these driver bugs. .so utilities/ovs-vlan-bugs.man .PP .PP The correct long term solution is to fix these driver bugs. .PP For now, \fBovs\-vlan\-bug\-workaround\fR can enable a special-purpose workaround for devices with buggy VLAN acceleration. A kernel patch must be applied for this workaround to work. .PP Use the command \fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBon\fR to enable the VLAN driver bug workaround for network device \fInetdev\fR. Use the command \fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBoff\fR to disable the VLAN driver bug workaround for network device \fInetdev\fR. .SH "DRIVER DETAILS" .PP The following drivers in Linux version 2.6.32.12-0.7.1.xs1.0.0.311.170586 implement VLAN acceleration and are relevant to Open vSwitch on XenServer. 
We have not tested any version of most of these drivers, so we do not know whether they have a VLAN problem that needs to be fixed. The drivers are listed by the name that they report in, e.g., \fBethtool \-i\fR output; in a few cases this differs slightly from the name of the module's \fB.ko\fR file: . .nf .ta T 1i \fB8139cp acenic amd8111e atl1c ATL1E atl1 atl2 be2net bna bnx2 bnx2x cnic cxgb cxgb3 e1000 e1000e enic forcedeth igb igbvf ixgb ixgbe jme ml4x_core ns83820 qlge r8169 S2IO sky2 starfire tehuti tg3 typhoon via-velocity vxge .fi .PP The following drivers use \fBvlan_group\fR but are irrelevant to Open vSwitch on XenServer: .IP "\fBbonding\fR" Not used with Open vSwitch on XenServer. .IP "\fBgianfar\fR" Not shipped with XenServer. A FreeScale CPU-integrated device. .IP "\fBehea\fR" Cannot be built on x86. IBM Power architecture only. .IP "\fBstmmac\fR" Cannot be built on x86. SH4 architecture only. .IP "\fBvmxnet3\fR" Not shipped with XenServer. For use inside VMware VMs only. . .SH OPTIONS . .so lib/common.man . .SH BUGS . Obviously. . .SH "SEE ALSO" . .BR ovs\-vlan\-test (8). openvswitch-2.0.1+git20140120/utilities/ovs-vlan-bug-workaround.c000066400000000000000000000073301226605124000243450ustar00rootroot00000000000000/* * Copyright (c) 2011 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include "command-line.h" #include "util.h" #define ADD_ALL_VLANS_CMD 10 #define DEL_ALL_VLANS_CMD 11 static void usage(void); static void parse_options(int argc, char *argv[]); int main(int argc, char *argv[]) { struct vlan_ioctl_args vlan_args; const char *netdev, *setting; int fd; set_program_name(argv[0]); parse_options(argc, argv); if (argc - optind != 2) { ovs_fatal(0, "exactly two non-option arguments are required " "(use --help for help)"); } memset(&vlan_args, 0, sizeof vlan_args); /* Get command. */ setting = argv[optind + 1]; if (!strcmp(setting, "on")) { vlan_args.cmd = ADD_ALL_VLANS_CMD; } else if (!strcmp(setting, "off")) { vlan_args.cmd = DEL_ALL_VLANS_CMD; } else { ovs_fatal(0, "second command line argument must be \"on\" or \"off\" " "(not \"%s\")", setting); } /* Get network device name. */ netdev = argv[optind]; if (strlen(netdev) >= IFNAMSIZ) { ovs_fatal(0, "%s: network device name too long", netdev); } strcpy(vlan_args.device1, netdev); /* Execute operation. */ fd = socket(AF_INET, SOCK_STREAM, 0); if (fd < 0) { ovs_fatal(errno, "socket creation failed"); } if (ioctl(fd, SIOCSIFVLAN, &vlan_args) < 0) { if (errno == ENOPKG) { ovs_fatal(0, "operation failed (8021q module not loaded)"); } else if (errno == EOPNOTSUPP) { ovs_fatal(0, "operation failed (kernel does not support the " "VLAN bug workaround)"); } else { ovs_fatal(errno, "operation failed"); } } close(fd); return 0; } static void usage(void) { printf("\ %s, for enabling or disabling the kernel VLAN bug workaround\n\ usage: %s NETDEV SETTING\n\ where NETDEV is a network device (e.g. 
\"eth0\")\n\ and SETTING is \"on\" to enable the workaround or \"off\" to disable it.\n\ \n\ Options:\n\ -h, --help Print this helpful information\n\ -V, --version Display version information\n", program_name, program_name); exit(EXIT_SUCCESS); } static void parse_options(int argc, char *argv[]) { static const struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { int option; option = getopt_long(argc, argv, "+t:hVe", long_options, NULL); if (option == -1) { break; } switch (option) { case 'h': usage(); break; case 'V': ovs_print_version(0, 0); exit(EXIT_SUCCESS); case '?': exit(EXIT_FAILURE); default: NOT_REACHED(); } } free(short_options); } openvswitch-2.0.1+git20140120/utilities/ovs-vlan-bugs.man000066400000000000000000000017341226605124000226720ustar00rootroot00000000000000.IP \(bu When NICs use VLAN stripping on receive they must pass a pointer to a \fBvlan_group\fR when reporting the stripped tag to the networking core. If no \fBvlan_group\fR is in use then some drivers just drop the extracted tag. Drivers are supposed to only enable stripping if a \fBvlan_group\fR is registered but not all of them do that. . .IP \(bu On receive, some drivers handle priority tagged packets specially and don't pass the tag onto the network stack at all, so Open vSwitch never has a chance to see it. . .IP \(bu Some drivers size their receive buffers based on whether a \fBvlan_group\fR is enabled, meaning that a maximum size packet with a VLAN tag will not fit if no \fBvlan_group\fR is configured. . .IP \(bu On transmit, some drivers expect that VLAN acceleration will be used if it is available, which can only be done if a \fBvlan_group\fR is configured. In these cases, the driver may fail to parse the packet and correctly setup checksum offloading or TSO. 
openvswitch-2.0.1+git20140120/utilities/ovs-vlan-test.8.in000066400000000000000000000063541226605124000227150ustar00rootroot00000000000000.de IQ . br . ns . IP "\\$1" .. .TH ovs\-vlan\-test 1 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" . .SH NAME \fBovs\-vlan\-test\fR \- check Linux drivers for problems with vlan traffic . .SH SYNOPSIS \fBovs\-vlan\-test\fR [\fB\-s\fR | \fB\-\-server\fR] \fIcontrol_ip\fR \fIvlan_ip\fR .so lib/common-syn.man . .SH DESCRIPTION The \fBovs\-vlan\-test\fR utility has some limitations, for example, it does not use TCP in its tests. Also it does not take into account MTU to detect potential edge cases. To overcome those limitations a new tool was developed \- \fBovs\-test\fR. \fBovs\-test\fR is currently supported only on Debian so, if possible, try to use that one instead of \fBovs\-vlan\-test\fR. .PP The \fBovs\-vlan\-test\fR program may be used to check for problems sending 802.1Q traffic which may occur when running Open vSwitch. These problems can occur when Open vSwitch is used to send 802.1Q traffic through physical interfaces running certain drivers of certain Linux kernel versions. To run a test, configure Open vSwitch to tag traffic originating from \fIvlan_ip\fR and forward it out the target interface. Then run the \fBovs\-vlan\-test\fR in client mode connecting to an \fBovs\-vlan\-test\fR server. \fBovs\-vlan\-test\fR will display "OK" if it did not detect problems. .PP Some examples of the types of problems that may be encountered are: .so utilities/ovs-vlan-bugs.man . .SS "Client Mode" An \fBovs\-vlan\-test\fR client may be run on a host to check for VLAN connectivity problems. The client must be able to establish HTTP connections with an \fBovs\-vlan\-test\fR server located at the specified \fIcontrol_ip\fR address. UDP traffic sourced at \fIvlan_ip\fR should be tagged and directed out the interface whose connectivity is being tested. .
.SS "Server Mode" To conduct tests, an \fBovs\-vlan\-test\fR server must be running on a host known not to have VLAN connectivity problems. The server must have a \fIcontrol_ip\fR on a non\-VLAN network which clients can establish connectivity with. It must also have a \fIvlan_ip\fR address on a VLAN network which clients will use to test their VLAN connectivity. Multiple clients may test against a single \fBovs\-vlan\-test\fR server concurrently. . .SH OPTIONS . .IP "\fB\-s\fR" .IQ "\fB\-\-server\fR" Run in server mode. . .so lib/common.man .SH EXAMPLES Display the Linux kernel version and driver of \fBeth1\fR. .IP .B uname \-r .IP .B ethtool \-i eth1 . .PP Set up a bridge which forwards traffic originating from \fB1.2.3.4\fR out \fBeth1\fR with VLAN tag 10. .IP .B ovs\-vsctl \-\- add\-br vlan\-br \(rs .IP .B \-\- add\-port vlan\-br eth1 \(rs .IP .B \-\- add\-port vlan\-br vlan\-br\-tag tag=10 \(rs .IP .B \-\- set Interface vlan\-br\-tag type=internal .IP .B ifconfig vlan\-br\-tag up 1.2.3.4 . .PP Run an \fBovs\-vlan\-test\fR server listening for client control traffic on 172.16.0.142 port 8080 and VLAN traffic on the default port of 1.2.3.3. .IP .B ovs\-vlan\-test \-s 172.16.0.142:8080 1.2.3.3 . .PP Run an \fBovs\-vlan\-test\fR client with a control server located at 172.16.0.142 port 8080 and a local VLAN ip of 1.2.3.4. .IP .B ovs\-vlan\-test 172.16.0.142:8080 1.2.3.4 . .SH SEE ALSO . .BR ovs\-vswitchd (8), .BR ovs\-ofctl (8), .BR ovs\-vsctl (8), .BR ovs\-test (8), .BR ethtool (8), .BR uname (1) openvswitch-2.0.1+git20140120/utilities/ovs-vlan-test.in000077500000000000000000000274421226605124000225530ustar00rootroot00000000000000#! @PYTHON@ # # Copyright (c) 2010 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
print_safe_lock = threading.Lock()


def print_safe(s):
    """Print 's' while holding a lock so that threads do not interleave."""
    print_safe_lock.acquire()
    try:
        print(s)
    finally:
        # Release even if print() raises; otherwise every later caller
        # would block forever.
        print_safe_lock.release()


def start_thread(target, args):
    """Run target(*args) in a new daemon thread and return the thread."""
    t = threading.Thread(target=target, args=args)
    t.setDaemon(True)
    t.start()
    return t


#Caller is responsible for catching socket.error exceptions.
def send_packet(key, length, dest_ip, dest_port):
    """Send one UDP datagram carrying 'key' to dest_ip:dest_port.

    'length' is the intended size including 20 bytes of IP header and 8
    bytes of UDP header; the payload is str(key) padded with NUL bytes.
    """
    length -= 20 + 8  #IP and UDP headers.
    packet = str(key)
    packet += chr(0) * (length - len(packet))

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        sock.sendto(packet, (dest_ip, dest_port))
    finally:
        sock.close()


#UDP Receiver
class UDPReceiver:
    """Receives test datagrams and runs the callback registered for each.

    A datagram is identified by the decimal key at its start (see
    send_packet()).
    """

    def __init__(self, vlan_ip, vlan_port):
        self.vlan_ip = vlan_ip
        self.vlan_port = vlan_port
        # Maps a key to the function to call when its datagram arrives.
        self.recv_callbacks = {}
        # The receiver thread runs for as long as this stays True.
        self.udp_run = False

    def recv_packet(self, key, success_callback, timeout_callback):
        """Wait in the background for the datagram identified by 'key'.

        Calls success_callback() when it arrives, or timeout_callback()
        after 30 seconds.  Returns a threading.Event that is set once
        either callback has run.
        """
        event = threading.Event()

        def timeout_cb():
            # Forget the key so that abandoned entries do not pile up.
            self.recv_callbacks.pop(key, None)
            timeout_callback()
            event.set()

        timer = threading.Timer(30, timeout_cb)
        timer.daemon = True

        def success_cb():
            timer.cancel()
            success_callback()
            event.set()

        # Start the timer first to avoid a timer.cancel() race condition.
        timer.start()

        self.recv_callbacks[key] = success_cb
        return event

    def udp_receiver(self):
        """Receiver thread body: dispatch datagrams until stop() is called.

        Terminates the whole process if the socket cannot be bound or read.
        """
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        # Wake up once per second so that a stop() request is noticed.
        sock.settimeout(1)

        try:
            try:
                sock.bind((self.vlan_ip, self.vlan_port))
            except socket.error as e:
                print_safe('Failed to bind to %s:%d with error: %s'
                           % (self.vlan_ip, self.vlan_port, e))
                os._exit(1)  #sys.exit only exits the current thread.

            while self.udp_run:
                try:
                    data, _ = sock.recvfrom(4096)
                except socket.timeout:
                    continue
                except socket.error as e:
                    print_safe('Failed to receive from %s:%d with error: %s'
                               % (self.vlan_ip, self.vlan_port, e))
                    os._exit(1)

                data_str = data.split(chr(0))[0]
                if not data_str.isdigit():
                    continue

                # pop() looks the key up and removes it in one step, so a
                # timeout firing at the same moment cannot make this fail.
                callback = self.recv_callbacks.pop(int(data_str), None)
                if callback is not None:
                    callback()
        finally:
            sock.close()

    def start(self):
        """Start the receiver thread."""
        self.udp_run = True
        start_thread(self.udp_receiver, ())

    def stop(self):
        """Ask the receiver thread to finish."""
        self.udp_run = False


#Server
vlan_server = None


class VlanServer:
    """HTTP control server that runs send and receive tests for clients."""

    def __init__(self, server_ip, server_port, vlan_ip, vlan_port):
        # VlanServerHandler reaches the server through this global.
        global vlan_server
        vlan_server = self

        self.server_ip = server_ip
        self.server_port = server_port

        # Prefix of the reply to '/start/recv'; the test id is appended.
        self.recv_response = '%s:%d:' % (vlan_ip, vlan_port)

        # Maps a test id to its outcome; guarded by result_lock.
        self.result = {}
        self.result_lock = threading.Lock()

        self._test_id = 0
        self._test_id_lock = threading.Lock()

        self.udp_recv = UDPReceiver(vlan_ip, vlan_port)

    def get_test_id(self):
        """Return a new test id; ids start at 1 and increase by one."""
        self._test_id_lock.acquire()
        try:
            self._test_id += 1
            return self._test_id
        finally:
            self._test_id_lock.release()

    def set_result(self, key, value):
        """Record 'value' as the outcome of test 'key' unless one is set."""
        self.result_lock.acquire()
        try:
            if key not in self.result:
                self.result[key] = value
        finally:
            self.result_lock.release()

    def recv(self, test_id):
        """Start waiting for the client's datagram for 'test_id'.

        Returns the '<vlan_ip>:<vlan_port>:<test_id>' reply for the client.
        """
        self.udp_recv.recv_packet(test_id,
                                  lambda: self.set_result(test_id, 'Success'),
                                  lambda: self.set_result(test_id, 'Timeout'))

        return self.recv_response + str(test_id)

    def send(self, test_id, data):
        """Start sending datagrams for 'test_id' in the background.

        'data' has the form '<ip>:<port>:<size>'.  Returns str(test_id), or
        None (after recording a failure result) if 'data' cannot be parsed.
        """
        try:
            ip, port, size = data.split(':')
            port = int(port)
            size = int(size)
        except ValueError:
            self.set_result(test_id,
                            'Server failed to parse send request: %s' % data)
            return

        def send_thread():
            send_time = 10
            # Two datagrams per second for 'send_time' seconds.
            for _ in range(send_time * 2):
                try:
                    send_packet(test_id, size, ip, port)
                except socket.error as e:
                    self.set_result(test_id, 'Failure: ' + str(e))
                    return
                time.sleep(.5)

            self.set_result(test_id, 'Success')

        start_thread(send_thread, ())

        return str(test_id)

    def run(self):
        """Serve control requests until the HTTP server fails; return 1."""
        self.udp_recv.start()
        try:
            BaseHTTPServer.HTTPServer((self.server_ip, self.server_port),
                                      VlanServerHandler).serve_forever()
        except socket.error as e:
            print_safe('Failed to start control server: %s' % e)
        self.udp_recv.stop()

        return 1


class VlanServerHandler(BaseHTTPServer.BaseHTTPRequestHandler):
    """Handles the control requests of VlanServer.

    Supported paths: /start/recv, /start/send/<ip>:<port>:<size>,
    /result/<test id> and /ping.  Anything else is answered with 404.
    """

    def do_GET(self):
        #Guarantee three arguments.
        path = (self.path.lower().lstrip('/') + '//').split('/')

        resp = 404
        body = None

        if path[0] == 'start':
            test_id = vlan_server.get_test_id()
            if path[1] == 'recv':
                resp = 200
                body = vlan_server.recv(test_id)
            elif path[1] == 'send':
                resp = 200
                body = vlan_server.send(test_id, path[2])
        elif (path[0] == 'result'
              and path[1].isdigit()
              and int(path[1]) in vlan_server.result):
            resp = 200
            body = vlan_server.result[int(path[1])]
        elif path[0] == 'ping':
            resp = 200
            body = 'pong'

        self.send_response(resp)
        self.end_headers()

        if body:
            self.wfile.write(body)


#Client
class VlanClient:
    """Runs send and receive tests against a VlanServer."""

    def __init__(self, server_ip, server_port, vlan_ip, vlan_port):
        self.server_ip_port = '%s:%d' % (server_ip, server_port)
        self.vlan_ip_port = "%s:%d" % (vlan_ip, vlan_port)
        self.udp_recv = UDPReceiver(vlan_ip, vlan_port)

    def request(self, resource):
        """Send 'GET resource' to the control server.

        Returns the open httplib.HTTPConnection; the caller reads the
        response and closes the connection.
        """
        conn = httplib.HTTPConnection(self.server_ip_port)
        try:
            conn.request('GET', resource)
        except Exception:
            conn.close()
            raise
        return conn

    def _fetch(self, resource):
        """GET 'resource' and return (status, body); closes the connection."""
        conn = self.request(resource)
        try:
            resp = conn.getresponse()
            return resp.status, resp.read()
        finally:
            conn.close()

    def send(self, size):
        """Test sending datagrams of 'size' bytes to the server.

        Returns True if the server reports that it received one.
        """
        def error_msg(e):
            print_safe('Send size %d unsuccessful: %s' % (size, e))

        try:
            _, data = self._fetch('/start/recv')
        except (socket.error, httplib.HTTPException) as e:
            error_msg(e)
            return False

        try:
            ip, port, test_id = data.split(':')
            port = int(port)
            test_id = int(test_id)
        except ValueError:
            error_msg("Received invalid response from control server (%s)" %
                      data)
            return False

        send_time = 5

        # Four attempts per second for 'send_time' seconds.
        for _ in range(send_time * 4):
            try:
                send_packet(test_id, size, ip, port)
                status, data = self._fetch('/result/%d' % test_id)
            except (socket.error, httplib.HTTPException) as e:
                error_msg(e)
                return False

            if status == 200 and data == 'Success':
                print_safe('Send size %d successful' % size)
                return True
            elif status == 200:
                error_msg(data)
                return False

            time.sleep(.25)

        error_msg('Timeout')
        return False

    def recv(self, size):
        """Test receiving datagrams of 'size' bytes from the server.

        Returns True if one arrives before the receive timeout.
        """
        def error_msg(e):
            print_safe('Receive size %d unsuccessful: %s' % (size, e))

        resource = '/start/send/%s:%d' % (self.vlan_ip_port, size)
        try:
            _, test_id = self._fetch(resource)
        except (socket.error, httplib.HTTPException) as e:
            error_msg(e)
            return False

        if not test_id.isdigit():
            error_msg('Invalid response %s' % test_id)
            return False

        success = [False]  #Primitive datatypes can't be set from closures.

        def success_cb():
            success[0] = True

        def failure_cb():
            success[0] = False

        self.udp_recv.recv_packet(int(test_id), success_cb, failure_cb).wait()

        if success[0]:
            print_safe('Receive size %d successful' % size)
        else:
            error_msg('Timeout')

        return success[0]

    def server_up(self):
        """Return True only if the control server answers /ping with 'pong'."""
        def error_msg(e):
            print_safe('Failed control server connectivity test: %s' % e)

        try:
            status, data = self._fetch('/ping')
        except (socket.error, httplib.HTTPException) as e:
            error_msg(e)
            return False

        # A wrong answer means the tests cannot work either; report it as
        # a failure instead of carrying on.
        if status != 200:
            error_msg('Invalid status %d' % status)
            return False
        if data != 'pong':
            error_msg('Invalid response %s' % data)
            return False

        return True

    def run(self):
        """Run all tests; return 0 if every one passed, otherwise 1."""
        if not self.server_up():
            return 1

        self.udp_recv.start()

        success = True
        for size in [50, 500, 1000, 1500]:
            success = self.send(size) and success
            success = self.recv(size) and success

        self.udp_recv.stop()

        if success:
            print_safe('OK')
            return 0
        else:
            print_safe('FAILED')
            return 1


def usage():
    """Print the help text."""
    print_safe("""\
%(argv0)s: Test vlan connectivity
usage: %(argv0)s server vlan

The following options are also available:
  -s, --server                run in server mode
  -h, --help                  display this help message
  -V, --version               display version information\
""" % {'argv0': sys.argv[0]})


def main():
    """Parse the command line and run as server or client.

    Returns the process exit status.
    """
    try:
        options, args = getopt.gnu_getopt(sys.argv[1:], 'hVs',
                                          ['help', 'version', 'server'])
    except getopt.GetoptError as geo:
        print_safe('%s: %s\n' % (sys.argv[0], geo.msg))
        return 1

    server = False
    for key, _ in options:
        if key in ['-h', '--help']:
            usage()
            return 0
        elif key in ['-V', '--version']:
            print_safe('ovs-vlan-test (Open vSwitch) @VERSION@')
            return 0
        elif key in ['-s', '--server']:
            server = True
        else:
            print_safe('Unexpected option %s. (use --help for help)' % key)
            return 1

    if len(args) != 2:
        print_safe('Expecting two arguments. (use --help for help)')
        return 1

    # Each argument is either 'IP:PORT' or a bare IP that gets the default
    # port.
    try:
        server_ip, server_port = args[0].split(':')
        server_port = int(server_port)
    except ValueError:
        server_ip = args[0]
        server_port = 80

    try:
        vlan_ip, vlan_port = args[1].split(':')
        vlan_port = int(vlan_port)
    except ValueError:
        vlan_ip = args[1]
        vlan_port = 15213

    if server:
        return VlanServer(server_ip, server_port, vlan_ip, vlan_port).run()
    else:
        return VlanClient(server_ip, server_port, vlan_ip, vlan_port).run()


if __name__ == '__main__':
    main_ret = main()

    # Python can throw exceptions if threads are running at exit.
    for th in threading.enumerate():
        if th != threading.currentThread():
            th.join()

    sys.exit(main_ret)
.SH DESCRIPTION The \fBovs\-vsctl\fR program configures \fBovs\-vswitchd\fR(8) by providing a high\-level interface to its configuration database. See \fBovs\-vswitchd.conf.db\fR(5) for comprehensive documentation of the database schema. .PP \fBovs\-vsctl\fR connects to an \fBovsdb\-server\fR process that maintains an Open vSwitch configuration database. Using this connection, it queries and possibly applies changes to the database, depending on the supplied commands. Then, if it applied any changes, by default it waits until \fBovs\-vswitchd\fR has finished reconfiguring itself before it exits. (If you use \fBovs\-vsctl\fR when \fBovs\-vswitchd\fR is not running, use \fB\-\-no\-wait\fR.) .PP \fBovs\-vsctl\fR can perform any number of commands in a single run, implemented as a single atomic transaction against the database. .PP The \fBovs\-vsctl\fR command line begins with global options (see \fBOPTIONS\fR below for details). The global options are followed by one or more commands. Each command should begin with \fB\-\-\fR by itself as a command-line argument, to separate it from the following commands. (The \fB\-\-\fR before the first command is optional.) The command itself starts with command-specific options, if any, followed by the command name and any arguments. See \fBEXAMPLES\fR below for syntax examples. . .SS "Linux VLAN Bridging Compatibility" The \fBovs\-vsctl\fR program supports the model of a bridge implemented by Open vSwitch, in which a single bridge supports ports on multiple VLANs. In this model, each port on a bridge is either a trunk port that potentially passes packets tagged with 802.1Q headers that designate VLANs or it is assigned a single implicit VLAN that is never tagged with an 802.1Q header. .PP For compatibility with software designed for the Linux bridge, \fBovs\-vsctl\fR also supports a model in which traffic associated with a given 802.1Q VLAN is segregated into a separate bridge. 
A special form of the \fBadd\-br\fR command (see below) creates a ``fake bridge'' within an Open vSwitch bridge to simulate this behavior. When such a ``fake bridge'' is active, \fBovs\-vsctl\fR will treat it much like a bridge separate from its ``parent bridge,'' but the actual implementation in Open vSwitch uses only a single bridge, with ports on the fake bridge assigned the implicit VLAN of the fake bridge of which they are members. (A fake bridge for VLAN 0 receives packets that have no 802.1Q tag or a tag with VLAN 0.) . .SH OPTIONS . The following options affect the behavior of \fBovs\-vsctl\fR as a whole. Some individual commands also accept their own options, which are given just before the command name. If the first command on the command line has options, then those options must be separated from the global options by \fB\-\-\fR. . .IP "\fB\-\-db=\fIserver\fR" Sets \fIserver\fR as the database server that \fBovs\-vsctl\fR contacts to query or modify configuration. The default is \fBunix:@RUNDIR@/db.sock\fR. \fIserver\fR must take one of the following forms: .RS .so ovsdb/remote-active.man .so ovsdb/remote-passive.man .RE . .IP "\fB\-\-no\-wait\fR" Prevents \fBovs\-vsctl\fR from waiting for \fBovs\-vswitchd\fR to reconfigure itself according to the modified database. This option should be used if \fBovs\-vswitchd\fR is not running; otherwise, \fBovs\-vsctl\fR will not exit until \fBovs\-vswitchd\fR starts. .IP This option has no effect if the commands specified do not change the database. . .IP "\fB\-\-no\-syslog\fR" By default, \fBovs\-vsctl\fR logs its arguments and the details of any changes that it makes to the system log. This option disables this logging. .IP This option is equivalent to \fB\-\-verbose=vsctl:syslog:warn\fR. . .IP "\fB\-\-oneline\fR" Modifies the output format so that the output for each command is printed on a single line.
New-line characters that would otherwise separate lines are printed as \fB\\n\fR, and any instances of \fB\\\fR that would otherwise appear in the output are doubled. Prints a blank line for each command that has no output. This option does not affect the formatting of output from the \fBlist\fR or \fBfind\fR commands; see \fBTable Formatting Options\fR below. . .IP "\fB\-\-dry\-run\fR" Prevents \fBovs\-vsctl\fR from actually modifying the database. . .IP "\fB\-t \fIsecs\fR" .IQ "\fB\-\-timeout=\fIsecs\fR" By default, or with a \fIsecs\fR of \fB0\fR, \fBovs\-vsctl\fR waits forever for a response from the database. This option limits runtime to approximately \fIsecs\fR seconds. If the timeout expires, \fBovs\-vsctl\fR will exit with a \fBSIGALRM\fR signal. (A timeout would normally happen only if the database cannot be contacted, or if the system is overloaded.) . .IP "\fB\-\-retry\fR" Without this option, if \fBovs\-vsctl\fR connects outward to the database server (the default) then \fBovs\-vsctl\fR will try to connect once and exit with an error if the connection fails (which usually means that \fBovsdb\-server\fR is not running). .IP With this option, or if \fB\-\-db\fR specifies that \fBovs\-vsctl\fR should listen for an incoming connection from the database server, then \fBovs\-vsctl\fR will wait for a connection to the database forever. .IP Regardless of this setting, \fB\-\-timeout\fR always limits how long \fBovs\-vsctl\fR will wait. . .SS "Table Formatting Options" These options control the format of output from the \fBlist\fR and \fBfind\fR commands. .so lib/table.man . .SS "Public Key Infrastructure Options" .so lib/ssl.man .so lib/ssl-bootstrap.man .so lib/ssl-peer-ca-cert.man .so lib/vlog.man . .SH COMMANDS The commands implemented by \fBovs\-vsctl\fR are described in the sections below. .SS "Open vSwitch Commands" These commands work with an Open vSwitch as a whole. . .IP "\fBinit\fR" Initializes the Open vSwitch database, if it is empty. 
If the database has already been initialized, this command has no effect. .IP Any successful \fBovs\-vsctl\fR command automatically initializes the Open vSwitch database if it is empty. This command is provided to initialize the database without executing any other command. . .IP "\fBshow\fR" Prints a brief overview of the database contents. . .IP "\fBemer\-reset\fR" Reset the configuration into a clean state. It deconfigures OpenFlow controllers, OVSDB servers, and SSL, and deletes port mirroring, \fBfail_mode\fR, NetFlow, sFlow, and IPFIX configuration. This command also removes all \fBother\-config\fR keys from all database records, except that \fBother\-config:hwaddr\fR is preserved if it is present in a Bridge record. Other networking configuration is left as-is. . .SS "Bridge Commands" These commands examine and manipulate Open vSwitch bridges. . .IP "[\fB\-\-may\-exist\fR] \fBadd\-br \fIbridge\fR" Creates a new bridge named \fIbridge\fR. Initially the bridge will have no ports (other than \fIbridge\fR itself). .IP Without \fB\-\-may\-exist\fR, attempting to create a bridge that exists is an error. With \fB\-\-may\-exist\fR, this command does nothing if \fIbridge\fR already exists as a real bridge. . .IP "[\fB\-\-may\-exist\fR] \fBadd\-br \fIbridge parent vlan\fR" Creates a ``fake bridge'' named \fIbridge\fR within the existing Open vSwitch bridge \fIparent\fR, which must already exist and must not itself be a fake bridge. The new fake bridge will be on 802.1Q VLAN \fIvlan\fR, which must be an integer between 0 and 4095. Initially \fIbridge\fR will have no ports (other than \fIbridge\fR itself). .IP Without \fB\-\-may\-exist\fR, attempting to create a bridge that exists is an error. With \fB\-\-may\-exist\fR, this command does nothing if \fIbridge\fR already exists as a VLAN bridge under \fIparent\fR for \fIvlan\fR. . .IP "[\fB\-\-if\-exists\fR] \fBdel\-br \fIbridge\fR" Deletes \fIbridge\fR and all of its ports. 
If \fIbridge\fR is a real bridge, this command also deletes any fake bridges that were created with \fIbridge\fR as parent, including all of their ports. .IP Without \fB\-\-if\-exists\fR, attempting to delete a bridge that does not exist is an error. With \fB\-\-if\-exists\fR, attempting to delete a bridge that does not exist has no effect. . .IP "[\fB\-\-real\fR|\fB\-\-fake\fR] \fBlist\-br\fR" Lists all existing real and fake bridges on standard output, one per line. With \fB\-\-real\fR or \fB\-\-fake\fR, only bridges of that type are returned. . .IP "\fBbr\-exists \fIbridge\fR" Tests whether \fIbridge\fR exists as a real or fake bridge. If so, \fBovs\-vsctl\fR exits successfully with exit code 0. If not, \fBovs\-vsctl\fR exits unsuccessfully with exit code 2. . .IP "\fBbr\-to\-vlan \fIbridge\fR" If \fIbridge\fR is a fake bridge, prints the bridge's 802.1Q VLAN as a decimal integer. If \fIbridge\fR is a real bridge, prints 0. . .IP "\fBbr\-to\-parent \fIbridge\fR" If \fIbridge\fR is a fake bridge, prints the name of its parent bridge. If \fIbridge\fR is a real bridge, print \fIbridge\fR. . .IP "\fBbr\-set\-external\-id \fIbridge key\fR [\fIvalue\fR]" Sets or clears an ``external ID'' value on \fIbridge\fR. These values are intended to identify entities external to Open vSwitch with which \fIbridge\fR is associated, e.g. the bridge's identifier in a virtualization management platform. The Open vSwitch database schema specifies well-known \fIkey\fR values, but \fIkey\fR and \fIvalue\fR are otherwise arbitrary strings. .IP If \fIvalue\fR is specified, then \fIkey\fR is set to \fIvalue\fR for \fIbridge\fR, overwriting any previous value. If \fIvalue\fR is omitted, then \fIkey\fR is removed from \fIbridge\fR's set of external IDs (if it was present). .IP For real bridges, the effect of this command is similar to that of a \fBset\fR or \fBremove\fR command in the \fBexternal\-ids\fR column of the \fBBridge\fR table. 
For fake bridges, it actually modifies keys with names prefixed by \fBfake\-bridge\-\fR in the \fBPort\fR table. . .IP "\fBbr\-get\-external\-id \fIbridge\fR [\fIkey\fR]" Queries the external IDs on \fIbridge\fR. If \fIkey\fR is specified, the output is the value for that \fIkey\fR or the empty string if \fIkey\fR is unset. If \fIkey\fR is omitted, the output is \fIkey\fB=\fIvalue\fR, one per line, for each key-value pair. .IP For real bridges, the effect of this command is similar to that of a \fBget\fR command in the \fBexternal\-ids\fR column of the \fBBridge\fR table. For fake bridges, it queries keys with names prefixed by \fBfake\-bridge\-\fR in the \fBPort\fR table. . .SS "Port Commands" . These commands examine and manipulate Open vSwitch ports. These commands treat a bonded port as a single entity. . .IP "\fBlist\-ports \fIbridge\fR" Lists all of the ports within \fIbridge\fR on standard output, one per line. The local port \fIbridge\fR is not included in the list. . .IP "[\fB\-\-may\-exist\fR] \fBadd\-port \fIbridge port \fR[\fIcolumn\fR[\fB:\fIkey\fR]\fR=\fIvalue\fR]\&...\fR" Creates on \fIbridge\fR a new port named \fIport\fR from the network device of the same name. .IP Optional arguments set values of column in the Port record created by the command. For example, \fBtag=9\fR would make the port an access port for VLAN 9. The syntax is the same as that for the \fBset\fR command (see \fBDatabase Commands\fR below). .IP Without \fB\-\-may\-exist\fR, attempting to create a port that exists is an error. With \fB\-\-may\-exist\fR, this command does nothing if \fIport\fR already exists on \fIbridge\fR and is not a bonded port. . .IP "[\fB\-\-fake\-iface\fR] \fBadd\-bond \fIbridge port iface\fR\&... [\fIcolumn\fR[\fB:\fIkey\fR]\fR=\fIvalue\fR]\&...\fR" Creates on \fIbridge\fR a new port named \fIport\fR that bonds together the network devices given as each \fIiface\fR. At least two interfaces must be named. 
.IP Optional arguments set values of column in the Port record created by the command. The syntax is the same as that for the \fBset\fR command (see \fBDatabase Commands\fR below). .IP With \fB\-\-fake\-iface\fR, a fake interface with the name \fIport\fR is created. This should only be used for compatibility with legacy software that requires it. .IP Without \fB\-\-may\-exist\fR, attempting to create a port that exists is an error. With \fB\-\-may\-exist\fR, this command does nothing if \fIport\fR already exists on \fIbridge\fR and bonds together exactly the specified interfaces. . .IP "[\fB\-\-if\-exists\fR] \fBdel\-port \fR[\fIbridge\fR] \fIport\fR" Deletes \fIport\fR. If \fIbridge\fR is omitted, \fIport\fR is removed from whatever bridge contains it; if \fIbridge\fR is specified, it must be the real or fake bridge that contains \fIport\fR. .IP Without \fB\-\-if\-exists\fR, attempting to delete a port that does not exist is an error. With \fB\-\-if\-exists\fR, attempting to delete a port that does not exist has no effect. . .IP "[\fB\-\-if\-exists\fR] \fB\-\-with\-iface del\-port \fR[\fIbridge\fR] \fIiface\fR" Deletes the port named \fIiface\fR or that has an interface named \fIiface\fR. If \fIbridge\fR is omitted, the port is removed from whatever bridge contains it; if \fIbridge\fR is specified, it must be the real or fake bridge that contains the port. .IP Without \fB\-\-if\-exists\fR, attempting to delete the port for an interface that does not exist is an error. With \fB\-\-if\-exists\fR, attempting to delete the port for an interface that does not exist has no effect. . .IP "\fBport\-to\-br \fIport\fR" Prints the name of the bridge that contains \fIport\fR on standard output. . .SS "Interface Commands" . These commands examine the interfaces attached to an Open vSwitch bridge. These commands treat a bonded port as a collection of two or more interfaces, rather than as a single port. . 
.IP "\fBlist\-ifaces \fIbridge\fR" Lists all of the interfaces within \fIbridge\fR on standard output, one per line. The local port \fIbridge\fR is not included in the list. . .IP "\fBiface\-to\-br \fIiface\fR" Prints the name of the bridge that contains \fIiface\fR on standard output. . .SS "OpenFlow Controller Connectivity" . \fBovs\-vswitchd\fR can perform all configured bridging and switching locally, or it can be configured to communicate with one or more external OpenFlow controllers. The switch is typically configured to connect to a primary controller that takes charge of the bridge's flow table to implement a network policy. In addition, the switch can be configured to listen to connections from service controllers. Service controllers are typically used for occasional support and maintenance, e.g. with \fBovs\-ofctl\fR. . .IP "\fBget\-controller\fR \fIbridge\fR" Prints the configured controller target. . .IP "\fBdel\-controller\fR \fIbridge\fR" Deletes the configured controller target. . .IP "\fBset\-controller\fR \fIbridge\fR \fItarget\fR\&..." Sets the configured controller target or targets. Each \fItarget\fR may use any of the following forms: . .RS .so lib/vconn-active.man .so lib/vconn-passive.man .RE . .ST "Controller Failure Settings" .PP When a controller is configured, it is, ordinarily, responsible for setting up all flows on the switch. Thus, if the connection to the controller fails, no new network connections can be set up. If the connection to the controller stays down long enough, no packets can pass through the switch at all. .PP If the value is \fBstandalone\fR, or if neither of these settings is set, \fBovs\-vswitchd\fR will take over responsibility for setting up flows when no message has been received from the controller for three times the inactivity probe interval. In this mode, \fBovs\-vswitchd\fR causes the datapath to act like an ordinary MAC-learning switch. 
\fBovs\-vswitchd\fR will continue to retry connecting to the controller in the background and, when the connection succeeds, it discontinues its standalone behavior. .PP If this option is set to \fBsecure\fR, \fBovs\-vswitchd\fR will not set up flows on its own when the controller connection fails. . .IP "\fBget\-fail\-mode\fR \fIbridge\fR" Prints the configured failure mode. . .IP "\fBdel\-fail\-mode\fR \fIbridge\fR" Deletes the configured failure mode. . .IP "\fBset\-fail\-mode\fR \fIbridge\fR \fBstandalone\fR|\fBsecure\fR" Sets the configured failure mode. . .SS "Manager Connectivity" . These commands manipulate the \fBmanager_options\fR column in the \fBOpen_vSwitch\fR table and rows in the \fBManager\fR table. When \fBovsdb\-server\fR is configured to use the \fBmanager_options\fR column for OVSDB connections (as described in \fBINSTALL.Linux\fR and in the startup scripts provided with Open vSwitch), this allows the administrator to use \fBovs\-vsctl\fR to configure database connections. . .IP "\fBget\-manager\fR" Prints the configured manager(s). . .IP "\fBdel\-manager\fR" Deletes the configured manager(s). . .IP "\fBset\-manager\fR \fItarget\fR\&..." Sets the configured manager target or targets. Each \fItarget\fR may use any of the following forms: . .RS .so ovsdb/remote-active.man .so ovsdb/remote-passive.man .RE . .SS "SSL Configuration" When \fBovs\-vswitchd\fR is configured to connect over SSL for management or controller connectivity, the following parameters are required: .TP \fIprivate-key\fR Specifies a PEM file containing the private key used as the virtual switch's identity for SSL connections to the controller. .TP \fIcertificate\fR Specifies a PEM file containing a certificate, signed by the certificate authority (CA) used by the controller and manager, that certifies the virtual switch's private key, identifying a trustworthy switch.
.TP \fIca-cert\fR Specifies a PEM file containing the CA certificate used to verify that the virtual switch is connected to a trustworthy controller. .PP These files are read only once, at \fBovs\-vswitchd\fR startup time. If their contents change, \fBovs\-vswitchd\fR must be killed and restarted. .PP These SSL settings apply to all SSL connections made by the virtual switch. . .IP "\fBget\-ssl\fR" Prints the SSL configuration. . .IP "\fBdel\-ssl\fR" Deletes the current SSL configuration. . .IP "[\fB\-\-bootstrap\fR] \fBset\-ssl\fR \fIprivate-key\fR \fIcertificate\fR \fIca-cert\fR" Sets the SSL configuration. The \fB\-\-bootstrap\fR option is described below. . .ST "CA Certificate Bootstrap" .PP Ordinarily, all of the files named in the SSL configuration must exist when \fBovs\-vswitchd\fR starts. However, if the \fIca-cert\fR file does not exist and the \fB\-\-bootstrap\fR option is given, then \fBovs\-vswitchd\fR will attempt to obtain the CA certificate from the controller on its first SSL connection and save it to the named PEM file. If it is successful, it will immediately drop the connection and reconnect, and from then on all SSL connections must be authenticated by a certificate signed by the CA certificate thus obtained. .PP \fBThis option exposes the SSL connection to a man-in-the-middle attack obtaining the initial CA certificate\fR, but it may be useful for bootstrapping. .PP This option is only useful if the controller sends its CA certificate as part of the SSL certificate chain. The SSL protocol does not require the controller to send the CA certificate, but \fBovs\-controller\fR(8) can be configured to do so with the \fB\-\-peer\-ca\-cert\fR option. . .SS "Database Commands" . These commands query and modify the contents of \fBovsdb\fR tables. They are a slight abstraction of the \fBovsdb\fR interface and as such they operate at a lower level than other \fBovs\-vsctl\fR commands. 
.PP .ST "Identifying Tables, Records, and Columns" .PP Each of these commands has a \fItable\fR parameter to identify a table within the database. Many of them also take a \fIrecord\fR parameter that identifies a particular record within a table. The \fIrecord\fR parameter may be the UUID for a record, and many tables offer additional ways to identify records. Some commands also take \fIcolumn\fR parameters that identify a particular field within the records in a table. .PP The following tables are currently defined: .IP "\fBOpen_vSwitch\fR" Global configuration for an \fBovs\-vswitchd\fR. This table contains exactly one record, identified by specifying \fB.\fR as the record name. .IP "\fBBridge\fR" Configuration for a bridge within an Open vSwitch. Records may be identified by bridge name. .IP "\fBPort\fR" A bridge port. Records may be identified by port name. .IP "\fBInterface\fR" A network device attached to a port. Records may be identified by name. .IP "\fBFlow_Table\fR" Configuration for a particular OpenFlow flow table. Records may be identified by name. .IP "\fBQoS\fR" Quality-of-service configuration for a \fBPort\fR. Records may be identified by port name. .IP "\fBQueue\fR" Configuration for one queue within a \fBQoS\fR configuration. Records may only be identified by UUID. .IP "\fBMirror\fR" A port mirroring configuration attached to a bridge. Records may be identified by mirror name. .IP "\fBController\fR" Configuration for an OpenFlow controller. A controller attached to a particular bridge may be identified by the bridge's name. .IP "\fBManager\fR" Configuration for an OVSDB connection. Records may be identified by target (e.g. \fBtcp:1.2.3.4\fR). .IP "\fBNetFlow\fR" A NetFlow configuration attached to a bridge. Records may be identified by bridge name. .IP "\fBSSL\fR" The global SSL configuration for \fBovs\-vswitchd\fR. The record attached to the \fBOpen_vSwitch\fR table may be identified by specifying \fB.\fR as the record name. 
.IP "\fBsFlow\fR" An sFlow exporter configuration attached to a bridge. Records may be identified by bridge name. .IP "\fBIPFIX\fR" An IPFIX exporter configuration attached to a bridge. Records may be identified by bridge name. .IP "\fBFlow_Sample_Collector_Set\fR" An IPFIX exporter configuration attached to a bridge for sampling packets on a per-flow basis using OpenFlow \fBsample\fR actions. .PP Record names must be specified in full and with correct capitalization. Names of tables and columns are not case-sensitive, and \fB\-\fR and \fB_\fR are treated interchangeably. Unique abbreviations are acceptable, e.g. \fBnet\fR or \fBn\fR is sufficient to identify the \fBNetFlow\fR table. . .ST "Database Values" .PP Each column in the database accepts a fixed type of data. The currently defined basic types, and their representations, are: .IP "integer" A decimal integer in the range \-2**63 to 2**63\-1, inclusive. .IP "real" A floating-point number. .IP "Boolean" True or false, written \fBtrue\fR or \fBfalse\fR, respectively. .IP "string" An arbitrary Unicode string, except that null bytes are not allowed. Quotes are optional for most strings that begin with an English letter or underscore and consist only of letters, underscores, hyphens, and periods. However, \fBtrue\fR and \fBfalse\fR and strings that match the syntax of UUIDs (see below) must be enclosed in double quotes to distinguish them from other basic types. When double quotes are used, the syntax is that of strings in JSON, e.g. backslashes may be used to escape special characters. The empty string must be represented as a pair of double quotes (\fB""\fR). .IP "UUID" Either a universally unique identifier in the style of RFC 4122, e.g. \fBf81d4fae\-7dec\-11d0\-a765\-00a0c91e6bf6\fR, or an \fB@\fIname\fR defined by a \fBget\fR or \fBcreate\fR command within the same \fBovs\-vsctl\fR invocation. .PP Multiple values in a single column may be separated by spaces or a single comma.
When multiple values are present, duplicates are not allowed, and order is not important. Conversely, some database columns can have an empty set of values, represented as \fB[]\fR, and square brackets may optionally enclose other non-empty sets or single values as well. .PP A few database columns are ``maps'' of key-value pairs, where the key and the value are each some fixed database type. These are specified in the form \fIkey\fB=\fIvalue\fR, where \fIkey\fR and \fIvalue\fR follow the syntax for the column's key type and value type, respectively. When multiple pairs are present (separated by spaces or a comma), duplicate keys are not allowed, and again the order is not important. Duplicate values are allowed. An empty map is represented as \fB{}\fR. Curly braces may optionally enclose non-empty maps as well (but use quotes to prevent the shell from expanding \fBother-config={0=x,1=y}\fR into \fBother-config=0=x other-config=1=y\fR, which may not have the desired effect). . .ST "Database Command Syntax" . .IP "[\fB\-\-if\-exists\fR] [\fB\-\-columns=\fIcolumn\fR[\fB,\fIcolumn\fR]...] \fBlist \fItable \fR[\fIrecord\fR]..." Lists the data in each specified \fIrecord\fR. If no records are specified, lists all the records in \fItable\fR. .IP If \fB\-\-columns\fR is specified, only the requested columns are listed, in the specified order. Otherwise, all columns are listed, in alphabetical order by column name. .IP Without \fB\-\-if-exists\fR, it is an error if any specified \fIrecord\fR does not exist. With \fB\-\-if-exists\fR, the command ignores any \fIrecord\fR that does not exist, without producing any output. . .IP "[\fB\-\-columns=\fIcolumn\fR[\fB,\fIcolumn\fR]...] \fBfind \fItable \fR[\fIcolumn\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR]..." Lists the data in each record in \fItable\fR whose \fIcolumn\fR equals \fIvalue\fR or, if \fIkey\fR is specified, whose \fIcolumn\fR contains a \fIkey\fR with the specified \fIvalue\fR. 
The following operators may be used where \fB=\fR is written in the syntax summary: .RS .IP "\fB= != < > <= >=\fR" Selects records in which \fIcolumn\fR[\fB:\fIkey\fR] equals, does not equal, is less than, is greater than, is less than or equal to, or is greater than or equal to \fIvalue\fR, respectively. .IP Consider \fIcolumn\fR[\fB:\fIkey\fR] and \fIvalue\fR as sets of elements. Identical sets are considered equal. Otherwise, if the sets have different numbers of elements, then the set with more elements is considered to be larger. Otherwise, consider an element from each set pairwise, in increasing order within each set. The first pair that differs determines the result. (For a column that contains key-value pairs, first all the keys are compared, and values are considered only if the two sets contain identical keys.) .IP "\fB{=} {!=}\fR" Test for set equality or inequality, respectively. .IP "\fB{<=}\fR" Selects records in which \fIcolumn\fR[\fB:\fIkey\fR] is a subset of \fIvalue\fR. For example, \fBflood-vlans{<=}1,2\fR selects records in which the \fBflood-vlans\fR column is the empty set or contains 1 or 2 or both. .IP "\fB{<}\fR" Selects records in which \fIcolumn\fR[\fB:\fIkey\fR] is a proper subset of \fIvalue\fR. For example, \fBflood-vlans{<}1,2\fR selects records in which the \fBflood-vlans\fR column is the empty set or contains 1 or 2 but not both. .IP "\fB{>=} {>}\fR" Same as \fB{<=}\fR and \fB{<}\fR, respectively, except that the relationship is reversed. For example, \fBflood-vlans{>=}1,2\fR selects records in which the \fBflood-vlans\fR column contains both 1 and 2. .RE .IP For arithmetic operators (\fB= != < > <= >=\fR), when \fIkey\fR is specified but a particular record's \fIcolumn\fR does not contain \fIkey\fR, the record is always omitted from the results. Thus, the condition \fBother-config:mtu!=1500\fR matches records that have a \fBmtu\fR key whose value is not 1500, but not those that lack an \fBmtu\fR key.
.IP For the set operators, when \fIkey\fR is specified but a particular record's \fIcolumn\fR does not contain \fIkey\fR, the comparison is done against an empty set. Thus, the condition \fBother-config:mtu{!=}1500\fR matches records that have a \fBmtu\fR key whose value is not 1500 and those that lack an \fBmtu\fR key. .IP Don't forget to escape \fB<\fR or \fB>\fR from interpretation by the shell. .IP If \fB\-\-columns\fR is specified, only the requested columns are listed, in the specified order. Otherwise all columns are listed, in alphabetical order by column name. .IP The UUIDs shown for rows created in the same \fBovs\-vsctl\fR invocation will be wrong. . .IP "[\fB\-\-if\-exists\fR] [\fB\-\-id=@\fIname\fR] \fBget \fItable record \fR[\fIcolumn\fR[\fB:\fIkey\fR]]..." Prints the value of each specified \fIcolumn\fR in the given \fIrecord\fR in \fItable\fR. For map columns, a \fIkey\fR may optionally be specified, in which case the value associated with \fIkey\fR in the column is printed, instead of the entire map. .IP Without \fB\-\-if\-exists\fR, it is an error if \fIrecord\fR does not exist or, if \fIkey\fR is specified, if \fIkey\fR does not exist in \fIrecord\fR. With \fB\-\-if\-exists\fR, a missing \fIrecord\fR yields no output and a missing \fIkey\fR prints a blank line. .IP If \fB@\fIname\fR is specified, then the UUID for \fIrecord\fR may be referred to by that name later in the same \fBovs\-vsctl\fR invocation in contexts where a UUID is expected. .IP Both \fB\-\-id\fR and the \fIcolumn\fR arguments are optional, but usually at least one or the other should be specified. If both are omitted, then \fBget\fR has no effect except to verify that \fIrecord\fR exists in \fItable\fR. .IP \fB\-\-id\fR and \fB\-\-if\-exists\fR cannot be used together. . .IP "[\fB\-\-if\-exists\fR] \fBset \fItable record column\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR..." Sets the value of each specified \fIcolumn\fR in the given \fIrecord\fR in \fItable\fR to \fIvalue\fR.
For map columns, a \fIkey\fR may optionally be specified, in which case the value associated with \fIkey\fR in that column is changed (or added, if none exists), instead of the entire map. .IP Without \fB\-\-if-exists\fR, it is an error if \fIrecord\fR does not exist. With \fB\-\-if-exists\fR, this command does nothing if \fIrecord\fR does not exist. . .IP "[\fB\-\-if\-exists\fR] \fBadd \fItable record column \fR[\fIkey\fB=\fR]\fIvalue\fR..." Adds the specified value or key-value pair to \fIcolumn\fR in \fIrecord\fR in \fItable\fR. If \fIcolumn\fR is a map, then \fIkey\fR is required, otherwise it is prohibited. If \fIkey\fR already exists in a map column, then the current \fIvalue\fR is not replaced (use the \fBset\fR command to replace an existing value). .IP Without \fB\-\-if-exists\fR, it is an error if \fIrecord\fR does not exist. With \fB\-\-if-exists\fR, this command does nothing if \fIrecord\fR does not exist. . .IP "[\fB\-\-if\-exists\fR] \fBremove \fItable record column \fR\fIvalue\fR..." .IQ "[\fB\-\-if\-exists\fR] \fBremove \fItable record column \fR\fIkey\fR..." .IQ "[\fB\-\-if\-exists\fR] \fBremove \fItable record column \fR\fIkey\fB=\fR\fIvalue\fR..." Removes the specified values or key-value pairs from \fIcolumn\fR in \fIrecord\fR in \fItable\fR. The first form applies to columns that are not maps: each specified \fIvalue\fR is removed from the column. The second and third forms apply to map columns: if only a \fIkey\fR is specified, then any key-value pair with the given \fIkey\fR is removed, regardless of its value; if a \fIvalue\fR is given then a pair is removed only if both key and value match. .IP It is not an error if the column does not contain the specified key or value or pair. .IP Without \fB\-\-if-exists\fR, it is an error if \fIrecord\fR does not exist. With \fB\-\-if-exists\fR, this command does nothing if \fIrecord\fR does not exist. . .IP "[\fB\-\-if\-exists\fR] \fBclear\fR \fItable record column\fR..." 
Sets each \fIcolumn\fR in \fIrecord\fR in \fItable\fR to the empty set or empty map, as appropriate. This command applies only to columns that are allowed to be empty. .IP Without \fB\-\-if-exists\fR, it is an error if \fIrecord\fR does not exist. With \fB\-\-if-exists\fR, this command does nothing if \fIrecord\fR does not exist. . .IP "[\fB\-\-id=@\fIname\fR] \fBcreate\fR \fItable column\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR..." Creates a new record in \fItable\fR and sets the initial value of each \fIcolumn\fR. Columns not explicitly set will receive their default values. Outputs the UUID of the new row. .IP If \fB@\fIname\fR is specified, then the UUID for the new row may be referred to by that name elsewhere in the same \fBovs\-vsctl\fR invocation in contexts where a UUID is expected. Such references may precede or follow the \fBcreate\fR command. .IP Records in the Open vSwitch database are significant only when they can be reached directly or indirectly from the \fBOpen_vSwitch\fR table. Except for records in the \fBQoS\fR or \fBQueue\fR tables, records that are not reachable from the \fBOpen_vSwitch\fR table are automatically deleted from the database. This deletion happens immediately, without waiting for additional \fBovs\-vsctl\fR commands or other database activity. Thus, a \fBcreate\fR command must generally be accompanied by additional commands \fIwithin the same \fBovs\-vsctl\fI invocation\fR to add a chain of references to the newly created record from the top-level \fBOpen_vSwitch\fR record. The \fBEXAMPLES\fR section gives some examples that show how to do this. . .IP "\fR[\fB\-\-if\-exists\fR] \fBdestroy \fItable record\fR..." Deletes each specified \fIrecord\fR from \fItable\fR. Unless \fB\-\-if\-exists\fR is specified, each \fIrecord\fR must exist. .IP "\fB\-\-all destroy \fItable\fR" Deletes all records from the \fItable\fR. .IP The \fBdestroy\fR command is only useful for records in the \fBQoS\fR or \fBQueue\fR tables.
Records in other tables are automatically deleted from the database when they become unreachable from the \fBOpen_vSwitch\fR table. This means that deleting the last reference to a record is sufficient for deleting the record itself. For records in those other tables, \fBdestroy\fR is silently ignored. See the \fBEXAMPLES\fR section below for more information. . .IP "\fBwait\-until \fItable record \fR[\fIcolumn\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR]..." Waits until \fItable\fR contains a record named \fIrecord\fR whose \fIcolumn\fR equals \fIvalue\fR or, if \fIkey\fR is specified, whose \fIcolumn\fR contains a \fIkey\fR with the specified \fIvalue\fR. Any of the operators \fB!=\fR, \fB<\fR, \fB>\fR, \fB<=\fR, or \fB>=\fR may be substituted for \fB=\fR to test for inequality, less than, greater than, less than or equal to, or greater than or equal to, respectively. (Don't forget to escape \fB<\fR or \fB>\fR from interpretation by the shell.) .IP If no \fIcolumn\fR[\fB:\fIkey\fR]\fB=\fIvalue\fR arguments are given, this command waits only until \fIrecord\fR exists. If more than one such argument is given, the command waits until all of them are satisfied. .IP Usually \fBwait\-until\fR should be placed at the beginning of a set of \fBovs\-vsctl\fR commands. For example, \fBwait\-until bridge br0 \-\- get bridge br0 datapath_id\fR waits until a bridge named \fBbr0\fR is created, then prints its \fBdatapath_id\fR column, whereas \fBget bridge br0 datapath_id \-\- wait\-until bridge br0\fR will abort if no bridge named \fBbr0\fR exists when \fBovs\-vsctl\fR initially connects to the database. .IP Consider specifying \fB\-\-timeout=0\fR along with \fBwait\-until\fR, to prevent \fBovs\-vsctl\fR from terminating after waiting for at most 5 seconds. .IP "\fBcomment \fR[\fIarg\fR]..." This command has no effect on behavior, but any database log record created by the command will include the command and its arguments.
.SH "EXAMPLES" Create a new bridge named br0 and add port eth0 to it: .IP .B "ovs\-vsctl add\-br br0" .br .B "ovs\-vsctl add\-port br0 eth0" .PP Alternatively, perform both operations in a single atomic transaction: .IP .B "ovs\-vsctl add\-br br0 \-\- add\-port br0 eth0" .PP Delete bridge \fBbr0\fR, reporting an error if it does not exist: .IP .B "ovs\-vsctl del\-br br0" .PP Delete bridge \fBbr0\fR if it exists: .IP .B "ovs\-vsctl \-\-if\-exists del\-br br0" .PP Set the \fBqos\fR column of the \fBPort\fR record for \fBeth0\fR to point to a new \fBQoS\fR record, which in turn points with its queue 0 to a new \fBQueue\fR record: .IP .B "ovs\-vsctl \-\- set port eth0 qos=@newqos \-\- \-\-id=@newqos create qos type=linux\-htb other\-config:max\-rate=1000000 queues:0=@newqueue \-\- \-\-id=@newqueue create queue other\-config:min\-rate=1000000 other\-config:max\-rate=1000000" .SH "CONFIGURATION COOKBOOK" .SS "Port Configuration" .PP Add an ``internal port'' \fBvlan10\fR to bridge \fBbr0\fR as a VLAN access port for VLAN 10, and configure it with an IP address: .IP .B "ovs\-vsctl add\-port br0 vlan10 tag=10 \-\- set Interface vlan10 type=internal" .IP .B "ifconfig vlan10 192.168.0.123" . .PP Add a GRE tunnel port \fBgre0\fR to remote IP address 1.2.3.4 to bridge \fBbr0\fR: .IP .B "ovs\-vsctl add\-port br0 gre0 \-\- set Interface gre0 type=gre options:remote_ip=1.2.3.4" . 
.SS "Port Mirroring" .PP Mirror all packets received or sent on \fBeth0\fR or \fBeth1\fR onto \fBeth2\fR, assuming that all of those ports exist on bridge \fBbr0\fR (as a side-effect this causes any packets received on \fBeth2\fR to be ignored): .IP .B "ovs\-vsctl \-\- set Bridge br0 mirrors=@m \(rs" .IP .B "\-\- \-\-id=@eth0 get Port eth0 \(rs" .IP .B "\-\- \-\-id=@eth1 get Port eth1 \(rs" .IP .B "\-\- \-\-id=@eth2 get Port eth2 \(rs" .IP .B "\-\- \-\-id=@m create Mirror name=mymirror select-dst-port=@eth0,@eth1 select-src-port=@eth0,@eth1 output-port=@eth2" .PP Remove the mirror created above from \fBbr0\fR, which also destroys the Mirror record (since it is now unreferenced): .IP .B "ovs\-vsctl \-\- \-\-id=@rec get Mirror mymirror \(rs" .IP .B "\-\- remove Bridge br0 mirrors @rec" .PP The following simpler command also works: .IP .B "ovs\-vsctl clear Bridge br0 mirrors" .SS "Quality of Service (QoS)" .PP Create a \fBlinux\-htb\fR QoS record that points to a few queues and use it on \fBeth0\fR and \fBeth1\fR: .IP .B "ovs\-vsctl \-\- set Port eth0 qos=@newqos \(rs" .IP .B "\-\- set Port eth1 qos=@newqos \(rs" .IP .B "\-\- \-\-id=@newqos create QoS type=linux\-htb other\-config:max\-rate=1000000000 queues=0=@q0,1=@q1 \(rs" .IP .B "\-\- \-\-id=@q0 create Queue other\-config:min\-rate=100000000 other\-config:max\-rate=100000000 \(rs" .IP .B "\-\- \-\-id=@q1 create Queue other\-config:min\-rate=500000000" .PP Deconfigure the QoS record above from \fBeth1\fR only: .IP .B "ovs\-vsctl clear Port eth1 qos" .PP To deconfigure the QoS record from both \fBeth0\fR and \fBeth1\fR and then delete the QoS record (which must be done explicitly because unreferenced QoS records are not automatically destroyed): .IP .B "ovs\-vsctl \-\- destroy QoS eth0 \-\- clear Port eth0 qos \-\- clear Port eth1 qos" .PP (This command will leave two unreferenced Queue records in the database. 
To delete them, use "\fBovs\-vsctl list Queue\fR" to find their UUIDs, then "\fBovs\-vsctl destroy Queue \fIuuid1\fR \fIuuid2\fR" to destroy each of them or use "\fBovs\-vsctl \-\- \-\-all destroy Queue\fR" to delete all records.) .SS "Connectivity Monitoring" .PP Monitor connectivity to a remote maintenance point on eth0. .IP .B "ovs\-vsctl set Interface eth0 cfm_mpid=1" .PP Deconfigure connectivity monitoring from above: .IP .B "ovs\-vsctl clear Interface eth0 cfm_mpid" .SS "NetFlow" .PP Configure bridge \fBbr0\fR to send NetFlow records to UDP port 5566 on host 192.168.0.34, with an active timeout of 30 seconds: .IP .B "ovs\-vsctl \-\- set Bridge br0 netflow=@nf \(rs" .IP .B "\-\- \-\-id=@nf create NetFlow targets=\(rs\(dq192.168.0.34:5566\(rs\(dq active\-timeout=30" .PP Update the NetFlow configuration created by the previous command to instead use an active timeout of 60 seconds: .IP .B "ovs\-vsctl set NetFlow br0 active_timeout=60" .PP Deconfigure the NetFlow settings from \fBbr0\fR, which also destroys the NetFlow record (since it is now unreferenced): .IP .B "ovs\-vsctl clear Bridge br0 netflow" .SS "sFlow" .PP Configure bridge \fBbr0\fR to send sFlow records to a collector on 10.0.0.1 at port 6343, using \fBeth1\fR\'s IP address as the source, with specific sampling parameters: .IP .B "ovs\-vsctl \-\- \-\-id=@s create sFlow agent=eth1 target=\(rs\(dq10.0.0.1:6343\(rs\(dq header=128 sampling=64 polling=10 \(rs" .IP .B "\-\- set Bridge br0 sflow=@s" .PP Deconfigure sFlow from \fBbr0\fR, which also destroys the sFlow record (since it is now unreferenced): .IP .B "ovs\-vsctl \-\- clear Bridge br0 sflow" .SS "IPFIX" .PP Configure bridge \fBbr0\fR to send one IPFIX flow record per packet sample to UDP port 4739 on host 192.168.0.34, with Observation Domain ID 123 and Observation Point ID 456, a flow cache active timeout of 1 minute (60 seconds), and a maximum flow cache size of 13 flows: .IP .B "ovs\-vsctl \-\- set Bridge br0 ipfix=@i \(rs" .IP .B "\-\- \-\-id=@i
create IPFIX targets=\(rs\(dq192.168.0.34:4739\(rs\(dq obs_domain_id=123 obs_point_id=456 cache_active_timeout=60 cache_max_flows=13" .PP Deconfigure the IPFIX settings from \fBbr0\fR, which also destroys the IPFIX record (since it is now unreferenced): .IP .B "ovs\-vsctl clear Bridge br0 ipfix" .SS "802.1D Spanning Tree Protocol (STP)" .PP Configure bridge \fBbr0\fR to participate in an 802.1D spanning tree: .IP .B "ovs\-vsctl set Bridge br0 stp_enable=true" .PP Set the bridge priority of \fBbr0\fR to 0x7800: .IP .B "ovs\-vsctl set Bridge br0 other_config:stp-priority=0x7800" .PP Set the path cost of port \fBeth0\fR to 10: .IP .B "ovs\-vsctl set Port eth0 other_config:stp-path-cost=10" .PP Deconfigure STP from above: .IP .B "ovs\-vsctl clear Bridge br0 stp_enable" .PP .SS "OpenFlow Version" .PP Configure bridge \fBbr0\fR to support OpenFlow versions 1.0, 1.2, and 1.3: .IP .B "ovs\-vsctl set bridge br0 protocols=openflow10,openflow12,openflow13" . .SH "EXIT STATUS" .IP "0" Successful program execution. .IP "1" Usage, syntax, or configuration file error. .IP "2" The \fIbridge\fR argument to \fBbr\-exists\fR specified the name of a bridge that does not exist. .SH "SEE ALSO" . .BR ovsdb\-server (1), .BR ovs\-vswitchd (8), .BR ovs\-vswitchd.conf.db (5). openvswitch-2.0.1+git20140120/utilities/ovs-vsctl.c000066400000000000000000003724051226605124000216040ustar00rootroot00000000000000/* * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include #include #include #include "command-line.h" #include "compiler.h" #include "dirs.h" #include "dynamic-string.h" #include "hash.h" #include "json.h" #include "ovsdb-data.h" #include "ovsdb-idl.h" #include "poll-loop.h" #include "process.h" #include "stream.h" #include "stream-ssl.h" #include "smap.h" #include "sset.h" #include "svec.h" #include "lib/vswitch-idl.h" #include "table.h" #include "timeval.h" #include "util.h" #include "vconn.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(vsctl); /* vsctl_fatal() also logs the error, so it is preferred in this file. */ #define ovs_fatal please_use_vsctl_fatal_instead_of_ovs_fatal struct vsctl_context; /* A command supported by ovs-vsctl. */ struct vsctl_command_syntax { const char *name; /* e.g. "add-br" */ int min_args; /* Min number of arguments following name. */ int max_args; /* Max number of arguments following name. */ /* If nonnull, calls ovsdb_idl_add_column() or ovsdb_idl_add_table() for * each column or table in ctx->idl that it uses. */ void (*prerequisites)(struct vsctl_context *ctx); /* Does the actual work of the command and puts the command's output, if * any, in ctx->output or ctx->table. * * Alternatively, if some prerequisite of the command is not met and the * caller should wait for something to change and then retry, it may set * ctx->try_again to true. (Only the "wait-until" command currently does * this.) */ void (*run)(struct vsctl_context *ctx); /* If nonnull, called after the transaction has been successfully * committed. ctx->output is the output from the "run" function, which * this function may modify and otherwise postprocess as needed. (Only the * "create" command currently does any postprocessing.) */ void (*postprocess)(struct vsctl_context *ctx); /* A comma-separated list of supported options, e.g. 
"--a,--b", or the * empty string if the command does not support any options. */ const char *options; enum { RO, RW } mode; /* Does this command modify the database? */ }; struct vsctl_command { /* Data that remains constant after initialization. */ const struct vsctl_command_syntax *syntax; int argc; char **argv; struct shash options; /* Data modified by commands. */ struct ds output; struct table *table; }; /* --db: The database server to contact. */ static const char *db; /* --oneline: Write each command's output as a single line? */ static bool oneline; /* --dry-run: Do not commit any changes. */ static bool dry_run; /* --no-wait: Wait for ovs-vswitchd to reload its configuration? */ static bool wait_for_reload = true; /* --timeout: Time to wait for a connection to 'db'. */ static int timeout; /* --retry: If true, ovs-vsctl will retry connecting to the database forever. * If false and --db says to use an active connection method (e.g. "unix:", * "tcp:", "ssl:"), then ovs-vsctl will try to connect once and exit with an * error if the database server cannot be contacted (e.g. ovsdb-server is not * running). * * Regardless of this setting, --timeout always limits how long ovs-vsctl will * wait. */ static bool retry; /* Format for table output. */ static struct table_style table_style = TABLE_STYLE_DEFAULT; /* All supported commands. */ static const struct vsctl_command_syntax *get_all_commands(void); /* The IDL we're using and the current transaction, if any. * This is for use by vsctl_exit() only, to allow it to clean up. * Other code should use its context arguments. */ static struct ovsdb_idl *the_idl; static struct ovsdb_idl_txn *the_idl_txn; static void vsctl_exit(int status) NO_RETURN; static void vsctl_fatal(const char *, ...) 
PRINTF_FORMAT(1, 2) NO_RETURN; static char *default_db(void); static void usage(void) NO_RETURN; static void parse_options(int argc, char *argv[], struct shash *local_options); static bool might_write_to_db(char **argv); static struct vsctl_command *parse_commands(int argc, char *argv[], struct shash *local_options, size_t *n_commandsp); static void parse_command(int argc, char *argv[], struct shash *local_options, struct vsctl_command *); static const struct vsctl_command_syntax *find_command(const char *name); static void run_prerequisites(struct vsctl_command[], size_t n_commands, struct ovsdb_idl *); static void do_vsctl(const char *args, struct vsctl_command *, size_t n, struct ovsdb_idl *); static const struct vsctl_table_class *get_table(const char *table_name); static void set_column(const struct vsctl_table_class *, const struct ovsdb_idl_row *, const char *arg, struct ovsdb_symbol_table *); static bool is_condition_satisfied(const struct vsctl_table_class *, const struct ovsdb_idl_row *, const char *arg, struct ovsdb_symbol_table *); int main(int argc, char *argv[]) { extern struct vlog_module VLM_reconnect; struct ovsdb_idl *idl; struct vsctl_command *commands; struct shash local_options; unsigned int seqno; size_t n_commands; char *args; set_program_name(argv[0]); signal(SIGPIPE, SIG_IGN); vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN); vlog_set_levels(&VLM_reconnect, VLF_ANY_FACILITY, VLL_WARN); ovsrec_init(); /* Log our arguments. This is often valuable for debugging systems. */ args = process_escape_args(argv); VLOG(might_write_to_db(argv) ? VLL_INFO : VLL_DBG, "Called as %s", args); /* Parse command line. */ shash_init(&local_options); parse_options(argc, argv, &local_options); commands = parse_commands(argc - optind, argv + optind, &local_options, &n_commands); if (timeout) { time_alarm(timeout); } /* Initialize IDL. 
*/ idl = the_idl = ovsdb_idl_create(db, &ovsrec_idl_class, false, retry); run_prerequisites(commands, n_commands, idl); /* Execute the commands. * * 'seqno' is the database sequence number for which we last tried to * execute our transaction. There's no point in trying to commit more than * once for any given sequence number, because if the transaction fails * it's because the database changed and we need to obtain an up-to-date * view of the database before we try the transaction again. */ seqno = ovsdb_idl_get_seqno(idl); for (;;) { ovsdb_idl_run(idl); if (!ovsdb_idl_is_alive(idl)) { int retval = ovsdb_idl_get_last_error(idl); vsctl_fatal("%s: database connection failed (%s)", db, ovs_retval_to_string(retval)); } if (seqno != ovsdb_idl_get_seqno(idl)) { seqno = ovsdb_idl_get_seqno(idl); do_vsctl(args, commands, n_commands, idl); } if (seqno == ovsdb_idl_get_seqno(idl)) { ovsdb_idl_wait(idl); poll_block(); } } } static struct option * find_option(const char *name, struct option *options, size_t n_options) { size_t i; for (i = 0; i < n_options; i++) { if (!strcmp(options[i].name, name)) { return &options[i]; } } return NULL; } static struct option * add_option(struct option **optionsp, size_t *n_optionsp, size_t *allocated_optionsp) { if (*n_optionsp >= *allocated_optionsp) { *optionsp = x2nrealloc(*optionsp, allocated_optionsp, sizeof **optionsp); } return &(*optionsp)[(*n_optionsp)++]; } static void parse_options(int argc, char *argv[], struct shash *local_options) { enum { OPT_DB = UCHAR_MAX + 1, OPT_ONELINE, OPT_NO_SYSLOG, OPT_NO_WAIT, OPT_DRY_RUN, OPT_PEER_CA_CERT, OPT_LOCAL, OPT_RETRY, VLOG_OPTION_ENUMS, TABLE_OPTION_ENUMS }; static const struct option global_long_options[] = { {"db", required_argument, NULL, OPT_DB}, {"no-syslog", no_argument, NULL, OPT_NO_SYSLOG}, {"no-wait", no_argument, NULL, OPT_NO_WAIT}, {"dry-run", no_argument, NULL, OPT_DRY_RUN}, {"oneline", no_argument, NULL, OPT_ONELINE}, {"timeout", required_argument, NULL, 't'}, {"retry", 
no_argument, NULL, OPT_RETRY}, {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, VLOG_LONG_OPTIONS, TABLE_LONG_OPTIONS, STREAM_SSL_LONG_OPTIONS, {"peer-ca-cert", required_argument, NULL, OPT_PEER_CA_CERT}, {NULL, 0, NULL, 0}, }; const int n_global_long_options = ARRAY_SIZE(global_long_options) - 1; char *tmp, *short_options; const struct vsctl_command_syntax *p; struct option *options, *o; size_t allocated_options; size_t n_options; size_t i; tmp = long_options_to_short_options(global_long_options); short_options = xasprintf("+%s", tmp); free(tmp); /* We want to parse both global and command-specific options here, but * getopt_long() isn't too convenient for the job. We copy our global * options into a dynamic array, then append all of the command-specific * options. */ options = xmemdup(global_long_options, sizeof global_long_options); allocated_options = ARRAY_SIZE(global_long_options); n_options = n_global_long_options; for (p = get_all_commands(); p->name; p++) { if (p->options[0]) { char *save_ptr = NULL; char *name; char *s; s = xstrdup(p->options); for (name = strtok_r(s, ",", &save_ptr); name != NULL; name = strtok_r(NULL, ",", &save_ptr)) { char *equals; int has_arg; ovs_assert(name[0] == '-' && name[1] == '-' && name[2]); name += 2; equals = strchr(name, '='); if (equals) { has_arg = required_argument; *equals = '\0'; } else { has_arg = no_argument; } o = find_option(name, options, n_options); if (o) { ovs_assert(o - options >= n_global_long_options); ovs_assert(o->has_arg == has_arg); } else { o = add_option(&options, &n_options, &allocated_options); o->name = xstrdup(name); o->has_arg = has_arg; o->flag = NULL; o->val = OPT_LOCAL; } } free(s); } } o = add_option(&options, &n_options, &allocated_options); memset(o, 0, sizeof *o); table_style.format = TF_LIST; for (;;) { int idx; int c; c = getopt_long(argc, argv, short_options, options, &idx); if (c == -1) { break; } switch (c) { case OPT_DB: db = optarg; break; case OPT_ONELINE: 
oneline = true; break; case OPT_NO_SYSLOG: vlog_set_levels(&VLM_vsctl, VLF_SYSLOG, VLL_WARN); break; case OPT_NO_WAIT: wait_for_reload = false; break; case OPT_DRY_RUN: dry_run = true; break; case OPT_LOCAL: if (shash_find(local_options, options[idx].name)) { vsctl_fatal("'%s' option specified multiple times", options[idx].name); } shash_add_nocopy(local_options, xasprintf("--%s", options[idx].name), optarg ? xstrdup(optarg) : NULL); break; case 'h': usage(); case 'V': ovs_print_version(0, 0); exit(EXIT_SUCCESS); case 't': timeout = strtoul(optarg, NULL, 10); if (timeout < 0) { vsctl_fatal("value %s on -t or --timeout is invalid", optarg); } break; case OPT_RETRY: retry = true; break; VLOG_OPTION_HANDLERS TABLE_OPTION_HANDLERS(&table_style) STREAM_SSL_OPTION_HANDLERS case OPT_PEER_CA_CERT: stream_ssl_set_peer_ca_cert_file(optarg); break; case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); if (!db) { db = default_db(); } for (i = n_global_long_options; options[i].name; i++) { free(CONST_CAST(char *, options[i].name)); } free(options); } static struct vsctl_command * parse_commands(int argc, char *argv[], struct shash *local_options, size_t *n_commandsp) { struct vsctl_command *commands; size_t n_commands, allocated_commands; int i, start; commands = NULL; n_commands = allocated_commands = 0; for (start = i = 0; i <= argc; i++) { if (i == argc || !strcmp(argv[i], "--")) { if (i > start) { if (n_commands >= allocated_commands) { struct vsctl_command *c; commands = x2nrealloc(commands, &allocated_commands, sizeof *commands); for (c = commands; c < &commands[n_commands]; c++) { shash_moved(&c->options); } } parse_command(i - start, &argv[start], local_options, &commands[n_commands++]); } else if (!shash_is_empty(local_options)) { vsctl_fatal("missing command name (use --help for help)"); } start = i + 1; } } if (!n_commands) { vsctl_fatal("missing command name (use --help for help)"); } *n_commandsp = n_commands; return commands; } static void 
parse_command(int argc, char *argv[], struct shash *local_options,
              struct vsctl_command *command)
{
    /* Parses the single command held in the 'argc' elements of 'argv' into
     * 'command'.
     *
     * Any options already collected in 'local_options' are moved into
     * 'command->options' (leaving 'local_options' with the freshly
     * initialized, empty contents).  Leading arguments that start with '-'
     * are treated as further options of the form KEY or KEY=VALUE; the first
     * argument that does not start with '-' is the command name.
     *
     * Every failure is reported through vsctl_fatal(): a repeated option, a
     * missing or unknown command name, an option the command does not list,
     * an option argument that is missing or unexpected, and too few or too
     * many arguments. */
    const struct vsctl_command_syntax *p;
    struct shash_node *node;
    int n_arg;
    int i;

    shash_init(&command->options);
    shash_swap(local_options, &command->options);
    for (i = 0; i < argc; i++) {
        const char *option = argv[i];
        const char *equals;
        char *key, *value;

        if (option[0] != '-') {
            break;
        }

        /* Split "KEY=VALUE" at the first '='; a bare "KEY" has no value. */
        equals = strchr(option, '=');
        if (equals) {
            key = xmemdup0(option, equals - option);
            value = xstrdup(equals + 1);
        } else {
            key = xstrdup(option);
            value = NULL;
        }

        if (shash_find(&command->options, key)) {
            vsctl_fatal("'%s' option specified multiple times", argv[i]);
        }
        shash_add_nocopy(&command->options, key, value);
    }
    if (i == argc) {
        vsctl_fatal("missing command name (use --help for help)");
    }

    p = find_command(argv[i]);
    if (!p) {
        vsctl_fatal("unknown command '%s'; use --help for help", argv[i]);
    }

    SHASH_FOR_EACH (node, &command->options) {
        /* Look for the option in the command's option list and inspect the
         * character just past the match: '=' means the option takes an
         * argument, while ',', ' ' or the end of the string mean it does
         * not.  Anything else, or no match at all (EOF), means the command
         * has no such option.  Only the first occurrence found by strstr()
         * is examined. */
        const char *s = strstr(p->options, node->name);
        int end = s ? s[strlen(node->name)] : EOF;

        if (end != '=' && end != ',' && end != ' ' && end != '\0') {
            vsctl_fatal("'%s' command has no '%s' option",
                        argv[i], node->name);
        }
        if ((end == '=') != (node->data != NULL)) {
            if (end == '=') {
                vsctl_fatal("missing argument to '%s' option on '%s' "
                            "command", node->name, argv[i]);
            } else {
                vsctl_fatal("'%s' option on '%s' does not accept an "
                            "argument", node->name, argv[i]);
            }
        }
    }

    /* Number of arguments that follow the command name. */
    n_arg = argc - i - 1;
    if (n_arg < p->min_args) {
        vsctl_fatal("'%s' command requires at least %d arguments",
                    p->name, p->min_args);
    } else if (n_arg > p->max_args) {
        int j;

        /* If one of the surplus arguments looks like an option, give the
         * more specific hint about where options have to go. */
        for (j = i + 1; j < argc; j++) {
            if (argv[j][0] == '-') {
                vsctl_fatal("'%s' command takes at most %d arguments "
                            "(note that options must precede command "
                            "names and follow a \"--\" argument)",
                            p->name, p->max_args);
            }
        }

        vsctl_fatal("'%s' command takes at most %d arguments",
                    p->name, p->max_args);
    }

    /* 'command->argv' points into the caller's 'argv' and starts at the
     * command name, so 'command->argc' counts the name as well. */
    command->syntax = p;
    command->argc = n_arg + 1;
    command->argv = &argv[i];
}

/* Returns the "struct
vsctl_command_syntax" for a given command 'name', or a * null pointer if there is none. */ static const struct vsctl_command_syntax * find_command(const char *name) { static struct shash commands = SHASH_INITIALIZER(&commands); if (shash_is_empty(&commands)) { const struct vsctl_command_syntax *p; for (p = get_all_commands(); p->name; p++) { shash_add_assert(&commands, p->name, p); } } return shash_find_data(&commands, name); } static void vsctl_fatal(const char *format, ...) { char *message; va_list args; va_start(args, format); message = xvasprintf(format, args); va_end(args); vlog_set_levels(&VLM_vsctl, VLF_CONSOLE, VLL_OFF); VLOG_ERR("%s", message); ovs_error(0, "%s", message); vsctl_exit(EXIT_FAILURE); } /* Frees the current transaction and the underlying IDL and then calls * exit(status). * * Freeing the transaction and the IDL is not strictly necessary, but it makes * for a clean memory leak report from valgrind in the normal case. That makes * it easier to notice real memory leaks. 
*/ static void vsctl_exit(int status) { if (the_idl_txn) { ovsdb_idl_txn_abort(the_idl_txn); ovsdb_idl_txn_destroy(the_idl_txn); } ovsdb_idl_destroy(the_idl); exit(status); } static void usage(void) { printf("\ %s: ovs-vswitchd management utility\n\ usage: %s [OPTIONS] COMMAND [ARG...]\n\ \n\ Open vSwitch commands:\n\ init initialize database, if not yet initialized\n\ show print overview of database contents\n\ emer-reset reset configuration to clean state\n\ \n\ Bridge commands:\n\ add-br BRIDGE create a new bridge named BRIDGE\n\ add-br BRIDGE PARENT VLAN create new fake BRIDGE in PARENT on VLAN\n\ del-br BRIDGE delete BRIDGE and all of its ports\n\ list-br print the names of all the bridges\n\ br-exists BRIDGE exit 2 if BRIDGE does not exist\n\ br-to-vlan BRIDGE print the VLAN which BRIDGE is on\n\ br-to-parent BRIDGE print the parent of BRIDGE\n\ br-set-external-id BRIDGE KEY VALUE set KEY on BRIDGE to VALUE\n\ br-set-external-id BRIDGE KEY unset KEY on BRIDGE\n\ br-get-external-id BRIDGE KEY print value of KEY on BRIDGE\n\ br-get-external-id BRIDGE list key-value pairs on BRIDGE\n\ \n\ Port commands (a bond is considered to be a single port):\n\ list-ports BRIDGE print the names of all the ports on BRIDGE\n\ add-port BRIDGE PORT add network device PORT to BRIDGE\n\ add-bond BRIDGE PORT IFACE... add bonded port PORT in BRIDGE from IFACES\n\ del-port [BRIDGE] PORT delete PORT (which may be bonded) from BRIDGE\n\ port-to-br PORT print name of bridge that contains PORT\n\ \n\ Interface commands (a bond consists of multiple interfaces):\n\ list-ifaces BRIDGE print the names of all interfaces on BRIDGE\n\ iface-to-br IFACE print name of bridge that contains IFACE\n\ \n\ Controller commands:\n\ get-controller BRIDGE print the controllers for BRIDGE\n\ del-controller BRIDGE delete the controllers for BRIDGE\n\ set-controller BRIDGE TARGET... 
set the controllers for BRIDGE\n\ get-fail-mode BRIDGE print the fail-mode for BRIDGE\n\ del-fail-mode BRIDGE delete the fail-mode for BRIDGE\n\ set-fail-mode BRIDGE MODE set the fail-mode for BRIDGE to MODE\n\ \n\ Manager commands:\n\ get-manager print the managers\n\ del-manager delete the managers\n\ set-manager TARGET... set the list of managers to TARGET...\n\ \n\ SSL commands:\n\ get-ssl print the SSL configuration\n\ del-ssl delete the SSL configuration\n\ set-ssl PRIV-KEY CERT CA-CERT set the SSL configuration\n\ \n\ Switch commands:\n\ emer-reset reset switch to known good state\n\ \n\ Database commands:\n\ list TBL [REC] list RECord (or all records) in TBL\n\ find TBL CONDITION... list records satisfying CONDITION in TBL\n\ get TBL REC COL[:KEY] print values of COLumns in RECord in TBL\n\ set TBL REC COL[:KEY]=VALUE set COLumn values in RECord in TBL\n\ add TBL REC COL [KEY=]VALUE add (KEY=)VALUE to COLumn in RECord in TBL\n\ remove TBL REC COL [KEY=]VALUE remove (KEY=)VALUE from COLumn\n\ clear TBL REC COL clear values from COLumn in RECord in TBL\n\ create TBL COL[:KEY]=VALUE create and initialize new record\n\ destroy TBL REC delete RECord from TBL\n\ wait-until TBL REC [COL[:KEY]=VALUE] wait until condition is true\n\ Potentially unsafe database commands require --force option.\n\ \n\ Options:\n\ --db=DATABASE connect to DATABASE\n\ (default: %s)\n\ --no-wait do not wait for ovs-vswitchd to reconfigure\n\ --retry keep trying to connect to server forever\n\ -t, --timeout=SECS wait at most SECS seconds for ovs-vswitchd\n\ --dry-run do not commit changes to database\n\ --oneline print exactly one line of output per command\n", program_name, program_name, default_db()); vlog_usage(); printf("\ --no-syslog equivalent to --verbose=vsctl:syslog:warn\n"); stream_usage("database", true, true, false); printf("\n\ Other options:\n\ -h, --help display this help message\n\ -V, --version display version information\n"); exit(EXIT_SUCCESS); } static char * 
default_db(void)
{
    /* Returns the default database connection string,
     * "unix:RUNDIR/db.sock" with RUNDIR taken from ovs_rundir().  The string
     * is built on the first call and cached in a static, so every call
     * returns the same pointer. */
    static char *def;
    if (!def) {
        def = xasprintf("unix:%s/db.sock", ovs_rundir());
    }
    return def;
}

/* Returns true if it looks like this set of arguments might modify the
 * database, otherwise false.  (Not very smart, so it's prone to false
 * positives.)
 *
 * Every element of the null-terminated 'argv' is looked up as a command
 * name, so an argument that merely happens to equal the name of a
 * read-write command counts as well. */
static bool
might_write_to_db(char **argv)
{
    for (; *argv; argv++) {
        const struct vsctl_command_syntax *p = find_command(*argv);
        if (p && p->mode == RW) {
            return true;
        }
    }
    return false;
}

/* State handed to the functions that implement a command. */
struct vsctl_context {
    /* Read-only. */
    int argc;
    char **argv;
    struct shash options;

    /* Modifiable state. */
    struct ds output;
    struct table *table;
    struct ovsdb_idl *idl;
    struct ovsdb_idl_txn *txn;
    struct ovsdb_symbol_table *symtab;
    const struct ovsrec_open_vswitch *ovs;
    bool verified_ports;        /* Set by verify_ports() once it has run. */

    /* A cache of the contents of the database.
     *
     * A command that needs to use any of this information must first call
     * vsctl_context_populate_cache().  A command that changes anything that
     * could invalidate the cache must either call
     * vsctl_context_invalidate_cache() or manually update the cache to
     * maintain its correctness. */
    bool cache_valid;
    struct shash bridges;   /* Maps from bridge name to struct vsctl_bridge. */
    struct shash ports;     /* Maps from port name to struct vsctl_port. */
    struct shash ifaces;    /* Maps from interface name to struct
                             * vsctl_iface. */

    /* A command may set this member to true if some prerequisite is not met
     * and the caller should wait for something to change and then retry. */
    bool try_again;
};

/* A bridge in the cache, either a real one or a VLAN ("fake") bridge. */
struct vsctl_bridge {
    struct ovsrec_bridge *br_cfg;
    char *name;
    struct list ports;          /* Contains "struct vsctl_port"s. */

    /* VLAN ("fake") bridge support.
     *
     * Use 'parent != NULL' to detect a fake bridge, because 'vlan' can be 0
     * in either case. */
    struct hmap children;        /* VLAN bridges indexed by 'vlan'. */
    struct hmap_node children_node; /* Node in parent's 'children' hmap. */
    struct vsctl_bridge *parent; /* Real bridge, or NULL. */
    int vlan;                    /* VLAN VID (0...4095), or 0.
*/
};

/* A port in the cache. */
struct vsctl_port {
    struct list ports_node;     /* In struct vsctl_bridge's 'ports' list. */
    struct list ifaces;         /* Contains "struct vsctl_iface"s. */
    struct ovsrec_port *port_cfg;
    struct vsctl_bridge *bridge;
};

/* An interface in the cache. */
struct vsctl_iface {
    struct list ifaces_node;    /* In struct vsctl_port's 'ifaces' list. */
    struct ovsrec_interface *iface_cfg;
    struct vsctl_port *port;
};

/* Returns a single string made of the names of the options in 'ctx' followed
 * by its arguments, escaped by process_escape_args().
 * NOTE(review): the result is presumably heap-allocated and owned by the
 * caller -- confirm against process_escape_args(). */
static char *
vsctl_context_to_string(const struct vsctl_context *ctx)
{
    const struct shash_node *node;
    struct svec words;
    char *s;
    int i;

    svec_init(&words);
    SHASH_FOR_EACH (node, &ctx->options) {
        svec_add(&words, node->name);
    }
    for (i = 0; i < ctx->argc; i++) {
        svec_add(&words, ctx->argv[i]);
    }
    svec_terminate(&words);

    s = process_escape_args(words.names);

    svec_destroy(&words);

    return s;
}

/* Calls the IDL "verify" functions for the bridge list of 'ctx->ovs', the
 * port list of every bridge and the interface list of every port.  This is
 * done at most once per context; 'ctx->verified_ports' records that it has
 * happened. */
static void
verify_ports(struct vsctl_context *ctx)
{
    if (!ctx->verified_ports) {
        const struct ovsrec_bridge *bridge;
        const struct ovsrec_port *port;

        ovsrec_open_vswitch_verify_bridges(ctx->ovs);
        OVSREC_BRIDGE_FOR_EACH (bridge, ctx->idl) {
            ovsrec_bridge_verify_ports(bridge);
        }
        OVSREC_PORT_FOR_EACH (port, ctx->idl) {
            ovsrec_port_verify_interfaces(port);
        }

        ctx->verified_ports = true;
    }
}

/* Allocates a cache entry for a bridge named 'name' (the name is copied) and
 * adds it to 'ctx->bridges'.  If 'parent' is nonnull the new entry is also
 * inserted into the parent's 'children' map, hashed on 'vlan'.  Returns the
 * new entry. */
static struct vsctl_bridge *
add_bridge_to_cache(struct vsctl_context *ctx,
                    struct ovsrec_bridge *br_cfg, const char *name,
                    struct vsctl_bridge *parent, int vlan)
{
    struct vsctl_bridge *br = xmalloc(sizeof *br);
    br->br_cfg = br_cfg;
    br->name = xstrdup(name);
    list_init(&br->ports);
    br->parent = parent;
    br->vlan = vlan;
    hmap_init(&br->children);
    if (parent) {
        hmap_insert(&parent->children, &br->children_node, hash_int(vlan, 0));
    }
    shash_add(&ctx->bridges, br->name, br);
    return br;
}

/* Sets the bridge list of 'ovs' to its current contents minus every
 * occurrence of 'bridge'. */
static void
ovs_delete_bridge(const struct ovsrec_open_vswitch *ovs,
                  struct ovsrec_bridge *bridge)
{
    struct ovsrec_bridge **bridges;
    size_t i, n;

    /* Sized for the worst case, in which nothing is removed. */
    bridges = xmalloc(sizeof *ovs->bridges * ovs->n_bridges);
    for (i = n = 0; i < ovs->n_bridges; i++) {
        if (ovs->bridges[i] != bridge) {
            bridges[n++] = ovs->bridges[i];
        }
    }
    ovsrec_open_vswitch_set_bridges(ovs, bridges, n);
    free(bridges);
}

/* Removes 'br' from the cache and frees it.  If 'br' has a database record
 * ('br->br_cfg' is nonnull), that record is deleted and dropped from the
 * bridge list of 'ctx->ovs' as well.  'br' must no longer have any ports or
 * child bridges. */
static void
del_cached_bridge(struct vsctl_context *ctx, struct vsctl_bridge *br)
{
    ovs_assert(list_is_empty(&br->ports));
    ovs_assert(hmap_is_empty(&br->children));
    if (br->parent) {
        hmap_remove(&br->parent->children, &br->children_node);
    }
    if (br->br_cfg) {
        ovsrec_bridge_delete(br->br_cfg);
        ovs_delete_bridge(ctx->ovs, br->br_cfg);
    }
    shash_find_and_delete(&ctx->bridges, br->name);
    hmap_destroy(&br->children);
    free(br->name);
    free(br);
}

/* Returns true if 'port_cfg' has its 'fake_bridge' flag set and carries a
 * tag in the range 0...4095, false otherwise. */
static bool
port_is_fake_bridge(const struct ovsrec_port *port_cfg)
{
    return (port_cfg->fake_bridge
            && port_cfg->tag
            && *port_cfg->tag >= 0 && *port_cfg->tag <= 4095);
}

/* Returns the child of 'parent' whose VLAN is 'vlan', or a null pointer if
 * 'parent' has no such child. */
static struct vsctl_bridge *
find_vlan_bridge(struct vsctl_bridge *parent, int vlan)
{
    struct vsctl_bridge *child;

    HMAP_FOR_EACH_IN_BUCKET (child, children_node, hash_int(vlan, 0),
                             &parent->children) {
        if (child->vlan == vlan) {
            return child;
        }
    }

    return NULL;
}

/* Allocates a cache entry for 'port_cfg', adds it to 'ctx->ports' under the
 * port's name and returns it.  The port is attached to 'parent' unless it
 * has a tag in the range 0...4095 and 'parent' has a child bridge for that
 * VLAN, in which case it is attached to that child instead. */
static struct vsctl_port *
add_port_to_cache(struct vsctl_context *ctx, struct vsctl_bridge *parent,
                  struct ovsrec_port *port_cfg)
{
    struct vsctl_port *port;

    if (port_cfg->tag
        && *port_cfg->tag >= 0 && *port_cfg->tag <= 4095) {
        struct vsctl_bridge *vlan_bridge;

        vlan_bridge = find_vlan_bridge(parent, *port_cfg->tag);
        if (vlan_bridge) {
            parent = vlan_bridge;
        }
    }

    port = xmalloc(sizeof *port);
    list_push_back(&parent->ports, &port->ports_node);
    list_init(&port->ifaces);
    port->port_cfg = port_cfg;
    port->bridge = parent;
    shash_add(&ctx->ports, port_cfg->name, port);

    return port;
}

/* Removes 'port' from its bridge's list and from 'ctx->ports', deletes its
 * database record and frees the cache entry.  'port' must no longer have any
 * interfaces. */
static void
del_cached_port(struct vsctl_context *ctx, struct vsctl_port *port)
{
    ovs_assert(list_is_empty(&port->ifaces));
    list_remove(&port->ports_node);
    shash_find_and_delete(&ctx->ports, port->port_cfg->name);
    ovsrec_port_delete(port->port_cfg);
    free(port);
}

static struct vsctl_iface *
add_iface_to_cache(struct vsctl_context *ctx, struct vsctl_port *parent,
                   struct ovsrec_interface *iface_cfg)
{
    struct vsctl_iface *iface;

    iface = xmalloc(sizeof *iface);
    /* Link the new entry into the interface list of its port. */
    list_push_back(&parent->ifaces, &iface->ifaces_node);
iface->iface_cfg = iface_cfg;
    iface->port = parent;
    shash_add(&ctx->ifaces, iface_cfg->name, iface);

    return iface;
}

/* Unlinks 'iface' from its port's list and from 'ctx->ifaces', deletes the
 * interface record it wraps, and frees the cache entry. */
static void
del_cached_iface(struct vsctl_context *ctx, struct vsctl_iface *iface)
{
    list_remove(&iface->ifaces_node);
    shash_find_and_delete(&ctx->ifaces, iface->iface_cfg->name);
    ovsrec_interface_delete(iface->iface_cfg);
    free(iface);
}

/* Discards the bridge, port and interface caches in 'ctx' and marks the cache
 * invalid.  Only cache entries are freed here; no database record is deleted.
 * Does nothing if the cache is not currently valid. */
static void
vsctl_context_invalidate_cache(struct vsctl_context *ctx)
{
    struct shash_node *node;

    if (!ctx->cache_valid) {
        return;
    }
    ctx->cache_valid = false;

    /* Bridge entries own a name string and a 'children' map, so they are
     * torn down by hand rather than with shash_destroy_free_data(). */
    SHASH_FOR_EACH (node, &ctx->bridges) {
        struct vsctl_bridge *bridge = node->data;
        hmap_destroy(&bridge->children);
        free(bridge->name);
        free(bridge);
    }
    shash_destroy(&ctx->bridges);

    shash_destroy_free_data(&ctx->ports);
    shash_destroy_free_data(&ctx->ifaces);
}

/* Registers with 'ctx->idl' the columns that the cache code reads: the
 * 'bridges' column of Open_vSwitch; name, controller, fail_mode and ports of
 * each bridge; name, fake_bridge, tag and interfaces of each port; and the
 * name of each interface. */
static void
pre_get_info(struct vsctl_context *ctx)
{
    ovsdb_idl_add_column(ctx->idl, &ovsrec_open_vswitch_col_bridges);

    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_name);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_controller);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_fail_mode);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_ports);

    ovsdb_idl_add_column(ctx->idl, &ovsrec_port_col_name);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_port_col_fake_bridge);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_port_col_tag);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_port_col_interfaces);

    ovsdb_idl_add_column(ctx->idl, &ovsrec_interface_col_name);
}

/* Builds the bridge, port and interface caches in 'ctx' from the records
 * reachable from 'ctx->ovs', unless the cache is already valid.
 *
 * The work is done in two passes over 'ovs->bridges':
 *
 *   1. Create a cache entry for every uniquely named bridge, plus a child
 *      ("fake") bridge entry for every uniquely named port that
 *      port_is_fake_bridge() accepts.
 *
 *   2. Create cache entries for ports and their interfaces, logging
 *      duplicates instead of caching them twice. */
static void
vsctl_context_populate_cache(struct vsctl_context *ctx)
{
    const struct ovsrec_open_vswitch *ovs = ctx->ovs;
    struct sset bridges, ports;
    size_t i;

    if (ctx->cache_valid) {
        /* Cache is already populated. */
        return;
    }
    ctx->cache_valid = true;
    shash_init(&ctx->bridges);
    shash_init(&ctx->ports);
    shash_init(&ctx->ifaces);

    /* Pass 1: bridges.  'bridges' and 'ports' track names already seen. */
    sset_init(&bridges);
    sset_init(&ports);
    for (i = 0; i < ovs->n_bridges; i++) {
        struct ovsrec_bridge *br_cfg = ovs->bridges[i];
        struct vsctl_bridge *br;
        size_t j;

        if (!sset_add(&bridges, br_cfg->name)) {
            VLOG_WARN("%s: database contains duplicate bridge name",
                      br_cfg->name);
            continue;
        }
        br = add_bridge_to_cache(ctx, br_cfg, br_cfg->name, NULL, 0);
        if (!br) {
            continue;
        }

        for (j = 0; j < br_cfg->n_ports; j++) {
            struct ovsrec_port *port_cfg = br_cfg->ports[j];

            if (!sset_add(&ports, port_cfg->name)) {
                /* Duplicate port name.  (We will warn about that later.) */
                continue;
            }

            /* A fake-bridge port becomes a child bridge of 'br', provided
             * that its name does not collide with a bridge already seen. */
            if (port_is_fake_bridge(port_cfg)
                && sset_add(&bridges, port_cfg->name)) {
                add_bridge_to_cache(ctx, NULL, port_cfg->name, br,
                                    *port_cfg->tag);
            }
        }
    }
    sset_destroy(&bridges);
    sset_destroy(&ports);

    /* Pass 2: ports and interfaces.  'bridges' is rebuilt from scratch so
     * that the same duplicate bridges are skipped as in pass 1. */
    sset_init(&bridges);
    for (i = 0; i < ovs->n_bridges; i++) {
        struct ovsrec_bridge *br_cfg = ovs->bridges[i];
        struct vsctl_bridge *br;
        size_t j;

        if (!sset_add(&bridges, br_cfg->name)) {
            continue;
        }
        br = shash_find_data(&ctx->bridges, br_cfg->name);
        for (j = 0; j < br_cfg->n_ports; j++) {
            struct ovsrec_port *port_cfg = br_cfg->ports[j];
            struct vsctl_port *port;
            size_t k;

            port = shash_find_data(&ctx->ports, port_cfg->name);
            if (port) {
                if (port_cfg == port->port_cfg) {
                    VLOG_WARN("%s: port is in multiple bridges (%s and %s)",
                              port_cfg->name, br->name, port->bridge->name);
                } else {
                    /* Log as an error because this violates the database's
                     * uniqueness constraints, so the database server shouldn't
                     * have allowed it. */
                    VLOG_ERR("%s: database contains duplicate port name",
                             port_cfg->name);
                }
                continue;
            }

            /* Skip a fake-bridge port whose name was already recorded in
             * 'bridges' during this pass. */
            if (port_is_fake_bridge(port_cfg)
                && !sset_add(&bridges, port_cfg->name)) {
                continue;
            }

            port = add_port_to_cache(ctx, br, port_cfg);
            for (k = 0; k < port_cfg->n_interfaces; k++) {
                struct ovsrec_interface *iface_cfg = port_cfg->interfaces[k];
                struct vsctl_iface *iface;

                iface = shash_find_data(&ctx->ifaces, iface_cfg->name);
                if (iface) {
                    if (iface_cfg == iface->iface_cfg) {
                        VLOG_WARN("%s: interface is in multiple ports "
                                  "(%s and %s)",
                                  iface_cfg->name,
                                  iface->port->port_cfg->name,
                                  port->port_cfg->name);
                    } else {
                        /* Log as an error because this violates the database's
                         * uniqueness constraints, so the database server
                         * shouldn't have allowed it. */
                        VLOG_ERR("%s: database contains duplicate interface "
                                 "name", iface_cfg->name);
                    }
                    continue;
                }

                add_iface_to_cache(ctx, port, iface_cfg);
            }
        }
    }
    sset_destroy(&bridges);
}

/* Reports a fatal error through vsctl_fatal(), prefixed with 'msg', if a
 * cached bridge, port or interface is already called 'name'.
 *
 * Takes ownership of 'msg': it is freed here when no conflict is found.
 * NOTE(review): presumably vsctl_fatal() does not return, which is why 'msg'
 * is not freed on the conflict paths -- confirm. */
static void
check_conflicts(struct vsctl_context *ctx, const char *name,
                char *msg)
{
    struct vsctl_iface *iface;
    struct vsctl_port *port;

    verify_ports(ctx);

    if (shash_find(&ctx->bridges, name)) {
        vsctl_fatal("%s because a bridge named %s already exists",
                    msg, name);
    }

    port = shash_find_data(&ctx->ports, name);
    if (port) {
        vsctl_fatal("%s because a port named %s already exists on "
                    "bridge %s", msg, name, port->bridge->name);
    }

    iface = shash_find_data(&ctx->ifaces, name);
    if (iface) {
        vsctl_fatal("%s because an interface named %s already exists "
                    "on bridge %s", msg, name, iface->port->bridge->name);
    }

    free(msg);
}

/* Looks up the cached bridge (real or fake) called 'name'.  Returns it, or
 * NULL if there is none; when 'must_exist' is true a missing bridge is
 * reported through vsctl_fatal() instead.
 *
 * The cache must already be populated (asserted).  Also verifies the
 * 'bridges' column of 'ctx->ovs'. */
static struct vsctl_bridge *
find_bridge(struct vsctl_context *ctx, const char *name, bool must_exist)
{
    struct vsctl_bridge *br;

    ovs_assert(ctx->cache_valid);

    br = shash_find_data(&ctx->bridges, name);
    if (must_exist && !br) {
        vsctl_fatal("no bridge named %s", name);
    }
    ovsrec_open_vswitch_verify_bridges(ctx->ovs);
    return br;
}

/* Like find_bridge(), but additionally reports a fatal error if the bridge
 * found has a parent, that is, if it is a fake bridge. */
static struct vsctl_bridge *
find_real_bridge(struct vsctl_context *ctx,
                 const char *name, bool must_exist)
{
    struct vsctl_bridge *br = find_bridge(ctx, name, must_exist);
    if (br && br->parent) {
        vsctl_fatal("%s is a fake bridge", name);
    }
    return br;
}

/* Looks up the cached port called 'name'.  A port that has the same name as
 * the bridge it is cached on is treated as not found.  Returns the port or
 * NULL; when 'must_exist' is true a missing port is reported through
 * vsctl_fatal() instead.
 *
 * The cache must already be populated (asserted). */
static struct vsctl_port *
find_port(struct vsctl_context *ctx, const char *name, bool must_exist)
{
    struct vsctl_port *port;

    ovs_assert(ctx->cache_valid);

    port = shash_find_data(&ctx->ports, name);
    if (port && !strcmp(name, port->bridge->name)) {
        port = NULL;
    }
    if (must_exist && !port) {
        vsctl_fatal("no port named %s", name);
    }
    verify_ports(ctx);
    return port;
}

/* Looks up the cached interface called 'name'.  An interface that has the
 * same name as the bridge its port is cached on is treated as not found.
 * Returns the interface or NULL; when 'must_exist' is true a missing
 * interface is reported through vsctl_fatal() instead.
 *
 * The cache must already be populated (asserted). */
static struct vsctl_iface *
find_iface(struct vsctl_context *ctx, const char *name, bool must_exist)
{
    struct vsctl_iface *iface;

    ovs_assert(ctx->cache_valid);

    iface = shash_find_data(&ctx->ifaces, name);
    if (iface && !strcmp(name, iface->port->bridge->name)) {
        iface = NULL;
    }
    if (must_exist && !iface) {
        vsctl_fatal("no interface named %s", name);
    }
    verify_ports(ctx);
    return iface;
}

/* Sets the 'ports' column of 'br' to its current contents with 'port'
 * appended. */
static void
bridge_insert_port(struct ovsrec_bridge *br, struct ovsrec_port *port)
{
    struct ovsrec_port **ports;
    size_t i;

    ports = xmalloc(sizeof *br->ports * (br->n_ports + 1));
    for (i = 0; i < br->n_ports; i++) {
        ports[i] = br->ports[i];
    }
    ports[br->n_ports] = port;
    ovsrec_bridge_set_ports(br, ports, br->n_ports + 1);
    free(ports);
}

/* Sets the 'ports' column of 'br' to its current contents with every
 * occurrence of 'port' left out. */
static void
bridge_delete_port(struct ovsrec_bridge *br, struct ovsrec_port *port)
{
    struct ovsrec_port **ports;
    size_t i, n;

    ports = xmalloc(sizeof *br->ports * br->n_ports);
    for (i = n = 0; i < br->n_ports; i++) {
        if (br->ports[i] != port) {
            ports[n++] = br->ports[i];
        }
    }
    ovsrec_bridge_set_ports(br, ports, n);
    free(ports);
}

/* Sets the 'bridges' column of 'ovs' to its current contents with 'bridge'
 * appended. */
static void
ovs_insert_bridge(const struct ovsrec_open_vswitch *ovs,
                  struct ovsrec_bridge *bridge)
{
    struct ovsrec_bridge **bridges;
    size_t i;

    bridges = xmalloc(sizeof *ovs->bridges * (ovs->n_bridges + 1));
    for (i = 0; i < ovs->n_bridges; i++) {
        bridges[i] = ovs->bridges[i];
    }
    bridges[ovs->n_bridges] = bridge;
    ovsrec_open_vswitch_set_bridges(ovs, bridges, ovs->n_bridges + 1);
    free(bridges);
}

/* Command handler with an intentionally empty body. */
static void
cmd_init(struct vsctl_context *ctx OVS_UNUSED)
{
}
struct cmd_show_table { const struct ovsdb_idl_table_class *table; const struct ovsdb_idl_column *name_column; const struct ovsdb_idl_column *columns[3]; bool recurse; }; static struct cmd_show_table cmd_show_tables[] = { {&ovsrec_table_open_vswitch, NULL, {&ovsrec_open_vswitch_col_manager_options, &ovsrec_open_vswitch_col_bridges, &ovsrec_open_vswitch_col_ovs_version}, false}, {&ovsrec_table_bridge, &ovsrec_bridge_col_name, {&ovsrec_bridge_col_controller, &ovsrec_bridge_col_fail_mode, &ovsrec_bridge_col_ports}, false}, {&ovsrec_table_port, &ovsrec_port_col_name, {&ovsrec_port_col_tag, &ovsrec_port_col_trunks, &ovsrec_port_col_interfaces}, false}, {&ovsrec_table_interface, &ovsrec_interface_col_name, {&ovsrec_interface_col_type, &ovsrec_interface_col_options, NULL}, false}, {&ovsrec_table_controller, &ovsrec_controller_col_target, {&ovsrec_controller_col_is_connected, NULL, NULL}, false}, {&ovsrec_table_manager, &ovsrec_manager_col_target, {&ovsrec_manager_col_is_connected, NULL, NULL}, false}, }; static void pre_cmd_show(struct vsctl_context *ctx) { struct cmd_show_table *show; for (show = cmd_show_tables; show < &cmd_show_tables[ARRAY_SIZE(cmd_show_tables)]; show++) { size_t i; ovsdb_idl_add_table(ctx->idl, show->table); if (show->name_column) { ovsdb_idl_add_column(ctx->idl, show->name_column); } for (i = 0; i < ARRAY_SIZE(show->columns); i++) { const struct ovsdb_idl_column *column = show->columns[i]; if (column) { ovsdb_idl_add_column(ctx->idl, column); } } } } static struct cmd_show_table * cmd_show_find_table_by_row(const struct ovsdb_idl_row *row) { struct cmd_show_table *show; for (show = cmd_show_tables; show < &cmd_show_tables[ARRAY_SIZE(cmd_show_tables)]; show++) { if (show->table == row->table->class) { return show; } } return NULL; } static struct cmd_show_table * cmd_show_find_table_by_name(const char *name) { struct cmd_show_table *show; for (show = cmd_show_tables; show < &cmd_show_tables[ARRAY_SIZE(cmd_show_tables)]; show++) { if 
(!strcmp(show->table->name, name)) { return show; } } return NULL; } static void cmd_show_row(struct vsctl_context *ctx, const struct ovsdb_idl_row *row, int level) { struct cmd_show_table *show = cmd_show_find_table_by_row(row); size_t i; ds_put_char_multiple(&ctx->output, ' ', level * 4); if (show && show->name_column) { const struct ovsdb_datum *datum; ds_put_format(&ctx->output, "%s ", show->table->name); datum = ovsdb_idl_read(row, show->name_column); ovsdb_datum_to_string(datum, &show->name_column->type, &ctx->output); } else { ds_put_format(&ctx->output, UUID_FMT, UUID_ARGS(&row->uuid)); } ds_put_char(&ctx->output, '\n'); if (!show || show->recurse) { return; } show->recurse = true; for (i = 0; i < ARRAY_SIZE(show->columns); i++) { const struct ovsdb_idl_column *column = show->columns[i]; const struct ovsdb_datum *datum; if (!column) { break; } datum = ovsdb_idl_read(row, column); if (column->type.key.type == OVSDB_TYPE_UUID && column->type.key.u.uuid.refTableName) { struct cmd_show_table *ref_show; size_t j; ref_show = cmd_show_find_table_by_name( column->type.key.u.uuid.refTableName); if (ref_show) { for (j = 0; j < datum->n; j++) { const struct ovsdb_idl_row *ref_row; ref_row = ovsdb_idl_get_row_for_uuid(ctx->idl, ref_show->table, &datum->keys[j].uuid); if (ref_row) { cmd_show_row(ctx, ref_row, level + 1); } } continue; } } if (!ovsdb_datum_is_default(datum, &column->type)) { ds_put_char_multiple(&ctx->output, ' ', (level + 1) * 4); ds_put_format(&ctx->output, "%s: ", column->name); ovsdb_datum_to_string(datum, &column->type, &ctx->output); ds_put_char(&ctx->output, '\n'); } } show->recurse = false; } static void cmd_show(struct vsctl_context *ctx) { const struct ovsdb_idl_row *row; for (row = ovsdb_idl_first_row(ctx->idl, cmd_show_tables[0].table); row; row = ovsdb_idl_next_row(row)) { cmd_show_row(ctx, row, 0); } } static void pre_cmd_emer_reset(struct vsctl_context *ctx) { ovsdb_idl_add_column(ctx->idl, &ovsrec_open_vswitch_col_manager_options); 
ovsdb_idl_add_column(ctx->idl, &ovsrec_open_vswitch_col_ssl);

    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_controller);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_fail_mode);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_mirrors);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_netflow);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_sflow);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_ipfix);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_flood_vlans);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_other_config);

    ovsdb_idl_add_column(ctx->idl, &ovsrec_port_col_other_config);

    ovsdb_idl_add_column(ctx->idl,
                          &ovsrec_interface_col_ingress_policing_rate);
    ovsdb_idl_add_column(ctx->idl,
                          &ovsrec_interface_col_ingress_policing_burst);
}

/* Resets configuration:
 *
 *   - Clears 'manager_options' and 'ssl' on 'ctx->ovs'.
 *
 *   - On every bridge, clears controller, fail_mode, mirrors, netflow, sflow,
 *     ipfix and flood_vlans, and reduces other_config to its "hwaddr" key
 *     (or to nothing if that key is absent).
 *
 *   - On every port, clears other_config.
 *
 *   - On every interface, sets ingress policing rate and burst to 0.
 *
 *   - Deletes every mirror, controller, manager, netflow, ssl, sflow, ipfix
 *     and flow_sample_collector_set record.
 *
 * Finally invalidates the cache in 'ctx'. */
static void
cmd_emer_reset(struct vsctl_context *ctx)
{
    const struct ovsdb_idl *idl = ctx->idl;
    const struct ovsrec_bridge *br;
    const struct ovsrec_port *port;
    const struct ovsrec_interface *iface;
    const struct ovsrec_mirror *mirror, *next_mirror;
    const struct ovsrec_controller *ctrl, *next_ctrl;
    const struct ovsrec_manager *mgr, *next_mgr;
    const struct ovsrec_netflow *nf, *next_nf;
    const struct ovsrec_ssl *ssl, *next_ssl;
    const struct ovsrec_sflow *sflow, *next_sflow;
    const struct ovsrec_ipfix *ipfix, *next_ipfix;
    const struct ovsrec_flow_sample_collector_set *fscset, *next_fscset;

    /* Reset the Open_vSwitch table. */
    ovsrec_open_vswitch_set_manager_options(ctx->ovs, NULL, 0);
    ovsrec_open_vswitch_set_ssl(ctx->ovs, NULL);

    OVSREC_BRIDGE_FOR_EACH (br, idl) {
        const char *hwaddr;

        ovsrec_bridge_set_controller(br, NULL, 0);
        ovsrec_bridge_set_fail_mode(br, NULL);
        ovsrec_bridge_set_mirrors(br, NULL, 0);
        ovsrec_bridge_set_netflow(br, NULL);
        ovsrec_bridge_set_sflow(br, NULL);
        ovsrec_bridge_set_ipfix(br, NULL);
        ovsrec_bridge_set_flood_vlans(br, NULL, 0);

        /* We only want to save the "hwaddr" key from other_config. */
        hwaddr = smap_get(&br->other_config, "hwaddr");
        if (hwaddr) {
            struct smap smap = SMAP_INITIALIZER(&smap);
            smap_add(&smap, "hwaddr", hwaddr);
            ovsrec_bridge_set_other_config(br, &smap);
            smap_destroy(&smap);
        } else {
            ovsrec_bridge_set_other_config(br, NULL);
        }
    }

    OVSREC_PORT_FOR_EACH (port, idl) {
        ovsrec_port_set_other_config(port, NULL);
    }

    OVSREC_INTERFACE_FOR_EACH (iface, idl) {
        /* xxx What do we do about gre/patch devices created by mgr? */

        ovsrec_interface_set_ingress_policing_rate(iface, 0);
        ovsrec_interface_set_ingress_policing_burst(iface, 0);
    }

    /* The _SAFE iterators are used below because each loop body deletes the
     * record it is visiting. */
    OVSREC_MIRROR_FOR_EACH_SAFE (mirror, next_mirror, idl) {
        ovsrec_mirror_delete(mirror);
    }

    OVSREC_CONTROLLER_FOR_EACH_SAFE (ctrl, next_ctrl, idl) {
        ovsrec_controller_delete(ctrl);
    }

    OVSREC_MANAGER_FOR_EACH_SAFE (mgr, next_mgr, idl) {
        ovsrec_manager_delete(mgr);
    }

    OVSREC_NETFLOW_FOR_EACH_SAFE (nf, next_nf, idl) {
        ovsrec_netflow_delete(nf);
    }

    OVSREC_SSL_FOR_EACH_SAFE (ssl, next_ssl, idl) {
        ovsrec_ssl_delete(ssl);
    }

    OVSREC_SFLOW_FOR_EACH_SAFE (sflow, next_sflow, idl) {
        ovsrec_sflow_delete(sflow);
    }

    OVSREC_IPFIX_FOR_EACH_SAFE (ipfix, next_ipfix, idl) {
        ovsrec_ipfix_delete(ipfix);
    }

    OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, next_fscset, idl) {
        ovsrec_flow_sample_collector_set_delete(fscset);
    }

    vsctl_context_invalidate_cache(ctx);
}

/* Creates a bridge called 'ctx->argv[1]'.
 *
 * With one argument (argc == 2) a real bridge is created: a new bridge
 * record holding one port of the same name, which in turn holds one
 * "internal" interface of the same name.
 *
 * With three arguments (argc == 4) a fake bridge is created on the existing
 * real bridge 'ctx->argv[2]' for VLAN 'ctx->argv[3]' (0...4095): a port with
 * 'fake_bridge' set and that VLAN as its tag, holding one "internal"
 * interface, is added to the parent's bridge record.
 *
 * If the "--may-exist" option is present and a bridge of that name already
 * exists, nothing is created; it is a fatal error if the existing bridge does
 * not match what was asked for (real vs. fake, parent, VLAN).
 *
 * Invalidates the cache after creating anything. */
static void
cmd_add_br(struct vsctl_context *ctx)
{
    bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL;
    const char *br_name, *parent_name;
    int vlan;

    br_name = ctx->argv[1];
    if (ctx->argc == 2) {
        parent_name = NULL;
        vlan = 0;
    } else if (ctx->argc == 4) {
        parent_name = ctx->argv[2];
        /* NOTE(review): atoi() gives no error indication, so a non-numeric
         * argument is presumably treated as VLAN 0 -- confirm whether that
         * is intended. */
        vlan = atoi(ctx->argv[3]);
        if (vlan < 0 || vlan > 4095) {
            vsctl_fatal("%s: vlan must be between 0 and 4095", ctx->argv[0]);
        }
    } else {
        vsctl_fatal("'%s' command takes exactly 1 or 3 arguments",
                    ctx->argv[0]);
    }

    vsctl_context_populate_cache(ctx);
    if (may_exist) {
        struct vsctl_bridge *br;

        br = find_bridge(ctx, br_name, false);
        if (br) {
            if (!parent_name) {
                /* A real bridge was requested. */
                if (br->parent) {
                    vsctl_fatal("\"--may-exist add-br %s\" but %s is "
                                "a VLAN bridge for VLAN %d",
                                br_name, br_name, br->vlan);
                }
            } else {
                /* A fake bridge was requested. */
                if (!br->parent) {
                    vsctl_fatal("\"--may-exist add-br %s %s %d\" but %s "
                                "is not a VLAN bridge",
                                br_name, parent_name, vlan, br_name);
                } else if (strcmp(br->parent->name, parent_name)) {
                    vsctl_fatal("\"--may-exist add-br %s %s %d\" but %s "
                                "has the wrong parent %s",
                                br_name, parent_name, vlan,
                                br_name, br->parent->name);
                } else if (br->vlan != vlan) {
                    vsctl_fatal("\"--may-exist add-br %s %s %d\" but %s "
                                "is a VLAN bridge for the wrong VLAN %d",
                                br_name, parent_name, vlan, br_name, br->vlan);
                }
            }
            return;
        }
    }
    /* check_conflicts() takes ownership of the message string. */
    check_conflicts(ctx, br_name,
                    xasprintf("cannot create a bridge named %s", br_name));

    if (!parent_name) {
        struct ovsrec_port *port;
        struct ovsrec_interface *iface;
        struct ovsrec_bridge *br;

        iface = ovsrec_interface_insert(ctx->txn);
        ovsrec_interface_set_name(iface, br_name);
        ovsrec_interface_set_type(iface, "internal");

        port = ovsrec_port_insert(ctx->txn);
        ovsrec_port_set_name(port, br_name);
        ovsrec_port_set_interfaces(port, &iface, 1);

        br = ovsrec_bridge_insert(ctx->txn);
        ovsrec_bridge_set_name(br, br_name);
        ovsrec_bridge_set_ports(br, &port, 1);

        ovs_insert_bridge(ctx->ovs, br);
    } else {
        struct vsctl_bridge *parent;
        struct ovsrec_port *port;
        struct ovsrec_interface *iface;
        struct ovsrec_bridge *br;
        int64_t tag = vlan;

        parent = find_bridge(ctx, parent_name, false);
        if (parent && parent->parent) {
            vsctl_fatal("cannot create bridge with fake bridge as parent");
        }
        if (!parent) {
            vsctl_fatal("parent bridge %s does not exist", parent_name);
        }
        br = parent->br_cfg;

        iface = ovsrec_interface_insert(ctx->txn);
        ovsrec_interface_set_name(iface, br_name);
        ovsrec_interface_set_type(iface, "internal");

        port = ovsrec_port_insert(ctx->txn);
        ovsrec_port_set_name(port, br_name);
        ovsrec_port_set_interfaces(port, &iface, 1);
        ovsrec_port_set_fake_bridge(port, true);
        ovsrec_port_set_tag(port, &tag, 1);

        bridge_insert_port(br, port);
    }

    /* The cache was not updated in step with the new records, so drop it. */
    vsctl_context_invalidate_cache(ctx);
}

/* Deletes 'port': removes its record from the 'ports' column of the real
 * bridge that carries it (the parent's record when the port is cached on a
 * fake bridge), then deletes each of its cached interfaces and finally the
 * cached port itself. */
static void
del_port(struct vsctl_context *ctx, struct vsctl_port *port)
{
    struct vsctl_iface *iface, *next_iface;

    bridge_delete_port((port->bridge->parent
                        ? port->bridge->parent->br_cfg
                        : port->bridge->br_cfg), port->port_cfg);

    LIST_FOR_EACH_SAFE (iface, next_iface, ifaces_node, &port->ifaces) {
        del_cached_iface(ctx, iface);
    }
    del_cached_port(ctx, port);
}

/* Deletes 'br': recursively deletes its child bridges, deletes its ports,
 * deletes every flow_sample_collector_set record whose 'bridge' is
 * 'br->br_cfg', and finally deletes the cached bridge itself. */
static void
del_bridge(struct vsctl_context *ctx, struct vsctl_bridge *br)
{
    struct vsctl_bridge *child, *next_child;
    struct vsctl_port *port, *next_port;
    const struct ovsrec_flow_sample_collector_set *fscset, *next_fscset;

    HMAP_FOR_EACH_SAFE (child, next_child, children_node, &br->children) {
        del_bridge(ctx, child);
    }

    LIST_FOR_EACH_SAFE (port, next_port, ports_node, &br->ports) {
        del_port(ctx, port);
    }

    OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, next_fscset,
                                                    ctx->idl) {
        if (fscset->bridge == br->br_cfg) {
            ovsrec_flow_sample_collector_set_delete(fscset);
        }
    }

    del_cached_bridge(ctx, br);
}

/* Deletes the bridge called 'ctx->argv[1]'.  A missing bridge is a fatal
 * error unless the "--if-exists" option is present. */
static void
cmd_del_br(struct vsctl_context *ctx)
{
    bool must_exist = !shash_find(&ctx->options, "--if-exists");
    struct vsctl_bridge *bridge;

    vsctl_context_populate_cache(ctx);
    bridge = find_bridge(ctx, ctx->argv[1], must_exist);
    if (bridge) {
        del_bridge(ctx, bridge);
    }
}

/* Sorts 'svec' in place and appends each of its names to 'output', one per
 * line. */
static void
output_sorted(struct svec *svec, struct ds *output)
{
    const char *name;
    size_t i;

    svec_sort(svec);
    SVEC_FOR_EACH (i, name, svec) {
        ds_put_format(output, "%s\n", name);
    }
}

/* Lists cached bridge names in sorted order.  The "--real" and "--fake"
 * options restrict the listing to bridges without or with a parent,
 * respectively. */
static void
cmd_list_br(struct vsctl_context *ctx)
{
    struct shash_node *node;
    struct svec bridges;
    bool real = shash_find(&ctx->options, "--real");
    bool fake = shash_find(&ctx->options, "--fake");

    /* If neither fake nor real were requested, return both. */
    if (!real && !fake) {
        real = fake = true;
    }

    vsctl_context_populate_cache(ctx);

    svec_init(&bridges);
    SHASH_FOR_EACH (node, &ctx->bridges) {
        struct vsctl_bridge *br = node->data;

        if (br->parent ?
fake : real) { svec_add(&bridges, br->name); } } output_sorted(&bridges, &ctx->output); svec_destroy(&bridges); } static void cmd_br_exists(struct vsctl_context *ctx) { vsctl_context_populate_cache(ctx); if (!find_bridge(ctx, ctx->argv[1], false)) { vsctl_exit(2); } } static void set_external_id(struct smap *old, struct smap *new, char *key, char *value) { smap_clone(new, old); if (value) { smap_replace(new, key, value); } else { smap_remove(new, key); } } static void pre_cmd_br_set_external_id(struct vsctl_context *ctx) { pre_get_info(ctx); ovsdb_idl_add_column(ctx->idl, &ovsrec_bridge_col_external_ids); ovsdb_idl_add_column(ctx->idl, &ovsrec_port_col_external_ids); } static void cmd_br_set_external_id(struct vsctl_context *ctx) { struct vsctl_bridge *bridge; struct smap new; vsctl_context_populate_cache(ctx); bridge = find_bridge(ctx, ctx->argv[1], true); if (bridge->br_cfg) { set_external_id(&bridge->br_cfg->external_ids, &new, ctx->argv[2], ctx->argc >= 4 ? ctx->argv[3] : NULL); ovsrec_bridge_verify_external_ids(bridge->br_cfg); ovsrec_bridge_set_external_ids(bridge->br_cfg, &new); } else { char *key = xasprintf("fake-bridge-%s", ctx->argv[2]); struct vsctl_port *port = shash_find_data(&ctx->ports, ctx->argv[1]); set_external_id(&port->port_cfg->external_ids, &new, key, ctx->argc >= 4 ? 
ctx->argv[3] : NULL); ovsrec_port_verify_external_ids(port->port_cfg); ovsrec_port_set_external_ids(port->port_cfg, &new); free(key); } smap_destroy(&new); } static void get_external_id(struct smap *smap, const char *prefix, const char *key, struct ds *output) { if (key) { char *prefix_key = xasprintf("%s%s", prefix, key); const char *value = smap_get(smap, prefix_key); if (value) { ds_put_format(output, "%s\n", value); } free(prefix_key); } else { const struct smap_node **sorted = smap_sort(smap); size_t prefix_len = strlen(prefix); size_t i; for (i = 0; i < smap_count(smap); i++) { const struct smap_node *node = sorted[i]; if (!strncmp(node->key, prefix, prefix_len)) { ds_put_format(output, "%s=%s\n", node->key + prefix_len, node->value); } } free(sorted); } } static void pre_cmd_br_get_external_id(struct vsctl_context *ctx) { pre_cmd_br_set_external_id(ctx); } static void cmd_br_get_external_id(struct vsctl_context *ctx) { struct vsctl_bridge *bridge; vsctl_context_populate_cache(ctx); bridge = find_bridge(ctx, ctx->argv[1], true); if (bridge->br_cfg) { ovsrec_bridge_verify_external_ids(bridge->br_cfg); get_external_id(&bridge->br_cfg->external_ids, "", ctx->argc >= 3 ? ctx->argv[2] : NULL, &ctx->output); } else { struct vsctl_port *port = shash_find_data(&ctx->ports, ctx->argv[1]); ovsrec_port_verify_external_ids(port->port_cfg); get_external_id(&port->port_cfg->external_ids, "fake-bridge-", ctx->argc >= 3 ? ctx->argv[2] : NULL, &ctx->output); } } static void cmd_list_ports(struct vsctl_context *ctx) { struct vsctl_bridge *br; struct vsctl_port *port; struct svec ports; vsctl_context_populate_cache(ctx); br = find_bridge(ctx, ctx->argv[1], true); ovsrec_bridge_verify_ports(br->br_cfg ? 
br->br_cfg : br->parent->br_cfg); svec_init(&ports); LIST_FOR_EACH (port, ports_node, &br->ports) { if (strcmp(port->port_cfg->name, br->name)) { svec_add(&ports, port->port_cfg->name); } } output_sorted(&ports, &ctx->output); svec_destroy(&ports); } static void add_port(struct vsctl_context *ctx, const char *br_name, const char *port_name, bool may_exist, bool fake_iface, char *iface_names[], int n_ifaces, char *settings[], int n_settings) { struct vsctl_port *vsctl_port; struct vsctl_bridge *bridge; struct ovsrec_interface **ifaces; struct ovsrec_port *port; size_t i; vsctl_context_populate_cache(ctx); if (may_exist) { struct vsctl_port *vsctl_port; vsctl_port = find_port(ctx, port_name, false); if (vsctl_port) { struct svec want_names, have_names; svec_init(&want_names); for (i = 0; i < n_ifaces; i++) { svec_add(&want_names, iface_names[i]); } svec_sort(&want_names); svec_init(&have_names); for (i = 0; i < vsctl_port->port_cfg->n_interfaces; i++) { svec_add(&have_names, vsctl_port->port_cfg->interfaces[i]->name); } svec_sort(&have_names); if (strcmp(vsctl_port->bridge->name, br_name)) { char *command = vsctl_context_to_string(ctx); vsctl_fatal("\"%s\" but %s is actually attached to bridge %s", command, port_name, vsctl_port->bridge->name); } if (!svec_equal(&want_names, &have_names)) { char *have_names_string = svec_join(&have_names, ", ", ""); char *command = vsctl_context_to_string(ctx); vsctl_fatal("\"%s\" but %s actually has interface(s) %s", command, port_name, have_names_string); } svec_destroy(&want_names); svec_destroy(&have_names); return; } } check_conflicts(ctx, port_name, xasprintf("cannot create a port named %s", port_name)); for (i = 0; i < n_ifaces; i++) { check_conflicts(ctx, iface_names[i], xasprintf("cannot create an interface named %s", iface_names[i])); } bridge = find_bridge(ctx, br_name, true); ifaces = xmalloc(n_ifaces * sizeof *ifaces); for (i = 0; i < n_ifaces; i++) { ifaces[i] = ovsrec_interface_insert(ctx->txn); 
ovsrec_interface_set_name(ifaces[i], iface_names[i]); } port = ovsrec_port_insert(ctx->txn); ovsrec_port_set_name(port, port_name); ovsrec_port_set_interfaces(port, ifaces, n_ifaces); ovsrec_port_set_bond_fake_iface(port, fake_iface); if (bridge->parent) { int64_t tag = bridge->vlan; ovsrec_port_set_tag(port, &tag, 1); } for (i = 0; i < n_settings; i++) { set_column(get_table("Port"), &port->header_, settings[i], ctx->symtab); } bridge_insert_port((bridge->parent ? bridge->parent->br_cfg : bridge->br_cfg), port); vsctl_port = add_port_to_cache(ctx, bridge, port); for (i = 0; i < n_ifaces; i++) { add_iface_to_cache(ctx, vsctl_port, ifaces[i]); } free(ifaces); } static void cmd_add_port(struct vsctl_context *ctx) { bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; add_port(ctx, ctx->argv[1], ctx->argv[2], may_exist, false, &ctx->argv[2], 1, &ctx->argv[3], ctx->argc - 3); } static void cmd_add_bond(struct vsctl_context *ctx) { bool may_exist = shash_find(&ctx->options, "--may-exist") != NULL; bool fake_iface = shash_find(&ctx->options, "--fake-iface"); int n_ifaces; int i; n_ifaces = ctx->argc - 3; for (i = 3; i < ctx->argc; i++) { if (strchr(ctx->argv[i], '=')) { n_ifaces = i - 3; break; } } if (n_ifaces < 2) { vsctl_fatal("add-bond requires at least 2 interfaces, but only " "%d were specified", n_ifaces); } add_port(ctx, ctx->argv[1], ctx->argv[2], may_exist, fake_iface, &ctx->argv[3], n_ifaces, &ctx->argv[n_ifaces + 3], ctx->argc - 3 - n_ifaces); } static void cmd_del_port(struct vsctl_context *ctx) { bool must_exist = !shash_find(&ctx->options, "--if-exists"); bool with_iface = shash_find(&ctx->options, "--with-iface") != NULL; const char *target = ctx->argv[ctx->argc - 1]; struct vsctl_port *port; vsctl_context_populate_cache(ctx); if (find_bridge(ctx, target, false)) { if (must_exist) { vsctl_fatal("cannot delete port %s because it is the local port " "for bridge %s (deleting this port requires deleting " "the entire bridge)", target, target); } 
port = NULL; } else if (!with_iface) { port = find_port(ctx, target, must_exist); } else { struct vsctl_iface *iface; port = find_port(ctx, target, false); if (!port) { iface = find_iface(ctx, target, false); if (iface) { port = iface->port; } } if (must_exist && !port) { vsctl_fatal("no port or interface named %s", target); } } if (port) { if (ctx->argc == 3) { struct vsctl_bridge *bridge; bridge = find_bridge(ctx, ctx->argv[1], true); if (port->bridge != bridge) { if (port->bridge->parent == bridge) { vsctl_fatal("bridge %s does not have a port %s (although " "its parent bridge %s does)", ctx->argv[1], ctx->argv[2], bridge->parent->name); } else { vsctl_fatal("bridge %s does not have a port %s", ctx->argv[1], ctx->argv[2]); } } } del_port(ctx, port); } } static void cmd_port_to_br(struct vsctl_context *ctx) { struct vsctl_port *port; vsctl_context_populate_cache(ctx); port = find_port(ctx, ctx->argv[1], true); ds_put_format(&ctx->output, "%s\n", port->bridge->name); } static void cmd_br_to_vlan(struct vsctl_context *ctx) { struct vsctl_bridge *bridge; vsctl_context_populate_cache(ctx); bridge = find_bridge(ctx, ctx->argv[1], true); ds_put_format(&ctx->output, "%d\n", bridge->vlan); } static void cmd_br_to_parent(struct vsctl_context *ctx) { struct vsctl_bridge *bridge; vsctl_context_populate_cache(ctx); bridge = find_bridge(ctx, ctx->argv[1], true); if (bridge->parent) { bridge = bridge->parent; } ds_put_format(&ctx->output, "%s\n", bridge->name); } static void cmd_list_ifaces(struct vsctl_context *ctx) { struct vsctl_bridge *br; struct vsctl_port *port; struct svec ifaces; vsctl_context_populate_cache(ctx); br = find_bridge(ctx, ctx->argv[1], true); verify_ports(ctx); svec_init(&ifaces); LIST_FOR_EACH (port, ports_node, &br->ports) { struct vsctl_iface *iface; LIST_FOR_EACH (iface, ifaces_node, &port->ifaces) { if (strcmp(iface->iface_cfg->name, br->name)) { svec_add(&ifaces, iface->iface_cfg->name); } } } output_sorted(&ifaces, &ctx->output); 
svec_destroy(&ifaces); } static void cmd_iface_to_br(struct vsctl_context *ctx) { struct vsctl_iface *iface; vsctl_context_populate_cache(ctx); iface = find_iface(ctx, ctx->argv[1], true); ds_put_format(&ctx->output, "%s\n", iface->port->bridge->name); } static void verify_controllers(struct ovsrec_bridge *bridge) { size_t i; ovsrec_bridge_verify_controller(bridge); for (i = 0; i < bridge->n_controller; i++) { ovsrec_controller_verify_target(bridge->controller[i]); } } static void pre_controller(struct vsctl_context *ctx) { pre_get_info(ctx); ovsdb_idl_add_column(ctx->idl, &ovsrec_controller_col_target); } static void cmd_get_controller(struct vsctl_context *ctx) { struct vsctl_bridge *br; struct svec targets; size_t i; vsctl_context_populate_cache(ctx); br = find_bridge(ctx, ctx->argv[1], true); if (br->parent) { br = br->parent; } verify_controllers(br->br_cfg); /* Print the targets in sorted order for reproducibility. */ svec_init(&targets); for (i = 0; i < br->br_cfg->n_controller; i++) { svec_add(&targets, br->br_cfg->controller[i]->target); } svec_sort(&targets); for (i = 0; i < targets.n; i++) { ds_put_format(&ctx->output, "%s\n", targets.names[i]); } svec_destroy(&targets); } static void delete_controllers(struct ovsrec_controller **controllers, size_t n_controllers) { size_t i; for (i = 0; i < n_controllers; i++) { ovsrec_controller_delete(controllers[i]); } } static void cmd_del_controller(struct vsctl_context *ctx) { struct ovsrec_bridge *br; vsctl_context_populate_cache(ctx); br = find_real_bridge(ctx, ctx->argv[1], true)->br_cfg; verify_controllers(br); if (br->controller) { delete_controllers(br->controller, br->n_controller); ovsrec_bridge_set_controller(br, NULL, 0); } } static struct ovsrec_controller ** insert_controllers(struct ovsdb_idl_txn *txn, char *targets[], size_t n) { struct ovsrec_controller **controllers; size_t i; controllers = xmalloc(n * sizeof *controllers); for (i = 0; i < n; i++) { if (vconn_verify_name(targets[i]) && 
pvconn_verify_name(targets[i])) { VLOG_WARN("target type \"%s\" is possibly erroneous", targets[i]); } controllers[i] = ovsrec_controller_insert(txn); ovsrec_controller_set_target(controllers[i], targets[i]); } return controllers; } static void cmd_set_controller(struct vsctl_context *ctx) { struct ovsrec_controller **controllers; struct ovsrec_bridge *br; size_t n; vsctl_context_populate_cache(ctx); br = find_real_bridge(ctx, ctx->argv[1], true)->br_cfg; verify_controllers(br); delete_controllers(br->controller, br->n_controller); n = ctx->argc - 2; controllers = insert_controllers(ctx->txn, &ctx->argv[2], n); ovsrec_bridge_set_controller(br, controllers, n); free(controllers); } static void cmd_get_fail_mode(struct vsctl_context *ctx) { struct vsctl_bridge *br; const char *fail_mode; vsctl_context_populate_cache(ctx); br = find_bridge(ctx, ctx->argv[1], true); if (br->parent) { br = br->parent; } ovsrec_bridge_verify_fail_mode(br->br_cfg); fail_mode = br->br_cfg->fail_mode; if (fail_mode && strlen(fail_mode)) { ds_put_format(&ctx->output, "%s\n", fail_mode); } } static void cmd_del_fail_mode(struct vsctl_context *ctx) { struct vsctl_bridge *br; vsctl_context_populate_cache(ctx); br = find_real_bridge(ctx, ctx->argv[1], true); ovsrec_bridge_set_fail_mode(br->br_cfg, NULL); } static void cmd_set_fail_mode(struct vsctl_context *ctx) { struct vsctl_bridge *br; const char *fail_mode = ctx->argv[2]; vsctl_context_populate_cache(ctx); br = find_real_bridge(ctx, ctx->argv[1], true); if (strcmp(fail_mode, "standalone") && strcmp(fail_mode, "secure")) { vsctl_fatal("fail-mode must be \"standalone\" or \"secure\""); } ovsrec_bridge_set_fail_mode(br->br_cfg, fail_mode); } static void verify_managers(const struct ovsrec_open_vswitch *ovs) { size_t i; ovsrec_open_vswitch_verify_manager_options(ovs); for (i = 0; i < ovs->n_manager_options; ++i) { const struct ovsrec_manager *mgr = ovs->manager_options[i]; ovsrec_manager_verify_target(mgr); } } static void pre_manager(struct 
vsctl_context *ctx)
{
    ovsdb_idl_add_column(ctx->idl, &ovsrec_open_vswitch_col_manager_options);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_manager_col_target);
}

/* Appends to the command output, one per line, the sorted and de-duplicated
 * targets of all managers referenced by the Open_vSwitch record. */
static void
cmd_get_manager(struct vsctl_context *ctx)
{
    const struct ovsrec_open_vswitch *ovs = ctx->ovs;
    struct svec targets;
    size_t i;

    verify_managers(ovs);

    /* Print the targets in sorted order for reproducibility. */
    svec_init(&targets);

    for (i = 0; i < ovs->n_manager_options; i++) {
        svec_add(&targets, ovs->manager_options[i]->target);
    }

    svec_sort_unique(&targets);
    for (i = 0; i < targets.n; i++) {
        ds_put_format(&ctx->output, "%s\n", targets.names[i]);
    }
    svec_destroy(&targets);
}

/* Deletes every Manager row referenced by the Open_vSwitch record's
 * "manager_options" column and then empties that column. */
static void
delete_managers(const struct vsctl_context *ctx)
{
    const struct ovsrec_open_vswitch *ovs = ctx->ovs;
    size_t i;

    /* Delete Manager rows pointed to by 'manager_options' column. */
    for (i = 0; i < ovs->n_manager_options; i++) {
        ovsrec_manager_delete(ovs->manager_options[i]);
    }

    /* Delete 'Manager' row refs in 'manager_options' column. */
    ovsrec_open_vswitch_set_manager_options(ovs, NULL, 0);
}

/* Verifies and then removes all configured managers. */
static void
cmd_del_manager(struct vsctl_context *ctx)
{
    const struct ovsrec_open_vswitch *ovs = ctx->ovs;

    verify_managers(ovs);
    delete_managers(ctx);
}

/* Inserts one new Manager row for each of the 'n' strings in 'targets' and
 * makes the Open_vSwitch record's "manager_options" column refer to exactly
 * those rows. */
static void
insert_managers(struct vsctl_context *ctx, char *targets[], size_t n)
{
    struct ovsrec_manager **managers;
    size_t i;

    /* Insert each manager in a new row in Manager table. */
    managers = xmalloc(n * sizeof *managers);
    for (i = 0; i < n; i++) {
        /* Only a warning: the target is still inserted below. */
        if (stream_verify_name(targets[i]) && pstream_verify_name(targets[i])) {
            VLOG_WARN("target type \"%s\" is possibly erroneous", targets[i]);
        }
        managers[i] = ovsrec_manager_insert(ctx->txn);
        ovsrec_manager_set_target(managers[i], targets[i]);
    }

    /* Store uuids of new Manager rows in 'manager_options' column.
*/
    ovsrec_open_vswitch_set_manager_options(ctx->ovs, managers, n);
    free(managers);
}

/* Replaces all configured managers with new Manager rows whose targets are
 * ctx->argv[1] onward. */
static void
cmd_set_manager(struct vsctl_context *ctx)
{
    /* argv[0] is the command name; every other argument is a target. */
    const size_t n = ctx->argc - 1;

    verify_managers(ctx->ovs);
    delete_managers(ctx);
    insert_managers(ctx, &ctx->argv[1], n);
}

/* Prerequisite step for cmd_get_ssl(): adds the Open_vSwitch "ssl" column
 * and the four SSL columns that cmd_get_ssl() reads to the IDL. */
static void
pre_cmd_get_ssl(struct vsctl_context *ctx)
{
    ovsdb_idl_add_column(ctx->idl, &ovsrec_open_vswitch_col_ssl);

    ovsdb_idl_add_column(ctx->idl, &ovsrec_ssl_col_private_key);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_ssl_col_certificate);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_ssl_col_ca_cert);
    ovsdb_idl_add_column(ctx->idl, &ovsrec_ssl_col_bootstrap_ca_cert);
}

/* Appends to the command output the private key, certificate, CA
 * certificate, and bootstrap flag of the SSL record referenced by the
 * Open_vSwitch record.  Outputs nothing if there is no SSL record. */
static void
cmd_get_ssl(struct vsctl_context *ctx)
{
    struct ovsrec_ssl *ssl = ctx->ovs->ssl;

    ovsrec_open_vswitch_verify_ssl(ctx->ovs);
    if (ssl) {
        ovsrec_ssl_verify_private_key(ssl);
        ovsrec_ssl_verify_certificate(ssl);
        ovsrec_ssl_verify_ca_cert(ssl);
        ovsrec_ssl_verify_bootstrap_ca_cert(ssl);

        ds_put_format(&ctx->output, "Private key: %s\n", ssl->private_key);
        ds_put_format(&ctx->output, "Certificate: %s\n", ssl->certificate);
        ds_put_format(&ctx->output, "CA Certificate: %s\n", ssl->ca_cert);
        ds_put_format(&ctx->output, "Bootstrap: %s\n",
                ssl->bootstrap_ca_cert ?
"true" : "false");
    }
}

/* Prerequisite step for cmd_del_ssl(): adds the Open_vSwitch "ssl" column
 * to the IDL. */
static void
pre_cmd_del_ssl(struct vsctl_context *ctx)
{
    ovsdb_idl_add_column(ctx->idl, &ovsrec_open_vswitch_col_ssl);
}

/* Deletes the SSL record referenced by the Open_vSwitch record, if there is
 * one, and clears the reference to it. */
static void
cmd_del_ssl(struct vsctl_context *ctx)
{
    struct ovsrec_ssl *ssl = ctx->ovs->ssl;

    if (ssl) {
        ovsrec_open_vswitch_verify_ssl(ctx->ovs);
        ovsrec_ssl_delete(ssl);
        ovsrec_open_vswitch_set_ssl(ctx->ovs, NULL);
    }
}

/* Prerequisite step for cmd_set_ssl(): adds the Open_vSwitch "ssl" column
 * to the IDL. */
static void
pre_cmd_set_ssl(struct vsctl_context *ctx)
{
    ovsdb_idl_add_column(ctx->idl, &ovsrec_open_vswitch_col_ssl);
}

/* Replaces any existing SSL record with a new one whose private key,
 * certificate, and CA certificate are ctx->argv[1], ctx->argv[2], and
 * ctx->argv[3].  The new record's bootstrap flag is true exactly when the
 * "--bootstrap" option was given. */
static void
cmd_set_ssl(struct vsctl_context *ctx)
{
    bool bootstrap = shash_find(&ctx->options, "--bootstrap");
    struct ovsrec_ssl *ssl = ctx->ovs->ssl;

    ovsrec_open_vswitch_verify_ssl(ctx->ovs);
    if (ssl) {
        ovsrec_ssl_delete(ssl);
    }
    ssl = ovsrec_ssl_insert(ctx->txn);

    ovsrec_ssl_set_private_key(ssl, ctx->argv[1]);
    ovsrec_ssl_set_certificate(ssl, ctx->argv[2]);
    ovsrec_ssl_set_ca_cert(ssl, ctx->argv[3]);

    ovsrec_ssl_set_bootstrap_ca_cert(ssl, bootstrap);

    ovsrec_open_vswitch_set_ssl(ctx->ovs, ssl);
}

/* Parameter commands.
*/

/* One way of finding a row from a user-supplied record identifier; consumed
 * by get_row_by_id(). */
struct vsctl_row_id {
    /* Table searched for the "referrer" row.  NULL marks an unused slot. */
    const struct ovsdb_idl_table_class *table;

    /* String column in 'table' compared against the record identifier.  If
     * NULL, the identifier must be "." and 'table' must hold exactly one
     * row. */
    const struct ovsdb_idl_column *name_column;

    /* Column in the referrer row that holds the UUID of the row actually
     * wanted.  If NULL, the referrer row itself is the result. */
    const struct ovsdb_idl_column *uuid_column;
};

/* A database table plus up to two ways of naming rows in it. */
struct vsctl_table_class {
    struct ovsdb_idl_table_class *class;
    struct vsctl_row_id row_ids[2];
};

/* All tables known to the database commands.  The array ends with an entry
 * whose 'class' is NULL. */
static const struct vsctl_table_class tables[] = {
    {&ovsrec_table_bridge,
     {{&ovsrec_table_bridge, &ovsrec_bridge_col_name, NULL},
      {&ovsrec_table_flow_sample_collector_set, NULL,
       &ovsrec_flow_sample_collector_set_col_bridge}}},

    {&ovsrec_table_controller,
     {{&ovsrec_table_bridge,
       &ovsrec_bridge_col_name,
       &ovsrec_bridge_col_controller}}},

    {&ovsrec_table_interface,
     {{&ovsrec_table_interface, &ovsrec_interface_col_name, NULL},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_mirror,
     {{&ovsrec_table_mirror, &ovsrec_mirror_col_name, NULL},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_manager,
     {{&ovsrec_table_manager, &ovsrec_manager_col_target, NULL},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_netflow,
     {{&ovsrec_table_bridge,
       &ovsrec_bridge_col_name,
       &ovsrec_bridge_col_netflow},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_open_vswitch,
     {{&ovsrec_table_open_vswitch, NULL, NULL},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_port,
     {{&ovsrec_table_port, &ovsrec_port_col_name, NULL},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_qos,
     {{&ovsrec_table_port, &ovsrec_port_col_name, &ovsrec_port_col_qos},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_queue,
     {{NULL, NULL, NULL},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_ssl,
     {{&ovsrec_table_open_vswitch, NULL, &ovsrec_open_vswitch_col_ssl}}},

    {&ovsrec_table_sflow,
     {{&ovsrec_table_bridge,
       &ovsrec_bridge_col_name,
       &ovsrec_bridge_col_sflow},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_flow_table,
     {{&ovsrec_table_flow_table, &ovsrec_flow_table_col_name, NULL},
      {NULL, NULL, NULL}}},

    {&ovsrec_table_ipfix,
     {{&ovsrec_table_bridge,
       &ovsrec_bridge_col_name,
       &ovsrec_bridge_col_ipfix},
      {&ovsrec_table_flow_sample_collector_set, NULL,
       &ovsrec_flow_sample_collector_set_col_ipfix}}},

    {&ovsrec_table_flow_sample_collector_set,
     {{NULL, NULL, NULL},
      {NULL, NULL,
NULL}}},

    /* Terminator: get_table() stops at the first entry with a NULL class. */
    {NULL, {{NULL, NULL, NULL}, {NULL, NULL, NULL}}}
};

/* Reports 'error' as a fatal error if it is nonnull; otherwise does
 * nothing. */
static void
die_if_error(char *error)
{
    if (error) {
        vsctl_fatal("%s", error);
    }
}

/* Returns '_' if 'c' is '-', otherwise the result of tolower(c). */
static int
to_lower_and_underscores(unsigned c)
{
    return c == '-' ? '_' : tolower(c);
}

/* Scores how well the user-supplied string 's' matches 'name'.  A higher
 * score is a better match:
 *
 *      - UINT_MAX if the strings are byte-for-byte identical.
 *
 *      - UINT_MAX - 1 if they are identical once case is ignored and '-' is
 *        treated as '_'.
 *
 *      - The length of 's' if, under that same folding, 's' is a proper
 *        prefix of 'name'.
 *
 *      - 0 if 's' does not match at all. */
static unsigned int
score_partial_match(const char *name, const char *s)
{
    int score;

    if (!strcmp(name, s)) {
        return UINT_MAX;
    }
    for (score = 0; ; score++, name++, s++) {
        if (to_lower_and_underscores(*name) != to_lower_and_underscores(*s)) {
            break;
        } else if (*name == '\0') {
            /* Both strings ended together without a mismatch. */
            return UINT_MAX - 1;
        }
    }

    /* The strings diverged.  That is only a (prefix) match if the divergence
     * is the end of 's'. */
    return *s == '\0' ? score : 0;
}

/* Returns the entry in 'tables' whose name best matches 'table_name', as
 * scored by score_partial_match().  Fails fatally if no table matches or if
 * the best score is shared by more than one table. */
static const struct vsctl_table_class *
get_table(const char *table_name)
{
    const struct vsctl_table_class *table;
    const struct vsctl_table_class *best_match = NULL;
    unsigned int best_score = 0;

    for (table = tables; table->class; table++) {
        unsigned int score = score_partial_match(table->class->name,
                                                 table_name);
        if (score > best_score) {
            best_match = table;
            best_score = score;
        } else if (score == best_score) {
            /* A tie for the best score so far makes the name ambiguous. */
            best_match = NULL;
        }
    }
    if (best_match) {
        return best_match;
    } else if (best_score) {
        vsctl_fatal("multiple table names match \"%s\"", table_name);
    } else {
        vsctl_fatal("unknown table \"%s\"", table_name);
    }
}

/* Prerequisite-phase counterpart of get_table(): looks up 'table_name' and
 * adds to the IDL the table itself plus every table and column that the
 * table's row_ids need for looking up rows.  Returns the table. */
static const struct vsctl_table_class *
pre_get_table(struct vsctl_context *ctx, const char *table_name)
{
    const struct vsctl_table_class *table_class;
    int i;

    table_class = get_table(table_name);
    ovsdb_idl_add_table(ctx->idl, table_class->class);

    for (i = 0; i < ARRAY_SIZE(table_class->row_ids); i++) {
        const struct vsctl_row_id *id = &table_class->row_ids[i];
        if (id->table) {
            ovsdb_idl_add_table(ctx->idl, id->table);
        }
        if (id->name_column) {
            ovsdb_idl_add_column(ctx->idl, id->name_column);
        }
        if (id->uuid_column) {
            ovsdb_idl_add_column(ctx->idl, id->uuid_column);
        }
    }

    return table_class;
}

/* Attempts to find the row in 'table' identified by 'record_id' using the
 * single lookup method 'id'.  Returns the row, or NULL if this method does
 * not yield one.  Fails fatally if more than one row's name column equals
 * 'record_id'. */
static const struct ovsdb_idl_row *
get_row_by_id(struct vsctl_context *ctx, const struct vsctl_table_class *table,
              const struct vsctl_row_id *id, const char *record_id)
{
    const struct ovsdb_idl_row *referrer,
*final;

    /* An unused row_ids slot. */
    if (!id->table) {
        return NULL;
    }

    if (!id->name_column) {
        /* Without a name column the only accepted identifier is ".", and it
         * only works when the table holds exactly one row. */
        if (strcmp(record_id, ".")) {
            return NULL;
        }
        referrer = ovsdb_idl_first_row(ctx->idl, id->table);
        if (!referrer || ovsdb_idl_next_row(referrer)) {
            return NULL;
        }
    } else {
        const struct ovsdb_idl_row *row;

        /* Scan the whole table so that duplicate names are detected. */
        referrer = NULL;
        for (row = ovsdb_idl_first_row(ctx->idl, id->table);
             row != NULL;
             row = ovsdb_idl_next_row(row))
        {
            const struct ovsdb_datum *name;

            name = ovsdb_idl_get(row, id->name_column,
                                 OVSDB_TYPE_STRING, OVSDB_TYPE_VOID);
            if (name->n == 1 && !strcmp(name->keys[0].string, record_id)) {
                if (referrer) {
                    vsctl_fatal("multiple rows in %s match \"%s\"",
                                table->class->name, record_id);
                }
                referrer = row;
            }
        }
    }
    if (!referrer) {
        return NULL;
    }

    final = NULL;
    if (id->uuid_column) {
        const struct ovsdb_datum *uuid;

        /* The referrer only points at the wanted row: follow the UUID stored
         * in 'uuid_column', provided it holds exactly one. */
        ovsdb_idl_txn_verify(referrer, id->uuid_column);
        uuid = ovsdb_idl_get(referrer, id->uuid_column,
                             OVSDB_TYPE_UUID, OVSDB_TYPE_VOID);
        if (uuid->n == 1) {
            final = ovsdb_idl_get_row_for_uuid(ctx->idl, table->class,
                                               &uuid->keys[0].uuid);
        }
    } else {
        final = referrer;
    }

    return final;
}

/* Returns the row in 'table' identified by 'record_id'.  If 'record_id'
 * parses as a UUID it is looked up by UUID only; otherwise each of the
 * table's row_ids is tried in order.  If no row is found, fails fatally when
 * 'must_exist' is true and returns NULL otherwise. */
static const struct ovsdb_idl_row *
get_row (struct vsctl_context *ctx,
         const struct vsctl_table_class *table, const char *record_id,
         bool must_exist)
{
    const struct ovsdb_idl_row *row;
    struct uuid uuid;

    if (uuid_from_string(&uuid, record_id)) {
        row = ovsdb_idl_get_row_for_uuid(ctx->idl, table->class, &uuid);
    } else {
        int i;

        for (i = 0; i < ARRAY_SIZE(table->row_ids); i++) {
            row = get_row_by_id(ctx, table, &table->row_ids[i], record_id);
            if (row) {
                break;
            }
        }
    }
    if (must_exist && !row) {
        vsctl_fatal("no row \"%s\" in table %s",
                    record_id, table->class->name);
    }
    return row;
}

/* Stores in '*columnp' the column of 'table' whose name best matches
 * 'column_name', as scored by score_partial_match().  Returns NULL on
 * success.  If no column matches, or the best score is shared by more than
 * one column, stores NULL in '*columnp' and returns an error message
 * allocated with xasprintf(). */
static char *
get_column(const struct vsctl_table_class *table, const char *column_name,
           const struct ovsdb_idl_column **columnp)
{
    const struct ovsdb_idl_column *best_match = NULL;
    unsigned int best_score = 0;
    size_t i;

    for (i = 0; i < table->class->n_columns; i++) {
        const struct ovsdb_idl_column *column = &table->class->columns[i];
        unsigned int score =
score_partial_match(column->name, column_name);
        if (score > best_score) {
            best_match = column;
            best_score = score;
        } else if (score == best_score) {
            /* A tie for the best score so far makes the name ambiguous. */
            best_match = NULL;
        }
    }

    *columnp = best_match;
    if (best_match) {
        return NULL;
    } else if (best_score) {
        return xasprintf("%s contains more than one column whose name "
                         "matches \"%s\"", table->class->name, column_name);
    } else {
        return xasprintf("%s does not contain a column whose name matches "
                         "\"%s\"", table->class->name, column_name);
    }
}

/* Inserts the symbol named 'id' into 'symtab', marks it as created, and
 * returns it.  Fails fatally if 'id' does not begin with '@' or if the
 * symbol was already marked as created.  If 'newp' is nonnull, '*newp' is
 * set to whether 'id' was absent from 'symtab' before this call. */
static struct ovsdb_symbol *
create_symbol(struct ovsdb_symbol_table *symtab, const char *id, bool *newp)
{
    struct ovsdb_symbol *symbol;

    if (id[0] != '@') {
        vsctl_fatal("row id \"%s\" does not begin with \"@\"", id);
    }

    /* Must be sampled before the insert below. */
    if (newp) {
        *newp = ovsdb_symbol_table_get(symtab, id) == NULL;
    }

    symbol = ovsdb_symbol_table_insert(symtab, id);
    if (symbol->created) {
        vsctl_fatal("row id \"%s\" may only be specified on one --id option",
                    id);
    }
    symbol->created = true;
    return symbol;
}

/* Prerequisite-phase counterpart of get_column(): resolves 'column_name' in
 * 'table' into '*columnp', failing fatally on error, and adds the column to
 * the IDL. */
static void
pre_get_column(struct vsctl_context *ctx,
               const struct vsctl_table_class *table, const char *column_name,
               const struct ovsdb_idl_column **columnp)
{
    die_if_error(get_column(table, column_name, columnp));
    ovsdb_idl_add_column(ctx->idl, *columnp);
}

/* Returns a newly allocated error message saying that 'arg' does not end in
 * one of the 'n_allowed' operators in 'allowed_operators' followed by a
 * value.  The operators are listed as "a", "a" or "b", or "a", "b", or "c"
 * depending on how many there are.  'allowed_operators[0]' is read
 * unconditionally, so 'n_allowed' must be at least 1. */
static char *
missing_operator_error(const char *arg, const char **allowed_operators,
                       size_t n_allowed)
{
    struct ds s;

    ds_init(&s);
    ds_put_format(&s, "%s: argument does not end in ", arg);
    ds_put_format(&s, "\"%s\"", allowed_operators[0]);
    if (n_allowed == 2) {
        ds_put_format(&s, " or \"%s\"", allowed_operators[1]);
    } else if (n_allowed > 2) {
        size_t i;

        for (i = 1; i < n_allowed - 1; i++) {
            ds_put_format(&s, ", \"%s\"", allowed_operators[i]);
        }
        /* The loop leaves 'i' at the index of the last operator. */
        ds_put_format(&s, ", or \"%s\"", allowed_operators[i]);
    }
    ds_put_format(&s, " followed by a value.");

    return ds_steal_cstr(&s);
}

/* Breaks 'arg' apart into a number of fields in the following order:
 *
 *      - The name of a column in 'table', stored into '*columnp'.  The column
 *        name may be abbreviated.
*
 *      - Optionally ':' followed by a key string.  The key is stored as a
 *        malloc()'d string into '*keyp', or NULL if no key is present in
 *        'arg'.
 *
 *      - If 'valuep' is nonnull, an operator followed by a value string.  The
 *        allowed operators are the 'n_allowed' strings in
 *        'allowed_operators', or just "=" if 'allowed_operators' is NULL.
 *        When several allowed operators match, the longest one is used.  If
 *        'operatorp' is nonnull, then the index of the operator within
 *        'allowed_operators' is stored into '*operatorp'.  The value is
 *        stored as a malloc()'d string into '*valuep'.
 *
 *        If 'valuep' is null, anything left in 'arg' after the column name
 *        and optional key is an error.
 *
 * 'operatorp' may only be nonnull if 'valuep' is also nonnull.
 *
 * On success, returns NULL.  On failure, returns a malloc()'d string error
 * message, stores NULL into '*columnp', '*keyp' and (if nonnull) '*valuep',
 * and stores -1 into '*operatorp' if it is nonnull. */
static char * WARN_UNUSED_RESULT
parse_column_key_value(const char *arg,
                       const struct vsctl_table_class *table,
                       const struct ovsdb_idl_column **columnp, char **keyp,
                       int *operatorp,
                       const char **allowed_operators, size_t n_allowed,
                       char **valuep)
{
    const char *p = arg;
    char *column_name;
    char *error;

    ovs_assert(!(operatorp && !valuep));

    /* Clear the string outputs up front so that the 'error' path can free()
     * them unconditionally. */
    *keyp = NULL;
    if (valuep) {
        *valuep = NULL;
    }

    /* Parse column name. */
    error = ovsdb_token_parse(&p, &column_name);
    if (error) {
        goto error;
    }
    if (column_name[0] == '\0') {
        free(column_name);
        error = xasprintf("%s: missing column name", arg);
        goto error;
    }
    error = get_column(table, column_name, columnp);
    free(column_name);
    if (error) {
        goto error;
    }

    /* Parse key string. */
    if (*p == ':') {
        p++;
        error = ovsdb_token_parse(&p, keyp);
        if (error) {
            goto error;
        }
    }

    /* Parse value string.
*/ if (valuep) { size_t best_len; size_t i; int best; if (!allowed_operators) { static const char *equals = "="; allowed_operators = = n_allowed = 1; } best = -1; best_len = 0; for (i = 0; i < n_allowed; i++) { const char *op = allowed_operators[i]; size_t op_len = strlen(op); if (op_len > best_len && !strncmp(op, p, op_len) && p[op_len]) { best_len = op_len; best = i; } } if (best < 0) { error = missing_operator_error(arg, allowed_operators, n_allowed); goto error; } if (operatorp) { *operatorp = best; } *valuep = xstrdup(p + best_len); } else { if (*p != '\0') { error = xasprintf("%s: trailing garbage \"%s\" in argument", arg, p); goto error; } } return NULL; error: *columnp = NULL; free(*keyp); *keyp = NULL; if (valuep) { free(*valuep); *valuep = NULL; if (operatorp) { *operatorp = -1; } } return error; } static const struct ovsdb_idl_column * pre_parse_column_key_value(struct vsctl_context *ctx, const char *arg, const struct vsctl_table_class *table) { const struct ovsdb_idl_column *column; const char *p; char *column_name; p = arg; die_if_error(ovsdb_token_parse(&p, &column_name)); if (column_name[0] == '\0') { vsctl_fatal("%s: missing column name", arg); } pre_get_column(ctx, table, column_name, &column); free(column_name); return column; } static void check_mutable(const struct vsctl_table_class *table, const struct ovsdb_idl_column *column) { if (!column->mutable) { vsctl_fatal("cannot modify read-only column %s in table %s", column->name, table->class->name); } } static void pre_cmd_get(struct vsctl_context *ctx) { const char *id = shash_find_data(&ctx->options, "--id"); const char *table_name = ctx->argv[1]; const struct vsctl_table_class *table; int i; /* Using "get" without --id or a column name could possibly make sense. * Maybe, for example, a ovs-vsctl run wants to assert that a row exists. * But it is unlikely that an interactive user would want to do that, so * issue a warning if we're running on a terminal. 
*/
    if (!id && ctx->argc <= 3 && isatty(STDOUT_FILENO)) {
        VLOG_WARN("\"get\" command without row arguments or \"--id\" is "
                  "possibly erroneous");
    }

    table = pre_get_table(ctx, table_name);
    for (i = 3; i < ctx->argc; i++) {
        /* "_uuid" and "-uuid" name the row's UUID, which is not an IDL
         * column, so there is nothing to add for them. */
        if (!strcasecmp(ctx->argv[i], "_uuid")
            || !strcasecmp(ctx->argv[i], "-uuid")) {
            continue;
        }

        pre_parse_column_key_value(ctx, ctx->argv[i], table);
    }
}

/* Looks up the record ctx->argv[2] in the table ctx->argv[1] and appends to
 * the command output, one per line, the value of each column (or
 * column:key) named by ctx->argv[3] onward.
 *
 * With "--if-exists", a missing record makes the command output nothing
 * instead of failing.  With "--id=@name", the symbol '@name' is bound to the
 * record's UUID.  The two options may not be combined. */
static void
cmd_get(struct vsctl_context *ctx)
{
    const char *id = shash_find_data(&ctx->options, "--id");
    bool must_exist = !shash_find(&ctx->options, "--if-exists");
    const char *table_name = ctx->argv[1];
    const char *record_id = ctx->argv[2];
    const struct vsctl_table_class *table;
    const struct ovsdb_idl_row *row;
    struct ds *out = &ctx->output;
    int i;

    if (id && !must_exist) {
        vsctl_fatal("--if-exists and --id may not be specified together");
    }

    table = get_table(table_name);
    row = get_row(ctx, table, record_id, must_exist);
    if (!row) {
        /* Only reachable with --if-exists: get_row() is fatal otherwise. */
        return;
    }

    if (id) {
        struct ovsdb_symbol *symbol;
        bool new;

        symbol = create_symbol(ctx->symtab, id, &new);
        if (!new) {
            vsctl_fatal("row id \"%s\" specified on \"get\" command was used "
                        "before it was defined", id);
        }
        symbol->uuid = row->uuid;

        /* This symbol refers to a row that already exists, so disable warnings
         * about it being unreferenced. */
        symbol->strong_ref = true;
    }
    for (i = 3; i < ctx->argc; i++) {
        const struct ovsdb_idl_column *column;
        const struct ovsdb_datum *datum;
        char *key_string;

        /* Special case for obtaining the UUID of a row.  We can't just do this
         * through parse_column_key_value() below since it returns a "struct
         * ovsdb_idl_column" and the UUID column doesn't have one.
*/
        if (!strcasecmp(ctx->argv[i], "_uuid")
            || !strcasecmp(ctx->argv[i], "-uuid")) {
            ds_put_format(out, UUID_FMT"\n", UUID_ARGS(&row->uuid));
            continue;
        }

        /* No value output is requested, so anything in the argument beyond
         * "column" or "column:key" is rejected. */
        die_if_error(parse_column_key_value(ctx->argv[i], table,
                                            &column, &key_string,
                                            NULL, NULL, 0, NULL));

        ovsdb_idl_txn_verify(row, column);
        datum = ovsdb_idl_read(row, column);
        if (key_string) {
            union ovsdb_atom key;
            unsigned int idx;

            if (column->type.value.type == OVSDB_TYPE_VOID) {
                vsctl_fatal("cannot specify key to get for non-map column %s",
                            column->name);
            }

            die_if_error(ovsdb_atom_from_string(&key,
                                                &column->type.key,
                                                key_string, ctx->symtab));

            idx = ovsdb_datum_find_key(datum, &key,
                                       column->type.key.type);
            if (idx == UINT_MAX) {
                /* Key not present.  With --if-exists this is not an error
                 * and only the new-line below is output for this
                 * argument. */
                if (must_exist) {
                    vsctl_fatal("no key \"%s\" in %s record \"%s\" column %s",
                                key_string, table->class->name, record_id,
                                column->name);
                }
            } else {
                ovsdb_atom_to_string(&datum->values[idx],
                                     column->type.value.type, out);
            }
            ovsdb_atom_destroy(&key, column->type.key.type);
        } else {
            ovsdb_datum_to_string(datum, &column->type, out);
        }
        ds_put_char(out, '\n');

        free(key_string);
    }
}

/* Converts 'column_names' into an array of columns of 'table', stored as a
 * newly allocated array in '*columnsp' with its length in '*n_columnsp'.
 * The caller must free the array.  A NULL element stands for the row's
 * UUID.
 *
 * If 'column_names' is NULL, the result is the UUID followed by every
 * column of 'table'.  Otherwise 'column_names' is split at commas and
 * spaces and each name is resolved with get_column(), except that "_uuid"
 * (in any case) yields a NULL element.  Fails fatally if a name cannot be
 * resolved or if no names are given. */
static void
parse_column_names(const char *column_names,
                   const struct vsctl_table_class *table,
                   const struct ovsdb_idl_column ***columnsp,
                   size_t *n_columnsp)
{
    const struct ovsdb_idl_column **columns;
    size_t n_columns;

    if (!column_names) {
        size_t i;

        n_columns = table->class->n_columns + 1;
        columns = xmalloc(n_columns * sizeof *columns);
        columns[0] = NULL;
        for (i = 0; i < table->class->n_columns; i++) {
            columns[i + 1] = &table->class->columns[i];
        }
    } else {
        /* strtok_r() modifies its argument, so work on a private copy. */
        char *s = xstrdup(column_names);
        size_t allocated_columns;
        char *save_ptr = NULL;
        char *column_name;

        columns = NULL;
        allocated_columns = n_columns = 0;
        for (column_name = strtok_r(s, ", ", &save_ptr); column_name;
             column_name = strtok_r(NULL, ", ", &save_ptr)) {
            const struct ovsdb_idl_column *column;

            if (!strcasecmp(column_name, "_uuid")) {
                column = NULL;
            } else {
                die_if_error(get_column(table, column_name, &column));
            }
            if (n_columns >= allocated_columns) {
                columns =
x2nrealloc(columns, &allocated_columns,
                                     sizeof *columns);
            }
            columns[n_columns++] = column;
        }
        free(s);

        if (!n_columns) {
            vsctl_fatal("must specify at least one column name");
        }
    }
    *columnsp = columns;
    *n_columnsp = n_columns;
}

/* Prerequisite-phase helper: resolves 'column_names' in 'table' with
 * parse_column_names() and adds each resulting column to the IDL.  NULL
 * elements, which stand for the row UUID, are skipped. */
static void
pre_list_columns(struct vsctl_context *ctx,
                 const struct vsctl_table_class *table,
                 const char *column_names)
{
    const struct ovsdb_idl_column **columns;
    size_t n_columns;
    size_t i;

    parse_column_names(column_names, table, &columns, &n_columns);
    for (i = 0; i < n_columns; i++) {
        if (columns[i]) {
            ovsdb_idl_add_column(ctx->idl, columns[i]);
        }
    }
    free(columns);
}

/* Prerequisite step for cmd_list(): adds the table named by ctx->argv[1]
 * and the columns selected by "--columns" (all columns if the option is
 * absent) to the IDL. */
static void
pre_cmd_list(struct vsctl_context *ctx)
{
    const char *column_names = shash_find_data(&ctx->options, "--columns");
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table;

    table = pre_get_table(ctx, table_name);
    pre_list_columns(ctx, table, column_names);
}

/* Returns a newly allocated output table with one column per element of
 * 'columns', titled with the column's name, or "_uuid" for a NULL
 * element. */
static struct table *
list_make_table(const struct ovsdb_idl_column **columns, size_t n_columns)
{
    struct table *out;
    size_t i;

    out = xmalloc(sizeof *out);
    table_init(out);

    for (i = 0; i < n_columns; i++) {
        const struct ovsdb_idl_column *column = columns[i];
        const char *column_name = column ?
column->name : "_uuid";

        table_add_column(out, "%s", column_name);
    }

    return out;
}

/* Appends to 'out' one output row holding the values of the 'n_columns'
 * columns in 'columns' for 'row'.  A NULL element of 'columns' produces a
 * cell containing the row's UUID.  Does nothing if 'row' is NULL. */
static void
list_record(const struct ovsdb_idl_row *row,
            const struct ovsdb_idl_column **columns, size_t n_columns,
            struct table *out)
{
    size_t i;

    if (!row) {
        return;
    }

    table_add_row(out);
    for (i = 0; i < n_columns; i++) {
        const struct ovsdb_idl_column *column = columns[i];
        struct cell *cell = table_add_cell(out);

        if (!column) {
            /* Wrap the UUID in a temporary single-element datum so that it
             * can be converted to JSON like any other column value. */
            struct ovsdb_datum datum;
            union ovsdb_atom atom;

            atom.uuid = row->uuid;

            datum.keys = &atom;
            datum.values = NULL;
            datum.n = 1;

            cell->json = ovsdb_datum_to_json(&datum, &ovsdb_type_uuid);
            cell->type = &ovsdb_type_uuid;
        } else {
            const struct ovsdb_datum *datum = ovsdb_idl_read(row, column);

            cell->json = ovsdb_datum_to_json(datum, &column->type);
            cell->type = &column->type;
        }
    }
}

/* Stores in ctx->table a table listing records of the database table named
 * by ctx->argv[1], restricted to the columns selected by "--columns".  If
 * record identifiers are given as ctx->argv[2] onward, only those records
 * are listed; otherwise every row of the table is.  With "--if-exists",
 * records that cannot be found are skipped rather than being a fatal
 * error. */
static void
cmd_list(struct vsctl_context *ctx)
{
    const char *column_names = shash_find_data(&ctx->options, "--columns");
    bool must_exist = !shash_find(&ctx->options, "--if-exists");
    const struct ovsdb_idl_column **columns;
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table;
    struct table *out;
    size_t n_columns;
    int i;

    table = get_table(table_name);
    parse_column_names(column_names, table, &columns, &n_columns);
    out = ctx->table = list_make_table(columns, n_columns);
    if (ctx->argc > 2) {
        for (i = 2; i < ctx->argc; i++) {
            /* get_row() may return NULL with --if-exists; list_record()
             * ignores a NULL row. */
            list_record(get_row(ctx, table, ctx->argv[i], must_exist),
                        columns, n_columns, out);
        }
    } else {
        const struct ovsdb_idl_row *row;

        for (row = ovsdb_idl_first_row(ctx->idl, table->class); row != NULL;
             row = ovsdb_idl_next_row(row)) {
            list_record(row, columns, n_columns, out);
        }
    }
    free(columns);
}

/* Prerequisite step for cmd_find(): adds the table named by ctx->argv[1],
 * the columns selected by "--columns", and the column named in each
 * condition ctx->argv[2] onward to the IDL. */
static void
pre_cmd_find(struct vsctl_context *ctx)
{
    const char *column_names = shash_find_data(&ctx->options, "--columns");
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table;
    int i;

    table = pre_get_table(ctx, table_name);
    pre_list_columns(ctx, table, column_names);
    for (i = 2; i < ctx->argc; i++) {
        pre_parse_column_key_value(ctx,
ctx->argv[i], table);
    }
}

/* Stores in ctx->table a table listing every row of the database table
 * named by ctx->argv[1] that satisfies all of the conditions given as
 * ctx->argv[2] onward, restricted to the columns selected by "--columns". */
static void
cmd_find(struct vsctl_context *ctx)
{
    const char *column_names = shash_find_data(&ctx->options, "--columns");
    const struct ovsdb_idl_column **columns;
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table;
    const struct ovsdb_idl_row *row;
    struct table *out;
    size_t n_columns;

    table = get_table(table_name);
    parse_column_names(column_names, table, &columns, &n_columns);
    out = ctx->table = list_make_table(columns, n_columns);
    for (row = ovsdb_idl_first_row(ctx->idl, table->class); row;
         row = ovsdb_idl_next_row(row)) {
        int i;

        /* The first failing condition rejects the row. */
        for (i = 2; i < ctx->argc; i++) {
            if (!is_condition_satisfied(table, row, ctx->argv[i],
                                        ctx->symtab)) {
                goto next_row;
            }
        }
        list_record(row, columns, n_columns, out);

    next_row: ;
    }
    free(columns);
}

/* Prerequisite step for cmd_set(): adds the table named by ctx->argv[1] and
 * the column named in each assignment ctx->argv[3] onward to the IDL, and
 * fails fatally if any of those columns is read-only. */
static void
pre_cmd_set(struct vsctl_context *ctx)
{
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table;
    int i;

    table = pre_get_table(ctx, table_name);
    for (i = 3; i < ctx->argc; i++) {
        const struct ovsdb_idl_column *column;

        column = pre_parse_column_key_value(ctx, ctx->argv[i], table);
        check_mutable(table, column);
    }
}

/* Applies the assignment 'arg', of the form "column=value" or
 * "column:key=value", to 'row' in 'table'.  In the keyed form, the key-value
 * pair is merged with the column's current contents; otherwise the whole
 * column is replaced by 'value'.  'symtab' is passed to the string-to-value
 * conversions.  Fails fatally on any parse error. */
static void
set_column(const struct vsctl_table_class *table,
           const struct ovsdb_idl_row *row, const char *arg,
           struct ovsdb_symbol_table *symtab)
{
    const struct ovsdb_idl_column *column;
    char *key_string, *value_string;
    char *error;

    error = parse_column_key_value(arg, table, &column, &key_string,
                                   NULL, NULL, 0, &value_string);
    die_if_error(error);
    if (!value_string) {
        vsctl_fatal("%s: missing value", arg);
    }

    if (key_string) {
        union ovsdb_atom key, value;
        struct ovsdb_datum datum;

        if (column->type.value.type == OVSDB_TYPE_VOID) {
            vsctl_fatal("cannot specify key to set for non-map column %s",
                        column->name);
        }

        die_if_error(ovsdb_atom_from_string(&key, &column->type.key,
                                            key_string, symtab));
        die_if_error(ovsdb_atom_from_string(&value, &column->type.value,
                                            value_string, symtab));

        /* Build a datum holding only the new pair; the column's existing
         * contents are merged into it afterward. */
        ovsdb_datum_init_empty(&datum);
        ovsdb_datum_add_unsafe(&datum, &key, &value,
&column->type);

        ovsdb_atom_destroy(&key, column->type.key.type);
        ovsdb_atom_destroy(&value, column->type.value.type);

        ovsdb_datum_union(&datum, ovsdb_idl_read(row, column),
                          &column->type, false);
        ovsdb_idl_txn_write(row, column, &datum);
    } else {
        struct ovsdb_datum datum;

        die_if_error(ovsdb_datum_from_string(&datum, &column->type,
                                             value_string, symtab));
        ovsdb_idl_txn_write(row, column, &datum);
    }

    free(key_string);
    free(value_string);
}

/* Applies each assignment ctx->argv[3] onward to the record ctx->argv[2] in
 * the table ctx->argv[1].  With "--if-exists", does nothing if the record
 * cannot be found. */
static void
cmd_set(struct vsctl_context *ctx)
{
    bool must_exist = !shash_find(&ctx->options, "--if-exists");
    const char *table_name = ctx->argv[1];
    const char *record_id = ctx->argv[2];
    const struct vsctl_table_class *table;
    const struct ovsdb_idl_row *row;
    int i;

    table = get_table(table_name);
    row = get_row(ctx, table, record_id, must_exist);
    if (!row) {
        return;
    }

    for (i = 3; i < ctx->argc; i++) {
        set_column(table, row, ctx->argv[i], ctx->symtab);
    }

    vsctl_context_invalidate_cache(ctx);
}

/* Prerequisite step for cmd_add(): adds the table named by ctx->argv[1] and
 * the column named by ctx->argv[3] to the IDL, and fails fatally if that
 * column is read-only. */
static void
pre_cmd_add(struct vsctl_context *ctx)
{
    const char *table_name = ctx->argv[1];
    const char *column_name = ctx->argv[3];
    const struct vsctl_table_class *table;
    const struct ovsdb_idl_column *column;

    table = pre_get_table(ctx, table_name);
    pre_get_column(ctx, table, column_name, &column);
    check_mutable(table, column);
}

/* Adds each value (or key-value pair) given as ctx->argv[4] onward to the
 * column ctx->argv[3] of the record ctx->argv[2] in the table ctx->argv[1].
 * Fails fatally if the result would hold more elements than the column's
 * type allows.  With "--if-exists", does nothing if the record cannot be
 * found. */
static void
cmd_add(struct vsctl_context *ctx)
{
    bool must_exist = !shash_find(&ctx->options, "--if-exists");
    const char *table_name = ctx->argv[1];
    const char *record_id = ctx->argv[2];
    const char *column_name = ctx->argv[3];
    const struct vsctl_table_class *table;
    const struct ovsdb_idl_column *column;
    const struct ovsdb_idl_row *row;
    const struct ovsdb_type *type;
    struct ovsdb_datum old;
    int i;

    table = get_table(table_name);
    die_if_error(get_column(table, column_name, &column));
    row = get_row(ctx, table, record_id, must_exist);
    if (!row) {
        return;
    }

    /* Work on a copy of the column's current contents. */
    type = &column->type;
    ovsdb_datum_clone(&old, ovsdb_idl_read(row, column), &column->type);
    for (i = 4; i < ctx->argc; i++) {
        struct ovsdb_type add_type;
        struct ovsdb_datum
add; add_type = *type; add_type.n_min = 1; add_type.n_max = UINT_MAX; die_if_error(ovsdb_datum_from_string(&add, &add_type, ctx->argv[i], ctx->symtab)); ovsdb_datum_union(&old, &add, type, false); ovsdb_datum_destroy(&add, type); } if (old.n > type->n_max) { vsctl_fatal("\"add\" operation would put %u %s in column %s of " "table %s but the maximum number is %u", old.n, type->value.type == OVSDB_TYPE_VOID ? "values" : "pairs", column->name, table->class->name, type->n_max); } ovsdb_idl_txn_verify(row, column); ovsdb_idl_txn_write(row, column, &old); vsctl_context_invalidate_cache(ctx); } static void pre_cmd_remove(struct vsctl_context *ctx) { const char *table_name = ctx->argv[1]; const char *column_name = ctx->argv[3]; const struct vsctl_table_class *table; const struct ovsdb_idl_column *column; table = pre_get_table(ctx, table_name); pre_get_column(ctx, table, column_name, &column); check_mutable(table, column); } static void cmd_remove(struct vsctl_context *ctx) { bool must_exist = !shash_find(&ctx->options, "--if-exists"); const char *table_name = ctx->argv[1]; const char *record_id = ctx->argv[2]; const char *column_name = ctx->argv[3]; const struct vsctl_table_class *table; const struct ovsdb_idl_column *column; const struct ovsdb_idl_row *row; const struct ovsdb_type *type; struct ovsdb_datum old; int i; table = get_table(table_name); die_if_error(get_column(table, column_name, &column)); row = get_row(ctx, table, record_id, must_exist); if (!row) { return; } type = &column->type; ovsdb_datum_clone(&old, ovsdb_idl_read(row, column), &column->type); for (i = 4; i < ctx->argc; i++) { struct ovsdb_type rm_type; struct ovsdb_datum rm; char *error; rm_type = *type; rm_type.n_min = 1; rm_type.n_max = UINT_MAX; error = ovsdb_datum_from_string(&rm, &rm_type, ctx->argv[i], ctx->symtab); if (error && ovsdb_type_is_map(&rm_type)) { free(error); rm_type.value.type = OVSDB_TYPE_VOID; die_if_error(ovsdb_datum_from_string(&rm, &rm_type, ctx->argv[i], ctx->symtab)); } 
ovsdb_datum_subtract(&old, type, &rm, &rm_type);
        ovsdb_datum_destroy(&rm, &rm_type);
    }
    if (old.n < type->n_min) {
        vsctl_fatal("\"remove\" operation would put %u %s in column %s of "
                    "table %s but the minimum number is %u",
                    old.n,
                    type->value.type == OVSDB_TYPE_VOID ? "values" : "pairs",
                    column->name, table->class->name, type->n_min);
    }
    ovsdb_idl_txn_verify(row, column);
    ovsdb_idl_txn_write(row, column, &old);

    vsctl_context_invalidate_cache(ctx);
}

/* Prerequisite step for cmd_clear(): adds the table named by ctx->argv[1]
 * and each column named by ctx->argv[3] onward to the IDL, and fails fatally
 * if any of those columns is read-only. */
static void
pre_cmd_clear(struct vsctl_context *ctx)
{
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table;
    int i;

    table = pre_get_table(ctx, table_name);
    for (i = 3; i < ctx->argc; i++) {
        const struct ovsdb_idl_column *column;

        pre_get_column(ctx, table, ctx->argv[i], &column);
        check_mutable(table, column);
    }
}

/* Empties each column named by ctx->argv[3] onward in the record
 * ctx->argv[2] of the table ctx->argv[1].  Fails fatally for a column whose
 * type requires at least one element.  With "--if-exists", does nothing if
 * the record cannot be found. */
static void
cmd_clear(struct vsctl_context *ctx)
{
    bool must_exist = !shash_find(&ctx->options, "--if-exists");
    const char *table_name = ctx->argv[1];
    const char *record_id = ctx->argv[2];
    const struct vsctl_table_class *table;
    const struct ovsdb_idl_row *row;
    int i;

    table = get_table(table_name);
    row = get_row(ctx, table, record_id, must_exist);
    if (!row) {
        return;
    }

    for (i = 3; i < ctx->argc; i++) {
        const struct ovsdb_idl_column *column;
        const struct ovsdb_type *type;
        struct ovsdb_datum datum;

        die_if_error(get_column(table, ctx->argv[i], &column));

        type = &column->type;
        if (type->n_min > 0) {
            vsctl_fatal("\"clear\" operation cannot be applied to column %s "
                        "of table %s, which is not allowed to be empty",
                        column->name, table->class->name);
        }

        ovsdb_datum_init_empty(&datum);
        ovsdb_idl_txn_write(row, column, &datum);
    }

    vsctl_context_invalidate_cache(ctx);
}

/* Prerequisite step for cmd_create(): logs a warning if "--id" was not
 * given and the table named by ctx->argv[1] is not a root table.  Nothing
 * is added to the IDL here. */
static void
pre_create(struct vsctl_context *ctx)
{
    const char *id = shash_find_data(&ctx->options, "--id");
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table;

    table = get_table(table_name);
    if (!id && !table->class->is_root) {
        VLOG_WARN("applying \"create\" command to table %s without --id "
                  "option will have 
no effect", table->class->name);
    }
}

/* Inserts a new row into the table named by ctx->argv[1], applies each
 * assignment ctx->argv[2] onward to it, and appends the UUID reported by
 * the insert to the command output, without a trailing new-line.  With
 * "--id=@name", the symbol '@name' is created and its UUID is used for the
 * new row. */
static void
cmd_create(struct vsctl_context *ctx)
{
    const char *id = shash_find_data(&ctx->options, "--id");
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table = get_table(table_name);
    const struct ovsdb_idl_row *row;
    const struct uuid *uuid;
    int i;

    if (id) {
        struct ovsdb_symbol *symbol = create_symbol(ctx->symtab, id, NULL);
        if (table->class->is_root) {
            /* This table is in the root set, meaning that rows created in it
             * won't disappear even if they are unreferenced, so disable
             * warnings about that by pretending that there is a reference. */
            symbol->strong_ref = true;
        }
        uuid = &symbol->uuid;
    } else {
        uuid = NULL;
    }

    row = ovsdb_idl_txn_insert(ctx->txn, table->class, uuid);
    for (i = 2; i < ctx->argc; i++) {
        set_column(table, row, ctx->argv[i], ctx->symtab);
    }

    /* No new-line here: post_create() parses this output as a UUID and
     * appends the new-line itself. */
    ds_put_format(&ctx->output, UUID_FMT, UUID_ARGS(&row->uuid));
}

/* This function may be used as the 'postprocess' function for commands that
 * insert new rows into the database.  It expects that the command's 'run'
 * function prints the UUID reported by ovsdb_idl_txn_insert() as the command's
 * sole output.  It replaces that output by the row's permanent UUID assigned
 * by the database server and appends a new-line.
 *
 * Currently we use this only for "create", because the higher-level commands
 * are supposed to be independent of the actual structure of the vswitch
 * configuration.
*/
static void
post_create(struct vsctl_context *ctx)
{
    const struct uuid *real;
    struct uuid dummy;

    /* The command's output so far must be exactly one UUID. */
    if (!uuid_from_string(&dummy, ds_cstr(&ctx->output))) {
        NOT_REACHED();
    }

    /* If no replacement UUID is available, the original output is kept and
     * only the new-line is appended. */
    real = ovsdb_idl_txn_get_insert_uuid(ctx->txn, &dummy);
    if (real) {
        ds_clear(&ctx->output);
        ds_put_format(&ctx->output, UUID_FMT, UUID_ARGS(real));
    }
    ds_put_char(&ctx->output, '\n');
}

/* Prerequisite step for cmd_destroy(): adds the table named by ctx->argv[1]
 * to the IDL. */
static void
pre_cmd_destroy(struct vsctl_context *ctx)
{
    const char *table_name = ctx->argv[1];

    pre_get_table(ctx, table_name);
}

/* Deletes records from the table named by ctx->argv[1]: every row of the
 * table if "--all" was given, otherwise each record named by ctx->argv[2]
 * onward.  "--all" may not be combined with record arguments or with
 * "--if-exists".  With "--if-exists", named records that cannot be found
 * are skipped. */
static void
cmd_destroy(struct vsctl_context *ctx)
{
    bool must_exist = !shash_find(&ctx->options, "--if-exists");
    bool delete_all = shash_find(&ctx->options, "--all");
    const char *table_name = ctx->argv[1];
    const struct vsctl_table_class *table;
    int i;

    table = get_table(table_name);

    if (delete_all && ctx->argc > 2) {
        vsctl_fatal("--all and records argument should not be specified together");
    }

    if (delete_all && !must_exist) {
        vsctl_fatal("--all and --if-exists should not be specified together");
    }

    if (delete_all) {
        const struct ovsdb_idl_row *row;
        const struct ovsdb_idl_row *next_row;

        /* Fetch the successor before deleting the current row. */
        for (row = ovsdb_idl_first_row(ctx->idl, table->class);
             row;) {
             next_row = ovsdb_idl_next_row(row);
             ovsdb_idl_txn_delete(row);
             row = next_row;
        }
    } else {
        for (i = 2; i < ctx->argc; i++) {
            const struct ovsdb_idl_row *row;

            row = get_row(ctx, table, ctx->argv[i], must_exist);
            if (row) {
                ovsdb_idl_txn_delete(row);
            }
        }
    }
    vsctl_context_invalidate_cache(ctx);
}

/* The relational operators accepted in conditions, as (enumerator, spelling)
 * pairs.  Users of this list define RELOP(ENUM, STRING), expand RELOPS, and
 * then #undef RELOP; 'enum relop' below is generated that way, so each
 * enumerator's value is its position in this list. */
#define RELOPS                                  \
    RELOP(RELOP_EQ,     "=")                    \
    RELOP(RELOP_NE,     "!=")                   \
    RELOP(RELOP_LT,     "<")                    \
    RELOP(RELOP_GT,     ">")                    \
    RELOP(RELOP_LE,     "<=")                   \
    RELOP(RELOP_GE,     ">=")                   \
    RELOP(RELOP_SET_EQ, "{=}")                  \
    RELOP(RELOP_SET_NE, "{!=}")                 \
    RELOP(RELOP_SET_LT, "{<}")                  \
    RELOP(RELOP_SET_GT, "{>}")                  \
    RELOP(RELOP_SET_LE, "{<=}")                 \
    RELOP(RELOP_SET_GE, "{>=}")

enum relop {
#define RELOP(ENUM, STRING) ENUM,
    RELOPS
#undef RELOP
};

/* Returns true if 'op' is one of the RELOP_SET_* operators, that is, one of
 * those spelled with braces. */
static bool
is_set_operator(enum relop op)
{
    return (op == RELOP_SET_EQ || op == RELOP_SET_NE ||
            op == RELOP_SET_LT || op == RELOP_SET_GT ||
            op == RELOP_SET_LE
|| op == RELOP_SET_GE); } static bool evaluate_relop(const struct ovsdb_datum *a, const struct ovsdb_datum *b, const struct ovsdb_type *type, enum relop op) { switch (op) { case RELOP_EQ: case RELOP_SET_EQ: return ovsdb_datum_compare_3way(a, b, type) == 0; case RELOP_NE: case RELOP_SET_NE: return ovsdb_datum_compare_3way(a, b, type) != 0; case RELOP_LT: return ovsdb_datum_compare_3way(a, b, type) < 0; case RELOP_GT: return ovsdb_datum_compare_3way(a, b, type) > 0; case RELOP_LE: return ovsdb_datum_compare_3way(a, b, type) <= 0; case RELOP_GE: return ovsdb_datum_compare_3way(a, b, type) >= 0; case RELOP_SET_LT: return b->n > a->n && ovsdb_datum_includes_all(a, b, type); case RELOP_SET_GT: return a->n > b->n && ovsdb_datum_includes_all(b, a, type); case RELOP_SET_LE: return ovsdb_datum_includes_all(a, b, type); case RELOP_SET_GE: return ovsdb_datum_includes_all(b, a, type); default: NOT_REACHED(); } } static bool is_condition_satisfied(const struct vsctl_table_class *table, const struct ovsdb_idl_row *row, const char *arg, struct ovsdb_symbol_table *symtab) { static const char *operators[] = { #define RELOP(ENUM, STRING) STRING, RELOPS #undef RELOP }; const struct ovsdb_idl_column *column; const struct ovsdb_datum *have_datum; char *key_string, *value_string; struct ovsdb_type type; int operator; bool retval; char *error; error = parse_column_key_value(arg, table, &column, &key_string, &operator, operators, ARRAY_SIZE(operators), &value_string); die_if_error(error); if (!value_string) { vsctl_fatal("%s: missing value", arg); } type = column->type; type.n_max = UINT_MAX; have_datum = ovsdb_idl_read(row, column); if (key_string) { union ovsdb_atom want_key; struct ovsdb_datum b; unsigned int idx; if (column->type.value.type == OVSDB_TYPE_VOID) { vsctl_fatal("cannot specify key to check for non-map column %s", column->name); } die_if_error(ovsdb_atom_from_string(&want_key, &column->type.key, key_string, symtab)); type.key = type.value; type.value.type = OVSDB_TYPE_VOID; 
die_if_error(ovsdb_datum_from_string(&b, &type, value_string, symtab)); idx = ovsdb_datum_find_key(have_datum, &want_key, column->type.key.type); if (idx == UINT_MAX && !is_set_operator(operator)) { retval = false; } else { struct ovsdb_datum a; if (idx != UINT_MAX) { a.n = 1; a.keys = &have_datum->values[idx]; a.values = NULL; } else { a.n = 0; a.keys = NULL; a.values = NULL; } retval = evaluate_relop(&a, &b, &type, operator); } ovsdb_atom_destroy(&want_key, column->type.key.type); ovsdb_datum_destroy(&b, &type); } else { struct ovsdb_datum want_datum; die_if_error(ovsdb_datum_from_string(&want_datum, &column->type, value_string, symtab)); retval = evaluate_relop(have_datum, &want_datum, &type, operator); ovsdb_datum_destroy(&want_datum, &column->type); } free(key_string); free(value_string); return retval; } static void pre_cmd_wait_until(struct vsctl_context *ctx) { const char *table_name = ctx->argv[1]; const struct vsctl_table_class *table; int i; table = pre_get_table(ctx, table_name); for (i = 3; i < ctx->argc; i++) { pre_parse_column_key_value(ctx, ctx->argv[i], table); } } static void cmd_wait_until(struct vsctl_context *ctx) { const char *table_name = ctx->argv[1]; const char *record_id = ctx->argv[2]; const struct vsctl_table_class *table; const struct ovsdb_idl_row *row; int i; table = get_table(table_name); row = get_row(ctx, table, record_id, false); if (!row) { ctx->try_again = true; return; } for (i = 3; i < ctx->argc; i++) { if (!is_condition_satisfied(table, row, ctx->argv[i], ctx->symtab)) { ctx->try_again = true; return; } } } /* Prepares 'ctx', which has already been initialized with * vsctl_context_init(), for processing 'command'. 
*/ static void vsctl_context_init_command(struct vsctl_context *ctx, struct vsctl_command *command) { ctx->argc = command->argc; ctx->argv = command->argv; ctx->options = command->options; ds_swap(&ctx->output, &command->output); ctx->table = command->table; ctx->verified_ports = false; ctx->try_again = false; } /* Prepares 'ctx' for processing commands, initializing its members with the * values passed in as arguments. * * If 'command' is nonnull, calls vsctl_context_init_command() to prepare for * that particular command. */ static void vsctl_context_init(struct vsctl_context *ctx, struct vsctl_command *command, struct ovsdb_idl *idl, struct ovsdb_idl_txn *txn, const struct ovsrec_open_vswitch *ovs, struct ovsdb_symbol_table *symtab) { if (command) { vsctl_context_init_command(ctx, command); } ctx->idl = idl; ctx->txn = txn; ctx->ovs = ovs; ctx->symtab = symtab; ctx->cache_valid = false; } /* Completes processing of 'command' within 'ctx'. */ static void vsctl_context_done_command(struct vsctl_context *ctx, struct vsctl_command *command) { ds_swap(&ctx->output, &command->output); command->table = ctx->table; } /* Finishes up with 'ctx'. * * If command is nonnull, first calls vsctl_context_done_command() to complete * processing that command within 'ctx'. 
*/
static void
vsctl_context_done(struct vsctl_context *ctx, struct vsctl_command *command)
{
    if (command) {
        vsctl_context_done_command(ctx, command);
    }
    /* The cache is dropped whether or not a command was attached. */
    vsctl_context_invalidate_cache(ctx);
}

/* Runs the 'prerequisites' callback, if any, of each of the 'n_commands'
 * commands in 'commands' against 'idl'.
 *
 * Before doing so, registers the Open_vSwitch table with 'idl' and, when
 * 'wait_for_reload' is set, its cur_cfg column as well.
 *
 * Each callback gets a context with no transaction, no Open_vSwitch row and
 * no symbol table.  A prerequisites callback must not produce output or a
 * result table; both are asserted afterward. */
static void
run_prerequisites(struct vsctl_command *commands, size_t n_commands,
                  struct ovsdb_idl *idl)
{
    struct vsctl_command *c;

    ovsdb_idl_add_table(idl, &ovsrec_table_open_vswitch);
    if (wait_for_reload) {
        ovsdb_idl_add_column(idl, &ovsrec_open_vswitch_col_cur_cfg);
    }
    for (c = commands; c < &commands[n_commands]; c++) {
        if (c->syntax->prerequisites) {
            struct vsctl_context ctx;

            ds_init(&c->output);
            c->table = NULL;

            vsctl_context_init(&ctx, c, idl, NULL, NULL, NULL);
            (c->syntax->prerequisites)(&ctx);
            vsctl_context_done(&ctx, c);

            ovs_assert(!c->output.string);
            ovs_assert(!c->table);
        }
    }
}

/* Executes the 'n_commands' commands in 'commands' as one transaction against
 * 'idl'.  'args' is recorded in the transaction's comment.
 *
 * Outline:
 *
 *   1. Create the transaction (also published in 'the_idl_txn'), honoring
 *      'dry_run' and 'wait_for_reload'.
 *   2. Call each command's 'run' callback.  If one sets 'try_again', go to
 *      the retry path.
 *   3. Check the symbol table, then commit and block for the result.
 *   4. If the commit was TXN_UNCHANGED or TXN_SUCCESS, call each command's
 *      'postprocess' callback.
 *   5. On TXN_UNCHANGED or TXN_SUCCESS, print every command's output, free
 *      'commands', optionally wait for cur_cfg to reach the incremented
 *      next_cfg, destroy 'idl' and exit the process with EXIT_SUCCESS.
 *
 * This function therefore returns only on the retry path (a command asked to
 * try again, or the commit reported TXN_TRY_AGAIN).  In that case
 * per-attempt resources are released but 'commands' and 'idl' are left for
 * the caller.  Other failures are reported through vsctl_fatal(), which is
 * relied on not to return. */
static void
do_vsctl(const char *args, struct vsctl_command *commands, size_t n_commands,
         struct ovsdb_idl *idl)
{
    struct ovsdb_idl_txn *txn;
    const struct ovsrec_open_vswitch *ovs;
    enum ovsdb_idl_txn_status status;
    struct ovsdb_symbol_table *symtab;
    struct vsctl_context ctx;
    struct vsctl_command *c;
    struct shash_node *node;
    int64_t next_cfg = 0;
    char *error = NULL;

    txn = the_idl_txn = ovsdb_idl_txn_create(idl);
    if (dry_run) {
        ovsdb_idl_txn_set_dry_run(txn);
    }

    ovsdb_idl_txn_add_comment(txn, "ovs-vsctl: %s", args);

    /* Use the first Open_vSwitch row, inserting one if there is none. */
    ovs = ovsrec_open_vswitch_first(idl);
    if (!ovs) {
        /* XXX add verification that table is empty */
        ovs = ovsrec_open_vswitch_insert(txn);
    }

    if (wait_for_reload) {
        /* Have the transaction increment next_cfg; the new value is read
         * back after a successful commit and waited for at the end. */
        ovsdb_idl_txn_increment(txn, &ovs->header_,
                                &ovsrec_open_vswitch_col_next_cfg);
    }

    symtab = ovsdb_symbol_table_create();
    /* Give every command a fresh output buffer and no result table. */
    for (c = commands; c < &commands[n_commands]; c++) {
        ds_init(&c->output);
        c->table = NULL;
    }
    /* One context is shared by all 'run' callbacks and re-targeted at each
     * command in turn. */
    vsctl_context_init(&ctx, NULL, idl, txn, ovs, symtab);
    for (c = commands; c < &commands[n_commands]; c++) {
        vsctl_context_init_command(&ctx, c);
        if (c->syntax->run) {
            (c->syntax->run)(&ctx);
        }
        vsctl_context_done_command(&ctx, c);

        if (ctx.try_again) {
            vsctl_context_done(&ctx, NULL);
            goto try_again;
        }
    }
    vsctl_context_done(&ctx, NULL);

    /* Every symbol must have been created by some command; a symbol that was
     * created but never strongly referenced only draws a warning. */
    SHASH_FOR_EACH (node, &symtab->sh) {
        struct ovsdb_symbol *symbol = node->data;
        if (!symbol->created) {
            vsctl_fatal("row id \"%s\" is referenced but never created (e.g. "
                        "with \"-- --id=%s create ...\")",
                        node->name, node->name);
        }
        if (!symbol->strong_ref) {
            if (!symbol->weak_ref) {
                VLOG_WARN("row id \"%s\" was created but no reference to it "
                          "was inserted, so it will not actually appear in "
                          "the database", node->name);
            } else {
                VLOG_WARN("row id \"%s\" was created but only a weak "
                          "reference to it was inserted, so it will not "
                          "actually appear in the database", node->name);
            }
        }
    }

    status = ovsdb_idl_txn_commit_block(txn);
    if (wait_for_reload && status == TXN_SUCCESS) {
        next_cfg = ovsdb_idl_txn_get_increment_new_value(txn);
    }
    if (status == TXN_UNCHANGED || status == TXN_SUCCESS) {
        for (c = commands; c < &commands[n_commands]; c++) {
            if (c->syntax->postprocess) {
                /* Note: this 'ctx' shadows the function-level one. */
                struct vsctl_context ctx;

                vsctl_context_init(&ctx, c, idl, txn, ovs, symtab);
                (c->syntax->postprocess)(&ctx);
                vsctl_context_done(&ctx, c);
            }
        }
    }
    /* Take a copy of the error text before the transaction is destroyed. */
    error = xstrdup(ovsdb_idl_txn_get_error(txn));
    ovsdb_idl_txn_destroy(txn);
    /* 'txn' is nulled so that the retry path does not destroy it twice. */
    txn = the_idl_txn = NULL;

    switch (status) {
    case TXN_UNCOMMITTED:
    case TXN_INCOMPLETE:
        NOT_REACHED();

    case TXN_ABORTED:
        /* Should not happen--we never call ovsdb_idl_txn_abort(). */
        vsctl_fatal("transaction aborted");

    case TXN_UNCHANGED:
    case TXN_SUCCESS:
        break;

    case TXN_TRY_AGAIN:
        goto try_again;

    case TXN_ERROR:
        vsctl_fatal("transaction error: %s", error);

    case TXN_NOT_LOCKED:
        /* Should not happen--we never call ovsdb_idl_set_lock(). */
        vsctl_fatal("database not locked");

    default:
        NOT_REACHED();
    }
    free(error);

    ovsdb_symbol_table_destroy(symtab);

    /* Emit each command's result: its table if it produced one, otherwise
     * its text output. */
    for (c = commands; c < &commands[n_commands]; c++) {
        struct ds *ds = &c->output;

        if (c->table) {
            table_print(c->table, &table_style);
        } else if (oneline) {
            size_t j;

            /* One line per command: drop trailing new-lines, then write
             * embedded new-lines as "\n" and backslashes as "\\". */
            ds_chomp(ds, '\n');
            for (j = 0; j < ds->length; j++) {
                int ch = ds->string[j];
                switch (ch) {
                case '\n':
                    fputs("\\n", stdout);
                    break;

                case '\\':
                    fputs("\\\\", stdout);
                    break;

                default:
                    putchar(ch);
                }
            }
            putchar('\n');
        } else {
            fputs(ds_cstr(ds), stdout);
        }
        ds_destroy(&c->output);
        table_destroy(c->table);
        free(c->table);

        shash_destroy_free_data(&c->options);
    }
    free(commands);

    if (wait_for_reload && status != TXN_UNCHANGED) {
        /* Keep running the IDL until some Open_vSwitch row reports a cur_cfg
         * at least as large as the next_cfg value this transaction wrote. */
        for (;;) {
            ovsdb_idl_run(idl);
            OVSREC_OPEN_VSWITCH_FOR_EACH (ovs, idl) {
                if (ovs->cur_cfg >= next_cfg) {
                    goto done;
                }
            }
            ovsdb_idl_wait(idl);
            poll_block();
        }
    done: ;
    }
    ovsdb_idl_destroy(idl);

    exit(EXIT_SUCCESS);

try_again:
    /* Our transaction needs to be rerun, or a prerequisite was not met.  Free
     * resources and return so that the caller can try again. */
    if (txn) {
        /* Reached before the commit: the transaction is still live. */
        ovsdb_idl_txn_abort(txn);
        ovsdb_idl_txn_destroy(txn);
        the_idl_txn = NULL;
    }
    ovsdb_symbol_table_destroy(symtab);
    /* Only per-attempt output is released here; 'commands' and each
     * command's options are not freed on this path. */
    for (c = commands; c < &commands[n_commands]; c++) {
        ds_destroy(&c->output);
        table_destroy(c->table);
        free(c->table);
    }
    free(error);
}

static const struct vsctl_command_syntax all_commands[] = {
    /* Open vSwitch commands. */
    {"init", 0, 0, NULL, cmd_init, NULL, "", RW},
    {"show", 0, 0, pre_cmd_show, cmd_show, NULL, "", RO},

    /* Bridge commands.
*/ {"add-br", 1, 3, pre_get_info, cmd_add_br, NULL, "--may-exist", RW}, {"del-br", 1, 1, pre_get_info, cmd_del_br, NULL, "--if-exists", RW}, {"list-br", 0, 0, pre_get_info, cmd_list_br, NULL, "--real,--fake", RO}, {"br-exists", 1, 1, pre_get_info, cmd_br_exists, NULL, "", RO}, {"br-to-vlan", 1, 1, pre_get_info, cmd_br_to_vlan, NULL, "", RO}, {"br-to-parent", 1, 1, pre_get_info, cmd_br_to_parent, NULL, "", RO}, {"br-set-external-id", 2, 3, pre_cmd_br_set_external_id, cmd_br_set_external_id, NULL, "", RW}, {"br-get-external-id", 1, 2, pre_cmd_br_get_external_id, cmd_br_get_external_id, NULL, "", RO}, /* Port commands. */ {"list-ports", 1, 1, pre_get_info, cmd_list_ports, NULL, "", RO}, {"add-port", 2, INT_MAX, pre_get_info, cmd_add_port, NULL, "--may-exist", RW}, {"add-bond", 4, INT_MAX, pre_get_info, cmd_add_bond, NULL, "--may-exist,--fake-iface", RW}, {"del-port", 1, 2, pre_get_info, cmd_del_port, NULL, "--if-exists,--with-iface", RW}, {"port-to-br", 1, 1, pre_get_info, cmd_port_to_br, NULL, "", RO}, /* Interface commands. */ {"list-ifaces", 1, 1, pre_get_info, cmd_list_ifaces, NULL, "", RO}, {"iface-to-br", 1, 1, pre_get_info, cmd_iface_to_br, NULL, "", RO}, /* Controller commands. */ {"get-controller", 1, 1, pre_controller, cmd_get_controller, NULL, "", RO}, {"del-controller", 1, 1, pre_controller, cmd_del_controller, NULL, "", RW}, {"set-controller", 1, INT_MAX, pre_controller, cmd_set_controller, NULL, "", RW}, {"get-fail-mode", 1, 1, pre_get_info, cmd_get_fail_mode, NULL, "", RO}, {"del-fail-mode", 1, 1, pre_get_info, cmd_del_fail_mode, NULL, "", RW}, {"set-fail-mode", 2, 2, pre_get_info, cmd_set_fail_mode, NULL, "", RW}, /* Manager commands. */ {"get-manager", 0, 0, pre_manager, cmd_get_manager, NULL, "", RO}, {"del-manager", 0, 0, pre_manager, cmd_del_manager, NULL, "", RW}, {"set-manager", 1, INT_MAX, pre_manager, cmd_set_manager, NULL, "", RW}, /* SSL commands. 
*/ {"get-ssl", 0, 0, pre_cmd_get_ssl, cmd_get_ssl, NULL, "", RO}, {"del-ssl", 0, 0, pre_cmd_del_ssl, cmd_del_ssl, NULL, "", RW}, {"set-ssl", 3, 3, pre_cmd_set_ssl, cmd_set_ssl, NULL, "--bootstrap", RW}, /* Switch commands. */ {"emer-reset", 0, 0, pre_cmd_emer_reset, cmd_emer_reset, NULL, "", RW}, /* Database commands. */ {"comment", 0, INT_MAX, NULL, NULL, NULL, "", RO}, {"get", 2, INT_MAX, pre_cmd_get, cmd_get, NULL, "--if-exists,--id=", RO}, {"list", 1, INT_MAX, pre_cmd_list, cmd_list, NULL, "--if-exists,--columns=", RO}, {"find", 1, INT_MAX, pre_cmd_find, cmd_find, NULL, "--columns=", RO}, {"set", 3, INT_MAX, pre_cmd_set, cmd_set, NULL, "--if-exists", RW}, {"add", 4, INT_MAX, pre_cmd_add, cmd_add, NULL, "--if-exists", RW}, {"remove", 4, INT_MAX, pre_cmd_remove, cmd_remove, NULL, "--if-exists", RW}, {"clear", 3, INT_MAX, pre_cmd_clear, cmd_clear, NULL, "--if-exists", RW}, {"create", 2, INT_MAX, pre_create, cmd_create, post_create, "--id=", RW}, {"destroy", 1, INT_MAX, pre_cmd_destroy, cmd_destroy, NULL, "--if-exists,--all", RW}, {"wait-until", 2, INT_MAX, pre_cmd_wait_until, cmd_wait_until, NULL, "", RO}, {NULL, 0, 0, NULL, NULL, NULL, NULL, RO}, }; static const struct vsctl_command_syntax *get_all_commands(void) { return all_commands; } openvswitch-2.0.1+git20140120/vswitchd/000077500000000000000000000000001226605124000173055ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/vswitchd/.gitignore000066400000000000000000000001461226605124000212760ustar00rootroot00000000000000/Makefile /Makefile.in /ovs-vswitchd /ovs-vswitchd.8 /ovs-vswitchd.conf.db.5 /vswitch.ovsschema.stamp openvswitch-2.0.1+git20140120/vswitchd/INTERNALS000066400000000000000000000252741226605124000206010ustar00rootroot00000000000000 ======================== ovs-vswitchd Internals ======================== This document describes some of the internals of the ovs-vswitchd process. It is not complete. 
It tends to be updated on demand, so if you have questions about the vswitchd implementation, ask them and perhaps we'll add some appropriate documentation here. Most of the ovs-vswitchd implementation is in vswitchd/bridge.c, so code references below should be assumed to refer to that file except as otherwise specified. Bonding ======= Bonding allows two or more interfaces (the "slaves") to share network traffic. From a high-level point of view, bonded interfaces act like a single port, but they have the bandwidth of multiple network devices, e.g. two 1 Gbps physical interfaces act like a single 2 Gbps interface. Bonds also increase robustness: the bonded port does not go down as long as at least one of its slaves is up. In vswitchd, a bond always has at least two slaves (and may have more). If a configuration error, etc. would cause a bond to have only one slave, the port becomes an ordinary port, not a bonded port, and none of the special features of bonded ports described in this section apply. There are many forms of bonding of which ovs-vswitchd implements only a few. The most complex bond ovs-vswitchd implements is called "source load balancing" or SLB bonding. SLB bonding divides traffic among the slaves based on the Ethernet source address. This is useful only if the traffic over the bond has multiple Ethernet source addresses, for example if network traffic from multiple VMs is multiplexed over the bond. Enabling and Disabling Slaves ----------------------------- When a bond is created, a slave is initially enabled or disabled based on whether carrier is detected on the NIC (see iface_create()). After that, a slave is disabled if its carrier goes down for a period of time longer than the downdelay, and it is enabled if carrier comes up for longer than the updelay (see bond_link_status_update()).
There is one exception where the updelay is skipped: if no slaves at all are currently enabled, then the first slave on which carrier comes up is enabled immediately. The updelay should be set to a time longer than the STP forwarding delay of the physical switch to which the bond port is connected (if STP is enabled on that switch). Otherwise, the slave will be enabled, and load may be shifted to it, before the physical switch starts forwarding packets on that port, which can cause some data to be "blackholed" for a time. The exception for a single enabled slave does not cause any problem in this regard because when no slaves are enabled all output packets are blackholed anyway. When a slave becomes disabled, the vswitch immediately chooses a new output port for traffic that was destined for that slave (see bond_enable_slave()). It also sends a "gratuitous learning packet", specifically a RARP, on the bond port (on the newly chosen slave) for each MAC address that the vswitch has learned on a port other than the bond (see bond_send_learning_packets()), to teach the physical switch that the new slave should be used in place of the one that is now disabled. (This behavior probably makes sense only for a vswitch that has only one port (the bond) connected to a physical switch; vswitchd should probably provide a way to disable or configure it in other scenarios.) Bond Packet Input ----------------- Bonding accepts unicast packets on any bond slave. This can occasionally cause packet duplication for the first few packets sent to a given MAC, if the physical switch attached to the bond is flooding packets to that MAC because it has not yet learned the correct slave for that MAC. Bonding only accepts multicast (and broadcast) packets on a single bond slave (the "active slave") at any given time. Multicast packets received on other slaves are dropped. 
Otherwise, every multicast packet would be duplicated, once for every bond slave, because the physical switch attached to the bond will flood those packets. Bonding also drops received packets when the vswitch has learned that the packet's MAC is on a port other than the bond port itself. This is because it is likely that the vswitch itself sent the packet out the bond port on a different slave and is now receiving the packet back. This occurs when the packet is multicast or the physical switch has not yet learned the MAC and is flooding it. However, the vswitch makes an exception to this rule for broadcast ARP replies, which indicate that the MAC has moved to another switch, probably due to VM migration. (ARP replies are normally unicast, so this exception does not match normal ARP replies. It will match the learning packets sent on bond fail-over.) The active slave is simply the first slave to be enabled after the bond is created (see bond_choose_active_iface()). If the active slave is disabled, then a new active slave is chosen among the slaves that remain enabled. Currently, due to the way that configuration works, this tends to be the remaining slave whose interface name is first alphabetically, but this is by no means guaranteed. Bond Packet Output ------------------ When a packet is sent out a bond port, the bond slave actually used is selected based on the packet's source MAC and VLAN tag (see choose_output_iface()). In particular, the source MAC and VLAN tag are hashed into one of 256 values, and that value is looked up in a hash table (the "bond hash") kept in the "bond_hash" member of struct port. The hash table entry identifies a bond slave. If no bond slave has yet been chosen for that hash table entry, vswitchd chooses one arbitrarily. Every 10 seconds, vswitchd rebalances the bond slaves (see bond_rebalance_port()).
To rebalance, vswitchd examines the statistics for the number of bytes transmitted by each slave over approximately the past minute, with data sent more recently weighted more heavily than data sent less recently. It considers each of the slaves in order from most-loaded to least-loaded. If highly loaded slave H is significantly more heavily loaded than the least-loaded slave L, and slave H carries at least two hashes, then vswitchd shifts one of H's hashes to L. However, vswitchd will only shift a hash from H to L if it will decrease the ratio of the load between H and L by at least 0.1. Currently, "significantly more loaded" means that H must carry at least 1 Mbps more traffic, and that traffic must be at least 3% greater than L's. Bond Balance Modes ------------------ Each bond balancing mode has different considerations, described below. LACP Bonding ------------ LACP bonding requires the remote switch to implement LACP, but it is otherwise very simple in that, after LACP negotiation is complete, there is no need for special handling of received packets. Active Backup Bonding --------------------- Active Backup bonds send all traffic out one "active" slave until that slave becomes unavailable. Since they are significantly less complicated than SLB bonds, they are preferred when LACP is not an option. Additionally, they are the only bond mode which supports attaching each slave to a different upstream switch. SLB Bonding ----------- SLB bonding allows a limited form of load balancing without the remote switch's knowledge or cooperation. The basics of SLB are simple. SLB assigns each source MAC+VLAN pair to a link and transmits all packets from that MAC+VLAN through that link. Learning in the remote switch causes it to send packets to that MAC+VLAN through the same link. SLB bonding has the following complications: 0. 
When the remote switch has not learned the MAC for the destination of a unicast packet and hence floods the packet to all of the links on the SLB bond, Open vSwitch will forward duplicate packets, one per link, to each other switch port. Open vSwitch does not solve this problem. 1. When the remote switch receives a multicast or broadcast packet from a port not on the SLB bond, it will forward it to all of the links in the SLB bond. This would cause packet duplication if not handled specially. Open vSwitch avoids packet duplication by accepting multicast and broadcast packets on only the active slave, and dropping multicast and broadcast packets on all other slaves. 2. When Open vSwitch forwards a multicast or broadcast packet to a link in the SLB bond other than the active slave, the remote switch will forward it to all of the other links in the SLB bond, including the active slave. Without special handling, this would mean that Open vSwitch would forward a second copy of the packet to each switch port (other than the bond), including the port that originated the packet. Open vSwitch deals with this case by dropping packets received on any SLB bonded link that have a source MAC+VLAN that has been learned on any other port. (This means that SLB as implemented in Open vSwitch relies critically on MAC learning. Notably, SLB is incompatible with the "flood_vlans" feature.) 3. Suppose that a MAC+VLAN moves to an SLB bond from another port (e.g. when a VM is migrated from this hypervisor to a different one). Without additional special handling, Open vSwitch will not notice until the MAC learning entry expires, up to 60 seconds later as a consequence of rule #2. Open vSwitch avoids a 60-second delay by listening for gratuitous ARPs, which VMs commonly emit upon migration. As an exception to rule #2, a gratuitous ARP received on an SLB bond is not dropped and updates the MAC learning table in the usual way. 
(If a move does not trigger a gratuitous ARP, or if the gratuitous ARP is lost in the network, then a 60-second delay still occurs.) 4. Suppose that a MAC+VLAN moves from an SLB bond to another port (e.g. when a VM is migrated from a different hypervisor to this one), that the MAC+VLAN emits a gratuitous ARP, and that Open vSwitch forwards that gratuitous ARP to a link in the SLB bond other than the active slave. The remote switch will forward the gratuitous ARP to all of the other links in the SLB bond, including the active slave. Without additional special handling, this would mean that Open vSwitch would learn that the MAC+VLAN was located on the SLB bond, as a consequence of rule #3. Open vSwitch avoids this problem by "locking" the MAC learning table entry for a MAC+VLAN from which a gratuitous ARP was received from a non-SLB bond port. For 5 seconds, a locked MAC learning table entry will not be updated based on a gratuitous ARP received on a SLB bond. openvswitch-2.0.1+git20140120/vswitchd/automake.mk000066400000000000000000000061211226605124000214440ustar00rootroot00000000000000sbin_PROGRAMS += vswitchd/ovs-vswitchd man_MANS += vswitchd/ovs-vswitchd.8 DISTCLEANFILES += \ vswitchd/ovs-vswitchd.8 vswitchd_ovs_vswitchd_SOURCES = \ vswitchd/bridge.c \ vswitchd/bridge.h \ vswitchd/ovs-vswitchd.c \ vswitchd/system-stats.c \ vswitchd/system-stats.h \ vswitchd/xenserver.c \ vswitchd/xenserver.h vswitchd_ovs_vswitchd_LDADD = \ ofproto/libofproto.a \ lib/libsflow.a \ lib/libopenvswitch.a \ $(SSL_LIBS) EXTRA_DIST += vswitchd/INTERNALS MAN_ROOTS += vswitchd/ovs-vswitchd.8.in # vswitch schema and IDL EXTRA_DIST += vswitchd/vswitch.ovsschema pkgdata_DATA += vswitchd/vswitch.ovsschema # vswitch E-R diagram # # There are two complications here. First, if "python" or "dot" is not # available, then we have to just use the existing diagram. 
Second, different # "dot" versions produce slightly different output for the same input, but we # don't want to gratuitously change vswitch.pic if someone tweaks the schema in # some minor way that doesn't affect the table structure. To avoid that we # store a checksum of vswitch.gv in vswitch.pic and only regenerate vswitch.pic # if vswitch.gv actually changes. $(srcdir)/vswitchd/vswitch.gv: ovsdb/ovsdb-dot.in vswitchd/vswitch.ovsschema if HAVE_PYTHON $(OVSDB_DOT) $(srcdir)/vswitchd/vswitch.ovsschema > $@ else touch $@ endif $(srcdir)/vswitchd/vswitch.pic: $(srcdir)/vswitchd/vswitch.gv ovsdb/dot2pic if HAVE_DOT sum=`cksum < $(srcdir)/vswitchd/vswitch.gv`; \ if grep "$$sum" $@ >/dev/null 2>&1; then \ echo "vswitch.gv unchanged, not regenerating vswitch.pic"; \ touch $@; \ else \ echo "regenerating vswitch.pic"; \ (echo ".\\\" Generated from vswitch.gv with cksum \"$$sum\""; \ dot -T plain < $(srcdir)/vswitchd/vswitch.gv \ | $(srcdir)/ovsdb/dot2pic) > $@; \ fi else touch $@ endif EXTRA_DIST += vswitchd/vswitch.gv vswitchd/vswitch.pic # vswitch schema documentation EXTRA_DIST += vswitchd/vswitch.xml DISTCLEANFILES += $(srcdir)/vswitchd/ovs-vswitchd.conf.db.5 dist_man_MANS += vswitchd/ovs-vswitchd.conf.db.5 $(srcdir)/vswitchd/ovs-vswitchd.conf.db.5: \ ovsdb/ovsdb-doc vswitchd/vswitch.xml vswitchd/vswitch.ovsschema \ $(srcdir)/vswitchd/vswitch.pic $(OVSDB_DOC) \ --title="ovs-vswitchd.conf.db" \ --er-diagram=$(srcdir)/vswitchd/vswitch.pic \ --version=$(VERSION) \ $(srcdir)/vswitchd/vswitch.ovsschema \ $(srcdir)/vswitchd/vswitch.xml > $@.tmp mv $@.tmp $@ # Version checking for vswitch.ovsschema. ALL_LOCAL += vswitchd/vswitch.ovsschema.stamp vswitchd/vswitch.ovsschema.stamp: vswitchd/vswitch.ovsschema @sum=`sed '/cksum/d' $? 
| cksum`; \ expected=`sed -n 's/.*"cksum": "\(.*\)".*/\1/p' $?`; \ if test "X$$sum" = "X$$expected"; then \ touch $@; \ else \ ln=`sed -n '/"cksum":/=' $?`; \ echo >&2 "$?:$$ln: checksum \"$$sum\" does not match (you should probably update the version number and fix the checksum)"; \ exit 1; \ fi CLEANFILES += vswitchd/vswitch.ovsschema.stamp # Clean up generated files from older OVS versions. (This is important so that # #include "vswitch-idl.h" doesn't get the wrong copy.) CLEANFILES += vswitchd/vswitch-idl.c vswitchd/vswitch-idl.h openvswitch-2.0.1+git20140120/vswitchd/bridge.c000066400000000000000000004230761226605124000207210ustar00rootroot00000000000000/* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "bridge.h" #include #include #include #include "async-append.h" #include "bfd.h" #include "bitmap.h" #include "bond.h" #include "cfm.h" #include "coverage.h" #include "daemon.h" #include "dirs.h" #include "dynamic-string.h" #include "hash.h" #include "hmap.h" #include "hmapx.h" #include "jsonrpc.h" #include "lacp.h" #include "list.h" #include "mac-learning.h" #include "meta-flow.h" #include "netdev.h" #include "ofp-print.h" #include "ofp-util.h" #include "ofpbuf.h" #include "ofproto/ofproto.h" #include "poll-loop.h" #include "sha1.h" #include "shash.h" #include "smap.h" #include "socket-util.h" #include "stream.h" #include "stream-ssl.h" #include "sset.h" #include "system-stats.h" #include "timeval.h" #include "util.h" #include "unixctl.h" #include "vlandev.h" #include "lib/vswitch-idl.h" #include "xenserver.h" #include "vlog.h" #include "sflow_api.h" #include "vlan-bitmap.h" VLOG_DEFINE_THIS_MODULE(bridge); COVERAGE_DEFINE(bridge_reconfigure); /* Configuration of an uninstantiated iface. */ struct if_cfg { struct hmap_node hmap_node; /* Node in bridge's if_cfg_todo. */ const struct ovsrec_interface *cfg; /* Interface record. */ const struct ovsrec_port *parent; /* Parent port record. */ ofp_port_t ofport; /* Requested OpenFlow port number. */ }; /* OpenFlow port slated for removal from ofproto. */ struct ofpp_garbage { struct list list_node; /* Node in bridge's ofpp_garbage. */ ofp_port_t ofp_port; /* Port to be deleted. */ }; struct iface { /* These members are always valid. */ struct list port_elem; /* Element in struct port's "ifaces" list. */ struct hmap_node name_node; /* In struct bridge's "iface_by_name" hmap. */ struct port *port; /* Containing port. */ char *name; /* Host network device name. */ /* These members are valid only after bridge_reconfigure() causes them to * be initialized. */ struct hmap_node ofp_port_node; /* In struct bridge's "ifaces" hmap. */ ofp_port_t ofp_port; /* OpenFlow port number, */ /* OFPP_NONE if unknown. 
*/ struct netdev *netdev; /* Network device. */ const char *type; /* Usually same as cfg->type. */ const struct ovsrec_interface *cfg; }; struct mirror { struct uuid uuid; /* UUID of this "mirror" record in database. */ struct hmap_node hmap_node; /* In struct bridge's "mirrors" hmap. */ struct bridge *bridge; char *name; const struct ovsrec_mirror *cfg; }; struct port { struct hmap_node hmap_node; /* Element in struct bridge's "ports" hmap. */ struct bridge *bridge; char *name; const struct ovsrec_port *cfg; /* An ordinary bridge port has 1 interface. * A bridge port for bonding has at least 2 interfaces. */ struct list ifaces; /* List of "struct iface"s. */ }; struct bridge { struct hmap_node node; /* In 'all_bridges'. */ char *name; /* User-specified arbitrary name. */ char *type; /* Datapath type. */ uint8_t ea[ETH_ADDR_LEN]; /* Bridge Ethernet Address. */ uint8_t default_ea[ETH_ADDR_LEN]; /* Default MAC. */ const struct ovsrec_bridge *cfg; /* OpenFlow switch processing. */ struct ofproto *ofproto; /* OpenFlow switch. */ /* Bridge ports. */ struct hmap ports; /* "struct port"s indexed by name. */ struct hmap ifaces; /* "struct iface"s indexed by ofp_port. */ struct hmap iface_by_name; /* "struct iface"s indexed by name. */ struct list ofpp_garbage; /* "struct ofpp_garbage" slated for removal. */ struct hmap if_cfg_todo; /* "struct if_cfg"s slated for creation. Indexed on 'cfg->name'. */ /* Port mirroring. */ struct hmap mirrors; /* "struct mirror" indexed by UUID. */ /* Synthetic local port if necessary. */ struct ovsrec_port synth_local_port; struct ovsrec_interface synth_local_iface; struct ovsrec_interface *synth_local_ifacep; }; /* All bridges, indexed by name. */ static struct hmap all_bridges = HMAP_INITIALIZER(&all_bridges); /* OVSDB IDL used to obtain configuration. 
*/ static struct ovsdb_idl *idl; /* We want to complete daemonization, fully detaching from our parent process, * only after we have completed our initial configuration, committed our state * to the database, and received confirmation back from the database server * that it applied the commit. This allows our parent process to know that, * post-detach, ephemeral fields such as datapath-id and ofport are very likely * to have already been filled in. (It is only "very likely" rather than * certain because there is always a slim possibility that the transaction will * fail or that some other client has added new bridges, ports, etc. while * ovs-vswitchd was configuring using an old configuration.) * * We only need to do this once for our initial configuration at startup, so * 'initial_config_done' tracks whether we've already done it. While we are * waiting for a response to our commit, 'daemonize_txn' tracks the transaction * itself and is otherwise NULL. */ static bool initial_config_done; static struct ovsdb_idl_txn *daemonize_txn; /* Most recently processed IDL sequence number. */ static unsigned int idl_seqno; /* Each time this timer expires, the bridge fetches interface and mirror * statistics and pushes them into the database. */ #define IFACE_STATS_INTERVAL (5 * 1000) /* In milliseconds. */ static long long int iface_stats_timer = LLONG_MIN; /* In some datapaths, creating and destroying OpenFlow ports can be extremely * expensive. This can cause bridge_reconfigure() to take a long time during * which no other work can be done. To deal with this problem, we limit port * adds and deletions to a window of OFP_PORT_ACTION_WINDOW milliseconds per * call to bridge_reconfigure(). If there is more work to do after the limit * is reached, 'need_reconfigure', is flagged and it's done on the next loop. * This allows the rest of the code to catch up on important things like * forwarding packets. 
*/ #define OFP_PORT_ACTION_WINDOW 10 static bool reconfiguring = false; static void add_del_bridges(const struct ovsrec_open_vswitch *); static void bridge_update_ofprotos(void); static void bridge_create(const struct ovsrec_bridge *); static void bridge_destroy(struct bridge *); static struct bridge *bridge_lookup(const char *name); static unixctl_cb_func bridge_unixctl_dump_flows; static unixctl_cb_func bridge_unixctl_reconnect; static size_t bridge_get_controllers(const struct bridge *br, struct ovsrec_controller ***controllersp); static void bridge_add_del_ports(struct bridge *, const unsigned long int *splinter_vlans); static void bridge_refresh_ofp_port(struct bridge *); static void bridge_configure_flow_miss_model(const char *opt); static void bridge_configure_datapath_id(struct bridge *); static void bridge_configure_netflow(struct bridge *); static void bridge_configure_forward_bpdu(struct bridge *); static void bridge_configure_mac_table(struct bridge *); static void bridge_configure_sflow(struct bridge *, int *sflow_bridge_number); static void bridge_configure_ipfix(struct bridge *); static void bridge_configure_stp(struct bridge *); static void bridge_configure_tables(struct bridge *); static void bridge_configure_dp_desc(struct bridge *); static void bridge_configure_remotes(struct bridge *, const struct sockaddr_in *managers, size_t n_managers); static void bridge_pick_local_hw_addr(struct bridge *, uint8_t ea[ETH_ADDR_LEN], struct iface **hw_addr_iface); static uint64_t bridge_pick_datapath_id(struct bridge *, const uint8_t bridge_ea[ETH_ADDR_LEN], struct iface *hw_addr_iface); static void bridge_queue_if_cfg(struct bridge *, const struct ovsrec_interface *, const struct ovsrec_port *); static uint64_t dpid_from_hash(const void *, size_t nbytes); static bool bridge_has_bond_fake_iface(const struct bridge *, const char *name); static bool port_is_bond_fake_iface(const struct port *); static unixctl_cb_func qos_unixctl_show; static struct port 
*port_create(struct bridge *, const struct ovsrec_port *); static void port_del_ifaces(struct port *); static void port_destroy(struct port *); static struct port *port_lookup(const struct bridge *, const char *name); static void port_configure(struct port *); static struct lacp_settings *port_configure_lacp(struct port *, struct lacp_settings *); static void port_configure_bond(struct port *, struct bond_settings *); static bool port_is_synthetic(const struct port *); static void reconfigure_system_stats(const struct ovsrec_open_vswitch *); static void run_system_stats(void); static void bridge_configure_mirrors(struct bridge *); static struct mirror *mirror_create(struct bridge *, const struct ovsrec_mirror *); static void mirror_destroy(struct mirror *); static bool mirror_configure(struct mirror *); static void mirror_refresh_stats(struct mirror *); static void iface_configure_lacp(struct iface *, struct lacp_slave_settings *); static bool iface_create(struct bridge *, struct if_cfg *, ofp_port_t ofp_port); static bool iface_is_internal(const struct ovsrec_interface *iface, const struct ovsrec_bridge *br); static const char *iface_get_type(const struct ovsrec_interface *, const struct ovsrec_bridge *); static void iface_destroy(struct iface *); static struct iface *iface_lookup(const struct bridge *, const char *name); static struct iface *iface_find(const char *name); static struct if_cfg *if_cfg_lookup(const struct bridge *, const char *name); static struct iface *iface_from_ofp_port(const struct bridge *, ofp_port_t ofp_port); static void iface_set_mac(struct iface *); static void iface_set_ofport(const struct ovsrec_interface *, ofp_port_t ofport); static void iface_clear_db_record(const struct ovsrec_interface *if_cfg); static void iface_configure_qos(struct iface *, const struct ovsrec_qos *); static void iface_configure_cfm(struct iface *); static void iface_refresh_cfm_stats(struct iface *); static void iface_refresh_stats(struct iface *); static void 
iface_refresh_status(struct iface *); static bool iface_is_synthetic(const struct iface *); static ofp_port_t iface_pick_ofport(const struct ovsrec_interface *); /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) * * This is deprecated. It is only for compatibility with broken device drivers * in old versions of Linux that do not properly support VLANs when VLAN * devices are not used. When broken device drivers are no longer in * widespread use, we will delete these interfaces. */ /* True if VLAN splinters are enabled on any interface, false otherwise.*/ static bool vlan_splinters_enabled_anywhere; static bool vlan_splinters_is_enabled(const struct ovsrec_interface *); static unsigned long int *collect_splinter_vlans( const struct ovsrec_open_vswitch *); static void configure_splinter_port(struct port *); static void add_vlan_splinter_ports(struct bridge *, const unsigned long int *splinter_vlans, struct shash *ports); static void bridge_init_ofproto(const struct ovsrec_open_vswitch *cfg) { struct shash iface_hints; static bool initialized = false; int i; if (initialized) { return; } shash_init(&iface_hints); if (cfg) { for (i = 0; i < cfg->n_bridges; i++) { const struct ovsrec_bridge *br_cfg = cfg->bridges[i]; int j; for (j = 0; j < br_cfg->n_ports; j++) { struct ovsrec_port *port_cfg = br_cfg->ports[j]; int k; for (k = 0; k < port_cfg->n_interfaces; k++) { struct ovsrec_interface *if_cfg = port_cfg->interfaces[k]; struct iface_hint *iface_hint; iface_hint = xmalloc(sizeof *iface_hint); iface_hint->br_name = br_cfg->name; iface_hint->br_type = br_cfg->datapath_type; iface_hint->ofp_port = iface_pick_ofport(if_cfg); shash_add(&iface_hints, if_cfg->name, iface_hint); } } } } ofproto_init(&iface_hints); shash_destroy_free_data(&iface_hints); initialized = true; } /* Public functions. 
*/ /* Initializes the bridge module, configuring it to obtain its configuration * from an OVSDB server accessed over 'remote', which should be a string in a * form acceptable to ovsdb_idl_create(). */ void bridge_init(const char *remote) { /* Create connection to database. */ idl = ovsdb_idl_create(remote, &ovsrec_idl_class, true, true); idl_seqno = ovsdb_idl_get_seqno(idl); ovsdb_idl_set_lock(idl, "ovs_vswitchd"); ovsdb_idl_verify_write_only(idl); ovsdb_idl_omit_alert(idl, &ovsrec_open_vswitch_col_cur_cfg); ovsdb_idl_omit_alert(idl, &ovsrec_open_vswitch_col_statistics); ovsdb_idl_omit(idl, &ovsrec_open_vswitch_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_open_vswitch_col_ovs_version); ovsdb_idl_omit(idl, &ovsrec_open_vswitch_col_db_version); ovsdb_idl_omit(idl, &ovsrec_open_vswitch_col_system_type); ovsdb_idl_omit(idl, &ovsrec_open_vswitch_col_system_version); ovsdb_idl_omit_alert(idl, &ovsrec_bridge_col_datapath_id); ovsdb_idl_omit_alert(idl, &ovsrec_bridge_col_status); ovsdb_idl_omit(idl, &ovsrec_bridge_col_external_ids); ovsdb_idl_omit_alert(idl, &ovsrec_port_col_status); ovsdb_idl_omit_alert(idl, &ovsrec_port_col_statistics); ovsdb_idl_omit(idl, &ovsrec_port_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_port_col_fake_bridge); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_admin_state); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_duplex); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_link_speed); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_link_state); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_link_resets); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_mac_in_use); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_ifindex); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_mtu); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_ofport); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_statistics); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_status); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_fault); ovsdb_idl_omit_alert(idl, 
&ovsrec_interface_col_cfm_fault_status); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_mpids); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_health); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_cfm_remote_opstate); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_bfd_status); ovsdb_idl_omit_alert(idl, &ovsrec_interface_col_lacp_current); ovsdb_idl_omit(idl, &ovsrec_interface_col_external_ids); ovsdb_idl_omit_alert(idl, &ovsrec_controller_col_is_connected); ovsdb_idl_omit_alert(idl, &ovsrec_controller_col_role); ovsdb_idl_omit_alert(idl, &ovsrec_controller_col_status); ovsdb_idl_omit(idl, &ovsrec_controller_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_qos_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_queue_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_mirror_col_external_ids); ovsdb_idl_omit_alert(idl, &ovsrec_mirror_col_statistics); ovsdb_idl_omit(idl, &ovsrec_netflow_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_sflow_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_ipfix_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_flow_sample_collector_set_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_manager_col_external_ids); ovsdb_idl_omit(idl, &ovsrec_manager_col_inactivity_probe); ovsdb_idl_omit(idl, &ovsrec_manager_col_is_connected); ovsdb_idl_omit(idl, &ovsrec_manager_col_max_backoff); ovsdb_idl_omit(idl, &ovsrec_manager_col_status); ovsdb_idl_omit(idl, &ovsrec_ssl_col_external_ids); /* Register unixctl commands. 
*/ unixctl_command_register("qos/show", "interface", 1, 1, qos_unixctl_show, NULL); unixctl_command_register("bridge/dump-flows", "bridge", 1, 1, bridge_unixctl_dump_flows, NULL); unixctl_command_register("bridge/reconnect", "[bridge]", 0, 1, bridge_unixctl_reconnect, NULL); lacp_init(); bond_init(); cfm_init(); stp_init(); } void bridge_exit(void) { struct bridge *br, *next_br; HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) { bridge_destroy(br); } ovsdb_idl_destroy(idl); } /* Looks at the list of managers in 'ovs_cfg' and extracts their remote IP * addresses and ports into '*managersp' and '*n_managersp'. The caller is * responsible for freeing '*managersp' (with free()). * * You may be asking yourself "why does ovs-vswitchd care?", because * ovsdb-server is responsible for connecting to the managers, and ovs-vswitchd * should not be and in fact is not directly involved in that. But * ovs-vswitchd needs to make sure that ovsdb-server can reach the managers, so * it has to tell in-band control where the managers are to enable that. * (Thus, only managers connected in-band are collected.) */ static void collect_in_band_managers(const struct ovsrec_open_vswitch *ovs_cfg, struct sockaddr_in **managersp, size_t *n_managersp) { struct sockaddr_in *managers = NULL; size_t n_managers = 0; struct sset targets; size_t i; /* Collect all of the potential targets from the "targets" columns of the * rows pointed to by "manager_options", excluding any that are * out-of-band. */ sset_init(&targets); for (i = 0; i < ovs_cfg->n_manager_options; i++) { struct ovsrec_manager *m = ovs_cfg->manager_options[i]; if (m->connection_mode && !strcmp(m->connection_mode, "out-of-band")) { sset_find_and_delete(&targets, m->target); } else { sset_add(&targets, m->target); } } /* Now extract the targets' IP addresses. 
*/ if (!sset_is_empty(&targets)) { const char *target; managers = xmalloc(sset_count(&targets) * sizeof *managers); SSET_FOR_EACH (target, &targets) { struct sockaddr_in *sin = &managers[n_managers]; if (stream_parse_target_with_default_ports(target, JSONRPC_TCP_PORT, JSONRPC_SSL_PORT, sin)) { n_managers++; } } } sset_destroy(&targets); *managersp = managers; *n_managersp = n_managers; } static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) { unsigned long int *splinter_vlans; struct bridge *br; COVERAGE_INC(bridge_reconfigure); ovs_assert(!reconfiguring); reconfiguring = true; ofproto_set_flow_eviction_threshold( smap_get_int(&ovs_cfg->other_config, "flow-eviction-threshold", OFPROTO_FLOW_EVICTION_THRESHOLD_DEFAULT)); ofproto_set_n_handler_threads( smap_get_int(&ovs_cfg->other_config, "n-handler-threads", 0)); bridge_configure_flow_miss_model(smap_get(&ovs_cfg->other_config, "force-miss-model")); /* Destroy "struct bridge"s, "struct port"s, and "struct iface"s according * to 'ovs_cfg' while update the "if_cfg_queue", with only very minimal * configuration otherwise. * * This is mostly an update to bridge data structures. Nothing is pushed * down to ofproto or lower layers. */ add_del_bridges(ovs_cfg); splinter_vlans = collect_splinter_vlans(ovs_cfg); HMAP_FOR_EACH (br, node, &all_bridges) { bridge_add_del_ports(br, splinter_vlans); } free(splinter_vlans); /* Delete datapaths that are no longer configured, and create ones which * don't exist but should. */ bridge_update_ofprotos(); /* Make sure each "struct iface" has a correct ofp_port in its ofproto. */ HMAP_FOR_EACH (br, node, &all_bridges) { bridge_refresh_ofp_port(br); } /* Clear database records for "if_cfg"s which haven't been instantiated. 
*/ HMAP_FOR_EACH (br, node, &all_bridges) { struct if_cfg *if_cfg; HMAP_FOR_EACH (if_cfg, hmap_node, &br->if_cfg_todo) { iface_clear_db_record(if_cfg->cfg); } } reconfigure_system_stats(ovs_cfg); } static bool bridge_reconfigure_ofp(void) { long long int deadline; struct bridge *br; time_refresh(); deadline = time_msec() + OFP_PORT_ACTION_WINDOW; /* The kernel will reject any attempt to add a given port to a datapath if * that port already belongs to a different datapath, so we must do all * port deletions before any port additions. */ HMAP_FOR_EACH (br, node, &all_bridges) { struct ofpp_garbage *garbage, *next; LIST_FOR_EACH_SAFE (garbage, next, list_node, &br->ofpp_garbage) { /* It's a bit dangerous to call bridge_run_fast() here as ofproto's * internal datastructures may not be consistent. Eventually, when * port additions and deletions are cheaper, these calls should be * removed. */ bridge_run_fast(); ofproto_port_del(br->ofproto, garbage->ofp_port); list_remove(&garbage->list_node); free(garbage); time_refresh(); if (time_msec() >= deadline) { return false; } bridge_run_fast(); } } HMAP_FOR_EACH (br, node, &all_bridges) { struct if_cfg *if_cfg, *next; HMAP_FOR_EACH_SAFE (if_cfg, next, hmap_node, &br->if_cfg_todo) { iface_create(br, if_cfg, OFPP_NONE); time_refresh(); if (time_msec() >= deadline) { return false; } } } return true; } static bool bridge_reconfigure_continue(const struct ovsrec_open_vswitch *ovs_cfg) { struct sockaddr_in *managers; int sflow_bridge_number; size_t n_managers; struct bridge *br; bool done; ovs_assert(reconfiguring); done = bridge_reconfigure_ofp(); /* Complete the configuration. */ sflow_bridge_number = 0; collect_in_band_managers(ovs_cfg, &managers, &n_managers); HMAP_FOR_EACH (br, node, &all_bridges) { struct port *port; /* We need the datapath ID early to allow LACP ports to use it as the * default system ID. 
*/ bridge_configure_datapath_id(br); HMAP_FOR_EACH (port, hmap_node, &br->ports) { struct iface *iface; port_configure(port); LIST_FOR_EACH (iface, port_elem, &port->ifaces) { iface_configure_cfm(iface); iface_configure_qos(iface, port->cfg->qos); iface_set_mac(iface); ofproto_port_set_bfd(br->ofproto, iface->ofp_port, &iface->cfg->bfd); } } bridge_configure_mirrors(br); bridge_configure_forward_bpdu(br); bridge_configure_mac_table(br); bridge_configure_remotes(br, managers, n_managers); bridge_configure_netflow(br); bridge_configure_sflow(br, &sflow_bridge_number); bridge_configure_ipfix(br); bridge_configure_stp(br); bridge_configure_tables(br); bridge_configure_dp_desc(br); if (smap_get(&br->cfg->other_config, "flow-eviction-threshold")) { /* XXX: Remove this warning message eventually. */ VLOG_WARN_ONCE("As of June 2013, flow-eviction-threshold has been" " moved to the Open_vSwitch table. Ignoring its" " setting in the bridge table."); } } free(managers); return done; } /* Delete ofprotos which aren't configured or have the wrong type. Create * ofprotos which don't exist but need to. */ static void bridge_update_ofprotos(void) { struct bridge *br, *next; struct sset names; struct sset types; const char *type; /* Delete ofprotos with no bridge or with the wrong type. */ sset_init(&names); sset_init(&types); ofproto_enumerate_types(&types); SSET_FOR_EACH (type, &types) { const char *name; ofproto_enumerate_names(type, &names); SSET_FOR_EACH (name, &names) { br = bridge_lookup(name); if (!br || strcmp(type, br->type)) { ofproto_delete(name, type); } } } sset_destroy(&names); sset_destroy(&types); /* Add ofprotos for bridges which don't have one yet. */ HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) { struct bridge *br2; int error; if (br->ofproto) { continue; } /* Remove ports from any datapath with the same name as 'br'. If we * don't do this, creating 'br''s ofproto will fail because a port with * the same name as its local port already exists. 
*/ HMAP_FOR_EACH (br2, node, &all_bridges) { struct ofproto_port ofproto_port; if (!br2->ofproto) { continue; } if (!ofproto_port_query_by_name(br2->ofproto, br->name, &ofproto_port)) { error = ofproto_port_del(br2->ofproto, ofproto_port.ofp_port); if (error) { VLOG_ERR("failed to delete port %s: %s", ofproto_port.name, ovs_strerror(error)); } ofproto_port_destroy(&ofproto_port); } } error = ofproto_create(br->name, br->type, &br->ofproto); if (error) { VLOG_ERR("failed to create bridge %s: %s", br->name, ovs_strerror(error)); bridge_destroy(br); } } } static void port_configure(struct port *port) { const struct ovsrec_port *cfg = port->cfg; struct bond_settings bond_settings; struct lacp_settings lacp_settings; struct ofproto_bundle_settings s; struct iface *iface; if (cfg->vlan_mode && !strcmp(cfg->vlan_mode, "splinter")) { configure_splinter_port(port); return; } /* Get name. */ s.name = port->name; /* Get slaves. */ s.n_slaves = 0; s.slaves = xmalloc(list_size(&port->ifaces) * sizeof *s.slaves); LIST_FOR_EACH (iface, port_elem, &port->ifaces) { s.slaves[s.n_slaves++] = iface->ofp_port; } /* Get VLAN tag. */ s.vlan = -1; if (cfg->tag && *cfg->tag >= 0 && *cfg->tag <= 4095) { s.vlan = *cfg->tag; } /* Get VLAN trunks. */ s.trunks = NULL; if (cfg->n_trunks) { s.trunks = vlan_bitmap_from_array(cfg->trunks, cfg->n_trunks); } /* Get VLAN mode. */ if (cfg->vlan_mode) { if (!strcmp(cfg->vlan_mode, "access")) { s.vlan_mode = PORT_VLAN_ACCESS; } else if (!strcmp(cfg->vlan_mode, "trunk")) { s.vlan_mode = PORT_VLAN_TRUNK; } else if (!strcmp(cfg->vlan_mode, "native-tagged")) { s.vlan_mode = PORT_VLAN_NATIVE_TAGGED; } else if (!strcmp(cfg->vlan_mode, "native-untagged")) { s.vlan_mode = PORT_VLAN_NATIVE_UNTAGGED; } else { /* This "can't happen" because ovsdb-server should prevent it. 
*/ VLOG_ERR("unknown VLAN mode %s", cfg->vlan_mode); s.vlan_mode = PORT_VLAN_TRUNK; } } else { if (s.vlan >= 0) { s.vlan_mode = PORT_VLAN_ACCESS; if (cfg->n_trunks) { VLOG_ERR("port %s: ignoring trunks in favor of implicit vlan", port->name); } } else { s.vlan_mode = PORT_VLAN_TRUNK; } } s.use_priority_tags = smap_get_bool(&cfg->other_config, "priority-tags", false); /* Get LACP settings. */ s.lacp = port_configure_lacp(port, &lacp_settings); if (s.lacp) { size_t i = 0; s.lacp_slaves = xmalloc(s.n_slaves * sizeof *s.lacp_slaves); LIST_FOR_EACH (iface, port_elem, &port->ifaces) { iface_configure_lacp(iface, &s.lacp_slaves[i++]); } } else { s.lacp_slaves = NULL; } /* Get bond settings. */ if (s.n_slaves > 1) { s.bond = &bond_settings; port_configure_bond(port, &bond_settings); } else { s.bond = NULL; LIST_FOR_EACH (iface, port_elem, &port->ifaces) { netdev_set_miimon_interval(iface->netdev, 0); } } /* Register. */ ofproto_bundle_register(port->bridge->ofproto, port, &s); /* Clean up. */ free(s.slaves); free(s.trunks); free(s.lacp_slaves); } static void bridge_configure_flow_miss_model(const char *opt) { enum ofproto_flow_miss_model model = OFPROTO_HANDLE_MISS_AUTO; if (opt) { if (!strcmp(opt, "with-facets")) { model = OFPROTO_HANDLE_MISS_WITH_FACETS; } else if (!strcmp(opt, "without-facets")) { model = OFPROTO_HANDLE_MISS_WITHOUT_FACETS; } } ofproto_set_flow_miss_model(model); } /* Pick local port hardware address and datapath ID for 'br'. 
*/ static void bridge_configure_datapath_id(struct bridge *br) { uint8_t ea[ETH_ADDR_LEN]; uint64_t dpid; struct iface *local_iface; struct iface *hw_addr_iface; char *dpid_string; bridge_pick_local_hw_addr(br, ea, &hw_addr_iface); local_iface = iface_from_ofp_port(br, OFPP_LOCAL); if (local_iface) { int error = netdev_set_etheraddr(local_iface->netdev, ea); if (error) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_ERR_RL(&rl, "bridge %s: failed to set bridge " "Ethernet address: %s", br->name, ovs_strerror(error)); } } memcpy(br->ea, ea, ETH_ADDR_LEN); dpid = bridge_pick_datapath_id(br, ea, hw_addr_iface); if (dpid != ofproto_get_datapath_id(br->ofproto)) { VLOG_INFO("bridge %s: using datapath ID %016"PRIx64, br->name, dpid); ofproto_set_datapath_id(br->ofproto, dpid); } dpid_string = xasprintf("%016"PRIx64, dpid); ovsrec_bridge_set_datapath_id(br->cfg, dpid_string); free(dpid_string); } /* Returns a bitmap of "enum ofputil_protocol"s that are allowed for use with * 'br'. */ static uint32_t bridge_get_allowed_versions(struct bridge *br) { if (!br->cfg->n_protocols) return 0; return ofputil_versions_from_strings(br->cfg->protocols, br->cfg->n_protocols); } /* Set NetFlow configuration on 'br'. */ static void bridge_configure_netflow(struct bridge *br) { struct ovsrec_netflow *cfg = br->cfg->netflow; struct netflow_options opts; if (!cfg) { ofproto_set_netflow(br->ofproto, NULL); return; } memset(&opts, 0, sizeof opts); /* Get default NetFlow configuration from datapath. * Apply overrides from 'cfg'. */ ofproto_get_netflow_ids(br->ofproto, &opts.engine_type, &opts.engine_id); if (cfg->engine_type) { opts.engine_type = *cfg->engine_type; } if (cfg->engine_id) { opts.engine_id = *cfg->engine_id; } /* Configure active timeout interval. 
*/ opts.active_timeout = cfg->active_timeout; if (!opts.active_timeout) { opts.active_timeout = -1; } else if (opts.active_timeout < 0) { VLOG_WARN("bridge %s: active timeout interval set to negative " "value, using default instead (%d seconds)", br->name, NF_ACTIVE_TIMEOUT_DEFAULT); opts.active_timeout = -1; } /* Add engine ID to interface number to disambiguate bridgs? */ opts.add_id_to_iface = cfg->add_id_to_interface; if (opts.add_id_to_iface) { if (opts.engine_id > 0x7f) { VLOG_WARN("bridge %s: NetFlow port mangling may conflict with " "another vswitch, choose an engine id less than 128", br->name); } if (hmap_count(&br->ports) > 508) { VLOG_WARN("bridge %s: NetFlow port mangling will conflict with " "another port when more than 508 ports are used", br->name); } } /* Collectors. */ sset_init(&opts.collectors); sset_add_array(&opts.collectors, cfg->targets, cfg->n_targets); /* Configure. */ if (ofproto_set_netflow(br->ofproto, &opts)) { VLOG_ERR("bridge %s: problem setting netflow collectors", br->name); } sset_destroy(&opts.collectors); } /* Set sFlow configuration on 'br'. 
*/ static void bridge_configure_sflow(struct bridge *br, int *sflow_bridge_number) { const struct ovsrec_sflow *cfg = br->cfg->sflow; struct ovsrec_controller **controllers; struct ofproto_sflow_options oso; size_t n_controllers; size_t i; if (!cfg) { ofproto_set_sflow(br->ofproto, NULL); return; } memset(&oso, 0, sizeof oso); sset_init(&oso.targets); sset_add_array(&oso.targets, cfg->targets, cfg->n_targets); oso.sampling_rate = SFL_DEFAULT_SAMPLING_RATE; if (cfg->sampling) { oso.sampling_rate = *cfg->sampling; } oso.polling_interval = SFL_DEFAULT_POLLING_INTERVAL; if (cfg->polling) { oso.polling_interval = *cfg->polling; } oso.header_len = SFL_DEFAULT_HEADER_SIZE; if (cfg->header) { oso.header_len = *cfg->header; } oso.sub_id = (*sflow_bridge_number)++; oso.agent_device = cfg->agent; oso.control_ip = NULL; n_controllers = bridge_get_controllers(br, &controllers); for (i = 0; i < n_controllers; i++) { if (controllers[i]->local_ip) { oso.control_ip = controllers[i]->local_ip; break; } } ofproto_set_sflow(br->ofproto, &oso); sset_destroy(&oso.targets); } /* Returns whether a Flow_Sample_Collector_Set row is valid. */ static bool ovsrec_fscs_is_valid(const struct ovsrec_flow_sample_collector_set *fscs, const struct bridge *br) { return fscs->ipfix && fscs->bridge == br->cfg; } /* Set IPFIX configuration on 'br'. 
*/ static void bridge_configure_ipfix(struct bridge *br) { const struct ovsrec_ipfix *be_cfg = br->cfg->ipfix; const struct ovsrec_flow_sample_collector_set *fe_cfg; struct ofproto_ipfix_bridge_exporter_options be_opts; struct ofproto_ipfix_flow_exporter_options *fe_opts = NULL; size_t n_fe_opts = 0; OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH(fe_cfg, idl) { if (ovsrec_fscs_is_valid(fe_cfg, br)) { n_fe_opts++; } } if (!be_cfg && n_fe_opts == 0) { ofproto_set_ipfix(br->ofproto, NULL, NULL, 0); return; } if (be_cfg) { memset(&be_opts, 0, sizeof be_opts); sset_init(&be_opts.targets); sset_add_array(&be_opts.targets, be_cfg->targets, be_cfg->n_targets); if (be_cfg->sampling) { be_opts.sampling_rate = *be_cfg->sampling; } else { be_opts.sampling_rate = SFL_DEFAULT_SAMPLING_RATE; } if (be_cfg->obs_domain_id) { be_opts.obs_domain_id = *be_cfg->obs_domain_id; } if (be_cfg->obs_point_id) { be_opts.obs_point_id = *be_cfg->obs_point_id; } if (be_cfg->cache_active_timeout) { be_opts.cache_active_timeout = *be_cfg->cache_active_timeout; } if (be_cfg->cache_max_flows) { be_opts.cache_max_flows = *be_cfg->cache_max_flows; } } if (n_fe_opts > 0) { struct ofproto_ipfix_flow_exporter_options *opts; fe_opts = xcalloc(n_fe_opts, sizeof *fe_opts); opts = fe_opts; OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH(fe_cfg, idl) { if (ovsrec_fscs_is_valid(fe_cfg, br)) { opts->collector_set_id = fe_cfg->id; sset_init(&opts->targets); sset_add_array(&opts->targets, fe_cfg->ipfix->targets, fe_cfg->ipfix->n_targets); opts->cache_active_timeout = fe_cfg->ipfix->cache_active_timeout ? *fe_cfg->ipfix->cache_active_timeout : 0; opts->cache_max_flows = fe_cfg->ipfix->cache_max_flows ? *fe_cfg->ipfix->cache_max_flows : 0; opts++; } } } ofproto_set_ipfix(br->ofproto, be_cfg ? 
&be_opts : NULL, fe_opts, n_fe_opts); if (be_cfg) { sset_destroy(&be_opts.targets); } if (n_fe_opts > 0) { struct ofproto_ipfix_flow_exporter_options *opts = fe_opts; size_t i; for (i = 0; i < n_fe_opts; i++) { sset_destroy(&opts->targets); opts++; } free(fe_opts); } } static void port_configure_stp(const struct ofproto *ofproto, struct port *port, struct ofproto_port_stp_settings *port_s, int *port_num_counter, unsigned long *port_num_bitmap) { const char *config_str; struct iface *iface; if (!smap_get_bool(&port->cfg->other_config, "stp-enable", true)) { port_s->enable = false; return; } else { port_s->enable = true; } /* STP over bonds is not supported. */ if (!list_is_singleton(&port->ifaces)) { VLOG_ERR("port %s: cannot enable STP on bonds, disabling", port->name); port_s->enable = false; return; } iface = CONTAINER_OF(list_front(&port->ifaces), struct iface, port_elem); /* Internal ports shouldn't participate in spanning tree, so * skip them. */ if (!strcmp(iface->type, "internal")) { VLOG_DBG("port %s: disable STP on internal ports", port->name); port_s->enable = false; return; } /* STP on mirror output ports is not supported. 
*/ if (ofproto_is_mirror_output_bundle(ofproto, port)) { VLOG_DBG("port %s: disable STP on mirror ports", port->name); port_s->enable = false; return; } config_str = smap_get(&port->cfg->other_config, "stp-port-num"); if (config_str) { unsigned long int port_num = strtoul(config_str, NULL, 0); int port_idx = port_num - 1; if (port_num < 1 || port_num > STP_MAX_PORTS) { VLOG_ERR("port %s: invalid stp-port-num", port->name); port_s->enable = false; return; } if (bitmap_is_set(port_num_bitmap, port_idx)) { VLOG_ERR("port %s: duplicate stp-port-num %lu, disabling", port->name, port_num); port_s->enable = false; return; } bitmap_set1(port_num_bitmap, port_idx); port_s->port_num = port_idx; } else { if (*port_num_counter >= STP_MAX_PORTS) { VLOG_ERR("port %s: too many STP ports, disabling", port->name); port_s->enable = false; return; } port_s->port_num = (*port_num_counter)++; } config_str = smap_get(&port->cfg->other_config, "stp-path-cost"); if (config_str) { port_s->path_cost = strtoul(config_str, NULL, 10); } else { enum netdev_features current; unsigned int mbps; netdev_get_features(iface->netdev, ¤t, NULL, NULL, NULL); mbps = netdev_features_to_bps(current, 100 * 1000 * 1000) / 1000000; port_s->path_cost = stp_convert_speed_to_cost(mbps); } config_str = smap_get(&port->cfg->other_config, "stp-port-priority"); if (config_str) { port_s->priority = strtoul(config_str, NULL, 0); } else { port_s->priority = STP_DEFAULT_PORT_PRIORITY; } } /* Set spanning tree configuration on 'br'. 
*/ static void bridge_configure_stp(struct bridge *br) { if (!br->cfg->stp_enable) { ofproto_set_stp(br->ofproto, NULL); } else { struct ofproto_stp_settings br_s; const char *config_str; struct port *port; int port_num_counter; unsigned long *port_num_bitmap; config_str = smap_get(&br->cfg->other_config, "stp-system-id"); if (config_str) { uint8_t ea[ETH_ADDR_LEN]; if (eth_addr_from_string(config_str, ea)) { br_s.system_id = eth_addr_to_uint64(ea); } else { br_s.system_id = eth_addr_to_uint64(br->ea); VLOG_ERR("bridge %s: invalid stp-system-id, defaulting " "to "ETH_ADDR_FMT, br->name, ETH_ADDR_ARGS(br->ea)); } } else { br_s.system_id = eth_addr_to_uint64(br->ea); } config_str = smap_get(&br->cfg->other_config, "stp-priority"); if (config_str) { br_s.priority = strtoul(config_str, NULL, 0); } else { br_s.priority = STP_DEFAULT_BRIDGE_PRIORITY; } config_str = smap_get(&br->cfg->other_config, "stp-hello-time"); if (config_str) { br_s.hello_time = strtoul(config_str, NULL, 10) * 1000; } else { br_s.hello_time = STP_DEFAULT_HELLO_TIME; } config_str = smap_get(&br->cfg->other_config, "stp-max-age"); if (config_str) { br_s.max_age = strtoul(config_str, NULL, 10) * 1000; } else { br_s.max_age = STP_DEFAULT_MAX_AGE; } config_str = smap_get(&br->cfg->other_config, "stp-forward-delay"); if (config_str) { br_s.fwd_delay = strtoul(config_str, NULL, 10) * 1000; } else { br_s.fwd_delay = STP_DEFAULT_FWD_DELAY; } /* Configure STP on the bridge. */ if (ofproto_set_stp(br->ofproto, &br_s)) { VLOG_ERR("bridge %s: could not enable STP", br->name); return; } /* Users must either set the port number with the "stp-port-num" * configuration on all ports or none. If manual configuration * is not done, then we allocate them sequentially. 
*/ port_num_counter = 0; port_num_bitmap = bitmap_allocate(STP_MAX_PORTS); HMAP_FOR_EACH (port, hmap_node, &br->ports) { struct ofproto_port_stp_settings port_s; struct iface *iface; port_configure_stp(br->ofproto, port, &port_s, &port_num_counter, port_num_bitmap); /* As bonds are not supported, just apply configuration to * all interfaces. */ LIST_FOR_EACH (iface, port_elem, &port->ifaces) { if (ofproto_port_set_stp(br->ofproto, iface->ofp_port, &port_s)) { VLOG_ERR("port %s: could not enable STP", port->name); continue; } } } if (bitmap_scan(port_num_bitmap, 0, STP_MAX_PORTS) != STP_MAX_PORTS && port_num_counter) { VLOG_ERR("bridge %s: must manually configure all STP port " "IDs or none, disabling", br->name); ofproto_set_stp(br->ofproto, NULL); } bitmap_free(port_num_bitmap); } } static bool bridge_has_bond_fake_iface(const struct bridge *br, const char *name) { const struct port *port = port_lookup(br, name); return port && port_is_bond_fake_iface(port); } static bool port_is_bond_fake_iface(const struct port *port) { return port->cfg->bond_fake_iface && !list_is_short(&port->ifaces); } static void add_del_bridges(const struct ovsrec_open_vswitch *cfg) { struct bridge *br, *next; struct shash new_br; size_t i; /* Collect new bridges' names and types. */ shash_init(&new_br); for (i = 0; i < cfg->n_bridges; i++) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); const struct ovsrec_bridge *br_cfg = cfg->bridges[i]; if (strchr(br_cfg->name, '/')) { /* Prevent remote ovsdb-server users from accessing arbitrary * directories, e.g. consider a bridge named "../../../etc/". */ VLOG_WARN_RL(&rl, "ignoring bridge with invalid name \"%s\"", br_cfg->name); } else if (!shash_add_once(&new_br, br_cfg->name, br_cfg)) { VLOG_WARN_RL(&rl, "bridge %s specified twice", br_cfg->name); } } /* Get rid of deleted bridges or those whose types have changed. * Update 'cfg' of bridges that still exist. 
*/ HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) { br->cfg = shash_find_data(&new_br, br->name); if (!br->cfg || strcmp(br->type, ofproto_normalize_type( br->cfg->datapath_type))) { bridge_destroy(br); } } /* Add new bridges. */ for (i = 0; i < cfg->n_bridges; i++) { const struct ovsrec_bridge *br_cfg = cfg->bridges[i]; struct bridge *br = bridge_lookup(br_cfg->name); if (!br) { bridge_create(br_cfg); } } shash_destroy(&new_br); } static void iface_set_ofp_port(struct iface *iface, ofp_port_t ofp_port) { struct bridge *br = iface->port->bridge; ovs_assert(iface->ofp_port == OFPP_NONE && ofp_port != OFPP_NONE); iface->ofp_port = ofp_port; hmap_insert(&br->ifaces, &iface->ofp_port_node, hash_ofp_port(ofp_port)); iface_set_ofport(iface->cfg, ofp_port); } /* Configures 'netdev' based on the "options" column in 'iface_cfg'. * Returns 0 if successful, otherwise a positive errno value. */ static int iface_set_netdev_config(const struct ovsrec_interface *iface_cfg, struct netdev *netdev) { int error; error = netdev_set_config(netdev, &iface_cfg->options); if (error) { VLOG_WARN("could not configure network device %s (%s)", iface_cfg->name, ovs_strerror(error)); } return error; } /* This function determines whether 'ofproto_port', which is attached to * br->ofproto's datapath, is one that we want in 'br'. * * If it is, it returns true, after creating an iface (if necessary), * configuring the iface's netdev according to the iface's options, and setting * iface's ofp_port member to 'ofproto_port->ofp_port'. * * If, on the other hand, 'port' should be removed, it returns false. The * caller should later detach the port from br->ofproto. 
*/ static bool bridge_refresh_one_ofp_port(struct bridge *br, const struct ofproto_port *ofproto_port) { const char *name = ofproto_port->name; const char *type = ofproto_port->type; ofp_port_t ofp_port = ofproto_port->ofp_port; struct iface *iface = iface_lookup(br, name); if (iface) { /* Check that the name-to-number mapping is one-to-one. */ if (iface->ofp_port != OFPP_NONE) { VLOG_WARN("bridge %s: interface %s reported twice", br->name, name); return false; } else if (iface_from_ofp_port(br, ofp_port)) { VLOG_WARN("bridge %s: interface %"PRIu16" reported twice", br->name, ofp_port); return false; } /* There's a configured interface named 'name'. */ if (strcmp(type, iface->type) || iface_set_netdev_config(iface->cfg, iface->netdev)) { /* It's the wrong type, or it's the right type but can't be * configured as the user requested, so we must destroy it. */ return false; } else { /* It's the right type and configured correctly. Keep it. */ iface_set_ofp_port(iface, ofp_port); return true; } } else if (bridge_has_bond_fake_iface(br, name) && !strcmp(type, "internal")) { /* It's a bond fake iface. Keep it. */ return true; } else { /* There's no configured interface named 'name', but there might be an * interface of that name queued to be created. * * If there is, and it has the correct type, then try to configure it * and add it. If that's successful, we'll keep it. Otherwise, we'll * delete it and later try to re-add it. */ struct if_cfg *if_cfg = if_cfg_lookup(br, name); return (if_cfg && !strcmp(type, iface_get_type(if_cfg->cfg, br->cfg)) && iface_create(br, if_cfg, ofp_port)); } } /* Update bridges "if_cfg"s, "struct port"s, and "struct iface"s to be * consistent with the ofp_ports in "br->ofproto". */ static void bridge_refresh_ofp_port(struct bridge *br) { struct ofproto_port_dump dump; struct ofproto_port ofproto_port; struct port *port, *port_next; /* Clear each "struct iface"s ofp_port so we can get its correct value. 
*/ hmap_clear(&br->ifaces); HMAP_FOR_EACH (port, hmap_node, &br->ports) { struct iface *iface; LIST_FOR_EACH (iface, port_elem, &port->ifaces) { iface->ofp_port = OFPP_NONE; } } /* Obtain the correct "ofp_port"s from ofproto. Find any if_cfg's which * already exist in the datapath and promote them to full fledged "struct * iface"s. Mark ports in the datapath which don't belong as garbage. */ OFPROTO_PORT_FOR_EACH (&ofproto_port, &dump, br->ofproto) { if (!bridge_refresh_one_ofp_port(br, &ofproto_port)) { struct ofpp_garbage *garbage = xmalloc(sizeof *garbage); garbage->ofp_port = ofproto_port.ofp_port; list_push_front(&br->ofpp_garbage, &garbage->list_node); } } /* Some ifaces may not have "ofp_port"s in ofproto and therefore don't * deserve to have "struct iface"s. Demote these to "if_cfg"s so that * later they can be added to ofproto. */ HMAP_FOR_EACH_SAFE (port, port_next, hmap_node, &br->ports) { struct iface *iface, *iface_next; LIST_FOR_EACH_SAFE (iface, iface_next, port_elem, &port->ifaces) { if (iface->ofp_port == OFPP_NONE) { bridge_queue_if_cfg(br, iface->cfg, port->cfg); iface_destroy(iface); } } if (list_is_empty(&port->ifaces)) { port_destroy(port); } } } /* Opens a network device for 'if_cfg' and configures it. If '*ofp_portp' * is OFPP_NONE, adds the network device to br->ofproto and stores the OpenFlow * port number in '*ofp_portp'; otherwise leaves br->ofproto and '*ofp_portp' * untouched. * * If successful, returns 0 and stores the network device in '*netdevp'. On * failure, returns a positive errno value and stores NULL in '*netdevp'. 
*/
static int
iface_do_create(const struct bridge *br,
                const struct if_cfg *if_cfg,
                ofp_port_t *ofp_portp, struct netdev **netdevp)
{
    const struct ovsrec_interface *iface_cfg = if_cfg->cfg;
    const struct ovsrec_port *port_cfg = if_cfg->parent;
    struct netdev *netdev = NULL;
    int error;

    /* Reserved names are rejected before anything is opened. */
    if (netdev_is_reserved_name(iface_cfg->name)) {
        VLOG_WARN("could not create interface %s, name is reserved",
                  iface_cfg->name);
        error = EINVAL;
        goto error;
    }

    error = netdev_open(iface_cfg->name,
                        iface_get_type(iface_cfg, br->cfg), &netdev);
    if (error) {
        VLOG_WARN("could not open network device %s (%s)",
                  iface_cfg->name, ovs_strerror(error));
        goto error;
    }

    /* iface_set_netdev_config() logs its own warning on failure. */
    error = iface_set_netdev_config(iface_cfg, netdev);
    if (error) {
        goto error;
    }

    if (*ofp_portp == OFPP_NONE) {
        /* Not in the datapath yet: add it, passing the port number recorded
         * in 'if_cfg' in and reading back the number actually used. */
        ofp_port_t ofp_port = if_cfg->ofport;

        error = ofproto_port_add(br->ofproto, netdev, &ofp_port);
        if (error) {
            goto error;
        }
        *ofp_portp = ofp_port;

        VLOG_INFO("bridge %s: added interface %s on port %d",
                  br->name, iface_cfg->name, *ofp_portp);
    } else {
        VLOG_DBG("bridge %s: interface %s is on port %d",
                 br->name, iface_cfg->name, *ofp_portp);
    }

    /* Ports in "splinter" VLAN mode and internal interfaces are brought up
     * here. */
    if ((port_cfg->vlan_mode && !strcmp(port_cfg->vlan_mode, "splinter"))
        || iface_is_internal(iface_cfg, br->cfg)) {
        netdev_turn_flags_on(netdev, NETDEV_UP, NULL);
    }

    *netdevp = netdev;
    return 0;

error:
    /* 'netdev' is still NULL here if the failure happened before or during
     * netdev_open(). */
    *netdevp = NULL;
    netdev_close(netdev);
    return error;
}

/* Creates a new iface on 'br' based on 'if_cfg'.  The new iface has OpenFlow
 * port number 'ofp_port'.  If ofp_port is OFPP_NONE, an OpenFlow port is
 * automatically allocated for the iface.  Takes ownership of and
 * deallocates 'if_cfg'.
 *
 * Return true if an iface is successfully created, false otherwise. */
static bool
iface_create(struct bridge *br, struct if_cfg *if_cfg, ofp_port_t ofp_port)
{
    const struct ovsrec_interface *iface_cfg = if_cfg->cfg;
    const struct ovsrec_port *port_cfg = if_cfg->parent;

    struct netdev *netdev;
    struct iface *iface;
    struct port *port;
    int error;
    bool ok = true;

    /* Do the bits that can fail up front.
     *
     * It's a bit dangerous to call bridge_run_fast() here as ofproto's
     * internal datastructures may not be consistent.  Eventually, when port
     * additions and deletions are cheaper, these calls should be removed. */
    bridge_run_fast();
    ovs_assert(!iface_lookup(br, iface_cfg->name));
    error = iface_do_create(br, if_cfg, &ofp_port, &netdev);
    bridge_run_fast();
    if (error) {
        /* Report the failure in the database record, then fall through to
         * the common cleanup that releases 'if_cfg'. */
        iface_set_ofport(iface_cfg, OFPP_NONE);
        iface_clear_db_record(iface_cfg);
        ok = false;
        goto done;
    }

    /* Get or create the port structure. */
    port = port_lookup(br, port_cfg->name);
    if (!port) {
        port = port_create(br, port_cfg);
    }

    /* Create the iface structure. */
    iface = xzalloc(sizeof *iface);
    list_push_back(&port->ifaces, &iface->port_elem);
    hmap_insert(&br->iface_by_name, &iface->name_node,
                hash_string(iface_cfg->name, 0));
    iface->port = port;
    iface->name = xstrdup(iface_cfg->name);
    /* Must be OFPP_NONE here: iface_set_ofp_port() asserts it. */
    iface->ofp_port = OFPP_NONE;
    iface->netdev = netdev;
    iface->type = iface_get_type(iface_cfg, br->cfg);
    iface->cfg = iface_cfg;

    iface_set_ofp_port(iface, ofp_port);

    /* Populate initial status in database. */
    iface_refresh_stats(iface);
    iface_refresh_status(iface);

    /* Add bond fake iface if necessary. */
    if (port_is_bond_fake_iface(port)) {
        struct ofproto_port ofproto_port;

        /* A nonzero result is treated as "no port with this name yet". */
        if (ofproto_port_query_by_name(br->ofproto, port->name,
                                       &ofproto_port)) {
            /* NOTE(review): these two locals shadow the function-level
             * 'netdev' and 'error'. */
            struct netdev *netdev;
            int error;

            error = netdev_open(port->name, "internal", &netdev);
            if (!error) {
                ofp_port_t fake_ofp_port = if_cfg->ofport;
                /* The result of ofproto_port_add() is not checked here. */
                ofproto_port_add(br->ofproto, netdev, &fake_ofp_port);
                netdev_close(netdev);
            } else {
                VLOG_WARN("could not open network device %s (%s)",
                          port->name, ovs_strerror(error));
            }
        } else {
            /* Already exists, nothing to do. */
            ofproto_port_destroy(&ofproto_port);
        }
    }

done:
    /* 'if_cfg' is consumed on both the success and the failure path. */
    hmap_remove(&br->if_cfg_todo, &if_cfg->hmap_node);
    free(if_cfg);

    return ok;
}

/* Set forward BPDU option.
*/ static void bridge_configure_forward_bpdu(struct bridge *br) { ofproto_set_forward_bpdu(br->ofproto, smap_get_bool(&br->cfg->other_config, "forward-bpdu", false)); } /* Set MAC learning table configuration for 'br'. */ static void bridge_configure_mac_table(struct bridge *br) { const char *idle_time_str; int idle_time; const char *mac_table_size_str; int mac_table_size; idle_time_str = smap_get(&br->cfg->other_config, "mac-aging-time"); idle_time = (idle_time_str && atoi(idle_time_str) ? atoi(idle_time_str) : MAC_ENTRY_DEFAULT_IDLE_TIME); mac_table_size_str = smap_get(&br->cfg->other_config, "mac-table-size"); mac_table_size = (mac_table_size_str && atoi(mac_table_size_str) ? atoi(mac_table_size_str) : MAC_DEFAULT_MAX); ofproto_set_mac_table_config(br->ofproto, idle_time, mac_table_size); } static void bridge_pick_local_hw_addr(struct bridge *br, uint8_t ea[ETH_ADDR_LEN], struct iface **hw_addr_iface) { struct hmapx mirror_output_ports; const char *hwaddr; struct port *port; bool found_addr = false; int error; int i; *hw_addr_iface = NULL; /* Did the user request a particular MAC? */ hwaddr = smap_get(&br->cfg->other_config, "hwaddr"); if (hwaddr && eth_addr_from_string(hwaddr, ea)) { if (eth_addr_is_multicast(ea)) { VLOG_ERR("bridge %s: cannot set MAC address to multicast " "address "ETH_ADDR_FMT, br->name, ETH_ADDR_ARGS(ea)); } else if (eth_addr_is_zero(ea)) { VLOG_ERR("bridge %s: cannot set MAC address to zero", br->name); } else { return; } } /* Mirror output ports don't participate in picking the local hardware * address. ofproto can't help us find out whether a given port is a * mirror output because we haven't configured mirrors yet, so we need to * accumulate them ourselves. 
*/ hmapx_init(&mirror_output_ports); for (i = 0; i < br->cfg->n_mirrors; i++) { struct ovsrec_mirror *m = br->cfg->mirrors[i]; if (m->output_port) { hmapx_add(&mirror_output_ports, m->output_port); } } /* Otherwise choose the minimum non-local MAC address among all of the * interfaces. */ HMAP_FOR_EACH (port, hmap_node, &br->ports) { uint8_t iface_ea[ETH_ADDR_LEN]; struct iface *candidate; struct iface *iface; /* Mirror output ports don't participate. */ if (hmapx_contains(&mirror_output_ports, port->cfg)) { continue; } /* Choose the MAC address to represent the port. */ iface = NULL; if (port->cfg->mac && eth_addr_from_string(port->cfg->mac, iface_ea)) { /* Find the interface with this Ethernet address (if any) so that * we can provide the correct devname to the caller. */ LIST_FOR_EACH (candidate, port_elem, &port->ifaces) { uint8_t candidate_ea[ETH_ADDR_LEN]; if (!netdev_get_etheraddr(candidate->netdev, candidate_ea) && eth_addr_equals(iface_ea, candidate_ea)) { iface = candidate; } } } else { /* Choose the interface whose MAC address will represent the port. * The Linux kernel bonding code always chooses the MAC address of * the first slave added to a bond, and the Fedora networking * scripts always add slaves to a bond in alphabetical order, so * for compatibility we choose the interface with the name that is * first in alphabetical order. */ LIST_FOR_EACH (candidate, port_elem, &port->ifaces) { if (!iface || strcmp(candidate->name, iface->name) < 0) { iface = candidate; } } /* The local port doesn't count (since we're trying to choose its * MAC address anyway). */ if (iface->ofp_port == OFPP_LOCAL) { continue; } /* Grab MAC. */ error = netdev_get_etheraddr(iface->netdev, iface_ea); if (error) { continue; } } /* Compare against our current choice. 
*/ if (!eth_addr_is_multicast(iface_ea) && !eth_addr_is_local(iface_ea) && !eth_addr_is_reserved(iface_ea) && !eth_addr_is_zero(iface_ea) && (!found_addr || eth_addr_compare_3way(iface_ea, ea) < 0)) { memcpy(ea, iface_ea, ETH_ADDR_LEN); *hw_addr_iface = iface; found_addr = true; } } if (!found_addr) { memcpy(ea, br->default_ea, ETH_ADDR_LEN); *hw_addr_iface = NULL; } hmapx_destroy(&mirror_output_ports); } /* Choose and returns the datapath ID for bridge 'br' given that the bridge * Ethernet address is 'bridge_ea'. If 'bridge_ea' is the Ethernet address of * an interface on 'br', then that interface must be passed in as * 'hw_addr_iface'; if 'bridge_ea' was derived some other way, then * 'hw_addr_iface' must be passed in as a null pointer. */ static uint64_t bridge_pick_datapath_id(struct bridge *br, const uint8_t bridge_ea[ETH_ADDR_LEN], struct iface *hw_addr_iface) { /* * The procedure for choosing a bridge MAC address will, in the most * ordinary case, also choose a unique MAC that we can use as a datapath * ID. In some special cases, though, multiple bridges will end up with * the same MAC address. This is OK for the bridges, but it will confuse * the OpenFlow controller, because each datapath needs a unique datapath * ID. * * Datapath IDs must be unique. It is also very desirable that they be * stable from one run to the next, so that policy set on a datapath * "sticks". */ const char *datapath_id; uint64_t dpid; datapath_id = smap_get(&br->cfg->other_config, "datapath-id"); if (datapath_id && dpid_from_string(datapath_id, &dpid)) { return dpid; } if (!hw_addr_iface) { /* * A purely internal bridge, that is, one that has no non-virtual * network devices on it at all, is difficult because it has no * natural unique identifier at all. * * When the host is a XenServer, we handle this case by hashing the * host's UUID with the name of the bridge. 
Names of bridges are * persistent across XenServer reboots, although they can be reused if * an internal network is destroyed and then a new one is later * created, so this is fairly effective. * * When the host is not a XenServer, we punt by using a random MAC * address on each run. */ const char *host_uuid = xenserver_get_host_uuid(); if (host_uuid) { char *combined = xasprintf("%s,%s", host_uuid, br->name); dpid = dpid_from_hash(combined, strlen(combined)); free(combined); return dpid; } } return eth_addr_to_uint64(bridge_ea); } static uint64_t dpid_from_hash(const void *data, size_t n) { uint8_t hash[SHA1_DIGEST_SIZE]; BUILD_ASSERT_DECL(sizeof hash >= ETH_ADDR_LEN); sha1_bytes(data, n, hash); eth_addr_mark_random(hash); return eth_addr_to_uint64(hash); } static void iface_refresh_status(struct iface *iface) { struct smap smap; enum netdev_features current; int64_t bps; int mtu; int64_t mtu_64; uint8_t mac[ETH_ADDR_LEN]; int64_t ifindex64; int error; if (iface_is_synthetic(iface)) { return; } smap_init(&smap); if (!netdev_get_status(iface->netdev, &smap)) { ovsrec_interface_set_status(iface->cfg, &smap); } else { ovsrec_interface_set_status(iface->cfg, NULL); } smap_destroy(&smap); error = netdev_get_features(iface->netdev, ¤t, NULL, NULL, NULL); bps = !error ? netdev_features_to_bps(current, 0) : 0; if (bps) { ovsrec_interface_set_duplex(iface->cfg, netdev_features_is_full_duplex(current) ? 
"full" : "half"); ovsrec_interface_set_link_speed(iface->cfg, &bps, 1); } else { ovsrec_interface_set_duplex(iface->cfg, NULL); ovsrec_interface_set_link_speed(iface->cfg, NULL, 0); } error = netdev_get_mtu(iface->netdev, &mtu); if (!error) { mtu_64 = mtu; ovsrec_interface_set_mtu(iface->cfg, &mtu_64, 1); } else { ovsrec_interface_set_mtu(iface->cfg, NULL, 0); } error = netdev_get_etheraddr(iface->netdev, mac); if (!error) { char mac_string[32]; sprintf(mac_string, ETH_ADDR_FMT, ETH_ADDR_ARGS(mac)); ovsrec_interface_set_mac_in_use(iface->cfg, mac_string); } else { ovsrec_interface_set_mac_in_use(iface->cfg, NULL); } /* The netdev may return a negative number (such as -EOPNOTSUPP) * if there is no valid ifindex number. */ ifindex64 = netdev_get_ifindex(iface->netdev); if (ifindex64 < 0) { ifindex64 = 0; } ovsrec_interface_set_ifindex(iface->cfg, &ifindex64, 1); } /* Writes 'iface''s CFM statistics to the database. 'iface' must not be * synthetic. */ static void iface_refresh_cfm_stats(struct iface *iface) { const struct ovsrec_interface *cfg = iface->cfg; struct ofproto_cfm_status status; if (!ofproto_port_get_cfm_status(iface->port->bridge->ofproto, iface->ofp_port, &status)) { ovsrec_interface_set_cfm_fault(cfg, NULL, 0); ovsrec_interface_set_cfm_fault_status(cfg, NULL, 0); ovsrec_interface_set_cfm_remote_opstate(cfg, NULL); ovsrec_interface_set_cfm_health(cfg, NULL, 0); ovsrec_interface_set_cfm_remote_mpids(cfg, NULL, 0); } else { const char *reasons[CFM_FAULT_N_REASONS]; int64_t cfm_health = status.health; bool faulted = status.faults != 0; size_t i, j; ovsrec_interface_set_cfm_fault(cfg, &faulted, 1); j = 0; for (i = 0; i < CFM_FAULT_N_REASONS; i++) { int reason = 1 << i; if (status.faults & reason) { reasons[j++] = cfm_fault_reason_to_str(reason); } } ovsrec_interface_set_cfm_fault_status(cfg, (char **) reasons, j); if (status.remote_opstate >= 0) { const char *remote_opstate = status.remote_opstate ? 
"up" : "down"; ovsrec_interface_set_cfm_remote_opstate(cfg, remote_opstate); } else { ovsrec_interface_set_cfm_remote_opstate(cfg, NULL); } ovsrec_interface_set_cfm_remote_mpids(cfg, (const int64_t *)status.rmps, status.n_rmps); if (cfm_health >= 0) { ovsrec_interface_set_cfm_health(cfg, &cfm_health, 1); } else { ovsrec_interface_set_cfm_health(cfg, NULL, 0); } free(status.rmps); } } static void iface_refresh_stats(struct iface *iface) { #define IFACE_STATS \ IFACE_STAT(rx_packets, "rx_packets") \ IFACE_STAT(tx_packets, "tx_packets") \ IFACE_STAT(rx_bytes, "rx_bytes") \ IFACE_STAT(tx_bytes, "tx_bytes") \ IFACE_STAT(rx_dropped, "rx_dropped") \ IFACE_STAT(tx_dropped, "tx_dropped") \ IFACE_STAT(rx_errors, "rx_errors") \ IFACE_STAT(tx_errors, "tx_errors") \ IFACE_STAT(rx_frame_errors, "rx_frame_err") \ IFACE_STAT(rx_over_errors, "rx_over_err") \ IFACE_STAT(rx_crc_errors, "rx_crc_err") \ IFACE_STAT(collisions, "collisions") #define IFACE_STAT(MEMBER, NAME) + 1 enum { N_IFACE_STATS = IFACE_STATS }; #undef IFACE_STAT int64_t values[N_IFACE_STATS]; char *keys[N_IFACE_STATS]; int n; struct netdev_stats stats; if (iface_is_synthetic(iface)) { return; } /* Intentionally ignore return value, since errors will set 'stats' to * all-1s, and we will deal with that correctly below. */ netdev_get_stats(iface->netdev, &stats); /* Copy statistics into keys[] and values[]. 
*/ n = 0; #define IFACE_STAT(MEMBER, NAME) \ if (stats.MEMBER != UINT64_MAX) { \ keys[n] = NAME; \ values[n] = stats.MEMBER; \ n++; \ } IFACE_STATS; #undef IFACE_STAT ovs_assert(n <= N_IFACE_STATS); ovsrec_interface_set_statistics(iface->cfg, keys, values, n); #undef IFACE_STATS } static void br_refresh_stp_status(struct bridge *br) { struct smap smap = SMAP_INITIALIZER(&smap); struct ofproto *ofproto = br->ofproto; struct ofproto_stp_status status; if (ofproto_get_stp_status(ofproto, &status)) { return; } if (!status.enabled) { ovsrec_bridge_set_status(br->cfg, NULL); return; } smap_add_format(&smap, "stp_bridge_id", STP_ID_FMT, STP_ID_ARGS(status.bridge_id)); smap_add_format(&smap, "stp_designated_root", STP_ID_FMT, STP_ID_ARGS(status.designated_root)); smap_add_format(&smap, "stp_root_path_cost", "%d", status.root_path_cost); ovsrec_bridge_set_status(br->cfg, &smap); smap_destroy(&smap); } static void port_refresh_stp_status(struct port *port) { struct ofproto *ofproto = port->bridge->ofproto; struct iface *iface; struct ofproto_port_stp_status status; char *keys[3]; int64_t int_values[3]; struct smap smap; if (port_is_synthetic(port)) { return; } /* STP doesn't currently support bonds. */ if (!list_is_singleton(&port->ifaces)) { ovsrec_port_set_status(port->cfg, NULL); return; } iface = CONTAINER_OF(list_front(&port->ifaces), struct iface, port_elem); if (ofproto_port_get_stp_status(ofproto, iface->ofp_port, &status)) { return; } if (!status.enabled) { ovsrec_port_set_status(port->cfg, NULL); ovsrec_port_set_statistics(port->cfg, NULL, NULL, 0); return; } /* Set Status column. */ smap_init(&smap); smap_add_format(&smap, "stp_port_id", STP_PORT_ID_FMT, status.port_id); smap_add(&smap, "stp_state", stp_state_name(status.state)); smap_add_format(&smap, "stp_sec_in_state", "%u", status.sec_in_state); smap_add(&smap, "stp_role", stp_role_name(status.role)); ovsrec_port_set_status(port->cfg, &smap); smap_destroy(&smap); /* Set Statistics column. 
*/ keys[0] = "stp_tx_count"; int_values[0] = status.tx_count; keys[1] = "stp_rx_count"; int_values[1] = status.rx_count; keys[2] = "stp_error_count"; int_values[2] = status.error_count; ovsrec_port_set_statistics(port->cfg, keys, int_values, ARRAY_SIZE(int_values)); } static bool enable_system_stats(const struct ovsrec_open_vswitch *cfg) { return smap_get_bool(&cfg->other_config, "enable-statistics", false); } static void reconfigure_system_stats(const struct ovsrec_open_vswitch *cfg) { bool enable = enable_system_stats(cfg); system_stats_enable(enable); if (!enable) { ovsrec_open_vswitch_set_statistics(cfg, NULL); } } static void run_system_stats(void) { const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl); struct smap *stats; stats = system_stats_run(); if (stats && cfg) { struct ovsdb_idl_txn *txn; struct ovsdb_datum datum; txn = ovsdb_idl_txn_create(idl); ovsdb_datum_from_smap(&datum, stats); ovsdb_idl_txn_write(&cfg->header_, &ovsrec_open_vswitch_col_statistics, &datum); ovsdb_idl_txn_commit(txn); ovsdb_idl_txn_destroy(txn); free(stats); } } static inline const char * ofp12_controller_role_to_str(enum ofp12_controller_role role) { switch (role) { case OFPCR12_ROLE_EQUAL: return "other"; case OFPCR12_ROLE_MASTER: return "master"; case OFPCR12_ROLE_SLAVE: return "slave"; case OFPCR12_ROLE_NOCHANGE: default: return "*** INVALID ROLE ***"; } } static void refresh_controller_status(void) { struct bridge *br; struct shash info; const struct ovsrec_controller *cfg; shash_init(&info); /* Accumulate status for controllers on all bridges. */ HMAP_FOR_EACH (br, node, &all_bridges) { ofproto_get_ofproto_controller_info(br->ofproto, &info); } /* Update each controller in the database with current status. 
*/ OVSREC_CONTROLLER_FOR_EACH(cfg, idl) { struct ofproto_controller_info *cinfo = shash_find_data(&info, cfg->target); if (cinfo) { struct smap smap = SMAP_INITIALIZER(&smap); const char **values = cinfo->pairs.values; const char **keys = cinfo->pairs.keys; size_t i; for (i = 0; i < cinfo->pairs.n; i++) { smap_add(&smap, keys[i], values[i]); } ovsrec_controller_set_is_connected(cfg, cinfo->is_connected); ovsrec_controller_set_role(cfg, ofp12_controller_role_to_str( cinfo->role)); ovsrec_controller_set_status(cfg, &smap); smap_destroy(&smap); } else { ovsrec_controller_set_is_connected(cfg, false); ovsrec_controller_set_role(cfg, NULL); ovsrec_controller_set_status(cfg, NULL); } } ofproto_free_ofproto_controller_info(&info); } /* "Instant" stats. * * Some information in the database must be kept as up-to-date as possible to * allow controllers to respond rapidly to network outages. We call these * statistics "instant" stats. * * We wish to update these statistics every INSTANT_INTERVAL_MSEC milliseconds, * assuming that they've changed. The only means we have to determine whether * they have changed are: * * - Try to commit changes to the database. If nothing changed, then * ovsdb_idl_txn_commit() returns TXN_UNCHANGED, otherwise some other * value. * * - instant_stats_run() is called late in the run loop, after anything that * might change any of the instant stats. * * We use these two facts together to avoid waking the process up every * INSTANT_INTERVAL_MSEC whether there is any change or not. */ /* Minimum interval between writing updates to the instant stats to the * database. */ #define INSTANT_INTERVAL_MSEC 100 /* Current instant stats database transaction, NULL if there is no ongoing * transaction. */ static struct ovsdb_idl_txn *instant_txn; /* Next time (in msec on monotonic clock) at which we will update the instant * stats. 
*/
static long long int instant_next_txn = LLONG_MIN;

/* True if the run loop has run since we last saw that the instant stats were
 * unchanged, that is, this is true if we need to wake up at 'instant_next_txn'
 * to refresh the instant stats. */
static bool instant_stats_could_have_changed;

/* Refreshes the "instant" stats of every bridge, port and interface inside a
 * single database transaction, at most once per INSTANT_INTERVAL_MSEC.
 *
 * While a previous transaction is still incomplete, this only retries its
 * commit and gathers nothing new. */
static void
instant_stats_run(void)
{
    enum ovsdb_idl_txn_status status;

    /* Being called at all means that something may have changed since the
     * last commit.  This is cleared again below if the commit reports
     * TXN_UNCHANGED. */
    instant_stats_could_have_changed = true;

    if (!instant_txn) {
        struct bridge *br;

        /* Too early for the next update. */
        if (time_msec() < instant_next_txn) {
            return;
        }
        instant_next_txn = time_msec() + INSTANT_INTERVAL_MSEC;

        instant_txn = ovsdb_idl_txn_create(idl);
        HMAP_FOR_EACH (br, node, &all_bridges) {
            struct iface *iface;
            struct port *port;

            br_refresh_stp_status(br);

            HMAP_FOR_EACH (port, hmap_node, &br->ports) {
                port_refresh_stp_status(port);
            }

            HMAP_FOR_EACH (iface, name_node, &br->iface_by_name) {
                enum netdev_flags flags;
                struct smap smap;
                const char *link_state;
                int64_t link_resets;
                int current, error;

                if (iface_is_synthetic(iface)) {
                    continue;
                }

                /* A negative 'current' is written as "no value". */
                current = ofproto_port_is_lacp_current(br->ofproto,
                                                       iface->ofp_port);
                if (current >= 0) {
                    bool bl = current;
                    ovsrec_interface_set_lacp_current(iface->cfg, &bl, 1);
                } else {
                    ovsrec_interface_set_lacp_current(iface->cfg, NULL, 0);
                }

                error = netdev_get_flags(iface->netdev, &flags);
                if (!error) {
                    const char *state = flags & NETDEV_UP ? "up" : "down";
                    ovsrec_interface_set_admin_state(iface->cfg, state);
                } else {
                    ovsrec_interface_set_admin_state(iface->cfg, NULL);
                }

                link_state = netdev_get_carrier(iface->netdev) ? "up" : "down";
                ovsrec_interface_set_link_state(iface->cfg, link_state);

                link_resets = netdev_get_carrier_resets(iface->netdev);
                ovsrec_interface_set_link_resets(iface->cfg, &link_resets, 1);

                iface_refresh_cfm_stats(iface);

                smap_init(&smap);
                ofproto_port_get_bfd_status(br->ofproto, iface->ofp_port,
                                            &smap);
                ovsrec_interface_set_bfd_status(iface->cfg, &smap);
                smap_destroy(&smap);
            }
        }
    }

    /* Keep an incomplete transaction around so that the next call retries
     * the commit; anything else is finished and can be destroyed. */
    status = ovsdb_idl_txn_commit(instant_txn);
    if (status != TXN_INCOMPLETE) {
        ovsdb_idl_txn_destroy(instant_txn);
        instant_txn = NULL;
    }

    if (status == TXN_UNCHANGED) {
        instant_stats_could_have_changed = false;
    }
}

/* Arranges for the poll loop to wake up when instant_stats_run() has work to
 * do: when the pending transaction makes progress, or at 'instant_next_txn'
 * if the instant stats may have changed. */
static void
instant_stats_wait(void)
{
    if (instant_txn) {
        ovsdb_idl_txn_wait(instant_txn);
    } else if (instant_stats_could_have_changed) {
        poll_timer_wait_until(instant_next_txn);
    }
}

/* Performs periodic activity required by bridges that needs to be done with
 * the least possible latency.
 *
 * It makes sense to call this function a couple of times per poll loop, to
 * provide a significant performance boost on some benchmarks with ofprotos
 * that use the ofproto-dpif implementation. */
void
bridge_run_fast(void)
{
    struct sset types;
    const char *type;
    struct bridge *br;

    /* First every datapath type, then every bridge. */
    sset_init(&types);
    ofproto_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        ofproto_type_run_fast(type);
    }
    sset_destroy(&types);

    HMAP_FOR_EACH (br, node, &all_bridges) {
        ofproto_run_fast(br->ofproto);
    }
}

/* Main periodic entry point for the bridge module.  Runs the database IDL,
 * the datapath types and the bridges, starts or continues reconfiguration
 * when the database contents or VLAN usage changed, and refreshes the
 * statistics stored in the database. */
void
bridge_run(void)
{
    /* Empty configuration used when the database has no Open_vSwitch
     * record. */
    static struct ovsrec_open_vswitch null_cfg;
    const struct ovsrec_open_vswitch *cfg;
    struct ovsdb_idl_txn *reconf_txn = NULL;
    struct sset types;
    const char *type;

    bool vlan_splinters_changed;
    struct bridge *br;

    ovsrec_open_vswitch_init(&null_cfg);

    /* (Re)configure if necessary.
     */
    if (!reconfiguring) {
        ovsdb_idl_run(idl);

        if (ovsdb_idl_is_lock_contended(idl)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
            struct bridge *br, *next_br;

            VLOG_ERR_RL(&rl, "another ovs-vswitchd process is running, "
                        "disabling this process (pid %ld) until it goes away",
                        (long int) getpid());

            HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) {
                bridge_destroy(br);
            }
            /* Since we will not be running system_stats_run() in this process
             * with the current situation of multiple ovs-vswitchd daemons,
             * disable system stats collection. */
            system_stats_enable(false);
            return;
        } else if (!ovsdb_idl_has_lock(idl)) {
            return;
        }
    }
    cfg = ovsrec_open_vswitch_first(idl);

    /* Initialize the ofproto library.  This only needs to run once, but
     * it must be done after the configuration is set.  If the
     * initialization has already occurred, bridge_init_ofproto()
     * returns immediately. */
    bridge_init_ofproto(cfg);

    /* Once the value of flow-restore-wait is false, we no longer should
     * check its value from the database. */
    if (cfg && ofproto_get_flow_restore_wait()) {
        ofproto_set_flow_restore_wait(smap_get_bool(&cfg->other_config,
                                        "flow-restore-wait", false));
    }

    /* Let each datapath type do the work that it needs to do. */
    sset_init(&types);
    ofproto_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        ofproto_type_run(type);
    }
    sset_destroy(&types);

    /* Let each bridge do the work that it needs to do. */
    HMAP_FOR_EACH (br, node, &all_bridges) {
        ofproto_run(br->ofproto);
    }

    /* Re-configure SSL.  We do this on every trip through the main loop,
     * instead of just when the database changes, because the contents of the
     * key and certificate files can change without the database changing.
     *
     * We do this before bridge_reconfigure() because that function might
     * initiate SSL connections and thus requires SSL to be configured.
     */
    if (cfg && cfg->ssl) {
        const struct ovsrec_ssl *ssl = cfg->ssl;

        stream_ssl_set_key_and_cert(ssl->private_key, ssl->certificate);
        stream_ssl_set_ca_cert_file(ssl->ca_cert, ssl->bootstrap_ca_cert);
    }

    if (!reconfiguring) {
        /* If VLAN splinters are in use, then we need to reconfigure if VLAN
         * usage has changed. */
        vlan_splinters_changed = false;
        if (vlan_splinters_enabled_anywhere) {
            HMAP_FOR_EACH (br, node, &all_bridges) {
                if (ofproto_has_vlan_usage_changed(br->ofproto)) {
                    vlan_splinters_changed = true;
                    break;
                }
            }
        }

        if (ovsdb_idl_get_seqno(idl) != idl_seqno || vlan_splinters_changed) {
            idl_seqno = ovsdb_idl_get_seqno(idl);
            if (cfg) {
                reconf_txn = ovsdb_idl_txn_create(idl);
                bridge_reconfigure(cfg);
            } else {
                /* We still need to reconfigure to avoid dangling pointers to
                 * now-destroyed ovsrec structures inside bridge data. */
                bridge_reconfigure(&null_cfg);
            }
        }
    }

    if (reconfiguring) {
        /* A transaction is needed here even if none was created above,
         * e.g. when reconfiguration continues from an earlier call. */
        if (!reconf_txn) {
            reconf_txn = ovsdb_idl_txn_create(idl);
        }

        if (bridge_reconfigure_continue(cfg ? cfg : &null_cfg)) {
            reconfiguring = false;
            if (cfg) {
                ovsrec_open_vswitch_set_cur_cfg(cfg, cfg->next_cfg);
            }

            /* If we are completing our initial configuration for this run
             * of ovs-vswitchd, then keep the transaction around to monitor
             * it for completion. */
            if (!initial_config_done) {
                initial_config_done = true;
                daemonize_txn = reconf_txn;
                reconf_txn = NULL;
            }
        }
    }

    if (reconf_txn) {
        ovsdb_idl_txn_commit(reconf_txn);
        ovsdb_idl_txn_destroy(reconf_txn);
        reconf_txn = NULL;
    }

    if (daemonize_txn) {
        enum ovsdb_idl_txn_status status = ovsdb_idl_txn_commit(daemonize_txn);
        if (status != TXN_INCOMPLETE) {
            ovsdb_idl_txn_destroy(daemonize_txn);
            daemonize_txn = NULL;

            /* ovs-vswitchd has completed initialization, so allow the
             * process that forked us to exit successfully. */
            daemonize_complete();

            vlog_enable_async();

            VLOG_INFO_ONCE("%s (Open vSwitch) %s", program_name, VERSION);
        }
    }

    /* Refresh interface and mirror stats if necessary.
     */
    if (time_msec() >= iface_stats_timer) {
        if (cfg) {
            struct ovsdb_idl_txn *txn;

            txn = ovsdb_idl_txn_create(idl);
            HMAP_FOR_EACH (br, node, &all_bridges) {
                struct port *port;
                struct mirror *m;

                HMAP_FOR_EACH (port, hmap_node, &br->ports) {
                    struct iface *iface;

                    LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
                        iface_refresh_stats(iface);
                        iface_refresh_status(iface);
                    }
                }

                HMAP_FOR_EACH (m, hmap_node, &br->mirrors) {
                    mirror_refresh_stats(m);
                }

            }
            refresh_controller_status();
            /* The commit status is not examined before the transaction is
             * destroyed. */
            ovsdb_idl_txn_commit(txn);
            ovsdb_idl_txn_destroy(txn); /* XXX */
        }

        /* The timer is re-armed even when there was no 'cfg' to update. */
        iface_stats_timer = time_msec() + IFACE_STATS_INTERVAL;
    }

    run_system_stats();
    instant_stats_run();
}

/* Arranges for the poll loop to wake up when bridge_run() has work to do. */
void
bridge_wait(void)
{
    struct sset types;
    const char *type;

    ovsdb_idl_wait(idl);
    if (daemonize_txn) {
        ovsdb_idl_txn_wait(daemonize_txn);
    }

    /* An unfinished reconfiguration must be continued right away. */
    if (reconfiguring) {
        poll_immediate_wake();
    }

    sset_init(&types);
    ofproto_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        ofproto_type_wait(type);
    }
    sset_destroy(&types);

    /* The interface stats timer is only waited on while bridges exist. */
    if (!hmap_is_empty(&all_bridges)) {
        struct bridge *br;

        HMAP_FOR_EACH (br, node, &all_bridges) {
            ofproto_wait(br->ofproto);
        }
        poll_timer_wait_until(iface_stats_timer);
    }

    system_stats_wait();
    instant_stats_wait();
}

/* Adds some memory usage statistics for bridges into 'usage', for use with
 * memory_report(). */
void
bridge_get_memory_usage(struct simap *usage)
{
    struct bridge *br;

    HMAP_FOR_EACH (br, node, &all_bridges) {
        ofproto_get_memory_usage(br->ofproto, usage);
    }
}

/* QoS unixctl user interface functions.
*/ struct qos_unixctl_show_cbdata { struct ds *ds; struct iface *iface; }; static void qos_unixctl_show_queue(unsigned int queue_id, const struct smap *details, struct iface *iface, struct ds *ds) { struct netdev_queue_stats stats; struct smap_node *node; int error; ds_put_cstr(ds, "\n"); if (queue_id) { ds_put_format(ds, "Queue %u:\n", queue_id); } else { ds_put_cstr(ds, "Default:\n"); } SMAP_FOR_EACH (node, details) { ds_put_format(ds, "\t%s: %s\n", node->key, node->value); } error = netdev_get_queue_stats(iface->netdev, queue_id, &stats); if (!error) { if (stats.tx_packets != UINT64_MAX) { ds_put_format(ds, "\ttx_packets: %"PRIu64"\n", stats.tx_packets); } if (stats.tx_bytes != UINT64_MAX) { ds_put_format(ds, "\ttx_bytes: %"PRIu64"\n", stats.tx_bytes); } if (stats.tx_errors != UINT64_MAX) { ds_put_format(ds, "\ttx_errors: %"PRIu64"\n", stats.tx_errors); } } else { ds_put_format(ds, "\tFailed to get statistics for queue %u: %s", queue_id, ovs_strerror(error)); } } static void qos_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; struct smap smap = SMAP_INITIALIZER(&smap); struct iface *iface; const char *type; struct smap_node *node; iface = iface_find(argv[1]); if (!iface) { unixctl_command_reply_error(conn, "no such interface"); return; } netdev_get_qos(iface->netdev, &type, &smap); if (*type != '\0') { struct netdev_queue_dump dump; struct smap details; unsigned int queue_id; ds_put_format(&ds, "QoS: %s %s\n", iface->name, type); SMAP_FOR_EACH (node, &smap) { ds_put_format(&ds, "%s: %s\n", node->key, node->value); } smap_init(&details); NETDEV_QUEUE_FOR_EACH (&queue_id, &details, &dump, iface->netdev) { qos_unixctl_show_queue(queue_id, &details, iface, &ds); } smap_destroy(&details); unixctl_command_reply(conn, ds_cstr(&ds)); } else { ds_put_format(&ds, "QoS not configured on %s\n", iface->name); unixctl_command_reply_error(conn, ds_cstr(&ds)); } smap_destroy(&smap); 
ds_destroy(&ds); } /* Bridge reconfiguration functions. */ static void bridge_create(const struct ovsrec_bridge *br_cfg) { struct bridge *br; ovs_assert(!bridge_lookup(br_cfg->name)); br = xzalloc(sizeof *br); br->name = xstrdup(br_cfg->name); br->type = xstrdup(ofproto_normalize_type(br_cfg->datapath_type)); br->cfg = br_cfg; /* Derive the default Ethernet address from the bridge's UUID. This should * be unique and it will be stable between ovs-vswitchd runs. */ memcpy(br->default_ea, &br_cfg->header_.uuid, ETH_ADDR_LEN); eth_addr_mark_random(br->default_ea); hmap_init(&br->ports); hmap_init(&br->ifaces); hmap_init(&br->iface_by_name); hmap_init(&br->mirrors); hmap_init(&br->if_cfg_todo); list_init(&br->ofpp_garbage); hmap_insert(&all_bridges, &br->node, hash_string(br->name, 0)); } static void bridge_destroy(struct bridge *br) { if (br) { struct mirror *mirror, *next_mirror; struct port *port, *next_port; struct if_cfg *if_cfg, *next_if_cfg; struct ofpp_garbage *garbage, *next_garbage; HMAP_FOR_EACH_SAFE (port, next_port, hmap_node, &br->ports) { port_destroy(port); } HMAP_FOR_EACH_SAFE (mirror, next_mirror, hmap_node, &br->mirrors) { mirror_destroy(mirror); } HMAP_FOR_EACH_SAFE (if_cfg, next_if_cfg, hmap_node, &br->if_cfg_todo) { hmap_remove(&br->if_cfg_todo, &if_cfg->hmap_node); free(if_cfg); } LIST_FOR_EACH_SAFE (garbage, next_garbage, list_node, &br->ofpp_garbage) { list_remove(&garbage->list_node); free(garbage); } hmap_remove(&all_bridges, &br->node); ofproto_destroy(br->ofproto); hmap_destroy(&br->ifaces); hmap_destroy(&br->ports); hmap_destroy(&br->iface_by_name); hmap_destroy(&br->mirrors); hmap_destroy(&br->if_cfg_todo); free(br->name); free(br->type); free(br); } } static struct bridge * bridge_lookup(const char *name) { struct bridge *br; HMAP_FOR_EACH_WITH_HASH (br, node, hash_string(name, 0), &all_bridges) { if (!strcmp(br->name, name)) { return br; } } return NULL; } /* Handle requests for a listing of all flows known by the OpenFlow * stack, 
including those normally hidden. */ static void bridge_unixctl_dump_flows(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { struct bridge *br; struct ds results; br = bridge_lookup(argv[1]); if (!br) { unixctl_command_reply_error(conn, "Unknown bridge"); return; } ds_init(&results); ofproto_get_all_flows(br->ofproto, &results); unixctl_command_reply(conn, ds_cstr(&results)); ds_destroy(&results); } /* "bridge/reconnect [BRIDGE]": makes BRIDGE drop all of its controller * connections and reconnect. If BRIDGE is not specified, then all bridges * drop their controller connections and reconnect. */ static void bridge_unixctl_reconnect(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) { struct bridge *br; if (argc > 1) { br = bridge_lookup(argv[1]); if (!br) { unixctl_command_reply_error(conn, "Unknown bridge"); return; } ofproto_reconnect_controllers(br->ofproto); } else { HMAP_FOR_EACH (br, node, &all_bridges) { ofproto_reconnect_controllers(br->ofproto); } } unixctl_command_reply(conn, NULL); } static size_t bridge_get_controllers(const struct bridge *br, struct ovsrec_controller ***controllersp) { struct ovsrec_controller **controllers; size_t n_controllers; controllers = br->cfg->controller; n_controllers = br->cfg->n_controller; if (n_controllers == 1 && !strcmp(controllers[0]->target, "none")) { controllers = NULL; n_controllers = 0; } if (controllersp) { *controllersp = controllers; } return n_controllers; } static void bridge_queue_if_cfg(struct bridge *br, const struct ovsrec_interface *cfg, const struct ovsrec_port *parent) { struct if_cfg *if_cfg = xmalloc(sizeof *if_cfg); if_cfg->cfg = cfg; if_cfg->parent = parent; if_cfg->ofport = iface_pick_ofport(cfg); hmap_insert(&br->if_cfg_todo, &if_cfg->hmap_node, hash_string(if_cfg->cfg->name, 0)); } /* Deletes "struct port"s and "struct iface"s under 'br' which aren't * consistent with 'br->cfg'. 
Updates 'br->if_cfg_queue' with interfaces which * 'br' needs to complete its configuration. */ static void bridge_add_del_ports(struct bridge *br, const unsigned long int *splinter_vlans) { struct shash_node *port_node; struct port *port, *next; struct shash new_ports; size_t i; ovs_assert(hmap_is_empty(&br->if_cfg_todo)); /* Collect new ports. */ shash_init(&new_ports); for (i = 0; i < br->cfg->n_ports; i++) { const char *name = br->cfg->ports[i]->name; if (!shash_add_once(&new_ports, name, br->cfg->ports[i])) { VLOG_WARN("bridge %s: %s specified twice as bridge port", br->name, name); } } if (bridge_get_controllers(br, NULL) && !shash_find(&new_ports, br->name)) { VLOG_WARN("bridge %s: no port named %s, synthesizing one", br->name, br->name); ovsrec_interface_init(&br->synth_local_iface); ovsrec_port_init(&br->synth_local_port); br->synth_local_port.interfaces = &br->synth_local_ifacep; br->synth_local_port.n_interfaces = 1; br->synth_local_port.name = br->name; br->synth_local_iface.name = br->name; br->synth_local_iface.type = "internal"; br->synth_local_ifacep = &br->synth_local_iface; shash_add(&new_ports, br->name, &br->synth_local_port); } if (splinter_vlans) { add_vlan_splinter_ports(br, splinter_vlans, &new_ports); } /* Get rid of deleted ports. * Get rid of deleted interfaces on ports that still exist. */ HMAP_FOR_EACH_SAFE (port, next, hmap_node, &br->ports) { port->cfg = shash_find_data(&new_ports, port->name); if (!port->cfg) { port_destroy(port); } else { port_del_ifaces(port); } } /* Update iface->cfg and iface->type in interfaces that still exist. * Add new interfaces to creation queue. 
*/ SHASH_FOR_EACH (port_node, &new_ports) { const struct ovsrec_port *port = port_node->data; size_t i; for (i = 0; i < port->n_interfaces; i++) { const struct ovsrec_interface *cfg = port->interfaces[i]; struct iface *iface = iface_lookup(br, cfg->name); const char *type = iface_get_type(cfg, br->cfg); if (iface) { iface->cfg = cfg; iface->type = type; } else if (!strcmp(type, "null")) { VLOG_WARN_ONCE("%s: The null interface type is deprecated and" " may be removed in February 2013. Please email" " dev@openvswitch.org with concerns.", cfg->name); } else { bridge_queue_if_cfg(br, cfg, port); } } } shash_destroy(&new_ports); } /* Initializes 'oc' appropriately as a management service controller for * 'br'. * * The caller must free oc->target when it is no longer needed. */ static void bridge_ofproto_controller_for_mgmt(const struct bridge *br, struct ofproto_controller *oc) { oc->target = xasprintf("punix:%s/%s.mgmt", ovs_rundir(), br->name); oc->max_backoff = 0; oc->probe_interval = 60; oc->band = OFPROTO_OUT_OF_BAND; oc->rate_limit = 0; oc->burst_limit = 0; oc->enable_async_msgs = true; } /* Converts ovsrec_controller 'c' into an ofproto_controller in 'oc'. */ static void bridge_ofproto_controller_from_ovsrec(const struct ovsrec_controller *c, struct ofproto_controller *oc) { int dscp; oc->target = c->target; oc->max_backoff = c->max_backoff ? *c->max_backoff / 1000 : 8; oc->probe_interval = c->inactivity_probe ? *c->inactivity_probe / 1000 : 5; oc->band = (!c->connection_mode || !strcmp(c->connection_mode, "in-band") ? OFPROTO_IN_BAND : OFPROTO_OUT_OF_BAND); oc->rate_limit = c->controller_rate_limit ? *c->controller_rate_limit : 0; oc->burst_limit = (c->controller_burst_limit ? 
*c->controller_burst_limit : 0); oc->enable_async_msgs = (!c->enable_async_messages || *c->enable_async_messages); dscp = smap_get_int(&c->other_config, "dscp", DSCP_DEFAULT); if (dscp < 0 || dscp > 63) { dscp = DSCP_DEFAULT; } oc->dscp = dscp; } /* Configures the IP stack for 'br''s local interface properly according to the * configuration in 'c'. */ static void bridge_configure_local_iface_netdev(struct bridge *br, struct ovsrec_controller *c) { struct netdev *netdev; struct in_addr mask, gateway; struct iface *local_iface; struct in_addr ip; /* If there's no local interface or no IP address, give up. */ local_iface = iface_from_ofp_port(br, OFPP_LOCAL); if (!local_iface || !c->local_ip || !inet_aton(c->local_ip, &ip)) { return; } /* Bring up the local interface. */ netdev = local_iface->netdev; netdev_turn_flags_on(netdev, NETDEV_UP, NULL); /* Configure the IP address and netmask. */ if (!c->local_netmask || !inet_aton(c->local_netmask, &mask) || !mask.s_addr) { mask.s_addr = guess_netmask(ip.s_addr); } if (!netdev_set_in4(netdev, ip, mask)) { VLOG_INFO("bridge %s: configured IP address "IP_FMT", netmask "IP_FMT, br->name, IP_ARGS(ip.s_addr), IP_ARGS(mask.s_addr)); } /* Configure the default gateway. */ if (c->local_gateway && inet_aton(c->local_gateway, &gateway) && gateway.s_addr) { if (!netdev_add_router(netdev, gateway)) { VLOG_INFO("bridge %s: configured gateway "IP_FMT, br->name, IP_ARGS(gateway.s_addr)); } } } /* Returns true if 'a' and 'b' are the same except that any number of slashes * in either string are treated as equal to any number of slashes in the other, * e.g. "x///y" is equal to "x/y". * * Also, if 'b_stoplen' bytes from 'b' are found to be equal to corresponding * bytes from 'a', the function considers this success. Specify 'b_stoplen' as * SIZE_MAX to compare all of 'a' to all of 'b' rather than just a prefix of * 'b' against a prefix of 'a'. 
*/
static bool
equal_pathnames(const char *a, const char *b, size_t b_stoplen)
{
    const char *b_start = b;
    for (;;) {
        if (b - b_start >= b_stoplen) {
            /* Consumed at least 'b_stoplen' bytes of 'b' without a
             * mismatch. */
            return true;
        } else if (*a != *b) {
            return false;
        } else if (*a == '/') {
            /* Both point at a slash: skip the whole run in each string. */
            a += strspn(a, "/");
            b += strspn(b, "/");
        } else if (*a == '\0') {
            /* Both strings ended together. */
            return true;
        } else {
            a++;
            b++;
        }
    }
}

/* Configures 'br''s ofproto with its OpenFlow remotes: the in-band queue and
 * extra in-band remotes ('managers', unless in-band control is disabled),
 * the always-present management controller plus every acceptable configured
 * controller, the fail mode, and the snoop socket.
 *
 * Controllers with "unix:" or "punix:" targets are skipped, with a
 * rate-limited error, unless they fall within the whitelisted locations
 * under ovs_rundir(). */
static void
bridge_configure_remotes(struct bridge *br,
                         const struct sockaddr_in *managers, size_t n_managers)
{
    bool disable_in_band;

    struct ovsrec_controller **controllers;
    size_t n_controllers;

    enum ofproto_fail_mode fail_mode;

    struct ofproto_controller *ocs;
    size_t n_ocs;
    size_t i;

    /* Check if we should disable in-band control on this bridge. */
    disable_in_band = smap_get_bool(&br->cfg->other_config, "disable-in-band",
                                    false);

    /* Set OpenFlow queue ID for in-band control. */
    ofproto_set_in_band_queue(br->ofproto,
                              smap_get_int(&br->cfg->other_config,
                                           "in-band-queue", -1));

    if (disable_in_band) {
        ofproto_set_extra_in_band_remotes(br->ofproto, NULL, 0);
    } else {
        ofproto_set_extra_in_band_remotes(br->ofproto, managers, n_managers);
    }

    n_controllers = bridge_get_controllers(br, &controllers);

    /* One extra slot for the management controller, which always occupies
     * ocs[0]. */
    ocs = xmalloc((n_controllers + 1) * sizeof *ocs);
    n_ocs = 0;

    bridge_ofproto_controller_for_mgmt(br, &ocs[n_ocs++]);
    for (i = 0; i < n_controllers; i++) {
        struct ovsrec_controller *c = controllers[i];

        if (!strncmp(c->target, "punix:", 6)
            || !strncmp(c->target, "unix:", 5)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
            char *whitelist;

            if (!strncmp(c->target, "unix:", 5)) {
                /* Connect to a listening socket */
                whitelist = xasprintf("unix:%s/", ovs_rundir());
                /* Targets without any '/' are let through; only a target
                 * containing a path must start with the whitelisted
                 * prefix. */
                if (strchr(c->target, '/') &&
                   !equal_pathnames(c->target, whitelist,
                     strlen(whitelist))) {
                    /* Absolute path specified, but not in ovs_rundir */
                    VLOG_ERR_RL(&rl, "bridge %s: Not connecting to socket "
                                  "controller \"%s\" due to possibility for "
                                  "remote exploit. Instead, specify socket "
                                  "in whitelisted \"%s\" or connect to "
                                  "\"unix:%s/%s.mgmt\" (which is always "
                                  "available without special configuration).",
                                  br->name, c->target, whitelist,
                                  ovs_rundir(), br->name);
                    free(whitelist);
                    continue;
                }
            } else {
                /* A passive socket must be exactly the per-bridge
                 * ".controller" path (modulo repeated slashes). */
                whitelist = xasprintf("punix:%s/%s.controller",
                                      ovs_rundir(), br->name);
                if (!equal_pathnames(c->target, whitelist, SIZE_MAX)) {
                    /* Prevent remote ovsdb-server users from accessing
                     * arbitrary Unix domain sockets and overwriting arbitrary
                     * local files. */
                    VLOG_ERR_RL(&rl, "bridge %s: Not adding Unix domain socket "
                                  "controller \"%s\" due to possibility of "
                                  "overwriting local files. Instead, specify "
                                  "whitelisted \"%s\" or connect to "
                                  "\"unix:%s/%s.mgmt\" (which is always "
                                  "available without special configuration).",
                                  br->name, c->target, whitelist,
                                  ovs_rundir(), br->name);
                    free(whitelist);
                    continue;
                }
            }

            free(whitelist);
        }

        bridge_configure_local_iface_netdev(br, c);
        bridge_ofproto_controller_from_ovsrec(c, &ocs[n_ocs]);
        if (disable_in_band) {
            /* Override whatever connection mode the record asked for. */
            ocs[n_ocs].band = OFPROTO_OUT_OF_BAND;
        }
        n_ocs++;
    }

    ofproto_set_controllers(br->ofproto, ocs, n_ocs,
                            bridge_get_allowed_versions(br));
    free(ocs[0].target); /* From bridge_ofproto_controller_for_mgmt(). */
    free(ocs);

    /* Set the fail-mode.  An unset fail_mode means standalone. */
    fail_mode = !br->cfg->fail_mode
                || !strcmp(br->cfg->fail_mode, "standalone")
                    ? OFPROTO_FAIL_STANDALONE
                    : OFPROTO_FAIL_SECURE;
    ofproto_set_fail_mode(br->ofproto, fail_mode);

    /* Configure OpenFlow controller connection snooping.
     * Only done when the ofproto does not have snoops yet. */
    if (!ofproto_has_snoops(br->ofproto)) {
        struct sset snoops;

        sset_init(&snoops);
        sset_add_and_free(&snoops, xasprintf("punix:%s/%s.snoop",
                                             ovs_rundir(), br->name));
        ofproto_set_snoops(br->ofproto, &snoops);
        sset_destroy(&snoops);
    }
}

/* Applies the per-table settings in 'br->cfg' (name, flow limit and, for the
 * "evict" overflow policy, eviction groups) to each of the ofproto's tables.
 * Tables without a configuration entry get default settings.  Configuration
 * entries left over after the last table are reported and ignored. */
static void
bridge_configure_tables(struct bridge *br)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
    int n_tables;
    int i, j;

    n_tables = ofproto_get_n_tables(br->ofproto);
    /* 'j' walks the configured tables in step with 'i'.
     * NOTE(review): this assumes key_flow_tables[] is sorted in ascending
     * order -- confirm against the IDL. */
    j = 0;
    for (i = 0; i < n_tables; i++) {
        struct ofproto_table_settings s;

        /* Defaults for an unconfigured table. */
        s.name = NULL;
        s.max_flows = UINT_MAX;
        s.groups = NULL;
        s.n_groups = 0;

        if (j < br->cfg->n_flow_tables && i == br->cfg->key_flow_tables[j]) {
            struct ovsrec_flow_table *cfg = br->cfg->value_flow_tables[j++];

            s.name = cfg->name;
            if (cfg->n_flow_limit && *cfg->flow_limit < UINT_MAX) {
                s.max_flows = *cfg->flow_limit;
            }
            if (cfg->overflow_policy
                && !strcmp(cfg->overflow_policy, "evict")) {
                size_t k;

                /* Sized for the worst case; only successfully parsed
                 * entries are counted in 's.n_groups'. */
                s.groups = xmalloc(cfg->n_groups * sizeof *s.groups);
                for (k = 0; k < cfg->n_groups; k++) {
                    const char *string = cfg->groups[k];
                    char *msg;

                    msg = mf_parse_subfield__(&s.groups[k], &string);
                    if (msg) {
                        VLOG_WARN_RL(&rl, "bridge %s table %d: error parsing "
                                     "'groups' (%s)", br->name, i, msg);
                        free(msg);
                    } else if (*string) {
                        VLOG_WARN_RL(&rl, "bridge %s table %d: 'groups' "
                                     "element '%s' contains trailing garbage",
                                     br->name, i, cfg->groups[k]);
                    } else {
                        s.n_groups++;
                    }
                }
            }
        }

        ofproto_configure_table(br->ofproto, i, &s);

        free(s.groups);
    }
    for (; j < br->cfg->n_flow_tables; j++) {
        VLOG_WARN_RL(&rl, "bridge %s: ignoring configuration for flow table "
                     "%"PRId64" not supported by this datapath", br->name,
                     br->cfg->key_flow_tables[j]);
    }
}

/* Passes the "dp-desc" value from 'br''s other_config (NULL if unset) to the
 * ofproto as its datapath description. */
static void
bridge_configure_dp_desc(struct bridge *br)
{
    ofproto_set_dp_desc(br->ofproto,
                        smap_get(&br->cfg->other_config, "dp-desc"));
}

/* Port functions.
*/

/* Allocates a zeroed port for 'cfg' on 'br', copies its name, initializes
 * its (empty) interface list, and inserts it into 'br->ports' hashed by
 * name.  Returns the new port. */
static struct port *
port_create(struct bridge *br, const struct ovsrec_port *cfg)
{
    struct port *port;

    port = xzalloc(sizeof *port);
    port->bridge = br;
    port->name = xstrdup(cfg->name);
    port->cfg = cfg;
    list_init(&port->ifaces);

    hmap_insert(&br->ports, &port->hmap_node, hash_string(port->name, 0));
    return port;
}

/* Deletes interfaces from 'port' that are no longer configured for it. */
static void
port_del_ifaces(struct port *port)
{
    struct iface *iface, *next;
    struct sset new_ifaces;
    size_t i;

    /* Collect list of new interfaces.  Interfaces of type "null" are left
     * out, so an existing iface reconfigured to "null" is destroyed below. */
    sset_init(&new_ifaces);
    for (i = 0; i < port->cfg->n_interfaces; i++) {
        const char *name = port->cfg->interfaces[i]->name;
        const char *type = port->cfg->interfaces[i]->type;
        if (strcmp(type, "null")) {
            sset_add(&new_ifaces, name);
        }
    }

    /* Get rid of deleted interfaces. */
    LIST_FOR_EACH_SAFE (iface, next, port_elem, &port->ifaces) {
        if (!sset_contains(&new_ifaces, iface->name)) {
            iface_destroy(iface);
        }
    }

    sset_destroy(&new_ifaces);
}

/* Destroys 'port' and all of its interfaces, unregistering its bundle from
 * the bridge's ofproto when there is one.  A null 'port' is a no-op. */
static void
port_destroy(struct port *port)
{
    if (port) {
        struct bridge *br = port->bridge;
        struct iface *iface, *next;

        if (br->ofproto) {
            ofproto_bundle_unregister(br->ofproto, port);
        }

        LIST_FOR_EACH_SAFE (iface, next, port_elem, &port->ifaces) {
            iface_destroy(iface);
        }

        hmap_remove(&br->ports, &port->hmap_node);
        free(port->name);
        free(port);
    }
}

/* Returns the port named 'name' on 'br', or NULL if there is none. */
static struct port *
port_lookup(const struct bridge *br, const char *name)
{
    struct port *port;

    HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_string(name, 0),
                             &br->ports) {
        if (!strcmp(port->name, name)) {
            return port;
        }
    }
    return NULL;
}

/* Returns true if LACP is enabled on 'port' according to its "lacp" column.
 * '*activep' is written only when true is returned: true for "active",
 * false for "passive".  An unset column, "off", or an unrecognized value
 * (which is also logged) all yield false. */
static bool
enable_lacp(struct port *port, bool *activep)
{
    if (!port->cfg->lacp) {
        /* XXX when LACP implementation has been sufficiently tested, enable by
         * default and make active on bonded ports. */
        return false;
    } else if (!strcmp(port->cfg->lacp, "off")) {
        return false;
    } else if (!strcmp(port->cfg->lacp, "active")) {
        *activep = true;
        return true;
    } else if (!strcmp(port->cfg->lacp, "passive")) {
        *activep = false;
        return true;
    } else {
        VLOG_WARN("port %s: unknown LACP mode %s",
                  port->name, port->cfg->lacp);
        return false;
    }
}

/* Fills in 's' with the LACP settings for 'port' and returns 's', or returns
 * NULL if LACP is disabled on 'port' or its system ID is invalid.  When NULL
 * is returned 's' may have been partially written. */
static struct lacp_settings *
port_configure_lacp(struct port *port, struct lacp_settings *s)
{
    const char *lacp_time, *system_id;
    int priority;

    if (!enable_lacp(port, &s->active)) {
        return NULL;
    }

    s->name = port->name;

    /* The system ID comes from other_config if given there, otherwise from
     * the bridge's Ethernet address. */
    system_id = smap_get(&port->cfg->other_config, "lacp-system-id");
    if (system_id) {
        if (sscanf(system_id, ETH_ADDR_SCAN_FMT,
                   ETH_ADDR_SCAN_ARGS(s->id)) != ETH_ADDR_SCAN_COUNT) {
            VLOG_WARN("port %s: LACP system ID (%s) must be an Ethernet"
                      " address.", port->name, system_id);
            return NULL;
        }
    } else {
        memcpy(s->id, port->bridge->ea, ETH_ADDR_LEN);
    }

    if (eth_addr_is_zero(s->id)) {
        VLOG_WARN("port %s: Invalid zero LACP system ID.", port->name);
        return NULL;
    }

    /* Prefer bondable links if unspecified. */
    priority = smap_get_int(&port->cfg->other_config, "lacp-system-priority",
                            0);
    /* Out-of-range or unset: UINT16_MAX, minus one when list_is_short()
     * reports false for the interface list. */
    s->priority = (priority > 0 && priority <= UINT16_MAX
                   ? priority
                   : UINT16_MAX - !list_is_short(&port->ifaces));

    lacp_time = smap_get(&port->cfg->other_config, "lacp-time");
    s->fast = lacp_time && !strcasecmp(lacp_time, "fast");
    return s;
}

/* Fills in 's' with the LACP slave settings for 'iface', taken from its
 * other_config.  Values outside the 16-bit range fall back to defaults: the
 * interface's OpenFlow port number for the port ID, UINT16_MAX for the
 * priority, and 0 for the aggregation key. */
static void
iface_configure_lacp(struct iface *iface, struct lacp_slave_settings *s)
{
    int priority, portid, key;

    portid = smap_get_int(&iface->cfg->other_config, "lacp-port-id", 0);
    priority = smap_get_int(&iface->cfg->other_config, "lacp-port-priority",
                            0);
    key = smap_get_int(&iface->cfg->other_config, "lacp-aggregation-key", 0);

    if (portid <= 0 || portid > UINT16_MAX) {
        portid = ofp_to_u16(iface->ofp_port);
    }

    if (priority <= 0 || priority > UINT16_MAX) {
        priority = UINT16_MAX;
    }

    /* Unlike the two values above, a key of 0 is accepted as-is. */
    if (key < 0 || key > UINT16_MAX) {
        key = 0;
    }

    s->name = iface->name;
    s->id = portid;
    s->priority = priority;
    s->key = key;
}

/* Fills in 's' with the bonding settings for 'port' and applies the
 * resulting MII monitoring interval to each of the port's netdevs. */
static void
port_configure_bond(struct port *port, struct bond_settings *s)
{
    const char *detect_s;
    struct iface *iface;
    int miimon_interval;

    s->name = port->name;
    /* Default mode; kept when bond_mode is unset or fails to parse. */
    s->balance = BM_AB;
    if (port->cfg->bond_mode) {
        if (!bond_mode_from_string(&s->balance, port->cfg->bond_mode)) {
            VLOG_WARN("port %s: unknown bond_mode %s, defaulting to %s",
                      port->name, port->cfg->bond_mode,
                      bond_mode_to_string(s->balance));
        }
    } else {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

        /* XXX: Post version 1.5.*, the default bond_mode changed from SLB to
         * active-backup. At some point we should remove this warning. */
        VLOG_WARN_RL(&rl, "port %s: Using the default bond_mode %s. Note that"
                     " in previous versions, the default bond_mode was"
                     " balance-slb", port->name,
                     bond_mode_to_string(s->balance));
    }
    if (s->balance == BM_SLB && port->bridge->cfg->n_flood_vlans) {
        VLOG_WARN("port %s: SLB bonds are incompatible with flood_vlans, "
                  "please use another bond type or disable flood_vlans",
                  port->name);
    }

    miimon_interval = smap_get_int(&port->cfg->other_config,
                                   "bond-miimon-interval", 0);
    if (miimon_interval <= 0) {
        miimon_interval = 200;
    }

    /* The interval is only used in "miimon" detect mode; every other mode
     * (including an unset or unsupported one) zeroes it. */
    detect_s = smap_get(&port->cfg->other_config, "bond-detect-mode");
    if (!detect_s || !strcmp(detect_s, "carrier")) {
        miimon_interval = 0;
    } else if (strcmp(detect_s, "miimon")) {
        VLOG_WARN("port %s: unsupported bond-detect-mode %s, "
                  "defaulting to carrier", port->name, detect_s);
        miimon_interval = 0;
    }

    /* Negative delays are clamped to zero. */
    s->up_delay = MAX(0, port->cfg->bond_updelay);
    s->down_delay = MAX(0, port->cfg->bond_downdelay);
    s->basis = smap_get_int(&port->cfg->other_config, "bond-hash-basis", 0);
    s->rebalance_interval = smap_get_int(&port->cfg->other_config,
                                           "bond-rebalance-interval", 10000);
    /* Zero is passed through unchanged; nonzero values are raised to at
     * least 1000. */
    if (s->rebalance_interval && s->rebalance_interval < 1000) {
        s->rebalance_interval = 1000;
    }

    s->fake_iface = port->cfg->bond_fake_iface;

    LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
        netdev_set_miimon_interval(iface->netdev, miimon_interval);
    }
}

/* Returns true if 'port' is synthetic, that is, if we constructed it locally
 * instead of obtaining it from the database. */
static bool
port_is_synthetic(const struct port *port)
{
    return ovsdb_idl_row_is_synthetic(&port->cfg->header_);
}

/* Interface functions. */

/* Returns true if 'iface' is of type "internal" or has the same name as
 * bridge 'br'. */
static bool
iface_is_internal(const struct ovsrec_interface *iface,
                  const struct ovsrec_bridge *br)
{
    /* The local port and "internal" ports are always "internal". */
    return !strcmp(iface->type, "internal") || !strcmp(iface->name, br->name);
}

/* Returns the correct network device type for interface 'iface' in bridge
 * 'br'.
*/ static const char * iface_get_type(const struct ovsrec_interface *iface, const struct ovsrec_bridge *br) { const char *type; /* The local port always has type "internal". Other ports take * their type from the database and default to "system" if none is * specified. */ if (iface_is_internal(iface, br)) { type = "internal"; } else { type = iface->type[0] ? iface->type : "system"; } return ofproto_port_open_type(br->datapath_type, type); } static void iface_destroy(struct iface *iface) { if (iface) { struct port *port = iface->port; struct bridge *br = port->bridge; if (br->ofproto && iface->ofp_port != OFPP_NONE) { ofproto_port_unregister(br->ofproto, iface->ofp_port); } if (iface->ofp_port != OFPP_NONE) { hmap_remove(&br->ifaces, &iface->ofp_port_node); } list_remove(&iface->port_elem); hmap_remove(&br->iface_by_name, &iface->name_node); netdev_close(iface->netdev); free(iface->name); free(iface); } } static struct iface * iface_lookup(const struct bridge *br, const char *name) { struct iface *iface; HMAP_FOR_EACH_WITH_HASH (iface, name_node, hash_string(name, 0), &br->iface_by_name) { if (!strcmp(iface->name, name)) { return iface; } } return NULL; } static struct iface * iface_find(const char *name) { const struct bridge *br; HMAP_FOR_EACH (br, node, &all_bridges) { struct iface *iface = iface_lookup(br, name); if (iface) { return iface; } } return NULL; } static struct if_cfg * if_cfg_lookup(const struct bridge *br, const char *name) { struct if_cfg *if_cfg; HMAP_FOR_EACH_WITH_HASH (if_cfg, hmap_node, hash_string(name, 0), &br->if_cfg_todo) { if (!strcmp(if_cfg->cfg->name, name)) { return if_cfg; } } return NULL; } static struct iface * iface_from_ofp_port(const struct bridge *br, ofp_port_t ofp_port) { struct iface *iface; HMAP_FOR_EACH_IN_BUCKET (iface, ofp_port_node, hash_ofp_port(ofp_port), &br->ifaces) { if (iface->ofp_port == ofp_port) { return iface; } } return NULL; } /* Set Ethernet address of 'iface', if one is specified in the configuration * file. 
*/
static void
iface_set_mac(struct iface *iface)
{
    uint8_t mac[ETH_ADDR_LEN];
    int err;

    /* Only "internal" interfaces with a parseable MAC are touched. */
    if (strcmp(iface->type, "internal") != 0
        || !iface->cfg->mac
        || !eth_addr_from_string(iface->cfg->mac, mac)) {
        return;
    }

    if (iface->ofp_port == OFPP_LOCAL) {
        VLOG_ERR("interface %s: ignoring mac in Interface record "
                 "(use Bridge record to set local port's mac)",
                 iface->name);
        return;
    }

    if (eth_addr_is_multicast(mac)) {
        VLOG_ERR("interface %s: cannot set MAC to multicast address",
                 iface->name);
        return;
    }

    err = netdev_set_etheraddr(iface->netdev, mac);
    if (err) {
        VLOG_ERR("interface %s: setting MAC failed (%s)",
                 iface->name, ovs_strerror(err));
    }
}

/* Sets the ofport column of 'if_cfg' to 'ofport', writing -1 when 'ofport'
 * is OFPP_NONE.  Does nothing if 'if_cfg' is null or synthetic. */
static void
iface_set_ofport(const struct ovsrec_interface *if_cfg, ofp_port_t ofport)
{
    int64_t column_value;

    if (ofport == OFPP_NONE) {
        column_value = -1;
    } else {
        column_value = ofp_to_u16(ofport);
    }

    if (!if_cfg || ovsdb_idl_row_is_synthetic(&if_cfg->header_)) {
        return;
    }
    ovsrec_interface_set_ofport(if_cfg, &column_value, 1);
}

/* Clears all of the fields in 'if_cfg' that indicate interface status, and
 * sets the "ofport" field to -1.
 *
 * This is appropriate when 'if_cfg''s interface cannot be created or is
 * otherwise invalid.
*/
static void
iface_clear_db_record(const struct ovsrec_interface *if_cfg)
{
    /* NOTE(review): the comment above this function mentions resetting
     * "ofport" to -1, but no ofport setter is called here -- confirm whether
     * the comment or the code is stale. */
    if (!ovsdb_idl_row_is_synthetic(&if_cfg->header_)) {
        ovsrec_interface_set_status(if_cfg, NULL);
        ovsrec_interface_set_admin_state(if_cfg, NULL);
        ovsrec_interface_set_duplex(if_cfg, NULL);
        ovsrec_interface_set_link_speed(if_cfg, NULL, 0);
        ovsrec_interface_set_link_state(if_cfg, NULL);
        ovsrec_interface_set_mac_in_use(if_cfg, NULL);
        ovsrec_interface_set_mtu(if_cfg, NULL, 0);
        ovsrec_interface_set_cfm_fault(if_cfg, NULL, 0);
        ovsrec_interface_set_cfm_fault_status(if_cfg, NULL, 0);
        ovsrec_interface_set_cfm_remote_mpids(if_cfg, NULL, 0);
        ovsrec_interface_set_lacp_current(if_cfg, NULL, 0);
        ovsrec_interface_set_statistics(if_cfg, NULL, NULL, 0);
        ovsrec_interface_set_ifindex(if_cfg, NULL, 0);
    }
}

/* Returns true if integer key 'target' is present in datum 'queues'. */
static bool
queue_ids_include(const struct ovsdb_datum *queues, int64_t target)
{
    union ovsdb_atom atom;

    atom.integer = target;
    /* ovsdb_datum_find_key() reports "not found" as UINT_MAX. */
    return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER) != UINT_MAX;
}

/* Applies QoS record 'qos' (which may be null) to 'iface': the top-level
 * QoS type, the set of queues, the per-queue DSCP marks handed to the
 * ofproto, and finally the ingress policing settings from the interface
 * record. */
static void
iface_configure_qos(struct iface *iface, const struct ovsrec_qos *qos)
{
    /* Accumulates one 'struct ofproto_port_queue' per queue that carries a
     * DSCP value. */
    struct ofpbuf queues_buf;

    ofpbuf_init(&queues_buf, 0);

    /* No record, an empty type, or no queues: remove QoS from the netdev. */
    if (!qos || qos->type[0] == '\0' || qos->n_queues < 1) {
        netdev_set_qos(iface->netdev, NULL, NULL);
    } else {
        const struct ovsdb_datum *queues;
        struct netdev_queue_dump dump;
        unsigned int queue_id;
        struct smap details;
        bool queue_zero;
        size_t i;

        /* Configure top-level Qos for 'iface'. */
        netdev_set_qos(iface->netdev, qos->type, &qos->other_config);

        /* Deconfigure queues that were deleted. */
        queues = ovsrec_qos_get_queues(qos, OVSDB_TYPE_INTEGER,
                                       OVSDB_TYPE_UUID);
        smap_init(&details);
        NETDEV_QUEUE_FOR_EACH (&queue_id, &details, &dump, iface->netdev) {
            if (!queue_ids_include(queues, queue_id)) {
                netdev_delete_queue(iface->netdev, queue_id);
            }
        }
        smap_destroy(&details);

        /* Configure queues for 'iface'. */
        queue_zero = false;
        for (i = 0; i < qos->n_queues; i++) {
            const struct ovsrec_queue *queue = qos->value_queues[i];
            unsigned int queue_id = qos->key_queues[i];

            if (queue_id == 0) {
                queue_zero = true;
            }

            if (queue->n_dscp == 1) {
                struct ofproto_port_queue *port_queue;

                port_queue = ofpbuf_put_uninit(&queues_buf,
                                               sizeof *port_queue);
                port_queue->queue = queue_id;
                port_queue->dscp = queue->dscp[0];
            }

            netdev_set_queue(iface->netdev, queue_id, &queue->other_config);
        }
        /* If the record does not list queue 0, configure it anyway with
         * empty details. */
        if (!queue_zero) {
            struct smap details;

            smap_init(&details);
            netdev_set_queue(iface->netdev, 0, &details);
            smap_destroy(&details);
        }
    }

    /* When QoS was removed above, 'queues_buf' is empty and the ofproto is
     * told about zero queues. */
    if (iface->ofp_port != OFPP_NONE) {
        const struct ofproto_port_queue *port_queues = queues_buf.data;
        size_t n_queues = queues_buf.size / sizeof *port_queues;

        ofproto_port_set_queues(iface->port->bridge->ofproto, iface->ofp_port,
                                port_queues, n_queues);
    }

    netdev_set_policing(iface->netdev,
                        iface->cfg->ingress_policing_rate,
                        iface->cfg->ingress_policing_burst);

    ofpbuf_uninit(&queues_buf);
}

/* Configures CFM on 'iface' from its Interface record, or clears CFM from
 * the port when the record has no 'cfm_mpid'. */
static void
iface_configure_cfm(struct iface *iface)
{
    const struct ovsrec_interface *cfg = iface->cfg;
    const char *opstate_str;
    const char *cfm_ccm_vlan;
    struct cfm_settings s;
    struct smap netdev_args;

    if (!cfg->n_cfm_mpid) {
        ofproto_port_clear_cfm(iface->port->bridge->ofproto, iface->ofp_port);
        return;
    }

    /* The tunnel key is checked only when the netdev's configuration can be
     * read and has "key" or "in_key" set to "flow". */
    s.check_tnl_key = false;
    smap_init(&netdev_args);
    if (!netdev_get_config(iface->netdev, &netdev_args)) {
        const char *key = smap_get(&netdev_args, "key");
        const char *in_key = smap_get(&netdev_args, "in_key");

        s.check_tnl_key = (key && !strcmp(key, "flow"))
                           || (in_key && !strcmp(in_key, "flow"));
    }
    smap_destroy(&netdev_args);

    s.mpid = *cfg->cfm_mpid;
    s.interval = smap_get_int(&iface->cfg->other_config, "cfm_interval", 0);
    cfm_ccm_vlan = smap_get(&iface->cfg->other_config, "cfm_ccm_vlan");
    s.ccm_pcp = smap_get_int(&iface->cfg->other_config, "cfm_ccm_pcp", 0);

    /* Unset or non-positive interval: default to 1000. */
    if (s.interval <= 0) {
        s.interval = 1000;
    }

    if (!cfm_ccm_vlan) {
        s.ccm_vlan = 0;
    } else if (!strcasecmp("random", cfm_ccm_vlan)) {
        s.ccm_vlan = CFM_RANDOM_VLAN;
    } else {
        s.ccm_vlan = atoi(cfm_ccm_vlan);
        /* A numeric value that collides with the "random" sentinel is
         * treated as unset. */
        if (s.ccm_vlan == CFM_RANDOM_VLAN) {
            s.ccm_vlan = 0;
        }
    }

    s.extended = smap_get_bool(&iface->cfg->other_config, "cfm_extended",
                               false);
    s.demand = smap_get_bool(&iface->cfg->other_config, "cfm_demand", false);

    /* Operationally up unless "cfm_opstate" is set to something other than
     * "up". */
    opstate_str = smap_get(&iface->cfg->other_config, "cfm_opstate");
    s.opup = !opstate_str || !strcasecmp("up", opstate_str);

    ofproto_port_set_cfm(iface->port->bridge->ofproto, iface->ofp_port, &s);
}

/* Returns true if 'iface' is synthetic, that is, if we constructed it locally
 * instead of obtaining it from the database. */
static bool
iface_is_synthetic(const struct iface *iface)
{
    return ovsdb_idl_row_is_synthetic(&iface->cfg->header_);
}

/* Returns the OpenFlow port to use for 'cfg': its 'ofport_request' if set,
 * otherwise its current 'ofport' if set, otherwise OFPP_NONE. */
static ofp_port_t
iface_pick_ofport(const struct ovsrec_interface *cfg)
{
    ofp_port_t ofport = cfg->n_ofport ? u16_to_ofp(*cfg->ofport)
                                      : OFPP_NONE;
    return cfg->n_ofport_request ? u16_to_ofp(*cfg->ofport_request)
                                 : ofport;
}

/* Port mirroring. */

/* Returns the mirror on 'br' whose UUID is 'uuid', or NULL if there is
 * none. */
static struct mirror *
mirror_find_by_uuid(struct bridge *br, const struct uuid *uuid)
{
    struct mirror *m;

    HMAP_FOR_EACH_IN_BUCKET (m, hmap_node, uuid_hash(uuid), &br->mirrors) {
        if (uuid_equals(uuid, &m->uuid)) {
            return m;
        }
    }
    return NULL;
}

/* Brings 'br''s mirrors in line with its configuration: destroys mirrors
 * whose records are gone, creates and (re)configures the rest, and updates
 * the ofproto's flood VLANs. */
static void
bridge_configure_mirrors(struct bridge *br)
{
    const struct ovsdb_datum *mc;
    unsigned long *flood_vlans;
    struct mirror *m, *next;
    size_t i;

    /* Get rid of deleted mirrors. */
    mc = ovsrec_bridge_get_mirrors(br->cfg, OVSDB_TYPE_UUID);
    HMAP_FOR_EACH_SAFE (m, next, hmap_node, &br->mirrors) {
        union ovsdb_atom atom;

        atom.uuid = m->uuid;
        if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID) == UINT_MAX) {
            mirror_destroy(m);
        }
    }

    /* Add new mirrors and reconfigure existing ones. */
    for (i = 0; i < br->cfg->n_mirrors; i++) {
        const struct ovsrec_mirror *cfg = br->cfg->mirrors[i];
        struct mirror *m = mirror_find_by_uuid(br, &cfg->header_.uuid);
        if (!m) {
            m = mirror_create(br, cfg);
        }
        m->cfg = cfg;
        /* A mirror that cannot be configured is dropped entirely. */
        if (!mirror_configure(m)) {
            mirror_destroy(m);
        }
    }

    /* Update flooded vlans (for RSPAN). */
    flood_vlans = vlan_bitmap_from_array(br->cfg->flood_vlans,
                                         br->cfg->n_flood_vlans);
    ofproto_set_flood_vlans(br->ofproto, flood_vlans);
    bitmap_free(flood_vlans);
}

/* Allocates a mirror for 'cfg' on 'br', keyed by the record's UUID, and
 * inserts it into 'br->mirrors'.  Returns the new mirror; 'm->cfg' is left
 * for the caller to set. */
static struct mirror *
mirror_create(struct bridge *br, const struct ovsrec_mirror *cfg)
{
    struct mirror *m;

    m = xzalloc(sizeof *m);
    m->uuid = cfg->header_.uuid;
    hmap_insert(&br->mirrors, &m->hmap_node, uuid_hash(&m->uuid));
    m->bridge = br;
    m->name = xstrdup(cfg->name);

    return m;
}

/* Destroys 'm', unregistering it from the bridge's ofproto when there is
 * one.  A null 'm' is a no-op. */
static void
mirror_destroy(struct mirror *m)
{
    if (m) {
        struct bridge *br = m->bridge;

        if (br->ofproto) {
            ofproto_mirror_unregister(br->ofproto, m);
        }

        hmap_remove(&br->mirrors, &m->hmap_node);
        free(m->name);
        free(m);
    }
}

/* Translates the 'n_in_ports' port records in 'in_ports' into the matching
 * 'struct port's on 'm''s bridge.  Stores a newly allocated array in
 * '*out_portsp' and its length in '*n_out_portsp'; the caller must free the
 * array.  Records naming ports that do not exist on the bridge are logged
 * and dropped. */
static void
mirror_collect_ports(struct mirror *m,
                     struct ovsrec_port **in_ports, int n_in_ports,
                     void ***out_portsp, size_t *n_out_portsp)
{
    void **out_ports = xmalloc(n_in_ports * sizeof *out_ports);
    size_t n_out_ports = 0;
    size_t i;

    for (i = 0; i < n_in_ports; i++) {
        const char *name = in_ports[i]->name;
        struct port *port = port_lookup(m->bridge, name);
        if (port) {
            out_ports[n_out_ports++] = port;
        } else {
            VLOG_WARN("bridge %s: mirror %s cannot match on nonexistent "
                      "port %s", m->bridge->name, m->name, name);
        }
    }
    *out_portsp = out_ports;
    *n_out_portsp = n_out_ports;
}

/* Registers 'm' with its bridge's ofproto according to 'm->cfg'.  Returns
 * true on success, false if the mirror has no usable output (in which case
 * nothing is registered). */
static bool
mirror_configure(struct mirror *m)
{
    const struct ovsrec_mirror *cfg = m->cfg;
    struct ofproto_mirror_settings s;

    /* Set name. */
    if (strcmp(cfg->name, m->name)) {
        free(m->name);
        m->name = xstrdup(cfg->name);
    }
    s.name = m->name;

    /* Get output port or VLAN. */
    if (cfg->output_port) {
        s.out_bundle = port_lookup(m->bridge, cfg->output_port->name);
        if (!s.out_bundle) {
            VLOG_ERR("bridge %s: mirror %s outputs to port not on bridge",
                     m->bridge->name, m->name);
            return false;
        }
        /* UINT16_MAX marks "no output VLAN". */
        s.out_vlan = UINT16_MAX;

        if (cfg->output_vlan) {
            VLOG_ERR("bridge %s: mirror %s specifies both output port and "
                     "output vlan; ignoring output vlan",
                     m->bridge->name, m->name);
        }
    } else if (cfg->output_vlan) {
        /* The database should prevent invalid VLAN values. */
        s.out_bundle = NULL;
        s.out_vlan = *cfg->output_vlan;
    } else {
        VLOG_ERR("bridge %s: mirror %s does not specify output; ignoring",
                 m->bridge->name, m->name);
        return false;
    }

    /* Get port selection. */
    if (cfg->select_all) {
        size_t n_ports = hmap_count(&m->bridge->ports);
        void **ports = xmalloc(n_ports * sizeof *ports);
        struct port *port;
        size_t i;

        i = 0;
        HMAP_FOR_EACH (port, hmap_node, &m->bridge->ports) {
            ports[i++] = port;
        }

        /* Sources and destinations share one array here; the cleanup below
         * takes care not to free it twice. */
        s.srcs = ports;
        s.n_srcs = n_ports;

        s.dsts = ports;
        s.n_dsts = n_ports;
    } else {
        /* Get ports, dropping ports that don't exist.
         * The IDL ensures that there are no duplicates. */
        mirror_collect_ports(m, cfg->select_src_port, cfg->n_select_src_port,
                             &s.srcs, &s.n_srcs);
        mirror_collect_ports(m, cfg->select_dst_port, cfg->n_select_dst_port,
                             &s.dsts, &s.n_dsts);
    }

    /* Get VLAN selection. */
    s.src_vlans = vlan_bitmap_from_array(cfg->select_vlan, cfg->n_select_vlan);

    /* Configure. */
    ofproto_mirror_register(m->bridge->ofproto, m, &s);

    /* Clean up. */
    if (s.srcs != s.dsts) {
        free(s.dsts);
    }
    free(s.srcs);
    free(s.src_vlans);

    return true;
}

/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
 *
 * This is deprecated.  It is only for compatibility with broken device drivers
 * in old versions of Linux that do not properly support VLANs when VLAN
 * devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces.
*/ static struct ovsrec_port **recs; static size_t n_recs, allocated_recs; /* Adds 'rec' to a list of recs that have to be destroyed when the VLAN * splinters are reconfigured. */ static void register_rec(struct ovsrec_port *rec) { if (n_recs >= allocated_recs) { recs = x2nrealloc(recs, &allocated_recs, sizeof *recs); } recs[n_recs++] = rec; } /* Frees all of the ports registered with register_reg(). */ static void free_registered_recs(void) { size_t i; for (i = 0; i < n_recs; i++) { struct ovsrec_port *port = recs[i]; size_t j; for (j = 0; j < port->n_interfaces; j++) { struct ovsrec_interface *iface = port->interfaces[j]; free(iface->name); free(iface); } smap_destroy(&port->other_config); free(port->interfaces); free(port->name); free(port->tag); free(port); } n_recs = 0; } /* Returns true if VLAN splinters are enabled on 'iface_cfg', false * otherwise. */ static bool vlan_splinters_is_enabled(const struct ovsrec_interface *iface_cfg) { return smap_get_bool(&iface_cfg->other_config, "enable-vlan-splinters", false); } /* Figures out the set of VLANs that are in use for the purpose of VLAN * splinters. * * If VLAN splinters are enabled on at least one interface and any VLANs are in * use, returns a 4096-bit bitmap with a 1-bit for each in-use VLAN (bits 0 and * 4095 will not be set). The caller is responsible for freeing the bitmap, * with free(). * * If VLANs splinters are not enabled on any interface or if no VLANs are in * use, returns NULL. * * Updates 'vlan_splinters_enabled_anywhere'. */ static unsigned long int * collect_splinter_vlans(const struct ovsrec_open_vswitch *ovs_cfg) { unsigned long int *splinter_vlans; struct sset splinter_ifaces; const char *real_dev_name; struct shash *real_devs; struct shash_node *node; struct bridge *br; size_t i; /* Free space allocated for synthesized ports and interfaces, since we're * in the process of reconstructing all of them. 
*/ free_registered_recs(); splinter_vlans = bitmap_allocate(4096); sset_init(&splinter_ifaces); vlan_splinters_enabled_anywhere = false; for (i = 0; i < ovs_cfg->n_bridges; i++) { struct ovsrec_bridge *br_cfg = ovs_cfg->bridges[i]; size_t j; for (j = 0; j < br_cfg->n_ports; j++) { struct ovsrec_port *port_cfg = br_cfg->ports[j]; int k; for (k = 0; k < port_cfg->n_interfaces; k++) { struct ovsrec_interface *iface_cfg = port_cfg->interfaces[k]; if (vlan_splinters_is_enabled(iface_cfg)) { vlan_splinters_enabled_anywhere = true; sset_add(&splinter_ifaces, iface_cfg->name); vlan_bitmap_from_array__(port_cfg->trunks, port_cfg->n_trunks, splinter_vlans); } } if (port_cfg->tag && *port_cfg->tag > 0 && *port_cfg->tag < 4095) { bitmap_set1(splinter_vlans, *port_cfg->tag); } } } if (!vlan_splinters_enabled_anywhere) { free(splinter_vlans); sset_destroy(&splinter_ifaces); return NULL; } HMAP_FOR_EACH (br, node, &all_bridges) { if (br->ofproto) { ofproto_get_vlan_usage(br->ofproto, splinter_vlans); } } /* Don't allow VLANs 0 or 4095 to be splintered. VLAN 0 should appear on * the real device. VLAN 4095 is reserved and Linux doesn't allow a VLAN * device to be created for it. */ bitmap_set0(splinter_vlans, 0); bitmap_set0(splinter_vlans, 4095); /* Delete all VLAN devices that we don't need. */ vlandev_refresh(); real_devs = vlandev_get_real_devs(); SHASH_FOR_EACH (node, real_devs) { const struct vlan_real_dev *real_dev = node->data; const struct vlan_dev *vlan_dev; bool real_dev_has_splinters; real_dev_has_splinters = sset_contains(&splinter_ifaces, real_dev->name); HMAP_FOR_EACH (vlan_dev, hmap_node, &real_dev->vlan_devs) { if (!real_dev_has_splinters || !bitmap_is_set(splinter_vlans, vlan_dev->vid)) { struct netdev *netdev; if (!netdev_open(vlan_dev->name, "system", &netdev)) { if (!netdev_get_in4(netdev, NULL, NULL) || !netdev_get_in6(netdev, NULL)) { /* It has an IP address configured, so we don't own * it. Don't delete it. 
*/ } else { vlandev_del(vlan_dev->name); } netdev_close(netdev); } } } } /* Add all VLAN devices that we need. */ SSET_FOR_EACH (real_dev_name, &splinter_ifaces) { int vid; BITMAP_FOR_EACH_1 (vid, 4096, splinter_vlans) { if (!vlandev_get_name(real_dev_name, vid)) { vlandev_add(real_dev_name, vid); } } } vlandev_refresh(); sset_destroy(&splinter_ifaces); if (bitmap_scan(splinter_vlans, 0, 4096) >= 4096) { free(splinter_vlans); return NULL; } return splinter_vlans; } /* Pushes the configure of VLAN splinter port 'port' (e.g. eth0.9) down to * ofproto. */ static void configure_splinter_port(struct port *port) { struct ofproto *ofproto = port->bridge->ofproto; ofp_port_t realdev_ofp_port; const char *realdev_name; struct iface *vlandev, *realdev; ofproto_bundle_unregister(port->bridge->ofproto, port); vlandev = CONTAINER_OF(list_front(&port->ifaces), struct iface, port_elem); realdev_name = smap_get(&port->cfg->other_config, "realdev"); realdev = iface_lookup(port->bridge, realdev_name); realdev_ofp_port = realdev ? realdev->ofp_port : 0; ofproto_port_set_realdev(ofproto, vlandev->ofp_port, realdev_ofp_port, *port->cfg->tag); } static struct ovsrec_port * synthesize_splinter_port(const char *real_dev_name, const char *vlan_dev_name, int vid) { struct ovsrec_interface *iface; struct ovsrec_port *port; iface = xmalloc(sizeof *iface); ovsrec_interface_init(iface); iface->name = xstrdup(vlan_dev_name); iface->type = "system"; port = xmalloc(sizeof *port); ovsrec_port_init(port); port->interfaces = xmemdup(&iface, sizeof iface); port->n_interfaces = 1; port->name = xstrdup(vlan_dev_name); port->vlan_mode = "splinter"; port->tag = xmalloc(sizeof *port->tag); *port->tag = vid; smap_add(&port->other_config, "realdev", real_dev_name); register_rec(port); return port; } /* For each interface with 'br' that has VLAN splinters enabled, adds a * corresponding ovsrec_port to 'ports' for each splinter VLAN marked with a * 1-bit in the 'splinter_vlans' bitmap. 
*/ static void add_vlan_splinter_ports(struct bridge *br, const unsigned long int *splinter_vlans, struct shash *ports) { size_t i; /* We iterate through 'br->cfg->ports' instead of 'ports' here because * we're modifying 'ports'. */ for (i = 0; i < br->cfg->n_ports; i++) { const char *name = br->cfg->ports[i]->name; struct ovsrec_port *port_cfg = shash_find_data(ports, name); size_t j; for (j = 0; j < port_cfg->n_interfaces; j++) { struct ovsrec_interface *iface_cfg = port_cfg->interfaces[j]; if (vlan_splinters_is_enabled(iface_cfg)) { const char *real_dev_name; uint16_t vid; real_dev_name = iface_cfg->name; BITMAP_FOR_EACH_1 (vid, 4096, splinter_vlans) { const char *vlan_dev_name; vlan_dev_name = vlandev_get_name(real_dev_name, vid); if (vlan_dev_name && !shash_find(ports, vlan_dev_name)) { shash_add(ports, vlan_dev_name, synthesize_splinter_port( real_dev_name, vlan_dev_name, vid)); } } } } } } static void mirror_refresh_stats(struct mirror *m) { struct ofproto *ofproto = m->bridge->ofproto; uint64_t tx_packets, tx_bytes; char *keys[2]; int64_t values[2]; size_t stat_cnt = 0; if (ofproto_mirror_get_stats(ofproto, m, &tx_packets, &tx_bytes)) { ovsrec_mirror_set_statistics(m->cfg, NULL, NULL, 0); return; } if (tx_packets != UINT64_MAX) { keys[stat_cnt] = "tx_packets"; values[stat_cnt] = tx_packets; stat_cnt++; } if (tx_bytes != UINT64_MAX) { keys[stat_cnt] = "tx_bytes"; values[stat_cnt] = tx_bytes; stat_cnt++; } ovsrec_mirror_set_statistics(m->cfg, keys, values, stat_cnt); } openvswitch-2.0.1+git20140120/vswitchd/bridge.h000066400000000000000000000016121226605124000207120ustar00rootroot00000000000000/* Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef VSWITCHD_BRIDGE_H #define VSWITCHD_BRIDGE_H 1 struct simap; void bridge_init(const char *remote); void bridge_exit(void); void bridge_run(void); void bridge_run_fast(void); void bridge_wait(void); void bridge_get_memory_usage(struct simap *usage); #endif /* bridge.h */ openvswitch-2.0.1+git20140120/vswitchd/ovs-vswitchd.8.in000066400000000000000000000262511226605124000224510ustar00rootroot00000000000000.\" -*- nroff -*- .de IQ . br . ns . IP "\\$1" .. .TH ovs\-vswitchd 8 "@VERSION@" "Open vSwitch" "Open vSwitch Manual" .\" This program's name: .ds PN ovs\-vswitchd .\" SSL peer program's name: .ds SN ovs\-controller . .SH NAME ovs\-vswitchd \- Open vSwitch daemon . .SH SYNOPSIS \fBovs\-vswitchd \fR[\fIdatabase\fR] . .SH DESCRIPTION A daemon that manages and controls any number of Open vSwitch switches on the local machine. .PP The \fIdatabase\fR argument specifies how \fBovs\-vswitchd\fR connects to \fBovsdb\-server\fR. The default is \fBunix:@RUNDIR@/db.sock\fR. The following forms are accepted: .so ovsdb/remote-active.man .so ovsdb/remote-passive.man .PP \fBovs\-vswitchd\fR retrieves its configuration from \fIdatabase\fR at startup. It sets up Open vSwitch datapaths and then operates switching across each bridge described in its configuration files. As the database changes, \fBovs\-vswitchd\fR automatically updates its configuration to match. .PP Upon receipt of a SIGHUP signal, \fBovs\-vswitchd\fR reopens its log file, if one was specified on the command line. .PP \fBovs\-vswitchd\fR switches may be configured with any of the following features: . 
.IP \(bu L2 switching with MAC learning. . .IP \(bu NIC bonding with automatic fail-over and source MAC-based TX load balancing ("SLB"). . .IP \(bu 802.1Q VLAN support. . .IP \(bu Port mirroring, with optional VLAN tagging. . .IP \(bu NetFlow v5 flow logging. . .IP \(bu sFlow(R) monitoring. . .IP \(bu Connectivity to an external OpenFlow controller, such as NOX. . .PP Only a single instance of \fBovs\-vswitchd\fR is intended to run at a time. A single \fBovs\-vswitchd\fR can manage any number of switch instances, up to the maximum number of supported Open vSwitch datapaths. .PP \fBovs\-vswitchd\fR does all the necessary management of Open vSwitch datapaths itself. Thus, external tools, such as \fBovs\-dpctl\fR(8), are not needed for managing datapaths in conjunction with \fBovs\-vswitchd\fR, and their use to modify datapaths when \fBovs\-vswitchd\fR is running can interfere with its operation. (\fBovs\-dpctl\fR may still be useful for diagnostics.) .PP An Open vSwitch datapath kernel module must be loaded for \fBovs\-vswitchd\fR to be useful. Please refer to the \fBINSTALL.Linux\fR file included in the Open vSwitch distribution for instructions on how to build and load the Open vSwitch kernel module. .PP .SH OPTIONS .IP "\fB\-\-mlockall\fR" Causes \fBovs\-vswitchd\fR to call the \fBmlockall()\fR function, to attempt to lock all of its process memory into physical RAM, preventing the kernel from paging any of its memory to disk. This helps to avoid networking interruptions due to system memory pressure. .IP Some systems do not support \fBmlockall()\fR at all, and other systems only allow privileged users, such as the superuser, to use it. \fBovs\-vswitchd\fR emits a log message if \fBmlockall()\fR is unavailable or unsuccessful. . .ds DD \ \fBovs\-vswitchd\fR detaches only after it has connected to the \ database, retrieved the initial configuration, and set up that \ configuration.
.so lib/daemon.man .SS "Public Key Infrastructure Options" .so lib/ssl.man .so lib/ssl-bootstrap.man .so lib/vlog.man .so lib/common.man . .SH "RUNTIME MANAGEMENT COMMANDS" \fBovs\-appctl\fR(8) can send commands to a running \fBovs\-vswitchd\fR process. The currently supported commands are described below. The command descriptions assume an understanding of how to configure Open vSwitch. .SS "GENERAL COMMANDS" .IP "\fBexit\fR" Causes \fBovs\-vswitchd\fR to gracefully terminate. .IP "\fBqos/show\fR \fIinterface\fR" Queries the kernel for Quality of Service configuration and statistics associated with the given \fIinterface\fR. .IP "\fBbfd/show\fR [\fIinterface\fR]" Displays detailed information about Bidirectional Forwarding Detection configured on \fIinterface\fR. If \fIinterface\fR is not specified, then displays detailed information about all interfaces with BFD enabled. .IP "\fBbfd/set-forwarding\fR [\fIinterface\fR] \fIstatus\fR" Force the fault status of the BFD module on \fIinterface\fR (or all interfaces if none is given) to be \fIstatus\fR. \fIstatus\fR can be "true", "false", or "normal" which reverts to the standard behavior. .IP "\fBcfm/show\fR [\fIinterface\fR]" Displays detailed information about Connectivity Fault Management configured on \fIinterface\fR. If \fIinterface\fR is not specified, then displays detailed information about all interfaces with CFM enabled. .IP "\fBcfm/set-fault\fR [\fIinterface\fR] \fIstatus\fR" Force the fault status of the CFM module on \fIinterface\fR (or all interfaces if none is given) to be \fIstatus\fR. \fIstatus\fR can be "true", "false", or "normal" which reverts to the standard behavior. .IP "\fBstp/tcn\fR [\fIbridge\fR]" Forces a topology change event on \fIbridge\fR if it's running STP. This may cause it to send Topology Change Notifications to its peers and flush its MAC table. If no \fIbridge\fR is given, forces a topology change event on all bridges. .SS "BRIDGE COMMANDS" These commands manage bridges.
.IP "\fBfdb/flush\fR [\fIbridge\fR]" Flushes \fIbridge\fR MAC address learning table, or all learning tables if no \fIbridge\fR is given. .IP "\fBfdb/show\fR \fIbridge\fR" Lists each MAC address/VLAN pair learned by the specified \fIbridge\fR, along with the port on which it was learned and the age of the entry, in seconds. .IP "\fBbridge/reconnect\fR [\fIbridge\fR]" Makes \fIbridge\fR drop all of its OpenFlow controller connections and reconnect. If \fIbridge\fR is not specified, then all bridges drop their controller connections and reconnect. .IP This command might be useful for debugging OpenFlow controller issues. . .IP "\fBbridge/dump\-flows\fR \fIbridge\fR" Lists all flows in \fIbridge\fR, including those normally hidden to commands such as \fBovs\-ofctl dump\-flows\fR. Flows set up by mechanisms such as in-band control and fail-open are hidden from the controller since it is not allowed to modify or override them. .SS "BOND COMMANDS" These commands manage bonded ports on an Open vSwitch's bridges. To understand some of these commands, it is important to understand a detail of the bonding implementation called ``source load balancing'' (SLB). Instead of directly assigning Ethernet source addresses to slaves, the bonding implementation computes a function that maps a 48-bit Ethernet source address into an 8-bit value (a ``MAC hash'' value). All of the Ethernet addresses that map to a single 8-bit value are then assigned to a single slave. .IP "\fBbond/list\fR" Lists all of the bonds, and their slaves, on each bridge. . .IP "\fBbond/show\fR [\fIport\fR]" Lists all of the bond-specific information (updelay, downdelay, time until the next rebalance) about the given bonded \fIport\fR, or all bonded ports if no \fIport\fR is given. Also lists information about each slave: whether it is enabled or disabled, the time to completion of an updelay or downdelay if one is in progress, whether it is the active slave, the hashes assigned to the slave.
Any LACP information related to this bond may be found using the \fBlacp/show\fR command. . .IP "\fBbond/migrate\fR \fIport\fR \fIhash\fR \fIslave\fR" Only valid for SLB bonds. Assigns a given MAC hash to a new slave. \fIport\fR specifies the bond port, \fIhash\fR the MAC hash to be migrated (as a decimal number between 0 and 255), and \fIslave\fR the new slave to be assigned. .IP The reassignment is not permanent: rebalancing or fail-over will cause the MAC hash to be shifted to a new slave in the usual manner. .IP A MAC hash cannot be migrated to a disabled slave. .IP "\fBbond/set\-active\-slave\fR \fIport\fR \fIslave\fR" Sets \fIslave\fR as the active slave on \fIport\fR. \fIslave\fR must currently be enabled. .IP The setting is not permanent: a new active slave will be selected if \fIslave\fR becomes disabled. .IP "\fBbond/enable\-slave\fR \fIport\fR \fIslave\fR" .IQ "\fBbond/disable\-slave\fR \fIport\fR \fIslave\fR" Enables (or disables) \fIslave\fR on the given bond \fIport\fR, skipping any updelay (or downdelay). .IP This setting is not permanent: it persists only until the carrier status of \fIslave\fR changes. .IP "\fBbond/hash\fR \fImac\fR [\fIvlan\fR] [\fIbasis\fR]" Returns the hash value which would be used for \fImac\fR with \fIvlan\fR and \fIbasis\fR if specified. . .IP "\fBlacp/show\fR [\fIport\fR]" Lists all of the LACP related information about the given \fIport\fR: active or passive, aggregation key, system id, and system priority. Also lists information about each slave: whether it is enabled or disabled, whether it is attached or detached, port id and priority, actor information, and partner information. If \fIport\fR is not specified, then displays detailed information about all ports with LACP enabled. . .so ofproto/ofproto-dpif-unixctl.man .so ofproto/ofproto-unixctl.man .so lib/vlog-unixctl.man .so lib/memory-unixctl.man .so lib/coverage-unixctl.man . .SH "OPENFLOW IMPLEMENTATION" .
.PP This section documents aspects of OpenFlow for which the OpenFlow specification requires documentation. . .SS "Packet buffering." The OpenFlow specification, version 1.2, says: . .IP Switches that implement buffering are expected to expose, through documentation, both the amount of available buffering, and the length of time before buffers may be reused. . .PP Open vSwitch maintains a separate set of 256 packet buffers for each OpenFlow connection. Any given packet buffer is preserved until it is referenced by an \fBOFPT_FLOW_MOD\fR or \fBOFPT_PACKET_OUT\fR request or for 5 seconds, whichever comes first. . .SH "LIMITS" . .PP We believe these limits to be accurate as of this writing. These limits assume the use of the Linux kernel datapath. . .IP \(bu \fBovs\-vswitchd\fR started through \fBovs\-ctl\fR(8) provides a limit of 7500 file descriptors. The limits on the number of bridges and ports are decided by the availability of file descriptors. With the Linux kernel datapath, creation of a single bridge consumes 3 file descriptors and adding a port consumes 1 file descriptor. Performance will degrade beyond 1,024 ports per bridge due to fixed hash table sizing. Other platforms may have different limitations. . .IP \(bu 2,048 MAC learning entries per bridge, by default. (This is configurable via \fBother\-config:mac\-table\-size\fR in the \fBBridge\fR table. See \fBovs\-vswitchd.conf.db\fR(5) for details.) . .IP \(bu Kernel flows are limited only by memory available to the kernel. Performance will degrade beyond 1,048,576 kernel flows per bridge with a 32-bit kernel, beyond 262,144 with a 64-bit kernel. (\fBovs\-vswitchd\fR should never install anywhere near that many flows.) . .IP \(bu OpenFlow flows are limited only by available memory. Performance is linear in the number of unique wildcard patterns.
That is, an OpenFlow table that contains many flows that all match on the same fields in the same way has a constant-time lookup, but a table that contains many flows that match on different fields requires lookup time linear in the number of flows. . .IP \(bu 255 ports per bridge participating in 802.1D Spanning Tree Protocol. . .IP \(bu 32 mirrors per bridge. . .IP \(bu 15 bytes for the name of a port. (This is a Linux kernel limitation.) . .SH "SEE ALSO" .BR ovs\-appctl (8), .BR ovsdb\-server (1), \fBINSTALL.Linux\fR in the Open vSwitch distribution. openvswitch-2.0.1+git20140120/vswitchd/ovs-vswitchd.c000066400000000000000000000152641226605124000221210ustar00rootroot00000000000000/* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include #include #include #include #include #include #ifdef HAVE_MLOCKALL #include #endif #include "bridge.h" #include "command-line.h" #include "compiler.h" #include "daemon.h" #include "dirs.h" #include "dpif.h" #include "dummy.h" #include "memory.h" #include "netdev.h" #include "openflow/openflow.h" #include "ovsdb-idl.h" #include "poll-loop.h" #include "process.h" #include "signals.h" #include "simap.h" #include "stream-ssl.h" #include "stream.h" #include "svec.h" #include "timeval.h" #include "unixctl.h" #include "util.h" #include "vconn.h" #include "vlog.h" #include "lib/vswitch-idl.h" VLOG_DEFINE_THIS_MODULE(vswitchd); /* --mlockall: If set, locks all process memory into physical RAM, preventing * the kernel from paging any of its memory to disk. */ static bool want_mlockall; static unixctl_cb_func ovs_vswitchd_exit; static char *parse_options(int argc, char *argv[], char **unixctl_path); static void usage(void) NO_RETURN; int main(int argc, char *argv[]) { char *unixctl_path = NULL; struct unixctl_server *unixctl; struct signal *sighup; char *remote; bool exiting; int retval; proctitle_init(argc, argv); set_program_name(argv[0]); remote = parse_options(argc, argv, &unixctl_path); signal(SIGPIPE, SIG_IGN); sighup = signal_register(SIGHUP); process_init(); ovsrec_init(); daemonize_start(); if (want_mlockall) { #ifdef HAVE_MLOCKALL if (mlockall(MCL_CURRENT | MCL_FUTURE)) { VLOG_ERR("mlockall failed: %s", ovs_strerror(errno)); } #else VLOG_ERR("mlockall not supported on this system"); #endif } retval = unixctl_server_create(unixctl_path, &unixctl); if (retval) { exit(EXIT_FAILURE); } unixctl_command_register("exit", "", 0, 0, ovs_vswitchd_exit, &exiting); bridge_init(remote); free(remote); exiting = false; while (!exiting) { if (signal_poll(sighup)) { vlog_reopen_log_file(); } memory_run(); if (memory_should_report()) { struct simap usage; simap_init(&usage); bridge_get_memory_usage(&usage); memory_report(&usage); simap_destroy(&usage); } 
bridge_run_fast(); bridge_run(); bridge_run_fast(); unixctl_server_run(unixctl); netdev_run(); signal_wait(sighup); memory_wait(); bridge_wait(); unixctl_server_wait(unixctl); netdev_wait(); if (exiting) { poll_immediate_wake(); } poll_block(); } bridge_exit(); unixctl_server_destroy(unixctl); return 0; } static char * parse_options(int argc, char *argv[], char **unixctl_pathp) { enum { OPT_PEER_CA_CERT = UCHAR_MAX + 1, OPT_MLOCKALL, OPT_UNIXCTL, VLOG_OPTION_ENUMS, OPT_BOOTSTRAP_CA_CERT, OPT_ENABLE_DUMMY, OPT_DISABLE_SYSTEM, DAEMON_OPTION_ENUMS }; static const struct option long_options[] = { {"help", no_argument, NULL, 'h'}, {"version", no_argument, NULL, 'V'}, {"mlockall", no_argument, NULL, OPT_MLOCKALL}, {"unixctl", required_argument, NULL, OPT_UNIXCTL}, DAEMON_LONG_OPTIONS, VLOG_LONG_OPTIONS, STREAM_SSL_LONG_OPTIONS, {"peer-ca-cert", required_argument, NULL, OPT_PEER_CA_CERT}, {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT}, {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM}, {NULL, 0, NULL, 0}, }; char *short_options = long_options_to_short_options(long_options); for (;;) { int c; c = getopt_long(argc, argv, short_options, long_options, NULL); if (c == -1) { break; } switch (c) { case 'h': usage(); case 'V': ovs_print_version(OFP10_VERSION, OFP10_VERSION); exit(EXIT_SUCCESS); case OPT_MLOCKALL: want_mlockall = true; break; case OPT_UNIXCTL: *unixctl_pathp = optarg; break; VLOG_OPTION_HANDLERS DAEMON_OPTION_HANDLERS STREAM_SSL_OPTION_HANDLERS case OPT_PEER_CA_CERT: stream_ssl_set_peer_ca_cert_file(optarg); break; case OPT_BOOTSTRAP_CA_CERT: stream_ssl_set_ca_cert_file(optarg, true); break; case OPT_ENABLE_DUMMY: dummy_enable(optarg && !strcmp(optarg, "override")); break; case OPT_DISABLE_SYSTEM: dp_blacklist_provider("system"); break; case '?': exit(EXIT_FAILURE); default: abort(); } } free(short_options); argc -= optind; argv += optind; switch (argc) { case 0: return 
xasprintf("unix:%s/db.sock", ovs_rundir()); case 1: return xstrdup(argv[0]); default: VLOG_FATAL("at most one non-option argument accepted; " "use --help for usage"); } } static void usage(void) { printf("%s: Open vSwitch daemon\n" "usage: %s [OPTIONS] [DATABASE]\n" "where DATABASE is a socket on which ovsdb-server is listening\n" " (default: \"unix:%s/db.sock\").\n", program_name, program_name, ovs_rundir()); stream_usage("DATABASE", true, false, true); daemon_usage(); vlog_usage(); printf("\nOther options:\n" " --unixctl=SOCKET override default control socket name\n" " -h, --help display this help message\n" " -V, --version display version information\n"); exit(EXIT_SUCCESS); } static void ovs_vswitchd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *exiting_) { bool *exiting = exiting_; *exiting = true; unixctl_command_reply(conn, NULL); } openvswitch-2.0.1+git20140120/vswitchd/system-stats.c000066400000000000000000000425731226605124000221440ustar00rootroot00000000000000/* Copyright (c) 2010, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #include #include "system-stats.h" #include #include #include #if HAVE_MNTENT_H #include #endif #include #include #include #if HAVE_SYS_STATVFS_H #include #endif #include #include "daemon.h" #include "dirs.h" #include "dynamic-string.h" #include "json.h" #include "latch.h" #include "ofpbuf.h" #include "ovs-thread.h" #include "poll-loop.h" #include "shash.h" #include "smap.h" #include "timeval.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(system_stats); /* #ifdefs make it a pain to maintain code: you have to try to build both ways. * Thus, this file tries to compile as much of the code as possible regardless * of the target, by writing "if (LINUX_DATAPATH)" instead of "#ifdef * __linux__" where this is possible. */ #ifdef LINUX_DATAPATH #include #else #define LINUX_DATAPATH 0 #endif /* Adds a "cpu" entry to 'stats' holding the value reported by sysconf(_SC_NPROCESSORS_ONLN). Adds nothing when sysconf() reports a value that is not positive. */ static void get_cpu_cores(struct smap *stats) { long int n_cores = sysconf(_SC_NPROCESSORS_ONLN); if (n_cores > 0) { smap_add_format(stats, "cpu", "%ld", n_cores); } } /* Adds a "load_average" entry to 'stats' holding the three getloadavg() samples, each formatted with two decimals and separated by commas. Does nothing when HAVE_GETLOADAVG is not set or when getloadavg() does not deliver all three samples. */ static void get_load_average(struct smap *stats OVS_UNUSED) { #if HAVE_GETLOADAVG double loadavg[3]; if (getloadavg(loadavg, 3) == 3) { smap_add_format(stats, "load_average", "%.2f,%.2f,%.2f", loadavg[0], loadavg[1], loadavg[2]); } #endif } /* Returns sysconf(_SC_PAGESIZE). A nonzero result is remembered in a function-local static so that later calls skip sysconf(). Returns 0 for as long as sysconf() reports a negative value. */ static unsigned int get_page_size(void) { static unsigned int cached; if (!cached) { long int value = sysconf(_SC_PAGESIZE); if (value >= 0) { cached = value; } } return cached; } /* Adds a "memory" entry to 'stats'. * * Without LINUX_DATAPATH the value is "total,used", computed from the sysconf() page counts scaled by pagesize / 1024; nothing is added if the page size or either page count is unavailable. * * With LINUX_DATAPATH the value is "total,used,cache,swap_total,swap_used", computed from the MemTotal, MemFree, Buffers, Cached, SwapTotal and SwapFree lines of /proc/meminfo; nothing is added if that file cannot be opened. */ static void get_memory_stats(struct smap *stats) { if (!LINUX_DATAPATH) { unsigned int pagesize = get_page_size(); #ifdef _SC_PHYS_PAGES long int phys_pages = sysconf(_SC_PHYS_PAGES); #else long int phys_pages = 0; #endif #ifdef _SC_AVPHYS_PAGES long int avphys_pages = sysconf(_SC_AVPHYS_PAGES); #else long int avphys_pages = 0; #endif int mem_total, mem_used; /* 'pagesize' is unsigned, so "<= 0" can only match 0 here. */ if (pagesize <= 0 || phys_pages <= 0 || avphys_pages <= 0) { return; } mem_total = phys_pages * (pagesize / 1024); mem_used = (phys_pages - avphys_pages) * (pagesize / 1024); smap_add_format(stats, "memory", "%d,%d", mem_total, mem_used); } else {
static const char file_name[] = "/proc/meminfo"; int mem_used, mem_cache, swap_used; int mem_free = 0; int buffers = 0; int cached = 0; int swap_free = 0; int mem_total = 0; int swap_total = 0; struct shash dict; char line[128]; FILE *stream; stream = fopen(file_name, "r"); if (!stream) { VLOG_WARN_ONCE("%s: open failed (%s)", file_name, ovs_strerror(errno)); return; } /* 'dict' maps each /proc/meminfo key of interest to the local variable that receives its value; keys that never appear leave the variable at its initial 0. */ shash_init(&dict); shash_add(&dict, "MemTotal", &mem_total); shash_add(&dict, "MemFree", &mem_free); shash_add(&dict, "Buffers", &buffers); shash_add(&dict, "Cached", &cached); shash_add(&dict, "SwapTotal", &swap_total); shash_add(&dict, "SwapFree", &swap_free); while (fgets(line, sizeof line, stream)) { char key[16]; int value; /* NOTE(review): "%u" is paired with the int 'value', but that conversion expects an unsigned int pointer -- confirm and align the two. The results are also kept in plain ints, which bounds the representable totals. */ if (sscanf(line, "%15[^:]: %u", key, &value) == 2) { int *valuep = shash_find_data(&dict, key); if (valuep) { *valuep = value; } } } fclose(stream); shash_destroy(&dict); mem_used = mem_total - mem_free; mem_cache = buffers + cached; swap_used = swap_total - swap_free; smap_add_format(stats, "memory", "%d,%d,%d,%d,%d", mem_total, mem_used, mem_cache, swap_total, swap_used); } } /* Returns the time at which the system booted, as the number of milliseconds * since the epoch, or 0 if the time of boot cannot be determined. 
*/ static long long int get_boot_time(void) { static long long int cache_expiration = LLONG_MIN; static long long int boot_time; ovs_assert(LINUX_DATAPATH); if (time_msec() >= cache_expiration) { static const char stat_file[] = "/proc/stat"; char line[128]; FILE *stream; cache_expiration = time_msec() + 5 * 1000; stream = fopen(stat_file, "r"); if (!stream) { VLOG_ERR_ONCE("%s: open failed (%s)", stat_file, ovs_strerror(errno)); return boot_time; } while (fgets(line, sizeof line, stream)) { long long int btime; if (sscanf(line, "btime %lld", &btime) == 1) { boot_time = btime * 1000; goto done; } } VLOG_ERR_ONCE("%s: btime not found", stat_file); done: fclose(stream); } return boot_time; } static unsigned long long int ticks_to_ms(unsigned long long int ticks) { ovs_assert(LINUX_DATAPATH); #ifndef USER_HZ #define USER_HZ 100 #endif #if USER_HZ == 100 /* Common case. */ return ticks * (1000 / USER_HZ); #else /* Alpha and some other architectures. */ double factor = 1000.0 / USER_HZ; return ticks * factor + 0.5; #endif } struct raw_process_info { unsigned long int vsz; /* Virtual size, in kB. */ unsigned long int rss; /* Resident set size, in kB. */ long long int uptime; /* ms since started. */ long long int cputime; /* ms of CPU used during 'uptime'. */ pid_t ppid; /* Parent. */ char name[18]; /* Name (surrounded by parentheses). */ }; static bool get_raw_process_info(pid_t pid, struct raw_process_info *raw) { unsigned long long int vsize, rss, start_time, utime, stime; long long int start_msec; unsigned long ppid; char file_name[128]; FILE *stream; int n; ovs_assert(LINUX_DATAPATH); sprintf(file_name, "/proc/%lu/stat", (unsigned long int) pid); stream = fopen(file_name, "r"); if (!stream) { VLOG_ERR_ONCE("%s: open failed (%s)", file_name, ovs_strerror(errno)); return false; } n = fscanf(stream, "%*d " /* (1. pid) */ "%17s " /* 2. process name */ "%*c " /* (3. state) */ "%lu " /* 4. ppid */ "%*d " /* (5. pgid) */ "%*d " /* (6. sid) */ "%*d " /* (7. 
tty_nr) */ "%*d " /* (8. tty_pgrp) */ "%*u " /* (9. flags) */ "%*u " /* (10. min_flt) */ "%*u " /* (11. cmin_flt) */ "%*u " /* (12. maj_flt) */ "%*u " /* (13. cmaj_flt) */ "%llu " /* 14. utime */ "%llu " /* 15. stime */ "%*d " /* (16. cutime) */ "%*d " /* (17. cstime) */ "%*d " /* (18. priority) */ "%*d " /* (19. nice) */ "%*d " /* (20. num_threads) */ "%*d " /* (21. always 0) */ "%llu " /* 22. start_time */ "%llu " /* 23. vsize */ "%llu " /* 24. rss */ #if 0 /* These are here for documentation but #if'd out to save * actually parsing them from the stream for no benefit. */ "%*lu " /* (25. rsslim) */ "%*lu " /* (26. start_code) */ "%*lu " /* (27. end_code) */ "%*lu " /* (28. start_stack) */ "%*lu " /* (29. esp) */ "%*lu " /* (30. eip) */ "%*lu " /* (31. pending signals) */ "%*lu " /* (32. blocked signals) */ "%*lu " /* (33. ignored signals) */ "%*lu " /* (34. caught signals) */ "%*lu " /* (35. whcan) */ "%*lu " /* (36. always 0) */ "%*lu " /* (37. always 0) */ "%*d " /* (38. exit_signal) */ "%*d " /* (39. task_cpu) */ "%*u " /* (40. rt_priority) */ "%*u " /* (41. policy) */ "%*llu " /* (42. blkio_ticks) */ "%*lu " /* (43. gtime) */ "%*ld" /* (44. 
cgtime) */ #endif , raw->name, &ppid, &utime, &stime, &start_time, &vsize, &rss); fclose(stream); if (n != 7) { VLOG_ERR_ONCE("%s: fscanf failed", file_name); return false; } start_msec = get_boot_time() + ticks_to_ms(start_time); raw->vsz = vsize / 1024; raw->rss = rss * (getpagesize() / 1024); raw->uptime = time_wall_msec() - start_msec; raw->cputime = ticks_to_ms(utime + stime); raw->ppid = ppid; return true; } static int count_crashes(pid_t pid) { char file_name[128]; const char *paren; char line[128]; int crashes = 0; FILE *stream; ovs_assert(LINUX_DATAPATH); sprintf(file_name, "/proc/%lu/cmdline", (unsigned long int) pid); stream = fopen(file_name, "r"); if (!stream) { VLOG_WARN_ONCE("%s: open failed (%s)", file_name, ovs_strerror(errno)); goto exit; } if (!fgets(line, sizeof line, stream)) { VLOG_WARN_ONCE("%s: read failed (%s)", file_name, feof(stream) ? "end of file" : ovs_strerror(errno)); goto exit_close; } paren = strchr(line, '('); if (paren) { int x; if (sscanf(paren + 1, "%d", &x) == 1) { crashes = x; } } exit_close: fclose(stream); exit: return crashes; } struct process_info { unsigned long int vsz; /* Virtual size, in kB. */ unsigned long int rss; /* Resident set size, in kB. */ long long int booted; /* ms since monitor started. */ int crashes; /* # of crashes (usually 0). */ long long int uptime; /* ms since last (re)started by monitor. */ long long int cputime; /* ms of CPU used during 'uptime'. 
*/ }; static bool get_process_info(pid_t pid, struct process_info *pinfo) { struct raw_process_info child; ovs_assert(LINUX_DATAPATH); if (!get_raw_process_info(pid, &child)) { return false; } pinfo->vsz = child.vsz; pinfo->rss = child.rss; pinfo->booted = child.uptime; pinfo->crashes = 0; pinfo->uptime = child.uptime; pinfo->cputime = child.cputime; if (child.ppid) { struct raw_process_info parent; get_raw_process_info(child.ppid, &parent); if (!strcmp(child.name, parent.name)) { pinfo->booted = parent.uptime; pinfo->crashes = count_crashes(child.ppid); } } return true; } static void get_process_stats(struct smap *stats) { struct dirent *de; DIR *dir; dir = opendir(ovs_rundir()); if (!dir) { VLOG_ERR_ONCE("%s: open failed (%s)", ovs_rundir(), ovs_strerror(errno)); return; } while ((de = readdir(dir)) != NULL) { struct process_info pinfo; char *file_name; char *extension; char *key; pid_t pid; #ifdef _DIRENT_HAVE_D_TYPE if (de->d_type != DT_UNKNOWN && de->d_type != DT_REG) { continue; } #endif extension = strrchr(de->d_name, '.'); if (!extension || strcmp(extension, ".pid")) { continue; } file_name = xasprintf("%s/%s", ovs_rundir(), de->d_name); pid = read_pidfile(file_name); free(file_name); if (pid < 0) { continue; } key = xasprintf("process_%.*s", (int) (extension - de->d_name), de->d_name); if (!smap_get(stats, key)) { if (LINUX_DATAPATH && get_process_info(pid, &pinfo)) { smap_add_format(stats, key, "%lu,%lu,%lld,%d,%lld,%lld", pinfo.vsz, pinfo.rss, pinfo.cputime, pinfo.crashes, pinfo.booted, pinfo.uptime); } else { smap_add(stats, key, ""); } } free(key); } closedir(dir); } static void get_filesys_stats(struct smap *stats OVS_UNUSED) { #if HAVE_GETMNTENT_R && HAVE_STATVFS static const char file_name[] = "/etc/mtab"; struct mntent mntent; struct mntent *me; char buf[4096]; FILE *stream; struct ds s; stream = setmntent(file_name, "r"); if (!stream) { VLOG_ERR_ONCE("%s: open failed (%s)", file_name, ovs_strerror(errno)); return; } ds_init(&s); while ((me = 
getmntent_r(stream, &mntent, buf, sizeof buf)) != NULL) { unsigned long long int total, free; struct statvfs vfs; char *p; /* Skip non-local and read-only filesystems. */ if (strncmp(me->mnt_fsname, "/dev", 4) || !strstr(me->mnt_opts, "rw")) { continue; } /* Given the mount point we can stat the file system. */ if (statvfs(me->mnt_dir, &vfs) && vfs.f_flag & ST_RDONLY) { /* That's odd... */ continue; } /* Now format the data. */ if (s.length) { ds_put_char(&s, ' '); } for (p = me->mnt_dir; *p != '\0'; p++) { ds_put_char(&s, *p == ' ' || *p == ',' ? '_' : *p); } total = (unsigned long long int) vfs.f_frsize * vfs.f_blocks / 1024; free = (unsigned long long int) vfs.f_frsize * vfs.f_bfree / 1024; ds_put_format(&s, ",%llu,%llu", total, total - free); } endmntent(stream); if (s.length) { smap_add(stats, "file_systems", ds_cstr(&s)); } ds_destroy(&s); #endif /* HAVE_GETMNTENT_R && HAVE_STATVFS */ } #define SYSTEM_STATS_INTERVAL (5 * 1000) /* In milliseconds. */ static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER; static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; static struct latch latch OVS_GUARDED_BY(mutex); static bool enabled; static bool started OVS_GUARDED_BY(mutex); static struct smap *system_stats OVS_GUARDED_BY(mutex); static void *system_stats_thread_func(void *); static void discard_stats(void); /* Enables or disables system stats collection, according to 'enable'. */ void system_stats_enable(bool enable) { if (enabled != enable) { ovs_mutex_lock(&mutex); if (enable) { if (!started) { xpthread_create(NULL, NULL, system_stats_thread_func, NULL); latch_init(&latch); started = true; } discard_stats(); xpthread_cond_signal(&cond); } enabled = enable; ovs_mutex_unlock(&mutex); } } /* Tries to obtain a new snapshot of system stats every SYSTEM_STATS_INTERVAL * milliseconds. * * When a new snapshot is available (which only occurs if system stats are * enabled), returns it as an smap owned by the caller. 
The caller must use * both smap_destroy() and free() to completely free the returned data. * * When no new snapshot is available, returns NULL. */ struct smap * system_stats_run(void) { struct smap *stats = NULL; ovs_mutex_lock(&mutex); if (system_stats) { /* A snapshot is pending: poll 'latch', then either hand the snapshot to the caller or drop it if collection has been disabled in the meantime. */ latch_poll(&latch); if (enabled) { /* Ownership moves to the caller; clearing the pointer keeps discard_stats() from freeing it later. */ stats = system_stats; system_stats = NULL; } else { discard_stats(); } } ovs_mutex_unlock(&mutex); return stats; } /* Causes poll_block() to wake up when system_stats_run() needs to be * called. */ void system_stats_wait(void) { if (enabled) { latch_wait(&latch); } } /* Destroys and frees the snapshot held in 'system_stats', if there is one, and resets the pointer to NULL. The caller must hold 'mutex'. */ static void discard_stats(void) OVS_REQUIRES(mutex) { if (system_stats) { smap_destroy(system_stats); free(system_stats); system_stats = NULL; } } /* Body of the statistics collection thread; never returns. Each iteration waits until collection is enabled, gathers a fresh snapshot, publishes it in 'system_stats', and then sleeps for SYSTEM_STATS_INTERVAL milliseconds. */ static void * NO_RETURN system_stats_thread_func(void *arg OVS_UNUSED) { pthread_detach(pthread_self()); for (;;) { long long int next_refresh; struct smap *stats; /* Block on 'cond' for as long as collection is disabled. */ ovs_mutex_lock(&mutex); while (!enabled) { ovs_mutex_cond_wait(&cond, &mutex); } ovs_mutex_unlock(&mutex); /* Gather the statistics into a private smap; 'mutex' is not held while the collectors run. */ stats = xmalloc(sizeof *stats); smap_init(stats); get_cpu_cores(stats); get_load_average(stats); get_memory_stats(stats); get_process_stats(stats); get_filesys_stats(stats); /* Publish: replace any snapshot that was never picked up, then set 'latch'. */ ovs_mutex_lock(&mutex); discard_stats(); system_stats = stats; latch_set(&latch); ovs_mutex_unlock(&mutex); /* Keep blocking until the full interval has elapsed, even if poll_block() returns early. */ next_refresh = time_msec() + SYSTEM_STATS_INTERVAL; do { poll_timer_wait_until(next_refresh); poll_block(); } while (time_msec() < next_refresh); } } openvswitch-2.0.1+git20140120/vswitchd/system-stats.h000066400000000000000000000015021226605124000221340ustar00rootroot00000000000000/* Copyright (c) 2010, 2012 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef VSWITCHD_SYSTEM_STATS #define VSWITCHD_SYSTEM_STATS 1 #include void system_stats_enable(bool enable); struct smap *system_stats_run(void); void system_stats_wait(void); #endif /* vswitchd/system-stats.h */ openvswitch-2.0.1+git20140120/vswitchd/vswitch.gv000066400000000000000000000023101226605124000213260ustar00rootroot00000000000000digraph Open_vSwitch { size="6.5,4"; margin="0"; node [shape=box]; edge [dir=none, arrowhead=none, arrowtail=none]; Bridge []; Bridge -> sFlow [label="sflow?"]; Bridge -> Mirror [label="mirrors*"]; Bridge -> IPFIX [label="ipfix?"]; Bridge -> Port [label="ports*"]; Bridge -> Controller [label="controller*"]; Bridge -> Flow_Table [label="flow_tables value*"]; Bridge -> NetFlow [label="netflow?"]; QoS [style=bold]; QoS -> Queue [label="queues value*"]; sFlow []; Flow_Sample_Collector_Set [style=bold]; Flow_Sample_Collector_Set -> Bridge [label="bridge"]; Flow_Sample_Collector_Set -> IPFIX [label="ipfix?"]; IPFIX []; Open_vSwitch [style=bold]; Open_vSwitch -> Bridge [label="bridges*"]; Open_vSwitch -> SSL [label="ssl?"]; Open_vSwitch -> Manager [label="manager_options*"]; Controller []; Flow_Table []; Queue [style=bold]; SSL []; Manager []; Mirror []; Mirror -> Port [style=dotted, constraint=false, label="select_src_port*"]; Mirror -> Port [style=dotted, constraint=false, label="output_port?"]; Mirror -> Port [style=dotted, constraint=false, label="select_dst_port*"]; Interface []; NetFlow []; Port []; Port -> QoS [label="qos?"]; Port -> Interface [label="interfaces+"]; } 
openvswitch-2.0.1+git20140120/vswitchd/vswitch.ovsschema000066400000000000000000000473641226605124000227240ustar00rootroot00000000000000{"name": "Open_vSwitch", "version": "7.3.0", "cksum": "2483452374 20182", "tables": { "Open_vSwitch": { "columns": { "bridges": { "type": {"key": {"type": "uuid", "refTable": "Bridge"}, "min": 0, "max": "unlimited"}}, "manager_options": { "type": {"key": {"type": "uuid", "refTable": "Manager"}, "min": 0, "max": "unlimited"}}, "ssl": { "type": {"key": {"type": "uuid", "refTable": "SSL"}, "min": 0, "max": 1}}, "other_config": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "next_cfg": { "type": "integer"}, "cur_cfg": { "type": "integer"}, "statistics": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}, "ephemeral": true}, "ovs_version": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}, "db_version": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}, "system_type": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}, "system_version": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}}, "isRoot": true, "maxRows": 1}, "Bridge": { "columns": { "name": { "type": "string", "mutable": false}, "datapath_type": { "type": "string"}, "datapath_id": { "type": {"key": "string", "min": 0, "max": 1}, "ephemeral": true}, "stp_enable": { "type": "boolean"}, "ports": { "type": {"key": {"type": "uuid", "refTable": "Port"}, "min": 0, "max": "unlimited"}}, "mirrors": { "type": {"key": {"type": "uuid", "refTable": "Mirror"}, "min": 0, "max": "unlimited"}}, "netflow": { "type": {"key": {"type": "uuid", "refTable": "NetFlow"}, "min": 0, "max": 1}}, "sflow": { "type": {"key": {"type": "uuid", "refTable": "sFlow"}, "min": 0, "max": 1}}, "ipfix": { "type": {"key": {"type": "uuid", "refTable": "IPFIX"}, "min": 0, "max": 1}}, "controller": { "type": {"key": {"type": "uuid", 
"refTable": "Controller"}, "min": 0, "max": "unlimited"}}, "protocols": { "type": {"key": {"type": "string", "enum": ["set", ["OpenFlow10", "OpenFlow11", "OpenFlow12", "OpenFlow13"]]}, "min": 0, "max": "unlimited"}}, "fail_mode": { "type": {"key": {"type": "string", "enum": ["set", ["standalone", "secure"]]}, "min": 0, "max": 1}}, "status": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}, "ephemeral": true}, "other_config": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "flood_vlans": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4095}, "min": 0, "max": 4096}}, "flow_tables": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 254}, "value": {"type": "uuid", "refTable": "Flow_Table"}, "min": 0, "max": "unlimited"}}}, "indexes": [["name"]]}, "Port": { "columns": { "name": { "type": "string", "mutable": false}, "interfaces": { "type": {"key": {"type": "uuid", "refTable": "Interface"}, "min": 1, "max": "unlimited"}}, "trunks": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4095}, "min": 0, "max": 4096}}, "tag": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4095}, "min": 0, "max": 1}}, "vlan_mode": { "type": {"key": {"type": "string", "enum": ["set", ["trunk", "access", "native-tagged", "native-untagged"]]}, "min": 0, "max": 1}}, "qos": { "type": {"key": {"type": "uuid", "refTable": "QoS"}, "min": 0, "max": 1}}, "mac": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}, "bond_mode": { "type": {"key": {"type": "string", "enum": ["set", ["balance-tcp", "balance-slb", "active-backup"]]}, "min": 0, "max": 1}}, "lacp": { "type": {"key": {"type": "string", "enum": ["set", ["active", "passive", "off"]]}, "min": 0, "max": 1}}, "bond_updelay": { "type": "integer"}, "bond_downdelay": { "type": "integer"}, "bond_fake_iface": { "type": 
"boolean"}, "fake_bridge": { "type": "boolean"}, "status": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}, "ephemeral": true}, "statistics": { "type": {"key": "string", "value": "integer", "min": 0, "max": "unlimited"}, "ephemeral": true}, "other_config": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, "indexes": [["name"]]}, "Interface": { "columns": { "name": { "type": "string", "mutable": false}, "type": { "type": "string"}, "options": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "ingress_policing_rate": { "type": {"key": {"type": "integer", "minInteger": 0}}}, "ingress_policing_burst": { "type": {"key": {"type": "integer", "minInteger": 0}}}, "mac_in_use": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}, "ephemeral": true}, "mac": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}, "ifindex": { "type": { "key": {"type": "integer", "minInteger": 0, "maxInteger": 4294967295}, "min": 0, "max": 1}, "ephemeral": true}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "ofport": { "type": {"key": "integer", "min": 0, "max": 1}}, "ofport_request": { "type": { "key": {"type": "integer", "minInteger": 1, "maxInteger": 65279}, "min": 0, "max": 1}}, "bfd": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "bfd_status": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "cfm_mpid": { "type": { "key": {"type": "integer"}, "min": 0, "max": 1}}, "cfm_remote_mpids": { "type": { "key": {"type": "integer"}, "min": 0, "max": "unlimited"}, "ephemeral": true}, "cfm_fault": { "type": { "key": { "type": "boolean"}, "min": 0, "max": 1}, "ephemeral": true}, "cfm_fault_status": { "type": { "key": "string", "min": 0, "max": "unlimited"}, "ephemeral": true}, "cfm_remote_opstate": { 
"type": {"key": {"type": "string", "enum": ["set", ["up", "down"]]}, "min": 0, "max": 1}, "ephemeral": true}, "cfm_health": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 100}, "min": 0, "max": 1}, "ephemeral": true}, "lacp_current": { "type": {"key": {"type": "boolean"}, "min": 0, "max": 1}, "ephemeral": true}, "other_config": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "statistics": { "type": {"key": "string", "value": "integer", "min": 0, "max": "unlimited"}, "ephemeral": true}, "status": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}, "ephemeral": true}, "admin_state": { "type": {"key": {"type": "string", "enum": ["set", ["up", "down"]]}, "min": 0, "max": 1}, "ephemeral": true}, "link_state": { "type": {"key": {"type": "string", "enum": ["set", ["up", "down"]]}, "min": 0, "max": 1}, "ephemeral": true}, "link_resets": { "type": {"key": {"type": "integer"}, "min": 0, "max": 1}, "ephemeral": true}, "link_speed": { "type": {"key": "integer", "min": 0, "max": 1}, "ephemeral": true}, "duplex": { "type": {"key": {"type": "string", "enum": ["set", ["half", "full"]]}, "min": 0, "max": 1}, "ephemeral": true}, "mtu": { "type": {"key": "integer", "min": 0, "max": 1}, "ephemeral": true}}, "indexes": [["name"]]}, "Flow_Table": { "columns": { "name": { "type": {"key": "string", "min": 0, "max": 1}}, "flow_limit": { "type": {"key": {"type": "integer", "minInteger": 0}, "min": 0, "max": 1}}, "overflow_policy": { "type": {"key": {"type": "string", "enum": ["set", ["refuse", "evict"]]}, "min": 0, "max": 1}}, "groups": { "type": {"key": "string", "min": 0, "max": "unlimited"}}}}, "QoS": { "columns": { "type": { "type": "string"}, "queues": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4294967295}, "value": {"type": "uuid", "refTable": "Queue"}, "min": 0, "max": "unlimited"}}, "other_config": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, 
"external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, "isRoot": true}, "Queue": { "columns": { "dscp": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 63}, "min": 0, "max": 1}}, "other_config": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, "isRoot": true}, "Mirror": { "columns": { "name": { "type": "string"}, "select_all": { "type": "boolean"}, "select_src_port": { "type": {"key": {"type": "uuid", "refTable": "Port", "refType": "weak"}, "min": 0, "max": "unlimited"}}, "select_dst_port": { "type": {"key": {"type": "uuid", "refTable": "Port", "refType": "weak"}, "min": 0, "max": "unlimited"}}, "select_vlan": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4095}, "min": 0, "max": 4096}}, "output_port": { "type": {"key": {"type": "uuid", "refTable": "Port", "refType": "weak"}, "min": 0, "max": 1}}, "output_vlan": { "type": {"key": {"type": "integer", "minInteger": 1, "maxInteger": 4095}, "min": 0, "max": 1}}, "statistics": { "type": {"key": "string", "value": "integer", "min": 0, "max": "unlimited"}, "ephemeral": true}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}}, "NetFlow": { "columns": { "targets": { "type": {"key": {"type": "string"}, "min": 1, "max": "unlimited"}}, "engine_type": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 255}, "min": 0, "max": 1}}, "engine_id": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 255}, "min": 0, "max": 1}}, "add_id_to_interface": { "type": "boolean"}, "active_timeout": { "type": {"key": {"type": "integer", "minInteger": -1}}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}}, "sFlow": { "columns": { "targets": { "type": {"key": "string", "min": 1, "max": "unlimited"}}, "sampling": { 
"type": {"key": "integer", "min": 0, "max": 1}}, "polling": { "type": {"key": "integer", "min": 0, "max": 1}}, "header": { "type": {"key": "integer", "min": 0, "max": 1}}, "agent": { "type": {"key": "string", "min": 0, "max": 1}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}}, "IPFIX": { "columns": { "targets": { "type": {"key": "string", "min": 1, "max": "unlimited"}}, "sampling": { "type": {"key": {"type": "integer", "minInteger": 1, "maxInteger": 4294967295}, "min": 0, "max": 1}}, "obs_domain_id": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4294967295}, "min": 0, "max": 1}}, "obs_point_id": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4294967295}, "min": 0, "max": 1}}, "cache_active_timeout": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4200}, "min": 0, "max": 1}}, "cache_max_flows": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4294967295}, "min": 0, "max": 1}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}}, "Flow_Sample_Collector_Set": { "columns": { "id": { "type": {"key": {"type": "integer", "minInteger": 0, "maxInteger": 4294967295}, "min": 1, "max": 1}}, "bridge": { "type": {"key": {"type": "uuid", "refTable": "Bridge"}, "min": 1, "max": 1}}, "ipfix": { "type": {"key": {"type": "uuid", "refTable": "IPFIX"}, "min": 0, "max": 1}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, "isRoot": true, "indexes": [["id", "bridge"]]}, "Controller": { "columns": { "target": { "type": "string"}, "max_backoff": { "type": {"key": {"type": "integer", "minInteger": 1000}, "min": 0, "max": 1}}, "inactivity_probe": { "type": {"key": "integer", "min": 0, "max": 1}}, "connection_mode": { "type": {"key": {"type": "string", "enum": ["set", ["in-band", "out-of-band"]]}, "min": 0, "max": 1}}, "local_ip": { "type": {"key": {"type": "string"}, 
"min": 0, "max": 1}}, "local_netmask": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}, "local_gateway": { "type": {"key": {"type": "string"}, "min": 0, "max": 1}}, "enable_async_messages": { "type": {"key": {"type": "boolean"}, "min": 0, "max": 1}}, "controller_rate_limit": { "type": {"key": {"type": "integer", "minInteger": 100}, "min": 0, "max": 1}}, "controller_burst_limit": { "type": {"key": {"type": "integer", "minInteger": 25}, "min": 0, "max": 1}}, "other_config": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "is_connected": { "type": "boolean", "ephemeral": true}, "role": { "type": {"key": {"type": "string", "enum": ["set", ["other", "master", "slave"]]}, "min": 0, "max": 1}, "ephemeral": true}, "status": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}, "ephemeral": true}}}, "Manager": { "columns": { "target": { "type": "string"}, "max_backoff": { "type": {"key": {"type": "integer", "minInteger": 1000}, "min": 0, "max": 1}}, "inactivity_probe": { "type": {"key": "integer", "min": 0, "max": 1}}, "connection_mode": { "type": {"key": {"type": "string", "enum": ["set", ["in-band", "out-of-band"]]}, "min": 0, "max": 1}}, "other_config": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}, "is_connected": { "type": "boolean", "ephemeral": true}, "status": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}, "ephemeral": true}}, "indexes": [["target"]]}, "SSL": { "columns": { "private_key": { "type": "string"}, "certificate": { "type": "string"}, "ca_cert": { "type": "string"}, "bootstrap_ca_cert": { "type": "boolean"}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, "maxRows": 1}}} 
openvswitch-2.0.1+git20140120/vswitchd/vswitch.pic000066400000000000000000000154201226605124000214730ustar00rootroot00000000000000.\" Generated from vswitch.gv with cksum "2412173910 1224" .PS linethick = 1; linethick = 1; box at 1.639964024,1.91216 wid 0.4050050488 height 0.23902 "Bridge" linethick = 1; box at 5.72978744,1.31461 wid 0.3718099512 height 0.23902 "sFlow" linethick = 1; box at 6.22790512,1.31461 wid 0.3917250976 height 0.23902 "Mirror" linethick = 1; box at 0.2589399268,1.31461 wid 0.35853 height 0.23902 "IPFIX" linethick = 1; box at 0.736994268,1.31461 wid 0.35853 height 0.23902 "Port" linethick = 1; box at 1.321254756,1.31461 wid 0.570970976 height 0.23902 "Controller" linethick = 1; box at 2.03167,1.31461 wid 0.610839512 height 0.23902 "Flow_Table" linethick = 1; box at 5.17191476,1.31461 wid 0.491329512 height 0.23902 "NetFlow" linethick = 0.5; box at 0.35853,0.71706 wid 0.35853 height 0.23902 "QoS" box at 0.35853,0.71706 wid 0.302974444444444 height 0.183464444444444 linethick = 0.5; box at 0.35853,0.11951 wid 0.4116450244 height 0.23902 "Queue" box at 0.35853,0.11951 wid 0.356089468844444 height 0.183464444444444 linethick = 0.5; box at 0.683884024,2.50971 wid 1.367720244 height 0.23902 "Flow_Sample_Collector_Set" box at 0.683884024,2.50971 wid 1.31216468844444 height 0.183464444444444 linethick = 0.5; box at 2.177759024,2.50971 wid 0.770170244 height 0.23902 "Open_vSwitch" box at 2.177759024,2.50971 wid 0.714614688444444 height 0.183464444444444 linethick = 1; box at 2.177759024,1.91216 wid 0.35853 height 0.23902 "SSL" linethick = 1; box at 2.728795732,1.91216 wid 0.511215976 height 0.23902 "Manager" linethick = 1; box at 0.916259268,0.71706 wid 0.517860732 height 0.23902 "Interface" linethick = 1; spline -> from 1.83949792,1.821380204 to 1.83949792,1.821380204 to 1.872052444,1.809763832 to 1.905897676,1.799533776 to 1.938739024,1.79265 to 2.331687904,1.7101881 to 5.21254816,1.889883336 to 5.55052244,1.67314 to 5.63322336,1.620173168 to 
5.6791152,1.516773116 to 5.70397328,1.43459804 "sflow?" at 5.80627384,1.613385 linethick = 1; spline -> from 1.839450116,1.821284596 to 1.839450116,1.821284596 to 1.872052444,1.809668224 to 1.905897676,1.799485972 to 1.938739024,1.79265 to 2.378774844,1.701153144 to 5.58780956,1.888544824 to 5.98219256,1.67314 to 6.07541036,1.622133132 to 6.1404238,1.518541864 to 6.18010112,1.43579314 "mirrors*" at 6.30725976,1.613385 linethick = 1; spline -> from 1.44033452,1.898966096 to 1.44033452,1.898966096 to 1.223830204,1.877406492 to 0.873952728,1.820806556 to 0.610839512,1.67314 to 0.505861928,1.614245472 to 0.4118171188,1.51466974 to 0.3474107896,1.435362904 "ipfix?" at 0.733647988,1.613385 linethick = 1; spline -> from 1.439569656,1.871861228 to 1.439569656,1.871861228 to 1.302372176,1.836725288 to 1.12195988,1.775010324 to 0.989255976,1.67314 to 0.907176508,1.610086524 to 0.84111138,1.512853188 to 0.797179504,1.43555412 "ports*" at 1.132046524,1.613385 linethick = 1; spline -> from 1.43842236,1.800203032 to 1.43842236,1.800203032 to 1.39420366,1.765545132 to 1.353140024,1.72309518 to 1.327899512,1.67314 to 1.291090432,1.600334508 to 1.29022996,1.508168396 to 1.298165424,1.435506316 "controller*" at 1.573564268,1.613385 linethick = 1; spline -> from 1.733755472,1.79241098 to 1.733755472,1.79241098 to 1.76205544,1.754837036 to 1.792697804,1.71281732 to 1.819229024,1.67314 to 1.870666128,1.596127756 to 1.923776372,1.506495256 to 1.96402734,1.436127768 "flow_tables value*" at 2.327146524,1.613385 linethick = 1; spline -> from 1.83949792,1.821523616 to 1.83949792,1.821523616 to 1.872100248,1.809907244 to 1.90594548,1.799629384 to 1.938739024,1.79265 to 2.27451432,1.721230824 to 4.744929432,1.870188088 to 5.02611256,1.67314 to 5.103077,1.61912148 to 5.13940804,1.516964332 to 5.15661748,1.435458512 "netflow?" 
at 5.32154128,1.613385 linethick = 0.5; spline -> from 6.03429892,1.412082356 to 6.03429892,1.412082356 to 6.01469928,1.420209036 to 5.9946216,1.427714264 to 5.9755,1.43412 to 5.56247344,1.571030656 to 5.44869992,1.583650912 to 5.01607372,1.626674512 to 4.690146048,1.65903782 to 4.606536852,1.636187508 to 4.279127256,1.626674512 to 2.80920206,1.583842128 to 2.396749148,1.813492544 to 0.976014268,1.43412 to 0.956271216,1.42886156 to 0.936289144,1.421882176 to 0.916689504,1.413898908 "select_src_port*" at 4.647600488,1.613385 linethick = 0.5; spline -> from 6.03429892,1.412082356 to 6.03429892,1.412082356 to 6.01469928,1.420209036 to 5.9946216,1.427714264 to 5.9755,1.43412 to 5.56247344,1.571030656 to 5.44869992,1.583650912 to 5.01607372,1.626674512 to 4.690146048,1.65903782 to 4.606536852,1.636187508 to 4.279127256,1.626674512 to 2.80920206,1.583842128 to 2.396749148,1.813492544 to 0.976014268,1.43412 to 0.956271216,1.42886156 to 0.936289144,1.421882176 to 0.916689504,1.413898908 "output_port?" at 4.647600488,1.613385 linethick = 0.5; spline -> from 6.03429892,1.412082356 to 6.03429892,1.412082356 to 6.01469928,1.420209036 to 5.9946216,1.427714264 to 5.9755,1.43412 to 5.56247344,1.571030656 to 5.44869992,1.583650912 to 5.01607372,1.626674512 to 4.690146048,1.65903782 to 4.606536852,1.636187508 to 4.279127256,1.626674512 to 2.80920206,1.583842128 to 2.396749148,1.813492544 to 0.976014268,1.43412 to 0.956271216,1.42886156 to 0.936289144,1.421882176 to 0.916689504,1.413898908 "select_dst_port*" at 4.647600488,1.613385 linethick = 1; spline -> from 0.659121552,1.193092232 to 0.659121552,1.193092232 to 0.63507614,1.155470484 to 0.60854492,1.1138332 to 0.584260488,1.07559 to 0.533827268,0.996091948 to 0.4774089872,0.906459448 to 0.43358228,0.836713412 "qos?" 
at 0.6871825,1.015835 linethick = 1; spline -> from 0.773277504,1.19366588 to 0.773277504,1.19366588 to 0.804350104,1.090026808 to 0.848951236,0.941404172 to 0.880023836,0.837812904 "interfaces+" at 1.108765976,1.015835 linethick = 1; spline -> from 0.35853,0.59611588 to 0.35853,0.59611588 to 0.35853,0.492476808 to 0.35853,0.3438732936 to 0.35853,0.2402533432 "queues value*" at 0.690480976,0.418285 linethick = 1; spline -> from 0.877346812,2.38876588 to 0.877346812,2.38876588 to 1.043226692,2.285126808 to 1.280955984,2.136504172 to 1.446740256,2.032912904 "bridge" at 1.390952988,2.210935 linethick = 1; spline -> from 0.640764816,2.38852686 to 0.640764816,2.38852686 to 0.559115584,2.158876444 to 0.3827570672,1.66286214 to 0.3015046084,1.434311216 "ipfix?" at 0.640717012,1.91216 linethick = 1; spline -> from 1.906375716,2.389148312 to 1.906375716,2.389148312 to 1.85455618,2.356545984 to 1.804553196,2.317155488 to 1.766070976,2.27069 to 1.70971006,2.2025693 to 1.677776988,2.107534948 to 1.660185116,2.032434864 "bridges*" at 1.958625488,2.210935 linethick = 1; spline -> from 2.177759024,2.38876588 to 2.177759024,2.38876588 to 2.177759024,2.285126808 to 2.177759024,2.136504172 to 2.177759024,2.032912904 "ssl?" at 2.260746768,2.210935 linethick = 1; spline -> from 2.289237952,2.38876588 to 2.289237952,2.38876588 to 2.384845952,2.285126808 to 2.52190002,2.136504172 to 2.617460216,2.032912904 "manager_options*" at 2.924696524,2.210935 .PE openvswitch-2.0.1+git20140120/vswitchd/vswitch.xml000066400000000000000000004425401226605124000215270ustar00rootroot00000000000000

    A database with this schema holds the configuration for one Open vSwitch daemon. The top-level configuration for the daemon is the table, which must have exactly one record. Records in other tables are significant only when they can be reached directly or indirectly from the table. Records that are not reachable from the table are automatically deleted from the database, except for records in a few distinguished ``root set'' tables.

    Common Columns

    Most tables contain two special columns, named other_config and external_ids. These columns have the same form and purpose each place that they appear, so we describe them here to save space later.

    other_config: map of string-string pairs

    Key-value pairs for configuring rarely used features. Supported keys, along with the forms taken by their values, are documented individually for each table.

    A few tables do not have other_config columns because no key-value pairs have yet been defined for them.

    external_ids: map of string-string pairs
    Key-value pairs for use by external frameworks that integrate with Open vSwitch, rather than by Open vSwitch itself. System integrators should either use the Open vSwitch development mailing list to coordinate on common key-value definitions, or choose key names that are likely to be unique. In some cases, where key-value pairs have been defined that are likely to be widely useful, they are documented individually for each table.
    Configuration for an Open vSwitch daemon. There must be exactly one record in the table. Set of bridges managed by the daemon. SSL used globally by the daemon. A unique identifier for the Open vSwitch's physical host. The form of the identifier depends on the type of the host. On a Citrix XenServer, this will likely be the same as . The Citrix XenServer universally unique identifier for the physical host as displayed by xe host-list.

    When ovs-vswitchd starts up, it has an empty flow table and therefore it handles all arriving packets in its default fashion according to its configuration, by dropping them or sending them to an OpenFlow controller or switching them as a standalone switch. This behavior is ordinarily desirable. However, if ovs-vswitchd is restarting as part of a ``hot-upgrade,'' then this leads to a relatively long period during which packets are mishandled.

    This option allows for improvement. When ovs-vswitchd starts with this value set as true, it will neither flush nor expire previously set datapath flows nor will it send and receive any packets to or from the datapath. When this value is later set to false, ovs-vswitchd will start receiving packets from the datapath and re-setup the flows.

    Thus, with this option, the procedure for a hot-upgrade of ovs-vswitchd becomes roughly the following:

    1. Stop ovs-vswitchd.
    2. Set to true.
    3. Start ovs-vswitchd.
    4. Use ovs-ofctl (or some other program, such as an OpenFlow controller) to restore the OpenFlow flow table to the desired state.
    5. Set to false (or remove it entirely from the database).

    The ovs-ctl's ``restart'' and ``force-reload-kmod'' functions use the above config option during hot upgrades.

    A number of flows as a nonnegative integer. This sets the number of flows at which eviction from the datapath flow table will be triggered. If there are a large number of flows then increasing this value to around the number of flows present can result in reduced CPU usage and packet loss.

    The default is 2500. Values below 100 will be rounded up to 100.

    Specifies userspace behaviour for handling flow misses. This takes precedence over flow-eviction-threshold.

    auto
    Handle automatically based on the flow-eviction-threshold and the flow setup governor (default, recommended).
    with-facets
    Always create facets. Expensive kernel flow creation and statistics tracking is always performed, even on flows with only a small number of packets.
    without-facets
    Always handle without facets. Forces flow misses to be handled in userspace. May cause an increase in CPU usage and packet loss on high throughput.

    Specifies the number of threads for software datapaths to use for handling new flows. The default is two less than the number of online CPU cores (but at least 1).

    This configuration is per datapath. If you have more than one software datapath (e.g. some system bridges and some netdev bridges), then the total number of threads is n-handler-threads times the number of software datapaths.

    Sequence number for client to increment. When a client modifies any part of the database configuration and wishes to wait for Open vSwitch to finish applying the changes, it may increment this sequence number. Sequence number that Open vSwitch sets to the current value of after it finishes applying a set of configuration changes.

    The statistics column contains key-value pairs that report statistics about a system running an Open vSwitch. These are updated periodically (currently, every 5 seconds). Key-value pairs that cannot be determined or that do not apply to a platform are omitted.

    Statistics are disabled by default to avoid overhead in the common case when statistics gathering is not useful. Set this value to true to enable populating the column or to false to explicitly disable it.

    Number of CPU processors, threads, or cores currently online and available to the operating system on which Open vSwitch is running, as an integer. This may be less than the number installed, if some are not online or if they are not available to the operating system.

    Open vSwitch userspace processes are not multithreaded, but the Linux kernel-based datapath is.

    A comma-separated list of three floating-point numbers, representing the system load average over the last 1, 5, and 15 minutes, respectively.

    A comma-separated list of integers, each of which represents a quantity of memory in kilobytes that describes the operating system on which Open vSwitch is running. In respective order, these values are:

    1. Total amount of RAM allocated to the OS.
    2. RAM allocated to the OS that is in use.
    3. RAM that can be flushed out to disk or otherwise discarded if that space is needed for another purpose. This number is necessarily less than or equal to the previous value.
    4. Total disk space allocated for swap.
    5. Swap space currently in use.

    On Linux, all five values can be determined and are included. On other operating systems, only the first two values can be determined, so the list will only have two values.

    One such key-value pair, with NAME replaced by a process name, will exist for each running Open vSwitch daemon process, with name replaced by the daemon's name (e.g. process_ovs-vswitchd). The value is a comma-separated list of integers. The integers represent the following, with memory measured in kilobytes and durations in milliseconds:

    1. The process's virtual memory size.
    2. The process's resident set size.
    3. The amount of user and system CPU time consumed by the process.
    4. The number of times that the process has crashed and been automatically restarted by the monitor.
    5. The duration since the process was started.
    6. The duration for which the process has been running.

    The interpretation of some of these values depends on whether the process was started with the . If it was not, then the crash count will always be 0 and the two durations will always be the same. If was given, then the crash count may be positive; if it is, the latter duration is the amount of time since the most recent crash and restart.

    There will be one key-value pair for each file in Open vSwitch's ``run directory'' (usually /var/run/openvswitch) whose name ends in .pid, whose contents are a process ID, and which is locked by a running process. The name is taken from the pidfile's name.

    Currently Open vSwitch is only able to obtain all of the above detail on Linux systems. On other systems, the same key-value pairs will be present but the values will always be the empty string.

    A space-separated list of information on local, writable file systems. Each item in the list describes one file system and consists in turn of a comma-separated list of the following:

    1. Mount point, e.g. / or /var/log. Any spaces or commas in the mount point are replaced by underscores.
    2. Total size, in kilobytes, as an integer.
    3. Amount of storage in use, in kilobytes, as an integer.

    This key-value pair is omitted if there are no local, writable file systems or if Open vSwitch cannot obtain the needed information.

    These columns report the types and versions of the hardware and software running Open vSwitch. We recommend in general that software should test whether specific features are supported instead of relying on version number checks. These values are primarily intended for reporting to human administrators.

    The Open vSwitch version number, e.g. 1.1.0.

    The database schema version number in the form major.minor.tweak, e.g. 1.2.3. Whenever the database schema is changed in a non-backward compatible way (e.g. deleting a column or a table), major is incremented. When the database schema is changed in a backward compatible way (e.g. adding a new column), minor is incremented. When the database schema is changed cosmetically (e.g. reindenting its syntax), tweak is incremented.

    The schema version is part of the database schema, so it can also be retrieved by fetching the schema using the Open vSwitch database protocol.

    An identifier for the type of system on top of which Open vSwitch runs, e.g. XenServer or KVM.

    System integrators are responsible for choosing and setting an appropriate value for this column.

    The version of the system identified by , e.g. 5.6.100-39265p on XenServer 5.6.100 build 39265.

    System integrators are responsible for choosing and setting an appropriate value for this column.

    These columns primarily configure the Open vSwitch database (ovsdb-server), not the Open vSwitch switch (ovs-vswitchd). The OVSDB database also uses the settings.

    The Open vSwitch switch does read the database configuration to determine remote IP addresses to which in-band control should apply.

    Database clients to which the Open vSwitch database server should connect or to which it should listen, along with options for how these connections should be configured. See the table for more information.
    The overall purpose of these columns is described under Common Columns at the beginning of this document.

    Configuration for a bridge within an .

    A record represents an Ethernet switch with one or more ``ports,'' which are the records pointed to by the 's column.

    Bridge identifier. Should be alphanumeric and no more than about 8 bytes long. Must be unique among the names of ports, interfaces, and bridges on a host. Ports included in the bridge. Port mirroring configuration. NetFlow configuration. sFlow(R) configuration. IPFIX configuration.

    VLAN IDs of VLANs on which MAC address learning should be disabled, so that packets are flooded instead of being sent to specific ports that are believed to contain packets' destination MACs. This should ordinarily be used to disable MAC learning on VLANs used for mirroring (RSPAN VLANs). It may also be useful for debugging.

    SLB bonding (see the column in the table) is incompatible with flood_vlans. Consider using another bonding mode or a different type of mirror instead.

    OpenFlow controller set. If unset, then no OpenFlow controllers will be used.

    If there are primary controllers, removing all of them clears the flow table. If there are no primary controllers, adding one also clears the flow table. Other changes to the set of controllers, such as adding or removing a service controller, adding another primary controller to supplement an existing primary controller, or removing only one of two primary controllers, have no effect on the flow table.

    Configuration for OpenFlow tables. Each pair maps from an OpenFlow table ID to configuration for that table.

    When a controller is configured, it is, ordinarily, responsible for setting up all flows on the switch. Thus, if the connection to the controller fails, no new network connections can be set up. If the connection to the controller stays down long enough, no packets can pass through the switch at all. This setting determines the switch's response to such a situation. It may be set to one of the following:

    standalone
    If no message is received from the controller for three times the inactivity probe interval (see ), then Open vSwitch will take over responsibility for setting up flows. In this mode, Open vSwitch causes the bridge to act like an ordinary MAC-learning switch. Open vSwitch will continue to retry connecting to the controller in the background and, when the connection succeeds, it will discontinue its standalone behavior.
    secure
    Open vSwitch will not set up flows on its own when the controller connection fails or when no controllers are defined. The bridge will continue to retry connecting to any defined controllers forever.

    The default is standalone if the value is unset, but future versions of Open vSwitch may change the default.

    The standalone mode can create forwarding loops on a bridge that has more than one uplink port unless STP is enabled. To avoid loops on such a bridge, configure secure mode or enable STP (see ).

    When more than one controller is configured, is considered only when none of the configured controllers can be contacted.

    Changing when no primary controllers are configured clears the flow table.

    Reports the OpenFlow datapath ID in use. Exactly 16 hex digits. (Setting this column has no useful effect. Set instead.) Exactly 16 hex digits to set the OpenFlow datapath ID to a specific value. May not be all-zero. Human readable description of datapath. It is a maximum 256 byte-long free-form string to describe the datapath for debugging purposes, e.g. switch3 in room 3120. If set to true, disable in-band control on the bridge regardless of controller and manager settings. A queue ID as a nonnegative integer. This sets the OpenFlow queue ID that will be used by flows set up by in-band control on this bridge. If unset, or if the port used by an in-band control flow does not have QoS configured, or if the port does not have a queue with the specified ID, the default queue is used instead. List of OpenFlow protocols that may be used when negotiating a connection with a controller. A default value of OpenFlow10 will be used if this column is empty.
    The IEEE 802.1D Spanning Tree Protocol (STP) is a network protocol that ensures loop-free topologies. It allows redundant links to be included in the network to provide automatic backup paths if the active links fail. Enable spanning tree on the bridge. By default, STP is disabled on bridges. Bond, internal, and mirror ports are not supported and will not participate in the spanning tree. The bridge's STP identifier (the lower 48 bits of the bridge-id) in the form xx:xx:xx:xx:xx:xx. By default, the identifier is the MAC address of the bridge. The bridge's relative priority value for determining the root bridge (the upper 16 bits of the bridge-id). A bridge with the lowest bridge-id is elected the root. By default, the priority is 0x8000. The interval between transmissions of hello messages by designated ports, in seconds. By default the hello interval is 2 seconds. The maximum age of the information transmitted by the bridge when it is the root bridge, in seconds. By default, the maximum age is 20 seconds. The delay to wait between transitioning root and designated ports to forwarding, in seconds. By default, the forwarding delay is 15 seconds. Name of datapath provider. The kernel datapath has type system. The userspace datapath has type netdev. A unique identifier of the bridge. On Citrix XenServer this will commonly be the same as . Semicolon-delimited set of universally unique identifier(s) for the network with which this bridge is associated on a Citrix XenServer host. The network identifiers are RFC 4122 UUIDs as displayed by, e.g., xe network-list. An Ethernet address in the form xx:xx:xx:xx:xx:xx to set the hardware address of the local port and influence the datapath ID. Option to allow forwarding of BPDU frames when NORMAL action is invoked. Frames with reserved Ethernet addresses (e.g. STP BPDU) will be forwarded when this option is enabled and the switch is not providing that functionality. 
If STP is enabled on the port, STP BPDUs will never be forwarded. If the Open vSwitch bridge is used to connect different Ethernet networks, and if the Open vSwitch node does not run STP, then this option should be enabled. Default is disabled, set to true to enable. The following destination MAC addresses will not be forwarded when this option is enabled.
    01:80:c2:00:00:00
    IEEE 802.1D Spanning Tree Protocol (STP).
    01:80:c2:00:00:01
    IEEE Pause frame.
    01:80:c2:00:00:0x
    Other reserved protocols.
    00:e0:2b:00:00:00
    Extreme Discovery Protocol (EDP).
    00:e0:2b:00:00:04 and 00:e0:2b:00:00:06
    Ethernet Automatic Protection Switching (EAPS).
    01:00:0c:cc:cc:cc
    Cisco Discovery Protocol (CDP), VLAN Trunking Protocol (VTP), Dynamic Trunking Protocol (DTP), Port Aggregation Protocol (PAgP), and others.
    01:00:0c:cc:cc:cd
    Cisco Shared Spanning Tree Protocol PVSTP+.
    01:00:0c:cd:cd:cd
    Cisco STP Uplink Fast.
    01:00:0c:00:00:00
    Cisco Inter Switch Link.
    01:00:0c:cc:cc:cx
    Cisco CFM.

    The maximum number of seconds to retain a MAC learning entry for which no packets have been seen. The default is currently 300 seconds (5 minutes). The value, if specified, is forced into a reasonable range, currently 15 to 3600 seconds.

    A short MAC aging time allows a network to more quickly detect that a host is no longer connected to a switch port. However, it also makes it more likely that packets will be flooded unnecessarily, when they are addressed to a connected host that rarely transmits packets. To reduce the incidence of unnecessary flooding, use a MAC aging time longer than the maximum interval at which a host will ordinarily transmit packets.

    The maximum number of MAC addresses to learn. The default is currently 2048. The value, if specified, is forced into a reasonable range, currently 10 to 1,000,000.

    Status information about bridges.

    Key-value pairs that report bridge status.

    The bridge-id (in hex) used in spanning tree advertisements. Configuring the bridge-id is described in the stp-system-id and stp-priority keys of the other_config section earlier.

    The designated root (in hex) for this spanning tree.

    The path cost of reaching the designated bridge. A lower number is better.

    The overall purpose of these columns is described under Common Columns at the beginning of this document.

    A port within a .

    Most commonly, a port has exactly one ``interface,'' pointed to by its column. Such a port logically corresponds to a port on a physical Ethernet switch. A port with more than one interface is a ``bonded port'' (see ).

    Some properties that one might think of as belonging to a port are actually part of the port's members.

    Port name. Should be alphanumeric and no more than about 8 bytes long. May be the same as the interface name, for non-bonded ports. Must otherwise be unique among the names of ports, interfaces, and bridges on a host. The port's interfaces. If there is more than one, this is a bonded Port.

    Bridge ports support the following types of VLAN configuration:

    trunk

    A trunk port carries packets on one or more specified VLANs specified in the column (often, on every VLAN). A packet that ingresses on a trunk port is in the VLAN specified in its 802.1Q header, or VLAN 0 if the packet has no 802.1Q header. A packet that egresses through a trunk port will have an 802.1Q header if it has a nonzero VLAN ID.

    Any packet that ingresses on a trunk port tagged with a VLAN that the port does not trunk is dropped.

    access

    An access port carries packets on exactly one VLAN specified in the column. Packets egressing on an access port have no 802.1Q header.

    Any packet with an 802.1Q header with a nonzero VLAN ID that ingresses on an access port is dropped, regardless of whether the VLAN ID in the header is the access port's VLAN ID.

    native-tagged
    A native-tagged port resembles a trunk port, with the exception that a packet without an 802.1Q header that ingresses on a native-tagged port is in the ``native VLAN'' (specified in the column).
    native-untagged
    A native-untagged port resembles a native-tagged port, with the exception that a packet that egresses on a native-untagged port in the native VLAN will not have an 802.1Q header.

    A packet will only egress through bridge ports that carry the VLAN of the packet, as described by the rules above.

    The VLAN mode of the port, as described above. When this column is empty, a default mode is selected as follows:

    • If contains a value, the port is an access port. The column should be empty.
    • Otherwise, the port is a trunk port. The column value is honored if it is present.

    For an access port, the port's implicitly tagged VLAN. For a native-tagged or native-untagged port, the port's native VLAN. Must be empty if this is a trunk port.

    For a trunk, native-tagged, or native-untagged port, the 802.1Q VLAN or VLANs that this port trunks; if it is empty, then the port trunks all VLANs. Must be empty if this is an access port.

    A native-tagged or native-untagged port always trunks its native VLAN, regardless of whether includes that VLAN.

    An 802.1Q header contains two important pieces of information: a VLAN ID and a priority. A frame with a zero VLAN ID, called a ``priority-tagged'' frame, is supposed to be treated the same way as a frame without an 802.1Q header at all (except for the priority).

    However, some network elements ignore any frame that has an 802.1Q header at all, even when the VLAN ID is zero. Therefore, by default Open vSwitch does not output priority-tagged frames, instead omitting the 802.1Q header entirely if the VLAN ID is zero. Set this key to true to enable priority-tagged frames on a port.

    Regardless of this setting, Open vSwitch omits the 802.1Q header on output if both the VLAN ID and priority would be zero.

    All frames output to native-tagged ports have a nonzero VLAN ID, so this setting is not meaningful on native-tagged ports.

    A port that has more than one interface is a ``bonded port.'' Bonding allows for load balancing and fail-over.

    The following types of bonding will work with any kind of upstream switch. On the upstream switch, do not configure the interfaces as a bond:

    balance-slb
    Balances flows among slaves based on source MAC address and output VLAN, with periodic rebalancing as traffic patterns change.
    active-backup
    Assigns all flows to one slave, failing over to a backup slave when the active slave is disabled. This is the only bonding mode in which interfaces may be plugged into different upstream switches.

    The following modes require the upstream switch to support 802.3ad with successful LACP negotiation:

    balance-tcp
    Balances flows among slaves based on L2, L3, and L4 protocol information such as destination MAC address, IP address, and TCP port.

    These columns apply only to bonded ports. Their values are otherwise ignored.

    The type of bonding used for a bonded port. Defaults to active-backup if unset.

    An integer hashed along with flows when choosing output slaves in load balanced bonds. When changed, all flows will be assigned different hash values possibly causing slave selection decisions to change. Does not affect bonding modes which do not employ load balancing such as active-backup.

    An important part of link bonding is detecting that links are down so that they may be disabled. These settings determine how Open vSwitch detects link failure.

    The means used to detect link failures. Defaults to carrier which uses each interface's carrier to detect failures. When set to miimon, will check for failures by polling each interface's MII. The interval, in milliseconds, between successive attempts to poll each interface's MII. Relevant only when is miimon.

    The number of milliseconds for which the link must stay up on an interface before the interface is considered to be up. Specify 0 to enable the interface immediately.

    This setting is honored only when at least one bonded interface is already enabled. When no interfaces are enabled, then the first bond interface to come up is enabled immediately.

    The number of milliseconds for which the link must stay down on an interface before the interface is considered to be down. Specify 0 to disable the interface immediately.

    LACP, the Link Aggregation Control Protocol, is an IEEE standard that allows switches to automatically detect that they are connected by multiple links and aggregate across those links. These settings control LACP behavior.

    Configures LACP on this port. LACP allows directly connected switches to negotiate which links may be bonded. LACP may be enabled on non-bonded ports for the benefit of any switches they may be connected to. active ports are allowed to initiate LACP negotiations. passive ports are allowed to participate in LACP negotiations initiated by a remote switch, but not allowed to initiate such negotiations themselves. If LACP is enabled on a port whose partner switch does not support LACP, the bond will be disabled. Defaults to off if unset. The LACP system ID of this . The system ID of a LACP bond is used to identify itself to its partners. Must be a nonzero MAC address. Defaults to the bridge Ethernet address if unset. The LACP system priority of this . In LACP negotiations, link status decisions are made by the system with the numerically lower priority.

    The LACP timing which should be used on this . By default slow is used. When configured to be fast LACP heartbeats are requested at a rate of once per second causing connectivity problems to be detected more quickly. In slow mode, heartbeats are requested at a rate of once every 30 seconds.

    These settings control behavior when a bond is in balance-slb or balance-tcp mode.

    For a load balanced bonded port, the number of milliseconds between successive attempts to rebalance the bond, that is, to move flows from one interface on the bond to another in an attempt to keep usage of each interface roughly equal. If zero, load balancing is disabled on the bond (link failure still causes flows to move). If less than 1000ms, the rebalance interval will be 1000ms.
    For a bonded port, whether to create a fake internal interface with the name of the port. Use only for compatibility with legacy software that requires this.
    If spanning tree is enabled on the bridge, member ports are enabled by default (with the exception of bond, internal, and mirror ports which do not work with STP). If this column's value is false spanning tree is disabled on the port. The port number used for the lower 8 bits of the port-id. By default, the numbers will be assigned automatically. If any port's number is manually configured on a bridge, then they must all be. The port's relative priority value for determining the root port (the upper 8 bits of the port-id). A port with a lower port-id will be chosen as the root port. By default, the priority is 0x80. Spanning tree path cost for the port. A lower number indicates a faster link. By default, the cost is based on the maximum speed of the link. Quality of Service configuration for this port. The MAC address to use for this port for the purpose of choosing the bridge's MAC address. This column does not necessarily reflect the port's actual MAC address, nor will setting it change the port's actual MAC address. Does this port represent a sub-bridge for its tagged VLAN within the Bridge? See ovs-vsctl(8) for more information. External IDs for a fake bridge (see the column) are defined by prefixing a key with fake-bridge-, e.g. fake-bridge-xs-network-uuids.

    Status information about ports attached to bridges.

    Key-value pairs that report port status.

    The port-id (in hex) used in spanning tree advertisements for this port. Configuring the port-id is described in the stp-port-num and stp-port-priority keys of the other_config section earlier.

    STP state of the port.

    The amount of time (in seconds) the port has been in the current STP state.

    STP role of the port.

    Key-value pairs that report port statistics.

    Number of STP BPDUs sent on this port by the spanning tree library. Number of STP BPDUs received on this port and accepted by the spanning tree library. Number of bad STP BPDUs received on this port. Bad BPDUs include runt packets and those with an unexpected protocol ID.
    The overall purpose of these columns is described under Common Columns at the beginning of this document.
    An interface within a . Interface name. Should be alphanumeric and no more than about 8 bytes long. May be the same as the port name, for non-bonded ports. Must otherwise be unique among the names of ports, interfaces, and bridges on a host. A positive interface index as defined for SNMP MIB-II in RFCs 1213 and 2863, if the interface has one, otherwise 0. The ifindex is useful for seamless integration with protocols such as SNMP and sFlow. The MAC address in use by this interface.

    Ethernet address to set for this interface. If unset then the default MAC address is used:

    • For the local interface, the default is the lowest-numbered MAC address among the other bridge ports, either the value of the in its record, if set, or its actual MAC (for bonded ports, the MAC of its slave whose name is first in alphabetical order). Internal ports and bridge ports that are used as port mirroring destinations (see the table) are ignored.
    • For other internal interfaces, the default MAC is randomly generated.
    • External interfaces typically have a MAC address associated with their hardware.

    Some interfaces may not have a software-controllable MAC address.

    OpenFlow port number for this interface. Unlike most columns, this column's value should be set only by Open vSwitch itself. Other clients should set this column to an empty set (the default) when creating an .

    Open vSwitch populates this column when the port number becomes known. If the interface is successfully added, will be set to a number between 1 and 65535 (generally either in the range 1 to 65279, inclusive, or 65534, the port number for the OpenFlow ``local port''). If the interface cannot be added then Open vSwitch sets this column to -1.

    When is not set, Open vSwitch picks an appropriate value for this column and then tries to keep the value constant across restarts.

    Requested OpenFlow port number for this interface. The port number must be between 1 and 65279, inclusive. Some datapaths cannot satisfy all requests for particular port numbers. When this column is empty or the request cannot be fulfilled, the system will choose a free port. The column reports the assigned OpenFlow port number.

    The port number must be requested in the same transaction that creates the port.

    The interface type, one of:

    system
    An ordinary network device, e.g. eth0 on Linux. Sometimes referred to as ``external interfaces'' since they are generally connected to hardware external to that on which the Open vSwitch is running. The empty string is a synonym for system.
    internal
    A simulated network device that sends and receives traffic. An internal interface whose is the same as its bridge's is called the ``local interface.'' It does not make sense to bond an internal interface, so the terms ``port'' and ``interface'' are often used imprecisely for internal interfaces.
    tap
    A TUN/TAP device managed by Open vSwitch.
    gre
    An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4 tunnel.
    ipsec_gre
    An Ethernet over RFC 2890 Generic Routing Encapsulation over IPv4 IPsec tunnel.
    gre64
    The same as GRE, except that it allows a 64-bit key. To store the upper 32 bits of the key, it uses the GRE protocol sequence number field. This is a nonstandard use of the GRE protocol, since OVS does not increment the sequence number for every packet at the time of encapsulation as a standard GRE implementation expects. See the tunnel options described below for information on configuring GRE tunnels.
    ipsec_gre64
    The same as ipsec_gre, except that it allows a 64-bit key.
    vxlan

    An Ethernet tunnel over the experimental, UDP-based VXLAN protocol described at http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-03.

    Open vSwitch uses UDP destination port 4789. The source port used for VXLAN traffic varies on a per-flow basis and is in the ephemeral port range.

    lisp
    A layer 3 tunnel over the experimental, UDP-based Locator/ID Separation Protocol (RFC 6830).
    patch
    A pair of virtual devices that act as a patch cable.
    null
    An ignored interface. Deprecated and slated for removal in February 2013.

    These options apply to interfaces with of gre, ipsec_gre, gre64, ipsec_gre64, vxlan, and lisp.

    Each tunnel must be uniquely identified by the combination of , , , and . If two ports are defined that are the same except one has an optional identifier and the other does not, the more specific one is matched first. is considered more specific than if a port defines one and another port defines the other.

    Required. The remote tunnel endpoint, one of:

    • An IPv4 address (not a DNS name), e.g. 192.168.0.123. Only unicast endpoints are supported.
    • The word flow. The tunnel accepts packets from any remote tunnel endpoint. To process only packets from a specific remote tunnel endpoint, the flow entries may match on the tun_src field. When sending packets to a remote_ip=flow tunnel, the flow actions must explicitly set the tun_dst field to the IP address of the desired remote tunnel endpoint, e.g. with a set_field action.

    The remote tunnel endpoint for any packet received from a tunnel is available in the tun_src field for matching in the flow table.

    Optional. The tunnel destination IP that received packets must match. Default is to match all addresses. If specified, may be one of:

    • An IPv4 address (not a DNS name), e.g. 192.168.12.3.
    • The word flow. The tunnel accepts packets sent to any of the local IP addresses of the system running OVS. To process only packets sent to a specific IP address, the flow entries may match on the tun_dst field. When sending packets to a local_ip=flow tunnel, the flow actions may explicitly set the tun_src field to the desired IP address, e.g. with a set_field action. However, while routing the tunneled packet out, the local system may override the specified address with the local IP address configured for the outgoing system interface.

      This option is valid only for tunnels also configured with the remote_ip=flow option.

    The tunnel destination IP address for any packet received from a tunnel is available in the tun_dst field for matching in the flow table.

    Optional. The key that received packets must contain, one of:

    • 0. The tunnel receives packets with no key or with a key of 0. This is equivalent to specifying no at all.
    • A positive 24-bit (for VXLAN and LISP), 32-bit (for GRE) or 64-bit (for GRE64) number. The tunnel receives only packets with the specified key.
    • The word flow. The tunnel accepts packets with any key. The key will be placed in the tun_id field for matching in the flow table. The ovs-ofctl manual page contains additional information about matching fields in OpenFlow flows.

    Optional. The key to be set on outgoing packets, one of:

    • 0. Packets sent through the tunnel will have no key. This is equivalent to specifying no at all.
    • A positive 24-bit (for VXLAN and LISP), 32-bit (for GRE) or 64-bit (for GRE64) number. Packets sent through the tunnel will have the specified key.
    • The word flow. Packets sent through the tunnel will have the key set using the set_tunnel Nicira OpenFlow vendor extension (0 is used in the absence of an action). The ovs-ofctl manual page contains additional information about the Nicira OpenFlow vendor extensions.
    Optional. Shorthand to set in_key and out_key at the same time. Optional. The value of the ToS bits to be set on the encapsulating packet. ToS is interpreted as DSCP and ECN bits; the ECN part must be zero. It may also be the word inherit, in which case the ToS will be copied from the inner packet if it is IPv4 or IPv6 (otherwise it will be 0). The ECN fields are always inherited. Default is 0. Optional. The TTL to be set on the encapsulating packet. It may also be the word inherit, in which case the TTL will be copied from the inner packet if it is IPv4 or IPv6 (otherwise it will be the system default, typically 64). Default is the system default TTL. Optional. If enabled, the Don't Fragment bit will be set on tunnel outer headers to allow path MTU discovery. Default is enabled; set to false to disable.

    Only gre and ipsec_gre interfaces support these options.

    Optional. Compute GRE checksums on outgoing packets. Default is disabled; set to true to enable. Checksums present on incoming packets will be validated regardless of this setting.

    GRE checksums impose a significant performance penalty because they cover the entire packet. The encapsulated L3, L4, and L7 packet contents typically have their own checksums, so this additional checksum only adds value for the GRE and encapsulated L2 headers.

    This option is supported for ipsec_gre, but not useful because GRE checksums are weaker than, and redundant with, IPsec payload authentication.

    Only ipsec_gre interfaces support these options.

    Required for certificate authentication. A string containing the peer's certificate in PEM format. Additionally the host's certificate must be specified with the certificate option. Required for certificate authentication. The name of a PEM file containing a certificate that will be presented to the peer during authentication. Optional for certificate authentication. The name of a PEM file containing the private key associated with certificate. If certificate contains the private key, this option may be omitted. Required for pre-shared key authentication. Specifies a pre-shared key for authentication that must be identical on both sides of the tunnel.

    Only patch interfaces support these options.

    The of the for the other side of the patch. The named 's own peer option must specify this 's name. That is, the two patch interfaces must have reversed and peer values.

    Status information about interfaces attached to bridges, updated every 5 seconds. Not all interfaces have all of these properties; virtual interfaces don't have a link speed, for example. Non-applicable columns will have empty values.

    The administrative state of the physical network link.

    The observed state of the physical network link. This is ordinarily the link's carrier status. If the interface's is a bond configured for miimon monitoring, it is instead the network link's miimon status.

    The number of times Open vSwitch has observed the of this change.

    The negotiated speed of the physical network link. Valid values are positive integers greater than 0.

    The duplex mode of the physical network link.

    The MTU (maximum transmission unit); i.e. the largest amount of data that can fit into a single Ethernet frame. The standard Ethernet MTU is 1500 bytes. Some physical media and many kinds of virtual interfaces can be configured with higher MTUs.

    This column will be empty for an interface that does not have an MTU as, for example, some kinds of tunnels do not.

    Boolean value indicating LACP status for this interface. If true, this interface has current LACP information about its LACP partner. This information may be used to monitor the health of interfaces in a LACP enabled port. This column will be empty if LACP is not enabled. Key-value pairs that report port status. Supported status values are -dependent; some interfaces may not have a valid , for example. The name of the device driver controlling the network adapter. The version string of the device driver controlling the network adapter. The version string of the network adapter's firmware, if available. The source IP address used for an IPv4 tunnel end-point, such as gre. Egress interface for tunnels. Currently only relevant for GRE tunnels. On Linux systems, this column will show the name of the interface which is responsible for routing traffic destined for the configured . This could be an internal interface such as a bridge port. Whether carrier is detected on .

    Key-value pairs that report interface statistics. The current implementation updates these counters periodically. Future implementations may update them when an interface is created, when they are queried (e.g. using an OVSDB select operation), and just before an interface is deleted due to virtual interface hot-unplug or VM shutdown, and perhaps at other times, but not on any regular periodic basis.

    These are the same statistics reported by OpenFlow in its struct ofp_port_stats structure. If an interface does not support a given statistic, then that pair is omitted.

    Number of received packets. Number of received bytes. Number of transmitted packets. Number of transmitted bytes. Number of packets dropped by RX. Number of frame alignment errors. Number of packets with RX overrun. Number of CRC errors. Total number of receive errors, greater than or equal to the sum of the above. Number of packets dropped by TX. Number of collisions. Total number of transmit errors, greater than or equal to the sum of the above.

    These settings control ingress policing for packets received on this interface. On a physical interface, this limits the rate at which traffic is allowed into the system from the outside; on a virtual interface (one connected to a virtual machine), this limits the rate at which the VM is able to transmit.

    Policing is a simple form of quality-of-service that simply drops packets received in excess of the configured rate. Due to its simplicity, policing is usually less accurate and less effective than egress QoS (which is configured using the and tables).

    Policing is currently implemented only on Linux. The Linux implementation uses a simple ``token bucket'' approach:

    • The size of the bucket corresponds to . Initially the bucket is full.
    • Whenever a packet is received, its size (converted to tokens) is compared to the number of tokens currently in the bucket. If the required number of tokens are available, they are removed and the packet is forwarded. Otherwise, the packet is dropped.
    • Whenever it is not full, the bucket is refilled with tokens at the rate specified by .

    Policing interacts badly with some network protocols, and especially with fragmented IP packets. Suppose that there is enough network activity to keep the bucket nearly empty all the time. Then this token bucket algorithm will forward a single packet every so often, with the period depending on packet size and on the configured rate. All of the fragments of an IP packet are normally transmitted back-to-back, as a group. In such a situation, therefore, only one of these fragments will be forwarded and the rest will be dropped. IP does not provide any way for the intended recipient to ask for only the remaining fragments. In such a case there are two likely possibilities for what will happen next: either all of the fragments will eventually be retransmitted (as TCP will do), in which case the same problem will recur, or the sender will not realize that its packet has been dropped and data will simply be lost (as some UDP-based protocols will do). Either way, it is possible that no forward progress will ever occur.

    Maximum rate for data received on this interface, in kbps. Data received faster than this rate is dropped. Set to 0 (the default) to disable policing.

    Maximum burst size for data received on this interface, in kb. The default burst size if set to 0 is 1000 kb. This value has no effect if is 0.

    Specifying a larger burst size lets the algorithm be more forgiving, which is important for protocols like TCP that react severely to dropped packets. The burst size should be at least the size of the interface's MTU. Specifying a value that is numerically at least as large as 10% of helps TCP come closer to achieving the full rate.

    BFD, defined in RFC 5880 and RFC 5881, allows point to point detection of connectivity failures by occasional transmission of BFD control messages. It is implemented in Open vSwitch to serve as a more popular and standards compliant alternative to CFM.

    BFD operates by regularly transmitting BFD control messages at a rate negotiated independently in each direction. Each endpoint specifies the rate at which it expects to receive control messages, and the rate at which it's willing to transmit them. Open vSwitch uses a detection multiplier of three, meaning that an endpoint which fails to receive BFD control messages for a period of three times the expected reception rate, will signal a connectivity fault. In the case of a unidirectional connectivity issue, the system not receiving BFD control messages will signal the problem to its peer in the messages it transmits.

    The Open vSwitch implementation of BFD aims to comply faithfully with the requirements put forth in RFC 5880. Currently, the only known omission is ``Demand Mode'', which we hope to include in future. Open vSwitch does not implement the optional Authentication or ``Echo Mode'' features.

    When true BFD is enabled on this interface, otherwise it's disabled. Defaults to false. The fastest rate, in milliseconds, at which this BFD session is willing to receive BFD control messages. The actual rate may be slower if the remote endpoint isn't willing to transmit as quickly as specified. Defaults to 1000. The fastest rate, in milliseconds, at which this BFD session is willing to transmit BFD control messages. The actual rate may be slower if the remote endpoint isn't willing to receive as quickly as specified. Defaults to 100. decay_min_rx is used to set the min_rx, when there is no obvious incoming data traffic at the interface. It cannot be set less than the min_rx. The decay feature is disabled by setting the decay_min_rx to 0. The feature is reset every time decay_min_rx or min_rx is reconfigured. When forwarding_if_rx is true the interface will be considered capable of packet I/O as long as packets are received at the interface. This is important in that when the link becomes temporarily congested, consecutive BFD control packets can be lost. And the forwarding_if_rx can prevent link failover by detecting non-control packets received at the interface. Concatenated path down may be used when the local system should not have traffic forwarded to it for some reason other than a connectivity failure on the interface being monitored. When a controller thinks this may be the case, it may set cpath_down to true which may cause the remote BFD session not to forward traffic to this interface. Defaults to false. When set to true, Check Tunnel Key will make BFD only accept control messages with an in_key of zero. Defaults to false. An Ethernet address in the form xx:xx:xx:xx:xx:xx to set the destination mac address of the bfd packet. If this field is set, it is assumed that all the bfd packets destined to this interface also have the same destination mac address. If not set, a default value of 00:23:20:00:00:01 is used. State of the BFD session. 
The BFD session is fully healthy and negotiated if UP. True if the BFD session believes this may be used to forward traffic. Typically this means the local session is signaling UP, and the remote system isn't signaling a problem such as concatenated path down. A short message indicating what the BFD session thinks is wrong in case of a problem. State of the remote endpoint's BFD session. A short message indicating what the remote endpoint's BFD session thinks is wrong in case of a problem.

    802.1ag Connectivity Fault Management (CFM) allows a group of Maintenance Points (MPs) called a Maintenance Association (MA) to detect connectivity problems with each other. MPs within a MA should have complete and exclusive interconnectivity. This is verified by occasionally broadcasting Continuity Check Messages (CCMs) at a configurable transmission interval.

    According to the 802.1ag specification, each Maintenance Point should be configured out-of-band with a list of Remote Maintenance Points it should have connectivity to. Open vSwitch differs from the specification in this area. It simply assumes the link is faulted if no Remote Maintenance Points are reachable, and considers it not faulted otherwise.

    When operating over tunnels which have no in_key, or an in_key of flow, CFM will only accept CCMs with a tunnel key of zero.

    A Maintenance Point ID (MPID) uniquely identifies each endpoint within a Maintenance Association. The MPID is used to identify this endpoint to other Maintenance Points in the MA. Each end of a link being monitored should have a different MPID. Must be configured to enable CFM on this .

    Indicates a connectivity fault triggered by an inability to receive heartbeats from any remote endpoint. When a fault is triggered on s participating in bonds, they will be disabled.

    Faults can be triggered for several reasons. Most importantly they are triggered when no CCMs are received for a period of 3.5 times the transmission interval. Faults are also triggered when any CCMs indicate that a Remote Maintenance Point is not receiving CCMs but able to send them. Finally, a fault is triggered if a CCM is received which indicates unexpected configuration. Notably, this case arises when a CCM is received which advertises the local MPID.

    Indicates a CFM fault was triggered due to a lack of CCMs received on the . Indicates a CFM fault was triggered due to the reception of a CCM with the RDI bit flagged. Endpoints set the RDI bit in their CCMs when they are not receiving CCMs themselves. This typically indicates a unidirectional connectivity failure. Indicates a CFM fault was triggered due to the reception of a CCM with a MAID other than the one Open vSwitch uses. CFM broadcasts are tagged with an identification number in addition to the MPID called the MAID. Open vSwitch only supports receiving CCM broadcasts tagged with the MAID it uses internally. Indicates a CFM fault was triggered due to the reception of a CCM advertising the same MPID configured in the column of this . This may indicate a loop in the network. Indicates a CFM fault was triggered because the CFM module received CCMs from more remote endpoints than it can keep track of. Indicates a CFM fault was manually triggered by an administrator using an ovs-appctl command. Indicates a CFM fault was triggered due to the reception of a CCM frame having an invalid interval.

    When in extended mode, indicates the operational state of the remote endpoint as either up or down. See .

    Indicates the health of the interface as a percentage of CCM frames received over 21 s. The health of an interface is undefined if it is communicating with more than one . It reduces if healthy heartbeats are not received at the expected rate, and gradually improves as healthy heartbeats are received at the desired rate. Every 21 s, the health of the interface is refreshed.

    As mentioned above, the faults can be triggered for several reasons. The link health will deteriorate even if heartbeats are received but they are reported to be unhealthy. An unhealthy heartbeat in this context is a heartbeat for which either some fault is set or is out of sequence. The interface health can be 100 only on receiving healthy heartbeats at the desired rate.

    When CFM is properly configured, Open vSwitch will occasionally receive CCM broadcasts. These broadcasts contain the MPID of the sending Maintenance Point. The list of MPIDs from which this interface is receiving broadcasts is regularly collected and written to this column.

    The interval, in milliseconds, between transmissions of CFM heartbeats. Three missed heartbeat receptions indicate a connectivity fault.

    In standard operation only intervals of 3, 10, 100, 1,000, 10,000, 60,000, or 600,000 ms are supported. Other values will be rounded down to the nearest value on the list. Extended mode (see ) supports any interval up to 65,535 ms. In either mode, the default is 1000 ms.

    We do not recommend using intervals less than 100 ms.

    When true, the CFM module operates in extended mode. This causes it to use a nonstandard destination address to avoid conflicting with compliant implementations which may be running concurrently on the network. Furthermore, extended mode increases the accuracy of the cfm_interval configuration parameter by breaking wire compatibility with 802.1ag compliant implementations. Defaults to false.

    When true, and is true, the CFM module operates in demand mode. When in demand mode, traffic received on the is used to indicate liveness. CCMs are still transmitted and received, but if the is receiving traffic, their absence does not cause a connectivity fault.

    Demand mode has a few caveats:

    • To ensure that ovs-vswitchd has enough time to pull statistics from the datapath, the fault detection interval is set to 3.5 * MAX(, 500) ms.
    • To avoid ambiguity, demand mode disables itself when there are multiple remote maintenance points.
    • If the is heavily congested, CCMs containing the status may be dropped causing changes in the operational state to be delayed. Similarly, if CCMs containing the RDI bit are not received, unidirectional link failures may not be detected.

    When down, the CFM module marks all CCMs it generates as operationally down without triggering a fault. This allows remote maintenance points to choose not to forward traffic to the on which this CFM module is running. Currently, in Open vSwitch, the opdown bit of CCMs affects s participating in bonds, and the bundle OpenFlow action. This setting is ignored when CFM is not in extended mode. Defaults to up. When set, the CFM module will apply a VLAN tag to all CCMs it generates with the given value. May be the string random in which case each CCM will be tagged with a different randomly generated VLAN. When set, the CFM module will apply a VLAN tag to all CCMs it generates with the given PCP value, the VLAN ID of the tag is governed by the value of . If is unset, a VLAN ID of zero is used.
    The LACP port ID of this . Port IDs are used in LACP negotiations to identify individual ports participating in a bond. The LACP port priority of this . In LACP negotiations s with numerically lower priorities are preferred for aggregation. The LACP aggregation key of this . s with different aggregation keys may not be active within a given at the same time.

    These key-value pairs specifically apply to an interface that represents a virtual Ethernet interface connected to a virtual machine. These key-value pairs should not be present for other types of interfaces. Keys whose names end in -uuid have values that uniquely identify the entity in question. For a Citrix XenServer hypervisor, these values are UUIDs in RFC 4122 format. Other hypervisors may use other formats.

    The MAC address programmed into the ``virtual hardware'' for this interface, in the form xx:xx:xx:xx:xx:xx. For Citrix XenServer, this is the value of the MAC field in the VIF record for this interface. A system-unique identifier for the interface. On XenServer, this will commonly be the same as .

    Hypervisors may sometimes have more than one interface associated with a given , only one of which is actually in use at a given time. For example, in some circumstances XenServer has both a ``tap'' and a ``vif'' interface for a single , but only uses one of them at a time. A hypervisor that behaves this way must mark the currently in use interface active and the others inactive. A hypervisor that never has more than one interface for a given may mark that interface active or omit entirely.

    During VM migration, a given might transiently be marked active on two different hypervisors. That is, active means that this is the active instance within a single hypervisor, not in a broader scope. There is one exception: some hypervisors support ``migration'' from a given hypervisor to itself (most often for test purposes). During such a ``migration,'' two instances of a single might both be briefly marked active on a single hypervisor.

    The virtual interface associated with this interface. The virtual network to which this interface is attached. The VM to which this interface belongs. On XenServer, this will be the same as . The VM to which this interface belongs.

    The ``VLAN splinters'' feature increases Open vSwitch compatibility with buggy network drivers in old versions of Linux that do not properly support VLANs when VLAN devices are not used, at some cost in memory and performance.

    When VLAN splinters are enabled on a particular interface, Open vSwitch creates a VLAN device for each in-use VLAN. For sending traffic tagged with a VLAN on the interface, it substitutes the VLAN device. Traffic received on the VLAN device is treated as if it had been received on the interface on the particular VLAN.

    VLAN splinters consider a VLAN to be in use if:

    • The VLAN is the value in any record.
    • The VLAN is listed within the column of the record of an interface on which VLAN splinters are enabled. An empty does not influence the in-use VLANs: creating 4,096 VLAN devices is impractical because it will exceed the current 1,024 port per datapath limit.
    • An OpenFlow flow within any bridge matches the VLAN.

    The same set of in-use VLANs applies to every interface on which VLAN splinters are enabled. That is, the set is not chosen separately for each interface but selected once as the union of all in-use VLANs based on the rules above.

    It does not make sense to enable VLAN splinters on an interface for an access port, or on an interface that is not a physical port.

    VLAN splinters are deprecated. When broken device drivers are no longer in widespread use, we will delete this feature.

    Set to true to enable VLAN splinters on this interface. Defaults to false.

    VLAN splinters increase kernel and userspace memory overhead, so do not use them unless they are needed.

    VLAN splinters do not support 802.1p priority tags. Received priorities will appear to be 0, regardless of their actual values, and priorities on transmitted packets will also be cleared to 0.

    The overall purpose of these columns is described under Common Columns at the beginning of this document.

    Configuration for a particular OpenFlow table.

    The table's name. Set this column to change the name that controllers will receive when they request table statistics, e.g. ovs-ofctl dump-tables. The name does not affect switch behavior. If set, limits the number of flows that may be added to the table. Open vSwitch may limit the number of flows in a table for other reasons, e.g. due to hardware limitations or for resource availability or performance reasons.

    Controls the switch's behavior when an OpenFlow flow table modification request would add flows in excess of . The supported values are:

    refuse
    Refuse to add the flow or flows. This is also the default policy when is unset.
    evict
    Delete the flow that will expire soonest. See for details.

    When is evict, this controls how flows are chosen for eviction when the flow table would otherwise exceed flows. Its value is a set of NXM fields or sub-fields, each of which takes one of the forms field[] or field[start..end], e.g. NXM_OF_IN_PORT[]. Please see nicira-ext.h for a complete list of NXM field names.

    When a flow must be evicted due to overflow, the flow to evict is chosen through an approximation of the following algorithm:

    1. Divide the flows in the table into groups based on the values of the specified fields or subfields, so that all of the flows in a given group have the same values for those fields. If a flow does not specify a given field, that field's value is treated as 0.
    2. Consider the flows in the largest group, that is, the group that contains the greatest number of flows. If two or more groups all have the same largest number of flows, consider the flows in all of those groups.
    3. Among the flows under consideration, choose the flow that expires soonest for eviction.

    The eviction process only considers flows that have an idle timeout or a hard timeout. That is, eviction never deletes permanent flows. (Permanent flows do count against .)

    Open vSwitch ignores any invalid or unknown field specifications.

    When is not evict, this column has no effect.

    Quality of Service (QoS) configuration for each Port that references it.

    The type of QoS to implement. The currently defined types are listed below:

    linux-htb
    Linux ``hierarchy token bucket'' classifier. See tc-htb(8) (also at http://linux.die.net/man/8/tc-htb) and the HTB manual (http://luxik.cdi.cz/~devik/qos/htb/manual/userg.htm) for information on how this classifier works and how to configure it.
    linux-hfsc
    Linux "Hierarchical Fair Service Curve" classifier. See http://linux-ip.net/articles/hfsc.en/ for information on how this classifier works.

    A map from queue numbers to records. The supported range of queue numbers depends on the QoS type. The queue numbers are the same as the queue_id used in OpenFlow in struct ofp_action_enqueue and other structures.

    Queue 0 is the ``default queue.'' It is used by OpenFlow output actions when no specific queue has been set. When no configuration for queue 0 is present, it is automatically configured as if a record with empty and columns had been specified. (Before version 1.6, Open vSwitch would leave queue 0 unconfigured in this case. With some queuing disciplines, this dropped all packets destined for the default queue.)

    The linux-htb and linux-hfsc classes support the following key-value pair:

    Maximum rate shared by all queued traffic, in bit/s. Optional. If not specified, for physical interfaces, the default is the link rate. For other interfaces or if the link rate cannot be determined, the default is currently 100 Mbps.
    The overall purpose of these columns is described under Common Columns at the beginning of this document.

    A configuration for a port output queue, used in configuring Quality of Service (QoS) features. May be referenced by the queues column in the QoS table.

    If set, Open vSwitch will mark all traffic egressing this with the given DSCP bits. Traffic egressing the default is only marked if it was explicitly selected as the at the time the packet was output. If unset, the DSCP bits of traffic egressing this will remain unchanged.

    linux-htb may use queue_ids less than 61440. It has the following key-value pairs defined.

    Minimum guaranteed bandwidth, in bit/s. Maximum allowed bandwidth, in bit/s. Optional. If specified, the queue's rate will not be allowed to exceed the specified value, even if excess bandwidth is available. If unspecified, defaults to no limit. Burst size, in bits. This is the maximum amount of ``credits'' that a queue can accumulate while it is idle. Optional. Details of the linux-htb implementation require a minimum burst size, so a too-small burst will be silently ignored. A queue with a smaller priority will receive all the excess bandwidth that it can use before a queue with a larger value receives any. Specific priority values are unimportant; only relative ordering matters. Defaults to 0 if unspecified.

    linux-hfsc may use queue_ids less than 61440. It has the following key-value pairs defined.

    Minimum guaranteed bandwidth, in bit/s. Maximum allowed bandwidth, in bit/s. Optional. If specified, the queue's rate will not be allowed to exceed the specified value, even if excess bandwidth is available. If unspecified, defaults to no limit.
    The overall purpose of these columns is described under Common Columns at the beginning of this document.

    A port mirror within a Bridge.

    A port mirror configures a bridge to send selected frames to special ``mirrored'' ports, in addition to their normal destinations. Mirroring traffic may also be referred to as SPAN or RSPAN, depending on how the mirrored traffic is sent.

    Arbitrary identifier for the Mirror.

    To be selected for mirroring, a given packet must enter or leave the bridge through a selected port and it must also be in one of the selected VLANs.

    If true, every packet arriving or departing on any port is selected for mirroring. Ports on which departing packets are selected for mirroring. Ports on which arriving packets are selected for mirroring. VLANs on which packets are selected for mirroring. An empty set selects packets on all VLANs.

    These columns are mutually exclusive. Exactly one of them must be nonempty.

    Output port for selected packets, if nonempty.

    Specifying a port for mirror output reserves that port exclusively for mirroring. No frames other than those selected for mirroring via this column will be forwarded to the port, and any frames received on the port will be discarded.

    The output port may be any kind of port supported by Open vSwitch. It may be, for example, a physical port (sometimes called SPAN) or a GRE tunnel.

    Output VLAN for selected packets, if nonempty.

    The frames will be sent out all ports that trunk , as well as any ports with implicit VLAN . When a mirrored frame is sent out a trunk port, the frame's VLAN tag will be set to , replacing any existing tag; when it is sent out an implicit VLAN port, the frame will not be tagged. This type of mirroring is sometimes called RSPAN.

    See the documentation for in the table for a list of destination MAC addresses which will not be mirrored to a VLAN to avoid confusing switches that interpret the protocols that they represent.

    Please note: Mirroring to a VLAN can disrupt a network that contains unmanaged switches. Consider an unmanaged physical switch with two ports: port 1, connected to an end host, and port 2, connected to an Open vSwitch configured to mirror received packets into VLAN 123 on port 2. Suppose that the end host sends a packet on port 1 that the physical switch forwards to port 2. The Open vSwitch forwards this packet to its destination and then reflects it back on port 2 in VLAN 123. This reflected packet causes the unmanaged physical switch to replace the MAC learning table entry, which correctly pointed to port 1, with one that incorrectly points to port 2. Afterward, the physical switch will direct packets destined for the end host to the Open vSwitch on port 2, instead of to the end host on port 1, disrupting connectivity. If mirroring to a VLAN is desired in this scenario, then the physical switch must be replaced by one that learns Ethernet addresses on a per-VLAN basis. In addition, learning should be disabled on the VLAN containing mirrored traffic. If this is not done then intermediate switches will learn the MAC address of each end host from the mirrored traffic. If packets being sent to that end host are also mirrored, then they will be dropped since the switch will attempt to send them out the input port. Disabling learning for the VLAN will cause the switch to correctly send the packet out all ports configured for that VLAN. If Open vSwitch is being used as an intermediate switch, learning can be disabled by adding the mirrored VLAN to in the appropriate table or tables.

    Mirroring to a GRE tunnel has fewer caveats than mirroring to a VLAN and should generally be preferred.

    Key-value pairs that report mirror statistics.

    Number of packets transmitted through this mirror. Number of bytes transmitted through this mirror.
    The overall purpose of these columns is described under Common Columns at the beginning of this document.

    An OpenFlow controller.

    Open vSwitch supports two kinds of OpenFlow controllers:

    Primary controllers

    This is the kind of controller envisioned by the OpenFlow 1.0 specification. Usually, a primary controller implements a network policy by taking charge of the switch's flow table.

    Open vSwitch initiates and maintains persistent connections to primary controllers, retrying the connection each time it fails or drops. The column in the table applies to primary controllers.

    Open vSwitch permits a bridge to have any number of primary controllers. When multiple controllers are configured, Open vSwitch connects to all of them simultaneously. Because OpenFlow 1.0 does not specify how multiple controllers coordinate in interacting with a single switch, more than one primary controller should be specified only if the controllers are themselves designed to coordinate with each other. (The Nicira-defined NXT_ROLE OpenFlow vendor extension may be useful for this.)

    Service controllers

    These kinds of OpenFlow controller connections are intended for occasional support and maintenance use, e.g. with ovs-ofctl. Usually a service controller connects only briefly to inspect or modify some of a switch's state.

    Open vSwitch listens for incoming connections from service controllers. The service controllers initiate and, if necessary, maintain the connections from their end. The column in the table does not apply to service controllers.

    Open vSwitch supports configuring any number of service controllers.

    The target determines the type of controller.

    Connection method for controller.

    The following connection methods are currently supported for primary controllers:

    ssl:ip[:port]

    The specified SSL port (default: 6633) on the host at the given ip, which must be expressed as an IP address (not a DNS name). The column in the table must point to a valid SSL configuration when this form is used.

    SSL support is an optional feature that is not always built as part of Open vSwitch.

    tcp:ip[:port]
    The specified TCP port (default: 6633) on the host at the given ip, which must be expressed as an IP address (not a DNS name).

    The following connection methods are currently supported for service controllers:

    pssl:[port][:ip]

    Listens for SSL connections on the specified TCP port (default: 6633). If ip, which must be expressed as an IP address (not a DNS name), is specified, then connections are restricted to the specified local IP address.

    The column in the table must point to a valid SSL configuration when this form is used.

    SSL support is an optional feature that is not always built as part of Open vSwitch.

    ptcp:[port][:ip]
    Listens for connections on the specified TCP port (default: 6633). If ip, which must be expressed as an IP address (not a DNS name), is specified, then connections are restricted to the specified local IP address.

    When multiple controllers are configured for a single bridge, the target values must be unique. Duplicate values yield unspecified results.

    If it is specified, this setting must be one of the following strings that describes how Open vSwitch contacts this OpenFlow controller over the network:

    in-band
    In this mode, this controller's OpenFlow traffic travels over the bridge associated with the controller. With this setting, Open vSwitch allows traffic to and from the controller regardless of the contents of the OpenFlow flow table. (Otherwise, Open vSwitch would never be able to connect to the controller, because it did not have a flow to enable it.) This is the most common connection mode because it is not necessary to maintain two independent networks.
    out-of-band
    In this mode, OpenFlow traffic uses a control network separate from the bridge associated with this controller, that is, the bridge does not use any of its own network devices to communicate with the controller. The control network must be configured separately, before or after ovs-vswitchd is started.

    If not specified, the default is implementation-specific.

    Maximum number of milliseconds to wait between connection attempts. Default is implementation-specific. Maximum number of milliseconds of idle time on connection to controller before sending an inactivity probe message. If Open vSwitch does not communicate with the controller for the specified number of milliseconds, it will send a probe. If a response is not received for the same additional amount of time, Open vSwitch assumes the connection has been broken and attempts to reconnect. Default is implementation-specific. A value of 0 disables inactivity probes.

    OpenFlow switches send certain messages to controllers spontaneously, that is, not in response to any request from the controller. These messages are called ``asynchronous messages.'' These columns allow asynchronous messages to be limited or disabled to ensure the best use of network resources.

    The OpenFlow protocol enables asynchronous messages at time of connection establishment, which means that a controller can receive asynchronous messages, potentially many of them, even if it turns them off immediately after connecting. Set this column to false to change Open vSwitch behavior to disable, by default, all asynchronous messages. The controller can use the NXT_SET_ASYNC_CONFIG Nicira extension to OpenFlow to turn on any messages that it does want to receive, if any.

    The maximum rate at which the switch will forward packets to the OpenFlow controller, in packets per second. This feature prevents a single bridge from overwhelming the controller. If not specified, the default is implementation-specific.

    In addition, when a high rate triggers rate-limiting, Open vSwitch queues controller packets for each port and transmits them to the controller at the configured rate. The controller_burst_limit value limits the number of queued packets. Ports on a bridge share the packet queue fairly.

    Open vSwitch maintains two such packet rate-limiters per bridge: one for packets sent up to the controller because they do not correspond to any flow, and the other for packets sent up to the controller by request through flow actions. When both rate-limiters are filled with packets, the actual rate that packets are sent to the controller is up to twice the specified rate.

    In conjunction with controller_rate_limit, the maximum number of unused packet credits that the bridge will allow to accumulate, in packets. If not specified, the default is implementation-specific.

    These values are considered only in in-band control mode (see connection_mode).

    When multiple controllers are configured on a single bridge, there should be only one set of unique values in these columns. If different values are set for these columns in different controllers, the effect is unspecified.

    The IP address to configure on the local port, e.g. 192.168.0.123. If this value is unset, then and are ignored. The IP netmask to configure on the local port, e.g. 255.255.255.0. If is set but this value is unset, then the default is chosen based on whether the IP address is class A, B, or C. The IP address of the gateway to configure on the local port, as a string, e.g. 192.168.0.1. Leave this column unset if this network has no gateway.
    true if currently connected to this controller, false otherwise.

    The level of authority this controller has on the associated bridge. Possible values are:

    other
    Allows the controller access to all OpenFlow features.
    master
    Equivalent to other, except that there may be at most one master controller at a time. When a controller configures itself as master, any existing master is demoted to the slave role.
    slave
    Allows the controller read-only access to OpenFlow features. Attempts to modify the flow table will be rejected with an error. Slave controllers do not receive OFPT_PACKET_IN or OFPT_FLOW_REMOVED messages, but they do receive OFPT_PORT_STATUS messages.
    A human-readable description of the last error on the connection to the controller; i.e. strerror(errno). This key will exist only if an error has occurred.

    The state of the connection to the controller:

    VOID
    Connection is disabled.
    BACKOFF
    Attempting to reconnect at an increasing period.
    CONNECTING
    Attempting to connect.
    ACTIVE
    Connected, remote host responsive.
    IDLE
    Connection is idle. Waiting for response to keep-alive.

    These values may change in the future. They are provided only for human consumption.

    The amount of time since this controller last successfully connected to the switch (in seconds). Value is empty if controller has never successfully connected. The amount of time since this controller last disconnected from the switch (in seconds). Value is empty if controller has never disconnected.

    Additional configuration for a connection between the controller and the Open vSwitch.

    The Differentiated Service Code Point (DSCP) is specified using 6 bits in the Type of Service (TOS) field in the IP header. DSCP provides a mechanism to classify the network traffic and provide Quality of Service (QoS) on IP networks. The DSCP value specified here is used when establishing the connection between the controller and the Open vSwitch. If no value is specified, a default value of 48 is chosen. Valid DSCP values must be in the range 0 to 63.
    The overall purpose of these columns is described under Common Columns at the beginning of this document.

    Configuration for a database connection to an Open vSwitch database (OVSDB) client.

    This table primarily configures the Open vSwitch database (ovsdb-server), not the Open vSwitch switch (ovs-vswitchd). The switch does read the table to determine what connections should be treated as in-band.

    The Open vSwitch database server can initiate and maintain active connections to remote clients. It can also listen for database connections.

    Connection method for managers.

    The following connection methods are currently supported:

    ssl:ip[:port]

    The specified SSL port (default: 6632) on the host at the given ip, which must be expressed as an IP address (not a DNS name). The column in the table must point to a valid SSL configuration when this form is used.

    SSL support is an optional feature that is not always built as part of Open vSwitch.

    tcp:ip[:port]
    The specified TCP port (default: 6632) on the host at the given ip, which must be expressed as an IP address (not a DNS name).
    pssl:[port][:ip]

    Listens for SSL connections on the specified TCP port (default: 6632). Specify 0 for port to have the kernel automatically choose an available port. If ip, which must be expressed as an IP address (not a DNS name), is specified, then connections are restricted to the specified local IP address.

    The column in the table must point to a valid SSL configuration when this form is used.

    SSL support is an optional feature that is not always built as part of Open vSwitch.

    ptcp:[port][:ip]
    Listens for connections on the specified TCP port (default: 6632). Specify 0 for port to have the kernel automatically choose an available port. If ip, which must be expressed as an IP address (not a DNS name), is specified, then connections are restricted to the specified local IP address.

    When multiple managers are configured, the target values must be unique. Duplicate values yield unspecified results.

    If it is specified, this setting must be one of the following strings that describes how Open vSwitch contacts this OVSDB client over the network:

    in-band
    In this mode, this connection's traffic travels over a bridge managed by Open vSwitch. With this setting, Open vSwitch allows traffic to and from the client regardless of the contents of the OpenFlow flow table. (Otherwise, Open vSwitch would never be able to connect to the client, because it did not have a flow to enable it.) This is the most common connection mode because it is not necessary to maintain two independent networks.
    out-of-band
    In this mode, the client's traffic uses a control network separate from that managed by Open vSwitch, that is, Open vSwitch does not use any of its own network devices to communicate with the client. The control network must be configured separately, before or after ovs-vswitchd is started.

    If not specified, the default is implementation-specific.

    Maximum number of milliseconds to wait between connection attempts. Default is implementation-specific. Maximum number of milliseconds of idle time on connection to the client before sending an inactivity probe message. If Open vSwitch does not communicate with the client for the specified number of milliseconds, it will send a probe. If a response is not received for the same additional amount of time, Open vSwitch assumes the connection has been broken and attempts to reconnect. Default is implementation-specific. A value of 0 disables inactivity probes. true if currently connected to this manager, false otherwise. A human-readable description of the last error on the connection to the manager; i.e. strerror(errno). This key will exist only if an error has occurred.

    The state of the connection to the manager:

    VOID
    Connection is disabled.
    BACKOFF
    Attempting to reconnect at an increasing period.
    CONNECTING
    Attempting to connect.
    ACTIVE
    Connected, remote host responsive.
    IDLE
    Connection is idle. Waiting for response to keep-alive.

    These values may change in the future. They are provided only for human consumption.

    The amount of time since this manager last successfully connected to the database (in seconds). Value is empty if manager has never successfully connected. The amount of time since this manager last disconnected from the database (in seconds). Value is empty if manager has never disconnected. Space-separated list of the names of OVSDB locks that the connection holds. Omitted if the connection does not hold any locks. Space-separated list of the names of OVSDB locks that the connection is currently waiting to acquire. Omitted if the connection is not waiting for any locks. Space-separated list of the names of OVSDB locks that the connection has had stolen by another OVSDB client. Omitted if no locks have been stolen from this connection.

    When specifies a connection method that listens for inbound connections (e.g. ptcp: or pssl:) and more than one connection is actually active, the value is the number of active connections. Otherwise, this key-value pair is omitted.

    When multiple connections are active, status columns and key-value pairs (other than this one) report the status of one arbitrarily chosen connection.

    When target is ptcp: or pssl:, this is the TCP port on which the OVSDB server is listening. (This is particularly useful when target specifies a port of 0, allowing the kernel to choose any available port.)

    Additional configuration for a connection between the manager and the Open vSwitch Database.

    The Differentiated Service Code Point (DSCP) is specified using 6 bits in the Type of Service (TOS) field in the IP header. DSCP provides a mechanism to classify the network traffic and provide Quality of Service (QoS) on IP networks. The DSCP value specified here is used when establishing the connection between the manager and the Open vSwitch. If no value is specified, a default value of 48 is chosen. Valid DSCP values must be in the range 0 to 63.
    The overall purpose of these columns is described under Common Columns at the beginning of this document.
    A NetFlow target. NetFlow is a protocol that exports a number of details about terminating IP flows, such as the principals involved and duration. NetFlow targets in the form ip:port. The ip must be specified numerically, not as a DNS name. Engine ID to use in NetFlow messages. Defaults to datapath index if not specified. Engine type to use in NetFlow messages. Defaults to datapath index if not specified. The interval at which NetFlow records are sent for flows that are still active, in seconds. A value of 0 requests the default timeout (currently 600 seconds); a value of -1 disables active timeouts.

    If this column's value is false, the ingress and egress interface fields of NetFlow flow records are derived from OpenFlow port numbers. When it is true, the 7 most significant bits of these fields will be replaced by the least significant 7 bits of the engine id. This is useful because many NetFlow collectors do not expect multiple switches to be sending messages from the same host, so they do not store the engine information which could be used to disambiguate the traffic.

    When this option is enabled, a maximum of 508 ports are supported.

    The overall purpose of these columns is described under Common Columns at the beginning of this document.
    SSL configuration for an Open_vSwitch. Name of a PEM file containing the private key used as the switch's identity for SSL connections to the controller. Name of a PEM file containing a certificate, signed by the certificate authority (CA) used by the controller and manager, that certifies the switch's private key, identifying a trustworthy switch. Name of a PEM file containing the CA certificate used to verify that the switch is connected to a trustworthy controller. If set to true, then Open vSwitch will attempt to obtain the CA certificate from the controller on its first SSL connection and save it to the named PEM file. If it is successful, it will immediately drop the connection and reconnect, and from then on all SSL connections must be authenticated by a certificate signed by the CA certificate thus obtained. This option exposes the SSL connection to a man-in-the-middle attack obtaining the initial CA certificate. It may still be useful for bootstrapping. The overall purpose of these columns is described under Common Columns at the beginning of this document.

    A set of sFlow(R) targets. sFlow is a protocol for remote monitoring of switches.

    Name of the network device whose IP address should be reported as the ``agent address'' to collectors. If not specified, the agent device is figured from the first target address and the routing table. If the routing table does not contain a route to the target, the IP address defaults to the in the collector's . If an agent IP address cannot be determined any of these ways, sFlow is disabled. Number of bytes of a sampled packet to send to the collector. If not specified, the default is 128 bytes. Polling rate in seconds to send port statistics to the collector. If not specified, defaults to 30 seconds. Rate at which packets should be sampled and sent to the collector. If not specified, defaults to 400, which means one out of 400 packets, on average, will be sent to the collector. sFlow targets in the form ip:port. The overall purpose of these columns is described under Common Columns at the beginning of this document.

    A set of IPFIX collectors. IPFIX is a protocol that exports a number of details about flows.

    IPFIX target collectors in the form ip:port. For per-bridge packet sampling, i.e. when this row is referenced from a , the rate at which packets should be sampled and sent to each target collector. If not specified, defaults to 400, which means one out of 400 packets, on average, will be sent to each target collector. Ignored for per-flow sampling, i.e. when this row is referenced from a . For per-bridge packet sampling, i.e. when this row is referenced from a , the IPFIX Observation Domain ID sent in each IPFIX packet. If not specified, defaults to 0. Ignored for per-flow sampling, i.e. when this row is referenced from a . For per-bridge packet sampling, i.e. when this row is referenced from a , the IPFIX Observation Point ID sent in each IPFIX flow record. If not specified, defaults to 0. Ignored for per-flow sampling, i.e. when this row is referenced from a . The maximum period in seconds for which an IPFIX flow record is cached and aggregated before being sent. If not specified, defaults to 0. If 0, caching is disabled. The maximum number of IPFIX flow records that can be cached at a time. If not specified, defaults to 0. If 0, caching is disabled. The overall purpose of these columns is described under Common Columns at the beginning of this document.

    A set of IPFIX collectors of packet samples generated by OpenFlow sample actions.

    The ID of this collector set, unique among the bridge's collector sets, to be used as the collector_set_id in OpenFlow sample actions. The bridge into which OpenFlow sample actions can be added to send packet samples to this set of IPFIX collectors. Configuration of the set of IPFIX collectors to send one flow record per sampled packet to. The overall purpose of these columns is described under Common Columns at the beginning of this document.
    openvswitch-2.0.1+git20140120/vswitchd/xenserver.c000066400000000000000000000044001226605124000214700ustar00rootroot00000000000000/* Copyright (c) 2009, 2010, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include "xenserver.h" #include #include #include #include #include #include #include "dynamic-string.h" #include "process.h" #include "vlog.h" VLOG_DEFINE_THIS_MODULE(xenserver); /* If running on a XenServer, the XenServer host UUID as a 36-character string, * otherwise null. 
*/
static char *host_uuid;

/* Looks for the XenServer installation UUID in /etc/xensource-inventory and,
 * if a matching line is found, stores a copy of the UUID in 'host_uuid'.
 *
 * If the file cannot be opened, logs the reason (at debug level when the file
 * simply does not exist) and returns without touching 'host_uuid'.  If the
 * file can be read but no line matches, logs an error. */
static void
read_host_uuid(void)
{
    static const char filename[] = "/etc/xensource-inventory";
    char line[128];
    FILE *file;

    file = fopen(filename, "r");
    if (!file) {
        if (errno == ENOENT) {
            /* A missing inventory file is taken to mean "not a XenServer". */
            VLOG_DBG("not running on a XenServer");
        } else {
            VLOG_INFO("%s: open: %s", filename, ovs_strerror(errno));
        }
        return;
    }

    while (fgets(line, sizeof line, file)) {
        static const char leader[] = "INSTALLATION_UUID='";
        const int leader_len = strlen(leader);
        const int uuid_len = 36;
        static const char trailer[] = "'\n";
        const int trailer_len = strlen(trailer);

        /* A line matches only if it is exactly the leader, 36 characters, and
         * the trailer (closing quote plus newline).  The 36 characters
         * themselves are not checked for UUID syntax. */
        if (strlen(line) == leader_len + uuid_len + trailer_len
            && !memcmp(line, leader, leader_len)
            && !memcmp(line + leader_len + uuid_len, trailer, trailer_len)) {
            /* NOTE(review): xmemdup0() is defined elsewhere; presumably it
             * returns a null-terminated heap copy of the 36 bytes -- confirm. */
            host_uuid = xmemdup0(line + leader_len, uuid_len);
            VLOG_INFO("running on XenServer, host-uuid %s", host_uuid);
            /* Close the file before the early return so it is not leaked. */
            fclose(file);
            return;
        }
    }
    fclose(file);
    VLOG_ERR("%s: INSTALLATION_UUID not found", filename);
}

/* Returns 'host_uuid', first running read_host_uuid() through pthread_once()
 * so that the inventory file is read at most once, on the first call. */
const char *
xenserver_get_host_uuid(void)
{
    static pthread_once_t once = PTHREAD_ONCE_INIT;
    pthread_once(&once, read_host_uuid);
    return host_uuid;
}
openvswitch-2.0.1+git20140120/vswitchd/xenserver.h000066400000000000000000000013311226605124000214750ustar00rootroot00000000000000/* Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ #ifndef VSWITCHD_XENSERVER_H #define VSWITCHD_XENSERVER_H 1 const char *xenserver_get_host_uuid(void); #endif /* xenserver.h */ openvswitch-2.0.1+git20140120/xenserver/000077500000000000000000000000001226605124000174735ustar00rootroot00000000000000openvswitch-2.0.1+git20140120/xenserver/.gitignore000066400000000000000000000000261226605124000214610ustar00rootroot00000000000000/openvswitch-xen.spec openvswitch-2.0.1+git20140120/xenserver/GPLv2000066400000000000000000000431031226605124000203110ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. 
These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. 
(Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. 
c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. openvswitch-2.0.1+git20140120/xenserver/LICENSE000066400000000000000000000651761226605124000205170ustar00rootroot00000000000000As a special exception to the GNU Lesser General Public License, you may link, statically or dynamically, a "work that uses the Library" with a publicly distributed version of the Library to produce an executable file containing portions of the Library, and distribute that executable file under terms of your choice, without any of the additional requirements listed in clause 6 of the GNU Lesser General Public License. 
By "a publicly distributed version of the Library", we mean either the unmodified Library as distributed, or a modified version of the Library that is distributed under the conditions defined in clause 3 of the GNU Library General Public License. This exception does not however invalidate any other reasons why the executable file might be covered by the GNU Lesser General Public License. ------------ GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. 
To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. 
We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. 
Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. 
Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. 
(For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. 
Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. 
The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. 
(It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. 
You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. 
You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. 
If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! openvswitch-2.0.1+git20140120/xenserver/README000066400000000000000000000131011226605124000203470ustar00rootroot00000000000000This directory contains files for seamless integration of Open vSwitch on Citrix XenServer hosts managed by the Citrix management tools. Files in this directory are licensed on a file-by-file basis. Please refer to each file for details. Most of the files in this directory are installed on a XenServer system under the same name; underscores are replaced by slashes. The files are: etc_init.d_openvswitch Initializes Open vSwitch at boot and shuts it down at shutdown. etc_init.d_openvswitch-xapi-update Init script to ensure openvswitch-cfg-update is called for the current host at boot. 
etc_logrotate.d_openvswitch Ensures that logs in /var/log/openvswitch are rotated periodically and that appropriate daemons reopen their log files at that point. etc_profile.d_openvswitch.sh Open vSwitch-related shell functions for the administrator's convenience. etc_xapi.d_plugins_openvswitch-cfg-update xapi plugin script to update the cache of configuration items in the ovs-vswitchd configuration that are managed in the xapi database when integrated with Citrix management tools. etc_xensource_scripts_vif Open vSwitch-aware replacement for Citrix script of the same name. openvswitch-xen.spec spec file for building RPMs to install on a XenServer host. opt_xensource_libexec_interface-reconfigure opt_xensource_libexec_InterfaceReconfigureBridge.py opt_xensource_libexec_InterfaceReconfigure.py opt_xensource_libexec_InterfaceReconfigureVswitch.py Open vSwitch-aware replacements for Citrix scripts of the same names. usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py xsconsole plugin to configure the pool-wide configuration keys used to control Open vSwitch when integrated with Citrix management tools. usr_share_openvswitch_scripts_ovs-xapi-sync Daemon to monitor the external_ids columns of the Bridge and Interface OVSDB tables for changes that require interrogating XAPI. usr_share_openvswitch_scripts_sysconfig.template Template for Open vSwitch's /etc/sysconfig/openvswitch configuration file. Open vSwitch installs a number of xen-bugtool extensions in /etc/xensource/bugtool to gather additional information useful for debugging. The sources for the extensions are in ../utilities/bugtool/plugins: kernel-info/openvswitch.xml Collect kernel information relevant to Open vSwitch, such as slabinfo. network-status/openvswitch.xml Collect networking information relevant to Open vSwitch. 
Runs the following scripts, which are described below: * ovs-bugtool-bfd-show * ovs-bugtool-cfm-show * ovs-bugtool-lacp-show * ovs-bugtool-list-dbs * ovs-bugtool-ovsdb-dump * ovs-bugtool-tc-class-show * ovs-bugtool-bond-show * ovs-bugtool-ovs-ofctl-show * ovs-bugtool-ovs-ofctl-dump-flows * ovs-bugtool-ovs-appctl-dpif * ovs-bugtool-coverage-show * ovs-bugtool-memory-show * ovs-bugtool-vsctl-show system-configuration/openvswitch.xml Collect system configuration information relevant to Open vSwitch, including timezone. Runs the following script which is described below: * ovs-bugtool-daemons-ver system-configuration.xml Collect system configuration data. This category is configured to collect up to 1Mb of data, take up to 60 seconds to collect data, run every time and is hidden from display in XenCenter. A number of scripts are installed in /usr/share/openvswitch/scripts to assist Open vSwitch's xen-bugtool extensions. The sources for the scripts are located in ../utilities/bugtool: ovs-bugtool-bfd-show Script to dump detailed BFD information for all enabled interfaces. ovs-bugtool-cfm-show Script to dump detailed CFM information for all enabled interfaces. ovs-bugtool-lacp-show Script to dump detailed LACP information for all enabled ports. ovs-bugtool-list-dbs Script to list the databases controlled by ovsdb-server. ovs-bugtool-ovsdb-dump Script to dump contents of Open vSwitch configuration database in comma-separated value format. ovs-bugtool-tc-class-show Script to dump tc class configuration for all network interfaces. ovs-bugtool-ovs-ofctl-show Script to dump information about flow tables and ports of each bridge. ovs-bugtool-ovs-ofctl-dump-flows Script to dump openflow flows of each bridge. ovs-bugtool-ovs-appctl-dpif Script to collect a summary of configured datapaths and datapath flows. ovs-bugtool-coverage-show Script to count the number of times particular events occur during ovs-vswitchd's runtime. 
ovs-bugtool-memory-show Script to show some basic statistics about ovs-vswitchd's memory usage. ovs-bugtool-vsctl-show Script to show a brief overview of the database contents. ovs-bugtool-daemons-ver Script to dump version information for all Open vSwitch daemons. ---------------------------------------------------------------------- Copyright (C) 2009, 2010, 2011 Nicira, Inc. Copying and distribution of this file, with or without modification, are permitted in any medium without royalty provided the copyright notice and this notice are preserved. This file is offered as-is, without warranty of any kind. openvswitch-2.0.1+git20140120/xenserver/automake.mk000066400000000000000000000026631226605124000216410ustar00rootroot00000000000000# Copyright (C) 2009, 2010, 2011, 2012 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. 
EXTRA_DIST += \ xenserver/GPLv2 \ xenserver/LICENSE \ xenserver/README \ xenserver/automake.mk \ xenserver/etc_init.d_openvswitch \ xenserver/etc_init.d_openvswitch-xapi-update \ xenserver/etc_logrotate.d_openvswitch \ xenserver/etc_profile.d_openvswitch.sh \ xenserver/etc_xapi.d_plugins_openvswitch-cfg-update \ xenserver/etc_xensource_scripts_vif \ xenserver/openvswitch-xen.spec \ xenserver/openvswitch-xen.spec.in \ xenserver/opt_xensource_libexec_InterfaceReconfigure.py \ xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py \ xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py \ xenserver/opt_xensource_libexec_interface-reconfigure \ xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py \ xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync \ xenserver/usr_share_openvswitch_scripts_sysconfig.template $(srcdir)/xenserver/openvswitch-xen.spec: xenserver/openvswitch-xen.spec.in $(top_builddir)/config.status ($(ro_shell) && sed -e 's,[@]VERSION[@],$(VERSION),g') \ < $(srcdir)/xenserver/$(@F).in > $(@F).tmp || exit 1; \ if cmp -s $(@F).tmp $@; then touch $@; rm $(@F).tmp; else mv $(@F).tmp $@; fi openvswitch-2.0.1+git20140120/xenserver/etc_init.d_openvswitch000077500000000000000000000101661226605124000240760ustar00rootroot00000000000000#!/bin/sh # # openvswitch # # chkconfig: 2345 09 91 # description: Manage Open vSwitch kernel modules and user-space daemons # Copyright (C) 2009, 2010, 2011 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
### BEGIN INIT INFO # Provides: openvswitch-switch # Required-Start: # Required-Stop: # Default-Start: 2 3 4 5 # Default-Stop: 0 1 6 # Short-Description: Open vSwitch switch ### END INIT INFO . /usr/share/openvswitch/scripts/ovs-lib || exit 1 . /etc/xensource-inventory test -e /etc/sysconfig/openvswitch && . /etc/sysconfig/openvswitch case `cat /etc/xensource/network.conf` in vswitch|openvswitch) ;; bridge) exit 0 ;; *) echo "Open vSwitch disabled (/etc/xensource/network.conf is invalid)" >&2 exit 0 ;; esac start_ovs_xapi_sync () { if daemon_is_running ovs-xapi-sync; then log_success_msg "ovs-xapi-sync is already running" else PYTHONPATH=/usr/share/openvswitch/python \ /usr/share/openvswitch/scripts/ovs-xapi-sync \ --log-file --pidfile --detach --monitor unix:/var/run/openvswitch/db.sock fi } start () { set ovs_ctl ${1-start} set "$@" --system-id="$INSTALLATION_UUID" set "$@" --system-type="$PRODUCT_BRAND" set "$@" --system-version="$PRODUCT_VERSION-$BUILD_NUMBER" set "$@" --external-id=xs-system-uuid="$INSTALLATION_UUID" set "$@" --daemon-cwd=/var/xen/openvswitch if test X"$FORCE_COREFILES" != X; then set "$@" --force-corefiles="$FORCE_COREFILES" fi if test X"$OVSDB_SERVER_PRIORITY" != X; then set "$@" --ovsdb-server-priority="$OVSDB_SERVER_PRIORITY" fi if test X"$VSWITCHD_PRIORITY" != X; then set "$@" --ovs-vswitchd-priority="$VSWITCHD_PRIORITY" fi if test X"$VSWITCHD_MLOCKALL" != X; then set "$@" --mlockall="$VSWITCHD_MLOCKALL" fi if test ! -e /var/run/openvswitch.booted; then touch /var/run/openvswitch.booted set "$@" --delete-bridges fi set "$@" $OVS_CTL_OPTS "$@" start_ovs_xapi_sync ovs_ctl --protocol=gre enable-protocol touch /var/lock/subsys/openvswitch } force_reload_kmod () { start force-reload-kmod # Restart the high-availability daemon if it is running. Otherwise # it loses its heartbeat and reboots the system after a few minutes. 
if pidof xhad >/dev/null && test -e /etc/xensource/xhad.conf; then PATH=$PATH:/opt/xensource/xha action "Stopping HA daemon" ha_stop_daemon action "Starting HA daemon" ha_start_daemon fi action "Stopping ovs-xapi-sync" stop_daemon ovs-xapi-sync action "Starting ovs-xapi-sync" start_ovs_xapi_sync } stop () { ovs_ctl stop stop_daemon ovs-xapi-sync rm -f /var/lock/subsys/openvswitch } restart () { if [ "$1" = "--save-flows=yes" ]; then stop_daemon ovs-xapi-sync start restart else stop start fi } case $1 in start) start ;; stop) stop ;; restart) shift restart "$@" ;; reload|force-reload) # The main OVS daemons keep up-to-date, but ovs-xapi-sync needs help. if daemon_is_running ovs-xapi-sync; then action "Configuring Open vSwitch external IDs" \ ovs-appctl -t ovs-xapi-sync flush-cache fi ;; status) ovs_ctl status && daemon_status ovs-xapi-sync ;; version) ovs_ctl version ;; force-reload-kmod) force_reload_kmod ;; help) printf "openvswitch [start|stop|restart|reload|force-reload|status|version]\n" ;; *) printf "Unknown command: $1\n" exit 1 ;; esac openvswitch-2.0.1+git20140120/xenserver/etc_init.d_openvswitch-xapi-update000077500000000000000000000041761226605124000263210ustar00rootroot00000000000000#!/bin/bash # # openvswitch-xapi-update # # chkconfig: 2345 95 01 # description: Update Open vSwitch configuration from XAPI database at boot # Copyright (C) 2009, 2010 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
### BEGIN INIT INFO # Provides: openvswitch-xapi-update # Required-Start: $network $remote_fs # Required-Stop: $network # Default-Start: 3 5 # Default-Stop: # Short-Description: openvswitch-xapi-update # Description: reconfigures Open vSwitch based on XAPI configuration ### END INIT INFO . /etc/init.d/functions function do_host_call { xe host-call-plugin host-uuid="$INSTALLATION_UUID" plugin="openvswitch-cfg-update" fn="update" >/dev/null } function start { if [ ! -f /etc/xensource-inventory ]; then printf "openvswitch-xapi-update ERROR: XenSource inventory not present in /etc/xensource-inventory\n" exit 1 fi if test -e /etc/xensource/network.conf; then NETWORK_MODE=$(cat /etc/xensource/network.conf) fi case ${NETWORK_MODE:=openvswitch} in vswitch|openvswitch) ;; bridge) exit 0 ;; *) echo "Open vSwitch disabled (/etc/xensource/network.conf is invalid)" >&2 exit 0 ;; esac source /etc/xensource-inventory action "Updating configuration" do_host_call } case "$1" in start) start ;; stop) # Nothing to do here. ;; restart) start ;; help) printf "openvswitch-xapi-update [start|stop|restart]\n" ;; *) printf "Unknown command: $1\n" exit 1 ;; esac openvswitch-2.0.1+git20140120/xenserver/etc_logrotate.d_openvswitch000066400000000000000000000011111226605124000251160ustar00rootroot00000000000000# Copyright (C) 2009, 2010, 2011, 2012 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. 
/var/log/openvswitch/*.log { daily compress sharedscripts missingok postrotate # Tell Open vSwitch daemons to reopen their log files for pidfile in `cd /var/run/openvswitch && echo *.pid`; do ovs-appctl -t "${pidfile%%.pid}" vlog/reopen done endscript } openvswitch-2.0.1+git20140120/xenserver/etc_profile.d_openvswitch.sh000066400000000000000000000023101226605124000251710ustar00rootroot00000000000000# Copyright (C) 2009, 2010, 2011 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. alias vswitch='service openvswitch' alias openvswitch='service openvswitch' function watchdp { watch ovs-dpctl show "$@" } function watchdpflows { local grep="" local dp=$1 shift if [ $# -gt 0 ]; then grep="| grep $@" fi watch "ovs-dpctl dump-flows $dp $grep" } function watchflows { local grep="" local dp=$1 shift bridge=$(ovs-dpctl show $dp | grep 'port 0:' | cut -d' ' -f 3) if [ $# -gt 0 ]; then grep="| grep $@" fi watch "ovs-ofctl dump-flows unix:/var/run/$bridge.mgmt $grep" } function monitorlogs { local grep="" if [ $# -gt 0 ]; then grep="| grep --line-buffered '^==> .* <==$" for i in "$@"; do grep="$grep\|$i" done grep="$grep'" fi cmd="tail -F /var/log/messages /var/log/openvswitch/ovs-vswitchd.log /var/log/openvswitch/ovsdb-server /var/log/xensource.log $grep | tee /var/log/monitorlogs.out" printf "cmd: $cmd\n" eval "$cmd" } openvswitch-2.0.1+git20140120/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update000077500000000000000000000217511226605124000276540ustar00rootroot00000000000000#!/usr/bin/env python # # xapi plugin script to update the cache of configuration items in the # ovs-vswitchd configuration that are managed in the xapi database when # integrated with Citrix management tools. # Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TBD: - error handling needs to be improved.  Currently this can leave
# TBD:   the system in a bad state if anything goes wrong.

import XenAPIPlugin
import XenAPI
import os
import subprocess
import sys
import syslog
import re

vsctl="/usr/bin/ovs-vsctl"
ofctl="/usr/bin/ovs-ofctl"
cacert_filename="/etc/openvswitch/vswitchd.cacert"

# Extracts the value of the hwaddr key from the text that
# "ovs-vsctl get Bridge <br> other-config" prints.  The capture is limited
# to hex digits and colons so that it stops at the closing quote of the
# hwaddr value instead of running on into any keys that follow it.
_HWADDR_RE = re.compile(r'hwaddr="([0-9a-fA-F:]+)"')


def delete_cacert():
    """Delete the CA certificate, so that we go back to boot-strapping mode."""
    try:
        os.remove(cacert_filename)
    except OSError:
        # Ignore error if file doesn't exist
        pass


def _reset_before_controller_change(session):
    """Drop the CA certificate and attempt an emergency reset.

    The reset is best effort: a failure is reported through syslog and
    otherwise ignored, so the caller still goes on to change the
    controller configuration.
    """
    delete_cacert()
    try:
        emergency_reset(session, None)
    except Exception:
        # sys.exc_info() is used rather than binding the exception in the
        # except clause so that this file parses under Python 2 and 3.
        syslog.syslog("emergency reset failed (%s); continuing"
                      % sys.exc_info()[1])


def update(session, args):
    """Bring the ovs-vswitchd configuration in line with the XAPI database.

    session -- XenAPI session used to read pool, network and PIF records.
    args    -- unused.

    Returns a human-readable summary of what was changed, or
    "No change to configuration" when nothing was.

    Raises XenAPIPlugin.Failure when the host does not belong to exactly
    one pool, or when an ovs-vsctl/ovs-ofctl modification fails.
    """
    # Refresh bridge network UUIDs in case this host joined or left a pool.
    script = "/opt/xensource/libexec/interface-reconfigure"
    try:
        retval = subprocess.call([script, "rewrite"])
        if retval != 0:
            syslog.syslog("%s exited with status %d" % (script, retval))
    except OSError:
        e = sys.exc_info()[1]
        syslog.syslog("%s: failed to execute (%s)" % (script, e.strerror))

    pools = session.xenapi.pool.get_all()
    # We assume there is only ever one pool...
    if len(pools) == 0:
        raise XenAPIPlugin.Failure("NO_POOL_FOR_HOST", [])
    if len(pools) > 1:
        raise XenAPIPlugin.Failure("MORE_THAN_ONE_POOL_FOR_HOST", [])
    new_controller = False
    pool = session.xenapi.pool.get_record(pools[0])
    controller = pool.get("vswitch_controller", "")
    ret_str = ""
    currentController = vswitchCurrentController()

    if controller == "" and currentController != "":
        _reset_before_controller_change(session)
        removeControllerCfg()
        ret_str += "Successfully removed controller config. "
    elif controller != currentController:
        _reset_before_controller_change(session)
        setControllerCfg(controller)
        new_controller = True
        ret_str += "Successfully set controller to %s. " % controller

    try:
        pool_fail_mode = pool["other_config"]["vswitch-controller-fail-mode"]
    except KeyError:
        pool_fail_mode = None

    # Map each bridge name to its XAPI network record.
    bton = {}
    for rec in session.xenapi.network.get_all_records().values():
        try:
            bton[rec['bridge']] = rec
        except KeyError:
            pass

    # If new controller, get management MAC addresses from XAPI now
    # in case fail_mode set to secure which may affect XAPI access
    mgmt_bridge = None
    host_mgmt_mac = None
    host_mgmt_device = None
    pool_mgmt_macs = {}
    if new_controller:
        recs = session.xenapi.PIF.get_all_records_where(
            'field "management"="true"')
        for rec in recs.values():
            pool_mgmt_macs[rec.get('MAC')] = rec.get('device')

    dib_changed = False
    fail_mode_changed = False
    for bridge in vswitchCfgQuery(['list-br']).split():
        network = bton.get(bridge)
        if network is None:
            # A bridge without a XAPI network record used to abort the whole
            # update with a KeyError, leaving later bridges unconfigured.
            syslog.syslog("bridge %s has no XAPI network record; skipping"
                          % bridge)
            continue
        bridge = vswitchCfgQuery(['br-to-parent', bridge])

        xapi_dib = network['other_config'].get('vswitch-disable-in-band')
        if not xapi_dib:
            xapi_dib = ''

        ovs_dib = vswitchCfgQuery(['--', '--if-exists', 'get', 'Bridge',
                                   bridge,
                                   'other_config:disable-in-band']).strip('"')

        # Do nothing if setting is invalid, and warn the user.
        if xapi_dib not in ['true', 'false', '']:
            ret_str += '"' + xapi_dib + '"' + \
                ' is an invalid value for vswitch-disable-in-band on ' + \
                bridge + ' '

        # Change bridge disable-in-band option if XAPI and OVS states differ.
        elif xapi_dib != ovs_dib:
            # 'true' or 'false'
            if xapi_dib:
                vswitchCfgMod(['--', 'set', 'Bridge', bridge,
                               'other_config:disable-in-band=' + xapi_dib])
            # '' or None
            else:
                vswitchCfgMod(['--', 'remove', 'Bridge', bridge,
                               'other_config', 'disable-in-band'])
            dib_changed = True

        # Change bridge fail_mode if XAPI state differs from OVS state.
        bridge_fail_mode = vswitchCfgQuery(["get", "Bridge",
                                            bridge, "fail_mode"]).strip('[]"')

        # 'bridge' now names the parent bridge, which may have no record.
        try:
            fail_mode = bton[bridge]["other_config"]["vswitch-controller-fail-mode"]
        except KeyError:
            fail_mode = None

        if fail_mode not in ['secure', 'standalone']:
            fail_mode = pool_fail_mode

        if fail_mode != 'secure':
            fail_mode = 'standalone'

        if bridge_fail_mode != fail_mode:
            vswitchCfgMod(['--', 'set', 'Bridge', bridge,
                           "fail_mode=%s" % fail_mode])
            fail_mode_changed = True

        # Determine local mgmt MAC address if host being added to secure
        # pool so we can add default flows to allow management traffic
        if new_controller and fail_mode_changed and pool_fail_mode == "secure":
            oc = vswitchCfgQuery(["get", "Bridge", bridge, "other-config"])
            m = _HWADDR_RE.search(oc)
            if m and m.group(1) in pool_mgmt_macs:
                mgmt_bridge = bridge
                host_mgmt_mac = m.group(1)
                host_mgmt_device = pool_mgmt_macs[host_mgmt_mac]

    if host_mgmt_mac is not None and mgmt_bridge is not None and \
            host_mgmt_device is not None:
        tp = "idle_timeout=0,priority=0"
        port = vswitchCfgQuery(["get", "interface", host_mgmt_device,
                                "ofport"])
        flows = [
            "%s,in_port=%s,arp,nw_proto=1,actions=local" % (tp, port),
            "%s,in_port=local,arp,dl_src=%s,actions=%s"
            % (tp, host_mgmt_mac, port),
            "%s,in_port=%s,dl_dst=%s,actions=local"
            % (tp, port, host_mgmt_mac),
            "%s,in_port=local,dl_src=%s,actions=%s"
            % (tp, host_mgmt_mac, port),
        ]
        for flow in flows:
            addFlow(mgmt_bridge, flow)

    if dib_changed:
        ret_str += "Updated in-band management. "
    if fail_mode_changed:
        ret_str += "Updated fail_mode. "

    if ret_str != '':
        return ret_str
    else:
        return "No change to configuration"


def vswitchCurrentController():
    """Return the output of "ovs-vsctl get-manager", reduced to a host.

    A value that starts with "ssl:" is reduced to the field between the
    first and second colon; any other value (including the empty string)
    is returned unchanged.
    """
    controller = vswitchCfgQuery(["get-manager"])
    if not controller.startswith("ssl:"):
        return controller
    return controller.split(':')[1]


def removeControllerCfg():
    """Remove the manager and SSL settings through ovs-vsctl."""
    vswitchCfgMod(["--", "del-manager",
                   "--", "del-ssl"])


def setControllerCfg(controller):
    """Replace the manager and SSL settings so that 'controller' is used.

    The manager target is set to ssl:<controller>:6632 and SSL is put into
    bootstrap mode with cacert_filename as the CA certificate path.
    """
    # /etc/xensource/xapi-ssl.pem is mentioned twice below because it
    # contains both the private key and the certificate.
    vswitchCfgMod(["--", "del-manager",
                   "--", "del-ssl",
                   "--", "--bootstrap", "set-ssl",
                   "/etc/xensource/xapi-ssl.pem",
                   "/etc/xensource/xapi-ssl.pem",
                   cacert_filename,
                   "--", "set-manager", 'ssl:' + controller + ':6632'])


def vswitchCfgQuery(action_args):
    """Run ovs-vsctl with 'action_args' and return its stripped stdout.

    The exit status is not checked; "" is returned when no output was
    captured.
    """
    cmd = [vsctl, "-vconsole:off"] + action_args
    stdout = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0]
    if stdout is None:
        return ""
    return stdout.strip()


def vswitchCfgMod(action_args):
    """Run ovs-vsctl (5 second timeout) with 'action_args'.

    Raises XenAPIPlugin.Failure("VSWITCH_CONFIG_MOD_FAILURE") on a
    non-zero exit status.
    """
    cmd = [vsctl, "--timeout=5", "-vconsole:off"] + action_args
    exitcode = subprocess.call(cmd)
    if exitcode != 0:
        raise XenAPIPlugin.Failure("VSWITCH_CONFIG_MOD_FAILURE",
                                   [ str(exitcode) , str(action_args) ])


def emergency_reset(session, args):
    """Run "ovs-vsctl emer-reset"; both parameters are unused.

    Returns a success message, or raises
    XenAPIPlugin.Failure("VSWITCH_EMER_RESET_FAILURE") on a non-zero exit
    status.
    """
    cmd = [vsctl, "--timeout=5", "emer-reset"]
    exitcode = subprocess.call(cmd)
    if exitcode != 0:
        raise XenAPIPlugin.Failure("VSWITCH_EMER_RESET_FAILURE",
                                   [ str(exitcode) ])

    return "Successfully reset configuration"


def addFlow(switch, flow):
    """Add 'flow' to 'switch' with "ovs-ofctl add-flow".

    Raises XenAPIPlugin.Failure("VSWITCH_ADD_FLOW_FAILURE") on a non-zero
    exit status.
    """
    cmd = [ofctl, "add-flow", switch, flow]
    exitcode = subprocess.call(cmd)
    if exitcode != 0:
        raise XenAPIPlugin.Failure("VSWITCH_ADD_FLOW_FAILURE",
                                   [ str(exitcode) , str(switch), str(flow) ])


if __name__ == "__main__":
    XenAPIPlugin.dispatch({"update": update,
                           "emergency_reset": emergency_reset})
openvswitch-2.0.1+git20140120/xenserver/etc_xensource_scripts_vif000077500000000000000000000207461226605124000247130ustar00rootroot00000000000000#!/bin/sh # Copyright (C) 2008,2009 Citrix Systems, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published # by the Free Software Foundation; version 2.1 only. with the special # exception on linking described in file LICENSE. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # CA-23900: Warning: when VIFs are added to windows guests with PV drivers the backend vif device is registered, # unregistered and then registered again. This causes the udev event to fire twice and this script runs twice. # Since the first invocation of the script races with the device unregistration, spurious errors are possible # which will be logged but are safe to ignore since the second script invocation should complete the operation. # Note that each script invocation is run synchronously from udev and so the scripts don't race with each other. # Keep other-config/ keys in sync with device.ml:vif_udev_keys BRCTL="/usr/sbin/brctl" IP="/sbin/ip" vsctl="/usr/bin/ovs-vsctl" handle_promiscuous() { local arg=$(xenstore-read "${PRIVATE}/other-config/promiscuous" 2>/dev/null) if [ $? -eq 0 -a -n "${arg}" ] ; then case $NETWORK_MODE in bridge) case "${arg}" in true|on) echo 1 > /sys/class/net/${dev}/brport/promisc ;; *) echo 0 > /sys/class/net/${dev}/brport/promisc ;; esac ;; openvswitch) logger -t script-vif "${dev}: Promiscuous ports are not supported via Open vSwitch." ;; esac fi } handle_ethtool() { local opt=$1 local arg=$(xenstore-read "${PRIVATE}/other-config/ethtool-${opt}" 2>/dev/null) if [ $? 
-eq 0 -a -n "${arg}" ] ; then case "${arg}" in true|on) /sbin/ethtool -K "${dev}" "${opt}" on ;; false|off) /sbin/ethtool -K "${dev}" "${opt}" off ;; *) logger -t scripts-vif "Unknown ethtool argument ${opt}=${arg} on ${dev}/${VIFUUID}" ;; esac fi } handle_mtu() { local mtu=$(xenstore-read "${PRIVATE}/MTU" 2>/dev/null) if [ $? -eq 0 -a -n "${mtu}" ]; then logger -t scripts-vif "Setting ${dev} MTU ${mtu}" ${IP} link set "${dev}" mtu ${mtu} || logger -t scripts-vif "Failed to ip link set ${dev} mtu ${mtu}. Error code $?" fi } set_vif_external_id() { local key=$1 local value=$2 logger -t scripts-vif "vif${DOMID}.${DEVID} external-ids:\"${key}\"=\"${value}\"" echo "-- set interface vif${DOMID}.${DEVID} external-ids:\"${key}\"=\"${value}\"" } handle_vswitch_vif_details() { local vm=$(xenstore-read "/local/domain/$DOMID/vm" 2>/dev/null) if [ $? -eq 0 -a -n "${vm}" ] ; then local vm_uuid=$(xenstore-read "$vm/uuid" 2>/dev/null) fi if [ -n "${vm_uuid}" ] ; then set_vif_external_id "xs-vm-uuid" "${vm_uuid}" fi local vif_uuid=$(xenstore-read "${PRIVATE}/vif-uuid" 2>/dev/null) if [ -n "${vif_uuid}" ] ; then set_vif_external_id "xs-vif-uuid" "${vif_uuid}" fi local vif_details= local net_uuid=$(xenstore-read "${PRIVATE}/network-uuid" 2>/dev/null) if [ -n "${net_uuid}" ] ; then set_vif_external_id "xs-network-uuid" "${net_uuid}" fi local address=$(xenstore-read "/local/domain/$DOMID/device/vif/$DEVID/mac" 2>/dev/null) if [ -n "${address}" ] ; then set_vif_external_id "attached-mac" "${address}" fi } add_to_bridge() { local address=$(xenstore-read "${PRIVATE}/bridge-MAC") if [ $? -ne 0 -o -z "${address}" ]; then logger -t scripts-vif "Failed to read ${PRIVATE}/bridge-MAC from xenstore" exit 1 fi local bridge=$(xenstore-read "${PRIVATE}/bridge") if [ $? 
-ne 0 -o -z "${bridge}" ]; then logger -t scripts-vif "Failed to read ${PRIVATE}/bridge from xenstore" exit 1 fi logger -t scripts-vif "Adding ${dev} to ${bridge} with address ${address}" ${IP} link set "${dev}" down || logger -t scripts-vif "Failed to ip link set ${dev} down" ${IP} link set "${dev}" arp off || logger -t scripts-vif "Failed to ip link set ${dev} arp off" ${IP} link set "${dev}" multicast off || logger -t scripts-vif "Failed to ip link set ${dev} multicast off" ${IP} link set "${dev}" address "${address}" || logger -t scripts-vif "Failed to ip link set ${dev} address ${address}" ${IP} addr flush "${dev}" || logger -t scripts-vif "Failed to ip addr flush ${dev}" case $NETWORK_MODE in bridge) ${BRCTL} setfd "${bridge}" 0 || logger -t scripts-vif "Failed to brctl setfd ${bridge} 0" ${BRCTL} addif "${bridge}" "${dev}" || logger -t scripts-vif "Failed to brctl addif ${bridge} ${dev}" ;; openvswitch) if [ "$TYPE" = "vif" ] ; then local vif_details=$(handle_vswitch_vif_details $bridge) fi $vsctl --timeout=30 -- --if-exists del-port $dev -- add-port $bridge $dev $vif_details ;; esac ${IP} link set "${dev}" up || logger -t scripts-vif "Failed to ip link set ${dev} up" } remove_from_bridge() { case $NETWORK_MODE in bridge) # Nothing to do ;; openvswitch) $vsctl --timeout=30 -- del-port $dev ;; esac } call_hook_script() { local domid=$1 local action=$2 # Call the VIF hotplug hook if present if [ -x /etc/xapi.d/vif-hotplug ]; then local vm=$(xenstore-read "/local/domain/$domid/vm" 2>/dev/null) if [ $? 
-eq 0 -a -n "${vm}" ] ; then local vm_uuid=$(xenstore-read "$vm/uuid" 2>/dev/null) fi if [ -n "${vm_uuid}" ] ; then logger -t scripts-vif "VM UUID ${vm_uuid}" fi local vif_uuid=$(xenstore-read "${PRIVATE}/vif-uuid" 2>/dev/null) if [ -n "${vif_uuid}" ] ; then logger -t scripts-vif "VIF UUID ${vif_uuid}" fi if [ -n "${vif_uuid}" -a -n "${vm_uuid}" ] ; then logger -t scripts-vif "Calling VIF hotplug hook for VM ${vm_uuid}, VIF ${vif_uuid}" /etc/xapi.d/vif-hotplug -action "${action}" -vifuuid "${vif_uuid}" -vmuuid "${vm_uuid}" fi fi } NETWORK_MODE=$(cat /etc/xensource/network.conf) ACTION=$1 # Older versions of XenServer do not pass in the type as an argument if [[ $# -lt 2 ]]; then TYPE=vif else TYPE=$2 fi case $NETWORK_MODE in bridge|openvswitch) ;; vswitch) NETWORK_MODE=openvswitch ;; *) logger -t scripts-vif "Unknown network mode $NETWORK_MODE" exit 1 ;; esac case ${TYPE} in vif) if [ -z ${XENBUS_PATH} ]; then DOMID=$3 DEVID=$4 else DOMID=`echo ${XENBUS_PATH} | cut -f 3 -d '/'` DEVID=`echo ${XENBUS_PATH} | cut -f 4 -d '/'` fi dev=vif${DOMID}.${DEVID} ;; tap) dev=$INTERFACE DOMID=`echo ${dev#tap} | cut -f 1 -d '.'` DEVID=`echo ${dev#tap} | cut -f 2 -d '.'` ;; *) logger -t scripts-vif "unknown interface type ${TYPE}" exit 1 ;; esac XAPI=/xapi/${DOMID}/hotplug/vif/${DEVID} HOTPLUG=/xapi/${DOMID}/hotplug/vif/${DEVID} PRIVATE=/xapi/${DOMID}/private/vif/${DEVID} logger -t scripts-vif "Called as \"$@\" domid:$DOMID devid:$DEVID mode:$NETWORK_MODE" case "${ACTION}" in online) if [ "${TYPE}" = "vif" ] ; then handle_ethtool rx handle_ethtool tx handle_ethtool sg handle_ethtool tso handle_ethtool ufo handle_ethtool gso handle_mtu add_to_bridge handle_promiscuous xenstore-write "${HOTPLUG}/vif" "${dev}" xenstore-write "${HOTPLUG}/hotplug" "online" # xs-xen.pq.hq:91e986b8e49f netback-wait-for-hotplug xenstore-write "/local/domain/0/backend/vif/${DOMID}/${DEVID}/hotplug-status" "connected" call_hook_script $DOMID "${ACTION}" fi ;; add) if [ "${TYPE}" = "tap" ] ; then 
add_to_bridge fi ;; remove) if [ "${TYPE}" = "vif" ] ;then xenstore-rm "${HOTPLUG}/hotplug" call_hook_script $DOMID "${ACTION}" fi logger -t scripts-vif "${dev} has been removed" remove_from_bridge ;; move) if [ "${TYPE}" = "vif" ] ;then add_to_bridge fi esac openvswitch-2.0.1+git20140120/xenserver/openvswitch-xen.spec.in000066400000000000000000000442131226605124000241210ustar00rootroot00000000000000# Spec file for Open vSwitch. # Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. This file is offered as-is, # without warranty of any kind. # When building, the rpmbuild command line should define # openvswitch_version, kernel_name, kernel_version, and kernel_flavor # using -D arguments. # for example: # # rpmbuild -D "openvswitch_version 1.1.0+build123" # -D "kernel_name NAME-xen" # -D "kernel_version 2.6.32.12-0.7.1.xs5.6.100.323.170596" # -D "kernel_flavor xen" # -bb /usr/src/redhat/SPECS/openvswitch-xen.spec %if %{?openvswitch_version:0}%{!?openvswitch_version:1} %define openvswitch_version @VERSION@ %endif %if %{?kernel_name:0}%{!?kernel_name:1} %define kernel %(rpm -qa 'kernel*xen-devel' | head -1) %define kernel_name %(rpm -q --queryformat "%%{Name}" %{kernel} | sed 's/-devel//' | sed 's/kernel-//') %define kernel_version %(rpm -q --queryformat "%%{Version}-%%{Release}" %{kernel}) %define kernel_flavor xen %endif %define xen_version %{kernel_version}%{kernel_flavor} # bump this when breaking compatibility with userspace %define module_abi_version 0 # build-supplemental-pack.sh requires this naming for kernel module packages %define module_package modules-%{kernel_flavor}-%{kernel_version} Name: openvswitch Summary: Open vSwitch daemon/database/utilities Group: System Environment/Daemons URL: http://www.openvswitch.org/ Vendor: Nicira, Inc. 
Version: %{openvswitch_version} License: ASL 2.0 Release: 1 Source: openvswitch-%{openvswitch_version}.tar.gz Buildroot: /tmp/openvswitch-xen-rpm Requires: openvswitch.ko.%{module_abi_version} %description Open vSwitch provides standard network bridging functions augmented with support for the OpenFlow protocol for remote per-flow control of traffic. %package %{module_package} Summary: Open vSwitch kernel module Group: System Environment/Kernel License: GPLv2 Provides: %{name}-modules-%{kernel_flavor} = %{kernel_version}, openvswitch.ko.%{module_abi_version} Requires: kernel-%{kernel_name} = %{kernel_version} %description %{module_package} Open vSwitch Linux kernel module compiled against kernel version %{xen_version}. %prep %setup -q -n openvswitch-%{openvswitch_version} %build ./configure --prefix=/usr --sysconfdir=/etc --localstatedir=%{_localstatedir} --with-linux=/lib/modules/%{xen_version}/build --enable-ssl make %{_smp_mflags} %install rm -rf $RPM_BUILD_ROOT make install DESTDIR=$RPM_BUILD_ROOT install -d -m 755 $RPM_BUILD_ROOT/etc install -d -m 755 $RPM_BUILD_ROOT/etc/init.d install -m 755 xenserver/etc_init.d_openvswitch \ $RPM_BUILD_ROOT/etc/init.d/openvswitch install -m 755 xenserver/etc_init.d_openvswitch-xapi-update \ $RPM_BUILD_ROOT/etc/init.d/openvswitch-xapi-update install -d -m 755 $RPM_BUILD_ROOT/etc/sysconfig install -d -m 755 $RPM_BUILD_ROOT/etc/logrotate.d install -m 755 xenserver/etc_logrotate.d_openvswitch \ $RPM_BUILD_ROOT/etc/logrotate.d/openvswitch install -d -m 755 $RPM_BUILD_ROOT/etc/profile.d install -m 755 xenserver/etc_profile.d_openvswitch.sh \ $RPM_BUILD_ROOT/etc/profile.d/openvswitch.sh install -d -m 755 $RPM_BUILD_ROOT/etc/xapi.d/plugins install -m 755 xenserver/etc_xapi.d_plugins_openvswitch-cfg-update \ $RPM_BUILD_ROOT/etc/xapi.d/plugins/openvswitch-cfg-update install -d -m 755 $RPM_BUILD_ROOT/usr/share/openvswitch/scripts install -m 755 xenserver/opt_xensource_libexec_interface-reconfigure \ 
$RPM_BUILD_ROOT/usr/share/openvswitch/scripts/interface-reconfigure install -m 644 xenserver/opt_xensource_libexec_InterfaceReconfigure.py \ $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/InterfaceReconfigure.py install -m 644 xenserver/opt_xensource_libexec_InterfaceReconfigureBridge.py \ $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/InterfaceReconfigureBridge.py install -m 644 xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py \ $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/InterfaceReconfigureVswitch.py install -m 755 xenserver/etc_xensource_scripts_vif \ $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/vif install -m 755 xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync \ $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/ovs-xapi-sync install -m 755 xenserver/usr_share_openvswitch_scripts_sysconfig.template \ $RPM_BUILD_ROOT/usr/share/openvswitch/scripts/sysconfig.template install -d -m 755 $RPM_BUILD_ROOT/usr/lib/xsconsole/plugins-base install -m 644 \ xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py \ $RPM_BUILD_ROOT/usr/lib/xsconsole/plugins-base/XSFeatureVSwitch.py install -d -m 755 $RPM_BUILD_ROOT/lib/modules/%{xen_version}/extra/openvswitch find datapath/linux -name *.ko -exec install -m 755 \{\} $RPM_BUILD_ROOT/lib/modules/%{xen_version}/extra/openvswitch \; install python/compat/uuid.py $RPM_BUILD_ROOT/usr/share/openvswitch/python install python/compat/argparse.py $RPM_BUILD_ROOT/usr/share/openvswitch/python install -d -m 755 $RPM_BUILD_ROOT/etc/xensource/bugtool cp -rf $RPM_BUILD_ROOT/usr/share/openvswitch/bugtool-plugins/* $RPM_BUILD_ROOT/etc/xensource/bugtool # Get rid of stuff we don't want to make RPM happy. 
rm \ $RPM_BUILD_ROOT/usr/bin/ovs-benchmark \ $RPM_BUILD_ROOT/usr/bin/ovs-controller \ $RPM_BUILD_ROOT/usr/bin/ovs-l3ping \ $RPM_BUILD_ROOT/usr/bin/ovs-pki \ $RPM_BUILD_ROOT/usr/bin/ovs-test \ $RPM_BUILD_ROOT/usr/share/man/man1/ovs-benchmark.1 \ $RPM_BUILD_ROOT/usr/share/man/man8/ovs-controller.8 \ $RPM_BUILD_ROOT/usr/share/man/man8/ovs-l3ping.8 \ $RPM_BUILD_ROOT/usr/share/man/man8/ovs-pki.8 \ $RPM_BUILD_ROOT/usr/share/man/man8/ovs-test.8 install -d -m 755 $RPM_BUILD_ROOT/var/lib/openvswitch %clean rm -rf $RPM_BUILD_ROOT %post # A list of Citrix XenServer scripts that we might need to replace # with our own versions. scripts=" /etc/xensource/scripts/vif /opt/xensource/libexec/InterfaceReconfigure.py /opt/xensource/libexec/InterfaceReconfigureBridge.py /opt/xensource/libexec/InterfaceReconfigureVswitch.py /opt/xensource/libexec/interface-reconfigure" # Calculate into $md5sums a comma-separated set of md5sums of the # Citrix XenServer scripts that we might need to replace. We might be # upgrading an older version of the package that moved the files out # of the way, so we need to look for the files in those out-of-the-way # locations first. md5sums= for script in $scripts; do b=$(basename "$script") if test -e /usr/lib/openvswitch/xs-saved/"$b"; then f=/usr/lib/openvswitch/xs-saved/"$b" elif test -e /usr/lib/openvswitch/xs-original/"$b"; then f=/usr/lib/openvswitch/xs-original/"$b" elif test -e "$script" && test ! -h "$script"; then f=$script else printf "\n$script: not found\n" f=/dev/null fi md5sums="$md5sums,$(md5sum $f | awk '{print $1}')" done md5sums=${md5sums#,} # Now check the md5sums against the known sets of md5sums: # # - If they are known to be a version of XenServer scripts that we should # replace, we replace them (by putting $scripts into $replace_files). 
# # - Otherwise, we guess that it's better not to replace them, because the # improvements that our versions of the scripts provide are minimal, so # it's better to avoid possibly breaking any changes made upstream by # Citrix. case $md5sums in cf09a68d9f8b434e79a4c83b01a3bb4b,395866df1b0b20c12c4dd2f7de0ecdb4,9d493545ae81463239d3162cbc798852,862d0939b441de9264a900628e950fe9,21f85db25599d7f026cd489385d58aa6) keep_files= replace_files=$scripts printf "\nVerified host scripts from XenServer 6.0.0.\n" ;; c5f48246577a17cf1b971fb5ce4e920b,2e2c912f86f9c536c89adc34ff3c2b2b,28d3ff72d72bdec4f37d70699f5edb76,67e1d0af16fc1ddf10009c5c063ad2ba,f3feff30aa3b3f8b514664a96a8dc0ab) keep_files= replace_files=$scripts printf "\nVerified host scripts from XenServer 5.6-SP2.\n" ;; c5f48246577a17cf1b971fb5ce4e920b,2e2c912f86f9c536c89adc34ff3c2b2b,28d3ff72d72bdec4f37d70699f5edb76,67e1d0af16fc1ddf10009c5c063ad2ba,24bae6906d182ba47668174f8e480cc6) keep_files= replace_files=$scripts printf "\nVerified host scripts from XenServer 5.6-FP1.\n" ;; *) keep_files=$scripts replace_files= cat </dev/null 2>&1; then :; else cat >>/etc/sysctl.conf < /dev/null fi # Create default or update existing /etc/sysconfig/openvswitch. SYSCONFIG=/etc/sysconfig/openvswitch TEMPLATE=/usr/share/openvswitch/scripts/sysconfig.template if [ ! -e $SYSCONFIG ]; then cp $TEMPLATE $SYSCONFIG else for var in $(awk -F'[ :]' '/^# [_A-Z0-9]+:/{print $2}' $TEMPLATE) do if ! grep $var $SYSCONFIG >/dev/null 2>&1; then echo >> $SYSCONFIG sed -n "/$var:/,/$var=/p" $TEMPLATE >> $SYSCONFIG fi done fi # Deliberately break %postun in broken OVS builds that revert original # XenServer scripts during rpm -U by moving the directory where it thinks # they are saved. if [ -d /usr/lib/openvswitch/xs-original ]; then mkdir -p /usr/lib/openvswitch/xs-saved mv /usr/lib/openvswitch/xs-original/* /usr/lib/openvswitch/xs-saved/ && rmdir /usr/lib/openvswitch/xs-original fi # Replace XenServer files by our versions. 
mkdir -p /usr/lib/openvswitch/xs-saved \ || printf "Could not create script backup directory.\n" for f in $replace_files; do s=$(basename "$f") t=$(readlink "$f") if [ -f "$f" ] && [ "$t" != "/usr/share/openvswitch/scripts/$s" ]; then mv "$f" /usr/lib/openvswitch/xs-saved/ \ || printf "Could not save original XenServer $s script\n" ln -s "/usr/share/openvswitch/scripts/$s" "$f" \ || printf "Could not link to Open vSwitch $s script\n" fi done # Clean up dangling symlinks to removed OVS replacement scripts no longer # provided by OVS. Any time a replacement script is removed from OVS, # it should be added here to ensure correct reversion from old versions of # OVS that don't clean up dangling symlinks during the uninstall phase. for orig in /usr/sbin/xen-bugtool $keep_files; do saved=/usr/lib/openvswitch/xs-saved/$(basename "$orig") [ -e "$saved" ] && mv -f "$saved" "$orig" done # Ensure all required services are set to run for s in openvswitch openvswitch-xapi-update; do if chkconfig --list $s >/dev/null 2>&1; then chkconfig --del $s || printf "Could not remove $s init script.\n" fi chkconfig --add $s || printf "Could not add $s init script.\n" chkconfig $s on || printf "Could not enable $s init script.\n" done if [ "$1" = "1" ]; then # $1 = 1 for install # Configure system to use Open vSwitch /opt/xensource/bin/xe-switch-network-backend vswitch else # $1 = 2 for upgrade mode=$(cat /etc/xensource/network.conf) if [ "$mode" != "vswitch" ] && [ "$mode" != "openvswitch" ]; then printf "\nThe server is not configured to run Open vSwitch. To run in\n" printf "vswitch mode, you must run the following command:\n\n" printf "\txe-switch-network-backend vswitch" printf "\n\n" fi fi %posttrans %{module_package} # Ensure that modprobe will find our modules. 
# # This has to be in %posttrans instead of %post because older versions # installed modules into a different directory and "rpm -U" runs the # new version's %post before removing the old version's files, so if # we use %post then depmod may find the old versions that are about to # be removed. depmod %{xen_version} mode=$(cat /etc/xensource/network.conf) if [ "$mode" = "vswitch" ] || [ "$mode" = "openvswitch" ]; then printf "\nTo use the newly installed Open vSwitch kernel module, you\n" printf "will either have to reboot the hypervisor or follow any\n" printf "workarounds provided by your administration guide. Failure to do\n" printf "so may result in incorrect operation." printf "\n\n" fi %preun if [ "$1" = "0" ]; then # $1 = 0 for uninstall # Configure system to use bridge /opt/xensource/bin/xe-switch-network-backend bridge # The "openvswitch" service should have been removed from # "xe-switch-network-backend bridge". for s in openvswitch openvswitch-xapi-update; do if chkconfig --list $s >/dev/null 2>&1; then chkconfig --del $s || printf "Could not remove $s init script." fi done fi %postun # Restore original XenServer scripts if the OVS equivalent no longer exists. # This works both in the upgrade and erase cases. # This lists every file that every version of OVS has ever replaced. Never # remove old files that OVS no longer replaces, or upgrades from old versions # will fail to restore the XS originals, leaving the system in a broken state. # Also be sure to add removed script paths to the %post scriptlet above to # prevent the same problem when upgrading from old versions of OVS that lack # this restore-on-upgrade logic. for f in \ /etc/xensource/scripts/vif \ /usr/sbin/xen-bugtool \ /opt/xensource/libexec/interface-reconfigure \ /opt/xensource/libexec/InterfaceReconfigure.py \ /opt/xensource/libexec/InterfaceReconfigureBridge.py \ /opt/xensource/libexec/InterfaceReconfigureVswitch.py do # Only revert dangling symlinks. if [ -h "$f" ] && [ ! 
-e "$f" ]; then s=$(basename "$f") if [ ! -f "/usr/lib/openvswitch/xs-saved/$s" ]; then printf "Original XenServer $s script not present in /usr/lib/openvswitch/xs-saved\n" >&2 printf "Could not restore original XenServer script.\n" >&2 else (rm -f "$f" \ && mv "/usr/lib/openvswitch/xs-saved/$s" "$f") \ || printf "Could not restore original XenServer $s script.\n" >&2 fi fi done if [ "$1" = "0" ]; then # $1 = 0 for uninstall rm -f /usr/lib/xsconsole/plugins-base/XSFeatureVSwitch.pyc \ /usr/lib/xsconsole/plugins-base/XSFeatureVSwitch.pyo rm -f /usr/share/openvswitch/scripts/InterfaceReconfigure.pyc \ /usr/share/openvswitch/scripts/InterfaceReconfigure.pyo \ /usr/share/openvswitch/scripts/InterfaceReconfigureBridge.pyc \ /usr/share/openvswitch/scripts/InterfaceReconfigureBridge.pyo \ /usr/share/openvswitch/scripts/InterfaceReconfigureVSwitch.pyc \ /usr/share/openvswitch/scripts/InterfaceReconfigureVSwitch.pyo # Remove all configuration files rm -f /etc/openvswitch/conf.db rm -f /etc/sysconfig/openvswitch rm -f /etc/openvswitch/vswitchd.cacert # Remove saved XenServer scripts directory, but only if it's empty rmdir -p /usr/lib/openvswitch/xs-saved 2>/dev/null fi exit 0 %files %defattr(-,root,root) /etc/init.d/openvswitch /etc/init.d/openvswitch-xapi-update /etc/xapi.d/plugins/openvswitch-cfg-update /etc/xensource/bugtool/* /etc/logrotate.d/openvswitch /etc/profile.d/openvswitch.sh /usr/share/openvswitch/python/ /usr/share/openvswitch/bugtool-plugins/* /usr/share/openvswitch/scripts/ovs-check-dead-ifs /usr/share/openvswitch/scripts/ovs-xapi-sync /usr/share/openvswitch/scripts/interface-reconfigure /usr/share/openvswitch/scripts/InterfaceReconfigure.py /usr/share/openvswitch/scripts/InterfaceReconfigureBridge.py /usr/share/openvswitch/scripts/InterfaceReconfigureVswitch.py /usr/share/openvswitch/scripts/vif /usr/share/openvswitch/scripts/sysconfig.template /usr/share/openvswitch/scripts/ovs-bugtool-* /usr/share/openvswitch/scripts/ovs-save 
/usr/share/openvswitch/scripts/ovs-ctl /usr/share/openvswitch/scripts/ovs-lib /usr/share/openvswitch/vswitch.ovsschema /usr/sbin/ovs-bugtool /usr/sbin/ovs-vlan-bug-workaround /usr/sbin/ovs-vswitchd /usr/sbin/ovsdb-server /usr/bin/ovs-appctl /usr/bin/ovs-dpctl /usr/bin/ovs-dpctl-top /usr/bin/ovs-ofctl /usr/bin/ovs-parse-backtrace /usr/bin/ovs-pcap /usr/bin/ovs-tcpundump /usr/bin/ovs-vlan-test /usr/bin/ovs-vsctl /usr/bin/ovsdb-client /usr/bin/ovsdb-tool /usr/lib/xsconsole/plugins-base/XSFeatureVSwitch.py /usr/share/man/man1/ovsdb-client.1.gz /usr/share/man/man1/ovsdb-server.1.gz /usr/share/man/man1/ovsdb-tool.1.gz /usr/share/man/man5/ovs-vswitchd.conf.db.5.gz /usr/share/man/man8/ovs-appctl.8.gz /usr/share/man/man8/ovs-bugtool.8.gz /usr/share/man/man8/ovs-ctl.8.gz /usr/share/man/man8/ovs-dpctl.8.gz /usr/share/man/man8/ovs-dpctl-top.8.gz /usr/share/man/man8/ovs-ofctl.8.gz /usr/share/man/man8/ovs-parse-backtrace.8.gz /usr/share/man/man1/ovs-pcap.1.gz /usr/share/man/man1/ovs-tcpundump.1.gz /usr/share/man/man8/ovs-vlan-bug-workaround.8.gz /usr/share/man/man8/ovs-vlan-test.8.gz /usr/share/man/man8/ovs-vsctl.8.gz /usr/share/man/man8/ovs-vswitchd.8.gz /var/lib/openvswitch /var/log/openvswitch %exclude /usr/lib/xsconsole/plugins-base/*.py[co] %exclude /usr/share/openvswitch/scripts/*.py[co] %exclude /usr/share/openvswitch/python/*.py[co] %exclude /usr/share/openvswitch/python/ovs/*.py[co] %exclude /usr/share/openvswitch/python/ovs/db/*.py[co] %files %{module_package} /lib/modules/%{xen_version}/extra/openvswitch/openvswitch.ko openvswitch-2.0.1+git20140120/xenserver/opt_xensource_libexec_InterfaceReconfigure.py000066400000000000000000000777671226605124000306360ustar00rootroot00000000000000# Copyright (c) 2008,2009 Citrix Systems, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published # by the Free Software Foundation; version 2.1 only. 
with the special # exception on linking described in file LICENSE. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # import sys import syslog import os from xml.dom.minidom import getDOMImplementation from xml.dom.minidom import parse as parseXML the_root_prefix = "" def root_prefix(): """Returns a string to prefix to all file name references, which is useful for testing.""" return the_root_prefix def set_root_prefix(prefix): global the_root_prefix the_root_prefix = prefix log_destination = "syslog" def get_log_destination(): """Returns the current log destination. 'syslog' means "log to syslog". 'stderr' means "log to stderr".""" return log_destination def set_log_destination(dest): global log_destination log_destination = dest # # Logging. # def log(s): if get_log_destination() == 'syslog': syslog.syslog(s) else: print >>sys.stderr, s # # Exceptions. # class Error(Exception): def __init__(self, msg): Exception.__init__(self) self.msg = msg # # Run external utilities # def run_command(command): log("Running command: " + ' '.join(command)) rc = os.spawnl(os.P_WAIT, root_prefix() + command[0], *command) if rc != 0: log("Command failed %d: " % rc + ' '.join(command)) return False return True # # Configuration File Handling. # class ConfigurationFile(object): """Write a file, tracking old and new versions. Supports writing a new version of a file and applying and reverting those changes. """ __STATE = {"OPEN":"OPEN", "NOT-APPLIED":"NOT-APPLIED", "APPLIED":"APPLIED", "REVERTED":"REVERTED", "COMMITTED": "COMMITTED"} def __init__(self, path): dirname,basename = os.path.split(path) self.__state = self.__STATE['OPEN'] self.__children = [] self.__path = os.path.join(dirname, basename) self.__oldpath = os.path.join(dirname, "." 
+ basename + ".xapi-old") self.__newpath = os.path.join(dirname, "." + basename + ".xapi-new") self.__f = open(self.__newpath, "w") def attach_child(self, child): self.__children.append(child) def path(self): return self.__path def readlines(self): try: return open(self.path()).readlines() except: return "" def write(self, args): if self.__state != self.__STATE['OPEN']: raise Error("Attempt to write to file in state %s" % self.__state) self.__f.write(args) def close(self): if self.__state != self.__STATE['OPEN']: raise Error("Attempt to close file in state %s" % self.__state) self.__f.close() self.__state = self.__STATE['NOT-APPLIED'] def changed(self): if self.__state != self.__STATE['NOT-APPLIED']: raise Error("Attempt to compare file in state %s" % self.__state) return True def apply(self): if self.__state != self.__STATE['NOT-APPLIED']: raise Error("Attempt to apply configuration from state %s" % self.__state) for child in self.__children: child.apply() log("Applying changes to %s configuration" % self.__path) # Remove previous backup. if os.access(self.__oldpath, os.F_OK): os.unlink(self.__oldpath) # Save current configuration. if os.access(self.__path, os.F_OK): os.link(self.__path, self.__oldpath) os.unlink(self.__path) # Apply new configuration. assert(os.path.exists(self.__newpath)) os.link(self.__newpath, self.__path) # Remove temporary file. os.unlink(self.__newpath) self.__state = self.__STATE['APPLIED'] def revert(self): if self.__state != self.__STATE['APPLIED']: raise Error("Attempt to revert configuration from state %s" % self.__state) for child in self.__children: child.revert() log("Reverting changes to %s configuration" % self.__path) # Remove existing new configuration if os.access(self.__newpath, os.F_OK): os.unlink(self.__newpath) # Revert new configuration. if os.access(self.__path, os.F_OK): os.link(self.__path, self.__newpath) os.unlink(self.__path) # Revert to old configuration. 
if os.access(self.__oldpath, os.F_OK): os.link(self.__oldpath, self.__path) os.unlink(self.__oldpath) # Leave .*.xapi-new as an aid to debugging. self.__state = self.__STATE['REVERTED'] def commit(self): if self.__state != self.__STATE['APPLIED']: raise Error("Attempt to commit configuration from state %s" % self.__state) for child in self.__children: child.commit() log("Committing changes to %s configuration" % self.__path) if os.access(self.__oldpath, os.F_OK): os.unlink(self.__oldpath) if os.access(self.__newpath, os.F_OK): os.unlink(self.__newpath) self.__state = self.__STATE['COMMITTED'] # # Helper functions for encoding/decoding database attributes to/from XML. # def _str_to_xml(xml, parent, tag, val): e = xml.createElement(tag) parent.appendChild(e) v = xml.createTextNode(val) e.appendChild(v) def _str_from_xml(n): def getText(nodelist): rc = "" for node in nodelist: if node.nodeType == node.TEXT_NODE: rc = rc + node.data return rc return getText(n.childNodes).strip() def _bool_to_xml(xml, parent, tag, val): if val: _str_to_xml(xml, parent, tag, "True") else: _str_to_xml(xml, parent, tag, "False") def _bool_from_xml(n): s = _str_from_xml(n) if s == "True": return True elif s == "False": return False else: raise Error("Unknown boolean value %s" % s) def _strlist_to_xml(xml, parent, ltag, itag, val): e = xml.createElement(ltag) parent.appendChild(e) for v in val: c = xml.createElement(itag) e.appendChild(c) cv = xml.createTextNode(v) c.appendChild(cv) def _strlist_from_xml(n, ltag, itag): ret = [] for n in n.childNodes: if n.nodeName == itag: ret.append(_str_from_xml(n)) return ret def _map_to_xml(xml, parent, tag, val, attrs): e = xml.createElement(tag) parent.appendChild(e) for n,v in val.items(): if n in attrs: _str_to_xml(xml, e, n, v) else: log("Unknown other-config attribute: %s" % n) def _map_from_xml(n, attrs): ret = {} for n in n.childNodes: if n.nodeName in attrs: ret[n.nodeName] = _str_from_xml(n) return ret def _otherconfig_to_xml(xml, parent, val, 
attrs): return _map_to_xml(xml, parent, "other_config", val, attrs) def _otherconfig_from_xml(n, attrs): return _map_from_xml(n, attrs) # # Definitions of the database objects (and their attributes) used by interface-reconfigure. # # Each object is defined by a dictionary mapping an attribute name in # the xapi database to a tuple containing two items: # - a function which takes this attribute and encodes it as XML. # - a function which takes XML and decocdes it into a value. # # other-config attributes are specified as a simple array of strings _PIF_XML_TAG = "pif" _VLAN_XML_TAG = "vlan" _TUNNEL_XML_TAG = "tunnel" _BOND_XML_TAG = "bond" _NETWORK_XML_TAG = "network" _POOL_XML_TAG = "pool" _ETHTOOL_OTHERCONFIG_ATTRS = ['ethtool-%s' % x for x in 'autoneg', 'speed', 'duplex', 'rx', 'tx', 'sg', 'tso', 'ufo', 'gso', 'gro', 'lro' ] _PIF_OTHERCONFIG_ATTRS = [ 'domain', 'peerdns', 'defaultroute', 'mtu', 'static-routes' ] + \ [ 'bond-%s' % x for x in 'mode', 'miimon', 'downdelay', 'updelay', 'use_carrier', 'hashing-algorithm' ] + \ [ 'vlan-bug-workaround' ] + \ _ETHTOOL_OTHERCONFIG_ATTRS _PIF_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml), 'management': (_bool_to_xml,_bool_from_xml), 'network': (_str_to_xml,_str_from_xml), 'device': (_str_to_xml,_str_from_xml), 'bond_master_of': (lambda x, p, t, v: _strlist_to_xml(x, p, 'bond_master_of', 'slave', v), lambda n: _strlist_from_xml(n, 'bond_master_of', 'slave')), 'bond_slave_of': (_str_to_xml,_str_from_xml), 'VLAN': (_str_to_xml,_str_from_xml), 'VLAN_master_of': (_str_to_xml,_str_from_xml), 'VLAN_slave_of': (lambda x, p, t, v: _strlist_to_xml(x, p, 'VLAN_slave_of', 'master', v), lambda n: _strlist_from_xml(n, 'VLAN_slave_Of', 'master')), 'tunnel_access_PIF_of': (lambda x, p, t, v: _strlist_to_xml(x, p, 'tunnel_access_PIF_of', 'pif', v), lambda n: _strlist_from_xml(n, 'tunnel_access_PIF_of', 'pif')), 'tunnel_transport_PIF_of': (lambda x, p, t, v: _strlist_to_xml(x, p, 'tunnel_transport_PIF_of', 'pif', v), lambda n: 
_strlist_from_xml(n, 'tunnel_transport_PIF_of', 'pif')), 'ip_configuration_mode': (_str_to_xml,_str_from_xml), 'IP': (_str_to_xml,_str_from_xml), 'netmask': (_str_to_xml,_str_from_xml), 'gateway': (_str_to_xml,_str_from_xml), 'DNS': (_str_to_xml,_str_from_xml), 'MAC': (_str_to_xml,_str_from_xml), 'other_config': (lambda x, p, t, v: _otherconfig_to_xml(x, p, v, _PIF_OTHERCONFIG_ATTRS), lambda n: _otherconfig_from_xml(n, _PIF_OTHERCONFIG_ATTRS)), # Special case: We write the current value # PIF.currently-attached to the cache but since it will # not be valid when we come to use the cache later # (i.e. after a reboot) we always read it as False. 'currently_attached': (_bool_to_xml, lambda n: False), } _VLAN_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml), 'tagged_PIF': (_str_to_xml,_str_from_xml), 'untagged_PIF': (_str_to_xml,_str_from_xml), } _TUNNEL_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml), 'access_PIF': (_str_to_xml,_str_from_xml), 'transport_PIF': (_str_to_xml,_str_from_xml), } _BOND_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml), 'master': (_str_to_xml,_str_from_xml), 'slaves': (lambda x, p, t, v: _strlist_to_xml(x, p, 'slaves', 'slave', v), lambda n: _strlist_from_xml(n, 'slaves', 'slave')), } _NETWORK_OTHERCONFIG_ATTRS = [ 'mtu', 'static-routes', 'vswitch-controller-fail-mode', 'vswitch-disable-in-band' ] \ + _ETHTOOL_OTHERCONFIG_ATTRS _NETWORK_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml), 'bridge': (_str_to_xml,_str_from_xml), 'MTU': (_str_to_xml,_str_from_xml), 'PIFs': (lambda x, p, t, v: _strlist_to_xml(x, p, 'PIFs', 'PIF', v), lambda n: _strlist_from_xml(n, 'PIFs', 'PIF')), 'other_config': (lambda x, p, t, v: _otherconfig_to_xml(x, p, v, _NETWORK_OTHERCONFIG_ATTRS), lambda n: _otherconfig_from_xml(n, _NETWORK_OTHERCONFIG_ATTRS)), } _POOL_OTHERCONFIG_ATTRS = ['vswitch-controller-fail-mode'] _POOL_ATTRS = { 'other_config': (lambda x, p, t, v: _otherconfig_to_xml(x, p, v, _POOL_OTHERCONFIG_ATTRS), lambda n: _otherconfig_from_xml(n, 
class DatabaseCache(object):
    """In-memory copy of the xapi network objects used by interface-reconfigure.

    Holds PIF, bond, VLAN, tunnel, network and pool records, each keyed by
    its reference.  The records are loaded either from a xapi session or
    from an XML cache file previously written by save().
    """

    def __read_xensource_inventory(self):
        """Return /etc/xensource-inventory as a dict.

        Each line is split at the first "=" and single quotes are stripped
        from both ends of the value.
        """
        filename = root_prefix() + "/etc/xensource-inventory"
        f = open(filename, "r")
        try:
            lines = [x.strip("\n") for x in f.readlines()]
        finally:
            # Close the file even if reading fails.
            f.close()

        defs = [ (l[:l.find("=")], l[(l.find("=") + 1):]) for l in lines ]
        defs = [ (a, b.strip("'")) for (a,b) in defs ]

        return dict(defs)

    def __pif_on_host(self, pif):
        """Return True if `pif` is one of the PIF references held in the cache."""
        return pif in self.__pifs

    def __filtered_other_config(self, rec, allowed):
        """Return a new dict with the keys of rec['other_config'] listed in `allowed`."""
        oc = rec['other_config']
        return dict([(k, oc[k]) for k in allowed if k in oc])

    def __get_pif_records_from_xapi(self, session, host):
        """Load the PIF records whose 'host' field equals `host`."""
        self.__pifs = {}
        for (p,rec) in session.xenapi.PIF.get_all_records().items():
            if rec['host'] != host:
                continue
            self.__pifs[p] = {}
            for f in _PIF_ATTRS:
                self.__pifs[p][f] = rec[f]
            self.__pifs[p]['other_config'] = \
                self.__filtered_other_config(rec, _PIF_OTHERCONFIG_ATTRS)

    def __get_vlan_records_from_xapi(self, session):
        """Load the VLAN records whose untagged PIF is in the cache."""
        self.__vlans = {}
        for (v,rec) in session.xenapi.VLAN.get_all_records().items():
            if not self.__pif_on_host(rec['untagged_PIF']):
                continue
            self.__vlans[v] = {}
            for f in _VLAN_ATTRS:
                self.__vlans[v][f] = rec[f]

    def __get_tunnel_records_from_xapi(self, session):
        """Load the tunnel records whose transport PIF is in the cache."""
        self.__tunnels = {}
        for t in session.xenapi.tunnel.get_all():
            rec = session.xenapi.tunnel.get_record(t)
            if not self.__pif_on_host(rec['transport_PIF']):
                continue
            self.__tunnels[t] = {}
            for f in _TUNNEL_ATTRS:
                self.__tunnels[t][f] = rec[f]

    def __get_bond_records_from_xapi(self, session):
        """Load the bond records whose master PIF is in the cache."""
        self.__bonds = {}
        for (b,rec) in session.xenapi.Bond.get_all_records().items():
            if not self.__pif_on_host(rec['master']):
                continue
            self.__bonds[b] = {}
            for f in _BOND_ATTRS:
                self.__bonds[b][f] = rec[f]

    def __get_network_records_from_xapi(self, session):
        """Load all network records, keeping only the PIFs held in the cache."""
        self.__networks = {}
        for (n,rec) in session.xenapi.network.get_all_records().items():
            self.__networks[n] = {}
            for f in _NETWORK_ATTRS:
                if f == "PIFs":
                    # drop PIFs on other hosts
                    self.__networks[n][f] = [p for p in rec[f] if self.__pif_on_host(p)]
                elif f == "MTU" and f not in rec:
                    # XenServer 5.5 network records did not have an
                    # MTU field, so allow this to be missing.
                    pass
                else:
                    self.__networks[n][f] = rec[f]
            self.__networks[n]['other_config'] = \
                self.__filtered_other_config(rec, _NETWORK_OTHERCONFIG_ATTRS)

    def __get_pool_records_from_xapi(self, session):
        """Load all pool records."""
        self.__pools = {}
        for p in session.xenapi.pool.get_all():
            rec = session.xenapi.pool.get_record(p)

            self.__pools[p] = {}

            for f in _POOL_ATTRS:
                self.__pools[p][f] = rec[f]

            # Keep only the recognised other_config keys, as is done for
            # PIFs and networks.  Previously the whole other_config dict
            # was retained and the filtering loop had no effect.
            self.__pools[p]['other_config'] = \
                self.__filtered_other_config(rec, _POOL_OTHERCONFIG_ATTRS)

    def __to_xml(self, xml, parent, key, ref, rec, attrs):
        """Encode a database object as XML"""
        e = xml.createElement(key)
        parent.appendChild(e)
        if ref:
            e.setAttribute('ref', ref)

        for n,v in rec.items():
            if n in attrs:
                h,_ = attrs[n]
                h(xml, e, n, v)
            else:
                raise Error("Unknown attribute %s" % n)

    def __from_xml(self, e, attrs):
        """Decode a database object from XML"""
        ref = e.attributes['ref'].value
        rec = {}
        for n in e.childNodes:
            if n.nodeName in attrs:
                _,h = attrs[n.nodeName]
                rec[n.nodeName] = h(n)
        return (ref,rec)

    def __init__(self, session_ref=None, cache_file=None):
        """Populate the cache from xapi, or from `cache_file` if one is given.

        Raises Error if both a session reference and a cache file are
        given, or if the cache file contains an unknown element.
        """
        if session_ref and cache_file:
            raise Error("can't specify session reference and cache file")
        if cache_file is None:
            import XenAPI
            session = XenAPI.xapi_local()

            if not session_ref:
                log("No session ref given on command line, logging in.")
                session.xenapi.login_with_password("root", "")
            else:
                session._session = session_ref

            try:

                inventory = self.__read_xensource_inventory()
                assert('INSTALLATION_UUID' in inventory)
                log("host uuid is %s" % inventory['INSTALLATION_UUID'])

                host = session.xenapi.host.get_by_uuid(inventory['INSTALLATION_UUID'])

                # PIFs must be loaded first: the VLAN, tunnel, bond and
                # network loaders filter on the PIFs already in the cache.
                self.__get_pif_records_from_xapi(session, host)
                self.__get_pool_records_from_xapi(session)
                self.__get_tunnel_records_from_xapi(session)
                self.__get_vlan_records_from_xapi(session)
                self.__get_bond_records_from_xapi(session)
                self.__get_network_records_from_xapi(session)
            finally:
                # Only log out of sessions that were opened here.
                if not session_ref:
                    session.xenapi.session.logout()
        else:
            log("Loading xapi database cache from %s" % cache_file)

            xml = parseXML(root_prefix() + cache_file)

            self.__pifs = {}
            self.__bonds = {}
            self.__vlans = {}
            self.__pools = {}
            self.__tunnels = {}
            self.__networks = {}

            assert(len(xml.childNodes) == 1)
            toplevel = xml.childNodes[0]

            assert(toplevel.nodeName == "xenserver-network-configuration")

            # XML tag -> (attribute codec table, destination dictionary).
            # Tunnel elements go to __tunnels; they used to be stored in
            # __vlans by mistake.
            stores = {
                _PIF_XML_TAG: (_PIF_ATTRS, self.__pifs),
                _BOND_XML_TAG: (_BOND_ATTRS, self.__bonds),
                _VLAN_XML_TAG: (_VLAN_ATTRS, self.__vlans),
                _TUNNEL_XML_TAG: (_TUNNEL_ATTRS, self.__tunnels),
                _NETWORK_XML_TAG: (_NETWORK_ATTRS, self.__networks),
                _POOL_XML_TAG: (_POOL_ATTRS, self.__pools),
            }

            for n in toplevel.childNodes:
                if n.nodeName == "#text":
                    continue
                if n.nodeName not in stores:
                    raise Error("Unknown XML element %s" % n.nodeName)
                attrs, store = stores[n.nodeName]
                (ref,rec) = self.__from_xml(n, attrs)
                store[ref] = rec

    def save(self, cache_file):
        """Write the cache to `cache_file` as XML.

        The document is written to a temporary file named after the
        current process id and then renamed over `cache_file`.
        """
        xml = getDOMImplementation().createDocument(
            None, "xenserver-network-configuration", None)
        for (ref,rec) in self.__pifs.items():
            self.__to_xml(xml, xml.documentElement, _PIF_XML_TAG, ref, rec, _PIF_ATTRS)
        for (ref,rec) in self.__bonds.items():
            self.__to_xml(xml, xml.documentElement, _BOND_XML_TAG, ref, rec, _BOND_ATTRS)
        for (ref,rec) in self.__vlans.items():
            self.__to_xml(xml, xml.documentElement, _VLAN_XML_TAG, ref, rec, _VLAN_ATTRS)
        for (ref,rec) in self.__tunnels.items():
            self.__to_xml(xml, xml.documentElement, _TUNNEL_XML_TAG, ref, rec, _TUNNEL_ATTRS)
        for (ref,rec) in self.__networks.items():
            self.__to_xml(xml, xml.documentElement, _NETWORK_XML_TAG, ref, rec,
                          _NETWORK_ATTRS)
        for (ref,rec) in self.__pools.items():
            self.__to_xml(xml, xml.documentElement, _POOL_XML_TAG, ref, rec, _POOL_ATTRS)

        temp_file = cache_file + ".%d" % os.getpid()
        f = open(temp_file, 'w')
        try:
            f.write(xml.toprettyxml())
        finally:
            f.close()
        os.rename(temp_file, cache_file)

    def get_pif_by_uuid(self, uuid):
        """Return the reference of the PIF with the given uuid.

        Raises Error if no PIF, or more than one PIF, has that uuid.
        """
        pifs = [ref for (ref,rec) in self.__pifs.items() if uuid == rec['uuid']]
        if len(pifs) == 0:
            raise Error("Unknown PIF \"%s\"" % uuid)
        elif len(pifs) > 1:
            raise Error("Non-unique PIF \"%s\"" % uuid)

        return pifs[0]

    def get_pifs_by_device(self, device):
        """Return the references of all PIFs whose 'device' equals `device`."""
        return [ref for (ref,rec) in self.__pifs.items()
                if rec['device'] == device]

    def get_networks_with_bridge(self, bridge):
        """Return the references of all networks whose 'bridge' equals `bridge`."""
        return [ref for (ref,rec) in self.__networks.items()
                if rec['bridge'] == bridge]

    def get_network_by_bridge(self, bridge):
        """Return the first network using `bridge`, or None if there is none.

        Assumes one network has bridge.
        """
        try:
            return self.get_networks_with_bridge(bridge)[0]
        except (KeyError, IndexError):
            # IndexError: no network matched.  Indexing an empty list
            # never raises KeyError, so the old handler could not catch
            # this case.  KeyError is kept for records lacking 'bridge'.
            return None

    def get_pif_by_bridge(self, bridge):
        """Return the single cached PIF attached to a network using `bridge`.

        Raises Error if no network uses the bridge, or if the matching
        networks have no PIF, or more than one PIF, between them.
        """
        networks = self.get_networks_with_bridge(bridge)

        if len(networks) == 0:
            raise Error("No matching network \"%s\"" % bridge)

        answer = None
        for network in networks:
            nwrec = self.get_network_record(network)
            for pif in nwrec['PIFs']:
                # Called for its check only: raises Error for an unknown PIF.
                self.get_pif_record(pif)
                if answer:
                    raise Error("Multiple PIFs on host for network %s" % (bridge))
                answer = pif
        if not answer:
            raise Error("No PIF on host for network %s" % (bridge))
        return answer

    def get_pif_record(self, pif):
        """Return the record for `pif`; raises Error if it is unknown."""
        if pif in self.__pifs:
            return self.__pifs[pif]
        raise Error("Unknown PIF \"%s\"" % pif)

    def get_all_pifs(self):
        """Return the dictionary of all cached PIF records, keyed by reference."""
        return self.__pifs

    def pif_exists(self, pif):
        """Return True if `pif` is a cached PIF reference."""
        return pif in self.__pifs

    def get_management_pif(self):
        """ Returns the management pif on host
        """
        all = self.get_all_pifs()
        for pif in all:
            pifrec = self.get_pif_record(pif)
            if pifrec['management']: return pif
        return None

    def get_network_record(self, network):
        """Return the record for `network`; raises Error if it is unknown."""
        if network in self.__networks:
            return self.__networks[network]
        raise Error("Unknown network \"%s\"" % network)

    def get_bond_record(self, bond):
        """Return the record for `bond`, or None if it is unknown."""
        if bond in self.__bonds:
            return self.__bonds[bond]
        else:
            return None

    def get_vlan_record(self, vlan):
        """Return the record for `vlan`, or None if it is unknown."""
        if vlan in self.__vlans:
            return self.__vlans[vlan]
        else:
            return None

    def get_pool_record(self):
        """Return one cached pool record, or None if there are none."""
        if len(self.__pools) > 0:
            # list() keeps this working where values() is not indexable.
            return list(self.__pools.values())[0]
        return None
def mtu_setting(nw, type, oc):
    """Return the MTU to use for an object on network `nw`, or None.

    By default the MTU is taken from the Network.MTU setting ("1500" if
    the network record has no MTU field).  It can be overridden by an
    'mtu' key in `oc`, the other-config of the object being configured.

    nw   -- network reference, looked up in the database cache.
    type -- string describing the object `oc` is from, e.g. "PIF" or
            "Network"; used only in the log message.
    oc   -- other-config dictionary of that object.

    Returns the MTU value unchanged if int() accepts it; otherwise logs
    the invalid value and returns None.
    """
    mtu = None

    nwrec = db().get_network_record(nw)
    if 'MTU' in nwrec:
        mtu = nwrec['MTU']
    else:
        mtu = "1500"

    if 'mtu' in oc:
        # Apply the override before logging so the message reports the
        # overriding value rather than the Network.MTU being replaced.
        mtu = oc['mtu']
        log("Override Network.MTU setting on bridge %s from %s.MTU is %s" % \
            (nwrec['bridge'], type, mtu))

    if mtu is not None:
        try:
            int(mtu)      # Check that the value is an integer
            return mtu
        except ValueError:
            log("Invalid value for mtu = %s" % mtu)

    return None
"""Get the netdev name for a PIF.""" pifrec = db().get_pif_record(pif) if pif_is_vlan(pif): return "%(device)s.%(VLAN)s" % pifrec else: return pifrec['device'] # # Bridges # def pif_is_bridged(pif): pifrec = db().get_pif_record(pif) nwrec = db().get_network_record(pifrec['network']) if nwrec['bridge']: # TODO: sanity check that nwrec['bridgeless'] != 'true' return True else: # TODO: sanity check that nwrec['bridgeless'] == 'true' return False def pif_bridge_name(pif): """Return the bridge name of a pif. PIF must be a bridged PIF.""" pifrec = db().get_pif_record(pif) nwrec = db().get_network_record(pifrec['network']) if nwrec['bridge']: return nwrec['bridge'] else: raise Error("PIF %(uuid)s does not have a bridge name" % pifrec) # # Bonded PIFs # def pif_is_bond(pif): pifrec = db().get_pif_record(pif) return len(pifrec['bond_master_of']) > 0 def pif_get_bond_masters(pif): """Returns a list of PIFs which are bond masters of this PIF""" pifrec = db().get_pif_record(pif) bso = pifrec['bond_slave_of'] # bond-slave-of is currently a single reference but in principle a # PIF could be a member of several bonds which are not # concurrently attached. Be robust to this possibility. if not bso or bso == "OpaqueRef:NULL": bso = [] elif not type(bso) == list: bso = [bso] bondrecs = [db().get_bond_record(bond) for bond in bso] bondrecs = [rec for rec in bondrecs if rec] return [bond['master'] for bond in bondrecs] def pif_get_bond_slaves(pif): """Returns a list of PIFs which make up the given bonded pif.""" pifrec = db().get_pif_record(pif) bmo = pifrec['bond_master_of'] if len(bmo) > 1: raise Error("Bond-master-of contains too many elements") if len(bmo) == 0: return [] bondrec = db().get_bond_record(bmo[0]) if not bondrec: raise Error("No bond record for bond master PIF") return bondrec['slaves'] # # VLAN PIFs # def pif_is_vlan(pif): return db().get_pif_record(pif)['VLAN'] != '-1' def pif_get_vlan_slave(pif): """Find the PIF which is the VLAN slave of pif. 
def pif_get_vlan_masters(pif):
    """Return the PIFs that are VLANs stacked on top of `pif`.

    Each entry of the PIF's 'VLAN_slave_of' list is looked up as a VLAN
    record.  Missing records are skipped, as are VLANs whose untagged PIF
    is not present in the database cache.
    """
    database = db()
    masters = []
    for vlan_ref in database.get_pif_record(pif)['VLAN_slave_of']:
        vlanrec = database.get_vlan_record(vlan_ref)
        if not vlanrec:
            continue
        untagged = vlanrec['untagged_PIF']
        if database.pif_exists(untagged):
            masters.append(untagged)
    return masters
def DatapathFactory():
    """Return the Datapath class for the configured network backend.

    The backend name is the first line of /etc/xensource/network.conf
    (under root_prefix()).  "bridge" selects DatapathBridge;
    "openvswitch" and "vswitch" select DatapathVswitch.

    Raises Error if the file cannot be read or names an unknown backend.
    """
    # XXX Need a datapath object for bridgeless PIFs

    # Local import: sys.exc_info() fetches the active exception without
    # needing version-specific "except ..., e" / "except ... as e" syntax.
    import sys

    try:
        network_conf = open(root_prefix() + "/etc/xensource/network.conf", 'r')
        try:
            network_backend = network_conf.readline().strip()
        finally:
            # Close the file even if reading it fails.
            network_conf.close()
    except Exception:
        e = sys.exc_info()[1]
        # Format the exception into the message.  Concatenating a str with
        # the exception object raised TypeError and hid the real error.
        raise Error("failed to determine network backend: %s" % e)

    if network_backend == "bridge":
        from InterfaceReconfigureBridge import DatapathBridge
        return DatapathBridge
    elif network_backend in ["openvswitch", "vswitch"]:
        from InterfaceReconfigureVswitch import DatapathVswitch
        return DatapathVswitch
    else:
        raise Error("unknown network backend %s" % network_backend)
def open_pif_ifcfg(pif):
    """Create the ifcfg ConfigurationFile for `pif` and write its header.

    The file is ifcfg-<netdev> under /etc/sysconfig/network-scripts
    (relative to root_prefix()).  The autogenerated-file banner and the
    XEMANAGED, DEVICE and ONBOOT lines are written before the
    ConfigurationFile object is returned to the caller.
    """
    record = db().get_pif_record(pif)
    netdev = pif_netdev_name(pif)

    log("Configuring %s (%s)" % (netdev, record['MAC']))

    ifcfg_path = "%s/etc/sysconfig/network-scripts/ifcfg-%s" % (root_prefix(), netdev)
    cfg = ConfigurationFile(ifcfg_path)

    generated_name = os.path.basename(cfg.path())
    generator_name = os.path.basename(sys.argv[0])
    banner = "# DO NOT EDIT: This file (%s) was autogenerated by %s\n" % \
             (generated_name, generator_name)

    for line in (banner,
                 "XEMANAGED=yes\n",
                 "DEVICE=%s\n" % netdev,
                 "ONBOOT=no\n"):
        cfg.write(line)

    return cfg
def __destroy_bond_device(name):
    """Remove bond master `name` by writing "-<name>" to sysfs_bonding_masters.

    Does nothing except log if the bond master does not exist.  The write
    is attempted up to 10 times, sleeping 0.5 seconds after each IOError.
    If every attempt fails the failure is logged; no exception is raised.
    """
    if not bond_device_exists(name):
        log("bond master %s does not exist, not destroying" % name)
        return

    retries = 10 # 10 * 0.5 seconds
    while retries > 0:
        retries = retries - 1
        log("Destroying bond master %s (%d attempts remain)" % (name,retries))
        try:
            f = open(sysfs_bonding_masters, "w")
            try:
                f.write("-" + name)
            finally:
                # Always release the handle.  An IOError raised by close()
                # is still handled by the retry logic below.
                f.close()
            return
        except IOError:
            time.sleep(0.5)

    # Previously the loop ended silently once the retries ran out.
    log("Failed to destroy bond master %s" % name)
This is because they will be required when the bond is brought up.""" def destroy_bridge(pif): """Bring down the bridge associated with a PIF.""" #if not pif_is_bridged(pif): # return bridge = pif_bridge_name(pif) if not netdev_exists(bridge): log("destroy_bridge: bridge %s does not exist, ignoring" % bridge) return log("Destroy bridge %s" % bridge) netdev_down(bridge) run_command(["/usr/sbin/brctl", "delbr", bridge]) def destroy_vlan(pif): vlan = pif_netdev_name(pif) if not netdev_exists(vlan): log("vconfig del: vlan %s does not exist, ignoring" % vlan) return log("Destroy vlan device %s" % vlan) run_command(["/sbin/vconfig", "rem", vlan]) if pif_is_vlan(pif): interface = pif_netdev_name(pif) log("bring_down_interface: %s is a VLAN" % interface) netdev_down(interface) if destroy: destroy_vlan(pif) destroy_bridge(pif) else: return slave = pif_get_vlan_slave(pif) if db().get_pif_record(slave)['currently_attached']: log("bring_down_interface: vlan slave is currently attached") return masters = pif_get_vlan_masters(slave) masters = [m for m in masters if m != pif and db().get_pif_record(m)['currently_attached']] if len(masters) > 0: log("bring_down_interface: vlan slave has other masters") return log("bring_down_interface: no more masters, bring down vlan slave %s" % pif_netdev_name(slave)) pif = slave else: vlan_masters = pif_get_vlan_masters(pif) log("vlan masters of %s - %s" % (db().get_pif_record(pif)['device'], [pif_netdev_name(m) for m in vlan_masters])) if len([m for m in vlan_masters if db().get_pif_record(m)['currently_attached']]) > 0: log("Leaving %s up due to currently attached VLAN masters" % pif_netdev_name(pif)) return # pif is now either a bond or a physical device which needs to be brought down # Need to bring down bond slaves first since the bond device # must be up to enslave/unenslave. 
def interface_is_up(pif):
    """Return True if the operstate of `pif`'s network device is "up".

    Reads /sys/class/net/<netdev>/operstate under root_prefix().  Returns
    False if that file cannot be read.
    """
    interface = pif_netdev_name(pif)
    path = "%s/sys/class/net/%s/operstate" % (root_prefix(), interface)
    try:
        f = open(path)
        try:
            state = f.read().strip()
        finally:
            f.close()
    except IOError:
        # Only I/O failures are treated as "not up"; the old bare except
        # also hid every other error.
        return False # interface prolly doesn't exist
    return state == "up"
def pif_get_bond_slaves_sorted(pif):
    """Return the slave PIFs of bond `pif`, ordered for safe detachment.

    Ensure any currently attached slaves are listed in the opposite order
    to the order in which they were attached.  The first slave attached
    must be the last detached since the bond is using its MAC address.

    The attached slaves are read from
    /sys/class/net/<device>/bonding/slaves under root_prefix(); if that
    file cannot be read the database order is returned unchanged.
    """
    pifrec = db().get_pif_record(pif)

    # Work on a copy: pif_get_bond_slaves() returns the list stored in the
    # cached bond record, which must not be reordered in place.
    slave_pifs = list(pif_get_bond_slaves(pif))

    try:
        f = open("%s/sys/class/net/%s/bonding/slaves" % (root_prefix(), pifrec['device']))
        try:
            attached_slaves = f.readline().split()
        finally:
            f.close()
    except IOError:
        return slave_pifs

    for slave in attached_slaves:
        pifs = [p for p in db().get_pifs_by_device(slave) if not pif_is_vlan(p)]
        # Skip kernel slaves with no matching PIF or that are not slaves of
        # this bond in the database; these used to raise IndexError or
        # ValueError.
        if not pifs or pifs[0] not in slave_pifs:
            continue
        slave_pif = pifs[0]
        slave_pifs.remove(slave_pif)
        slave_pifs.insert(0, slave_pif)

    return slave_pifs
""" pifrec = db().get_pif_record(pif) f = open_pif_ifcfg(pif) if pifrec['MAC'] != "": f.write("MACADDR=%s\n" % pifrec['MAC']) for slave in pif_get_bond_slaves(pif): s = _configure_physical_interface(slave) s.write("MASTER=%(device)s\n" % pifrec) s.write("SLAVE=yes\n") s.close() f.attach_child(s) settings,offload = ethtool_settings(pifrec['other_config']) if len(settings): f.write("ETHTOOL_OPTS=\"%s\"\n" % str.join(" ", settings)) if len(offload): f.write("ETHTOOL_OFFLOAD_OPTS=\"%s\"\n" % str.join(" ", offload)) mtu = mtu_setting(pifrec['network'], "Bond-PIF", pifrec['other_config']) if mtu: f.write("MTU=%s\n" % mtu) # The bond option defaults bond_options = { "mode": "balance-slb", "miimon": "100", "downdelay": "200", "updelay": "31000", "use_carrier": "1", "hashing-algorithm": "src_mac", } # override defaults with values from other-config whose keys being with "bond-" oc = pifrec['other_config'] overrides = filter(lambda (key,val): key.startswith("bond-"), oc.items()) overrides = map(lambda (key,val): (key[5:], val), overrides) bond_options.update(overrides) # write the bond options to ifcfg-bondX f.write('BONDING_OPTS="') for (name,val) in bond_options.items(): f.write("%s=%s " % (name,val)) f.write('"\n') return f def _configure_vlan_interface(pif): """Write the configuration for a VLAN interface. Writes the configuration file for the VLAN interface described by the pif object. Handles writing the configuration for the master interface if necessary. Returns the open file handle for the VLAN interface configuration file. 
class DatapathBridge(Datapath):
    """Datapath implementation for the Bridge backend.

    Tunnel PIFs are rejected at construction time.
    """

    def __init__(self, pif):
        if pif_is_tunnel(pif):
            raise Error("Tunnel PIFs are not supported in Bridge mode")

        Datapath.__init__(self, pif)
        log("Configured for Bridge datapath")

    def configure_ipdev(self, cfg):
        """Write the ifcfg TYPE field, plus bridge fields for a bridged PIF, to cfg."""
        if not pif_is_bridged(self._pif):
            cfg.write("TYPE=Ethernet\n")
            return

        for line in ("TYPE=Bridge\n", "DELAY=0\n", "STP=off\n"):
            cfg.write(line)
        cfg.write("PIFDEV=%s\n" % pif_netdev_name(self._pif))

    def preconfigure(self, parent):
        """Generate the PIF's configuration file and attach it to parent."""
        parent.attach_child(_configure_pif(self._pif))

    def bring_down_existing(self):
        """Take down the VLAN masters, the bond masters and then the PIF itself."""
        this_pif = self._pif

        # Bring down any VLAN masters so that we can reconfigure the slave.
        for vlan_master in pif_get_vlan_masters(this_pif):
            vlan_name = pif_netdev_name(vlan_master)
            log("action_up: bring down vlan master %s" % (vlan_name))
            netdev_down(vlan_name)

        # interface-reconfigure is never explicitly called to down a bond
        # master.  However, when we are called to up a slave it is implicit
        # that we are destroying the master.
        for bond_master in pif_get_bond_masters(this_pif):
            log("action_up: bring down bond master %s" % (pif_netdev_name(bond_master)))
            bring_down_interface(bond_master, destroy=True)

        # No masters left - now its safe to reconfigure the slave.
        bring_down_interface(this_pif)

    def configure(self):
        """Bring up the interface for the PIF."""
        bring_up_interface(self._pif)

    def post(self):
        """Bring back any currently-attached VLAN masters."""
        # The list of attached masters is built in full before any device
        # is brought up.
        attached = []
        for vlan_master in pif_get_vlan_masters(self._pif):
            if db().get_pif_record(vlan_master)['currently_attached']:
                attached.append(vlan_master)

        for vlan_master in attached:
            vlan_name = pif_netdev_name(vlan_master)
            log("action_up: bring up %s" % (vlan_name))
            netdev_up(vlan_name)

    def bring_down(self):
        """Bring down the PIF's interface with destroy=True."""
        bring_down_interface(self._pif, destroy=True)
def netdev_get_features(netdev):
    """Returns the features bitmap for the driver for 'netdev'.
    The features bitmap is a set of NETIF_F_ flags supported by its driver.

    The value is read from /sys/class/net/<netdev>/features under
    root_prefix() and parsed with int(text, 0).  Returns 0 if the file
    cannot be read or its contents are not a valid integer."""
    path = "%s/sys/class/net/%s/features" % (root_prefix(), netdev)
    try:
        f = open(path)
        try:
            features = f.read().strip()
        finally:
            f.close()
        return int(features, 0)
    except (IOError, ValueError):
        # IOError: unreadable file; ValueError: unparsable contents.  The
        # old bare except also hid every other error.
        return 0 # interface prolly doesn't exist
For a non-VLAN, non-bond master PIF, the PIF is its own physical device PIF. A VLAN PIF cannot be a datapath PIF. """ if pif_is_tunnel(pif): return [] elif pif_is_vlan(pif): # Seems like overkill... raise Error("get-physical-pifs should not get passed a VLAN") elif pif_is_bond(pif): return pif_get_bond_slaves(pif) else: return [pif] def datapath_deconfigure_physical(netdev): return ['--', '--with-iface', '--if-exists', 'del-port', netdev] def vsctl_escape(s): if s.isalnum(): return s def escape(match): c = match.group(0) if c == '\0': raise Error("strings may not contain null bytes") elif c == '\\': return r'\\' elif c == '\n': return r'\n' elif c == '\r': return r'\r' elif c == '\t': return r'\t' elif c == '\b': return r'\b' elif c == '\a': return r'\a' else: return r'\x%02x' % ord(c) return '"' + re.sub(r'["\\\000-\037]', escape, s) + '"' def datapath_configure_tunnel(pif): pass def datapath_configure_bond(pif,slaves): bridge = pif_bridge_name(pif) pifrec = db().get_pif_record(pif) interface = pif_netdev_name(pif) argv = ['--', '--fake-iface', 'add-bond', bridge, interface] for slave in slaves: argv += [pif_netdev_name(slave)] # Bonding options. bond_options = { "mode": "balance-slb", "miimon": "100", "downdelay": "200", "updelay": "31000", "use_carrier": "1", "hashing-algorithm": "src_mac", } # override defaults with values from other-config whose keys # being with "bond-" oc = pifrec['other_config'] overrides = filter(lambda (key,val): key.startswith("bond-"), oc.items()) overrides = map(lambda (key,val): (key[5:], val), overrides) bond_options.update(overrides) mode = None halgo = None argv += ['--', 'set', 'Port', interface] if pifrec['MAC'] != "": argv += ['MAC=%s' % vsctl_escape(pifrec['MAC'])] for (name,val) in bond_options.items(): if name in ['updelay', 'downdelay']: # updelay and downdelay have dedicated schema columns. # The value must be a nonnegative integer. 
try: value = int(val) if value < 0: raise ValueError argv += ['bond_%s=%d' % (name, value)] except ValueError: log("bridge %s has invalid %s '%s'" % (bridge, name, value)) elif name in ['miimon', 'use_carrier']: try: value = int(val) if value < 0: raise ValueError if name == 'use_carrier': if value: value = "carrier" else: value = "miimon" argv += ["other-config:bond-detect-mode=%s" % value] else: argv += ["other-config:bond-miimon-interval=%d" % value] except ValueError: log("bridge %s has invalid %s '%s'" % (bridge, name, value)) elif name == "mode": mode = val elif name == "hashing-algorithm": halgo = val else: # Pass other bond options into other_config. argv += ["other-config:%s=%s" % (vsctl_escape("bond-%s" % name), vsctl_escape(val))] if mode == 'lacp': argv += ['lacp=active'] if halgo == 'src_mac': argv += ['bond_mode=balance-slb'] elif halgo == "tcpudp_ports": argv += ['bond_mode=balance-tcp'] else: log("bridge %s has invalid bond-hashing-algorithm '%s'" % (bridge, halgo)) argv += ['bond_mode=balance-slb'] elif mode in ['balance-slb', 'active-backup']: argv += ['lacp=off', 'bond_mode=%s' % mode] else: log("bridge %s has invalid bond-mode '%s'" % (bridge, mode)) argv += ['lacp=off', 'bond_mode=balance-slb'] return argv def datapath_deconfigure_bond(netdev): return ['--', '--with-iface', '--if-exists', 'del-port', netdev] def datapath_deconfigure_ipdev(interface): return ['--', '--with-iface', '--if-exists', 'del-port', interface] def datapath_modify_config(commands): #log("modifying configuration:") #for c in commands: # log(" %s" % c) rc = run_command(['/usr/bin/ovs-vsctl'] + ['--timeout=20'] + [c for c in commands if not c.startswith('#')]) if not rc: raise Error("Failed to modify vswitch configuration") return True # # Toplevel Datapath Configuration. # def configure_datapath(pif): """Bring up the configuration for 'pif', which must not be a VLAN PIF, by: - Tearing down other PIFs that use the same physical devices as 'pif'. 
- Ensuring that 'pif' itself is set up. - *Not* tearing down any PIFs that are stacked on top of 'pif' (i.e. VLANs on top of 'pif'. Returns a tuple containing - A list containing the necessary vsctl command line arguments - A list of additional devices which should be brought up after the configuration is applied. - A list containing flows to apply to the pif bridge, note that port numbers may need to be substituted once ofport is known """ vsctl_argv = [] extra_up_ports = [] bridge_flows = [] assert not pif_is_vlan(pif) bridge = pif_bridge_name(pif) physical_devices = datapath_get_physical_pifs(pif) vsctl_argv += ['## configuring datapath %s' % bridge] # Determine additional devices to deconfigure. # # Given all physical devices which are part of this PIF we need to # consider: # - any additional bond which a physical device is part of. # - any additional physical devices which are part of an additional bond. # # Any of these which are not currently in use should be brought # down and deconfigured. 
extra_down_bonds = [] extra_down_ports = [] for p in physical_devices: for bond in pif_get_bond_masters(p): if bond == pif: log("configure_datapath: leaving bond %s up" % pif_netdev_name(bond)) continue if bond in extra_down_bonds: continue if db().get_pif_record(bond)['currently_attached']: log("configure_datapath: implicitly tearing down currently-attached bond %s" % pif_netdev_name(bond)) extra_down_bonds += [bond] for s in pif_get_bond_slaves(bond): if s in physical_devices: continue if s in extra_down_ports: continue if pif_currently_in_use(s): continue extra_down_ports += [s] log("configure_datapath: bridge - %s" % bridge) log("configure_datapath: physical - %s" % [pif_netdev_name(p) for p in physical_devices]) log("configure_datapath: extra ports - %s" % [pif_netdev_name(p) for p in extra_down_ports]) log("configure_datapath: extra bonds - %s" % [pif_netdev_name(p) for p in extra_down_bonds]) # Need to fully deconfigure any bridge which any of the: # - physical devices # - bond devices # - sibling devices # refers to for brpif in physical_devices + extra_down_ports + extra_down_bonds: if brpif == pif: continue b = pif_bridge_name(brpif) #ifdown(b) # XXX netdev_down(b) vsctl_argv += ['# remove bridge %s' % b] vsctl_argv += ['--', '--if-exists', 'del-br', b] for n in extra_down_ports: dev = pif_netdev_name(n) vsctl_argv += ['# deconfigure sibling physical device %s' % dev] vsctl_argv += datapath_deconfigure_physical(dev) netdev_down(dev) for n in extra_down_bonds: dev = pif_netdev_name(n) vsctl_argv += ['# deconfigure bond device %s' % dev] vsctl_argv += datapath_deconfigure_bond(dev) netdev_down(dev) for p in physical_devices: dev = pif_netdev_name(p) vsctl_argv += ['# deconfigure physical port %s' % dev] vsctl_argv += datapath_deconfigure_physical(dev) vsctl_argv += ['--', '--may-exist', 'add-br', bridge] if len(physical_devices) > 1: vsctl_argv += ['# deconfigure bond %s' % pif_netdev_name(pif)] vsctl_argv += datapath_deconfigure_bond(pif_netdev_name(pif)) 
vsctl_argv += ['# configure bond %s' % pif_netdev_name(pif)] vsctl_argv += datapath_configure_bond(pif, physical_devices) extra_up_ports += [pif_netdev_name(pif)] elif len(physical_devices) == 1: iface = pif_netdev_name(physical_devices[0]) vsctl_argv += ['# add physical device %s' % iface] vsctl_argv += ['--', '--may-exist', 'add-port', bridge, iface] elif pif_is_tunnel(pif): datapath_configure_tunnel(pif) vsctl_argv += ['# configure Bridge MAC'] vsctl_argv += ['--', 'set', 'Bridge', bridge, 'other-config:hwaddr=%s' % vsctl_escape(db().get_pif_record(pif)['MAC'])] pool = db().get_pool_record() network = db().get_network_by_bridge(bridge) network_rec = None fail_mode = None valid_fail_modes = ['standalone', 'secure'] if network: network_rec = db().get_network_record(network) fail_mode = network_rec['other_config'].get('vswitch-controller-fail-mode') if (fail_mode not in valid_fail_modes) and pool: fail_mode = pool['other_config'].get('vswitch-controller-fail-mode') # Add default flows to allow management traffic if fail-mode # transitions to secure based on pool fail-mode setting if fail_mode == 'secure' and db().get_pif_record(pif).get('management', False): prev_fail_mode = vswitchCfgQuery(['get-fail-mode', bridge]) if prev_fail_mode != 'secure': tp = 'idle_timeout=0,priority=0' host_mgmt_mac = db().get_pif_record(pif)['MAC'] # account for bond as management interface if len(physical_devices) > 1: bridge_flows += ['%s,in_port=local,arp,dl_src=%s,actions=NORMAL' % (tp, host_mgmt_mac)] bridge_flows += ['%s,in_port=local,dl_src=%s,actions=NORMAL' % (tp, host_mgmt_mac)] # we don't know slave ofports yet, substitute later bridge_flows += ['%s,in_port=%%s,arp,nw_proto=1,actions=local' % (tp)] bridge_flows += ['%s,in_port=%%s,dl_dst=%s,actions=local' % (tp, host_mgmt_mac)] else: bridge_flows += ['%s,in_port=%%s,arp,nw_proto=1,actions=local' % (tp)] bridge_flows += ['%s,in_port=local,arp,dl_src=%s,actions=%%s' % (tp, host_mgmt_mac)] bridge_flows += 
['%s,in_port=%%s,dl_dst=%s,actions=local' % (tp, host_mgmt_mac)] bridge_flows += ['%s,in_port=local,dl_src=%s,actions=%%s' % (tp, host_mgmt_mac)] if fail_mode not in valid_fail_modes: fail_mode = 'standalone' vsctl_argv += ['--', 'set', 'Bridge', bridge, 'fail_mode=%s' % fail_mode] if network_rec: dib = network_rec['other_config'].get('vswitch-disable-in-band') if not dib: vsctl_argv += ['--', 'remove', 'Bridge', bridge, 'other_config', 'disable-in-band'] elif dib in ['true', 'false']: vsctl_argv += ['--', 'set', 'Bridge', bridge, 'other_config:disable-in-band=' + dib] else: log('"' + dib + '"' "isn't a valid setting for other_config:disable-in-band on " + bridge) vsctl_argv += set_br_external_ids(pif) vsctl_argv += ['## done configuring datapath %s' % bridge] return vsctl_argv,extra_up_ports,bridge_flows def deconfigure_bridge(pif): vsctl_argv = [] bridge = pif_bridge_name(pif) log("deconfigure_bridge: bridge - %s" % bridge) vsctl_argv += ['# deconfigure bridge %s' % bridge] vsctl_argv += ['--', '--if-exists', 'del-br', bridge] return vsctl_argv def set_br_external_ids(pif): pifrec = db().get_pif_record(pif) dp = pif_datapath(pif) dprec = db().get_pif_record(dp) xs_network_uuids = [] for nwpif in db().get_pifs_by_device(pifrec['device']): rec = db().get_pif_record(nwpif) # When state is read from dbcache PIF.currently_attached # is always assumed to be false... Err on the side of # listing even detached networks for the time being. 
#if nwpif != pif and not rec['currently_attached']: # log("Network PIF %s not currently attached (%s)" % (rec['uuid'],pifrec['uuid'])) # continue nwrec = db().get_network_record(rec['network']) uuid = nwrec['uuid'] if pif_is_vlan(nwpif): xs_network_uuids.append(uuid) else: xs_network_uuids.insert(0, uuid) vsctl_argv = [] vsctl_argv += ['# configure xs-network-uuids'] vsctl_argv += ['--', 'br-set-external-id', pif_bridge_name(pif), 'xs-network-uuids', ';'.join(xs_network_uuids)] return vsctl_argv # # # class DatapathVswitch(Datapath): def __init__(self, pif): Datapath.__init__(self, pif) self._dp = pif_datapath(pif) self._ipdev = pif_ipdev_name(pif) self._bridge_flows = [] if pif_is_vlan(pif) and not self._dp: raise Error("Unbridged VLAN devices not implemented yet") log("Configured for Vswitch datapath") @classmethod def rewrite(cls): if not os.path.exists("/var/run/openvswitch/db.sock"): # ovsdb-server is not running, so we can't update the database. # Probably we are being called as part of system shutdown. Just # skip the update, since the external-ids will be updated on the # next boot anyhow. return vsctl_argv = [] for pif in db().get_all_pifs(): pifrec = db().get_pif_record(pif) if not pif_is_vlan(pif) and pifrec['currently_attached']: vsctl_argv += set_br_external_ids(pif) if vsctl_argv != []: datapath_modify_config(vsctl_argv) def configure_ipdev(self, cfg): cfg.write("TYPE=Ethernet\n") def preconfigure(self, parent): vsctl_argv = [] extra_ports = [] bridge_flows = [] pifrec = db().get_pif_record(self._pif) dprec = db().get_pif_record(self._dp) ipdev = self._ipdev c,e,f = configure_datapath(self._dp) bridge = pif_bridge_name(self._pif) vsctl_argv += c extra_ports += e bridge_flows += f dpname = pif_bridge_name(self._dp) if pif_is_vlan(self._pif): # In some cases XAPI may misguidedly leave an instance of # 'bridge' which should be deleted. vsctl_argv += ['--', '--if-exists', 'del-br', bridge] # configure_datapath() set up the underlying datapath bridge. 
# Stack a VLAN bridge on top of it. vsctl_argv += ['--', '--may-exist', 'add-br', bridge, dpname, pifrec['VLAN']] vsctl_argv += set_br_external_ids(self._pif) if ipdev != bridge: vsctl_argv += ["# deconfigure ipdev %s" % ipdev] vsctl_argv += datapath_deconfigure_ipdev(ipdev) vsctl_argv += ["# reconfigure ipdev %s" % ipdev] vsctl_argv += ['--', 'add-port', bridge, ipdev] if ipdev != dpname: vsctl_argv += ['# configure Interface MAC'] vsctl_argv += ['--', 'set', 'Interface', pif_ipdev_name(self._pif), 'MAC=%s' % vsctl_escape(dprec['MAC'])] self._vsctl_argv = vsctl_argv self._extra_ports = extra_ports self._bridge_flows = bridge_flows def bring_down_existing(self): # interface-reconfigure is never explicitly called to down a # bond master. However, when we are called to up a slave it # is implicit that we are destroying the master. Conversely, # when we are called to up a bond is is implicit that we are # taking down the slaves. # # This is (only) important in the case where the device being # implicitly taken down uses DHCP. We need to kill the # dhclient process, otherwise performing the inverse operation # later later will fail because ifup will refuse to start a # duplicate dhclient. bond_masters = pif_get_bond_masters(self._pif) for master in bond_masters: log("action_up: bring down bond master %s" % (pif_netdev_name(master))) run_command(["/sbin/ifdown", pif_bridge_name(master)]) bond_slaves = pif_get_bond_slaves(self._pif) for slave in bond_slaves: log("action_up: bring down bond slave %s" % (pif_netdev_name(slave))) run_command(["/sbin/ifdown", pif_bridge_name(slave)]) def configure(self): # Bring up physical devices. ovs-vswitchd initially enables or # disables bond slaves based on whether carrier is detected # when they are added, and a network device that is down # always reports "no carrier". 
physical_devices = datapath_get_physical_pifs(self._dp) if pif_is_bond(self._dp): brec = db().get_pif_record(self._dp) bond_mtu = mtu_setting(brec['network'], "PIF", brec['other_config']) else: bond_mtu = None for p in physical_devices: prec = db().get_pif_record(p) oc = prec['other_config'] dev = pif_netdev_name(p) if bond_mtu: mtu = bond_mtu else: mtu = mtu_setting(prec['network'], "PIF", oc) netdev_up(dev, mtu) settings, offload = ethtool_settings(oc, PIF_OTHERCONFIG_DEFAULTS) if len(settings): run_command(['/sbin/ethtool', '-s', dev] + settings) if len(offload): run_command(['/sbin/ethtool', '-K', dev] + offload) driver = netdev_get_driver_name(dev) if 'vlan-bug-workaround' in oc: vlan_bug_workaround = oc['vlan-bug-workaround'] == 'true' elif driver in NO_VLAN_WORKAROUND_DRIVERS: vlan_bug_workaround = False else: vlan_bug_workaround = netdev_has_vlan_accel(dev) if vlan_bug_workaround: setting = 'on' else: setting = 'off' run_command(['/usr/sbin/ovs-vlan-bug-workaround', dev, setting]) datapath_modify_config(self._vsctl_argv) if self._bridge_flows: ofports = [] physical_devices = datapath_get_physical_pifs(self._dp) if len(physical_devices) > 1: for slave in physical_devices: name = pif_netdev_name(slave) ofport = vswitchCfgQuery(['get', 'interface', name, 'ofport']) ofports.append(ofport) else: name = pif_netdev_name(self._dp) ofport = vswitchCfgQuery(['get', 'interface', name, 'ofport']) ofports.append(ofport) dpname = pif_bridge_name(self._dp) for flow in self._bridge_flows: if flow.find('in_port=%s') != -1 or flow.find('actions=%s') != -1: for port in ofports: f = flow % (port) run_command(['/usr/bin/ovs-ofctl', 'add-flow', dpname, f]) else: run_command(['/usr/bin/ovs-ofctl', 'add-flow', dpname, flow]) def post(self): for p in self._extra_ports: log("action_up: bring up %s" % p) netdev_up(p) def bring_down(self): vsctl_argv = [] dp = self._dp ipdev = self._ipdev bridge = pif_bridge_name(dp) log("deconfigure ipdev %s on %s" % (ipdev,bridge)) vsctl_argv += ["# 
deconfigure ipdev %s" % ipdev] vsctl_argv += datapath_deconfigure_ipdev(ipdev) if pif_is_vlan(self._pif): # Delete the VLAN bridge. vsctl_argv += deconfigure_bridge(self._pif) # If the VLAN's slave is attached, leave datapath setup. slave = pif_get_vlan_slave(self._pif) if db().get_pif_record(slave)['currently_attached']: log("action_down: vlan slave is currently attached") dp = None # If the VLAN's slave has other VLANs that are attached, leave datapath setup. for master in pif_get_vlan_masters(slave): if master != self._pif and db().get_pif_record(master)['currently_attached']: log("action_down: vlan slave has other master: %s" % pif_netdev_name(master)) dp = None # Otherwise, take down the datapath too (fall through) if dp: log("action_down: no more masters, bring down slave %s" % bridge) else: # Stop here if this PIF has attached VLAN masters. masters = [db().get_pif_record(m)['VLAN'] for m in pif_get_vlan_masters(self._pif) if db().get_pif_record(m)['currently_attached']] if len(masters) > 0: log("Leaving datapath %s up due to currently attached VLAN masters %s" % (bridge, masters)) dp = None if dp: vsctl_argv += deconfigure_bridge(dp) physical_devices = [pif_netdev_name(p) for p in datapath_get_physical_pifs(dp)] log("action_down: bring down physical devices - %s" % physical_devices) for p in physical_devices: netdev_down(p) datapath_modify_config(vsctl_argv) # # utility methods # def vswitchCfgQuery(action_args): cmd = ['%s/usr/bin/ovs-vsctl' % root_prefix(), '-vconsole:off'] + action_args output = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate() if len(output) == 0 or output[0] == None: output = "" else: output = output[0].strip() return output openvswitch-2.0.1+git20140120/xenserver/opt_xensource_libexec_interface-reconfigure000077500000000000000000000574121226605124000303500ustar00rootroot00000000000000#!/usr/bin/env python # # Copyright (c) 2008,2009 Citrix Systems, Inc. 
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; version 2.1 only. with the special
# exception on linking described in file LICENSE.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
"""Usage:

    %(command-name)s <PIF> up
    %(command-name)s <PIF> down
    %(command-name)s rewrite
    %(command-name)s --force <BRIDGE> up
    %(command-name)s --force <BRIDGE> down
    %(command-name)s --force <BRIDGE> rewrite --device=<INTERFACE> --mac=<MAC-ADDRESS> <CONFIG>

    where <PIF> is one of:
       --session <SESSION-REF> --pif <PIF-REF>
       --pif-uuid <PIF-UUID>
    and <CONFIG> is one of:
       --mode=dhcp
       --mode=static --ip=<IPADDR> --netmask=<NM> [--gateway=<GW>]

  Options:
    --session           A session reference to use to access the xapi DB
    --pif               A PIF reference within the session.
    --pif-uuid          The UUID of a PIF.
    --force             An interface name.
    --root-prefix=DIR   Use DIR as alternate root directory (for testing).
    --no-syslog         Write log messages to stderr instead of system log.
"""
# NOTE(review): the <...> placeholders in the usage text above were lost in
# the copy under review and have been re-inserted; confirm the wording
# against the pristine script.

# Notes:
# 1. Every pif belongs to exactly one network
# 2. Every network has zero or one pifs
# 3. A network may have an associated bridge, allowing vifs to be attached
# 4. A network may be bridgeless (there's no point having a bridge over a storage pif)

from InterfaceReconfigure import *

import os
import sys
import getopt
import syslog
import traceback
import re
import random

# PIF treated as the management interface; set by main() before any action
# runs and read by ipdev_configure_network().
management_pif = None

dbcache_file = "/var/xapi/network.dbcache"

#
# Logging.
#

def log_pif_action(action, pif):
    """Log that 'pif' is being brought 'action' (e.g. "up" or "down"),
    together with its device name and IP configuration mode."""
    pifrec = db().get_pif_record(pif)
    rec = {
        'uuid': pifrec['uuid'],
        'ip_configuration_mode': pifrec['ip_configuration_mode'],
        'action': action,
        'pif_netdev_name': pif_netdev_name(pif),
    }
    rec['message'] = "Bring %(action)s PIF %(uuid)s" % rec
    log("%(message)s: %(pif_netdev_name)s configured as %(ip_configuration_mode)s" % rec)

#
# Exceptions.
# class Usage(Exception): def __init__(self, msg): Exception.__init__(self) self.msg = msg # # Boot from Network filesystem or device. # def check_allowed(pif): """Determine whether interface-reconfigure should be manipulating this PIF. Used to prevent system PIFs (such as network root disk) from being interfered with. """ pifrec = db().get_pif_record(pif) try: f = open(root_prefix() + "/proc/ardence") macline = filter(lambda x: x.startswith("HWaddr:"), f.readlines()) f.close() if len(macline) == 1: p = re.compile(".*\s%(MAC)s\s.*" % pifrec, re.IGNORECASE) if p.match(macline[0]): log("Skipping PVS device %(device)s (%(MAC)s)" % pifrec) return False except IOError: pass return True # # Bare Network Devices -- network devices without IP configuration # def netdev_remap_name(pif, already_renamed=[]): """Check whether 'pif' exists and has the correct MAC. If not, try to find a device with the correct MAC and rename it. 'already_renamed' is used to avoid infinite recursion. """ def read1(name): file = None try: file = open(name, 'r') return file.readline().rstrip('\n') finally: if file != None: file.close() def get_netdev_mac(device): try: return read1("%s/sys/class/net/%s/address" % (root_prefix(), device)) except: # Probably no such device. return None def get_netdev_tx_queue_len(device): try: return int(read1("%s/sys/class/net/%s/tx_queue_len" % (root_prefix(), device))) except: # Probably no such device. 
return None def get_netdev_by_mac(mac): for device in os.listdir(root_prefix() + "/sys/class/net"): dev_mac = get_netdev_mac(device) if (dev_mac and mac.lower() == dev_mac.lower() and get_netdev_tx_queue_len(device)): return device return None def rename_netdev(old_name, new_name): raise Error("Trying to rename %s to %s - This functionality has been removed" % (old_name, new_name)) # log("Changing the name of %s to %s" % (old_name, new_name)) # run_command(['/sbin/ifconfig', old_name, 'down']) # if not run_command(['/sbin/ip', 'link', 'set', old_name, 'name', new_name]): # raise Error("Could not rename %s to %s" % (old_name, new_name)) pifrec = db().get_pif_record(pif) device = pifrec['device'] mac = pifrec['MAC'] # Is there a network device named 'device' at all? device_exists = netdev_exists(device) if device_exists: # Yes. Does it have MAC 'mac'? found_mac = get_netdev_mac(device) if found_mac and mac.lower() == found_mac.lower(): # Yes, everything checks out the way we want. Nothing to do. return else: log("No network device %s" % device) # What device has MAC 'mac'? cur_device = get_netdev_by_mac(mac) if not cur_device: log("No network device has MAC %s" % mac) return # First rename 'device', if it exists, to get it out of the way # for 'cur_device' to replace it. if device_exists: rename_netdev(device, "dev%d" % random.getrandbits(24)) # Rename 'cur_device' to 'device'. 
rename_netdev(cur_device, device) # # IP Network Devices -- network devices with IP configuration # def ifdown(netdev): """Bring down a network interface""" if not netdev_exists(netdev): log("ifdown: device %s does not exist, ignoring" % netdev) return if not os.path.exists("%s/etc/sysconfig/network-scripts/ifcfg-%s" % (root_prefix(), netdev)): log("ifdown: device %s exists but ifcfg-%s does not" % (netdev,netdev)) run_command(["/sbin/ifconfig", netdev, 'down']) return run_command(["/sbin/ifdown", netdev]) def ifup(netdev): """Bring up a network interface""" if not os.path.exists(root_prefix() + "/etc/sysconfig/network-scripts/ifcfg-%s" % netdev): raise Error("ifup: device %s exists but ifcfg-%s does not" % (netdev,netdev)) d = os.getenv("DHCLIENTARGS","") if os.path.exists("/etc/firstboot.d/data/firstboot_in_progress"): os.putenv("DHCLIENTARGS", d + " -T 240 " ) run_command(["/sbin/ifup", netdev]) os.putenv("DHCLIENTARGS", d ) # # # def pif_rename_physical_devices(pif): if pif_is_tunnel(pif): return if pif_is_vlan(pif): pif = pif_get_vlan_slave(pif) if pif_is_bond(pif): pifs = pif_get_bond_slaves(pif) else: pifs = [pif] for pif in pifs: netdev_remap_name(pif) # # IP device configuration # def ipdev_configure_static_routes(interface, oc, f): """Open a route- file for static routes. Opens the static routes configuration file for interface and writes one line for each route specified in the network's other config "static-routes" value. E.g. if interface ( RO): xenbr1 other-config (MRW): static-routes: 172.16.0.0/15/192.168.0.3,172.18.0.0/16/192.168.0.4;... Then route-xenbr1 should be 172.16.0.0/15 via 192.168.0.3 dev xenbr1 172.18.0.0/16 via 192.168.0.4 dev xenbr1 """ if oc.has_key('static-routes'): # The key is present - extract comma separates entries lines = oc['static-routes'].split(',') else: # The key is not present, i.e. 
there are no static routes lines = [] child = ConfigurationFile("%s/etc/sysconfig/network-scripts/route-%s" % (root_prefix(), interface)) child.write("# DO NOT EDIT: This file (%s) was autogenerated by %s\n" % \ (os.path.basename(child.path()), os.path.basename(sys.argv[0]))) try: for l in lines: network, masklen, gateway = l.split('/') child.write("%s/%s via %s dev %s\n" % (network, masklen, gateway, interface)) f.attach_child(child) child.close() except ValueError, e: log("Error in other-config['static-routes'] format for network %s: %s" % (interface, e)) def ipdev_open_ifcfg(pif): ipdev = pif_ipdev_name(pif) log("Writing network configuration for %s" % ipdev) f = ConfigurationFile("%s/etc/sysconfig/network-scripts/ifcfg-%s" % (root_prefix(), ipdev)) f.write("# DO NOT EDIT: This file (%s) was autogenerated by %s\n" % \ (os.path.basename(f.path()), os.path.basename(sys.argv[0]))) f.write("XEMANAGED=yes\n") f.write("DEVICE=%s\n" % ipdev) f.write("ONBOOT=no\n") f.write("NOZEROCONF=yes\n") return f def ipdev_configure_network(pif, dp): """Write the configuration file for a network. Writes configuration derived from the network object into the relevant ifcfg file. The configuration file is passed in, but if the network is bridgeless it will be ifcfg-, otherwise it will be ifcfg-. This routine may also write ifcfg files of the networks corresponding to other PIFs in order to maintain consistency. 
params: pif: Opaque_ref of pif dp: Datapath object """ pifrec = db().get_pif_record(pif) nw = pifrec['network'] nwrec = db().get_network_record(nw) ipdev = pif_ipdev_name(pif) f = ipdev_open_ifcfg(pif) mode = pifrec['ip_configuration_mode'] log("Configuring %s using %s configuration" % (ipdev, mode)) oc = None if pifrec.has_key('other_config'): oc = pifrec['other_config'] dp.configure_ipdev(f) if pifrec['ip_configuration_mode'] == "DHCP": f.write("BOOTPROTO=dhcp\n") f.write("PERSISTENT_DHCLIENT=yes\n") elif pifrec['ip_configuration_mode'] == "Static": f.write("BOOTPROTO=none\n") f.write("NETMASK=%(netmask)s\n" % pifrec) f.write("IPADDR=%(IP)s\n" % pifrec) f.write("GATEWAY=%(gateway)s\n" % pifrec) elif pifrec['ip_configuration_mode'] == "None": f.write("BOOTPROTO=none\n") else: raise Error("Unknown ip-configuration-mode %s" % pifrec['ip_configuration_mode']) if nwrec.has_key('other_config'): settings,offload = ethtool_settings(nwrec['other_config']) if len(settings): f.write("ETHTOOL_OPTS=\"%s\"\n" % str.join(" ", settings)) if len(offload): f.write("ETHTOOL_OFFLOAD_OPTS=\"%s\"\n" % str.join(" ", offload)) ipdev_configure_static_routes(ipdev, nwrec['other_config'], f) mtu = mtu_setting(nw, "Network", nwrec['other_config']) if mtu: f.write("MTU=%s\n" % mtu) if pifrec.has_key('DNS') and pifrec['DNS'] != "": ServerList = pifrec['DNS'].split(",") for i in range(len(ServerList)): f.write("DNS%d=%s\n" % (i+1, ServerList[i])) if oc and oc.has_key('domain'): f.write("DOMAIN='%s'\n" % oc['domain'].replace(',', ' ')) # There can be only one DNSDEV and one GATEWAYDEV in /etc/sysconfig/network. # # The peerdns pif will be the one with # pif::other-config:peerdns=true, or the mgmt pif if none have # this set. # # The gateway pif will be the one with # pif::other-config:defaultroute=true, or the mgmt pif if none # have this set. # Work out which pif on this host should be the DNSDEV and which # should be the GATEWAYDEV # # Note: we prune out the bond master pif (if it exists). 
This is # because when we are called to bring up an interface with a bond # master, it is implicit that we should bring down that master. pifs_on_host = [p for p in db().get_all_pifs() if not p in pif_get_bond_masters(pif)] # now prune out bond slaves as they are not connected to the IP # stack and so cannot be used as gateway or DNS devices. pifs_on_host = [ p for p in pifs_on_host if len(pif_get_bond_masters(p)) == 0] # loop through all the pifs on this host looking for one with # other-config:peerdns = true, and one with # other-config:default-route=true peerdns_pif = None defaultroute_pif = None for __pif in pifs_on_host: __pifrec = db().get_pif_record(__pif) __oc = __pifrec['other_config'] if __oc.has_key('peerdns') and __oc['peerdns'] == 'true': if peerdns_pif == None: peerdns_pif = __pif else: log('Warning: multiple pifs with "peerdns=true" - choosing %s and ignoring %s' % \ (db().get_pif_record(peerdns_pif)['device'], __pifrec['device'])) if __oc.has_key('defaultroute') and __oc['defaultroute'] == 'true': if defaultroute_pif == None: defaultroute_pif = __pif else: log('Warning: multiple pifs with "defaultroute=true" - choosing %s and ignoring %s' % \ (db().get_pif_record(defaultroute_pif)['device'], __pifrec['device'])) # If no pif is explicitly specified then use the mgmt pif for # peerdns/defaultroute. 
if peerdns_pif == None: peerdns_pif = management_pif if defaultroute_pif == None: defaultroute_pif = management_pif is_dnsdev = peerdns_pif == pif is_gatewaydev = defaultroute_pif == pif if is_dnsdev or is_gatewaydev: fnetwork = ConfigurationFile(root_prefix() + "/etc/sysconfig/network") for line in fnetwork.readlines(): if is_dnsdev and line.lstrip().startswith('DNSDEV='): fnetwork.write('DNSDEV=%s\n' % ipdev) is_dnsdev = False elif is_gatewaydev and line.lstrip().startswith('GATEWAYDEV='): fnetwork.write('GATEWAYDEV=%s\n' % ipdev) is_gatewaydev = False else: fnetwork.write(line) if is_dnsdev: fnetwork.write('DNSDEV=%s\n' % ipdev) if is_gatewaydev: fnetwork.write('GATEWAYDEV=%s\n' % ipdev) fnetwork.close() f.attach_child(fnetwork) return f # # Toplevel actions # def action_up(pif, force): pifrec = db().get_pif_record(pif) ipdev = pif_ipdev_name(pif) dp = DatapathFactory()(pif) log("action_up: %s" % ipdev) f = ipdev_configure_network(pif, dp) dp.preconfigure(f) f.close() pif_rename_physical_devices(pif) # if we are not forcing the interface up then attempt to tear down # any existing devices which might interfere with brinign this one # up. if not force: ifdown(ipdev) dp.bring_down_existing() try: f.apply() dp.configure() ifup(ipdev) dp.post() # Update /etc/issue (which contains the IP address of the management interface) os.system(root_prefix() + "/sbin/update-issue") f.commit() except Error, e: log("failed to apply changes: %s" % e.msg) f.revert() raise def action_down(pif): ipdev = pif_ipdev_name(pif) dp = DatapathFactory()(pif) log("action_down: %s" % ipdev) ifdown(ipdev) dp.bring_down() def action_rewrite(): DatapathFactory().rewrite() # This is useful for reconfiguring the mgmt interface after having lost connectivity to the pool master def action_force_rewrite(bridge, config): def getUUID(): import subprocess uuid,_ = subprocess.Popen(['uuidgen'], stdout = subprocess.PIPE).communicate() return uuid.strip() # Notes: # 1. 
that this assumes the interface is bridged # 2. If --gateway is given it will make that the default gateway for the host # extract the configuration try: mode = config['mode'] mac = config['mac'] interface = config['device'] except: raise Usage("Please supply --mode, --mac and --device") if mode == 'static': try: netmask = config['netmask'] ip = config['ip'] except: raise Usage("Please supply --netmask and --ip") try: gateway = config['gateway'] except: gateway = None elif mode != 'dhcp': raise Usage("--mode must be either static or dhcp") if config.has_key('vlan'): is_vlan = True vlan_slave, vlan_vid = config['vlan'].split('.') else: is_vlan = False if is_vlan: raise Error("Force rewrite of VLAN not implemented") log("Configuring %s using %s configuration" % (bridge, mode)) f = ConfigurationFile(root_prefix() + dbcache_file) pif_uuid = getUUID() network_uuid = getUUID() f.write('\n') f.write('\n') f.write('\t\n' % pif_uuid) f.write('\t\tOpaqueRef:%s\n' % network_uuid) f.write('\t\tTrue\n') f.write('\t\t%sPif\n' % interface) f.write('\t\tOpaqueRef:NULL\n') f.write('\t\t\n') f.write('\t\t\n') f.write('\t\tOpaqueRef:NULL\n') f.write('\t\t-1\n') f.write('\t\t\n') f.write('\t\t\n') f.write('\t\t%s\n' % interface) f.write('\t\t%s\n' % mac) f.write('\t\t\n') if mode == 'dhcp': f.write('\t\tDHCP\n') f.write('\t\t\n') f.write('\t\t\n') f.write('\t\t\n') f.write('\t\t\n') elif mode == 'static': f.write('\t\tStatic\n') f.write('\t\t%s\n' % ip) f.write('\t\t%s\n' % netmask) if gateway is not None: f.write('\t\t%s\n' % gateway) f.write('\t\t\n') else: raise Error("Unknown mode %s" % mode) f.write('\t\n') f.write('\t\n' % network_uuid) f.write('\t\tInitialManagementNetwork\n') f.write('\t\t\n') f.write('\t\t\tOpaqueRef:%s\n' % pif_uuid) f.write('\t\t\n') f.write('\t\t%s\n' % bridge) f.write('\t\t\n') f.write('\t\n') f.write('\n') f.close() try: f.apply() f.commit() except Error, e: log("failed to apply changes: %s" % e.msg) f.revert() raise def main(argv=None): global 
management_pif session = None pif_uuid = None pif = None force_interface = None force_management = False if argv is None: argv = sys.argv try: try: shortops = "h" longops = [ "pif=", "pif-uuid=", "session=", "force=", "force-interface=", "management", "mac=", "device=", "mode=", "ip=", "netmask=", "gateway=", "root-prefix=", "no-syslog", "help" ] arglist, args = getopt.gnu_getopt(argv[1:], shortops, longops) except getopt.GetoptError, msg: raise Usage(msg) force_rewrite_config = {} for o,a in arglist: if o == "--pif": pif = a elif o == "--pif-uuid": pif_uuid = a elif o == "--session": session = a elif o == "--force-interface" or o == "--force": force_interface = a elif o == "--management": force_management = True elif o in ["--mac", "--device", "--mode", "--ip", "--netmask", "--gateway"]: force_rewrite_config[o[2:]] = a elif o == "--root-prefix": set_root_prefix(a) elif o == "--no-syslog": set_log_destination("stderr") elif o == "-h" or o == "--help": print __doc__ % {'command-name': os.path.basename(argv[0])} return 0 if get_log_destination() == "syslog": syslog.openlog(os.path.basename(argv[0])) log("Called as " + str.join(" ", argv)) if len(args) < 1: raise Usage("Required option not present") if len(args) > 1: raise Usage("Too many arguments") action = args[0] if not action in ["up", "down", "rewrite", "rewrite-configuration"]: raise Usage("Unknown action \"%s\"" % action) # backwards compatibility if action == "rewrite-configuration": action = "rewrite" if ( session or pif ) and pif_uuid: raise Usage("--session/--pif and --pif-uuid are mutually exclusive.") if ( session and not pif ) or ( not session and pif ): raise Usage("--session and --pif must be used together.") if force_interface and ( session or pif or pif_uuid ): raise Usage("--force is mutually exclusive with --session, --pif and --pif-uuid") if len(force_rewrite_config) and not (force_interface and action == "rewrite"): raise Usage("\"--force rewrite\" needed for --device, --mode, --ip, --netmask, 
and --gateway") if (action == "rewrite") and (pif or pif_uuid ): raise Usage("rewrite action does not take --pif or --pif-uuid") global db if force_interface: log("Force interface %s %s" % (force_interface, action)) if action == "rewrite": action_force_rewrite(force_interface, force_rewrite_config) elif action in ["up", "down"]: db_init_from_cache(dbcache_file) pif = db().get_pif_by_bridge(force_interface) management_pif = db().get_management_pif() if action == "up": action_up(pif, True) elif action == "down": action_down(pif) else: raise Error("Unknown action %s" % action) else: db_init_from_xenapi(session) if pif_uuid: pif = db().get_pif_by_uuid(pif_uuid) if action == "rewrite": action_rewrite() else: if not pif: raise Usage("No PIF given") if force_management: # pif is going to be the management pif management_pif = pif else: # pif is not going to be the management pif. # Search DB cache for pif on same host with management=true pifrec = db().get_pif_record(pif) management_pif = db().get_management_pif() log_pif_action(action, pif) if not check_allowed(pif): return 0 if action == "up": action_up(pif, False) elif action == "down": action_down(pif) else: raise Error("Unknown action %s" % action) # Save cache. db().save(dbcache_file) except Usage, err: print >>sys.stderr, err.msg print >>sys.stderr, "For help use --help." return 2 except Error, err: log(err.msg) return 1 return 0 if __name__ == "__main__": rc = 1 try: rc = main() except: ex = sys.exc_info() err = traceback.format_exception(*ex) for exline in err: log(exline) syslog.closelog() sys.exit(rc) openvswitch-2.0.1+git20140120/xenserver/usr_lib_xsconsole_plugins-base_XSFeatureVSwitch.py000066400000000000000000000267611226605124000315240ustar00rootroot00000000000000# Copyright (c) 2009,2010,2011,2012,2013 Nicira, Inc. # Copyright (c) 2007-2011 Citrix Systems Inc. 
# # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; version 2 only. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. from XSConsoleLog import * import os import socket import subprocess vsctl="/usr/bin/ovs-vsctl" if __name__ == "__main__": raise Exception("This script is a plugin for xsconsole and cannot run independently") from XSConsoleStandard import * class VSwitchService: service = {} def __init__(self, name, processname=None): self.name = name self.processname = processname if self.processname == None: self.processname = name def version(self): try: output = ShellPipe(["service", self.name, "version"]).Stdout() except StandardError, e: XSLogError("vswitch version retrieval error: " + str(e)) return "" for line in output: if self.processname in line: return line.split()[-1] return "" def status(self): try: output = ShellPipe(["service", self.name, "status"]).Stdout() except StandardError, e: XSLogError("vswitch status retrieval error: " + str(e)) return "" if len(output) == 0: return "" for line in output: if self.processname not in line: continue elif "running" in line: return "Running" elif "stop" in line: return "Stopped" else: return "" return "" def restart(self): try: ShellPipe(["service", self.name, "restart"]).Call() except StandardError, e: XSLogError("vswitch restart error: " + str(e)) @classmethod def Inst(cls, name, processname=None): key = name if processname != None: key = key + "-" + processname if name not in cls.service: 
cls.service[key] = VSwitchService(name, processname) return cls.service[key] class VSwitchConfig: @staticmethod def Get(action): try: arg = [vsctl, "-vconsole:off"] + action.split() output = ShellPipe(arg).Stdout() except StandardError, e: XSLogError("config retrieval error: " + str(e)) return "" if len(output) == 0: output = "" else: output = output[0].strip() return output class VSwitchControllerDialogue(Dialogue): def __init__(self): Dialogue.__init__(self) data=Data.Inst() self.hostsInPool = 0 self.hostsUpdated = 0 self.xs_version = data.host.software_version.product_version('') pool = data.GetPoolForThisHost() if pool is not None: self.controller = pool.get("vswitch_controller", "") else: self.controller = "" choiceDefs = [ ChoiceDef(Lang("Set pool-wide controller"), lambda: self.getController()), ChoiceDef(Lang("Delete pool-wide controller"), lambda: self.deleteController()), ChoiceDef(Lang("Resync server controller config"), lambda: self.syncController()), # ChoiceDef(Lang("Restart ovs-vswitchd"), # lambda: self.restartService("vswitch")), ] self.menu = Menu(self, None, Lang("Configure Open vSwitch"), choiceDefs) self.ChangeState("INITIAL") def BuildPane(self): pane = self.NewPane(DialoguePane(self.parent)) pane.TitleSet(Lang("Configure Open vSwitch")) pane.AddBox() def ChangeState(self, inState): self.state = inState self.BuildPane() self.UpdateFields() def UpdateFields(self): self.Pane().ResetPosition() getattr(self, "UpdateFields" + self.state)() # Dispatch method named 'UpdateFields'+self.state def UpdateFieldsINITIAL(self): pane = self.Pane() pane.AddTitleField(Lang("Select an action")) pane.AddMenuField(self.menu) pane.AddKeyHelpField( { Lang("") : Lang("OK"), Lang("") : Lang("Cancel") } ) def UpdateFieldsGETCONTROLLER(self): pane = self.Pane() pane.ResetFields() pane.AddTitleField(Lang("Enter IP address of controller")) pane.AddInputField(Lang("Address", 16), self.controller, "address") pane.AddKeyHelpField( { Lang("") : Lang("OK"), Lang("") : 
Lang("Exit") } ) if pane.CurrentInput() is None: pane.InputIndexSet(0) def HandleKey(self, inKey): handled = False if hasattr(self, "HandleKey" + self.state): handled = getattr(self, "HandleKey" + self.state)(inKey) if not handled and inKey == 'KEY_ESCAPE': Layout.Inst().PopDialogue() handled = True return handled def HandleKeyINITIAL(self, inKey): return self.menu.HandleKey(inKey) def HandleKeyGETCONTROLLER(self, inKey): pane = self.Pane() if pane.CurrentInput() is None: pane.InputIndexSet(0) if inKey == 'KEY_ENTER': inputValues = pane.GetFieldValues() self.controller = inputValues['address'] Layout.Inst().PopDialogue() # Make sure the controller is specified as a valid dotted quad try: socket.inet_aton(self.controller) except socket.error: Layout.Inst().PushDialogue(InfoDialogue(Lang("Please enter in dotted quad format"))) return True Layout.Inst().TransientBanner(Lang("Setting controller...")) try: self.SetController(self.controller) Layout.Inst().PushDialogue(InfoDialogue(Lang("Setting controller successful"))) except Exception, e: Layout.Inst().PushDialogue(InfoDialogue(Lang("Setting controller failed"))) self.ChangeState("INITIAL") return True else: return pane.CurrentInput().HandleKey(inKey) def restartService(self, name): s = VSwitchService.Inst(name) s.restart() Layout.Inst().PopDialogue() def getController(self): self.ChangeState("GETCONTROLLER") self.Pane().InputIndexSet(0) def deleteController(self): self.controller = "" Layout.Inst().PopDialogue() Layout.Inst().TransientBanner(Lang("Deleting controller...")) try: self.SetController(None) Layout.Inst().PushDialogue(InfoDialogue(Lang("Controller deletion successful"))) except Exception, e: Layout.Inst().PushDialogue(InfoDialogue(Lang("Controller deletion failed"))) def syncController(self): Layout.Inst().PopDialogue() Layout.Inst().TransientBanner(Lang("Resyncing controller setting...")) try: Task.Sync(lambda s: self._updateThisServer(s)) Layout.Inst().PushDialogue(InfoDialogue(Lang("Resyncing controller 
config successful"))) except Exception, e: Layout.Inst().PushDialogue(InfoDialogue(Lang("Resyncing controller config failed"))) def SetController(self, ip): self.hostsInPool = 0 self.hostsUpdated = 0 Task.Sync(lambda s: self._modifyPoolConfig(s, ip or "")) # Should be done asynchronously, maybe with an external script? Task.Sync(lambda s: self._updateActiveServers(s)) def _modifyPoolConfig(self, session, value): """Modify pool configuration. If value == "" then delete configuration, otherwise set to value. """ pools = session.xenapi.pool.get_all() # We assume there is only ever one pool... if len(pools) == 0: XSLogFatal(Lang("No pool found for host.")) return if len(pools) > 1: XSLogFatal(Lang("More than one pool for host.")) return session.xenapi.pool.set_vswitch_controller(value) Data.Inst().Update() def _updateActiveServers(self, session): hosts = session.xenapi.host.get_all() self.hostsUpdated = 0 self.hostsInPool = len(hosts) self.UpdateFields() for host in hosts: Layout.Inst().TransientBanner("Updating host %d out of %d" % (self.hostsUpdated + 1, self.hostsInPool)) session.xenapi.host.call_plugin(host, "openvswitch-cfg-update", "update", {}) self.hostsUpdated = self.hostsUpdated + 1 def _updateThisServer(self, session): data = Data.Inst() host = data.host.opaqueref() session.xenapi.host.call_plugin(host, "openvswitch-cfg-update", "update", {}) class XSFeatureVSwitch: @classmethod def StatusUpdateHandler(cls, inPane): data = Data.Inst() xs_version = data.host.software_version.product_version('') inPane.AddTitleField(Lang("Open vSwitch")) inPane.NewLine() inPane.AddStatusField(Lang("Version", 20), VSwitchService.Inst("openvswitch", "ovs-vswitchd").version()) inPane.NewLine() pool = data.GetPoolForThisHost() if pool is not None: dbController = pool.get("vswitch_controller", "") else: dbController = "" if dbController == "": dbController = Lang("") inPane.AddStatusField(Lang("Controller (config)", 20), dbController) controller = VSwitchConfig.Get("get-manager") 
if controller == "": controller = Lang("") elif controller[0:4] == "ssl:": controller = controller.split(':')[1] inPane.AddStatusField(Lang("Controller (in-use)", 20), controller) inPane.NewLine() inPane.AddStatusField(Lang("ovs-vswitchd status", 20), VSwitchService.Inst("openvswitch", "ovs-vswitchd").status()) inPane.AddStatusField(Lang("ovsdb-server status", 20), VSwitchService.Inst("openvswitch", "ovsdb-server").status()) inPane.AddKeyHelpField( { Lang("") : Lang("Reconfigure"), Lang("") : Lang("Refresh") }) @classmethod def ActivateHandler(cls): DialogueUtils.AuthenticatedOnly(lambda: Layout.Inst().PushDialogue(VSwitchControllerDialogue())) def Register(self): Importer.RegisterNamedPlugIn( self, 'VSwitch', # Key of this plugin for replacement, etc. { 'menuname' : 'MENU_NETWORK', 'menupriority' : 800, 'menutext' : Lang('Open vSwitch') , 'statusupdatehandler' : self.StatusUpdateHandler, 'activatehandler' : self.ActivateHandler } ) # Register this plugin when module is imported, IFF vswitchd is running if os.path.exists('/var/run/openvswitch/ovs-vswitchd.pid'): XSFeatureVSwitch().Register() openvswitch-2.0.1+git20140120/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync000077500000000000000000000326111226605124000304170ustar00rootroot00000000000000#!/usr/bin/python # Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# A daemon to monitor the external_ids columns of the Bridge and # Interface OVSDB tables for changes that require interrogating XAPI. # Its responsibilities include: # # - Set the "bridge-id" key in the Bridge table. # - Set the "iface-id" key in the Interface table. # - Set the fail-mode on internal bridges. import argparse import os import sys import time import XenAPI import ovs.dirs from ovs.db import error from ovs.db import types import ovs.daemon import ovs.db.idl import ovs.unixctl import ovs.unixctl.server vlog = ovs.vlog.Vlog("ovs-xapi-sync") session = None flush_cache = False exiting = False xapi_down = False def unixctl_exit(conn, unused_argv, unused_aux): global exiting exiting = True conn.reply(None) def unixctl_flush_cache(conn, unused_argv, unused_aux): global flush_cache flush_cache = True conn.reply(None) # Set up a session to interact with XAPI. # # On system start-up, OVS comes up before XAPI, so we can't log into the # session until later. Try to do this on-demand, since we won't # actually do anything interesting until XAPI is up. def init_session(): global session if session is not None: return True try: session = XenAPI.xapi_local() session.xenapi.login_with_password("", "") except XenAPI.Failure, e: session = None vlog.warn("Couldn't login to XAPI (%s)" % e) return False return True def get_network_by_bridge(br_name): if not init_session(): vlog.warn("Failed to get bridge id %s because" " XAPI session could not be initialized" % br_name) return None recs = session.xenapi.network.get_all_records_where('field "bridge"="%s"' % br_name) if len(recs) > 0: return recs.values()[0] return None # There are possibilities when multiple xs-network-uuids are set for a bridge. # In cases like that, we should choose the bridge-id associated with the bridge # name. 
# Returns the uuid of the XAPI network record for 'br_name' when that uuid
# is one of 'bridge_ids'.  Otherwise logs a warning, sets the global
# 'xapi_down' flag (main() uses it to retry a second later) and returns
# 'default'.
def get_single_bridge_id(bridge_ids, br_name, default=None):
    global xapi_down

    rec = get_network_by_bridge(br_name)
    if rec and rec['uuid'] in bridge_ids:
        return rec['uuid']

    vlog.warn("Failed to get a single bridge id from Xapi.")
    xapi_down = True
    return default

# By default, the "bridge-id" external id in the Bridge table is the
# same as "xs-network-uuids".  This may be overridden by defining a
# "nicira-bridge-id" key in the "other_config" field of the network
# record of XAPI.  If nicira-bridge-id is undefined returns default.
# On error returns None.
def get_bridge_id(br_name, default=None):
    rec = get_network_by_bridge(br_name)
    if rec:
        return rec['other_config'].get('nicira-bridge-id', default)
    return None


# By default, the "iface-id" external id in the Interface table is the
# same as "xs-vif-uuid".  This may be overridden by defining a
# "nicira-iface-id" key in the "other_config" field of the VIF
# record of XAPI.
#
# Falls back to 'xs_vif_uuid' when no XAPI session is available or the
# XAPI lookup raises XenAPI.Failure.
def get_iface_id(if_name, xs_vif_uuid):
    if not if_name.startswith("vif") and not if_name.startswith("tap"):
        # Treat whatever was passed into 'xs_vif_uuid' as a default
        # value for non-VIFs.
        return xs_vif_uuid

    if not init_session():
        vlog.warn("Failed to get interface id %s because"
                " XAPI session could not be initialized" % if_name)
        return xs_vif_uuid

    try:
        vif = session.xenapi.VIF.get_by_uuid(xs_vif_uuid)
        rec = session.xenapi.VIF.get_record(vif)
        return rec['other_config'].get('nicira-iface-id', xs_vif_uuid)
    except XenAPI.Failure:
        vlog.warn("Could not find XAPI entry for VIF %s" % if_name)
        return xs_vif_uuid


# By default, the "vm-id" external id in the Interface table is the
# same as "xs-vm-uuid".  This may be overridden by defining a
# "nicira-vm-id" key in the "other_config" field of the VM
# record of XAPI.
#
# Falls back to 'xs_vm_uuid' when no XAPI session is available or the
# XAPI lookup raises XenAPI.Failure.
def get_vm_id(if_name, xs_vm_uuid):
    if not if_name.startswith("vif") and not if_name.startswith("tap"):
        # Treat whatever was passed into 'xs_vm_uuid' as a default
        # value for non-VIFs.
        return xs_vm_uuid

    if not init_session():
        vlog.warn("Failed to get vm id for interface id %s because"
                " XAPI session could not be initialized" % if_name)
        return xs_vm_uuid

    try:
        vm = session.xenapi.VM.get_by_uuid(xs_vm_uuid)
        rec = session.xenapi.VM.get_record(vm)
        return rec['other_config'].get('nicira-vm-id', xs_vm_uuid)
    except XenAPI.Failure:
        vlog.warn("Could not find XAPI entry for VIF %s" % if_name)
        return xs_vm_uuid


# Makes d[key] equal to 'value', where a 'value' of None means "key
# absent".  Returns True if 'd' was modified, False if it already matched.
def set_or_delete(d, key, value):
    if value is None:
        if key in d:
            del d[key]
            return True
    else:
        if d.get(key) != value:
            d[key] = value
            return True
    return False


# Sets (or, for a 'value' of None, removes) 'key' in row.external_ids.
# The column is only written back when the dictionary actually changed.
def set_external_id(row, key, value):
    row.verify("external_ids")
    external_ids = row.external_ids
    if set_or_delete(external_ids, key, value):
        row.external_ids = external_ids


# XenServer does not call interface-reconfigure on internal networks,
# which is where the fail-mode would normally be set.
#
# The fail mode is taken from the network record's other_config, then from
# the pool record's other_config when there is exactly one pool.  Anything
# other than 'standalone' or 'secure' becomes 'standalone'.
def update_fail_mode(row):
    rec = get_network_by_bridge(row.name)
    if not rec:
        return

    fail_mode = rec['other_config'].get('vswitch-controller-fail-mode')

    if not fail_mode:
        # 'session' is set here: get_network_by_bridge() only returns a
        # record after init_session() succeeded.
        pools = session.xenapi.pool.get_all()
        if len(pools) == 1:
            prec = session.xenapi.pool.get_record(pools[0])
            fail_mode = prec['other_config'].get(
                    'vswitch-controller-fail-mode')

    if fail_mode not in ['standalone', 'secure']:
        fail_mode = 'standalone'

    row.verify("fail_mode")
    if row.fail_mode != fail_mode:
        row.fail_mode = fail_mode


# Copies the network record's 'vswitch-disable-in-band' setting into the
# bridge's other_config as 'disable-in-band'.  An unset value removes the
# key; a value other than 'true' or 'false' is only warned about.
def update_in_band_mgmt(row):
    rec = get_network_by_bridge(row.name)
    if not rec:
        return

    dib = rec['other_config'].get('vswitch-disable-in-band')

    row.verify("other_config")
    other_config = row.other_config
    if dib and dib not in ['true', 'false']:
        vlog.warn('"%s" isn\'t a valid setting for '
                "other_config:disable-in-band on %s" % (dib, row.name))
    elif set_or_delete(other_config, 'disable-in-band', dib):
        row.other_config = other_config


# Daemon entry point: parses arguments, connects to the database named on
# the command line, waits for the XAPI cookie file, then keeps the Bridge
# and Interface external ids in step with XAPI until the "exit" unixctl
# command is received.
def main():
    global flush_cache, xapi_down

    parser = argparse.ArgumentParser()
    parser.add_argument("database", metavar="DATABASE",
            help="A socket on which ovsdb-server is listening.")
    parser.add_argument("--root-prefix", metavar="DIR", default='',
                        help="Use DIR as alternate root directory"
                        " (for testing).")

    ovs.vlog.add_args(parser)
    ovs.daemon.add_args(parser)
    args = parser.parse_args()
    ovs.vlog.handle_args(args)
    ovs.daemon.handle_args(args)

    remote = args.database
    # Only the columns this daemon reads or writes are registered.
    schema_helper = ovs.db.idl.SchemaHelper()
    schema_helper.register_columns("Bridge", ["name", "external_ids",
                                              "other_config", "fail_mode"])
    schema_helper.register_columns("Interface", ["name", "external_ids"])
    idl = ovs.db.idl.Idl(remote, schema_helper)

    ovs.daemon.daemonize()

    ovs.unixctl.command_register("exit", "", 0, 0, unixctl_exit, None)
    ovs.unixctl.command_register("flush-cache", "", 0, 0, unixctl_flush_cache,
                                 None)
    # NOTE(review): this local 'error' shadows the module-level
    # "from ovs.db import error" inside main().
    error, unixctl_server = ovs.unixctl.server.UnixctlServer.create(None)
    if error:
        ovs.util.ovs_fatal(error, "could not create unixctl server", vlog)

    # This daemon is usually started before XAPI, but to complete our
    # tasks, we need it.  Wait here until it's up.
    cookie_file = args.root_prefix + "/var/run/xapi_init_complete.cookie"
    while not os.path.exists(cookie_file):
        time.sleep(1)

    bridges = {}                # Map from bridge name to nicira-bridge-id
    iface_ids = {}              # Map from xs-vif-uuid to iface-id
    vm_ids = {}                 # Map from xs-vm-uuid to vm-id
    seqno = idl.change_seqno    # Sequence number when we last processed the db
    while True:
        unixctl_server.run()
        if exiting:
            break;

        idl.run()
        if not xapi_down and not flush_cache and seqno == idl.change_seqno:
            # Nothing changed and nothing is pending: wait on the unixctl
            # server and the IDL, then start the loop again.
            poller = ovs.poller.Poller()
            unixctl_server.wait(poller)
            idl.wait(poller)
            poller.block()
            continue

        if xapi_down:
            vlog.warn("Xapi is probably down. Retry again after a second.")
            time.sleep(1)
            xapi_down = False

        if flush_cache:
            vlog.info("Flushing cache as the result of unixctl.")
            bridges = {}
            iface_ids = {}
            vm_ids = {}
            flush_cache = False
        seqno = idl.change_seqno

        txn = ovs.db.idl.Transaction(idl)

        # Bridges: only bridges without a cached id are (re)configured and
        # looked up; the cache is rebuilt from the rows seen in this pass.
        new_bridges = {}
        for row in idl.tables["Bridge"].rows.itervalues():
            bridge_id = bridges.get(row.name)
            if bridge_id is None:
                # Configure the new bridge.
                update_fail_mode(row)
                update_in_band_mgmt(row)

                # Get the correct bridge_id, if we can.
                bridge_id = get_bridge_id(row.name)
                if bridge_id is None:
                    xs_network_uuids = row.external_ids.get("xs-network-uuids")
                    if xs_network_uuids:
                        bridge_ids = xs_network_uuids.split(";")
                        if len(bridge_ids) == 1:
                            bridge_id = bridge_ids[0]
                        else:
                            bridge_id = get_single_bridge_id(bridge_ids,
                                                             row.name)
            set_external_id(row, "bridge-id", bridge_id)

            if bridge_id is not None:
                new_bridges[row.name] = bridge_id
        bridges = new_bridges

        # Index interfaces by name so vif/tap pairs can find each other.
        iface_by_name = {}
        for row in idl.tables["Interface"].rows.itervalues():
            iface_by_name[row.name] = row

        new_iface_ids = {}
        new_vm_ids = {}
        for row in idl.tables["Interface"].rows.itervalues():
            # Match up paired vif and tap devices.
            if row.name.startswith("vif"):
                vif = row
                tap = iface_by_name.get("tap%s" % row.name[3:])
            elif row.name.startswith("tap"):
                tap = row
                vif = iface_by_name.get("vif%s" % row.name[3:])
            else:
                tap = vif = None

            # Several tap external-ids need to be copied from the vif.
            if row == tap and vif:
                keys = ["attached-mac",
                        "xs-network-uuid",
                        "xs-vif-uuid",
                        "xs-vm-uuid"]
                for k in keys:
                    set_external_id(row, k, vif.external_ids.get(k))

            # Map from xs-vif-uuid to iface-id.
            #
            # (A tap's xs-vif-uuid comes from its vif.  That falls out
            # naturally from the copy loop above.)
            xvu = row.external_ids.get("xs-vif-uuid")
            if xvu:
                # Prefer a value found in this pass, then the previous
                # pass's cache, and only then ask XAPI.
                iface_id = (new_iface_ids.get(xvu)
                            or iface_ids.get(xvu)
                            or get_iface_id(row.name, xvu))
                new_iface_ids[xvu] = iface_id
            else:
                # No xs-vif-uuid therefore no iface-id.
                iface_id = None
            set_external_id(row, "iface-id", iface_id)

            # Map from xs-vm-uuid to vm-id.
            xvmu = row.external_ids.get("xs-vm-uuid")
            if xvmu:
                vm_id = (new_vm_ids.get(xvmu)
                         or vm_ids.get(xvmu)
                         or get_vm_id(row.name, xvmu))
                new_vm_ids[xvmu] = vm_id
            else:
                vm_id = None
            set_external_id(row, "vm-id", vm_id)

            # When there's a vif and a tap, the tap is active (used for
            # traffic).  When there's just a vif, the vif is active.
            #
            # A tap on its own shouldn't happen, and we don't know
            # anything about other kinds of devices, so we don't use
            # an iface-status for those devices at all.
            if vif and tap:
                set_external_id(tap, "iface-status", "active")
                set_external_id(vif, "iface-status", "inactive")
            elif vif:
                set_external_id(vif, "iface-status", "active")
            else:
                set_external_id(row, "iface-status", None)
        iface_ids = new_iface_ids
        vm_ids = new_vm_ids

        txn.add_comment("ovs-xapi-sync: Updating records from XAPI")
        txn.commit_block()

    unixctl_server.close()
    idl.close()


if __name__ == '__main__':
    try:
        main()
    except SystemExit:
        # Let system.exit() calls complete normally
        raise
    except:
        # Any other failure: log the traceback and exit with the daemon
        # module's restart exit code.
        vlog.exception("traceback")
        sys.exit(ovs.daemon.RESTART_EXIT_CODE)
openvswitch-2.0.1+git20140120/xenserver/usr_share_openvswitch_scripts_sysconfig.template000066400000000000000000000015521226605124000315120ustar00rootroot00000000000000### Configuration options for openvswitch # Copyright (C) 2009, 2010, 2011 Nicira, Inc. # FORCE_COREFILES: If 'yes' then core files will be enabled. # FORCE_COREFILES=yes # OVSDB_SERVER_PRIORITY: "nice" priority at which to run ovsdb-server. # # OVSDB_SERVER_PRIORITY=-10 # VSWITCHD_PRIORITY: "nice" priority at which to run ovs-vswitchd. # VSWITCHD_PRIORITY=-10 # VSWITCHD_MLOCKALL: Whether to pass ovs-vswitchd the --mlockall option. # This option should be set to "yes" or "no". The default is "yes". # Enabling this option can avoid networking interruptions due to # system memory pressure in extraordinary situations, such as multiple # concurrent VM import operations. # VSWITCHD_MLOCKALL=yes # OVS_CTL_OPTS: Extra options to pass to ovs-ctl. This is, for example, # a suitable place to specify --ovs-vswitchd-wrapper=valgrind. # OVS_CTL_OPTS=